Source code for imputer.marginal_imputer

from .base import BaseImputer
import random


class MarginalImputer(BaseImputer):
    """Imputer that overrides selected features with values drawn from stored data.

    For every requested sample, values for the features in ``feature_subset``
    are drawn at random from the instances held by ``storage_object`` and
    merged over the instance being explained before the model is called.

    Args:
        model_function: Callable invoked with a single dict of feature values.
            It is forwarded to ``BaseImputer.__init__``; ``impute`` reads it
            back as ``self.model_function`` (presumably set by the base
            class - confirm against ``BaseImputer``).
        sampling_strategy: ``'joint'`` draws all requested features from one
            randomly chosen stored instance. Any other value draws each
            feature from its own independently chosen instance.
        storage_object: Object exposing ``get_data()``, which must return a
            2-tuple whose first element is the collection of stored feature
            instances (the second element is ignored here).
    """

    def __init__(self, model_function, sampling_strategy, storage_object):
        self.sampling_strategy = sampling_strategy
        self.storage_object = storage_object
        super().__init__(
            model_function=model_function
        )

    # TODO - random seed - create separate issue
    def _sample(self, storage_object, feature_subset):
        """Draw one value per feature in ``feature_subset`` from stored data.

        Args:
            storage_object: Source of stored instances; ``get_data()`` is
                called on every invocation.
            feature_subset: Iterable of feature names to sample.

        Returns:
            dict mapping each feature name to its sampled value.
        """
        features, _ = storage_object.get_data()
        if self.sampling_strategy == 'joint':
            return self._sample_marginals(features, feature_subset)
        # Every strategy other than 'joint' falls back to independent draws.
        return self._sample_product_marginals(features, feature_subset)

    def impute(self, feature_subset, x_i, n_samples=1):
        """Evaluate the model on ``x_i`` with ``feature_subset`` re-sampled.

        Args:
            feature_subset: Iterable of feature names whose values are
                replaced by sampled ones.
            x_i: Mapping of feature name to value for the instance of
                interest; it is not modified.
            n_samples: Number of independent imputations to evaluate.

        Returns:
            list with one model output per sample, in sampling order.
        """
        predictions = []
        for _ in range(n_samples):
            sampled_values = self._sample(self.storage_object, feature_subset)
            # Sampled values take precedence over the originals in x_i.
            predictions.append(self.model_function({**x_i, **sampled_values}))
        return predictions

    @staticmethod
    def _sample_marginals(features, feature_subset):
        """Sample all requested features jointly from one random instance.

        Args:
            features: Collection supporting ``len()`` and integer indexing;
                each element is indexable by feature name.
            feature_subset: Iterable of feature names to read.

        Returns:
            dict mapping each feature name to the chosen instance's value.

        Raises:
            ValueError: If ``features`` is empty (raised by
                ``random.randrange``).
        """
        # The instance is only read from, so no defensive copy is needed.
        sampled_instance = features[random.randrange(len(features))]
        return {
            feature_name: sampled_instance[feature_name]
            for feature_name in feature_subset
        }

    @staticmethod
    def _sample_product_marginals(features, feature_subset):
        """Sample every requested feature from its own random instance.

        Args:
            features: Collection supporting ``len()`` and integer indexing;
                each element is indexable by feature name.
            feature_subset: Iterable of feature names to sample.

        Returns:
            dict mapping each feature name to a value taken from an
            independently chosen instance.

        Raises:
            ValueError: If ``features`` is empty and ``feature_subset`` is
                not (raised by ``random.randrange``).
        """
        # One independent draw per feature; read the value directly instead
        # of copying the whole instance for every feature.
        return {
            feature_name: features[random.randrange(len(features))][feature_name]
            for feature_name in feature_subset
        }