Coverage for credoai/evaluators/feature_drift.py: 98%

59 statements  

coverage.py v7.1.0, created at 2023-02-13 21:56 +0000

1"""Feature Drift evaluator""" 

2import pandas as pd 

3from connect.evidence import MetricContainer, TableContainer 

4 

5from credoai.artifacts import ClassificationModel 

6from credoai.evaluators.evaluator import Evaluator 

7from credoai.evaluators.utils.validation import check_requirements_existence 

8from credoai.modules.metrics_credoai import population_stability_index 

9 

10 

11class FeatureDrift(Evaluator): 

12 """ 

13 Measure Feature Drift using population stability index. (Experimental) 

14 

15 This evaluator measures feature drift in: 

16 

17 1. Model prediction: the prediction for the assessment dataset is compared 

18 to the prediction for the training dataset. 

19 In the case of classifiers, the prediction is performed with predict proba if available. 

20 If it is not available, the prediction is treated like a categorical variable, see the 

21 processing of categorical variables in the item below. 

22 

23 2. Dataset features: 1 to 1 comparison across all features for the datasets. This is also 

24 referred to as "characteristic stability index" (CSI). Features are processed depending 

25 on their type: 

26 

27 - Numerical features are directly fed into the population_stability_index metric, and 

28 binned according to the parameters specified at init time. 

29 - Categorical features percentage distribution is manually calculated. The % amount of 

30 samples per each class is calculated and then fed into the population_stability_index metric. 

31 The percentage flag in the metric is set to True, to bypass the internal binning process. 

32 

33 

34 Parameters 

35 ---------- 

36 buckets : int, optional 

37 Number of buckets to consider to bin the predictions, by default 10 

38 buckettype : Literal["bins", "quantiles"] 

39 Type of strategy for creating buckets, bins splits into even splits, 

40 quantiles splits into quantiles buckets, by default "bins" 

41 csi_calculation : bool, optional 

42 Calculate characteristic stability index, i.e., PSI for all features in the datasets, 

43 by default False 

44 """ 

45 

46 required_artifacts = {"model", "assessment_data", "training_data"} 

47 

48 def __init__(self, buckets: int = 10, buckettype="bins", csi_calculation=False): 

49 

50 self.bucket_number = buckets 

51 self.buckettype = buckettype 

52 self.csi_calculation = csi_calculation 

53 self.percentage = False 

54 super().__init__() 

55 

56 def _validate_arguments(self): 

57 check_requirements_existence(self) 

58 

59 def _setup(self): 

60 # Default prediction to predict method 

61 prediction_method = self.model.predict 

62 if isinstance(self.model, ClassificationModel): 

63 if hasattr(self.model, "predict_proba"): 

64 prediction_method = self.model.predict_proba 

65 else: 

66 self.percentage = True 

67 

68 self.expected_prediction = prediction_method(self.training_data.X) 

69 self.actual_prediction = prediction_method(self.assessment_data.X) 

70 

71 # Create the bins manually for categorical prediction if predict_proba 

72 # is not available. 

73 if self.percentage: 

74 ( 

75 self.expected_prediction, 

76 self.actual_prediction, 

77 ) = self._create_bin_percentage( 

78 self.expected_prediction, self.actual_prediction 

79 ) 

80 

81 def evaluate(self): 

82 prediction_psi = self._calculate_psi_on_prediction() 

83 self.results = [MetricContainer(prediction_psi, **self.get_info())] 

84 if self.csi_calculation: 

85 csi = self._calculate_csi() 

86 self.results.append(TableContainer(csi, **self.get_info())) 

87 return self 

88 

89 def _calculate_psi_on_prediction(self) -> pd.DataFrame: 

90 """ 

91 Calculate the psi index on the model prediction. 

92 

93 Returns 

94 ------- 

95 DataFrame 

96 Formatted for metric container. 

97 """ 

98 psi = population_stability_index( 

99 self.expected_prediction, 

100 self.actual_prediction, 

101 percentage=self.percentage, 

102 buckets=self.bucket_number, 

103 buckettype=self.buckettype, 

104 ) 

105 res = pd.DataFrame( 

106 {"value": psi, "type": "population_stability_index"}, index=[0] 

107 ) 

108 return res 

109 

110 def _calculate_csi(self) -> pd.DataFrame: 

111 """ 

112 Calculate psi for all the columns in the dataframes. 

113 

114 Returns 

115 ------- 

116 DataFrame 

117 Formatted for the table container. 

118 """ 

119 columns_names = list(self.assessment_data.X.columns) 

120 psis = {} 

121 for col_name in columns_names: 

122 train_data = self.training_data.X[col_name] 

123 assess_data = self.assessment_data.X[col_name] 

124 if self.assessment_data.X[col_name].dtype == "category": 

125 train, assess = self._create_bin_percentage(train_data, assess_data) 

126 psis[col_name] = population_stability_index(train, assess, True) 

127 else: 

128 psis[col_name] = population_stability_index(train_data, assess_data) 

129 psis = pd.DataFrame.from_dict(psis, orient="index") 

130 psis = psis.reset_index() 

131 psis.columns = ["feature_names", "value"] 

132 psis.name = "Characteristic Stability Index" 

133 return psis 

134 

135 @staticmethod 

136 def _create_bin_percentage(train: pd.Series, assess: pd.Series) -> tuple: 

137 """ 

138 In case of categorical values proceed to count the instances 

139 of each class and divide by the total amount of samples to get 

140 the ratios. 

141 

142 Parameters 

143 ---------- 

144 train : Series 

145 Array of values, dtype == category 

146 assess : Series 

147 Array of values, dtype == category 

148 

149 Returns 

150 ------- 

151 tuple 

152 Class percentages for both arrays 

153 """ 

154 len_training = len(train) 

155 len_assessment = len(assess) 

156 train_bin_perc = train.value_counts() / len_training 

157 assess_bin_perc = assess.value_counts() / len_assessment 

158 return train_bin_perc, assess_bin_perc
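
For reference, below is a minimal sketch of the categorical path in isolation: class counts are normalized to ratios (as in _create_bin_percentage) and passed to population_stability_index with the percentage flag set, mirroring the categorical branch of _calculate_csi. The series data and variable names are illustrative, not part of the evaluator, and the snippet assumes credoai is installed; it calls the metric directly rather than running the full evaluator.

# Illustrative data; the real evaluator pulls these columns from the
# training and assessment data artifacts.
import pandas as pd

from credoai.modules.metrics_credoai import population_stability_index

train_col = pd.Series(["a", "a", "b", "c", "b", "a"], dtype="category")
assess_col = pd.Series(["a", "b", "b", "c", "c", "c"], dtype="category")

# Same normalization as FeatureDrift._create_bin_percentage: class counts
# divided by the number of samples.
train_perc = train_col.value_counts() / len(train_col)
assess_perc = assess_col.value_counts() / len(assess_col)

# Passing the percentages with the third (percentage) argument set to True
# bypasses the metric's internal binning, as in _calculate_csi.
psi = population_stability_index(train_perc, assess_perc, True)
print(psi)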