Coverage for credoai/evaluators/performance.py: 92% (86 statements)

coverage.py v7.1.0, created at 2023-02-13 21:56 +0000

import numpy as np
import pandas as pd
from connect.evidence import MetricContainer, TableContainer
from sklearn.metrics import confusion_matrix

from credoai.artifacts import ClassificationModel
from credoai.evaluators.evaluator import Evaluator
from credoai.evaluators.utils.fairlearn import setup_metric_frames
from credoai.evaluators.utils.validation import check_data_for_nulls, check_existence
from credoai.modules.metrics import process_metrics
from credoai.utils.common import ValidationError



class Performance(Evaluator):
    """
    Performance evaluator for Credo AI.

    This evaluator calculates overall performance metrics. It handles any metric
    that can be calculated on a set of ground-truth labels and predictions,
    e.g., binary classification, multi-class classification, regression.

    This module takes in a set of metrics and provides functionality to:

    - calculate the metrics
    - create disaggregated metrics

    Parameters
    ----------
    metrics : List-like
        List of metric names as strings or list of Metric objects
        (credoai.modules.metrics.Metric). Metric strings should be in the list
        returned by credoai.modules.metric_utils.list_metrics(). Note that for
        performance parity metrics like "false negative rate parity", just list
        "false negative rate"; parity metrics are calculated automatically if the
        corresponding performance metric is supplied.
    y_true : (List, pandas.Series, numpy.ndarray)
        The ground-truth labels (for classification) or target values (for regression).
    y_pred : (List, pandas.Series, numpy.ndarray)
        The predicted labels for classification.
    y_prob : (List, pandas.Series, numpy.ndarray), optional
        The unthresholded predictions, confidence values, or probabilities.
    """


    required_artifacts = {"model", "assessment_data"}

    def __init__(self, metrics=None):
        super().__init__()
        # assign variables
        self.metrics = metrics
        self.metric_frames = {}
        self.processed_metrics = None


    def _validate_arguments(self):
        check_existence(self.metrics, "metrics")
        check_existence(self.assessment_data.y, "y")
        check_data_for_nulls(
            self.assessment_data, "Data", check_X=True, check_y=True, check_sens=False
        )


    def _setup(self):
        # data variables
        self.y_true = self.assessment_data.y
        self.y_pred = self.model.predict(self.assessment_data.X)
        try:
            self.y_prob = self.model.predict_proba(self.assessment_data.X)
        except Exception:
            # models without probability outputs (e.g., regressors) fall back to None
            self.y_prob = None
        self.update_metrics(self.metrics)
        return self


    def evaluate(self):
        """
        Run the performance evaluator and store the results.
        """
        results = []
        overall_metrics = self.get_overall_metrics()
        threshold_metrics = self.get_overall_threshold_metrics()

        if overall_metrics is not None:
            results.append(overall_metrics)
        if threshold_metrics is not None:
            results += threshold_metrics

        if isinstance(self.model, ClassificationModel):
            results.append(self._create_confusion_container())
        self.results = results
        return self


    def update_metrics(self, metrics, replace=True):
        """Replace or extend the list of metrics

        Parameters
        ----------
        metrics : List-like
            List of metric names as strings or list of Metrics (credoai.metrics.Metric).
            Metric strings should be in the list returned by credoai.modules.list_metrics.
            Note that for performance parity metrics like "false negative rate parity",
            just list "false negative rate"; parity metrics are calculated automatically
            if the corresponding performance metric is supplied.
        replace : bool, optional
            If True (default), replace the current metrics; otherwise append to them.
        """
        if replace:
            self.metrics = metrics
        else:
            self.metrics += metrics

        self.processed_metrics, _ = process_metrics(self.metrics, self.model.type)

        dummy_sensitive = pd.Series(["NA"] * len(self.y_true), name="NA")
        self.metric_frames = setup_metric_frames(
            self.processed_metrics,
            self.y_pred,
            self.y_prob,
            self.y_true,
            dummy_sensitive,
        )


    def get_df(self):
        """Return dataframe of input arrays

        Returns
        -------
        pandas.DataFrame
            Dataframe containing the input arrays
        """
        df = pd.DataFrame({"true": self.y_true, "pred": self.y_pred})
        if self.y_prob is not None:
            y_prob_df = pd.DataFrame(self.y_prob)
            y_prob_df.columns = [f"y_prob_{i}" for i in range(y_prob_df.shape[1])]
            df = pd.concat([df, y_prob_df], axis=1)

        return df


    def get_overall_metrics(self):
        """Return overall scalar performance metrics

        Returns
        -------
        MetricContainer or None
            The overall performance metrics, or None if no scalar metrics were computed
        """
        # overall metrics are the same for every sensitive feature, so retrieve them once
        overall_metrics = [
            metric_frame.overall
            for name, metric_frame in self.metric_frames.items()
            if name != "thresh"
        ]
        if not overall_metrics:
            return

        output_series = (
            pd.concat(overall_metrics, axis=0).rename(index="value").to_frame()
        )
        output_series = output_series.reset_index().rename({"index": "type"}, axis=1)

        return MetricContainer(output_series, **self.get_info())


    def get_overall_threshold_metrics(self):
        """Return overall threshold-dependent performance metrics

        Returns
        -------
        list of TableContainer or None
            One table per threshold-dependent metric, or None if none were computed
        """
        # threshold metrics are the same for every sensitive feature, so retrieve them once
        if "thresh" not in self.metric_frames:
            return

        threshold_results = (
            pd.concat([self.metric_frames["thresh"].overall], axis=0)
            .rename(index="value")
            .to_frame()
        )
        threshold_results = threshold_results.reset_index().rename(
            {"index": "threshold_metric"}, axis=1
        )
        threshold_results.name = "threshold_metric_performance"

        results = []
        for _, threshold_metric in threshold_results.iterrows():
            metric = threshold_metric.threshold_metric
            threshold_metric.value.name = "threshold_dependent_performance"
            results.append(
                TableContainer(
                    threshold_metric.value,
                    **self.get_info({"metric_type": metric}),
                )
            )

        return results


    def _create_confusion_container(self):
        confusion_container = TableContainer(
            create_confusion_matrix(self.y_true, self.y_pred),
            **self.get_info(),
        )
        return confusion_container



############################################
## Evaluation helper functions
##
## These helpers create evidence that is
## passed to .evaluate and wrapped by
## evidence containers.
############################################

def create_confusion_matrix(y_true, y_pred):
    """Create a confusion matrix as a dataframe

    Parameters
    ----------
    y_true : pd.Series of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,)
        Estimated targets as returned by a classifier.

    Returns
    -------
    pandas.DataFrame
        Long-format confusion matrix with columns true_label, predicted_label, and
        value, where rows are normalized over the true labels.
    """
    labels = np.unique(y_true)
    # normalize over the true (row) labels so each row sums to 1
    confusion = confusion_matrix(y_true, y_pred, normalize="true", labels=labels)
    confusion_df = pd.DataFrame(confusion, index=labels.copy(), columns=labels)
    confusion_df.index.name = "true_label"
    # melt to long format: one row per (true_label, predicted_label) pair
    confusion_df = confusion_df.reset_index().melt(
        id_vars=["true_label"], var_name="predicted_label"
    )
    confusion_df.name = "Confusion Matrix"
    return confusion_df
226 return confusion_df