Coverage for credoai/evaluators/performance.py: 92% (86 statements)
import numpy as np
import pandas as pd
from connect.evidence import MetricContainer, TableContainer
from sklearn.metrics import confusion_matrix

from credoai.artifacts import ClassificationModel
from credoai.evaluators.evaluator import Evaluator
from credoai.evaluators.utils.fairlearn import setup_metric_frames
from credoai.evaluators.utils.validation import check_data_for_nulls, check_existence
from credoai.modules.metrics import process_metrics
from credoai.utils.common import ValidationError


class Performance(Evaluator):
    """
    Performance evaluator for Credo AI.

    This evaluator calculates overall performance metrics.
    It handles any metric that can be calculated on a set of ground-truth labels and
    predictions, e.g., binary classification, multi-class classification, regression.

    This module takes in a set of metrics and provides functionality to:

    - calculate the metrics
    - create disaggregated metrics

    Parameters
    ----------
    metrics : List-like
        List of metric names as strings or list of Metric objects
        (credoai.modules.metrics.Metric). Metric strings should be in the list
        returned by credoai.modules.metric_utils.list_metrics(). Note that for
        performance parity metrics like "false negative rate parity", just list
        "false negative rate"; parity metrics are calculated automatically if the
        performance metric is supplied.
    y_true : (List, pandas.Series, numpy.ndarray)
        The ground-truth labels (for classification) or target values (for regression).
    y_pred : (List, pandas.Series, numpy.ndarray)
        The predicted labels for classification.
    y_prob : (List, pandas.Series, numpy.ndarray), optional
        The unthresholded predictions, confidence values, or probabilities.
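
    Examples
    --------
    A minimal sketch; the metric names below are illustrative and should be
    checked against the list returned by credoai.modules.metric_utils.list_metrics():

    >>> evaluator = Performance(metrics=["accuracy_score", "f1_score"])

    The evaluator is typically added to a Lens pipeline, which supplies the
    required "model" and "assessment_data" artifacts before evaluate() is called.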
41 """

    required_artifacts = {"model", "assessment_data"}

    def __init__(self, metrics=None):
        super().__init__()
        # assign variables
        self.metrics = metrics
        self.metric_frames = {}
        self.processed_metrics = None

    def _validate_arguments(self):
        check_existence(self.metrics, "metrics")
        check_existence(self.assessment_data.y, "y")
        check_data_for_nulls(
            self.assessment_data, "Data", check_X=True, check_y=True, check_sens=False
        )

    def _setup(self):
        # data variables
        self.y_true = self.assessment_data.y
        self.y_pred = self.model.predict(self.assessment_data.X)
        try:
            self.y_prob = self.model.predict_proba(self.assessment_data.X)
        except Exception:
            # models without usable predict_proba (e.g., regressors) yield no probabilities
            self.y_prob = None
        self.update_metrics(self.metrics)
        return self

    def evaluate(self):
        """
        Run the performance evaluation.
        """
        results = []
        overall_metrics = self.get_overall_metrics()
        threshold_metrics = self.get_overall_threshold_metrics()

        if overall_metrics is not None:
            results.append(overall_metrics)
        if threshold_metrics is not None:
            results += threshold_metrics

        if isinstance(self.model, ClassificationModel):
            results.append(self._create_confusion_container())
        self.results = results
        return self

    def update_metrics(self, metrics, replace=True):
        """Replace or extend the list of metrics.

        Parameters
        ----------
        metrics : List-like
            List of metric names as strings or list of Metric objects
            (credoai.modules.metrics.Metric). Metric strings should be in the list
            returned by credoai.modules.metric_utils.list_metrics(). Note that for
            performance parity metrics like "false negative rate parity", just list
            "false negative rate"; parity metrics are calculated automatically if the
            performance metric is supplied.
        replace : bool, optional
            If True (default), replace the current metrics; otherwise append to them.
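
        Examples
        --------
        A minimal sketch; it assumes the evaluator has already been set up
        (e.g., by a Lens pipeline) so that predictions exist, and the metric
        name is illustrative:

        >>> evaluator.update_metrics(["precision_score"], replace=False)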
99 """
100 if replace:
101 self.metrics = metrics
102 else:
103 self.metrics += metrics
105 self.processed_metrics, _ = process_metrics(self.metrics, self.model.type)
107 dummy_sensitive = pd.Series(["NA"] * len(self.y_true), name="NA")
108 self.metric_frames = setup_metric_frames(
109 self.processed_metrics,
110 self.y_pred,
111 self.y_prob,
112 self.y_true,
113 dummy_sensitive,
114 )

    def get_df(self):
        """Return dataframe of input arrays

        Returns
        -------
        pandas.DataFrame
            Dataframe containing the input arrays
        """
        df = pd.DataFrame({"true": self.y_true, "pred": self.y_pred})
        if self.y_prob is not None:
            y_prob_df = pd.DataFrame(self.y_prob)
            y_prob_df.columns = [f"y_prob_{i}" for i in range(y_prob_df.shape[1])]
            df = pd.concat([df, y_prob_df], axis=1)

        return df

    def get_overall_metrics(self):
        """Return overall scalar performance metrics

        Returns
        -------
        MetricContainer or None
            The overall performance metrics
        """
        # retrieve overall metrics only; the dummy sensitive feature has a single
        # group, so disaggregated values would be identical
        overall_metrics = [
            metric_frame.overall
            for name, metric_frame in self.metric_frames.items()
            if name != "thresh"
        ]
        if not overall_metrics:
            return

        output_series = (
            pd.concat(overall_metrics, axis=0).rename(index="value").to_frame()
        )
        output_series = output_series.reset_index().rename({"index": "type"}, axis=1)

        return MetricContainer(output_series, **self.get_info())

    def get_overall_threshold_metrics(self):
        """Return overall threshold-dependent performance metrics

        Returns
        -------
        list of TableContainer, or None
            The overall threshold-dependent performance metrics, one container per metric
        """
        # retrieve overall metrics only; the dummy sensitive feature has a single
        # group, so disaggregated values would be identical
        if "thresh" not in self.metric_frames:
            return

        threshold_results = (
            pd.concat([self.metric_frames["thresh"].overall], axis=0)
            .rename(index="value")
            .to_frame()
        )
        threshold_results = threshold_results.reset_index().rename(
            {"index": "threshold_metric"}, axis=1
        )
        threshold_results.name = "threshold_metric_performance"

        results = []
        for _, threshold_metric in threshold_results.iterrows():
            metric = threshold_metric.threshold_metric
            threshold_metric.value.name = "threshold_dependent_performance"
            results.append(
                TableContainer(
                    threshold_metric.value,
                    **self.get_info({"metric_type": metric}),
                )
            )

        return results

    def _create_confusion_container(self):
        confusion_container = TableContainer(
            create_confusion_matrix(self.y_true, self.y_pred),
            **self.get_info(),
        )
        return confusion_container


############################################
## Evaluation helper functions
##
## Helper functions create evidences
## to be passed to .evaluate to be wrapped
## by evidence containers
############################################
def create_confusion_matrix(y_true, y_pred):
    """Create a confusion matrix as a dataframe

    Parameters
    ----------
    y_true : pd.Series of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,)
        Estimated targets as returned by a classifier.

    Returns
    -------
    pandas.DataFrame
        Confusion matrix in long format with columns "true_label",
        "predicted_label", and "value", where value is the rate normalized
        over the true labels.
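
    Examples
    --------
    A small illustrative sketch with hypothetical toy labels:

    >>> y_true = pd.Series(["a", "a", "b", "b"])
    >>> y_pred = pd.Series(["a", "b", "b", "b"])
    >>> cm = create_confusion_matrix(y_true, y_pred)
    >>> list(cm.columns)
    ['true_label', 'predicted_label', 'value']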
217 """
    labels = np.unique(y_true)
    confusion = confusion_matrix(y_true, y_pred, normalize="true", labels=labels)
    confusion_df = pd.DataFrame(confusion, index=labels.copy(), columns=labels)
    confusion_df.index.name = "true_label"
    confusion_df = confusion_df.reset_index().melt(
        id_vars=["true_label"], var_name="predicted_label"
    )
    confusion_df.name = "Confusion Matrix"
    return confusion_df