Coverage for credoai/evaluators/equity.py: 95%
113 statements
import numpy as np
import pandas as pd
from connect.evidence import MetricContainer, StatisticTestContainer, TableContainer

from credoai.artifacts import TabularData
from credoai.evaluators.evaluator import Evaluator
from credoai.evaluators.utils.validation import (
    check_data_for_nulls,
    check_data_instance,
    check_existence,
)
from credoai.modules.stats import ChiSquare, OneWayAnova
from credoai.utils.model_utils import type_of_target

class DataEquity(Evaluator):
    """
    Data Equity evaluator for Credo AI (Experimental)

    This evaluator assesses whether outcomes are distributed equally across a sensitive
    feature. Depending on the kind of outcome, different tests will be performed.

    - Discrete: chi-squared contingency test, followed by Bonferroni-corrected
      posthoc chi-squared tests
    - Continuous: one-way ANOVA, followed by Tukey HSD posthoc tests
    - Proportion (bounded [0-1] continuous outcome): the outcome is transformed to
      logits, then analyzed as a continuous outcome

    Parameters
    ----------
    p_value : float
        The significance threshold used to evaluate statistical tests
    """

    required_artifacts = {"data", "sensitive_feature"}

    def __init__(self, p_value=0.01):
        self.pvalue = p_value
        super().__init__()
    def _validate_arguments(self):
        check_data_instance(self.data, TabularData)
        check_existence(self.data.sensitive_features, "sensitive_features")
        check_data_for_nulls(self.data, "Data")

    def _setup(self):
        self.sensitive_features = self.data.sensitive_feature
        self.y = self.data.y
        self.type_of_target = self.data.y_type

        self.df = pd.concat([self.sensitive_features, self.y], axis=1)
        self.labels = {
            "sensitive_feature": self.sensitive_features.name,
            "outcome": self.y.name,
        }
        return self
    def evaluate(self):
        summary, parity_results = self._describe()
        outcome_distribution = self._outcome_distributions()
        overall_equity, posthoc_tests = self._get_formatted_stats()

        # Combine
        equity_containers = [
            summary,
            outcome_distribution,
            parity_results,
            overall_equity,
        ]

        # Add posthoc if available
        if posthoc_tests is not None:
            equity_containers.append(posthoc_tests)

        self.results = equity_containers
        return self
    def _describe(self):
        """Create descriptive output"""
        means = self.df.groupby(self.sensitive_features.name).mean()
        results = {"summary": means}

        summary = results["summary"]
        results["sensitive_feature"] = self.sensitive_features.name
        results["highest_group"] = summary[self.y.name].idxmax()
        results["lowest_group"] = summary[self.y.name].idxmin()
        results["demographic_parity_difference"] = (
            summary[self.y.name].max() - summary[self.y.name].min()
        )
        results["demographic_parity_ratio"] = (
            summary[self.y.name].min() / summary[self.y.name].max()
        )

        summary.name = "Average Outcome Per Group"

        # Format summary results
        summary = TableContainer(
            results["summary"],
            **self.get_info(labels=self.labels),
        )

        # Format parity results
        parity_results = pd.DataFrame(
            [
                {"type": k, "value": v}
                for k, v in results.items()
                if "demographic_parity" in k
            ]
        )
        parity_results = MetricContainer(
            parity_results,
            **self.get_info(labels=self.labels),
        )

        return summary, parity_results
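
    # Illustrative worked example (not from the source): if the per-group outcome
    # means are {"A": 0.30, "B": 0.45}, then highest_group = "B", lowest_group = "A",
    # demographic_parity_difference = 0.45 - 0.30 = 0.15, and
    # demographic_parity_ratio = 0.30 / 0.45 ≈ 0.67.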
    def _outcome_distributions(self):
        out = TableContainer(
            outcome_distribution(
                self.df, self.sensitive_features.name, self.y.name, self.type_of_target
            ),
            **self.get_info(labels=self.labels),
        )
        return out
    def _get_formatted_stats(self) -> tuple:
        """
        Select statistics based on classification type, add formatting.

        Returns
        -------
        tuple
            Overall equity, posthoc tests
        """
        if self.type_of_target in ("binary", "multiclass"):
            statistics = self.discrete_stats()
        else:
            statistics = self.continuous_stats()

        overall_equity = {
            "statistic_type": statistics["test_type"],
            "test_statistic": statistics["statistic"],
            "p_value": statistics["pvalue"],
            "significance_threshold": self.pvalue,
            "significant": statistics["pvalue"] <= self.pvalue,
        }

        overall_equity = StatisticTestContainer(
            pd.DataFrame(overall_equity, index=[0]), **self.get_info()
        )

        posthoc_tests = None
        if "significant_posthoc_tests" in statistics:
            posthoc_tests = pd.DataFrame(statistics["significant_posthoc_tests"])
            posthoc_tests.name = f"{statistics['test_type']}_posthoc"
            posthoc_tests = TableContainer(posthoc_tests, **self.get_info())

        return overall_equity, posthoc_tests
    def discrete_stats(self):
        """Run statistics on discrete outcomes"""
        test = ChiSquare(self.pvalue)
        return test.run(self.df, self.sensitive_features.name, self.y.name)

    def continuous_stats(self):
        """Run statistics on continuous outcomes"""
        # check for proportional bounding and transform
        if self._check_range(self.y, 0, 1):
            self._proportion_transformation()
        return OneWayAnova(self.pvalue).run(
            self.df, self.sensitive_features.name, self.y.name
        )

    # helper functions
    def _check_range(self, lst, lower_bound, upper_bound):
        return min(lst) >= lower_bound and max(lst) <= upper_bound

    def _proportion_transformation(self):
        """Transforms bounded values between 0-1 into a continuous space"""

        def logit(x):
            eps = 1e-6
            return np.log(x / (1 - x + eps) + eps)

        self.df[self.y.name] = self.df[self.y.name].apply(logit)
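
# Illustrative sketch (hypothetical helper, not used by the evaluators above):
# the tests described in DataEquity roughly correspond to the scipy.stats calls
# below. The DataFrame, column names, and return values are assumptions for
# illustration only; the actual tests live in credoai.modules.stats.
def _equity_stats_sketch(df, group_col, outcome_col, discrete=True):
    from scipy import stats

    if discrete:
        # Discrete outcome: chi-squared test on the group x outcome contingency table
        contingency = pd.crosstab(df[group_col], df[outcome_col])
        statistic, pvalue, _, _ = stats.chi2_contingency(contingency)
    else:
        # Continuous outcome: one-way ANOVA across sensitive-feature groups.
        # A proportion outcome bounded in [0, 1] would first be mapped to logits,
        # e.g. logit(0.8) = log(0.8 / 0.2) ≈ 1.386, before running the ANOVA.
        groups = [g[outcome_col].to_numpy() for _, g in df.groupby(group_col)]
        statistic, pvalue = stats.f_oneway(*groups)
        # Posthoc pairwise comparisons could use, e.g., stats.tukey_hsd(*groups)
    return statistic, pvalue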

class ModelEquity(DataEquity):
    """
    Evaluates the equity of a model's predictions.

    This evaluator assesses whether model predictions are distributed equally across a
    sensitive feature. Depending on the kind of outcome, different tests will be performed.

    * Discrete: chi-squared contingency test, followed by Bonferroni-corrected
      posthoc chi-squared tests
    * Continuous: one-way ANOVA, followed by Tukey HSD posthoc tests
    * Proportion (bounded [0-1] continuous outcome): the outcome is transformed to
      logits, then analyzed as a continuous outcome

    Parameters
    ----------
    use_predict_proba : bool, optional
        Defines which predict method is used. If True, predict_proba is used, which
        outputs probabilities rather than class predictions. The availability of
        predict_proba depends on the model under assessment. By default False.
    p_value : float, optional
        The significance threshold used to evaluate statistical tests, by default 0.01
    """

    required_artifacts = {"model", "assessment_data", "sensitive_feature"}

    def __init__(self, use_predict_proba=False, p_value=0.01):
        self.use_predict_proba = use_predict_proba
        super().__init__(p_value)
    def _setup(self):
        self.sensitive_features = self.assessment_data.sensitive_feature
        fun = self.model.predict_proba if self.use_predict_proba else self.model.predict
        self.y = pd.Series(
            fun(self.assessment_data.X),
            index=self.sensitive_features.index,
        )
        prefix = "predicted probability" if self.use_predict_proba else "predicted"
        try:
            self.y.name = f"{prefix} {self.assessment_data.y.name}"
        except Exception:
            self.y.name = f"{prefix} outcome"

        self.type_of_target = type_of_target(self.y)

        self.df = pd.concat([self.sensitive_features, self.y], axis=1)
        self.labels = {
            "sensitive_feature": self.sensitive_features.name,
            "outcome": self.y.name,
        }
        return self
    def _validate_arguments(self):
        check_data_instance(self.assessment_data, TabularData)
        check_existence(self.assessment_data.sensitive_features, "sensitive_features")
        check_data_for_nulls(
            self.assessment_data, "Data", check_X=True, check_y=True, check_sens=True
        )

############################################
## Evaluation helper functions
##
## Helper functions create evidences that are
## passed to .evaluate and wrapped by
## evidence containers
############################################

def outcome_distribution(df, grouping_col, outcome_col, type_of_target, bins=10):
    """Returns the outcome distribution over a grouping factor

    For binary/multiclass outcomes, returns the counts for each combination of
    outcome and group. For a continuous outcome, bins the outcome and reports the
    number of records in each bin for each group.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe with at least two columns for grouping and outcome
    grouping_col : str
        Name of the grouping column, must refer to a categorical column
    outcome_col : str
        Name of the outcome column
    type_of_target : str
        The type of outcome column. Anything besides "binary" and "multiclass" will be
        treated as continuous.
    bins : int
        Number of bins to use in the case of a continuous outcome, by default 10

    Returns
    -------
    pd.DataFrame
        Outcome distribution per group: counts (and proportions) per outcome for
        discrete outcomes, or per-bin counts for continuous outcomes
    """
    df = df.loc[:, [grouping_col, outcome_col]]
    if type_of_target in ("binary", "multiclass"):
        distribution = df.value_counts().sort_index().reset_index(name="count")
        distribution["proportion"] = distribution["count"] / distribution["count"].sum()
    # histogram binning for continuous
    else:
        distribution = []
        for i, group in df.groupby(grouping_col):
            counts, edges = np.histogram(group[outcome_col], bins=bins)
            bins = edges  # ensure all groups have same bins
            bin_centers = 0.5 * (edges[:-1] + edges[1:])
            tmp = pd.DataFrame(
                {
                    grouping_col: i,
                    outcome_col: bin_centers,
                    "count": counts,
                }
            )
            distribution.append(tmp)
        distribution = pd.concat(distribution, axis=0)
    distribution.name = "Outcome Distributions"

    return distribution
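
# Minimal usage sketch for outcome_distribution on toy data (illustrative only;
# the __main__ guard keeps it from running on import).
if __name__ == "__main__":
    toy = pd.DataFrame(
        {
            "group": ["a", "a", "a", "b", "b", "b"],
            "outcome": [0, 1, 0, 1, 1, 0],
            "score": [0.1, 0.4, 0.2, 0.35, 0.8, 0.9],
        }
    )
    # Discrete outcome: counts and proportions per (group, outcome) combination
    print(outcome_distribution(toy, "group", "outcome", "binary"))
    # Continuous outcome: per-group histogram counts over shared bins
    print(outcome_distribution(toy, "group", "score", "continuous", bins=3))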