Coverage for credoai/artifacts/model/classification_model.py: 58%
57 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-13 21:56 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-13 21:56 +0000
1"""Model artifact wrapping any classification model"""
2from .base_model import Model
4from credoai.utils import global_logger
6import numpy as np
8from sklearn.utils import check_array
10from .constants_model import (
11 SKLEARN_LIKE_FRAMEWORKS,
12 MLP_FRAMEWORKS,
13 FRAMEWORK_VALIDATION_FUNCTIONS,
14)
class ClassificationModel(Model):
    """Class wrapper around classification model to be assessed

    ClassificationModel serves as an adapter between arbitrary binary or multi-class
    classification models and the evaluations in Lens. Evaluations depend on
    ClassificationModel instantiating `predict` and (optionally) `predict_proba`

    Parameters
    ----------
    name : str
        Label of the model
    model_like : model_like
        A binary or multi-class classification model or pipeline. It must have a
        `predict` function that returns an array containing model outputs for each sample.
        It can also optionally have a `predict_proba` function that returns array containing
        the class label probabilities for each sample.

        If the supplied model_like is from the sklearn or xgboost framework, `predict` is assumed
        to return a column vector with a single value for each sample (i.e. thresholded predictions).

        If the supplied model_like is from the Keras framework, the assumed form of `predict` outputs
        depends on the final-layer activation.
        If the final layer is softmax, this wrapper assumes the
        return value is a matrix with shape (n_samples, n_classes) corresponding to probability
        values (i.e., without argmax), similar to sklearn.predict_proba. The wrapper applies argmax
        where necessary to obtain discrete labels.
        If the final layer is sigmoid, this wrapper assumes the return value is an (n_samples, 1)
        column vector with per-sample probabilities. The wrapper rounds (.5 as default threshold)
        values where necessary to obtain discrete labels.

        For custom model_like objects, users may optionally specify a `framework_like` attribute
        of type string. framework_like serves as a flag to enable expected functionality to carry over
        from an external framework to Lens. Presently "sklearn", "xgboost", and "keras" are supported.
        The former two serve as flags to notify Lens that model_like respects sklearn's predict API
        (and the predict_proba API, if relevant). The latter serves as a flag to Lens that model_like
        respects Keras's predict API with either a sigmoid or softmax final layer.

    tags : optional
        Additional metadata to add to model
        E.g., {'model_type': 'binary_classification'}
    """

    def __init__(self, name: str, model_like=None, tags=None):
        # NOTE(review): the two lists are presumably the functions the base class
        # may expose and the subset that is required -- confirm against Model.
        super().__init__(
            "CLASSIFICATION",
            ["predict", "predict_proba"],
            ["predict"],
            # TODO this will not work once we incorporate PyTorch
            # PyTorch allows callables and Module.forward()
            # predict not required
            name,
            model_like,
            tags,
        )

    def _validate_framework(self):
        """Run the framework-specific validation, warning if it cannot be applied.

        Looks up the validator registered for ``self.model_info["framework"]`` and
        calls it with the wrapped model and its info. Any failure (including an
        unregistered framework) is downgraded to a logged warning on purpose, so
        that unsupported frameworks can still be used on a best-effort basis.
        """
        try:
            FRAMEWORK_VALIDATION_FUNCTIONS[self.model_info["framework"]](
                self.model_like, self.model_info
            )
        # `Exception` rather than a bare `except` so that KeyboardInterrupt and
        # SystemExit are not swallowed by this best-effort check.
        except Exception:
            message = """Provided model is from unsupported framework.
            Lens behavior has not been tested or assured with unsupported modeling frameworks."""
            global_logger.warning(message)

    def __post_init__(self):
        """Conditionally updates functionality based on framework

        Sets ``self.type`` to "BINARY_CLASSIFICATION" or "MULTICLASS_CLASSIFICATION"
        and, where needed, overrides ``predict`` / ``predict_proba`` on the instance
        so that their outputs have a uniform form across frameworks.
        """
        # This needs to remain a big if-statement for now if we're going to keep
        # all classifiers in one class since we're making direct assignments to the class object
        framework = self.model_info["framework"]

        if framework in SKLEARN_LIKE_FRAMEWORKS:
            func = getattr(self, "predict_proba", None)
            if len(self.model_like.classes_) == 2:
                self.type = "BINARY_CLASSIFICATION"
                # if binary, replace probability array with one-dimensional vector
                # holding the probabilities from column 1
                if func:
                    self.__dict__["predict_proba"] = lambda x: func(x)[:, 1]
            else:
                self.type = "MULTICLASS_CLASSIFICATION"

        elif framework in MLP_FRAMEWORKS:
            # TODO change this to '__call__' when adding in general TF and PyTorch
            pred_func = getattr(self, "predict", None)
            if pred_func:
                # Shape of the final layer's output, e.g. (None, n_outputs)
                output_shape = self.model_like.layers[-1].output_shape

                if output_shape == (None, 1):
                    # Assumes sigmoid -> probabilities need to be rounded
                    self.__dict__["predict"] = lambda x: pred_func(x).round()
                    # Single-output sigmoid is binary by definition
                    self.type = "BINARY_CLASSIFICATION"
                else:
                    # Assumes softmax -> probabilities need to be argmaxed
                    self.__dict__["predict"] = lambda x: np.argmax(pred_func(x), axis=1)
                    if output_shape[1] == 2:
                        self.type = "BINARY_CLASSIFICATION"
                    else:
                        self.type = "MULTICLASS_CLASSIFICATION"

                if output_shape == (None, 2):
                    # Two-column softmax -> keep only column 1 as a 1-D vector
                    self.__dict__["predict_proba"] = lambda x: pred_func(x)[:, 1]
                elif len(output_shape) == 2 and output_shape[1] == 1:
                    # Sigmoid -> needs to be (n_samples, ) to work with sklearn metrics.
                    # The target shape is (-1,): reshaping an (n_samples, 1) output to
                    # (-1, 1) would leave it unchanged.
                    self.__dict__["predict_proba"] = lambda x: np.reshape(
                        pred_func(x), (-1,)
                    )
                elif len(output_shape) == 2 and output_shape[1] > 2:
                    # Multiclass softmax output is already a full probability matrix
                    self.__dict__["predict_proba"] = pred_func
                # Any other output shape: predict_proba is not valid (for now)

        elif framework == "credoai":
            # Functionality for DummyClassifier
            dummy = self.model_like
            # DummyClassifier model type is set in the constructor based on whether it
            # is binary or multiclass. It must be read from the dummy itself, *before*
            # model_like is swapped for the underlying model, which is not expected to
            # carry a `type` attribute.
            self.type = dummy.type
            if dummy.model_like is not None:
                # If the dummy model has a model_like specified, reassign
                # the classifier's model_like attribute to match the dummy's
                # so that downstream evaluators (ModelProfiler) can use it
                self.model_like = dummy.model_like

            # Predict and Predict_Proba should already be specified
class DummyClassifier:
    """Stand-in classifier built from precomputed model outputs

    Use this class when a classification model's outputs already exist and the
    model does not need to be (or cannot be) called again. The precomputed
    label array and/or probability array are exposed through `predict` and
    `predict_proba` functions, so the instance can be passed as the model to
    `ClassificationModel`.

    Parameters
    ----------
    name : str
        Label of the model
    model_like : model_like, optional
        The underlying model object. Predictions are precomputed, but the
        object itself may still be useful to some evaluations
        (e.g. ModelProfiler).
    binary_clf : bool, optional, default = True
        Type of classification model.
        If True, `type` is set to 'BINARY_CLASSIFICATION'; otherwise it is set
        to 'MULTICLASS_CLASSIFICATION'. ClassificationModel copies this value
        to decide which metrics apply.
    predict_output : array, optional
        Per-sample class labels, i.e. what an sklearn-like `predict` returns.
        For NN frameworks (Keras.predict, tf.__call__, torch.forward, etc.) the
        array is assumed to already be argmaxed into discrete labels.
    predict_proba_output : array, optional
        Per-sample class probabilities, i.e. what an sklearn-like
        `predict_proba` returns. For NN frameworks (Keras.predict, etc.) the
        array is assumed to be the raw output of a final-layer softmax
        (general) or sigmoid (binary only) activation.
    tags : optional
        Additional metadata stored on the instance.
    """

    def __init__(
        self,
        name: str,
        model_like=None,
        binary_clf=True,
        predict_output=None,
        predict_proba_output=None,
        tags=None,
    ):
        self.model_like = model_like
        # Each function is only created when its precomputed output was supplied
        self._build_functionality("predict", predict_output)
        self._build_functionality("predict_proba", predict_proba_output)
        self.name = name
        self.tags = tags
        if binary_clf:
            self.type = "BINARY_CLASSIFICATION"
        else:
            self.type = "MULTICLASS_CLASSIFICATION"

    def _wrap_array(self, array):
        """Return a function that ignores its argument and returns `array`."""

        # X stays optional for backward compatibility: some users of
        # DummyClassifier call predict() with no argument at all.
        def _precomputed(X=None):
            return array

        return _precomputed

    def _build_functionality(self, function_name, array):
        """Attach `function_name` to the instance when `array` is provided."""
        if array is None:
            # Nothing precomputed for this function -> leave it undefined
            return
        validated = check_array(array, ensure_2d=False, allow_nd=True)
        setattr(self, function_name, self._wrap_array(validated))