Coverage for credoai/artifacts/model/classification_model.py

1"""Model artifact wrapping any classification model"""

2from .base_model import Model

4from credoai.utils import global_logger

6import numpy as np

8from sklearn.utils import check_array

10from .constants_model import (

11 SKLEARN_LIKE_FRAMEWORKS,

12 MLP_FRAMEWORKS,

13 FRAMEWORK_VALIDATION_FUNCTIONS,

14)

class ClassificationModel(Model):
    """Class wrapper around classification model to be assessed

    ClassificationModel serves as an adapter between arbitrary binary or multi-class
    classification models and the evaluations in Lens. Evaluations depend on
    ClassificationModel instantiating `predict` and (optionally) `predict_proba`

    Parameters
    ----------
    name : str
        Label of the model
    model_like : model_like
        A binary or multi-class classification model or pipeline. It must have a
        `predict` function that returns an array containing model outputs for each sample.
        It can also optionally have a `predict_proba` function that returns array containing
        the class label probabilities for each sample.

        If the supplied model_like is from the sklearn or xgboost framework, `predict` is assumed
        to return a column vector with a single value for each sample (i.e. thresholded predictions).

        If the supplied model_like is from the Keras framework, the assumed form of `predict` outputs
        depends on the final-layer activation.
        If the final layer is softmax, this wrapper assumes the
        return value is a matrix with shape (n_samples, n_classes) corresponding to probability
        values (i.e., without argmax), similar to sklearn.predict_proba. The wrapper applies argmax
        where necessary to obtain discrete labels.
        If the final layer is sigmoid, this wrapper assumes the return value is an (n_samples, 1)
        column vector with per-sample probabilities. The wrapper rounds (.5 as default threshold)
        values where necessary to obtain discrete labels.

        For custom model_like objects, users may optionally specify a `framework_like` attribute
        of type string. framework_like serves as a flag to enable expected functionality to carry over
        from an external framework to Lens. Presently "sklearn", "xgboost", and "keras" are supported.
        The former two serve as flags to notify Lens that model_like respects sklearn's predict API
        (and the predict_proba API, if relevant). The latter serves as a flag to Lens that model_like
        respects Keras's predict API with either a sigmoid or softmax final layer.

    tags : optional
        Additional metadata to add to model
        E.g., {'model_type': 'binary_classification'}
    """

    def __init__(self, name: str, model_like=None, tags=None):
        # The three leading positional arguments are consumed by the base Model;
        # presumably: model type, candidate function names, required function
        # names -- confirm against Model.__init__.
        super().__init__(
            "CLASSIFICATION",
            ["predict", "predict_proba"],
            ["predict"],
            # TODO this will not work once we incorporate PyTorch
            # PyTorch allows callables and Module.forward()
            # predict not required
            name,
            model_like,
            tags,
        )

    def _validate_framework(self):
        """Run the framework-specific validation function, warning on any failure.

        The validator is looked up in FRAMEWORK_VALIDATION_FUNCTIONS by
        ``self.model_info["framework"]``. A missing entry or an error raised by
        the validator is logged as a warning instead of propagating.
        """
        try:
            FRAMEWORK_VALIDATION_FUNCTIONS[self.model_info["framework"]](
                self.model_like, self.model_info
            )
        except Exception:
            # Deliberately best-effort, but `Exception` (rather than a bare
            # `except`) lets KeyboardInterrupt / SystemExit propagate.
            message = """Provided model is from unsupported framework.
            Lens behavior has not been tested or assured with unsupported modeling frameworks."""
            global_logger.warning(message)

    def __post_init__(self):
        """Conditionally updates functionality based on framework"""
        # This needs to remain a big if-statement for now if we're going to keep
        # all classifiers in one class since we're making direct assignments to the class object
        framework = self.model_info["framework"]

        if framework in SKLEARN_LIKE_FRAMEWORKS:
            func = getattr(self, "predict_proba", None)
            if len(self.model_like.classes_) == 2:
                self.type = "BINARY_CLASSIFICATION"
                # if binary, replace probability array with one-dimensional vector
                if func:
                    self.__dict__["predict_proba"] = lambda x: func(x)[:, 1]
            else:
                self.type = "MULTICLASS_CLASSIFICATION"

        elif framework in MLP_FRAMEWORKS:
            # TODO change this to '__call__' when adding in general TF and PyTorch
            pred_func = getattr(self, "predict", None)
            if pred_func:
                # Shape of the final layer's output decides how raw outputs
                # are turned into labels / probabilities.
                output_shape = self.model_like.layers[-1].output_shape

                if output_shape == (None, 1):
                    # Assumes sigmoid -> probabilities need to be rounded
                    self.__dict__["predict"] = lambda x: pred_func(x).round()
                    # Single-output sigmoid is binary by definition
                    self.type = "BINARY_CLASSIFICATION"
                else:
                    # Assumes softmax -> probabilities need to be argmaxed
                    self.__dict__["predict"] = lambda x: np.argmax(pred_func(x), axis=1)
                    if output_shape[1] == 2:
                        self.type = "BINARY_CLASSIFICATION"
                    else:
                        self.type = "MULTICLASS_CLASSIFICATION"

                if output_shape == (None, 2):
                    # Two-class softmax -> keep only the second-column probability
                    self.__dict__["predict_proba"] = lambda x: pred_func(x)[:, 1]
                elif len(output_shape) == 2 and output_shape[1] == 1:
                    # Sigmoid -> needs to be (n_samples, ) to work with sklearn metrics
                    # NOTE(review): the reshape below yields (n_samples, 1), not
                    # (n_samples,) as the comment above states -- confirm which
                    # shape downstream evaluators expect before changing it.
                    self.__dict__["predict_proba"] = lambda x: np.reshape(
                        pred_func(x), (-1, 1)
                    )
                elif len(output_shape) == 2 and output_shape[1] > 2:
                    self.__dict__["predict_proba"] = pred_func
                else:
                    pass
                    # predict_proba is not valid (for now)

        elif framework == "credoai":
            # Functionality for DummyClassifier
            dummy = self.model_like

            # DummyClassifier model type is set in the constructor based on whether it
            # is binary or multiclass. Read it from the dummy *before* model_like may be
            # swapped for the wrapped model, which is not expected to carry `type`.
            self.type = dummy.type

            if dummy.model_like is not None:
                # If the dummy model has a model_like specified, reassign
                # the classifier's model_like attribute to match the dummy's
                # so that downstream evaluators (ModelProfiler) can use it
                self.model_like = dummy.model_like

            # Predict and Predict_Proba should already be specified

146

147

class DummyClassifier:
    """Stand-in classifier built from precomputed model outputs

    Use this class when a classification model's predictions already exist.
    The supplied arrays (predicted class labels and/or class probabilities) are
    exposed through `predict` / `predict_proba` callables, so the instance can be
    passed as the model to `ClassificationModel`.

    Parameters
    ----------
    name : str
        Label of the model
    model_like : model_like, optional
        The underlying model object. Predictions are precomputed, but the object
        itself may still be of use for some evaluations (e.g. ModelProfiler).
    binary_clf : bool, optional, default = True
        Type of classification model.
        Used when wrapping with ClassificationModel.
        If True, `type` is set to 'BINARY_CLASSIFICATION', which enables use of
        binary metrics.
        If False, `type` is set to 'MULTICLASS_CLASSIFICATION', and those metrics
        are used.
    predict_output : array, optional
        Array containing per-sample class labels
        Corresponds to sklearn-like `predict` output
        For NN frameworks (Keras.predict, tf.__call__, torch.forward, etc.), this input
        assumes argmax has been applied to the outputs so that they are discrete valued labels
    predict_proba_output : array, optional
        Array containing the per-sample class probabilities
        Corresponds to sklearn-like `predict_proba` output
        For NN frameworks (Keras.predict, etc.) this input assumes no post-processing after a
        final-layer softmax (general) or sigmoid (binary only) activation

    """

    def __init__(
        self,
        name: str,
        model_like=None,
        binary_clf=True,
        predict_output=None,
        predict_proba_output=None,
        tags=None,
    ):
        self.model_like = model_like

        # Attach a callable for each precomputed output that was supplied.
        precomputed = (
            ("predict", predict_output),
            ("predict_proba", predict_proba_output),
        )
        for function_name, output in precomputed:
            self._build_functionality(function_name, output)

        self.name = name
        self.tags = tags

        if binary_clf:
            self.type = "BINARY_CLASSIFICATION"
        else:
            self.type = "MULTICLASS_CLASSIFICATION"

    def _wrap_array(self, array):
        """Return a callable that ignores its argument and yields `array`."""

        # X stays optional to maintain potential backward compatibility:
        # some uses of DummyClassifier may call predict() with no argument.
        def constant_output(X=None):
            return array

        return constant_output

    def _build_functionality(self, function_name, array):
        """Expose `array` as an instance attribute callable named `function_name`.

        Nothing is attached when `array` is None.
        """
        if array is None:
            return
        validated = check_array(array, ensure_2d=False, allow_nd=True)
        vars(self)[function_name] = self._wrap_array(validated)

Coverage for credoai/artifacts/model/classification_model.py: 58%

57 statements