Coverage for credoai/artifacts/model/classification_model.py: 58%

57 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-13 21:56 +0000

1"""Model artifact wrapping any classification model""" 

2from .base_model import Model 

3 

4from credoai.utils import global_logger 

5 

6import numpy as np 

7 

8from sklearn.utils import check_array 

9 

10from .constants_model import ( 

11 SKLEARN_LIKE_FRAMEWORKS, 

12 MLP_FRAMEWORKS, 

13 FRAMEWORK_VALIDATION_FUNCTIONS, 

14) 

15 

16 

class ClassificationModel(Model):
    """Class wrapper around classification model to be assessed

    ClassificationModel serves as an adapter between arbitrary binary or multi-class
    classification models and the evaluations in Lens. Evaluations depend on
    ClassificationModel instantiating `predict` and (optionally) `predict_proba`

    Parameters
    ----------
    name : str
        Label of the model
    model_like : model_like
        A binary or multi-class classification model or pipeline. It must have a
        `predict` function that returns an array containing model outputs for each sample.
        It can also optionally have a `predict_proba` function that returns an array
        containing the class label probabilities for each sample.

        If the supplied model_like is from the sklearn or xgboost framework, `predict` is
        assumed to return a column vector with a single value for each sample
        (i.e. thresholded predictions).

        If the supplied model_like is from the Keras framework, the assumed form of
        `predict` outputs depends on the final-layer activation.
            If the final layer is softmax, this wrapper assumes the return value is a
            matrix with shape (n_samples, n_classes) corresponding to probability
            values (i.e., without argmax), similar to sklearn.predict_proba. The wrapper
            applies argmax where necessary to obtain discrete labels.
            If the final layer is sigmoid, this wrapper assumes the return value is an
            (n_samples, 1) column vector with per-sample probabilities. The wrapper
            rounds (.5 as default threshold) values where necessary to obtain discrete
            labels.

        For custom model_like objects, users may optionally specify a `framework_like`
        attribute of type string. framework_like serves as a flag to enable expected
        functionality to carry over from an external framework to Lens. Presently
        "sklearn", "xgboost", and "keras" are supported. The former two serve as flags
        to notify Lens that model_like respects sklearn's predict API (and the
        predict_proba API, if relevant). The latter serves as a flag to Lens that
        model_like respects Keras's predict API with either a sigmoid or softmax final
        layer.

    tags : optional
        Additional metadata to add to model
        E.g., {'model_type': 'binary_classification'}
    """

    def __init__(self, name: str, model_like=None, tags=None):
        super().__init__(
            "CLASSIFICATION",
            ["predict", "predict_proba"],
            ["predict"],
            # TODO this will not work once we incorporate PyTorch
            # PyTorch allows callables and Module.forward()
            # predict not required
            name,
            model_like,
            tags,
        )

    def _validate_framework(self):
        """Run the framework-specific validation, warning if it cannot be applied.

        Validation is best-effort: any failure (including an unknown framework key)
        is reported as a warning instead of being raised.
        """
        try:
            FRAMEWORK_VALIDATION_FUNCTIONS[self.model_info["framework"]](
                self.model_like, self.model_info
            )
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
        # SystemExit still propagate while validation failures stay non-fatal.
        except Exception:
            message = """Provided model is from unsupported framework. 
            Lens behavior has not been tested or assured with unsupported modeling frameworks."""
            global_logger.warning(message)

    def __post_init__(self):
        """Conditionally updates functionality based on framework"""
        # This needs to remain a big if-statement for now if we're going to keep
        # all classifiers in one class since we're making direct assignments to the class object

        if self.model_info["framework"] in SKLEARN_LIKE_FRAMEWORKS:
            func = getattr(self, "predict_proba", None)
            if len(self.model_like.classes_) == 2:
                self.type = "BINARY_CLASSIFICATION"
                # if binary, replace probability array with one-dimensional vector
                if func:
                    self.__dict__["predict_proba"] = lambda x: func(x)[:, 1]
            else:
                self.type = "MULTICLASS_CLASSIFICATION"

        elif self.model_info["framework"] in MLP_FRAMEWORKS:
            # TODO change this to '__call__' when adding in general TF and PyTorch
            pred_func = getattr(self, "predict", None)
            if pred_func:
                # Shape of the final layer decides how raw outputs are post-processed
                output_shape = self.model_like.layers[-1].output_shape

                if output_shape == (None, 1):
                    # Assumes sigmoid -> probabilities need to be rounded
                    self.__dict__["predict"] = lambda x: pred_func(x).round()
                    # Single-output sigmoid is binary by definition
                    self.type = "BINARY_CLASSIFICATION"
                else:
                    # Assumes softmax -> probabilities need to be argmaxed
                    self.__dict__["predict"] = lambda x: np.argmax(pred_func(x), axis=1)
                    if output_shape[1] == 2:
                        self.type = "BINARY_CLASSIFICATION"
                    else:
                        self.type = "MULTICLASS_CLASSIFICATION"

                if output_shape == (None, 2):
                    # Two-class softmax -> keep only the positive-class probability
                    self.__dict__["predict_proba"] = lambda x: pred_func(x)[:, 1]
                elif len(output_shape) == 2 and output_shape[1] == 1:
                    # Sigmoid -> needs to be (n_samples, ) to work with sklearn metrics.
                    # Flatten to 1-D so the result matches the other binary branches.
                    self.__dict__["predict_proba"] = lambda x: np.reshape(
                        pred_func(x), (-1,)
                    )
                elif len(output_shape) == 2 and output_shape[1] > 2:
                    # Multiclass softmax output already has predict_proba's layout
                    self.__dict__["predict_proba"] = pred_func
                else:
                    pass
                    # predict_proba is not valid (for now)

        elif self.model_info["framework"] == "credoai":
            # Functionality for DummyClassifier
            # Keep a handle on the dummy wrapper: `type` lives on the dummy, not on
            # the underlying model that may replace `self.model_like` below.
            dummy = self.model_like
            if dummy.model_like is not None:
                # If the dummy model has a model_like specified, reassign
                # the classifier's model_like attribute to match the dummy's
                # so that downstream evaluators (ModelProfiler) can use it
                self.model_like = dummy.model_like

            # DummyClassifier model type is set in the constructor based on whether it
            # is binary or multiclass
            self.type = dummy.type

            # Predict and Predict_Proba should already be specified

146 

147 

class DummyClassifier:
    """Stand-in classifier built from precomputed model outputs

    Use this class when a classification model's outputs already exist and the
    model does not need to be re-run. The stored outputs can be the predicted
    class labels, the class probabilities, or both. Wrap them in a
    DummyClassifier and hand that object to `ClassificationModel` as the model.

    Parameters
    ----------
    name : str
        Label of the model
    model_like : model_like, optional
        The underlying model object. Predictions are precomputed, but the model
        itself may still be useful to some evaluations (e.g. ModelProfiler).
    binary_clf : bool, optional, default = True
        Type of classification model, used when wrapping with ClassificationModel.
        If binary_clf is True, the type is 'BINARY_CLASSIFICATION', which enables
        use of binary metrics.
        If binary_clf is False, the type is 'MULTICLASS_CLASSIFICATION', and the
        corresponding metrics are used.
    predict_output : array, optional
        Array containing per-sample class labels.
        Corresponds to sklearn-like `predict` output.
        For NN frameworks (Keras.predict, tf.__call__, torch.forward, etc.), argmax
        is assumed to have been applied already so the values are discrete labels.
    predict_proba_output : array, optional
        Array containing the per-sample class probabilities.
        Corresponds to sklearn-like `predict_proba` output.
        For NN frameworks (Keras.predict, etc.) no post-processing is assumed after
        a final-layer softmax (general) or sigmoid (binary only) activation.
    tags : optional
        Additional metadata stored on the instance
    """

    def __init__(
        self,
        name: str,
        model_like=None,
        binary_clf=True,
        predict_output=None,
        predict_proba_output=None,
        tags=None,
    ):
        self.name = name
        self.tags = tags
        self.model_like = model_like

        if binary_clf:
            self.type = "BINARY_CLASSIFICATION"
        else:
            self.type = "MULTICLASS_CLASSIFICATION"

        # Attach `predict` / `predict_proba` only for the outputs actually supplied
        self._build_functionality("predict", predict_output)
        self._build_functionality("predict_proba", predict_proba_output)

    def _wrap_array(self, array):
        """Return a callable that ignores its input and yields `array`."""

        # X stays optional to maintain potential backward compatibility:
        # some uses of DummyClassifier may call predict() with no argument
        def _precomputed(X=None):
            return array

        return _precomputed

    def _build_functionality(self, function_name, array):
        """Expose `array` through an instance attribute named `function_name`.

        Nothing is attached when `array` is None.
        """
        if array is None:
            return
        validated = check_array(array, ensure_2d=False, allow_nd=True)
        vars(self)[function_name] = self._wrap_array(validated)