Coverage for credoai/evaluators/deepchecks_credoai.py: 92% (40 statements)

from typing import List, Optional

from connect.evidence.deepchecks_evidence import DeepchecksContainer
from deepchecks.core import BaseCheck
from deepchecks.tabular import Dataset, Suite

from credoai.evaluators.evaluator import Evaluator
from credoai.evaluators.utils.validation import check_requirements_deepchecks
from credoai.modules.constants_deepchecks import DEFAULT_CHECKS


class Deepchecks(Evaluator):
    """
    `Deepchecks <https://docs.deepchecks.com/stable/getting-started/welcome.html?utm_campaign=/&utm_medium=referral&utm_source=deepchecks.com>`_ evaluator for Credo AI (Experimental)

    This evaluator runs deepchecks `checks` and passes the results to the Governance
    platform in the form of a deepchecks SuiteResult, cast to JSON format.
    See `model evaluation <https://docs.deepchecks.com/stable/api/generated/deepchecks.tabular.checks.model_evaluation.html>`_,
    `SuiteResults <https://docs.deepchecks.com/stable/api/generated/deepchecks.core.SuiteResult.html>`_,
    and `create a custom suite <https://docs.deepchecks.com/stable/user-guide/general/customizations/examples/plot_create_a_custom_suite.html>`_
    for more details.

    This evaluator provides some redundant functionality. For instance, metrics that can
    be calculated using the Performance evaluator can potentially also be calculated by
    deepchecks (and thus by this evaluator); the same applies to the FeatureDrift
    evaluator. When a choice exists, best practice is to prefer the "Lens native"
    evaluator over deepchecks, since the output formats of the other evaluators are
    generally consistent with one another, while this evaluator returns results in
    deepchecks' own, highly structured JSON format.

    Parameters
    ----------
    suite_name : str, optional
        Name of the supplied deepchecks suite
    checks : List[BaseCheck], optional
        A list of instantiated deepchecks check objects (e.g., BoostingOverfit, CalibrationScore)
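
    Examples
    --------
    A minimal usage sketch. The checks shown are standard deepchecks tabular checks;
    the Lens wiring in the comments is indicative only and depends on how your
    credoai-lens model and data artifacts are set up::

        from deepchecks.tabular.checks import BoostingOverfit, CalibrationScore

        evaluator = Deepchecks(
            suite_name="my_suite",
            checks=[BoostingOverfit(), CalibrationScore()],
        )
        # Typically run through a Lens pipeline, e.g.:
        # lens = Lens(model=credo_model, assessment_data=credo_assessment_data)
        # lens.add(evaluator)
        # lens.run()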

    """

    required_artifacts = {"model", "assessment_data", "training_data"}
    # All artifacts are OPTIONAL; all that's required is that at least one of them
    # is provided. The evaluator's custom validation function checks for this.

    def __init__(
        self,
        suite_name: Optional[str] = "Credo_Deepchecks_Suite",
        checks: Optional[List[BaseCheck]] = DEFAULT_CHECKS,
    ):
        super().__init__()
        self.suite_name = suite_name
        # TODO allow list of strings?
        self.checks = checks

    def _validate_arguments(self):
        """
        Check that the basic requirements for running the evaluator are met.
        """
        check_requirements_deepchecks(self)

    def _setup(self):
        # Set artifacts

        # All artifacts are optional and thus any could be NoneType.
        # Internal (Lens) validation ensures that at least one artifact is valid.
        self.model = self.model
        self.test_dataset = self.assessment_data
        self.train_dataset = self.training_data

    def evaluate(self):
        """
        Execute any data/model processing required for the evaluator.

        Populates the self.results object.

        Returns
        -------
        self
        """
        self._setup_deepchecks()
        self.run_suite()

        self.results = [DeepchecksContainer(self.suite_name, self.suite_results)]

        return self

    def _setup_deepchecks(self):
        if self.test_dataset:
            self.test_dataset = Dataset(
                df=self.test_dataset.X, label=self.test_dataset.y
            )

        if self.train_dataset:
            self.train_dataset = Dataset(
                df=self.train_dataset.X, label=self.train_dataset.y
            )

        if self.model:
            self.deepchecks_model = self.model.model_like
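            # Note: run_suite() below uses self.model.model_like directly, so
            # deepchecks_model currently appears unused within this evaluator.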

        self.suite = Suite(name=self.suite_name)
        for check in self.checks:
            self.suite.add(check)
        # Checks are added one at a time: deepchecks does not accept a plain
        # Python list of checks as a single argument.
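        # NOTE (assumption, not verified here): recent deepchecks versions also accept
        # checks as positional arguments, e.g. Suite(self.suite_name, *self.checks);
        # check the installed deepchecks API before relying on that form.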

    def run_suite(self):
        if self.train_dataset and self.test_dataset:
            self.suite_results = self.suite.run(
                train_dataset=self.train_dataset,
                test_dataset=self.test_dataset,
                model=self.model.model_like,
            )
        elif self.train_dataset:
            self.suite_results = self.suite.run(
                train_dataset=self.train_dataset, model=self.model.model_like
            )
        else:
            # Deepchecks requires the single dataset to be passed as train_dataset
            # whenever only one dataset is supplied, even if that dataset is really
            # a test set. This makes client code (like ours) less readable, but
            # there is no way around it.
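            # As a consequence, the assessment (test) data is passed as the "train"
            # dataset below; deepchecks may label it accordingly in the resulting
            # SuiteResult.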

            self.suite_results = self.suite.run(
                train_dataset=self.test_dataset, model=self.model.model_like
            )