Coverage for credoai/evaluators/deepchecks_credoai.py: 92%
40 statements
from typing import List, Optional

from connect.evidence.deepchecks_evidence import DeepchecksContainer
from deepchecks.core import BaseCheck
from deepchecks.tabular import Dataset, Suite

from credoai.evaluators.evaluator import Evaluator
from credoai.evaluators.utils.validation import check_requirements_deepchecks
from credoai.modules.constants_deepchecks import DEFAULT_CHECKS

class Deepchecks(Evaluator):
    """
    `Deepchecks <https://docs.deepchecks.com/stable/getting-started/welcome.html?utm_campaign=/&utm_medium=referral&utm_source=deepchecks.com>`_ evaluator for Credo AI (Experimental)

    This evaluator runs deepchecks `checks` and passes the results to the
    Governance platform in the form of a deepchecks SuiteResult, cast to JSON format.
    See `model evaluation <https://docs.deepchecks.com/stable/api/generated/deepchecks.tabular.checks.model_evaluation.html>`_,
    `SuiteResults <https://docs.deepchecks.com/stable/api/generated/deepchecks.core.SuiteResult.html>`_,
    and `create a custom suite <https://docs.deepchecks.com/stable/user-guide/general/customizations/examples/plot_create_a_custom_suite.html>`_
    for more details.

    This evaluator provides some redundant functionality. For instance, metrics that can be
    calculated with the Performance evaluator can potentially be calculated by deepchecks
    (and thus this evaluator) as well. The same applies to the FeatureDrift evaluator.
    When a choice exists, best practice is to use the "Lens native" evaluator in preference
    to deepchecks, since the output formats of the other evaluators are generally consistent
    with one another, while this deepchecks evaluator outputs results in a highly structured
    JSON format.

    Parameters
    ----------
    suite_name : str, optional
        Name of the supplied deepchecks suite
    checks : List[BaseCheck], optional
        A list of instantiated deepchecks check objects (e.g. BoostingOverfit, CalibrationScore)
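
    Examples
    --------
    A minimal sketch of one way to wire this evaluator into a Lens pipeline. The
    ``credo_model`` and ``credo_data`` artifacts are assumed to exist already (they
    are not defined in this module), and the check used is purely illustrative::

        from deepchecks.tabular.checks import BoostingOverfit

        from credoai.lens import Lens

        lens = Lens(model=credo_model, assessment_data=credo_data)
        lens.add(Deepchecks(suite_name="My_Suite", checks=[BoostingOverfit()]))
        lens.run()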
37 """

    required_artifacts = {"model", "assessment_data", "training_data"}
    # All artifacts are optional; all that is required is that at least one of them is
    # provided. The evaluator's custom validation function checks for this.

    def __init__(
        self,
        suite_name: Optional[str] = "Credo_Deepchecks_Suite",
        checks: Optional[List[BaseCheck]] = DEFAULT_CHECKS,
    ):
        super().__init__()
        self.suite_name = suite_name
        # TODO allow list of strings?
        self.checks = checks

    def _validate_arguments(self):
        """
        Check that the basic requirements for running the evaluator are met.
        """
        check_requirements_deepchecks(self)

    def _setup(self):
        # Set artifacts

        # All artifacts are optional and thus any could be NoneType.
        # Internal (Lens) validation ensures that at least one artifact is valid.
        self.model = self.model
        self.test_dataset = self.assessment_data
        self.train_dataset = self.training_data

    def evaluate(self):
        """
        Execute any data/model processing required for the evaluator.

        Populates the self.results object.

        Returns
        -------
        self
        """
        self._setup_deepchecks()
        self.run_suite()

        self.results = [DeepchecksContainer(self.suite_name, self.suite_results)]

        return self

    def _setup_deepchecks(self):
        if self.test_dataset:
            self.test_dataset = Dataset(
                df=self.test_dataset.X, label=self.test_dataset.y
            )

        if self.train_dataset:
            self.train_dataset = Dataset(
                df=self.train_dataset.X, label=self.train_dataset.y
            )

        if self.model:
            self.deepchecks_model = self.model.model_like

        self.suite = Suite(name=self.suite_name)
        for check in self.checks:
            self.suite.add(check)
            # Checks have to be added one at a time in a loop: deepchecks does not
            # accept a whole list of checks here.

    def run_suite(self):
        if self.train_dataset and self.test_dataset:
            self.suite_results = self.suite.run(
                train_dataset=self.train_dataset,
                test_dataset=self.test_dataset,
                model=self.model.model_like,
            )

        elif self.train_dataset:
            self.suite_results = self.suite.run(
                train_dataset=self.train_dataset, model=self.model.model_like
            )
        else:
            # Deepchecks requires a train dataset to be specified even when only a
            # single dataset is supplied, and even if that dataset is meant to be a
            # test set. This makes client code (like ours) less readable, but there
            # is no way around it, so the test set is passed as train_dataset here
            # (see the sketch at the end of this module).
            self.suite_results = self.suite.run(
                train_dataset=self.test_dataset, model=self.model.model_like
            )
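

# For reference, a rough sketch of the raw deepchecks calls that ``run_suite``
# boils down to when only an assessment dataset is available. The names ``clf``,
# ``X_test`` and ``y_test`` are illustrative placeholders (a fitted estimator plus
# a pandas DataFrame/Series), not objects defined in this module:
#
#     from deepchecks.tabular import Dataset, Suite
#     from deepchecks.tabular.checks import BoostingOverfit
#
#     test_dataset = Dataset(df=X_test, label=y_test)
#     suite = Suite(name="Credo_Deepchecks_Suite")
#     suite.add(BoostingOverfit())
#     # The single dataset has to be passed as train_dataset; deepchecks offers no
#     # test-only entry point.
#     suite_results = suite.run(train_dataset=test_dataset, model=clf)
#     suite_results.to_json()  # the JSON form referenced in the class docstring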