Coverage for credoai/evaluators/utils/validation.py: 65%
80 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-08 07:32 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-08 07:32 +0000
1############# Validation related functionality ##################
3from pandas import DataFrame, Series
5from credoai.artifacts.data.tabular_data import TabularData
6from credoai.artifacts.model.base_model import Model
7from credoai.utils.common import ValidationError
def check_instance(obj, inst_type, message=None):
    """Raise ``ValidationError`` unless ``obj`` is an instance of ``inst_type``.

    Parameters
    ----------
    obj : object
        Object to validate.
    inst_type : type or tuple of types
        Anything accepted as the second argument of ``isinstance``.
    message : str, optional
        Error message to use on failure. When omitted (or empty), a default
        message naming the object and the expected type(s) is generated.

    Raises
    ------
    ValidationError
        If ``obj`` is not an instance of ``inst_type``.
    """
    if isinstance(obj, inst_type):
        return
    if not message:
        # Build the default lazily, only on failure, and accept a tuple of
        # types: a tuple has no ``__name__`` of its own.
        expected = inst_type if isinstance(inst_type, tuple) else (inst_type,)
        type_label = " or ".join(getattr(t, "__name__", str(t)) for t in expected)
        message = f"Object {obj} should be an instance of {type_label}"
    raise ValidationError(message)
def check_data_instance(obj, inst_type, name="Data"):
    """Validate that a data artifact is an instance of ``inst_type``.

    Delegates to ``check_instance`` with a message of the form
    ``"<name> should be an instance of <inst_type.__name__>"``.

    Parameters
    ----------
    obj : object
        Data object to validate.
    inst_type : type
        Expected type; its ``__name__`` is used in the error message.
    name : str, optional
        Label for the object in the error message. Defaults to ``"Data"``.
    """
    check_instance(
        obj,
        inst_type,
        f"{name} should be an instance of {inst_type.__name__}",
    )
def check_model_instance(obj, inst_type, name="Model"):
    """Validate that a model artifact is an instance of ``inst_type``.

    Parameters
    ----------
    obj : object
        Model object to validate.
    inst_type : type or tuple of types
        Expected type(s). For a tuple, the type names are joined with
        ``" or "`` in the error message.
    name : str, optional
        Label for the object in the error message. Defaults to ``"Model"``.
    """
    # Normalise to a tuple so a single type and a tuple share one code path.
    accepted_types = inst_type if isinstance(inst_type, tuple) else (inst_type,)
    comp_label = " or ".join(t.__name__ for t in accepted_types)
    check_instance(
        obj,
        inst_type,
        f"{name} should be an instance of {comp_label}",
    )
def check_feature_presence(feature_name, df, name):
    """Raise ``ValidationError`` if ``feature_name`` is absent from ``df``.

    Parameters
    ----------
    feature_name : hashable
        Column label (for a DataFrame) or series name (for a Series).
    df : pandas.DataFrame or pandas.Series
        Container to inspect. Objects of any other type are not checked.
    name : str
        Label for ``df`` used in the error message.

    Raises
    ------
    ValidationError
        If a DataFrame has no such column, or a Series has a different name.
    """
    if isinstance(df, DataFrame) and feature_name not in df.columns:
        raise ValidationError(
            f"Feature {feature_name} not found in dataframe {name}"
        )
    if isinstance(df, Series) and df.name != feature_name:
        raise ValidationError(
            f"Feature {feature_name} not found in series {name}"
        )
def check_existence(obj, name=None):
    """Raise ``ValidationError`` if ``obj`` is missing.

    Parameters
    ----------
    obj : object
        Object whose presence is checked.
    name : str, optional
        Label for the object used in the error message.

    Raises
    ------
    ValidationError
        If ``obj`` is falsy (``None``, empty container, ``0``, ...).

    Notes
    -----
    A pandas DataFrame or Series always counts as present, even when empty.
    """
    # pandas containers have no unambiguous truth value (``bool(df)`` raises),
    # so they are accepted before the truthiness test. An object that passes
    # this isinstance check can never be None, so no further test is needed.
    if isinstance(obj, (DataFrame, Series)):
        return
    if not obj:
        raise ValidationError(f"Missing object {name}")
def check_requirements_existence(self):
    """Check that every required artifact of ``self`` is present.

    Iterates over ``self.required_artifacts`` and, for each name, looks up the
    instance attribute of that name and passes it to ``check_existence``.

    Parameters
    ----------
    self : object
        Object exposing ``required_artifacts`` (an iterable of attribute
        names) and holding those attributes in its instance ``__dict__``.
    """
    for artifact_name in self.required_artifacts:
        artifact = vars(self)[artifact_name]
        check_existence(artifact, artifact_name)
def check_requirements_deepchecks(self):
    """Validate artifacts where each one is optional but at least one is needed.

    For every name in ``self.required_artifacts`` the matching instance
    attribute is validated: names containing ``"data"`` must hold a
    ``TabularData`` instance, all other names must hold a ``Model`` instance.
    An artifact that is ``None`` is treated as "not supplied" and skipped.
    Any other object of the wrong type is an error.

    Parameters
    ----------
    self : object
        Object exposing ``required_artifacts`` (an iterable of attribute
        names) and holding those attributes in its instance ``__dict__``.

    Raises
    ------
    ValidationError
        If a supplied artifact has the wrong type, or if no artifact passed
        validation.
    """
    at_least_one_artifact = False
    for required_name in self.required_artifacts:
        artifact = vars(self)[required_name]
        if "data" in required_name:
            validate, expected_type = check_data_instance, TabularData
        else:
            validate, expected_type = check_model_instance, Model
        try:
            validate(artifact, expected_type)
        except ValidationError:
            # Compare against None explicitly instead of using truthiness:
            # a pandas DataFrame/Series passed by mistake has no unambiguous
            # truth value, and a falsy object of the wrong type is still an
            # invalid artifact rather than a missing one.
            if artifact is not None:
                raise
            # The artifact was simply not provided; it is optional here.
        else:
            at_least_one_artifact = True

    if not at_least_one_artifact:
        raise ValidationError(
            "Expected at least one valid artifact. None provided or all objects passed are otherwise invalid"
        )
def check_for_nulls(obj, name):
    """Raise ``ValidationError`` if ``obj`` contains any null value.

    Parameters
    ----------
    obj : object or None
        Object exposing ``isnull()`` whose result has a ``.values`` array
        (as pandas DataFrame and Series do). ``None`` is skipped.
    name : str
        Label for the object used in the error message.

    Raises
    ------
    ValidationError
        If at least one null value is detected.
    """
    message = f"Detected nulls in {name}"
    if obj is None:
        return
    contains_nulls = obj.isnull().values.any()
    if contains_nulls:
        raise ValidationError(message)
def check_artifact_for_nulls(obj, name):
    """Raise ``ValidationError`` if any data attribute of ``obj`` has nulls.

    The attributes ``X``, ``y`` and ``sensitive_features`` are inspected in
    that order. Attributes set to ``None`` are skipped.

    Parameters
    ----------
    obj : object
        Artifact exposing ``X``, ``y`` and ``sensitive_features``. Each is
        either ``None`` or an object with ``isnull()`` (as pandas containers
        provide).
    name : str
        Label for the artifact used in the error message.

    Raises
    ------
    ValidationError
        If one or more attributes contain null values. The message lists the
        offending attribute names, comma separated.
    """
    attributes_with_nulls = []
    for attribute in ("X", "y", "sensitive_features"):
        values = getattr(obj, attribute)
        if values is not None and values.isnull().values.any():
            attributes_with_nulls.append(attribute)

    if attributes_with_nulls:
        listed = ",".join(attributes_with_nulls)
        raise ValidationError(
            f"Detected null values in {name}, in attributes: {listed}"
        )
def check_model_type(obj, type):
    """Raise ``ValidationError`` if ``obj.type`` differs from ``type``.

    Parameters
    ----------
    obj : object
        Object exposing a ``type`` attribute.
    type : object
        Expected value of ``obj.type``. The parameter name shadows the
        builtin; it is kept for backward compatibility with keyword callers.

    Raises
    ------
    ValidationError
        If ``obj.type`` is not equal to ``type``.
    """
    if obj.type != type:
        raise ValidationError(f"Model of type {obj.type}, expected: {type}")