Coverage for credoai/evaluators/utils/validation.py: 70%
91 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-13 21:56 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-13 21:56 +0000
1############# Validation related functionality ##################
3import inspect
5import numpy as np
6import pandas as pd
8try:
9 tf_exists = True
10 import tensorflow as tf
11except ImportError:
12 tf_exists = False
14from credoai.artifacts.data.tabular_data import TabularData
15from credoai.artifacts.model.base_model import Model
16from credoai.utils import global_logger
17from credoai.utils.common import ValidationError
19##############################
20# Checking individual artifacts
21##############################
def check_instance(obj, inst_type, message=None):
    """Raise ``ValidationError`` unless ``obj`` is an instance of ``inst_type``.

    Parameters
    ----------
    obj : object
        Object to validate.
    inst_type : type or tuple of types
        Accepted type(s); anything ``isinstance`` accepts as its second argument.
    message : str, optional
        Error message to raise. When omitted (or empty) a default message
        naming the object and the accepted type(s) is generated.

    Raises
    ------
    ValidationError
        If ``obj`` is not an instance of ``inst_type``.
    """
    if isinstance(obj, inst_type):
        return

    if not message:
        # The default message is built only on failure: formatting ``obj``
        # is wasted work (and may itself fail) when the check passes.
        if isinstance(inst_type, tuple):
            # A tuple has no ``__name__``; list every accepted type instead.
            type_label = " or ".join(
                getattr(accepted, "__name__", str(accepted)) for accepted in inst_type
            )
        else:
            type_label = inst_type.__name__
        message = f"Object {obj} should be an instance of {type_label}"
    raise ValidationError(message)
def check_data_instance(obj, inst_type, name="Data"):
    """Validate that a data artifact is an instance of ``inst_type``.

    Parameters
    ----------
    obj : object
        Data artifact to validate.
    inst_type : type or tuple of types
        Accepted type(s). A tuple is reported as ``"A or B"`` in the error
        message, mirroring ``check_model_instance``.
    name : str, optional
        Label used for the artifact in the error message, by default "Data".

    Raises
    ------
    ValidationError
        Raised by ``check_instance`` when ``obj`` is not an instance of
        ``inst_type``.
    """
    if isinstance(inst_type, tuple):
        # A tuple of types has no ``__name__``; join the member names instead.
        type_label = " or ".join(accepted.__name__ for accepted in inst_type)
    else:
        type_label = inst_type.__name__
    message = f"{name} should be an instance of {type_label}"
    check_instance(obj, inst_type, message)
def check_model_instance(obj, inst_type, name="Model"):
    """Validate that a model artifact is an instance of ``inst_type``.

    Parameters
    ----------
    obj : object
        Model artifact to validate.
    inst_type : type or tuple of types
        Accepted type(s). Several types are listed as ``"A or B"`` in the
        error message.
    name : str, optional
        Label used for the artifact in the error message, by default "Model".

    Raises
    ------
    ValidationError
        Raised by ``check_instance`` when ``obj`` is not an instance of
        ``inst_type``.
    """
    # Normalise to a tuple so a single type and several types share one path.
    accepted_types = inst_type if isinstance(inst_type, tuple) else (inst_type,)
    comp_label = " or ".join(accepted.__name__ for accepted in accepted_types)
    check_instance(obj, inst_type, f"{name} should be an instance of {comp_label}")
def check_feature_presence(feature_name, df, name):
    """Ensure ``feature_name`` is available in a pandas container.

    For a DataFrame the feature must be one of its columns; for a Series the
    feature must equal the Series' ``name``. Any other input type is accepted
    without a check.

    Parameters
    ----------
    feature_name : hashable
        Name of the feature to look for.
    df : pd.DataFrame or pd.Series
        Container expected to hold the feature.
    name : str
        Label of the container, used in the error message.

    Raises
    ------
    ValidationError
        If the feature is not found in the DataFrame or Series.
    """
    if isinstance(df, pd.DataFrame):
        if feature_name in df.columns:
            return
        raise ValidationError(f"Feature {feature_name} not found in dataframe {name}")

    if isinstance(df, pd.Series) and df.name != feature_name:
        raise ValidationError(f"Feature {feature_name} not found in series {name}")
def check_existence(obj, name=None):
    """Raise ``ValidationError`` when ``obj`` is missing or empty.

    Parameters
    ----------
    obj : object
        Object to check. pandas DataFrames/Series are missing when ``empty``,
        numpy arrays when they hold no elements, and anything else when it is
        falsy (which includes ``None``).
    name : str, optional
        Label of the object, used in the error message.

    Raises
    ------
    ValidationError
        If the object is considered missing.
    """
    if isinstance(obj, (pd.DataFrame, pd.Series)):
        missing = obj.empty
    elif isinstance(obj, np.ndarray):
        # Truth-testing an array with more than one element raises ValueError,
        # and a one-element array holding 0 would wrongly count as missing.
        missing = obj.size == 0
    else:
        # ``not None`` is True, so None needs no separate test.
        missing = not obj

    if missing:
        raise ValidationError(f"Missing object {name}")
def check_nulls_by_data_type(data):
    """Report whether ``data`` contains null values, based on its type.

    Parameters
    ----------
    data : object
        pandas DataFrame/Series, numpy array or (when tensorflow is
        installed) ``tf.Tensor``. For tensorflow datasets, keras sequences
        and generator functions no check is performed and a warning is
        logged instead. Any other type is reported as containing no nulls.

    Returns
    -------
    bool-like
        Truthy when at least one null value was detected, ``False`` when the
        data is clean or could not be checked.
    """
    if isinstance(data, (pd.DataFrame, pd.Series)):
        return data.isnull().to_numpy().any()

    if isinstance(data, np.ndarray):
        # pd.isnull also handles object/string arrays, where np.isnan raises
        # TypeError, and recognises None as a null value.
        return pd.isnull(data).any()

    if tf_exists and isinstance(data, tf.Tensor):
        return tf.reduce_any(tf.math.is_nan(data))

    if (
        tf_exists and isinstance(data, (tf.data.Dataset, tf.keras.utils.Sequence))
    ) or inspect.isgeneratorfunction(data):
        message = """
        Evaluator Validation: Checking for nulls in generator-based or mapped data is not currently
        supported. Please be sure to sanitize your data. Downstream errors may arise due to nulls in
        image or other tensor data.
        """
        global_logger.warning(message)
    return False
85#################################
86# Checking evaluator requirements
87#################################
def check_data_for_nulls(obj, name, check_X=True, check_y=True, check_sens=True):
    """Raise ``ValidationError`` if selected attributes of ``obj`` hold nulls.

    Parameters
    ----------
    obj : object
        Artifact exposing ``X``, ``y`` and ``sensitive_features`` attributes.
    name : str
        Label of the artifact, used in the error message.
    check_X, check_y, check_sens : bool, optional
        Whether to inspect ``X``, ``y`` and ``sensitive_features``
        respectively. Attributes set to ``None`` are skipped.

    Raises
    ------
    ValidationError
        If any inspected attribute contains null values; the message lists
        every offending attribute.
    """
    requested = (
        (check_X, "X"),
        (check_y, "y"),
        (check_sens, "sensitive_features"),
    )

    with_nulls = []
    for enabled, attribute in requested:
        if not enabled:
            # A disabled attribute is never read from ``obj``.
            continue
        values = getattr(obj, attribute)
        if values is not None and check_nulls_by_data_type(values):
            with_nulls.append(attribute)

    if with_nulls:
        message = f"Detected null values in {name}, in attributes: {','.join(with_nulls)}"
        raise ValidationError(message)
def check_requirements_existence(self):
    """Verify that every required artifact of an evaluator is present.

    Each name in ``self.required_artifacts`` is looked up among the instance
    attributes and passed to ``check_existence``, which raises
    ``ValidationError`` for a missing object.
    """
    for artifact_name in self.required_artifacts:
        artifact = vars(self)[artifact_name]
        check_existence(artifact, name=artifact_name)
def _artifact_is_provided(artifact):
    """Return True when ``artifact`` is an actual object rather than a None/empty placeholder."""
    if artifact is None:
        return False
    try:
        return bool(artifact)
    except (TypeError, ValueError):
        # Containers such as pandas DataFrames or numpy arrays refuse to be
        # truth-tested; the fact that they exist means something was supplied.
        return True


def check_requirements_deepchecks(self):
    """Validate the artifacts of a deepchecks-based evaluator.

    Every artifact is optional, but at least one valid artifact must be
    supplied and every supplied artifact must have the right type: names
    containing ``"data"`` must be ``TabularData``, all others must be
    ``Model``.

    Raises
    ------
    ValidationError
        If a supplied artifact has the wrong type, or if no valid artifact
        was supplied at all.
    """
    at_least_one_artifact = False
    for required_name in self.required_artifacts:
        artifact = vars(self)[required_name]
        if "data" in required_name:
            checker, expected_type = check_data_instance, TabularData
        else:
            checker, expected_type = check_model_instance, Model

        try:
            checker(artifact, expected_type)
        except ValidationError:
            if _artifact_is_provided(artifact):
                # Something was supplied but has the wrong type: surface the
                # original error unchanged.
                raise
            # Nothing was supplied; the artifact is optional, so move on.
            continue
        at_least_one_artifact = True

    if not at_least_one_artifact:
        raise ValidationError(
            "Expected at least one valid artifact. None provided or all objects passed are otherwise invalid"
        )