Coverage for credoai/evaluators/utils/validation.py: 65%

80 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-08 07:32 +0000

1############# Validation related functionality ################## 

2 

3from pandas import DataFrame, Series 

4 

5from credoai.artifacts.data.tabular_data import TabularData 

6from credoai.artifacts.model.base_model import Model 

7from credoai.utils.common import ValidationError 

8 

9 

def check_instance(obj, inst_type, message=None):
    """Raise a ValidationError unless ``obj`` is an instance of ``inst_type``.

    Parameters
    ----------
    obj : object
        Object to validate.
    inst_type : type or tuple of types
        Accepted type(s), in any form ``isinstance`` accepts.
    message : str, optional
        Error message to use. When omitted or empty, a default message
        naming the object and the expected type(s) is generated.

    Raises
    ------
    ValidationError
        If ``obj`` is not an instance of ``inst_type``.
    """
    if isinstance(obj, inst_type):
        return

    if not message:
        # The default message is only built on failure, and it copes with a
        # tuple of accepted types (a tuple has no ``__name__`` attribute).
        accepted = inst_type if isinstance(inst_type, tuple) else (inst_type,)
        type_label = " or ".join(getattr(t, "__name__", str(t)) for t in accepted)
        message = f"Object {obj} should be an instance of {type_label}"
    raise ValidationError(message)

15 

16 

def check_data_instance(obj, inst_type, name="Data"):
    """Validate that a data object is an instance of ``inst_type``.

    Parameters
    ----------
    obj : object
        Object to validate.
    inst_type : type
        Expected type; its ``__name__`` is used in the error message.
    name : str, optional
        Label used for the object in the error message.

    Notes
    -----
    The check itself, and the raising of any error, is delegated to
    ``check_instance``.
    """
    expected_label = inst_type.__name__
    check_instance(
        obj,
        inst_type,
        f"{name} should be an instance of {expected_label}",
    )

20 

21 

def check_model_instance(obj, inst_type, name="Model"):
    """Validate that a model object is an instance of ``inst_type``.

    Parameters
    ----------
    obj : object
        Object to validate.
    inst_type : type or tuple of types
        Expected type(s). For a tuple, the type names are joined with
        " or " in the error message.
    name : str, optional
        Label used for the object in the error message.

    Notes
    -----
    The check itself, and the raising of any error, is delegated to
    ``check_instance``.
    """
    comp_label = (
        " or ".join(cls.__name__ for cls in inst_type)
        if isinstance(inst_type, tuple)
        else inst_type.__name__
    )
    check_instance(
        obj,
        inst_type,
        f"{name} should be an instance of {comp_label}",
    )

29 

30 

def check_feature_presence(feature_name, df, name):
    """Raise a ValidationError if ``feature_name`` is missing from ``df``.

    Parameters
    ----------
    feature_name : hashable
        Column label (DataFrame) or series name (Series) to look for.
    df : pandas.DataFrame or pandas.Series
        Container to inspect. Objects of any other type are not checked.
    name : str
        Label for ``df`` used in the error message.

    Raises
    ------
    ValidationError
        If ``df`` is a DataFrame without a ``feature_name`` column, or a
        Series whose ``name`` differs from ``feature_name``.
    """
    if isinstance(df, DataFrame) and feature_name not in df.columns:
        raise ValidationError(
            f"Feature {feature_name} not found in dataframe {name}"
        )
    if isinstance(df, Series) and not (df.name == feature_name):
        raise ValidationError(
            f"Feature {feature_name} not found in series {name}"
        )

40 

41 

def check_existence(obj, name=None):
    """Raise a ValidationError if ``obj`` is missing.

    Parameters
    ----------
    obj : object
        Object to validate.
    name : str, optional
        Label for the object used in the error message.

    Raises
    ------
    ValidationError
        If ``obj`` is falsy (e.g. ``None`` or an empty container).

    Notes
    -----
    pandas DataFrame and Series objects are always accepted, including empty
    ones: their truth value is ambiguous, so they are never passed to the
    truthiness test below.
    """
    # An instance of DataFrame/Series can never be None, so there is nothing
    # further to check for these types.
    if isinstance(obj, (DataFrame, Series)):
        return
    if not obj:
        raise ValidationError(f"Missing object {name}")

51 

52 

def check_requirements_existence(self):
    """Check that every required artifact of ``self`` exists.

    Each name listed in ``self.required_artifacts`` is looked up among the
    instance attributes of ``self`` and passed to ``check_existence``, which
    raises on the first missing one.

    Parameters
    ----------
    self : object
        Object exposing ``required_artifacts`` and one instance attribute
        per required name.
    """
    attributes = vars(self)
    for artifact_name in self.required_artifacts:
        artifact = attributes[artifact_name]
        check_existence(artifact, artifact_name)

56 

57 

def _artifact_provided(artifact):
    """Return True if ``artifact`` holds something other than an empty value."""
    if artifact is None:
        return False
    try:
        return bool(artifact)
    except ValueError:
        # Objects with an ambiguous truth value (e.g. pandas or numpy
        # containers) were clearly supplied by the caller.
        return True


def check_requirements_deepchecks(self):
    """Validate the artifacts required for deepchecks.

    At least one required artifact must be valid, and every artifact that
    was actually supplied must have the correct type: ``TabularData`` for
    names containing "data", ``Model`` otherwise. Artifacts left empty
    (``None`` or otherwise falsy) are treated as optional and skipped.

    Parameters
    ----------
    self : object
        Object exposing ``required_artifacts`` and one instance attribute
        per required name.

    Raises
    ------
    ValidationError
        If a supplied artifact has the wrong type, or if no valid artifact
        was found at all.
    """
    attributes = vars(self)
    at_least_one_artifact = False

    for required_name in self.required_artifacts:
        artifact = attributes[required_name]
        try:
            if "data" in required_name:
                check_data_instance(artifact, TabularData)
            else:
                check_model_instance(artifact, Model)
        except ValidationError:
            if _artifact_provided(artifact):
                # Something was supplied but it has the wrong type:
                # propagate the original error unchanged.
                raise
            # Nothing was supplied; this optional artifact is simply absent.
            continue
        at_least_one_artifact = True

    if not at_least_one_artifact:
        raise ValidationError(
            "Expected at least one valid artifact. None provided or all objects passed are otherwise invalid"
        )

92 

93 

def check_for_nulls(obj, name):
    """Raise a ValidationError if ``obj`` contains null values.

    Parameters
    ----------
    obj : object or None
        Object exposing a pandas-style ``isnull()`` method. ``None`` is
        accepted and skipped.
    name : str
        Label for the object used in the error message.

    Raises
    ------
    ValidationError
        If any value in ``obj`` is null.
    """
    if obj is None:
        return
    contains_nulls = obj.isnull().values.any()
    if contains_nulls:
        raise ValidationError(f"Detected nulls in {name}")

99 

100 

def check_artifact_for_nulls(obj, name):
    """Raise a ValidationError if any data attribute of ``obj`` has nulls.

    The attributes ``X``, ``y`` and ``sensitive_features`` are inspected in
    that order; attributes set to ``None`` are skipped.

    Parameters
    ----------
    obj : object
        Artifact exposing ``X``, ``y`` and ``sensitive_features``, each
        either ``None`` or an object with a pandas-style ``isnull()`` method.
    name : str
        Label for the artifact used in the error message.

    Raises
    ------
    ValidationError
        If at least one attribute contains null values; the message lists
        all offending attributes.
    """
    errors = []
    for attribute in ("X", "y", "sensitive_features"):
        values = getattr(obj, attribute)
        if values is not None and values.isnull().values.any():
            errors.append(attribute)

    if errors:
        raise ValidationError(
            f"Detected null values in {name}, in attributes: {','.join(errors)}"
        )

116 

117 

def check_model_type(obj, type):
    """Raise a ValidationError if ``obj.type`` differs from ``type``.

    Parameters
    ----------
    obj : object
        Object exposing a ``type`` attribute.
    type : object
        Expected value of ``obj.type``.

    Raises
    ------
    ValidationError
        If ``obj.type`` is not equal to ``type``.
    """
    actual_type = obj.type
    if actual_type != type:
        raise ValidationError(f"Model of type {actual_type}, expected: {type}")

121 raise ValidationError(message)