diff --git a/CHANGELOG.md b/CHANGELOG.md index e02e761e..5207b800 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,11 @@ - (`core`) New way to add a variable to a dictionary using a complete specification. - (`sklearn`) `Text` Khiops type support at the estimator level. +### Changed +- (`core`) Dictionary API and analysis and coclustering reports, + when a requested key is not found in getters, return a ``None`` value instead + of raising a `KeyError` exception. + ### Fixed - (General) Inconsistency between the `tools.download_datasets` function and the current samples directory according to `core.api.get_samples_dir()`. diff --git a/khiops/core/analysis_results.py b/khiops/core/analysis_results.py index ec3f1b78..63a06dbe 100644 --- a/khiops/core/analysis_results.py +++ b/khiops/core/analysis_results.py @@ -575,14 +575,10 @@ def get_variable_statistics(self, variable_name): Returns ------- `VariableStatistics` - The statistics of the specified variable. - - Raises - ------ - `KeyError` - If no variable with the specified names exist. + The statistics of the specified variable. A ``None`` value is returned + if the variable name is not found. """ - return self._variables_statistics_by_name[variable_name] + return self._variables_statistics_by_name.get(variable_name) def get_tree(self, tree_name): """Returns the tree with the specified name @@ -595,14 +591,10 @@ def get_tree(self, tree_name): Returns ------- `Tree` - The tree which has the specified name. - - Raises - ------ - `KeyError` - If no tree with the specified name exists. + The tree which has the specified name. A ``None`` value is returned + if the tree name is not found. 
""" - return self._trees_by_name[tree_name] + return self._trees_by_name.get(tree_name) def to_dict(self): """Transforms this instance to a dict with the Khiops JSON file structure""" @@ -1045,15 +1037,12 @@ def get_variable_pair_statistics(self, variable_name_1, variable_name_2): ------- `VariablePairStatistics` The statistics of the specified pair of variables. - - Raises - ------ - `KeyError` - If no pair with the specified names exist. + A ``None`` value is returned if no pair with the + specified names exist. """ - return self._variables_pairs_statistics_by_name[ + return self._variables_pairs_statistics_by_name.get( (variable_name_1, variable_name_2) - ] + ) def to_dict(self): """Transforms this instance to a dict with the Khiops JSON file structure""" @@ -1306,14 +1295,10 @@ def get_predictor(self, predictor_name): Returns ------- `TrainedPredictor` - The predictor object for the specified name. - - Raises - ------ - `KeyError` - If there is no predictor with the specified name. + The predictor object for the specified name. A ``None`` value is + returned if the predictor name is not found. """ - return self._trained_predictors_by_name[predictor_name] + return self._trained_predictors_by_name.get(predictor_name) def get_snb_predictor(self): """Returns the Selective Naive Bayes predictor @@ -1321,12 +1306,8 @@ def get_snb_predictor(self): Returns ------- `TrainedPredictor` - The predictor object for "Selective Naive Bayes". - - Raises - ------ - `KeyError` - If there is no predictor named "Selective Naive Bayes". + The predictor object for "Selective Naive Bayes". A ``None`` value is + returned if there is no predictor named "Selective Naive Bayes". """ return self.get_predictor("Selective Naive Bayes") @@ -1588,14 +1569,10 @@ def get_predictor_performance(self, predictor_name): Returns ------- `PredictorPerformance` - The performance metrics for the specified predictor. - - Raises - ------ - `KeyError` - If no predictor with the specified name exists. 
+ The performance metrics for the specified predictor. A ``None`` value + is returned if the predictor name is not found. """ - return self._predictors_performance_by_name[predictor_name] + return self._predictors_performance_by_name.get(predictor_name) def get_snb_performance(self): """Returns the performance metrics for the Selective Naive Bayes predictor @@ -1625,21 +1602,20 @@ def get_regressor_rec_curve(self, regressor_name): Returns ------- `PredictorCurve` - The REC curve for the specified regressor. + The REC curve for the specified regressor. A ``None`` value is + returned if the regressor name is not found. Raises ------ `ValueError` - If no regressor curves available. ( - `KeyError` - If no regressor with the specified name exists. + If no regressor curves are available. """ if self.learning_task != "Regression analysis": raise ValueError("REC curves are available only for regression") for curve in self.regression_rec_curves: if curve.name == regressor_name: return curve - raise KeyError(regressor_name) + return None def get_snb_rec_curve(self): """Returns the REC curve for the Selective Naive Bayes regressor @@ -1675,12 +1651,8 @@ def get_classifier_lift_curve(self, classifier_name, target_value): ------- `PredictorCurve` The lift curve for the specified classifier and target value. - - Raises - ------ - `KeyError` - If no classifier with the specified exists or no target value with the - specified name exists. + A ``None`` value is returned if no classifier with the specified name + exists or no target value with the specified name exists.
""" if self.learning_task != "Classification analysis": raise ValueError("Lift curves are available only for classification") @@ -1701,8 +1673,7 @@ def get_classifier_lift_curve(self, classifier_name, target_value): for lift_curve in self.classification_lift_curves[i]: if lift_curve.name == classifier_name: return lift_curve - raise KeyError(classifier_name) - raise KeyError(target_value) + return None def get_snb_lift_curve(self, target_value): """Returns lift curve for the Selective Naive Bayes clf. given a target value @@ -1716,14 +1687,8 @@ def get_snb_lift_curve(self, target_value): ------- `PredictorCurve` The lift curve of the Selective Naive Bayes classifier for the specified - target value. - - Raises - ------ - `ValueError` - If the Selective Naive Bayes classifier information is not available. - `KeyError` - If no target value with the specified name exists. + target value. A ``None`` value is returned if no Selective Naive Bayes + classifier information is available. """ if self.learning_task != "Classification analysis": raise ValueError("Lift curves are available only for classification") @@ -1732,10 +1697,7 @@ def get_snb_lift_curve(self, target_value): for lift_curve in self.classification_lift_curves[i]: if lift_curve.name == "Selective Naive Bayes": return lift_curve - raise ValueError( - "Selective Naive Bayes classifier information not available" - ) - raise KeyError(target_value) + return None def to_dict(self): """Transforms this instance to a dict with the Khiops JSON file structure""" diff --git a/khiops/core/coclustering_results.py b/khiops/core/coclustering_results.py index 5db9ad40..da3068d8 100644 --- a/khiops/core/coclustering_results.py +++ b/khiops/core/coclustering_results.py @@ -479,14 +479,10 @@ def get_dimension(self, dimension_name): Returns ------- `CoclusteringDimension` - The specified dimension. - - Raises - ------ - `KeyError` - If no dimension with the specified names exist. + The specified dimension. 
A ``None`` value is returned + if the dimension name is not found. """ - return self._dimensions_by_name[dimension_name] + return self._dimensions_by_name.get(dimension_name) def to_dict(self): """Transforms this instance to a dict with the Khiops JSON file structure""" @@ -1062,14 +1058,10 @@ def get_part(self, part_name): Returns ------- `CoclusteringDimensionPart` - The part with the specified name. - - Raises - ------ - `KeyError` - If there is no part with the specified name. + The part with the specified name. A ``None`` value is returned + if the part name is not found. """ - return self._parts_by_name[part_name] + return self._parts_by_name.get(part_name) def get_cluster(self, cluster_name): """Returns the specified cluster @@ -1082,14 +1074,10 @@ def get_cluster(self, cluster_name): Returns ------- `CoclusteringCluster` - The specified cluster. - - Raises - ------ - `KeyError` - If there is no cluster with the specified name. + The specified cluster. A ``None`` value is returned + if the cluster name is not found. """ - return self._clusters_by_name[cluster_name] + return self._clusters_by_name.get(cluster_name) def to_dict(self, report_type): """Transforms this instance to a dict with the Khiops JSON file structure diff --git a/khiops/core/dictionary.py b/khiops/core/dictionary.py index f17186cc..e3638b73 100644 --- a/khiops/core/dictionary.py +++ b/khiops/core/dictionary.py @@ -238,15 +238,10 @@ def get_dictionary(self, dictionary_name): Returns ------- `Dictionary` - The specified dictionary. - - Raises - ------ - `KeyError` - If no dictionary with the specified name exist. - + The specified dictionary. A ``None`` value is returned + if the dictionary name is not found. 
""" - return self._dictionaries_by_name[dictionary_name] + return self._dictionaries_by_name.get(dictionary_name) def add_dictionary(self, dictionary): """Adds a dictionary to this domain @@ -409,20 +404,16 @@ def _get_dictionary_at_data_path_legacy(self, data_path): data_path_parts = data_path.split("`") source_dictionary_name = data_path_parts[0] - try: - dictionary = self.get_dictionary(source_dictionary_name) - except KeyError as error: - raise ValueError( - f"Source dictionary not found: '{source_dictionary_name}'" - ) from error + dictionary = self.get_dictionary(source_dictionary_name) + if dictionary is None: + raise ValueError(f"Source dictionary not found: '{source_dictionary_name}'") for table_variable_name in data_path_parts[1:]: - try: - table_variable = dictionary.get_variable(table_variable_name) - except KeyError as error: + table_variable = dictionary.get_variable(table_variable_name) + if table_variable is None: raise ValueError( f"Table variable '{table_variable_name}' in data path not found" - ) from error + ) if table_variable.type not in ["Table", "Entity"]: raise ValueError( @@ -430,13 +421,12 @@ def _get_dictionary_at_data_path_legacy(self, data_path): f"in data path is of type '{table_variable.type}'" ) - try: - dictionary = self.get_dictionary(table_variable.object_type) - except KeyError as error: + dictionary = self.get_dictionary(table_variable.object_type) + if dictionary is None: raise ValueError( f"Table variable '{table_variable_name}' in data path " f"points to unknown dictionary '{table_variable.object_type}'" - ) from error + ) return dictionary def _get_dictionary_at_data_path(self, data_path): @@ -447,32 +437,34 @@ def _get_dictionary_at_data_path(self, data_path): # - either it is found as such, # - or it is a Table or Entity variable whose table needs to be looked-up first_table_variable_name = data_path_parts[0] - try: - dictionary = self.get_dictionary(first_table_variable_name) - except KeyError as error: + + dictionary = 
self.get_dictionary(first_table_variable_name) + if dictionary is None: for a_dictionary in self.dictionaries: try: table_variable = a_dictionary.get_variable( first_table_variable_name ) - if table_variable.type not in ["Table", "Entity"]: - raise ValueError from error - dictionary = self.get_dictionary(table_variable.object_type) - break - except (KeyError, ValueError): + if table_variable is not None: + if table_variable.type not in ["Table", "Entity"]: + raise ValueError( + f"Variable '{table_variable}' " + "must be of type 'Table' or 'Entity'" + ) + dictionary = self.get_dictionary(table_variable.object_type) + if dictionary is not None: + break + except ValueError: continue else: - raise ValueError( - f"Dictionary not found in data path: '{data_path}'" - ) from error + raise ValueError(f"Dictionary not found in data path: '{data_path}'") for table_variable_name in data_path_parts[1:]: - try: - table_variable = dictionary.get_variable(table_variable_name) - except KeyError as error: + table_variable = dictionary.get_variable(table_variable_name) + if table_variable is None: raise ValueError( f"Table variable '{table_variable_name}' in data path not found" - ) from error + ) if table_variable.type not in ["Table", "Entity"]: raise ValueError( @@ -480,13 +472,12 @@ def _get_dictionary_at_data_path(self, data_path): f"in data path is of type '{table_variable.type}'" ) - try: - dictionary = self.get_dictionary(table_variable.object_type) - except KeyError as error: + dictionary = self.get_dictionary(table_variable.object_type) + if dictionary is None: raise ValueError( f"Table variable '{table_variable_name}' in data path " f"points to unknown dictionary '{table_variable.object_type}'" - ) from error + ) return dictionary def export_khiops_dictionary_file(self, kdic_file_path): @@ -740,10 +731,7 @@ def copy(self): def get_value(self, key): """Returns the metadata value associated to the specified key - Raises - ------ - `KeyError` - If the key is not found + A 
``None`` value is returned if the key is not found. """ return self.meta_data.get_value(key) @@ -770,14 +758,10 @@ def get_variable(self, variable_name): Returns ------- `Variable` - The specified variable. - - Raises - ------ - `KeyError` - If no variable with the specified name exists. + The specified variable. A ``None`` value is returned + if the variable name is not found. """ - return self._variables_by_name[variable_name] + return self._variables_by_name.get(variable_name) def get_variable_block(self, variable_block_name): """Returns the specified variable block @@ -790,15 +774,11 @@ def get_variable_block(self, variable_block_name): Returns ------- `VariableBlock` - The specified variable. - - Raises - ------ - `KeyError` - If no variable block with the specified name exists. + The specified variable block. A ``None`` value is returned + if the variable block name is not found. """ - return self._variable_blocks_by_name[variable_block_name] + return self._variable_blocks_by_name.get(variable_block_name) def add_variable(self, variable): """Adds a variable to this dictionary @@ -1282,10 +1262,7 @@ def copy(self): def get_value(self, key): """Returns the metadata value associated to the specified key - Raises - ------ - `KeyError` - If no metadata has this key. + A ``None`` value is returned if the key is not found. """ return self.meta_data.get_value(key) @@ -1541,10 +1518,7 @@ def remove_variable(self, variable): def get_value(self, key): """Returns the metadata value associated to the specified key - Raises - ------ - `KeyError` - If ``key`` is not found + A ``None`` value is returned if the key is not found. """ return self.meta_data.get_value(key) @@ -2019,14 +1993,13 @@ def get_value(self, key): Returns ------- int, str or float - The value at the specified key + The value at the specified key. A ``None`` value is returned + if the key is not found. Raises ------ `TypeError` If ``key`` is not str. - `KeyError` - If ``key`` is not found.
""" # Check the argument types if not is_string_like(key): @@ -2036,7 +2009,7 @@ def get_value(self, key): for i, stored_key in enumerate(self.keys): if stored_key == key: return self.values[i] - raise KeyError(key) + return None def add_value(self, key, value): """Adds a value at the specified key diff --git a/tests/test_core.py b/tests/test_core.py index 7ee58cb7..ceee1045 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1409,8 +1409,9 @@ def _test_preparation_report_accessors( ) # Test anomalous access - with self.assertRaises(KeyError): - report.get_variable_statistics("INEXISTENT VARIABLE NAME") + self.assertEqual( + None, report.get_variable_statistics("INEXISTENT VARIABLE NAME") + ) def _test_bivariate_preparation_report_accessors( self, result_file_name, report, expected_outputs @@ -1433,8 +1434,10 @@ def _test_bivariate_preparation_report_accessors( ) # Test anomalous access - with self.assertRaises(KeyError): - report.get_variable_pair_statistics("INEXISTENT VARIABLE", "PAIR NAME") + self.assertEqual( + None, + report.get_variable_pair_statistics("INEXISTENT VARIABLE", "PAIR NAME"), + ) def _test_modeling_report_accessors( self, result_file_name, report, expected_outputs @@ -1455,8 +1458,7 @@ def _test_modeling_report_accessors( ) # Test anomalous access - with self.assertRaises(KeyError): - report.get_predictor("INEXISTENT REPORT NAME") + self.assertIsNone(report.get_predictor("INEXISTENT REPORT NAME")) def _test_evaluation_report_accessors( self, result_file_name, report, expected_outputs @@ -1481,8 +1483,9 @@ def _test_evaluation_report_accessors( ) # Test anomalous access - with self.assertRaises(KeyError): - report.get_predictor_performance("INEXISTENT REPORT NAME") + self.assertEqual( + None, report.get_predictor_performance("INEXISTENT REPORT NAME") + ) # Test anomalous access to performance objects for predictor_name in report.get_predictor_names(): @@ -1510,15 +1513,24 @@ def _test_evaluation_report_accessors( else: 
report.get_classifier_lift_curve(predictor_name, "INEXISTENT VALUE") if report.learning_task == "Classification analysis": - with self.assertRaises(KeyError): - report.get_classifier_lift_curve(predictor_name, "INEXISTENT VALUE") - with self.assertRaises(KeyError): - if report.learning_task == "Classification analysis": + self.assertEqual( + None, + report.get_classifier_lift_curve( + predictor_name, "INEXISTENT VALUE" + ), + ) + + if report.learning_task == "Classification analysis": + self.assertEqual( + None, report.get_classifier_lift_curve( "INEXISTENT PREDICTOR", report.classification_target_values[0] - ) - else: - report.get_regressor_rec_curve("INEXISTENT PREDICTOR") + ), + ) + else: + self.assertEqual( + None, report.get_regressor_rec_curve("INEXISTENT PREDICTOR") + ) # Test anomalous access to SNB curves with self.assertRaises(ValueError): @@ -1527,8 +1539,7 @@ def _test_evaluation_report_accessors( else: report.get_snb_lift_curve("INEXISTENT VALUE") if report.learning_task == "Classification analysis": - with self.assertRaises(KeyError): - report.get_snb_lift_curve("INEXISTENT VALUE") + self.assertIsNone(report.get_snb_lift_curve("INEXISTENT VALUE")) def _test_performance_report_accessors( self, result_file_name, learning_task, report, expected_outputs @@ -1796,8 +1807,7 @@ def test_dictionary_simple_edge_cases(self): with self.assertRaises(TypeError): meta_data.remove_key(object()) meta_data.add_value("key", "value") - with self.assertRaises(KeyError): - meta_data.get_value("INEXISTENT KEY") + self.assertIsNone(meta_data.get_value("INEXISTENT KEY")) with self.assertRaises(ValueError): meta_data.add_value("key", "REPEATED KEY") with self.assertRaises(KeyError): @@ -1967,8 +1977,8 @@ def test_dictionary_accessors(self): self.assertEqual(block, removed_block) self.assertIsNone(block_variable.variable_block) self.assertEqual(block.variables, []) - with self.assertRaises(KeyError): - dictionary_copy.get_variable_block(block.name) + # Nonexistent variable 
block name + self.assertIsNone(dictionary_copy.get_variable_block(block.name)) # Add and remove the block and remove the native variables dictionary_copy.remove_variable(block_variable.name) @@ -1981,10 +1991,12 @@ def test_dictionary_accessors(self): self.assertEqual(block, removed_block) self.assertEqual(block.variables, [block_variable]) self.assertEqual(block_variable.block, removed_block) - with self.assertRaises(KeyError): - dictionary_copy.get_variable(block_variable.name) - with self.assertRaises(KeyError): - dictionary_copy.get_variable_block(block.name) + # Nonexistent variable name + self.assertEqual( + None, dictionary_copy.get_variable(block_variable.name) + ) + # Nonexistent variable block name + self.assertIsNone(dictionary_copy.get_variable_block(block.name)) # Set the block as non-native add, and remove it dictionary_copy.add_variable_block(block) @@ -1999,10 +2011,12 @@ def test_dictionary_accessors(self): self.assertEqual(block, removed_block) self.assertEqual(block.variables, [block_variable]) self.assertEqual(block_variable.block, removed_block) - with self.assertRaises(KeyError): - dictionary_copy.get_variable(block_variable.name) - with self.assertRaises(KeyError): - dictionary_copy.get_variable_block(block.name) + # Nonexistent variable name + self.assertEqual( + None, dictionary_copy.get_variable(block_variable.name) + ) + # Nonexistent variable block name + self.assertIsNone(dictionary_copy.get_variable_block(block.name)) # Test Dictionary variable and block accessors by cleaning the dict. for variable_name in [