diff --git a/doc/samples/samples_sklearn.rst b/doc/samples/samples_sklearn.rst index e3fa6e25..b2ace60b 100644 --- a/doc/samples/samples_sklearn.rst +++ b/doc/samples/samples_sklearn.rst @@ -168,9 +168,8 @@ Samples # Create the dataset spec and the target X = { - "main_table": "Accidents", - "tables": { - "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"), + "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]), + "additional_data_tables": { "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]), }, } @@ -224,18 +223,12 @@ Samples # Build the multi-table dataset spec (drop the target column "Gravity") X = { - "main_table": "Accidents", - "tables": { - "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"), + "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]), + "additional_data_tables": { "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]), - "Users": (users_df, ["AccidentId", "VehicleId"]), - "Places": (places_df, "AccidentId"), + "Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]), + "Places": (places_df, ["AccidentId"], True), }, - "relations": [ - ("Accidents", "Vehicles"), - ("Vehicles", "Users"), - ("Accidents", "Places", True), - ], } # Load the target variable "Gravity" @@ -411,16 +404,14 @@ Samples # Create the dataset multitable specification for the train/test split # We specify each table with a name and a tuple (dataframe, key_columns) X_train = { - "main_table": "Accidents", - "tables": { - "Accidents": (X_train_main, "AccidentId"), + "main_table": (X_train_main, ["AccidentId"]), + "additional_data_tables": { "Vehicles": (X_train_secondary, ["AccidentId", "VehicleId"]), }, } X_test = { - "main_table": "Accidents", - "tables": { - "Accidents": (X_test_main, "AccidentId"), + "main_table": (X_test_main, ["AccidentId"]), + "additional_data_tables": { "Vehicles": (X_test_secondary, ["AccidentId", "VehicleId"]), }, } @@ -557,9 +548,8 @@ Samples # Build the multi-table dataset spec (drop the target column "Gravity") X = { - "main_table": "Accidents", - "tables": { - "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"), + "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]), + "additional_data_tables": { "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]), }, } @@ -596,18 +586,12 @@ Samples # Build the multi-table dataset spec (drop the target column "Gravity") X = { - "main_table": "Accidents", - "tables": { - "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"), + "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]), + "additional_data_tables": { "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]), - "Users": (users_df, ["AccidentId", "VehicleId"]), - "Places": (places_df, "AccidentId"), + "Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]), + "Places": (places_df, ["AccidentId"], True), }, - "relations": [ - ("Accidents", "Vehicles"), - ("Vehicles", "Users"), - ("Accidents", "Places", True), - ], } # Load the target variable "Gravity" @@ -701,14 +685,10 @@ Samples # Build the multi-table dataset spec (drop the target column "Gravity") X = { - "main_table": "Accidents", - "tables": { - "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"), + "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]), + "additional_data_tables": { "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]), }, - "relations": [ - ("Accidents", "Vehicles"), - ], } # Load the target variable "Gravity" diff --git a/khiops/core/helpers.py b/khiops/core/helpers.py index c4b1c8fa..c900df08 100644 --- a/khiops/core/helpers.py +++ b/khiops/core/helpers.py @@ -23,7 +23,10 @@ def _build_multi_table_dictionary_domain( - dictionary_domain, root_dictionary_name, secondary_table_variable_name + dictionary_domain, + root_dictionary_name, + secondary_table_variable_name, + update_secondary_table_name=False, ): """Builds a multi-table dictionary domain from a dictionary with a key Parameters @@ -34,6 +37,9 @@ def _build_multi_table_dictionary_domain( Name for the new root dictionary secondary_table_variable_name : str Name, in the root dictionary, for the "table" variable of the secondary table. + update_secondary_table_name : bool, default `False` + If ``True``, then update the secondary table name according to the + secondary table variable name. If not set, keep original table name. Returns ------- @@ -103,11 +109,17 @@ def _build_multi_table_dictionary_domain( target_variable = Variable() target_variable.name = secondary_table_variable_name target_variable.type = "Table" - target_variable.object_type = root_source_dictionary.name + if update_secondary_table_name: + target_variable.object_type = secondary_table_variable_name + else: + target_variable.object_type = root_source_dictionary.name root_target_dictionary.add_variable(target_variable) # Build secondary target dictionary, by copying root source dictionary secondary_target_dictionary = root_source_dictionary.copy() + secondary_target_dictionary.root = False + if update_secondary_table_name: + secondary_target_dictionary.name = secondary_table_variable_name # Build target domain and add dictionaries to it target_domain = DictionaryDomain() diff --git a/khiops/samples/samples_sklearn.ipynb b/khiops/samples/samples_sklearn.ipynb index 6a15c898..c4b107fe 100644 --- a/khiops/samples/samples_sklearn.ipynb +++ b/khiops/samples/samples_sklearn.ipynb @@ -180,9 +180,8 @@ "\n", "# Create the dataset spec and the target\n", "X = {\n", - " \"main_table\": \"Accidents\",\n", - " \"tables\": {\n", - " \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n", + " \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n", + " \"additional_data_tables\": {\n", " \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n", " },\n", "}\n", @@ -249,18 +248,12 @@ "\n", "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n", "X = {\n", - " \"main_table\": \"Accidents\",\n", - " \"tables\": {\n", - " \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n", + " \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n", + " \"additional_data_tables\": {\n", " \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n", - " \"Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n", - " \"Places\": (places_df, \"AccidentId\"),\n", + " \"Vehicles/Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n", + " \"Places\": (places_df, [\"AccidentId\"], True),\n", " },\n", - " \"relations\": [\n", - " (\"Accidents\", \"Vehicles\"),\n", - " (\"Vehicles\", \"Users\"),\n", - " (\"Accidents\", \"Places\", True),\n", - " ],\n", "}\n", "\n", "# Load the target variable \"Gravity\"\n", @@ -475,16 +468,14 @@ "# Create the dataset multitable specification for the train/test split\n", "# We specify each table with a name and a tuple (dataframe, key_columns)\n", "X_train = {\n", - " \"main_table\": \"Accidents\",\n", - " \"tables\": {\n", - " \"Accidents\": (X_train_main, \"AccidentId\"),\n", + " \"main_table\": (X_train_main, [\"AccidentId\"]),\n", + " \"additional_data_tables\": {\n", " \"Vehicles\": (X_train_secondary, [\"AccidentId\", \"VehicleId\"]),\n", " },\n", "}\n", "X_test = {\n", - " \"main_table\": \"Accidents\",\n", - " \"tables\": {\n", - " \"Accidents\": (X_test_main, \"AccidentId\"),\n", + " \"main_table\": (X_test_main, [\"AccidentId\"]),\n", + " \"additional_data_tables\": {\n", " \"Vehicles\": (X_test_secondary, [\"AccidentId\", \"VehicleId\"]),\n", " },\n", "}\n", @@ -660,9 +651,8 @@ "\n", "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n", "X = {\n", - " \"main_table\": \"Accidents\",\n", - " \"tables\": {\n", - " \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n", + " \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n", + " \"additional_data_tables\": {\n", " \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n", " },\n", "}\n", @@ -712,18 +702,12 @@ "\n", "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n", "X = {\n", - " \"main_table\": \"Accidents\",\n", - " \"tables\": {\n", - " \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n", + " \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n", + " \"additional_data_tables\": {\n", " \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n", - " \"Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n", - " \"Places\": (places_df, \"AccidentId\"),\n", + " \"Vehicles/Users\": (users_df, [\"AccidentId\", \"VehicleId\"]),\n", + " \"Places\": (places_df, [\"AccidentId\"], True),\n", " },\n", - " \"relations\": [\n", - " (\"Accidents\", \"Vehicles\"),\n", - " (\"Vehicles\", \"Users\"),\n", - " (\"Accidents\", \"Places\", True),\n", - " ],\n", "}\n", "\n", "# Load the target variable \"Gravity\"\n", @@ -843,14 +827,10 @@ "\n", "# Build the multi-table dataset spec (drop the target column \"Gravity\")\n", "X = {\n", - " \"main_table\": \"Accidents\",\n", - " \"tables\": {\n", - " \"Accidents\": (accidents_df.drop(\"Gravity\", axis=1), \"AccidentId\"),\n", + " \"main_table\": (accidents_df.drop(\"Gravity\", axis=1), [\"AccidentId\"]),\n", + " \"additional_data_tables\": {\n", " \"Vehicles\": (vehicles_df, [\"AccidentId\", \"VehicleId\"]),\n", " },\n", - " \"relations\": [\n", - " (\"Accidents\", \"Vehicles\"),\n", - " ],\n", "}\n", "\n", "# Load the target variable \"Gravity\"\n", diff --git a/khiops/samples/samples_sklearn.py b/khiops/samples/samples_sklearn.py index 2b42eabb..d473be53 100644 --- a/khiops/samples/samples_sklearn.py +++ b/khiops/samples/samples_sklearn.py @@ -163,9 +163,8 @@ def khiops_classifier_multitable_star(): # Create the dataset spec and the target X = { - "main_table": "Accidents", - "tables": { - "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"), + "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]), + "additional_data_tables": { "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]), }, } @@ -224,18 +223,12 @@ def khiops_classifier_multitable_snowflake(): # Build the multi-table dataset spec (drop the target column "Gravity") X = { - "main_table": "Accidents", - "tables": { - "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"), + "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]), + "additional_data_tables": { "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]), - "Users": (users_df, ["AccidentId", "VehicleId"]), - "Places": (places_df, "AccidentId"), + "Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]), + "Places": (places_df, ["AccidentId"], True), }, - "relations": [ - ("Accidents", "Vehicles"), - ("Vehicles", "Users"), - ("Accidents", "Places", True), - ], } # Load the target variable "Gravity" @@ -416,16 +409,14 @@ def khiops_classifier_with_hyperparameters(): # Create the dataset multitable specification for the train/test split # We specify each table with a name and a tuple (dataframe, key_columns) X_train = { - "main_table": "Accidents", - "tables": { - "Accidents": (X_train_main, "AccidentId"), + "main_table": (X_train_main, ["AccidentId"]), + "additional_data_tables": { "Vehicles": (X_train_secondary, ["AccidentId", "VehicleId"]), }, } X_test = { - "main_table": "Accidents", - "tables": { - "Accidents": (X_test_main, "AccidentId"), + "main_table": (X_test_main, ["AccidentId"]), + "additional_data_tables": { "Vehicles": (X_test_secondary, ["AccidentId", "VehicleId"]), }, } @@ -578,9 +569,8 @@ def khiops_encoder_multitable_star(): # Build the multi-table dataset spec (drop the target column "Gravity") X = { - "main_table": "Accidents", - "tables": { - "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"), + "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]), + "additional_data_tables": { "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]), }, } @@ -622,18 +612,12 @@ def khiops_encoder_multitable_snowflake(): # Build the multi-table dataset spec (drop the target column "Gravity") X = { - "main_table": "Accidents", - "tables": { - "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"), + "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]), + "additional_data_tables": { "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]), - "Users": (users_df, ["AccidentId", "VehicleId"]), - "Places": (places_df, "AccidentId"), + "Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]), + "Places": (places_df, ["AccidentId"], True), }, - "relations": [ - ("Accidents", "Vehicles"), - ("Vehicles", "Users"), - ("Accidents", "Places", True), - ], } # Load the target variable "Gravity" @@ -739,14 +723,10 @@ def khiops_encoder_with_hyperparameters(): # Build the multi-table dataset spec (drop the target column "Gravity") X = { - "main_table": "Accidents", - "tables": { - "Accidents": (accidents_df.drop("Gravity", axis=1), "AccidentId"), + "main_table": (accidents_df.drop("Gravity", axis=1), ["AccidentId"]), + "additional_data_tables": { "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]), }, - "relations": [ - ("Accidents", "Vehicles"), - ], } # Load the target variable "Gravity" diff --git a/khiops/sklearn/dataset.py b/khiops/sklearn/dataset.py index 5c9e071c..198e0bf2 100644 --- a/khiops/sklearn/dataset.py +++ b/khiops/sklearn/dataset.py @@ -21,7 +21,12 @@ import khiops.core as kh import khiops.core.internals.filesystems as fs from khiops.core.dictionary import VariableBlock -from khiops.core.internals.common import is_dict_like, is_list_like, type_error_message +from khiops.core.internals.common import ( + deprecation_message, + is_dict_like, + is_list_like, + type_error_message, +) # Disable PEP8 variable names because of scikit-learn X,y conventions # To capture invalid-names other than X,y run: @@ -48,65 +53,69 @@ def check_dataset_spec(ds_spec): if not is_dict_like(ds_spec): raise TypeError(type_error_message("ds_spec", ds_spec, Mapping)) - # Check the "tables" field - if "tables" not in ds_spec: - raise ValueError("'tables' entry missing from dataset dict spec") - if not is_dict_like(ds_spec["tables"]): + # Check the "main_table" field + if "main_table" not in ds_spec: + raise ValueError("'main_table' entry missing from dataset dict spec") + if not isinstance(ds_spec["main_table"], tuple): raise TypeError( - type_error_message("'tables' entry", ds_spec["tables"], Mapping) + type_error_message("'main_table' entry", ds_spec["main_table"], tuple) ) - if len(ds_spec["tables"]) == 0: - raise ValueError("'tables' dictionary cannot be empty") - for table_name, table_entry in ds_spec["tables"].items(): - _check_table_entry(table_name, table_entry) + if len(ds_spec["main_table"]) != 2: + raise ValueError("'main_table' must be a 2-element tuple") # Multi-table specific table checks - if len(ds_spec["tables"]) > 1: + if "additional_data_tables" in ds_spec: _check_multitable_spec(ds_spec) - # Check the 'format' field - if "format" in ds_spec: - _check_format_entry(ds_spec["format"]) - -def _check_table_entry(table_name, table_spec): +def _check_table_entry(table_path, table_spec): + if not isinstance(table_path, str): + raise TypeError(type_error_message("Table path", table_path, str)) if not isinstance(table_spec, tuple): raise TypeError( - type_error_message(f"'{table_name}' table entry", table_spec, tuple) + type_error_message(f"'{table_path}' table entry", table_spec, tuple) ) - if len(table_spec) != 2: + if len(table_spec) not in (2, 3): raise ValueError( - f"'{table_name}' table entry must have size 2, not {len(table_spec)}" + f"'{table_path}' table entry must have size 2 or 3, not {len(table_spec)}" + ) + if len(table_spec) == 3 and not isinstance(table_spec[2], bool): + raise TypeError( + type_error_message( + f"Table at data path {table_path} 1-1 flag", + table_spec[2], + bool, + ) ) - source, key = table_spec + source, key = table_spec[:2] if not isinstance(source, (pd.DataFrame, sp.spmatrix, str)) and not hasattr( source, "__array__" ): raise TypeError( type_error_message( - f"'{table_name}' table's source", + f"Source of table at data path '{table_path}'", source, "array-like", "scipy.sparse.spmatrix", str, ) ) - _check_table_key(table_name, key) + _check_table_key(table_path, key) -def _check_table_key(table_name, key): +def _check_table_key(table_path, key): if key is not None: - if not is_list_like(key) and not isinstance(key, str): + if not is_list_like(key): raise TypeError( - type_error_message(f"'{table_name}' table's key", key, str, Sequence) + type_error_message(f"'{table_path}' table's key", key, Sequence) ) if len(key) == 0: - raise ValueError(f"'{table_name}' table's key is empty") + raise ValueError(f"'{table_path}' table's key is empty") for column_name in key: if not isinstance(column_name, str): raise TypeError( type_error_message( - f"'{table_name}' table's key column name", + f"'{table_path}' table's key column name", column_name, str, ) @@ -114,192 +123,105 @@ def _check_table_key(table_name, key): def _check_multitable_spec(ds_spec): - # Check the main table - if "main_table" not in ds_spec: - raise ValueError( - "'main_table' entry must be specified for multi-table datasets" - ) - if not isinstance(ds_spec["main_table"], str): - raise TypeError( - type_error_message("'main_table' entry", ds_spec["main_table"], str) - ) - if ds_spec["main_table"] not in ds_spec["tables"]: - raise ValueError( - f"A table entry with the main table's name ('{ds_spec['main_table']}') " - f"must be present in the 'tables' dictionary" - ) + # Check that "additional_data_tables" is present + assert "additional_data_tables" in ds_spec - # Check that all tables have non-None keys - for table_name, (_, table_key) in ds_spec["tables"].items(): - if table_key is None: - table_kind = "main" if ds_spec["main_table"] == table_name else "secondary" - raise ValueError( - f"key of {table_kind} table '{table_name}' is 'None': " - "table keys must be specified in multi-table datasets" + # Check the "additional_data_tables" field + if not is_dict_like(ds_spec["additional_data_tables"]): + raise TypeError( + type_error_message( + "'additional_data_tables' entry", + ds_spec["additional_data_tables"], + Mapping, ) + ) + for table_path, table_entry in ds_spec["additional_data_tables"].items(): + _check_table_entry(table_path, table_entry) # Check that all the tables have the same type as the main - main_table_type = type(ds_spec["tables"][ds_spec["main_table"]][0]) - for table_name, (table_source, _) in ds_spec["tables"].items(): - if table_name != ds_spec["main_table"]: - if not isinstance(table_source, main_table_type): - raise ValueError( - f"Secondary table '{table_name}' has type " - f"'{type(table_source).__name__}' which is different from the " - f"main table's type '{main_table_type.__name__}'." - ) - - # If the 'relations' entry exists check it - if "relations" in ds_spec: - relations_spec = ds_spec["relations"] - # Otherwise build a star schema relations spec and check it - else: - relations_spec = [ - (ds_spec["main_table"], table) - for table in ds_spec["tables"].keys() - if table != ds_spec["main_table"] - ] - _check_relations_entry(ds_spec["main_table"], ds_spec["tables"], relations_spec) - - -def _check_relations_entry(main_table_name, tables_spec, relations_spec): - # Check the types and size of the relation entries - if not is_list_like(relations_spec): - raise TypeError( - type_error_message("'relations' entry", relations_spec, "list-like") + # Check that the main table's key is contained in subtable keys + main_table_type = type(ds_spec["main_table"][0]) + main_table_key = ds_spec["main_table"][1] + if main_table_key is None: + raise ValueError( + "The key of the main table is 'None': " + "table keys must be specified in multi-table datasets" ) - for i, relation in enumerate(relations_spec, 1): - # Check that the relation is a 2 or 3 tuple - if not isinstance(relation, tuple): - raise TypeError(type_error_message("Relation", relation, tuple)) - if len(relation) not in (2, 3): - raise ValueError(f"A relation must be of size 2 or 3, not {len(relation)}") - - # Check the types of the tuple contents - parent_table, child_table = relation[:2] - if not isinstance(parent_table, str): - raise TypeError( - type_error_message(f"Relation #{i}'s parent table", parent_table, str) - ) - if not isinstance(child_table, str): - raise TypeError( - type_error_message(f"Relation #{i}'s child table", child_table, str) - ) - if len(relation) == 3 and not isinstance(relation[2], bool): - raise TypeError( - type_error_message( - f"Relation #{i} ({parent_table}, {child_table}) 1-1 flag", - relation[2], - bool, - ) + if not main_table_key: + raise ValueError( + "The key of the main table is empty: " + "table keys must be specified in multi-table datasets" + ) + for table_path, table_spec in ds_spec["additional_data_tables"].items(): + table_source = table_spec[0] + if not isinstance(table_source, main_table_type): + raise ValueError( + f"Additional data table at data path '{table_path}' has type " + f"'{type(table_source).__name__}' which is different from the " + f"main table's type '{main_table_type.__name__}'." ) - - # Check structure and coherence with the rest of the spec - parents_and_children = [relation[:2] for relation in relations_spec] - for i, relation in enumerate(relations_spec, 1): - parent_table, child_table = relation[:2] - if parent_table == child_table: + table_key = table_spec[1] + if table_key is None: raise ValueError( - f"Relation #{i}'s tables are equal: ({parent_table}, {child_table}). " - "They must be different." + f"Key of secondary table at path '{table_path}' is 'None': " + "table keys must be specified in multi-table datasets" ) - for table in (parent_table, child_table): - if not table in tables_spec.keys(): - raise ValueError( - f"Relation #{i} ({parent_table}, {child_table}) contains " - f"non-existent table '{table}'. All relation tables must exist " - "in the 'tables' entry." - ) - if parents_and_children.count(relation[:2]) > 1: + if not set(main_table_key).issubset(table_key): + table_key_msg = f"[{', '.join(table_key)}]" + main_table_key_msg = f"[{', '.join(main_table_key)}]" raise ValueError( - f"Relation #{i} ({parent_table}, {child_table}) occurs " - f"{parents_and_children.count(relation[:2])} times. " - f"Each relation must be unique." + f"Table at data path '{table_path}' " + f"key ({table_key_msg}) does not contain that of the main table " + f"({main_table_key_msg})." ) - # Check hierarchical keys - _check_hierarchical_keys( - i, - parent_table, - tables_spec[parent_table][1], - child_table, - tables_spec[child_table][1], - ) - # Check there are no cycles - _check_no_cycles(relations_spec, main_table_name) - - -def _check_hierarchical_keys( - relation_id, parent_table, parent_table_key, child_table, child_table_key -): - """Check that the parent table's key is contained in the child table's key""" - # Perform the check and save the error status - error_found = False - if isinstance(parent_table_key, str) and isinstance(child_table_key, str): - error_found = child_table_key != parent_table_key - elif isinstance(parent_table_key, str) and is_list_like(child_table_key): - error_found = parent_table_key not in child_table_key - elif is_list_like(parent_table_key) and is_list_like(child_table_key): - error_found = not set(parent_table_key).issubset(child_table_key) - elif is_list_like(parent_table_key) and isinstance(child_table_key, str): - error_found = ( - len(parent_table_key) != 1 or child_table_key not in parent_table_key - ) +def table_name_of_path(table_path): + return table_path.split("/")[-1] - # Report any error found - if error_found: - if isinstance(child_table_key, str): - child_table_key_msg = f"[{child_table_key}]" - else: - child_table_key_msg = f"[{', '.join(child_table_key)}]" - if isinstance(parent_table_key, str): - parent_table_key_msg = f"[{parent_table_key}]" - else: - parent_table_key_msg = f"[{', '.join(parent_table_key)}]" - raise ValueError( - f"Relation #{relation_id} child table '{child_table}' " - f"key ({child_table_key_msg}) does not contain that of parent table " - f"'{parent_table}' ({parent_table_key_msg})." - ) - - -def _check_no_cycles(relations_spec, main_table_name): - """Check that there are no cycles in the 'relations' entry""" - tables_to_visit = [main_table_name] - tables_visited = set() - while tables_to_visit: - current_table = tables_to_visit.pop(0) - tables_visited.add(current_table) - for relation in relations_spec: - parent_table, child_table = relation[:2] - if parent_table == current_table: - tables_to_visit.append(child_table) - if tables_visited.intersection(tables_to_visit): - raise ValueError( - "'relations' entry contains a cycle that includes " - f"the relation ({parent_table}, {child_table})." - ) - -def _check_format_entry(format_spec): - if not isinstance(format_spec, tuple): - raise TypeError(type_error_message("'format' entry", format_spec, tuple)) - if len(format_spec) != 2: - raise ValueError( - f"'format' entry must be a tuple of size 2, not {len(format_spec)}" - ) - sep, header = format_spec - if not isinstance(sep, str): - raise TypeError( - type_error_message("'format' tuple's 1st element (separator)", sep, str) - ) - if not isinstance(header, bool): - raise TypeError( - type_error_message("'format' tuple's 2nd element (header)", header, bool) - ) - if len(sep) != 1: - raise ValueError(f"'format' separator must be a single char, got '{sep}'") +def _upgrade_mapping_spec(ds_spec): + assert is_dict_like(ds_spec) + new_ds_spec = {} + new_ds_spec["additional_data_tables"] = {} + for table_name, table_data in ds_spec["tables"].items(): + table_df, table_key = table_data + if not is_list_like(table_key): + table_key = [table_key] + if table_name == ds_spec["main_table"]: + new_ds_spec["main_table"] = (table_df, table_key) + else: + table_path = [table_name] + is_entity = False + + # Cycle 4 times on the relations to get all transitive relation, like: + # - current table name N + # - main table name N1 + # - and relations: (N1, N2), (N2, N3), (N3, N) + # the data-path must be N2/N3/N + # Note: this is a heuristic that should be replaced with a graph + # traversal procedure + # If no "relations" key exists, then one has a star schema and + # the data-paths are the names of the secondary tables themselves + # (with respect to the main table) + if "relations" in ds_spec: + for relation in list(ds_spec["relations"]) * 4: + left, right = relation[:2] + if len(relation) == 3 and right == table_name: + is_entity = relation[2] + if ( + left != ds_spec["main_table"] + and left not in table_path + and right in table_path + ): + table_path.insert(0, left) + table_path = "/".join(table_path) + if is_entity: + table_data = (table_df, table_key, is_entity) + else: + table_data = (table_df, table_key) + new_ds_spec["additional_data_tables"][table_path] = table_data + return new_ds_spec def get_khiops_type(numpy_type): @@ -464,8 +386,7 @@ class Dataset: def __init__(self, X, y=None, categorical_target=True): # Initialize members self.main_table = None - self.secondary_tables = None - self.relations = None + self.additional_data_tables = None self.categorical_target = categorical_target self.target_column = None self.target_column_id = None @@ -476,15 +397,15 @@ def __init__(self, X, y=None, categorical_target=True): # A single pandas dataframe if isinstance(X, pd.DataFrame): self.main_table = PandasTable("main_table", X) - self.secondary_tables = [] + self.additional_data_tables = [] # A single numpy array (or compatible object) elif hasattr(X, "__array__") or is_list_like(X): self.main_table = NumpyTable("main_table", X) - self.secondary_tables = [] + self.additional_data_tables = [] # A scipy.sparse.spmatrix elif isinstance(X, sp.spmatrix): self.main_table = SparseTable("main_table", X) - self.secondary_tables = [] + self.additional_data_tables = [] # Special rejection for scipy.sparse.sparray (to pass the sklearn tests) # Note: We don't use scipy.sparse.sparray because it is not implemented in scipy # 1.10 which is the latest supporting py3.8 @@ -514,16 +435,18 @@ def __init__(self, X, y=None, categorical_target=True): # Index the tables by name self._tables_by_name = { - table.name: table for table in [self.main_table] + self.secondary_tables + table.name: table + for table in [self.main_table] + + [table for _, table, _ in self.additional_data_tables] } # Post-conditions assert self.main_table is not None, "'main_table' is 'None' after init" assert isinstance( - self.secondary_tables, list + self.additional_data_tables, list ), "'secondary_tables' is not a list after init" assert not self.is_multitable or len( - self.secondary_tables + self.additional_data_tables ), "'secondary_tables' is empty in a multi-table dataset" assert ( y is None or self.target_column is not None @@ -560,18 +483,27 @@ def _init_tables_from_mapping(self, X): """Initializes the table spec from a dict-like 'X'""" assert is_dict_like(X), "'X' must be dict-like" + # Detect if deprecated mapping specification syntax is used; + # if so, issue deprecation warning and transform it to the new syntax + if "tables" in X.keys() and isinstance(X.get("main_table"), str): + warnings.warn( + deprecation_message( + "This multi-table dataset specification format", + "11.0.1", + replacement=( + "the new data-path-based format, as documented in " + ":doc:`multi_table_primer`." + ), + quote=False, + ) + ) + X = _upgrade_mapping_spec(X) + # Check the input mapping check_dataset_spec(X) - # Initialize tables objects - if len(X["tables"]) == 1: - main_table_name = list(X["tables"])[0] - main_table_source, main_table_key = list(X["tables"].values())[0] - if isinstance(main_table_key, str): - main_table_key = [main_table_key] - else: - main_table_name = X["main_table"] - main_table_source, main_table_key = X["tables"][main_table_name] + main_table_name = "main_table" + main_table_source, main_table_key = X["main_table"] # Initialize a Pandas dataset if isinstance(main_table_source, pd.DataFrame): @@ -580,32 +512,43 @@ def _init_tables_from_mapping(self, X): main_table_source, key=main_table_key, ) - self.secondary_tables = [] - for table_name, (table_source, table_key) in X["tables"].items(): - if table_name != main_table_name: - self.secondary_tables.append( - PandasTable(table_name, table_source, key=table_key) + self.additional_data_tables = [] + if "additional_data_tables" in X: + for table_path, table_spec in X["additional_data_tables"].items(): + table_source, table_key = table_spec[:2] + table_name = table_name_of_path(table_path) + table = PandasTable( + table_name, + table_source, + key=table_key, + ) + is_one_to_one_relation = False + if len(table_spec) == 3 and table_spec[2] is True: + is_one_to_one_relation = True + + self.additional_data_tables.append( + (table_path, table, is_one_to_one_relation) ) # Initialize a sparse dataset (monotable) elif isinstance(main_table_source, sp.spmatrix): self.main_table = SparseTable( - main_table_name, - main_table_source, + name=main_table_name, + matrix=main_table_source, key=main_table_key, ) - self.secondary_tables = [] + self.additional_data_tables = [] # Initialize a numpyarray dataset (monotable) elif hasattr(main_table_source, "__array__"): self.main_table = NumpyTable( - main_table_name, - main_table_source, + name=main_table_name, + array=main_table_source, ) - if len(X["tables"]) > 1: + if "additional_data_tables" in X and len(X["additional_data_tables"]) > 0: raise ValueError( "Multi-table schemas are only allowed " "with pandas dataframe source tables" ) - self.secondary_tables = [] + self.additional_data_tables = [] else: raise TypeError( type_error_message( @@ -613,25 +556,9 @@ def _init_tables_from_mapping(self, X): ) ) - # If the relations are not specified initialize to a star schema - if "relations" not in X: - self.relations = [ - (self.main_table.name, table.name, False) - for table in self.secondary_tables - ] - # Otherwise initialize the relations in the spec - else: - relations = [] - for relation in X["relations"]: - parent, child = relation[:2] - relations.append( - (parent, child, relation[2] if len(relation) == 3 else False) - ) - self.relations = relations - def _init_target_column(self, y): assert self.main_table is not None - assert self.secondary_tables is not None + assert self.additional_data_tables is not None # Check y's type # For in memory target columns: @@ -730,22 +657,23 @@ def table_type(self): @property def is_multitable(self): """bool : ``True`` if the dataset is multitable""" - return self.secondary_tables is not None and len(self.secondary_tables) > 0 + return ( + self.additional_data_tables is not None + and len(self.additional_data_tables) > 0 + ) def to_spec(self): """Returns a dictionary specification of this dataset""" ds_spec = {} - ds_spec["main_table"] = self.main_table.name - ds_spec["tables"] = {} - ds_spec["tables"][self.main_table.name] = ( - self.main_table.data_source, - self.main_table.key, - ) - for table in self.secondary_tables: - ds_spec["tables"][table.name] = (table.data_source, table.key) - if self.relations: - ds_spec["relations"] = [] - ds_spec["relations"].extend(self.relations) + ds_spec["main_table"] = (self.main_table.data_source, self.main_table.key) + ds_spec["additional_data_tables"] = {} + for table_path, table, is_one_to_one_relation in self.additional_data_tables: + assert table_path is not None + ds_spec["additional_data_tables"][table_path] = ( + table.data_source, + table.key, + is_one_to_one_relation, + ) return ds_spec @@ -806,33 +734,34 @@ def create_khiops_dictionary_domain(self): # root dictionary that point to each secondary table # This is performed using a breadth-first-search over the graph of relations # Note: In general 'name' and 'object_type' fields of Variable can be different - if self.secondary_tables: + if self.additional_data_tables: main_dictionary.root = True - table_names = [table.name for table in self.secondary_tables] - tables_to_visit = [self.main_table.name] - while tables_to_visit: - current_table = tables_to_visit.pop(0) - for relation in self.relations: - parent_table, child_table, is_one_to_one_relation = relation - if parent_table == current_table: - tables_to_visit.append(child_table) - parent_table_name = parent_table - index_table = table_names.index(child_table) - table = self.secondary_tables[index_table] - parent_table_dictionary = dictionary_domain.get_dictionary( - parent_table_name - ) - dictionary = table.create_khiops_dictionary() - dictionary_domain.add_dictionary(dictionary) - table_variable = kh.Variable() - if is_one_to_one_relation: - table_variable.type = "Entity" - else: - table_variable.type = "Table" - table_variable.name = table.name - table_variable.object_type = table.name - parent_table_dictionary.add_variable(table_variable) + for ( + table_path, + table, + is_one_to_one_relation, + ) in self.additional_data_tables: + if not "/" in table_path: + parent_table_name = self.main_table.name + else: + table_path_fragments = table_path.split("/") + parent_table_name = table_name_of_path( + "/".join(table_path_fragments[:-1]) + ) + parent_table_dictionary = dictionary_domain.get_dictionary( + parent_table_name + ) + dictionary = table.create_khiops_dictionary() + dictionary_domain.add_dictionary(dictionary) + table_variable = kh.Variable() + if is_one_to_one_relation: + table_variable.type = "Entity" + else: + table_variable.type = "Table" + table_variable.name = table.name + table_variable.object_type = table.name + parent_table_dictionary.add_variable(table_variable) return dictionary_domain def create_table_files_for_khiops(self, output_dir, sort=True): @@ -851,7 +780,7 @@ def create_table_files_for_khiops(self, output_dir, sort=True): A tuple containing: - The path of the main table - - A dictionary containing the relation [table-name -> file-path] for the + - A dictionary containing the relation [data-path -> file-path] for the secondary tables. The dictionary is empty for monotable datasets. """ # Sort the main table unless: @@ -871,8 +800,9 @@ def create_table_files_for_khiops(self, output_dir, sort=True): # Create a copy of each secondary table secondary_table_paths = {} - for table in self.secondary_tables: - secondary_table_paths[table.name] = table.create_table_file_for_khiops( + for table_path, table, _ in self.additional_data_tables: + assert table_path is not None + secondary_table_paths[table_path] = table.create_table_file_for_khiops( output_dir, sort=sort ) @@ -895,25 +825,19 @@ def __init__(self, name, key=None): if not name: raise ValueError("'name' cannot be empty") if key is not None: - if not is_list_like(key) and not isinstance(key, (str, int)): - raise TypeError(type_error_message("key", key, str, int, "list-like")) - if is_list_like(key): - for column_index, column_id in enumerate(key): - if not isinstance(column_id, (str, int)): - raise TypeError( - type_error_message( - f"key[{column_index}]", column_id, str, int - ) - + f" at table '{name}'" - ) + if not is_list_like(key): + raise TypeError(type_error_message("key", key, "list-like")) + for column_index, column_id in enumerate(key): + if not isinstance(column_id, (str, int)): + raise TypeError( + type_error_message(f"key[{column_index}]", column_id, str, int) + + f" at table '{name}'" + ) # Initialization (must be completed by concrete sub-classes) self.name = name self.data_source = None - if is_list_like(key) or key is None: - self.key = key - else: - self.key = [key] + self.key = key self.column_ids = None self.khiops_types = None self.n_samples = None @@ -922,9 +846,7 @@ def check_key(self): """Checks that the key columns exist""" if self.key is not None: if not is_list_like(self.key): - raise TypeError( - type_error_message("key", self.key, str, int, "list-like") - ) + raise TypeError(type_error_message("key", self.key, "list-like")) for column_name in self.key: if column_name not in self.column_ids: raise ValueError( @@ -991,7 +913,7 @@ class PandasTable(DatasetTable): def __init__(self, name, dataframe, key=None): # Call the parent method - super().__init__(name, key=key) + super().__init__(name=name, key=key) # Check inputs specific to this sub-class if not isinstance(dataframe, pd.DataFrame): @@ -1104,7 +1026,7 @@ class NumpyTable(DatasetTable): def __init__(self, name, array, key=None): # Call the parent method - super().__init__(name, key=key) + super().__init__(name=name, key=key) # Check the array's types and shape if not hasattr(array, "__array__") and not is_list_like(array): diff --git a/khiops/sklearn/estimators.py b/khiops/sklearn/estimators.py index f24afd81..bf0435c9 100644 --- a/khiops/sklearn/estimators.py +++ b/khiops/sklearn/estimators.py @@ -556,7 +556,7 @@ def _transform_deploy_model( # Create the table files to be used by Khiops ( main_table_path, - secondary_table_paths, + additional_data_table_paths, ) = deployment_ds.create_table_files_for_khiops( computation_dir, sort=self.auto_sort ) @@ -565,20 +565,12 @@ def _transform_deploy_model( secondary_data_paths = model_dictionary_domain.extract_data_paths( model_dictionary_name ) - additional_data_tables = {} for data_path in secondary_data_paths: dictionary = model_dictionary_domain.get_dictionary_at_data_path(data_path) assert dictionary.name.startswith(self._khiops_model_prefix), ( f"Dictionary '{dictionary.name}' " f"does not have prefix '{self._khiops_model_prefix}'" ) - initial_dictionary_name = dictionary.name.replace( - self._khiops_model_prefix, "", 1 - ) - - additional_data_tables[data_path] = secondary_table_paths[ - initial_dictionary_name - ] # Set output path files output_dir = self._get_output_dir(computation_dir) @@ -591,7 +583,7 @@ def _transform_deploy_model( model_dictionary_name, main_table_path, output_data_table_path, - additional_data_tables=additional_data_tables, + additional_data_tables=additional_data_table_paths, detect_format=False, field_separator="\t", header_line=True, @@ -816,10 +808,10 @@ def _fit_train_model(self, ds, computation_dir, **kwargs): main_table_dictionary.key = [self.model_id_column] main_table_dictionary.name = f"{self._khiops_model_prefix}{ds.main_table.name}" self.model_main_dictionary_name_ = ( - f"{self._khiops_model_prefix}Keys_{ds.main_table.name}" + f"{self._khiops_model_prefix}{ds.main_table.name}" ) self.model_secondary_table_variable_name = ( - f"{self._khiops_model_prefix}{ds.main_table.name}" + f"{self._khiops_model_prefix}original_{ds.main_table.name}" ) self._create_coclustering_model_domain( tmp_domain, coclustering_file_path, output_dir @@ -873,6 +865,7 @@ def _create_coclustering_model_domain( domain, self.model_main_dictionary_name_, self.model_secondary_table_variable_name, + update_secondary_table_name=True, ) # Create the model by adding the coclustering variables @@ -1162,10 +1155,8 @@ def _transform_create_deployment_dataset(self, ds, _): assert not ds.is_multitable, "'dataset' is multitable" # Build the multitable deployment dataset - keys_table_name = f"keys_{ds.main_table.name}" deploy_dataset_spec = {} - deploy_dataset_spec["main_table"] = keys_table_name - deploy_dataset_spec["tables"] = {} + deploy_dataset_spec["additional_data_tables"] = {} # Extract the keys from the main table keys_table_dataframe = pd.DataFrame( @@ -1177,13 +1168,15 @@ def _transform_create_deployment_dataset(self, ds, _): ) # Create the dataset with the keys table as the main one - deploy_dataset_spec["tables"][keys_table_name] = ( + deploy_dataset_spec["main_table"] = ( keys_table_dataframe, - self.model_id_column, + [self.model_id_column], ) - deploy_dataset_spec["tables"][ds.main_table.name] = ( + deploy_dataset_spec["additional_data_tables"][ + f"{self._khiops_model_prefix}original_main_table" + ] = ( ds.main_table.data_source, - self.model_id_column, + [self.model_id_column], ) return Dataset(deploy_dataset_spec) @@ -1340,8 +1333,18 @@ def _fit_prepare_training_function_inputs(self, ds, computation_dir): secondary_data_paths = ds_domain.extract_data_paths(ds.main_table.name) additional_data_tables = {} for data_path in secondary_data_paths: - dictionary = ds_domain.get_dictionary_at_data_path(data_path) - additional_data_tables[data_path] = secondary_table_paths[dictionary.name] + path_bits = [] + data_path_fragments = data_path.split("/") + for path_fragment in data_path_fragments: + path_subfragments = path_fragment.split("ยง") + for path_subfragment in path_subfragments: + if path_subfragment not in path_bits: + path_bits.append(path_subfragment) + simplified_data_path = "/".join(path_bits) + + additional_data_tables[data_path] = secondary_table_paths[ + simplified_data_path + ] # Build the mandatory arguments args = [ @@ -1415,7 +1418,6 @@ def _fit_training_post_process(self, ds): f"does not have prefix '{self._khiops_model_prefix}' " f"or '{self._khiops_baseline_model_prefix}'." ) - # Skip baseline model if dictionary.name.startswith(self._khiops_model_prefix): initial_dictionary_name = dictionary.name.replace( @@ -1514,7 +1516,7 @@ def _transform_check_dataset(self, ds): # Multi-table model: Check name and dictionary coherence of secondary tables dataset_secondary_tables_by_name = { - table.name: table for table in ds.secondary_tables + table.name: table for _, table, _ in ds.additional_data_tables } for dictionary in self.model_.dictionaries: assert dictionary.name.startswith(self._khiops_model_prefix), ( diff --git a/khiops/sklearn/helpers.py b/khiops/sklearn/helpers.py index 301ba8f5..8f774f8b 100644 --- a/khiops/sklearn/helpers.py +++ b/khiops/sklearn/helpers.py @@ -11,7 +11,7 @@ from sklearn.model_selection import train_test_split from khiops.core.internals.common import is_dict_like, type_error_message -from khiops.sklearn.dataset import Dataset +from khiops.sklearn.dataset import Dataset, table_name_of_path # Note: We build the splits with lists and itertools.chain avoid pylint warning about # unbalanced-tuple-unpacking. See issue https://github.com/pylint-dev/pylint/issues/5671 @@ -122,15 +122,31 @@ def _train_test_split_in_memory_dataset(ds, y, test_size, sklearn_split_params=N # Split the secondary tables tables # Note: The tables are traversed in BFS - todo_relations = [ - relation for relation in ds.relations if relation[0] == ds.main_table.name + todo_tables = [ + (table_path, table) + for table_path, table, _ in ds.additional_data_tables + if "/" not in table_path ] - while todo_relations: - current_parent_table_name, current_child_table_name, _ = todo_relations.pop(0) - for relation in ds.relations: - parent_table_name, _, _ = relation + while todo_tables: + current_table_path, current_table = todo_tables.pop(0) + if "/" not in current_table_path: + current_parent_table_name = ds.main_table.name + else: + table_path_fragments = current_table_path.split("/") + current_parent_table_name = table_name_of_path( + "/".join(table_path_fragments[:-1]) + ) + current_child_table_name = current_table.name + for secondary_table_path, secondary_table, _ in ds.additional_data_tables: + if "/" not in secondary_table_path: + parent_table_name = ds.main_table.name + else: + table_path_fragments = secondary_table_path.split("/") + parent_table_name = table_name_of_path( + "/".join(table_path_fragments[:-1]) + ) if parent_table_name == current_child_table_name: - todo_relations.append(relation) + todo_tables.append((secondary_table_path, secondary_table)) for new_ds in (train_ds, test_ds): origin_child_table = ds.get_table(current_child_table_name) diff --git a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsClassifier/Modeling.kdic b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsClassifier/Modeling.kdic index f5d9f8bc..f190622b 100644 --- a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsClassifier/Modeling.kdic +++ b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsClassifier/Modeling.kdic @@ -1,7 +1,7 @@ #Khiops 10.0.3 -Root Dictionary SNB_SpliceJunction (SampleId) - +Root Dictionary SNB_main_table (SampleId) + { Categorical SampleId ; Unused Categorical Class ; diff --git a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsClassifier/Modeling.kdicj b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsClassifier/Modeling.kdicj index cb76725d..9b37332e 100644 --- a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsClassifier/Modeling.kdicj +++ b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsClassifier/Modeling.kdicj @@ -3,11 +3,11 @@ "version": "10.0", "dictionaries": [ { - "name": "SNB_SpliceJunction", + "name": "SNB_main_table", "root": true, "key": ["SampleId"], "metaData": { - "InitialDictionary": "SpliceJunction", + "InitialDictionary": "main_table", "PredictorLabel": "Selective Naive Bayes", "PredictorType": "Classifier" }, diff --git a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsCoclustering/Coclustering.kdic b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsCoclustering/Coclustering.kdic index aeeb74e3..791e6999 100644 --- a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsCoclustering/Coclustering.kdic +++ b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsCoclustering/Coclustering.kdic @@ -1,19 +1,19 @@ #Khiops 10.0 -Dictionary CC_Keys_main_table (SampleId) +Dictionary CC_main_table (SampleId) { Categorical SampleId ; - Table(CC_main_table) CC_main_table ; + Table(CC_original_main_table) CC_original_main_table ; Unused Structure(DataGrid) Coclustering = DataGrid(ValueGroups(ValueGroup("HUMVPNP-ACCEPTOR-1688", "HUMMHCW2B-ACCEPTOR-2660", "HUMMHANTLE-ACCEPTOR-3110", "HUMMHCW1B-ACCEPTOR-2658", "HUMCS1-ACCEPTOR-1814", "HUMGHCSA-ACCEPTOR-51551", "HUMIL1B-ACCEPTOR-3546", "HUMGHCSA-ACCEPTOR-28732", "HUMGH-ACCEPTOR-1572", "HUMGHCSA-ACCEPTOR-6465", "HUMGHN-ACCEPTOR-1797", "HUMGHVA-ACCEPTOR-741", "HUMAPOCII-ACCEPTOR-3131", "HUMMHEA-ACCEPTOR-2678", "HUMHBA4-ACCEPTOR-10696", "MACHBA-ACCEPTOR-238", "HUMMYLCA-ACCEPTOR-3623", "HUMAPOCIA-ACCEPTOR-634", "HUMHPARS1-ACCEPTOR-2519", "HUMGAST2-ACCEPTOR-916", "HUMAPOC2-ACCEPTOR-3143", "HUMAPOC2G-ACCEPTOR-2851", "HUMGH-ACCEPTOR-1154", "HUMSAA-ACCEPTOR-2567", "HUMKER18-ACCEPTOR-5495", "HUMTNFAB-ACCEPTOR-1237", "HUMPPPA-ACCEPTOR-1721", "HUMOTNPI-ACCEPTOR-1091", "HUMLYL1B-ACCEPTOR-3525", "HUMGHCSA-ACCEPTOR-42976", "HUMPRCA-ACCEPTOR-5420", "HUMMHCD8A-ACCEPTOR-733", "HUMAPOAI1-ACCEPTOR-653", "HUMGHCSA-ACCEPTOR-13046", "HUMACCYBA-ACCEPTOR-2893", "HUMCP21OH-ACCEPTOR-1386", "HUMGHN-ACCEPTOR-1379", "HUMTUBBM-ACCEPTOR-1265", "HUMGHCSA-ACCEPTOR-28313", "HUMG0S19A-ACCEPTOR-2812", "HUMCSPB-ACCEPTOR-1874", "HUMGHCSA-ACCEPTOR-14059", "HUMGHCSA-ACCEPTOR-27717", "HUMGHCSA-ACCEPTOR-50539", "HUMMHCP42-ACCEPTOR-2316", "HUMCP21OH-ACCEPTOR-3161", "HUMMHCP42-ACCEPTOR-4089", "HUMMHCP52-ACCEPTOR-2628", "HUMKAL2-ACCEPTOR-3494", "HUMGHCSA-ACCEPTOR-13642", "HUMDES-ACCEPTOR-3015", "HUMPOMC-ACCEPTOR-7483", "HUMH19-ACCEPTOR-2449", "MACHBA-ACCEPTOR-587", "HUMUBILP-ACCEPTOR-1488", "HUMPIM1A-ACCEPTOR-1687", "HUMATP1A2-ACCEPTOR-6683", "HUMADAG-ACCEPTOR-29793", "HUMGH-ACCEPTOR-571", "HUMGHN-ACCEPTOR-798", "HUMEDHB17-ACCEPTOR-2346", "HUMHPARS1-ACCEPTOR-4612", "HUMG0S19B-ACCEPTOR-2948", "HUMMIS-ACCEPTOR-1697", "HUMCYC1A-ACCEPTOR-2399", "HUMPOMC-ACCEPTOR-4445", "HUMKEREP-ACCEPTOR-4597", "HUMPSAP-ACCEPTOR-1501", "HUMPRF1A-ACCEPTOR-3240", "HUMRPS14-ACCEPTOR-5473", "HUMMHDRHA-ACCEPTOR-4276", "HUMALPHA-ACCEPTOR-2851", "HUMALPPD-ACCEPTOR-2473", "HUMANT1-ACCEPTOR-2955", "HUMVPNP-ACCEPTOR-2057", "HUMCP21OHC-ACCEPTOR-1387", "HUMBNPA-ACCEPTOR-1629", "HUMAPOCIA-ACCEPTOR-1956", "HUMMYCRT-ACCEPTOR-1111", "HUMCP21OHC-ACCEPTOR-2298", "HUMMHCP52-ACCEPTOR-1763", "HUMTUBAG-ACCEPTOR-2405", "HUMRASH-ACCEPTOR-3201", "HUMMHCW2B-ACCEPTOR-885", "HUMH19-ACCEPTOR-2224", "HUMA1GLY2-ACCEPTOR-3502", "HUMPRF1A-ACCEPTOR-4965", "HUMADAG-ACCEPTOR-19200", "HUMTKRA-ACCEPTOR-2330", "HUMERP-ACCEPTOR-2578", "HUMERPA-ACCEPTOR-2587", "HUMHST-ACCEPTOR-3478", "HUMA1ATP-ACCEPTOR-11880", "HUMHBA4-ACCEPTOR-7232", "HUMMH6-ACCEPTOR-955", "HUMGHCSA-ACCEPTOR-13504", "HUMATP1A2-ACCEPTOR-21144", "HUMMIS-DONOR-1857", "HUMSAACT-ACCEPTOR-3116", "HUMMYCC-ACCEPTOR-4476", "HUMAPOCII-ACCEPTOR-3595", "HUMTHB-ACCEPTOR-7876", "HUMCP21OH-ACCEPTOR-1648", "HUMCP21OHC-ACCEPTOR-1649", "HUMMHCP52-ACCEPTOR-1114", "HUMSAACT-ACCEPTOR-2578", "HUMAK1-ACCEPTOR-5504", "HUMMHB27B-ACCEPTOR-690", "HUMP3A-ACCEPTOR-2544", "HUMAPOE4-ACCEPTOR-3751", "HUMMYLCA-ACCEPTOR-2864", "HUMALPI-ACCEPTOR-2283", "HUMMIS-ACCEPTOR-1185", "HUMKEREP-ACCEPTOR-3671", "HUMSPRO-ACCEPTOR-4385", "HUMMHCP42-ACCEPTOR-3889", "ORAIGECA-ACCEPTOR-583", "HUMMHCP52-ACCEPTOR-851", "HUMALPPD-ACCEPTOR-2062", "HUMACCYBA-ACCEPTOR-1384", "HUMATP1A2-ACCEPTOR-22870", "HUMMHEA-ACCEPTOR-869", "HUMSHBGA-ACCEPTOR-3120", "HUMAPOC2-ACCEPTOR-3599", "HUMAPOC2G-ACCEPTOR-3309", "HUMINCP-ACCEPTOR-3281", "HUMADAG-ACCEPTOR-32395", "HUMA1ATP-ACCEPTOR-7282", "HUMMHEA-ACCEPTOR-2278", "LEMHBB-ACCEPTOR-1437", "HUMUBILP-ACCEPTOR-806", "HUMHMG14A-ACCEPTOR-1361", "HUMADAG-ACCEPTOR-32821", "HUMMHCP52-ACCEPTOR-2427", "HUMAPRTA-ACCEPTOR-774", "HUMATP1A2-ACCEPTOR-20042", "HUMMT2A-ACCEPTOR-1047", "BABAPOE-ACCEPTOR-3437", "HUMPRCA-ACCEPTOR-5063", "HUMMHANTLE-ACCEPTOR-1363", "HUMCAPG-ACCEPTOR-1733", "HUMMYLCA-ACCEPTOR-924", "HUMOPS-ACCEPTOR-4065", "HUMKAL2-ACCEPTOR-1745", "HUMEDHB17-ACCEPTOR-3311", "HUMTHB-ACCEPTOR-4549", "CHPIGECA-ACCEPTOR-585", "HUMAPOAI1-ACCEPTOR-1647", "HUMATP1A2-ACCEPTOR-6969", "HUMAPOAI1-ACCEPTOR-902", "HUMAPOAIT-ACCEPTOR-648", "ATRINS-ACCEPTOR-701", "HUMPLPSPC-ACCEPTOR-1326", "HUMGASTA-ACCEPTOR-6254", "HUMA1ATP-ACCEPTOR-9382", "HUMRPS17A-ACCEPTOR-463", "HUMH19-ACCEPTOR-2643", "HUMSHBGA-ACCEPTOR-4319", "HUMPNMTA-ACCEPTOR-3129", "HUMCFVII-ACCEPTOR-8277", "HUMTPA-ACCEPTOR-35307", "TARHBB-ACCEPTOR-1686", "HUMSHBGA-DONOR-2724", "HUMPPPA-ACCEPTOR-2162", "HUMATP1A2-ACCEPTOR-20331", "HUMATP1A2-ACCEPTOR-15000", "HUMTNFAB-ACCEPTOR-1431", "HUMCRYGBC-ACCEPTOR-2225", "HUMPSAP-ACCEPTOR-993", "HUMINCP-ACCEPTOR-2049", "CHPIGECA-ACCEPTOR-989", "HUMALPI-ACCEPTOR-2926", "HUMSAACT-ACCEPTOR-2337", "HUMCP21OH-ACCEPTOR-2041", "HUMCP21OHC-ACCEPTOR-2042", "HUMTKRA-ACCEPTOR-12537", "HUMPRCA-ACCEPTOR-8223", "HUMMT2A-ACCEPTOR-1454", "ORAHBD-ACCEPTOR-1713", "HUMHPARS1-ACCEPTOR-6470", "HUMTKRA-ACCEPTOR-11871", "HUMALPI-ACCEPTOR-2064", "HUMHMG14A-ACCEPTOR-1487", "TARHBB-ACCEPTOR-2748", "HUMTUBBM-ACCEPTOR-1078", "HUMAK1-ACCEPTOR-6626", "HUMTPA-ACCEPTOR-25978", "HUMSHBGA-ACCEPTOR-5188", "HUMRPS17A-ACCEPTOR-3688", "HUMDES-ACCEPTOR-1920", "HUMPSAP-ACCEPTOR-2927", "HUMPRCA-ACCEPTOR-10486", "HUMRASH-ACCEPTOR-2012", "HUMACTGA-ACCEPTOR-886", "HUMKEREP-ACCEPTOR-3451", "HUMIL1B-ACCEPTOR-5693", "LEMHBB-ACCEPTOR-346", "ORAIGECA-ACCEPTOR-990", "HUMMH6-ACCEPTOR-3284", "BABA1AT-NEG-181", "HUMTNFAB-ACCEPTOR-5037", "HUMTNFX-ACCEPTOR-1332", "HUMPNMTA-ACCEPTOR-3450", "HUMMHCW1B-ACCEPTOR-3217", "HUMMHCW2B-ACCEPTOR-3219", "HUMGPIBAA-ACCEPTOR-3033", "HUMCKMT-ACCEPTOR-1127", "HUMSPRO-ACCEPTOR-2022", "ORAHBD-ACCEPTOR-596", "HUMACTGA-ACCEPTOR-2414", "HUMALPI-ACCEPTOR-2698", "HUMATP1A2-ACCEPTOR-21467", "HUMSHBGA-ACCEPTOR-3640", "HUMTCBA-NEG-2161", "HUMALPPD-ACCEPTOR-421", "HUMAPOCIA-ACCEPTOR-4946", "HUMKER18-ACCEPTOR-5107", "HUMSPRO-ACCEPTOR-1827", "HUMPPPA-ACCEPTOR-2425", "HUMSHBGA-ACCEPTOR-4029", "HUMCP21OHC-ACCEPTOR-1015", "HUMMHCP42-ACCEPTOR-1945", "HUMMHCP52-ACCEPTOR-479", "HUMTHB-ACCEPTOR-10541", "HUMIRBPG-ACCEPTOR-6987", "HUMAPOE4-ACCEPTOR-1818", "HUMSAACT-ACCEPTOR-2856", "HUMCACY-ACCEPTOR-1973", "HUMALPPD-ACCEPTOR-1408", "HUMGRP78-ACCEPTOR-775", "HUMCSPB-ACCEPTOR-3118", "HUMADAG-ACCEPTOR-26314", "HUMTHB-ACCEPTOR-9160", "HUMHMG14A-ACCEPTOR-1599", "MNKHBD-ACCEPTOR-1583", "HUMATP1A2-ACCEPTOR-9720", "HUMADAG-ACCEPTOR-35070", "HUMCP21OHC-ACCEPTOR-2699", "HUMMHCP42-ACCEPTOR-3627", "HUMMHCP52-ACCEPTOR-2165", "HUMATP1A2-ACCEPTOR-12450", "HUMTHY1A-DONOR-34", "HUMSPRO-ACCEPTOR-3195", "HUMSP18A-NEG-361", "HUMPSAP-ACCEPTOR-2374", "HUMOPS-ACCEPTOR-3783", "HUMAPRTA-ACCEPTOR-2252", "HUMPRCA-ACCEPTOR-3434", "HUMMHEA-ACCEPTOR-3682", "HUMPLPSPC-ACCEPTOR-1830", "HUMAPOC2-ACCEPTOR-2908", "HUMAPOC2G-ACCEPTOR-2616", "HUMGASTA-DONOR-3213", "HUMKER18-ACCEPTOR-3708", "HUMALPI-ACCEPTOR-1861", "HUMALPPD-ACCEPTOR-1845", "HUMALPHA-ACCEPTOR-809", "HUMPLAPL-ACCEPTOR-681", "HUMCYPIIE-ACCEPTOR-3877", "HUMPIM1A-ACCEPTOR-1335", "HUMKEREP-ACCEPTOR-3206", "HUMAPOA4A-ACCEPTOR-1382", "HUMACCYBB-ACCEPTOR-2533", "HUMBNPA-ACCEPTOR-831", "HUMALPHA-ACCEPTOR-581", "HUMPLAPL-ACCEPTOR-452", "HUMMHANTLE-ACCEPTOR-3667", "HUMP45C17-ACCEPTOR-7334", "ORAIGECA-ACCEPTOR-1393", "HUMLACTA-ACCEPTOR-2735", "HUMSPRO-ACCEPTOR-3807", "HUMALPPD-ACCEPTOR-2719", "HUMALPI-ACCEPTOR-1631", "HUMGCB1-ACCEPTOR-5186", "HUMMHB27B-ACCEPTOR-3119", "HUMCFVII-ACCEPTOR-4264", "HUMCFVII-ACCEPTOR-11034", "HUMMIS-ACCEPTOR-1947", "HUMANT1-ACCEPTOR-3950", "HUMAPRTA-DONOR-618", "HUMMHCD8A-ACCEPTOR-1987", "HUMSAA1A-ACCEPTOR-3145", "HUMAPOE4-ACCEPTOR-2977", "HUMAK1-ACCEPTOR-10045", "HUMPGAMMG-ACCEPTOR-1450", "HUMMYLCA-ACCEPTOR-2214", "HUMSPRO-DONOR-4152", "HUMANT1-ACCEPTOR-5005", "HUMSAACT-ACCEPTOR-1885", "HUMIRBPG-ACCEPTOR-8736", "HUMACHRG4-NEG-1", "HUMALPI-ACCEPTOR-1216", "HUMCFVII-ACCEPTOR-6448", "HUMGH-ACCEPTOR-986", "HUMACCYBB-ACCEPTOR-1999", "HUMCYPIIE-ACCEPTOR-9142", "HUMRPS14-ACCEPTOR-2872", "HUMCYPIIE-DONOR-7676", "HUMERP-ACCEPTOR-1164", "HUMERPA-ACCEPTOR-1171", "HUMALPHA-DONOR-487", "HUMLYL1B-ACCEPTOR-2630", "HUMOPS-ACCEPTOR-2409", "HUMTKRA-ACCEPTOR-12318", "HUMALPI-DONOR-333", "HUMATP1A2-ACCEPTOR-20736", "HUMERP-ACCEPTOR-1566", "HUMERPA-ACCEPTOR-1575", "HUMGCB1-ACCEPTOR-6344", "HUMH19-ACCEPTOR-2846", "HUMCHRA-NEG-121", "HUMHSP90B-ACCEPTOR-5391", "HUMSPRO-DONOR-2367", "HUMPLPSPC-ACCEPTOR-2621", "HUMKER18-ACCEPTOR-4128", "HUMCAPG-ACCEPTOR-2044", "LEMHBDPS-NEG-1441", "HUMEDHB17-ACCEPTOR-2701", "HUMCAPG-DONOR-1869", "HUMDAFA-ACCEPTOR-1235", "HUMUBILP-ACCEPTOR-1158"), ValueGroup("HUMTFPB-ACCEPTOR-6362", "HUMHPRTB-ACCEPTOR-27861", "HUMIFNINI-ACCEPTOR-3147", "HUMFIXG-ACCEPTOR-9613", "HUMHPRTB-ACCEPTOR-31587", "HUMATPSYB-NEG-841", "HUMIL2-ACCEPTOR-5027", "HUMIL2A-ACCEPTOR-5980", "HUMIL2B-ACCEPTOR-4857", "HUMHMG14A-ACCEPTOR-4663", "HUMCSFGMA-ACCEPTOR-891", "HUMANFA-ACCEPTOR-785", "HUMHMG14A-ACCEPTOR-6789", "HUMAFP-ACCEPTOR-19790", "HUMIL5-ACCEPTOR-1853", "HUMIL5A-ACCEPTOR-1857", "HUMGG-ACCEPTOR-3631", "HUMAFP-ACCEPTOR-6914", "HUMFABP-ACCEPTOR-4068", "HUMTNFX-ACCEPTOR-1565", "HUMIL2-ACCEPTOR-685", "HUMIL2A-ACCEPTOR-1623", "HUMIL2B-ACCEPTOR-500", "HUMFABP-ACCEPTOR-2320", "HUMHPRTB-ACCEPTOR-41424", "HUMATPSYB-ACCEPTOR-4464", "HUMFIXG-ACCEPTOR-20604", "HUMCSFGMA-ACCEPTOR-2554", "HUMALBGC-ACCEPTOR-16911", "HUMFABP-ACCEPTOR-3516", "HUMCYCAA-ACCEPTOR-1188", "HUMSAA1A-ACCEPTOR-5878", "HUMRPS14-ACCEPTOR-2108", "HUMALBGC-ACCEPTOR-12451", "HUMAFP-ACCEPTOR-7965", "HUMHPARS1-ACCEPTOR-3750", "HUMAFP-ACCEPTOR-14589", "HUMTPA-ACCEPTOR-17940", "HUMHPRTB-NEG-44641", "HUMPCNA-ACCEPTOR-3526", "LEMHBGA-ACCEPTOR-425", "HUMIL1B-ACCEPTOR-6540", "HUMPCNA-ACCEPTOR-5606", "LEMHBE-ACCEPTOR-1474", "HUMSODA-ACCEPTOR-6717", "HUMEF1A-ACCEPTOR-1522", "HUMSODA-ACCEPTOR-7572", "HUMMHANTLE-ACCEPTOR-4055", "HUMMYC3L-ACCEPTOR-4242", "HUMETN3-ACCEPTOR-391", "HUMHSP90B-ACCEPTOR-2605", "HUMGG-ACCEPTOR-5138", "LEMHBG-ACCEPTOR-364", "HUMPRPH2-ACCEPTOR-1848", "HUMHPRTB-ACCEPTOR-16573", "HUMPCNA-ACCEPTOR-5816", "HUMMHDOB-ACCEPTOR-4486", "HUMMHEA-ACCEPTOR-3890", "HUMHIAPPA-ACCEPTOR-5862", "HUMSODA-ACCEPTOR-6266", "HUMPRPH1-ACCEPTOR-2738", "HUMAFP-ACCEPTOR-13888", "HUMALBGC-ACCEPTOR-6772", "HUMHPRTB-ACCEPTOR-40003", "HUMMHDOB-ACCEPTOR-4124", "ORAHBG2F-ACCEPTOR-1492", "HUMKEREP-ACCEPTOR-3986", "LEMHBG-ACCEPTOR-1452", "MACHBCA2-ACCEPTOR-1693", "MACHBGA1-ACCEPTOR-1692", "HUMHPRTB-ACCEPTOR-34907", "HUMMH6-ACCEPTOR-2326", "HUMMYCC-ACCEPTOR-6624", "HUMMYCRT-ACCEPTOR-3258", "HUMLACTA-ACCEPTOR-1512", "HUMFIXG-ACCEPTOR-33757", "HUMALBGC-ACCEPTOR-14947", "HUMHPARS1-ACCEPTOR-5472", "HUMMHDRHA-ACCEPTOR-2969", "HUMRPS14-ACCEPTOR-4137", "GIBHBGGL-ACCEPTOR-8438", "LEMHBGA-ACCEPTOR-1498", "HUMDES-ACCEPTOR-5413", "HUMHBBAG-ACCEPTOR-2565", "HUMADAG-ACCEPTOR-32543", "HUMADAM2-ACCEPTOR-784", "HUMIL1B-ACCEPTOR-879", "HUMIL1B-ACCEPTOR-1506", "HUMAFP-ACCEPTOR-1782", "HUMATPSYB-ACCEPTOR-9633", "HUMGFP40H-ACCEPTOR-4066", "HUMMHDOB-ACCEPTOR-3357", "HUMCFVII-ACCEPTOR-9389", "HUMMHCD8A-ACCEPTOR-5474", "HUMCYCAA-ACCEPTOR-1466", "TARHBD-ACCEPTOR-1884", "HUMSODA-ACCEPTOR-4173", "HUMMCR-NEG-5341", "HUMGFP40H-ACCEPTOR-992", "GIBHBGGL-ACCEPTOR-3506", "HUMAFP-ACCEPTOR-2796", "HUMHSP90B-ACCEPTOR-4265", "HUMTPA-ACCEPTOR-22075", "HUMATP1A2-ACCEPTOR-13550", "HUMATPSYB-ACCEPTOR-5471", "HUMCRYGBC-ACCEPTOR-5283", "HUMCKMT-ACCEPTOR-2011", "HUMALBGC-ACCEPTOR-6011", "HUMCYPMPC-NEG-1", "HUMMHANTLE-ACCEPTOR-2733", "HUMMRP14A-ACCEPTOR-1386", "HUMSODA-ACCEPTOR-6511", "HUMLACTA-ACCEPTOR-2160", "HUMALBGC-ACCEPTOR-17658", "HUMMHCW1B-ACCEPTOR-3569", "HUMMHCW2B-ACCEPTOR-3570", "HUMSODA-ACCEPTOR-4869", "HUMGRP78-ACCEPTOR-2347", "HUMTPA-ACCEPTOR-29440", "HUMKAL2-ACCEPTOR-5424", "HUMALBGC-NEG-241", "HUMDES-ACCEPTOR-1763", "HUMTPA-ACCEPTOR-32398", "HUMALBGC-ACCEPTOR-9414", "HUMMHCW1B-ACCEPTOR-3357", "HUMMHCW2B-ACCEPTOR-3359", "HUMMYLCA-ACCEPTOR-2481", "HUMHSP90B-ACCEPTOR-7462", "HUMDES-ACCEPTOR-2504", "HUMGRP78-ACCEPTOR-2848", "HUMALBGC-ACCEPTOR-15504", "HUMCRPG-ACCEPTOR-578", "HUMCRPGA-ACCEPTOR-539", "HUMFIXG-ACCEPTOR-32974", "HUMLDL100-NEG-5941", "ORAHBG2F-ACCEPTOR-392", "HUMANFA-ACCEPTOR-2205", "HUMHBBAG-ACCEPTOR-1466", "HUMHBBGG-ACCEPTOR-2363", "HUMMHCW1B-ACCEPTOR-2261", "HUMHIAPPA-ACCEPTOR-959", "HUMMYLCA-ACCEPTOR-1831", "HUMA1GLY2-ACCEPTOR-2469", "HUMPCNA-ACCEPTOR-2520", "HUMCP21OH-ACCEPTOR-1838", "HUMMHCP42-ACCEPTOR-2767", "HUMMHCP52-ACCEPTOR-1304", "GIBHBGGL-ACCEPTOR-7321", "MACHBCA2-ACCEPTOR-586", "MACHBGA1-ACCEPTOR-588", "HUMIL2-ACCEPTOR-3038", "HUMIL2A-ACCEPTOR-3975", "HUMIL2B-ACCEPTOR-2852", "HUMIL1B-ACCEPTOR-4293", "HUMMRP8A-ACCEPTOR-2302", "HUMCYPIIE-ACCEPTOR-12808", "HUMCACY-ACCEPTOR-2504", "HUMKER18-ACCEPTOR-4857", "HUMMHANTLE-ACCEPTOR-3842", "HUMATP1A2-ACCEPTOR-19381", "HUMATP1A2-ACCEPTOR-10913", "HUMEF1A-ACCEPTOR-2727", "HUMATP1A2-ACCEPTOR-24266", "HUMAFP-ACCEPTOR-15169", "HUMEF1A-ACCEPTOR-2347", "HUMCYPIIE-ACCEPTOR-6976", "HUMCRYGBC-ACCEPTOR-18489", "HUMTHB-ACCEPTOR-9825", "ORAHBG1F-ACCEPTOR-391", "HUMATPSYB-ACCEPTOR-2698", "HUMEF1A-ACCEPTOR-2965", "HUMSODA-ACCEPTOR-3793", "HUMSAA-ACCEPTOR-3086", "HUMPRP0A-NEG-2161", "HUMHSP90B-ACCEPTOR-5710", "HUMCKMT-ACCEPTOR-1529", "HUMPRL1-NEG-61", "HUMATPSYB-ACCEPTOR-7858", "HUMATPSYB-ACCEPTOR-5220", "HUMSAA1A-ACCEPTOR-6290", "HUMP45C17-ACCEPTOR-6267", "HUMHSP90B-ACCEPTOR-5972", "HUMCKMT-ACCEPTOR-4348", "HUMFBRAA-NEG-1", "HUMIRBPG-ACCEPTOR-4936", "HUMCATF-ACCEPTOR-1108", "HUMSHBGA-ACCEPTOR-5640", "HUMALPPD-ACCEPTOR-734", "HUMALPHA-ACCEPTOR-1369", "HUMRPS17A-ACCEPTOR-1611", "HUMERVKA-NEG-6721", "HUMP45C17-ACCEPTOR-3774", "HUMGHCSA-ACCEPTOR-28101", "HUMP45C17-ACCEPTOR-5819", "HUMHSP90B-ACCEPTOR-3995", "HUMGH-ACCEPTOR-941", "HUMGHCSA-ACCEPTOR-5835", "HUMLAPA-NEG-2881", "HUMIGHVQ-NEG-61", "HUMPSBGA-NEG-781", "HUMALPPD-ACCEPTOR-985", "HUMCYPIIE-DONOR-12309", "HUMFIXG-NEG-21901", "HUMINSRC-NEG-2101", "HUMCRYGBC-ACCEPTOR-19693", "HUMPGAMMG-ACCEPTOR-3519", "HUMC3-NEG-3481", "HUMPSAA1-NEG-1", "HUMADAG-ACCEPTOR-35621"), ValueGroup("HUMMHCD8A-DONOR-1781", "HUMMYC3L-DONOR-402", "HUMCFVII-DONOR-4425", "HUMTUBBM-DONOR-775", "HUMHST-DONOR-2861", "HUMERP-DONOR-598", "HUMDES-DONOR-3236", "HUMADAG-DONOR-4034", "HUMCYC1A-DONOR-1506", "HUMHMG14A-DONOR-1039", "HUMALPHA-DONOR-2968", "CHPIGECA-DONOR-1313", "ATRINS-DONOR-905", "HUMHMG14A-DONOR-1394", "HUMPSAP-DONOR-1621", "HUMACCYBA-DONOR-1250", "HUMACCYBB-DONOR-1186", "HUMSAACT-DONOR-3038", "HUMPRCA-DONOR-5318", "HUMEDHB17-DONOR-3489", "HUMACCYBA-DONOR-289", "HUMACCYBB-DONOR-290", "HUMACTGA-DONOR-1015", "HUMMHCW1B-DONOR-1152", "HUMPIM1A-DONOR-1442", "HUMPRT1A-DONOR-118", "HUMOTNPI-DONOR-508", "HUMMHCW2B-DONOR-1155", "HUMOTNPI-DONOR-1007", "HUMCYPIIE-DONOR-4037", "HUMFOS-DONOR-1000", "HUMSODA-DONOR-327", "HUMAK1-DONOR-954", "HUMTKRA-DONOR-556", "HUMHPRTB-DONOR-1674", "HUMPOMC-DONOR-737", "HUMRPS17A-DONOR-615", "HUMRPS14-DONOR-225", "HUMMH6-DONOR-826", "HUMALPI-DONOR-1140", "HUMMHB27B-DONOR-960", "HUMP45C17-DONOR-3913", "HUMPIM1A-DONOR-1222", "HUMALPPD-DONOR-1543", "HUMPCNA-DONOR-1618", "HUMMHB27B-DONOR-2538", "HUMACTGA-DONOR-2321", "HUMACTGA-DONOR-1344", "LEMHBGA-DONOR-302", "HUMMHCW1B-DONOR-754", "HUMANDREC-NEG-1681", "HUMHBA4-DONOR-10901", "HUMMHCW1B-DONOR-2778", "HUMOPS-DONOR-3949", "HUMALPI-DONOR-2815", "HUMMRP8A-DONOR-2152", "HUMHBA4-DONOR-6768", "MACHBA-DONOR-121", "HUMMHEA-DONOR-739", "HUMANT1-DONOR-1688", "HUMACTGA-DONOR-513", "HUMACCYBA-DONOR-1624", "HUMACCYBB-DONOR-1558", "HUMGSHPXG-DONOR-690", "HUMTHB-DONOR-7792", "HUMSAACT-DONOR-1779", "HUMINCP-DONOR-539", "HUMAPOAI1-DONOR-1059", "HUMAPOAIT-DONOR-805", "HUMALPHA-DONOR-1924", "HUMCP21OH-DONOR-1541", "HUMCP21OHC-DONOR-1542", "HUMMHCP42-DONOR-2471", "HUMMHCP52-DONOR-1006", "HUMPRCA-DONOR-8366", "HUMTUBBM-DONOR-1376", "MACHBA-DONOR-443", "HUMALPI-DONOR-1766", "BABAPOE-DONOR-30", "HUMSAACT-DONOR-2210", "HUMMH6-DONOR-2841", "HUMPSAA-DONOR-4013", "HUMCSPB-DONOR-258", "HUMTFPB-DONOR-992", "HUMMYC3L-DONOR-1271", "HUMATP1A2-DONOR-20500", "HUMSHBGA-DONOR-3310", "HUMHBBAG-DONOR-1344", "HUMHBBGG-DONOR-2241", "ORAHBG2F-DONOR-269", "HUMPIM1A-DONOR-1594", "HUMAK1-DONOR-10237", "HUMPGAMMG-DONOR-1631", "HUMAPOC2-DONOR-522", "HUMAPOC2G-DONOR-225", "HUMAPOCII-DONOR-541", "HUMGHCSA-DONOR-51300", "HUMTUBAG-DONOR-506", "HUMCP21OHC-DONOR-2129", "HUMKAL2-DONOR-3781", "HUMTHB-DONOR-497", "HUMTHB-DONOR-4686", "HUMMHCW1B-DONOR-3250", "HUMGHCSA-DONOR-6212", "HUMGHN-DONOR-1544", "ORAHBG1F-DONOR-269", "HUMCP21OH-DONOR-3064", "HUMCP21OHC-DONOR-3066", "HUMMHCP52-DONOR-2531", "HUMH19-DONOR-2562", "HUMDES-DONOR-2326", "GCRHBBA1-DONOR-1260", "HUMALPHA-DONOR-1292", "HUMCYPIIE-DONOR-8260", "HUMDES-DONOR-2630", "HUMTFPB-DONOR-10205", "HUMTNFAB-DONOR-1345", "HUMMHANTLE-DONOR-3008", "HUMALPPD-DONOR-2254", "HUMH19-DONOR-2359", "HUMCP21OH-DONOR-2128", "HUMMHCP42-DONOR-3057", "HUMKEREP-DONOR-4033", "HUMA1GLY2-DONOR-1693", "HUMMHEA-DONOR-2795", "MACHBCA2-DONOR-463", "MACHBGA1-DONOR-465", "HUMP45C17-DONOR-2106", "HUMMHCW2B-DONOR-3252", "HUMMHCP52-DONOR-1594", "HUMSAA1A-DONOR-3240", "HUMRPS14-DONOR-3034", "HUMAPOAI1-DONOR-457", "HUMMHB27B-DONOR-3013", "HUMDES-DONOR-711", "HUMTHB-DONOR-9999", "HUMGHCSA-DONOR-28478", "HUMTPA-DONOR-28433", "HUMRPS14-DONOR-4214", "GCRHBBA1-DONOR-1590", "HUMPRCA-DONOR-2171", "HUMCAPG-DONOR-2299", "HUMAPOE4-DONOR-1884", "HUMBNPA-DONOR-600", "HUMH19-DONOR-2128", "HUMGAST2-DONOR-786", "HUMALPI-DONOR-2475", "HUMTPA-DONOR-31536", "HUMHMG14A-DONOR-4792", "GCRHBBA6-DONOR-795", "HUMADAG-DONOR-29909", "HUMCS1-DONOR-1304", "HUMGHCSA-DONOR-51042", "HUMADAG-DONOR-29022", "HUMSAACT-DONOR-2499", "HUMMHB27B-DONOR-2329", "HUMPRF1A-DONOR-1477", "HUMCS1-DONOR-961", "HUMGHCSA-DONOR-27878", "HUMGHCSA-DONOR-50700", "HUMSAA-DONOR-2706", "HUMMYCC-DONOR-5248", "HUMCS1-DONOR-1561", "HUMTHB-DONOR-4453", "HUMGH-DONOR-1319", "HUMAK1-DONOR-6743", "HUMMT2A-DONOR-421", "HUMMHCP42-DONOR-3993", "HUMCKMT-DONOR-523", "HUMDES-DONOR-7366", "HUMAPOAIT-DONOR-201", "HUMALPHA-DONOR-2359", "HUMTPA-DONOR-23748", "HUMHST-DONOR-3582", "HUMTHB-DONOR-7339", "HUMACCYBA-DONOR-2781", "HUMACCYBB-DONOR-2715", "HUMPPPA-DONOR-965", "HUMMHANTLE-DONOR-2150", "HUMTKRA-DONOR-698", "HUMGHCSA-DONOR-13807", "HUMALPPD-DONOR-1980", "CHPIGECA-DONOR-903", "ORAIGECA-DONOR-904", "HUMCSFGMA-DONOR-1746", "HUMCP21OH-DONOR-2877", "HUMMHCP42-DONOR-3806", "HUMGHCSA-DONOR-5955", "HUMGHN-DONOR-1286", "HUMGH-DONOR-1061", "HUMATP1A2-DONOR-9834", "HUMMH6-DONOR-1225", "HUMTPA-DONOR-32565", "HUMPSAA-DONOR-691", "HUMALPHA-DONOR-1542", "HUMRASH-DONOR-2504", "HUMMRP8A-DONOR-1504", "HUMMIS-DONOR-593", "HUMATP1A2-DONOR-21268", "HUMGHCSA-DONOR-28221", "HUMPRCA-DONOR-5088", "HUMPLPSPC-DONOR-628", "HUMGPIBAA-DONOR-2800", "HUMMH6-DONOR-2602", "HUMEDHB17-DONOR-2199", "HUMCKMT-DONOR-1326", "HUMSPRO-DONOR-1947", "HUMGHCSA-DONOR-5626", "HUMGHN-DONOR-959", "HUMMIS-DONOR-1328", "HUMUBILP-DONOR-912", "BABAPOE-DONOR-867", "HUMATP1A2-DONOR-19518", "HUMANT1-DONOR-3442", "HUMRPS17A-DONOR-171", "HUMKER18-DONOR-3791", "HUMTNFAB-DONOR-4431", "HUMTNFX-DONOR-726", "HUMMHCW2B-DONOR-1675", "HUMCP21OHC-DONOR-2878", "HUMGHCSA-DONOR-42885", "HUMKER18-DONOR-5719", "HUMFOS-DONOR-2005", "HUMERPA-DONOR-2453", "HUMCFVII-DONOR-1690", "HUMGHCSA-DONOR-43141", "HUMRASH-DONOR-1745", "HUMKER18-DONOR-2967", "HUMADAG-DONOR-32886", "HUMMHB27B-DONOR-1478", "HUMMHCW1B-DONOR-1674", "HUMPOMC-DONOR-4597", "HUMH19-DONOR-2766", "HUMATP1A2-DONOR-13749", "HUMCFVII-DONOR-9499", "HUMCP21OHC-DONOR-918", "HUMMHCP42-DONOR-1848", "HUMMHCP52-DONOR-382", "HUMGHVA-DONOR-488", "HUMTHB-DONOR-20241", "HUMKEREP-DONOR-3577", "HUMKER18-DONOR-5022", "HUMADAG-DONOR-34454", "HUMANFA-DONOR-1112", "HUMMYCRT-DONOR-888", "HUMCKMT-DONOR-2233", "HUMERP-DONOR-1310", "HUMTPA-DONOR-26070", "HUMAPRTA-DONOR-881", "HUMALPI-DONOR-2199", "HUMTPA-DONOR-19780", "HUMALPPD-DONOR-309", "HUMMHEA-DONOR-3725", "HUMCYPIIE-DONOR-7126", "HUMAPOC2-DONOR-2976", "HUMAPOC2G-DONOR-2684", "HUMIRBPG-DONOR-5127", "HUMAK1-DONOR-5540", "HUMKAL2-DONOR-539", "HUMAPRT-NEG-421", "HUMMT2A-DONOR-1113", "HUMGFP40H-DONOR-880", "HUMALPHA-DONOR-698", "HUMKER18-DONOR-4285", "HUMGHCSA-DONOR-42555", "ATRINS-NEG-841", "HUMGCB1-DONOR-5411", "HUMTHB-ACCEPTOR-4347", "HUMMYLCA-DONOR-952", "HUMALPI-DONOR-1389", "HUMTHB-DONOR-9328", "HUMG0S19B-DONOR-3063", "HUMMETIF1-DONOR-1131", "HUMGCB1-DONOR-5974", "HUMMHCD8A-DONOR-638", "HUMCFVII-DONOR-6562", "HUMCYC1A-DONOR-2769", "HUMTNFAB-DONOR-950", "HUMGHCSA-DONOR-12784", "HUMGHCSA-DONOR-42122", "HUMKER18-DONOR-5233", "HUMMHCD8A-DONOR-2098", "HUMATP1A2-DONOR-21929", "HUMCACY-DONOR-2132", "HUMA1ATP-DONOR-1972", "HUMAK1-DONOR-3959", "HUMGGTA-NEG-661", "HUMMHDRHA-DONOR-4442", "HUMPLAPL-DONOR-797", "HUMCYC1A-DONOR-3019", "HUMPLPSPC-DONOR-1953", "HUMAPOCIA-DONOR-496", "HUMTHB-DONOR-2021", "HUMRASH-DONOR-2191", "ORAIGECA-DONOR-378", "HUMCRPG-DONOR-300", "HUMMRP14A-DONOR-999", "HUMAPOA4A-DONOR-1509", "HUMMHDOB-DONOR-4235", "HUMSODA-DONOR-3967", "HUMDES-NEG-7201", "HUMACCYBA-DONOR-2504", "HUMCS1-DONOR-535", "HUMGHCSA-DONOR-50275", "HUMGHCSA-DONOR-5205", "HUMATP1A2-DONOR-15496", "HUMMRP14A-DONOR-1551", "HUMGCB1-DONOR-3526", "HUMCACY-DONOR-1386", "HUMTHB-DONOR-1728", "HUMALDH03-NEG-1", "HUMALPPD-DONOR-100", "HUMPRCA-DONOR-5555", "HUMTPA-DONOR-30972", "HUMGHCSA-DONOR-13226", "HUMKEREP-DONOR-2153", "HUMCRYGBC-DONOR-2468", "HUMADAG-DONOR-31274", "HUMCSPB-DONOR-1408", "HUMAPOC2-DONOR-3303", "HUMAPOC2G-DONOR-3011", "HUMTPA-DONOR-18038", "HUMRETPON-NEG-1261", "HUMPGK1-NEG-181", "HUMMHCP42-DONOR-2869", "HUMMHCP52-DONOR-1406", "HUMSPRO-DONOR-2998", "HUMCYPIIE-DONOR-9284", "HUMTPA-DONOR-22213", "HUMSPRO-DONOR-1735", "HUMGH-DONOR-315", "HUMPRCA-NEG-8881", "HUMSHBGA-DONOR-4456", "HUMMH6-DONOR-3506", "HUMTHB-DONOR-10723", "HUMSPRO-DONOR-3348", "HUMHPARS1-DONOR-2602", "HUMCKMT-DONOR-4483", "TARHBB-DONOR-1560", "HUMVPNP-DONOR-315", "HUMMHCW1B-DONOR-3405", "HUMMHCW2B-DONOR-3407", "HUMIRBPG-DONOR-7130", "HUMCFVII-DONOR-8418", "HUMTHB-DONOR-8003", "HUMCFVII-DONOR-556", "HUMCR1R-NEG-2581", "HUMSHBGA-DONOR-3802", "HUMPIM1A-ACCEPTOR-1543", "HUMPSAP-DONOR-1188", "HUMSPRO-DONOR-2585", "HUMAPOAI1-DONOR-716", "HUMATP1A2-DONOR-10208", "HUMPLPSPC-DONOR-2287", "HUMCYC1A-ACCEPTOR-2108", "HUMPPPA-DONOR-1912", "HUMRPS14-DONOR-2259", "HUMCP21OHC-DONOR-2499", "HUMMHCP42-DONOR-3427", "HUMMHCP52-DONOR-1964", "HUMACTGA-ACCEPTOR-2139", "HUMIL1B-DONOR-941", "HUMCYC1A-ACCEPTOR-2858"), ValueGroup("HUMALPI1-DONOR-42", "HUMGLTH1-NEG-241", "HUMFGFB-NEG-181", "HUMNAKATP1-NEG-721", "HUMRASH-NEG-601", "HUMIGF2AP-NEG-421", "HUMRASSK2-NEG-1381", "HUMINT2-NEG-481", "HUMPDGA1-NEG-481", "HUMSRTR2A-NEG-61", "HUMARSBX-NEG-421", "HUMADRA-NEG-961", "HUMCSF1G1-NEG-361", "ORARGIT-NEG-241", "HUMUPAA-NEG-1501", "HUMCHROMB-NEG-421", "HUMPLG2B-NEG-181", "HUMPGRR-NEG-1681", "HUMMHAW-NEG-1201", "HUMHBA4-NEG-6601", "HUMATP1A2-NEG-24901", "HUMSISM-NEG-241", "HUMCOL4A2-NEG-2581", "HUMKERMII-NEG-181", "HUMETS2PR-NEG-901", "HUMMG2-NEG-541", "HUMFESFUR-NEG-3481", "HUMTHYR01-NEG-181", "HUMEDHB17-NEG-3541", "ORAHBA01-NEG-121", "HUMRENT1-NEG-601", "HUMMHB27B-NEG-1201", "HUMASP-NEG-541", "HUMTSG14A-NEG-301", "HUMA1ATP-NEG-301", "HUMTIR-NEG-901", "HUMNARIA-NEG-3841", "HUMICAM1-NEG-481", "HUMERSP2B-NEG-781", "HUMEL01-NEG-901", "HUMADRA2R-NEG-2581", "HUMCOL4A6-NEG-301", "HUMSB3B2-NEG-121", "HUMOAT01-NEG-1", "HUMGSTPIG-NEG-1381", "HUMTFP-NEG-61", "HUMEF2AB-NEG-721", "HUMAPOA4-NEG-61", "HUMSISG1-NEG-5401", "HUMCG2A1-NEG-1", "HUMHKR42-NEG-181", "HUMMHBW4-NEG-61", "HUMMYHC08-NEG-421", "HUMLAMC-NEG-181", "GCRHBBA1-NEG-61", "HUMIGMUTDM-NEG-361", "HUMC4A1-NEG-2761", "HUMGIR-NEG-121", "HUMATP1A2-NEG-2101", "HUMMHDRO2-NEG-2461", "HUMCOL11A2-NEG-1861", "HUMOCS1-NEG-1", "HUMANK-NEG-1981", "HUMRNP70K-NEG-1141", "HUMET-NEG-301", "HUMMHCAR4-NEG-241", "HUMCCK2-NEG-361", "HUMBCL3AA-NEG-1381", "HUMAPOBG-NEG-121", "HUMCOL2A1-NEG-961", "HUMCINHP-NEG-121", "HUMFESFPS-NEG-6661", "HUMVWFA24-NEG-781", "HUMRYR-NEG-10201", "HUMCP21OHC-NEG-3061", "HUMMHCBWC-NEG-121", "HUMU170SC-NEG-481", "HUMAPRTA-NEG-1981", "HUMCD1A-NEG-1261", "ORAINVOL-NEG-2161", "HUMCFXII3-NEG-3061", "HUMGP-NEG-1321", "HUMCKBBA-NEG-481", "HUMMYONM-NEG-1501", "HUMAK1-NEG-10921", "HUMCG5B-NEG-1261", "HUMRGE-NEG-1801", "HUMMHCB44A-NEG-361", "HUMKER654-NEG-1441", "HUMTCBCC-NEG-1981", "MACHBB-NEG-4141", "HUMLYL1B-DONOR-882", "MACAPOE-NEG-181", "HUMMHCACA-NEG-181", "HUMPLPSPC-NEG-1801", "HUMIGLAM1-NEG-121", "HUMSNU1P2A-NEG-301", "HUMDB1A-NEG-121", "HUM5FDX-NEG-2041", "HUMRYR-NEG-14761", "HUMPROD4-NEG-721", "HUMGENX-NEG-2341", "HUMLDLRRL-NEG-11761", "HUMHSP70D-NEG-1441", "HUMBCRD-NEG-301", "HUMPROD1-NEG-1741", "HUMMHCW1B-ACCEPTOR-1398", "HUMATP1A2-NEG-20341", "CHPRGITX-NEG-1441", "HUMSERDHY-NEG-241", "HUMEAR2-NEG-1381", "HUMMHDRBC-NEG-661", "HUMMHDRBK-NEG-1321", "HUMELANA-NEG-1261", "HUMINT2-NEG-5041", "HUMALPPB-NEG-781", "CHPAZGLO-NEG-1", "HUMTRK2H-NEG-1081", "HUMSPM1-NEG-181", "HUMCKMM1-NEG-1921", "HUMGALAB-NEG-901", "HUMRCC1B-NEG-361", "HUMARF1A-NEG-301", "HUMCSIST-NEG-2221", "HUMGFIIM-NEG-121", "HUMCP21OH-NEG-1981", "HUMMPO-NEG-301", "HUMOPS-NEG-181", "HUMKERP2-NEG-1381", "HUMBAT2A-NEG-2761", "HUMP42LA-NEG-2161", "HUMNMYCA-NEG-4561", "HUMMHDPBF-NEG-61", "HUMMHHLA-NEG-241", "HUMBHAA-NEG-1261", "HUMSAP1-NEG-121", "HUMPRC1-NEG-61", "HUMGLYSA-NEG-2761", "HUMTPA-NEG-8761", "HUMFCERA-NEG-121", "HUMFVII-ACCEPTOR-136", "HUMC3A1R-NEG-2941", "HUMMETIA-NEG-1921", "HUMMLC2A-NEG-1021", "HUMUPAX-NEG-3541", "HUMHBB-NEG-5761", "HUMCRYGBC-NEG-3781", "HUMHER2A-NEG-3301", "HUMDRD2A-NEG-841", "HUMHLA1EA-NEG-121", "HUMAK1-NEG-6361", "HUMMYHC5-NEG-1141", "HUMALDC-NEG-61", "HUMPRCM-NEG-1681", "HUMATP-NEG-1981", "HUMC1A1-NEG-3901", "AGMORS12A-NEG-181", "HUMAMYA1-NEG-541", "HUMTHR-NEG-1501", "HUMACCYBB-NEG-3481", "HUMAPLA-NEG-1261", "HUMPAIB1-NEG-61", "HUMCSPCP-NEG-1261", "HUMTHB-NEG-17281", "HUMITF1-NEG-601", "HUMA1AR2-NEG-3601", "HUMMHAWB-NEG-721", "HUMTM-NEG-1441", "HUMBAT3B1-NEG-421", "HUMPPOL-NEG-1081", "HUMMHB27B-ACCEPTOR-1202", "HUMAPOA4C-NEG-2761", "HUMRENA3-NEG-361", "HUMGNPAS-NEG-61", "HUMHSP90-NEG-1", "HUMMYCF1-NEG-1081", "HUMIGFII5-NEG-121", "HUMNFH3-NEG-1", "HUMTBBM40-NEG-841", "HUMBMP1A-NEG-2161", "HUMMH6-ACCEPTOR-1451", "HUMP68-NEG-1081", "HUMHCR-NEG-661", "HUMFNRA-NEG-361", "HUMCYC1A-NEG-4381", "HUMIHRP-NEG-541", "HUMPGKA09-NEG-241", "HUMMHCW3-NEG-2761", "HUMCA2VR-NEG-361", "HUMPOMC-NEG-421", "HUMMBP17K-NEG-361", "HUMIGHBU-NEG-301", "HUMCD7-NEG-241", "HUMPEP7-NEG-1", "HUMALIPOA-NEG-781", "HUMHMPFK-NEG-901", "HUMMHDC3B-NEG-2461", "HUMPIGF2A-NEG-301", "HUMIDB-NEG-241", "GCRHBBA6-ACCEPTOR-920", "HUMMHCCWB-NEG-241", "HUMHEXB01-NEG-481", "HUMMHANTLE-ACCEPTOR-1874", "HUMHA44G-NEG-2701", "HUMHKB3R-NEG-121", "HUMKER19-NEG-541", "HUMCOLII2-NEG-781", "HUMASGPR1-NEG-661", "GCRRSAGAB-NEG-121", "HUMMHANTLE-NEG-3181", "HUMGPMSS-NEG-1501", "HUMPGAMMG-NEG-2221", "HUMC4A2-NEG-3721", "HUMFIL-NEG-841", "HUMPSAA-NEG-2521", "HUMPAIA-NEG-9781", "HUMMYCE1-NEG-421", "HUMACTGP1-NEG-481", "HUMSPIA04-NEG-61", "HUMTHBNA-NEG-961", "HUMDCC-NEG-1561", "MACPEPSG-NEG-2161", "HUMXEH-NEG-1441", "HUMGFIA1-NEG-841", "HUMERG11-NEG-181", "HUMMHTRP-NEG-661", "HUMGHCSA-NEG-24841", "HUMALIPOA-NEG-9901", "HUMTGLU-NEG-781", "HUMDBH01-NEG-1321", "HUMLDL100-NEG-1381", "HUMPCNA-NEG-301", "HUMCG1PA1-NEG-1501", "HUMRASFA2-NEG-1321", "HUMHLADP4-NEG-721", "HUMTHBNB-NEG-3541", "HUMTHY1A-ACCEPTOR-517", "HUMMH6-NEG-1201", "HUMTBB5-NEG-7801", "HUMMHCP41-NEG-541", "HUMIGHVA-NEG-481", "HUMFCRHA-NEG-661", "HUMAPOA4A-NEG-3241", "HUMFVII-DONOR-70", "HUMCETP6-NEG-481", "HUMPRF1A-NEG-3601", "HUMGCB-NEG-2281", "HUMFVII-NEG-721", "HUMCG4A11-NEG-961", "HUMUKPM-NEG-781", "HUMADAG-ACCEPTOR-31146", "HUMSPTA1A-NEG-1861", "HUMMHDQBR-NEG-601", "HUMTCRDA4-NEG-661", "HUMT28E-NEG-121", "HUMDMP-NEG-3481", "HUMDES-ACCEPTOR-7283", "HUMIGKVJ2-NEG-961"), ValueGroup("HUMBCL2A-NEG-1", "HUMPYGM09-NEG-1", "HUMRGNTSA-NEG-2161", "HUMPDGFA1-NEG-841", "HUMPDGFB1-NEG-1381", "CHPCD4A-NEG-961", "HUMTRANSB-NEG-181", "HUMRASR2-NEG-1621", "ORAHBPSBD-NEG-2881", "HUMIIP-NEG-61", "HUMCENPBR-NEG-901", "HUMMYCBL1-NEG-1", "HUMGFIA2-NEG-3961", "HUMANTP53-NEG-121", "HUMMYCLYA-NEG-781", "HUMCYPBX3-NEG-241", "HUMHMGIB-NEG-721", "HUMPRCA-NEG-4321", "HUMFUR1-NEG-3901", "HUMMHB39W-NEG-1", "HUMMHCW2B-NEG-3721", "HUMFOLLI1-NEG-2041", "HUMMHCAI-NEG-1", "HUMSISPDG-NEG-2581", "HUMLSZH-NEG-841", "HUMATP1A2-NEG-6661", "HUMINSPR-NEG-61", "CHPINVOL-NEG-1", "HUMTGFBG1-NEG-1801", "HUMMHDQBC-NEG-841", "HUMMYCBLK-NEG-781", "HUMG3PD-NEG-481", "HUMMHDRHA-NEG-5341", "HUMRBP1-NEG-421", "HUMINT1G-NEG-601", "HUMTUBBM-NEG-2281", "HUMMSOD-NEG-61", "HUMIL2RX6-NEG-1", "HUMIGCC4-NEG-781", "HUMTGFB-NEG-541", "HUMLCK-NEG-181", "HUMMHADX2-NEG-1201", "HUMCPA4-NEG-601", "HUMGAA-NEG-781", "HUMHSP90B-NEG-6541", "HUMTKR-NEG-2641", "HUMKAL2-NEG-1741", "HUMPAIA-NEG-14341", "HUMHMGYB-NEG-1561", "HUMMUCC-NEG-121", "HUMAK1-NEG-1801", "CHPCLA108-NEG-841", "HUMENKPH1-NEG-1141", "HUMPKLR-NEG-1441", "HUMBHA05-NEG-61", "HUMRENA4-NEG-2461", "HUMGRP78-NEG-421", "HUMTROPA3-NEG-901", "HUMTHMA-NEG-121", "HUMHST-NEG-3961", "HUMANTCE-NEG-841", "HUM4F2HG1-NEG-1441", "HUMCRYGBC-NEG-22021", "HUMSPR1B-NEG-181", "HUMFERG2-NEG-1", "HUMMHA3-NEG-1501", "HUMIRBPM-NEG-3241", "HUMMYCNH-NEG-1", "HUMALDCG-NEG-1441", "HUMTCBJD-NEG-361", "HUMDES-NEG-2641", "HUMASFB-NEG-1021", "HUMALPL01-NEG-721", "HUMMYCTR-NEG-2041", "HUMEPISIB2-NEG-1081", "HUMAPOBA-NEG-4141", "HUMARAF1R-NEG-1381", "HUMTHYB10-NEG-181", "HUMIL1RA-NEG-2821", "HUMAGG-NEG-541", "HUMACCYBA-NEG-2521", "HUMIL2R-NEG-1", "HUMBN51-NEG-1261", "HUMCSFGM-NEG-1", "HUMPAIA-NEG-661", "HUMC1INHI-NEG-121", "HUMVWFA17-NEG-301", "HUMCFVII-NEG-661", "HUMBCR22I-NEG-1681", "HUMMHB17W-NEG-1681", "HUMTPI-NEG-61", "HUMIGFIIR-NEG-6841", "HUMMHDRA2-NEG-241", "HUMASA-NEG-61", "HUMTPIPSC-NEG-61", "HUMTCBXF-NEG-541", "HUMAPOCIB-NEG-1441", "BABT1GL-NEG-61", "HUMAPC3G5-NEG-481", "HUMPRB1-NEG-601", "HUMMHDOB-NEG-4261", "HUMCGB-NEG-181", "HUMTACEB-NEG-241", "HUMERB2R-NEG-2521", "HUMALPI-NEG-3901", "HUMPSAP-NEG-2401", "HUMPKCB1A-NEG-1321", "HUMPKCB2A-NEG-1321", "HUMNCAMB-NEG-661", "HUMPLAKO-NEG-1861", "HUMBAT3B2-NEG-4141", "HUMCFVII-NEG-5221", "HUMVWFA04-NEG-301", "HUMESTR-NEG-2161", "HUMCATR-NEG-1081", "HUMVWFA29-NEG-661", "HUMPS2G3-NEG-421", "HUMP15095-NEG-4021", "HUMTPA-NEG-22441", "HUMIGLPAV-NEG-301", "HUMC8GA-NEG-241", "HUMSTSB-NEG-1021", "HUMIGKAR-NEG-181", "HUMTHYRR5-NEG-781", "HUMB2M2-NEG-1201", "HUMLKHA4-NEG-61", "HUMALP5-NEG-781", "HUMUMOD-NEG-1621", "HUMGAPJR-NEG-961", "HUMMYHC04-NEG-1", "HUMMYCST-NEG-1", "HUMPOVRA-NEG-541", "BABAPOE-NEG-1201", "HUMMYLCA-NEG-3181", "HUMMHB7A-NEG-1261", "HUMMPOA-NEG-1321", "HUMVIL2-NEG-1441", "HUMGLUTRN-NEG-1021", "HUMCN2-NEG-841", "HUMPOMC3-NEG-241", "HUMAPOAIT-NEG-661", "HUMTCDXA-NEG-1", "HUMEP-NEG-2461", "HUMNGFBA2-NEG-4021", "HUMSECP1-NEG-181", "HUMHLAB13-NEG-901", "HUMCN4GEL-NEG-2521", "HUMSODA-NEG-6361", "HUMINV2-NEG-121", "HUMAPB18-NEG-601", "HUMP45C17-NEG-4861", "HUMNGFR-NEG-2821", "HUMPEPD-NEG-421", "HUMTPA-NEG-17881", "HUMCRYGQ6-NEG-361", "HUMIL71-NEG-61", "HUMMHDRB-NEG-901", "HUMPC125-NEG-3181", "HUMIGFIIR-NEG-2281", "HUMIFNB3-NEG-1081", "HUMCYP2BA-NEG-2761", "HUMGLI-NEG-901", "HUMGLCB-NEG-661", "HUMUKI3-NEG-2941", "HUMLPHAA-NEG-1861", "HUMNGFBA1-NEG-2341", "HUMA2PIBB-NEG-421", "HUMTPA-NEG-27001", "HUMFVIIIB-NEG-4681", "HUMSPARC07-NEG-1", "HUMBCL2C-NEG-3061", "HUMGPIBA-NEG-1561", "HUMTCRAI2-NEG-61", "HUMPSPBA-NEG-601", "HUMAPOAI1-NEG-721", "HUMAPOE4-NEG-3301", "HUMTPO08-NEG-601", "HUMBLAST1-NEG-661", "HUMHAPRA-NEG-481", "HUMCYP450-NEG-2761", "HUMTOPI-NEG-1261", "HUMADAG-NEG-27241", "HUMGCB1-NEG-4201", "HUMGNAZ-NEG-961", "HUMLAM1B-NEG-121", "HUMGIAA-NEG-901", "HUMGASTA-NEG-6241", "HUMIGB7-NEG-1381", "HUMFESFPS-NEG-2101", "HUMMHDBW1-NEG-1", "HUMSAA-NEG-2821", "HUMRAF19-NEG-721", "HUMACTSM7-NEG-61", "HUMMRNAEN-NEG-1681", "HUM4COLA-NEG-1021", "HUMMHDOAG1-NEG-61", "HUMIMP-NEG-181", "HUMEGFRBB3-NEG-1201", "HUMMHDRGB-NEG-121", "HUMP45C21-NEG-901", "HUMLCATG-NEG-3841", "HUMATPAR-NEG-3241", "HUMMET2AB-NEG-61", "HUMGYPCDC-NEG-421", "HUMIGCB4-NEG-61", "HUMTP53B-NEG-1021", "HUMIL4-NEG-301", "HUMGSTPI-NEG-301", "HUMPC2A-NEG-481", "HUMGPP3A08-NEG-3721", "HUMPRA403-NEG-1", "HUMIGVKW1-NEG-841", "HUMD2A-NEG-841", "HUMMHBIA-NEG-541", "HUMMRP14A-NEG-1561", "HUMSATM29B-NEG-181", "HUMSRC2B-NEG-61", "MACHBPEA-NEG-2161", "HUMMYP-NEG-961", "HUMCYP2A4-NEG-1", "HUMLECGBB-NEG-61", "HUMAK1-ACCEPTOR-3918", "HUMIFNATC-NEG-661", "HUMALPHB-NEG-241", "HUMRASH-NEG-5161", "HUMCAMA-NEG-841", "HUMCYC1-NEG-961", "HUMTRGC64-NEG-61", "HUMGRFCIG-NEG-1501", "HUMKADA-NEG-601", "CHPHPA2-NEG-1", "HUMPOLP-NEG-2341", "HUMTCBCE-NEG-1621", "HUMCANPR-NEG-841", "HUMALPPD-DONOR-1715", "HUMADAG-NEG-18121", "HUMIL2RBC-NEG-1681", "HUMTPO-NEG-2401", "HUMNMCY1A-NEG-661", "HUMPDGFRAA-NEG-3421", "HUMGELS3-NEG-121", "HUMLDLRRL-NEG-7201", "HUMMHDCB-NEG-6601", "HUMALPHA-NEG-181", "HUMAE1-NEG-2101", "HUMTCRT3D-NEG-1081", "HUMTCAYJ-NEG-181", "HUMCYP45C-NEG-3781", "HUMGLYPL-NEG-1081", "HUMALPHA-DONOR-2096", "HUMHP2FS-NEG-1021", "HUMCYPX1-NEG-4201", "HUMINSRC-NEG-6661", "HUMGFIBPA-NEG-1441", "HUMINT2-NEG-9601", "HUMRFPA-NEG-841", "HUMFXIIIA-NEG-181", "HUMMHEA-NEG-2041", "HUMIGKCB-NEG-1081", "HUMNGFB-NEG-9361", "HUMPLPSPC-ACCEPTOR-2176", "HUMATCT4-NEG-1561", "HUMPFKM23-NEG-1621", "HUMMHDRBS-NEG-601", "HUMVWFR-NEG-3301", "HUMAMIPEP-NEG-3361", "HUMHBGF1-NEG-601", "HUMFX-NEG-241", "HUMRASAA-NEG-481", "HUMSISG1-NEG-841", "HUMALPI1-ACCEPTOR-124", "HUMADH21C-NEG-541", "HUMUBPSE1-NEG-661", "HUMMHCP42-DONOR-2679", "HUMMHCP52-DONOR-1216", "HUMHBA1-NEG-841", "HUMGLUD1-NEG-721", "HUMPRP-NEG-1", "HUMCNPG3-NEG-121", "HUMINSRA-NEG-4201", "HUMSYNPR-NEG-61", "HUMLAP-NEG-1141", "HUMAPOBB-NEG-8101", "HUMACTGA-NEG-2641", "HUMATPB-NEG-841", "HUMGHCSA-NEG-43081", "HUMMCR-NEG-781", "HUMIGF27-NEG-1", "HUMCR1-NEG-241", "HUMCMOSAA-NEG-121", "HUMA1ACMB-NEG-421", "HUMVWF01-NEG-2101", "HUMPDHA-NEG-1141", "HUMSPTA1A-NEG-6421", "HUMFESFPS-NEG-11221", "HUMC1A2-NEG-661", "HUMGASTA-NEG-1681", "HUMTCBJC-NEG-721", "HUMSNRNP1-NEG-301", "HUMHBB32-NEG-121", "HUMMHDQOB-NEG-1", "HUMFAV-NEG-3961", "HUMCRYGBC-NEG-17461", "HUMMCM-NEG-481", "HUMSPBAA-NEG-3061", "HUMSLK-NEG-841", "HUMHBA4-NEG-2041", "HUMCFVII-NEG-9781", "HUMPAIA-NEG-5221", "HUMTNFAB-DONOR-1537", "HUMA1ATR-NEG-781", "HUMASL1A-NEG-961", "HUMARC6-NEG-61", "HUMBAT2B4-NEG-1441", "HUMCTSE-NEG-661", "HUMFCGRB-NEG-61", "HUMHLADPB-NEG-13321", "HUMHLA1PS-NEG-3061", "HUMUKA-NEG-1501", "HUMC1RX-NEG-1201", "HUMMHDQAW-NEG-301", "HUMFNMC-NEG-3481", "HUMFBRG-NEG-7201"), ValueGroup("HUMIGJHI-NEG-61", "HUMERYA-NEG-661", "HUMHTV1A-NEG-121", "HUMGFP40H-NEG-1201", "HUMMDNCF-NEG-1141", "HUMVTNR-NEG-5641", "HUMCBP-NEG-1321", "HUMIFNAA-NEG-1621", "HUMF13A15-NEG-1981", "HUMPTPASE-NEG-1801", "HUMGHCSA-NEG-15721", "HUMPTHLHA-NEG-1381", "HUMCREB-NEG-2101", "HUMVIPMR5-NEG-121", "HUMHPRTB-NEG-21841", "HUMGCRB-NEG-3481", "HUMHISH3C-NEG-781", "HUMPOMC-NEG-4981", "HUMDBPG-NEG-1141", "HUMRASK25-NEG-361", "HUMAFP-NEG-4141", "HUMFVIII-NEG-2821", "HUMGHR-NEG-841", "HUMMHCD8A-NEG-3781", "HUMGNAS6-NEG-1", "HUMALBGC-NEG-18481", "HUMTM30R-NEG-1441", "TARHBB-NEG-541", "HUMAMYA3-NEG-721", "HUMCRYABA-NEG-3301", "HUMANT1-NEG-2881", "HUMVWFA31-NEG-3961", "HUMFIXG-NEG-8221", "HUMFBRG-NEG-2641", "HUMHPRTB-NEG-12721", "HUMFNRB-NEG-1081", "HUMNFLG-NEG-1861", "HUMGLA-NEG-8281", "HUMTPA-NEG-36121", "HUMNAKATPA-NEG-3061", "HUMGLUCG2-NEG-1621", "HUMPA2I-NEG-541", "HUMPALF1-NEG-781", "HUMHLADZA-NEG-5281", "HUMPLPA-NEG-2041", "HUMIL2-NEG-2401", "GORHBBPG-NEG-181", "HUMEGFRER-NEG-901", "HUMLC3-NEG-901", "HUMMCAF-NEG-661", "CHPRSALPI-NEG-481", "CHPHBBPCH-NEG-301", "HUMHPARS1-NEG-241", "HUMGYPA06-NEG-421", "HUMAPOB10-NEG-301", "HUMAMYB-NEG-2041", "HUMCHEBG2-NEG-1", "HUMTCRDA-NEG-1741", "HUMPGK2P-NEG-361", "AGMORS9A-NEG-481", "HUMADH3G1-NEG-841", "HUMIFNRG-NEG-1561", "HUMAPOBB-NEG-3541", "HUMGLA-NEG-3721", "HUMGDHR-NEG-841", "HUMIL2A-NEG-1261", "HUMPRPC4B-NEG-541", "HUMCALC1-NEG-1561", "HUMAPOB4A-NEG-241", "HUMCOL3A1A-NEG-661", "HUMGNAI1-NEG-1201", "HUMIL2A-NEG-5821", "HUMELIA-NEG-2641", "HUMABL1B-NEG-1621", "HUMIFNAGS-NEG-5281", "HUMIFNAGS-NEG-9841", "HUMFERHX-NEG-481", "HUMIFNN-NEG-601", "HUMIGVKA2-NEG-601", "HUMTNFA-NEG-3241", "HUMSIGMG3-NEG-301", "HUMGBR-NEG-2521", "MACHBPSBD-NEG-1141", "HUMHBEG-NEG-721", "HUMPYHBASA-NEG-2641", "HUMHLASBA-NEG-14281", "MNKHAPSE-NEG-5461", "HUMPTH2-NEG-841", "HUMFAPYSY-NEG-541", "HUMFIBEDA-NEG-2641", "HUMHRGA-NEG-481", "HUMDYZ1-NEG-3301", "HUMSPRO-NEG-1321", "HUMCDW44A-NEG-1261", "HUMGKAS-NEG-1441", "HUMERSP1A-NEG-241", "HUMIGKREC-NEG-1", "HUMFVIIIC-NEG-121", "HUMGPP3A17-NEG-1081", "GORHBPSBD-NEG-2281", "HUMFOLMES-NEG-721", "HUMHK1A-NEG-3421", "CHPGGGLOG-NEG-961", "HUMC7A-NEG-1861", "HUMTCRGE-NEG-121", "HUMALBGC-NEG-4801", "HUMUPIR1-NEG-841", "HUMIFNAGS-NEG-721", "HUMALPSAT-NEG-541", "HUMAACT-NEG-481", "HUMCAATP3-NEG-61", "HUMFIXG-NEG-3661", "HUMFIXG-NEG-35581", "HUMKER18-NEG-1381", "HUMCYAR03-NEG-301", "HUMFABP-NEG-661", "HUM3ALPH-NEG-1", "HUMALBAF1-NEG-301", "HUMAREPAP-NEG-61", "HUMHBEG-NEG-5281", "HUMCRPGP-NEG-481", "CHPHBBC-NEG-1321", "HUMCH15M-NEG-121", "HUMDBLTP-NEG-1081", "HUMSATAX-NEG-1441", "HUMRNPLA-NEG-181", "HUMKER18-NEG-5941", "HUMMETPOA-NEG-901", "HUMAFP-NEG-8701", "HUMHLASBA-NEG-5161", "HUMRSCNTF-NEG-61", "HUMMMTVPOL-NEG-421", "HUMHBEG-NEG-9841", "HUMCYCPSJ-NEG-361", "HUMHBB-NEG-10321", "HUMPIP-NEG-241", "HUMHPS12-NEG-1621", "HUMTHEP2-NEG-121", "HUMKUP-NEG-1021", "HUMTPA-NEG-13321", "GIBHBGGL-NEG-7861", "HUMAFP-NEG-13261", "HUMHPRTB-NEG-26401", "HUMHBVINT-NEG-61", "HUMTPO11-NEG-241", "HUMSATAR2-NEG-181", "HUMALDA1-NEG-661", "HUMTPO06-NEG-1681", "HUMS71AA-NEG-3301", "HUMAPBM-NEG-3841", "HUMMYCL-NEG-1681", "HUMAMY107-NEG-1", "HUMMETIF-NEG-1681", "HUMAPA-NEG-841", "HUMCACY-NEG-3481", "HUMIL1BX-NEG-5461", "HUMABLLA-NEG-1", "HUMJNLTRA-NEG-121", "HUMAPOBA-NEG-8701", "HUMHPRTB-NEG-35521", "HUMRISDAD-NEG-3721", "HUMMHDQAR-NEG-1", "HUMIFNG-NEG-4321", "HUMREPAL1-NEG-2821", "HUMLDLR01-NEG-241", "HUMCERPS-NEG-1381", "HUMADRBR-NEG-1861", "HUMNGFB-NEG-4801", "HUMA1ATP-NEG-4861", "HUMMDR3-NEG-301", "ORAHBBPSE-NEG-2101", "HUMHLADPB-NEG-4201", "HUMM6PR-NEG-1861", "HUMGG-NEG-1021", "HUMMYHC-NEG-1381", "HUMIL2B-NEG-3241", "HUMDONT01-NEG-481", "HUMH38-NEG-661", "HUMCYPNO-NEG-961", "HUMCSYNA-NEG-2461", "HUMIRBPG-NEG-8341", "HUMVIP1-NEG-421", "HUMMHDRHA-NEG-781", "HUMMGA2PS-NEG-1861", "HUMGCB2-NEG-1141", "HUMCYPX4-NEG-2101", "HUMUG3PD-NEG-61", "HUMGLYCA4-NEG-181", "HUMC1A2-NEG-5221", "HUMTBB46P-NEG-421", "CHPRSA-NEG-2161", "HUMCR1L04-NEG-1", "HUMMAP2A-NEG-421", "HUMLIDNA-NEG-2701", "HUMG3PDP-NEG-1", "HUMC5B-NEG-3241", "HUMAREPBV-NEG-61", "HUMOAS06-NEG-181", "HUMCNR-NEG-2101", "HUMIL1AG-NEG-421", "HUMFBRGAB-NEG-1081", "HUMMHDQA1-NEG-661", "HUMIFNAD-NEG-841", "HUMHBB-NEG-28561", "HUMCIX-NEG-2521", "HUMIFNA01-NEG-421", "HUMA1GLY2-NEG-1501", "HUMLOX5A-NEG-2161", "HUMACHRA7-NEG-901", "HUMTCAYS-NEG-121", "HUMFIXG6-NEG-1", "HUMHPARS2-NEG-6961", "HUMTCRB22-NEG-241", "HUMPAI2A4-NEG-121", "HUMHBBGG-NEG-481", "HUMALDB1-NEG-7321", "HUMSTATHG2-NEG-4561", "HUMTGPL1-NEG-1201", "HUMCYCPSE-NEG-541", "HUMKITCR-NEG-1021", "HUMDYS-NEG-3841", "HUMHBBRT-NEG-1141", "HUMGHRA06-NEG-1", "HUMRPS17A-NEG-1561", "HUMTCBV81-NEG-541", "HUMRPHO2A-NEG-1861", "HUMUMPS-NEG-1561", "HUMHPP16C-NEG-541", "HUMTCRGAA-NEG-1", "HUMFIXA-NEG-781", "HUMHPRTB-NEG-8161", "HUMTKRA-NEG-3721", "MACHBPEA-NEG-6721", "HUMUBIBP-NEG-481", "HUMSAA1A-NEG-3901", "HUMAPOBF-NEG-5641", "HUMATP1A2-NEG-15781", "HUMCYES1-NEG-1501", "HUMTHYRR-NEG-5041", "HUMNRNPEP-NEG-1141", "HUMFXIIIC-NEG-1561", "HUMACTCA2-NEG-1381", "HUMPNMTA-NEG-421", "HUMIGKVS-NEG-421", "HUMMHCC6A-NEG-2401", "HUMTGC1-NEG-61", "HUMAMD-NEG-601", "HUMTRD117-NEG-301", "HUMTPO04-NEG-3121", "HUMCRBP2-NEG-421", "HUMHSP90B-NEG-1981", "HUMTFPC-NEG-241", "HUMTFRR-NEG-1741", "HUMTBG-NEG-301", "HUMSATA-NEG-361", "HUMODC-NEG-241", "HUMHSC70-NEG-1021", "HUMVPF-NEG-661", "HUMH1FNC1-NEG-1", "HUMFBRB3-NEG-1", "HUMAPOAIA-NEG-1", "HUMT1418-NEG-2161", "HUMMYCG2-NEG-1501", "HUMALDH2-NEG-1201", "HUMRSKP08-NEG-61", "HUMTYR-NEG-1141", "HUMB100DI-NEG-121", "HUMRCYP3-NEG-1081", "HUMNGFB-NEG-241", "HUMC4BP-NEG-241"), ValueGroup("HUMGHCSA-NEG-11161", "HUMFMSCPO-NEG-61", "HUMADAG-NEG-31801", "HUMHF10-NEG-241", "HUMHBB-NEG-65041", "HUMALBGC-NEG-13921", "HUMLCA-NEG-2521", "HUMTSHBA1-NEG-961", "HUMLDHA7-NEG-361", "HUMERBC-NEG-421", "HUMTGFB2A-NEG-2401", "HUMCYPB1-NEG-1141", "HUMTROPFB-NEG-421", "HUMMYHC10-NEG-1561", "HUMRBSA-NEG-1381", "HUMGRP78-NEG-4981", "HUMPRLR-NEG-1861", "HUMTGL7-NEG-181", "HUMUDPGT-NEG-1801", "HUMMAC1A-NEG-4021", "HUMCA1XIA-NEG-3421", "HUMSBLA-NEG-961", "HUM7SKP41-NEG-541", "HUMPMPCA-NEG-1141", "HUMNAKATP2-NEG-2161", "HUMFNI-NEG-121", "HUMCATF-NEG-721", "HUMAFLP2-NEG-61", "HUMF8L1B-NEG-2041", "HUMHPRTB-NEG-30961", "MACRSMB-NEG-1", "HUMRSLINE-NEG-2161", "HUMMHDR3-NEG-1441", "HUMTCGCF-NEG-121", "HUMCEAE-NEG-61", "LEMHBGA-NEG-361", "HUMPRPH2-NEG-3661", "GORRGITX-NEG-1", "HUMP47-NEG-721", "HUM7B2-NEG-901", "HUMGHCSA-NEG-65881", "HUMIL1AG-NEG-9541", "GORHBBPG-NEG-4741", "HUMPLASTA-NEG-1561", "HUMCS5-NEG-2161", "HUMHBARSA-NEG-1081", "HUMGHCSA-NEG-33961", "HUMEDF-NEG-841", "HUMRHOC9-NEG-121", "HUMAPB09-NEG-1", "HUMBSF2-NEG-4141", "HUMCYPBX1-NEG-541", "HUMCAM-NEG-121", "HUMALDOBG-NEG-1", "HUMCINHA-NEG-541", "HUMHBB-NEG-46801", "HUMTHRAA-NEG-361", "HUMMHSBB1-NEG-1561", "HUMRPS14-NEG-3421", "HUMPHH-NEG-541", "HUMCYAR10-NEG-1381", "HUMSNRNP9-NEG-121", "HUMHELB-NEG-2461", "HUMADAG-NEG-22681", "HUMERMTV2-NEG-301", "HUMIL2R6-NEG-1", "HUMHMG14A-NEG-4621", "HUMGP3A-NEG-1921", "HUMVWFA01-NEG-3961", "HUMRPS6A-NEG-601", "HUMITI2-NEG-481", "HUMAPOB2-NEG-12181", "HUMPDHE1B-NEG-301", "HUMAPOBF-NEG-1081", "HUMCD20-NEG-721", "HUMP3A-NEG-2461", "HUMC8B-NEG-241", "HUMIL1P-NEG-301", "HUMCYPHLP-NEG-421", "HUMHBB-NEG-14881", "HUMGMCSFG-NEG-2821", "HUMMEHR-NEG-361", "HUMALDB1-NEG-2761", "HUMENKPH2-NEG-3781", "HUMHPARS1-NEG-9361", "HUMRSH3-NEG-1621", "HUMHLASBA-NEG-601", "HUMHPA2BR-NEG-1261", "HUMIGKVI5-NEG-2101", "HUMPNU-NEG-721", "HUMTBB14P-NEG-1441", "HUMCARM-NEG-661", "AGMKPNRSB-NEG-1", "HUMTCR3G1-NEG-961", "GIBIL2LTR-NEG-241", "HUMHBB-NEG-19441", "HUMHMG17-NEG-301", "HUMTHYRR-NEG-481", "HUMMRP8A-NEG-1381", "HUMATH3U5-NEG-181", "HUMZFX-NEG-781", "CHPHPA3-NEG-2041", "HUMPLAP2A-NEG-481", "HUMHPRTB-NEG-49201", "HUMF13A05-NEG-121", "HUMINHBA-NEG-541", "HUMHPBA-NEG-901", "HUMCAIII1-NEG-181", "HUMMPRCI-NEG-601", "HUMPS12-NEG-1321", "HUMKALA-NEG-181", "HUMGFIAB5-NEG-421", "HUMTPO03-NEG-61", "HUMHODB2-NEG-301", "ORAHBBPSE-NEG-6661", "HUMMYH1R-NEG-1621", "HUMTOPII-NEG-1081", "HUMCNRAB-NEG-1741", "HUMGHCSA-NEG-2041", "HUMAPB21-NEG-7501", "HUMBPGM3-NEG-661", "HUMTDTA-NEG-781", "HUMREN02-NEG-121", "HUMGSTC-NEG-421", "HUMTBB5-NEG-3241", "HUMC5A2B-NEG-181", "HUMLYL1B-NEG-4081", "HUMANP70-NEG-361", "HUMARMA-NEG-961", "HUMMK562B-NEG-1", "HUMC1SAB-NEG-841", "HUMLAMB2-NEG-961", "HUMMDR1-NEG-2401", "HUMVWFA37-NEG-661", "HUMAPB21-NEG-2941", "HUMFIGRD-NEG-361", "HUMTROPSR-NEG-721", "HUMC21PLA-NEG-121", "HUMATPSY1-NEG-4561", "HUMHOM4-NEG-841", "HUMGHCSA-NEG-6601", "HUMPKCAMD-NEG-781", "HUMG0S19A-NEG-2341", "HUMREPA-NEG-1141", "HUMBGAL-NEG-781", "HUMSNRNP3-NEG-3901", "HUMUGU4CA-NEG-121", "HUMPRC7-NEG-961", "HUMER41-NEG-8221", "HUMTCSM-NEG-241", "HUMFGRINT-NEG-421", "HUMME491-NEG-601", "HUMHBB-NEG-60481", "HUMHMGCOA-NEG-2101", "HUMCYPSCC-NEG-1501", "HUMDBPC-NEG-2821", "HUMIGLV71-NEG-721", "HUMGPBPS-NEG-961", "CHPP44-NEG-481", "HUMGDHA-NEG-181", "HUMATPSYB-NEG-5401", "HUMLDHBR-NEG-121", "HUMHBB-NEG-69601", "HUMTCAXN-NEG-61", "HUMTCAJA-NEG-61", "HUMIFNB3-NEG-10201", "HUMARGL-NEG-241", "HUMSIGMG4-NEG-961", "HUMAPB03-NEG-1441", "HUMPAPA-NEG-2401", "HUMRIBIR-NEG-1681", "HUMCEAB-NEG-121", "HUMIFNF-NEG-601", "HUMIGKVI3-NEG-421", "HUMLDLRRL-NEG-2641", "HUMGLNRS-NEG-361", "HUMVTNR-NEG-1081", "HUMHMG14A-NEG-61", "HUMFOS-NEG-1501", "HUMZFY-NEG-2341", "HUMCSPB-NEG-61", "HUMMDRA3-NEG-961", "HUMDMDA-NEG-1201", "HUMHSPGC-NEG-2821", "HUMPRCA-DONOR-3601", "HUMAMY2A-NEG-361", "HUMCYCB-NEG-181", "CHPHBBPCH-NEG-4861", "HUMMTLMC2-NEG-781", "HUMICAM1A-NEG-1981", "HUMBDGALA-NEG-1441", "HUMRB1RA-NEG-3061", "HUMAPOCII-NEG-121", "HUMCRYGBC-NEG-12901", "HUMFNRAS-NEG-601", "HUMXYES2-NEG-1741", "HUMGPP3A07-NEG-1381", "HUMCY4ARO-NEG-541", "HUMAPB21-NEG-12061", "HUMPOLDNAA-NEG-3481", "HUMCALLA14-NEG-61", "HUMCYPIIE-NEG-3001", "HUMBLYM1-DONOR-127", "HUMERG2-NEG-1321", "HUMRSSA1B-NEG-601", "HUMCEALV-NEG-721", "HUMMHCW1C-NEG-781", "HUMPDGFR-NEG-721", "HUMKER7R-NEG-601", "HUMTPAR-NEG-361", "HUMFVIIIC-NEG-4681", "HUMRSALPI-NEG-181", "HUMCKMT-NEG-5881", "HUMDLDH-NEG-1021", "HUMLYRE-NEG-1801", "HUMCGPRA-NEG-1981", "HUMCR1-NEG-4801", "HUMINCP-NEG-3121", "HUMTPA-NEG-4201", "HUMVWFR1-NEG-3481", "HUMMHDRBX-NEG-61", "HUMPDGFARA-NEG-61", "HUMCG1A1B-NEG-121", "HUMGPPSBB-NEG-1021", "HUMCFTRM-NEG-2281", "HUMPGKPX-NEG-961", "HUMAPB48A-NEG-2041", "HUMGHCSA-NEG-47641", "HUMFSH3-NEG-1021", "HUMADH2E2-NEG-1", "HUMPSG3A-NEG-481", "HUMNID-NEG-3541", "HUMMHSXA-NEG-4561", "HUMTCOBI-NEG-1261", "HUMMPRCI-NEG-5161", "HUMTPOA-NEG-841", "HUMP120PC-NEG-2101", "HUMA1ATP-NEG-9421", "HUMCSFM-NEG-181", "HUMRBS-NEG-2221", "HUMCOLA1C-NEG-1141", "HUMALIPOA-NEG-5341", "HUMG0S19B-NEG-2821", "HUMTFPB-NEG-4981", "HUMA2M-NEG-4441", "HUMTHROMR-NEG-721", "HUMGFRIL-NEG-1441", "HUMCYP4A2-NEG-781", "HUMROSMCF-NEG-121", "HUMHIAPPA-NEG-6421", "HUMEBUR04-NEG-241", "HUMACHRM2-NEG-1681", "HUMSODA-NEG-1801", "HUMTHB-NEG-3601", "HUMIL6-NEG-1021", "HUMPEPC1-NEG-901", "HUMIGLC2A-NEG-361", "HUMPTHL2-NEG-1", "HUMIGGFCRA-NEG-1", "HUMLCAR-NEG-2161", "HUMHTV3A-NEG-841", "HUMIL1BX-NEG-901", "HUMAPOB-NEG-1321", "HUMVWFR1-NEG-8041", "HUMIL1B-NEG-6721", "HUMLMYC2-NEG-901", "HUMADAG-NEG-36361", "HUMABLA-NEG-2701", "HUMIFNA04-NEG-961", "HUMIL5A-NEG-481", "HUMXCGD-NEG-1201", "HUMMHDRC2-NEG-841", "HUMTRA-NEG-781", "HUMCRYGBC-NEG-8341", "HUMDYSIN7-NEG-3541", "HUMINIFI-NEG-481", "HUMHLS2R-NEG-1201", "HUMNAKATP1-NEG-5281", "HUMIFNB2-NEG-361", "HUMFN-NEG-3241", "MACHBDPS2-NEG-1", "HUMADH2C1-NEG-841", "HUMIRBP-NEG-3481", "HUMIGKPC-NEG-61", "HUMTCGXH-NEG-601", "HUMTF-NEG-1261", "HUMG6PDA-NEG-601", "HUMCYPMP-NEG-181", "HUMG6PA-NEG-781", "HUMSLIPG-NEG-961", "HUMETN1-NEG-181", "ORAHBG2F-NEG-181", "HUMCS3-NEG-361", "HUMRGM-NEG-4441", "HUMGHV-NEG-1861", "HUMEGFRN-NEG-4381", "HUMGPPSB1-NEG-241"), ValueGroup("HUMIL2A-DONOR-1533", "HUMIL2B-DONOR-410", "HUMERP-DONOR-1653", "HUMERPA-DONOR-1662", "HUMHPRTB-DONOR-16757", "HUMALBGC-DONOR-13770", "HUMHPRTB-DONOR-40080", "HUMALBGC-DONOR-2592", "HUMAFP-DONOR-1834", "HUMGG-DONOR-3769", "HUMTFPB-DONOR-9438", "HUMHPRTB-DONOR-39832", "HUMFABP-DONOR-2493", "HUMALBGC-DONOR-6223", "HUMFIXG-DONOR-13441", "HUMALBGC-DONOR-15086", "HUMHPRTB-DONOR-31605", "HUMIL2-DONOR-745", "HUMIL2A-DONOR-1683", "HUMIL2B-DONOR-560", "HUMIL5-DONOR-908", "HUMIL5A-DONOR-907", "TARHBB-DONOR-1909", "HUMFIXG-DONOR-9638", "HUMALBGC-DONOR-4179", "HUMALBGC-DONOR-15728", "HUMAFP-DONOR-14687", "HUMFIXG-DONOR-9425", "HUMFABP-DONOR-1126", "HUMALBGC-DONOR-7827", "HUMIL2-DONOR-3182", "HUMIL2A-DONOR-4119", "HUMIL2B-DONOR-2996", "HUMHPRTB-DONOR-14857", "HUMIL5-DONOR-1982", "HUMIL5A-DONOR-1986", "HUMAFP-DONOR-14021", "TARHBD-DONOR-817", "HUMANT1-DONOR-4091", "HUMALBGC-DONOR-1825", "HUMALBGC-DONOR-12584", "HUMLACTA-DONOR-2236", "HUMAFP-DONOR-2929", "HUMHIAPPA-DONOR-629", "HUMPCNA-DONOR-5730", "HUMHPRTB-DONOR-34990", "HUMAFP-DONOR-17179", "HUMSODA-DONOR-3286", "HUMEF1A-DONOR-2242", "HUMCYCAA-DONOR-1365", "LEMHBB-DONOR-569", "HUMFOS-DONOR-2544", "HUMHSP90B-DONOR-4129", "HUMETN3-DONOR-227", "HUMAFP-DONOR-9741", "HUMEF1A-DONOR-2644", "HUMHSP90B-DONOR-5188", "HUMCKMT-DONOR-1625", "HUMEF1A-DONOR-2878", "HUMGRP78-DONOR-2258", "HUMGFP40H-DONOR-1143", "HUMMLCAC-NEG-601", "HUMSODA-DONOR-5211", "HUMTKRA-DONOR-4909", "HUMIFNINI-DONOR-2912", "HUMMHDOB-DONOR-4518", "HUMMYCC-DONOR-2852", "HUMLACTA-DONOR-865", "ORAHBD-DONOR-819", "HUMATPSYB-DONOR-2881", "HUMCSFGMA-DONOR-933", "HUMEF1A-DONOR-3546", "HUMHIAPPA-DONOR-1054", "HUMIL1B-DONOR-1558", "HUMREPHY2-NEG-481", "HUMFIXG-DONOR-3053", "HUMATPSYB-DONOR-5594", "HUMP45C17-DONOR-5125", "HUMRPS17A-DONOR-1717", "HUMIL5-DONOR-667", "HUMKEREP-DONOR-2872", "HUMCP21OH-DONOR-1104", "HUMCP21OHC-DONOR-1105", "HUMMHCP42-DONOR-2035", "HUMMHCP52-DONOR-569", "HUMGFP40H-DONOR-1028", "HUMMHDOB-DONOR-1105", "HUMGG-DONOR-1964", "HUMIL1B-DONOR-5824", "HUMEF1A-DONOR-1696", "HUMSODA-DONOR-6429", "HUMMYLCA-DONOR-2559", "HUMGRP78-DONOR-3086", "HUMHPARS1-DONOR-2990", "HUMAFP-DONOR-15308", "HUMTFPB-DONOR-2272", "MNKHBD-DONOR-698", "HUMCRYGBC-DONOR-18389", "HUMLACTA-DONOR-1671", "HUMFIXG-DONOR-33089", "HUMTNFA-DONOR-1604", "HUMTNFAB-DONOR-5083", "HUMTNFX-DONOR-1378", "GIBHBGGL-DONOR-2624", "GIBHBGGL-DONOR-7544", "HUMATPSYB-DONOR-8071", "HUMPRPH1-DONOR-1687", "LEMHBE-DONOR-633", "HUMHSP90B-DONOR-1172", "LEMHBGA-DONOR-648", "HUMMYLCA-DONOR-2388", "HUMPRPH2-DONOR-2660", "HUMMHDOB-DONOR-3639", "HUMMHDRHA-DONOR-3987", "ORAHBG2F-DONOR-615", "HUMAFP-DONOR-12231", "LEMHBG-DONOR-587", "MACHBCA2-DONOR-809", "ORAHBG1F-DONOR-614", "HUMPRPH1-DONOR-2774", "HUMSHBGA-DONOR-4189", "HUMSODA-DONOR-6932", "HUMSAA-DONOR-701", "HUMKAL2-DONOR-4031", "HUMHSP90B-DONOR-7352", "HUMEF1A-NEG-1561", "HUMAPA4R-NEG-2761", "HUMGCB1-DONOR-581", "HUMDES-DONOR-1824", "HUMHPRTB-DONOR-27927", "HUMSODA-DONOR-3510", "HUMHBBAG-DONOR-1689", "HUMG0S19A-DONOR-2927", "HUMSODA-DONOR-4739", "HUMFXIIIB-NEG-841", "HUMTPA-DONOR-31220", "HUMIL1B-DONOR-416", "HUMSHBGA-DONOR-5396", "HUMCRPG-NEG-781", "HUMPSAP-DONOR-170", "HUMTNFA-DONOR-1839", "HUMTNFAB-DONOR-5318", "HUMCKMT-DONOR-5110", "HUMPRPH1-DONOR-3555", "HUMA1GLY2-DONOR-3352", "HUMACTGA-DONOR-2060", "HUMKTEP2A-NEG-1441", "HUMTCAYD-NEG-1", "HUMSPROTR-NEG-541", "HUMADAG-DONOR-19262", "HUMALBGC-DONOR-17044", "HUMTPA-DONOR-23241", "HUMCFVII-DONOR-6378", "HUMGHN-DONOR-539", "HUMMH6-ACCEPTOR-3562", "HUMSAA-DONOR-34", "HUMAFP-DONOR-18452", "HUMADAG-NEG-9001", "HUMKEREP-DONOR-3368", "HUMCSPB-DONOR-2010", "HUMHPRTB-NEG-53761", "HUMCD1B1-NEG-241", "HUMFIXG-DONOR-20733", "HUMHSP90B-ACCEPTOR-7018", "HUMMYC3L-NEG-3301", "HUMRTVLH2-NEG-121", "HUMGHCSA-NEG-20281", "MNKHBD-DONOR-347", "HUMSODA-DONOR-4951", "HUMPUMP1-NEG-661", "HUMCRYGBC-DONOR-18732", "ORAHBD-DONOR-466", "HUMA1GLY2-DONOR-3606", "HUMHPARS1-DONOR-5547", "HUMATPCAPM-NEG-661", "HUMTHY1A-DONOR-853", "HUMIGHAJ-NEG-541", "HUMVIM06-NEG-1", "HUMSAA1A-DONOR-1815", "HUMMEDA-NEG-2761", "HUMMRP8A-ACCEPTOR-1988", "HUMGFP40H-ACCEPTOR-1110", "HUMCKMT-ACCEPTOR-5313", " * ")), IntervalBounds(14.5, 26.5, 27.5, 28.5, 29.5, 30.5, 31.5, 32.5, 34.5, 35.5), ValueGroups(ValueGroup("G", "N", "D", "S", " * "), ValueGroup("C"), ValueGroup("T"), ValueGroup("A")), Frequencies(909, 418, 1596, 1353, 1091, 683, 1006, 634, 419, 202, 1234, 1092, 905, 511, 1100, 492, 113, 20, 125, 103, 56, 57, 72, 47, 3, 1, 57, 99, 66, 51, 93, 38, 0, 1, 56, 67, 84, 52, 84, 36, 321, 208, 305, 105, 77, 32, 73, 121, 142, 102, 361, 111, 62, 70, 69, 176, 88, 40, 0, 98, 56, 58, 63, 1, 180, 89, 256, 171, 154, 84, 157, 88, 71, 32, 319, 80, 74, 38, 90, 154, 2398, 1340, 3482, 2481, 1730, 1132, 2068, 789, 1844, 603, 1474, 1060, 1519, 685, 793, 509, 1803, 689, 1310, 894, 1358, 700, 588, 499, 110, 41, 114, 61, 149, 36, 49, 44, 306, 107, 158, 83, 100, 36, 62, 47, 0, 1, 51, 80, 119, 63, 51, 22, 0, 0, 10, 56, 139, 40, 69, 9, 45, 33, 1, 60, 133, 25, 58, 3, 66, 49, 5, 77, 107, 50, 59, 0, 195, 100, 53, 161, 205, 106, 126, 4, 98, 63, 23, 87, 108, 47, 62, 6, 2499, 1116, 2653, 1676, 2902, 1295, 1427, 896, 1010, 1178, 979, 559, 1044, 1279, 929, 660, 1394, 1288, 770, 527, 897, 1113, 673, 617, 40, 79, 60, 56, 66, 94, 50, 34, 8, 84, 48, 40, 83, 97, 43, 32, 0, 0, 49, 61, 67, 101, 58, 16, 0, 0, 22, 25, 70, 125, 44, 32, 31, 20, 0, 46, 77, 119, 50, 0, 108, 61, 357, 36, 93, 85, 70, 177, 132, 130, 20, 95, 150, 186, 128, 18, 87, 48, 14, 44, 92, 104, 62, 6, 1487, 1197, 1634, 928, 1946, 2326, 1680, 1689, 731, 727, 1033, 640, 868, 1133, 1430, 759, 236, 329, 1042, 583, 716, 916, 1203, 588, 58, 69, 64, 38, 52, 83, 126, 58, 4, 17, 100, 36, 74, 86, 99, 66, 321, 207, 207, 50, 53, 54, 104, 109, 0, 1, 26, 72, 37, 73, 111, 21, 103, 54, 1, 41, 51, 56, 120, 4, 59, 59, 1, 47, 67, 77, 105, 5, 135, 99, 397, 89, 137, 164, 183, 256, 65, 66, 7, 47, 49, 81, 83, 17, 1641, 1572, 1306, 1365, 1497, 1997, 2250, 1201), 0, Frequencies(2224, 133440, 7), Frequencies(2224, 133440, 7)) ; // DataGrid(SampleId, Pos, Char) Unused Structure(VectorC) SampleIdLabels = VectorC("{HUMVPNP-ACCEPTOR-1688, HUMMHCW2B-ACCEPTOR-2660, HUMMHANTLE-ACCEPTOR-3110, ...}", "{HUMTFPB-ACCEPTOR-6362, HUMHPRTB-ACCEPTOR-27861, HUMIFNINI-ACCEPTOR-3147, ...}", "{HUMMHCD8A-DONOR-1781, HUMMYC3L-DONOR-402, HUMCFVII-DONOR-4425, ...}", "{HUMALPI1-DONOR-42, HUMGLTH1-NEG-241, HUMFGFB-NEG-181, ...}", "{HUMBCL2A-NEG-1, HUMPYGM09-NEG-1, HUMRGNTSA-NEG-2161, ...}", "{HUMIGJHI-NEG-61, HUMERYA-NEG-661, HUMHTV1A-NEG-121, ...}", "{HUMGHCSA-NEG-11161, HUMFMSCPO-NEG-61, HUMADAG-NEG-31801, ...}", "{HUMIL2A-DONOR-1533, HUMIL2B-DONOR-410, HUMERP-DONOR-1653, ...}") ; // Cluster labels for variable SampleId -Unused Structure(Vector) PosSet = TableVector(CC_main_table, Pos) ; // Value distribution for variable Pos -Unused Structure(VectorC) CharSet = TableVectorC(CC_main_table, Char) ; // Value distribution for variable Char +Unused Structure(Vector) PosSet = TableVector(CC_original_main_table, Pos) ; // Value distribution for variable Pos +Unused Structure(VectorC) CharSet = TableVectorC(CC_original_main_table, Char) ; // Value distribution for variable Char Unused Structure(DataGridDeployment) DeployedCoclusteringAtSampleId = DataGridDeployment(Coclustering, 1, PosSet, CharSet) ; // Deployed coclustering for variable SampleId Unused Numerical SampleIdIndex = PredictedPartIndex(DeployedCoclusteringAtSampleId) ; // Predicted cluster index for variable SampleId Categorical SampleIdPredictedLabel = ValueAtC(SampleIdLabels, SampleIdIndex) ; // Predicted label for variable SampleId }; -Dictionary CC_main_table (SampleId) +Dictionary CC_original_main_table (SampleId) { Categorical SampleId ; Numerical Pos ; diff --git a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsCoclustering/Coclustering.kdicj b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsCoclustering/Coclustering.kdicj index ac832bf5..c030c91e 100644 --- a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsCoclustering/Coclustering.kdicj +++ b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsCoclustering/Coclustering.kdicj @@ -3,7 +3,7 @@ "version": "10.0", "dictionaries": [ { - "name": "CC_Keys_main_table", + "name": "CC_main_table", "key": ["SampleId"], "variables": [ { @@ -11,9 +11,9 @@ "type": "Categorical" }, { - "name": "CC_main_table", + "name": "CC_original_main_table", "type": "Table", - "objectType": "CC_main_table" + "objectType": "CC_original_main_table" }, { "name": "Coclustering", @@ -37,7 +37,7 @@ "used": false, "type": "Structure", "structureType": "Vector", - "rule": "TableVector(CC_main_table, Pos)" + "rule": "TableVector(CC_original_main_table, Pos)" }, { "name": "CharSet", @@ -45,7 +45,7 @@ "used": false, "type": "Structure", "structureType": "VectorC", - "rule": "TableVectorC(CC_main_table, Char)" + "rule": "TableVectorC(CC_original_main_table, Char)" }, { "name": "DeployedCoclusteringAtSampleId", @@ -71,7 +71,7 @@ ] }, { - "name": "CC_main_table", + "name": "CC_original_main_table", "key": ["SampleId"], "variables": [ { diff --git a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsCoclustering/tmp_cc_deploy_model.kdic b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsCoclustering/tmp_cc_deploy_model.kdic index 12ddb4a1..4f1dde70 100644 --- a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsCoclustering/tmp_cc_deploy_model.kdic +++ b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsCoclustering/tmp_cc_deploy_model.kdic @@ -1,12 +1,12 @@ #Khiops 10.0 -Dictionary CC_Keys_main_table (SampleId) +Dictionary CC_main_table (SampleId) { Categorical SampleId ; - Table(CC_main_table) CC_main_table ; + Table(CC_original_main_table) CC_original_main_table ; }; -Dictionary CC_main_table (SampleId) +Dictionary CC_original_main_table (SampleId) { Categorical SampleId ; Numerical Pos ; diff --git a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsEncoder/Modeling.kdic b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsEncoder/Modeling.kdic index eb9e2eee..1b3904b6 100644 --- a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsEncoder/Modeling.kdic +++ b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsEncoder/Modeling.kdic @@ -1,7 +1,7 @@ #Khiops 10.0.3 // Recoding dictionary -Root Dictionary R_SpliceJunction (SampleId) +Root Dictionary R_main_table (SampleId) { Unused Categorical SampleId ; Categorical Class ; diff --git a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsEncoder/Modeling.kdicj b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsEncoder/Modeling.kdicj index 499a9611..dd7dafab 100644 --- a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsEncoder/Modeling.kdicj +++ b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsEncoder/Modeling.kdicj @@ -3,7 +3,7 @@ "version": "10.0", "dictionaries": [ { - "name": "R_SpliceJunction", + "name": "R_main_table", "label": "Recoding dictionary", "root": true, "key": ["SampleId"], diff --git a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsRegressor/Modeling.kdic b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsRegressor/Modeling.kdic index 5331e7fa..ca9cec66 100644 --- a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsRegressor/Modeling.kdic +++ b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsRegressor/Modeling.kdic @@ -1,7 +1,7 @@ #Khiops 10.0.3 -Root Dictionary SNB_SpliceJunction (SampleId) - +Root Dictionary SNB_main_table (SampleId) + { Categorical SampleId ; Unused Numerical Class ; diff --git a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsRegressor/Modeling.kdicj b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsRegressor/Modeling.kdicj index 53afbf93..45897115 100644 --- a/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsRegressor/Modeling.kdicj +++ b/tests/resources/sklearn/results/ref_models/SpliceJunction/KhiopsRegressor/Modeling.kdicj @@ -3,11 +3,11 @@ "version": "10.0", "dictionaries": [ { - "name": "SNB_SpliceJunction", + "name": "SNB_main_table", "root": true, "key": ["SampleId"], "metaData": { - "InitialDictionary": "SpliceJunction", + "InitialDictionary": "main_table", "PredictorLabel": "Selective Naive Bayes", "PredictorType": "Regressor" }, diff --git a/tests/test_dataset_class.py b/tests/test_dataset_class.py index b6d32d69..a63167cf 100644 --- a/tests/test_dataset_class.py +++ b/tests/test_dataset_class.py @@ -7,6 +7,7 @@ import os import shutil import unittest +import warnings import numpy as np import pandas as pd @@ -15,7 +16,7 @@ from pandas.testing import assert_frame_equal from sklearn import datasets -from khiops.sklearn.dataset import Dataset +from khiops.sklearn.dataset import Dataset, _upgrade_mapping_spec class DatasetInputOutputConsistencyTests(unittest.TestCase): @@ -235,34 +236,12 @@ def create_multitable_snowflake_dataframes(self): quaternary_table, ) - def create_multitable_snowflake_data_files( - self, - main_table_path, - secondary_table_path_1, - secondary_table_path_2, - tertiary_table_path, - quaternary_table_path, - ): - ( - main_table, - secondary_table_1, - secondary_table_2, - tertiary_table, - quaternary_table, - ) = self.create_multitable_snowflake_dataframes() - main_table.to_csv(main_table_path, sep="\t", index=False) - secondary_table_1.to_csv(secondary_table_path_1, sep="\t", index=False) - secondary_table_2.to_csv(secondary_table_path_2, sep="\t", index=False) - tertiary_table.to_csv(tertiary_table_path, sep="\t", index=False) - quaternary_table.to_csv(quaternary_table_path, sep="\t", index=False) - def create_fixture_ds_spec(self, multitable, schema): if not multitable: ref_table = self.create_monotable_dataframe() features = ref_table.drop(["class"], axis=1) ds_spec = { - "main_table": "Reviews", - "tables": {"Reviews": (features, "User_ID")}, + "main_table": (features, ["User_ID"]), } label = ref_table["class"] elif schema == "star": @@ -272,10 +251,9 @@ def create_fixture_ds_spec(self, multitable, schema): ) = self.create_multitable_star_dataframes() features_ref_main_table = ref_main_table.drop("class", axis=1) ds_spec = { - "main_table": "id_class", - "tables": { - "id_class": (features_ref_main_table, "User_ID"), - "logs": (ref_secondary_table, "User_ID"), + "main_table": (features_ref_main_table, ["User_ID"]), + "additional_data_tables": { + "logs": (ref_secondary_table, ["User_ID"]), }, } label = ref_main_table["class"] @@ -291,26 +269,16 @@ def create_fixture_ds_spec(self, multitable, schema): features_ref_main_table = ref_main_table.drop("class", axis=1) ds_spec = { - "main_table": "A", - "tables": { - "D": ( - ref_tertiary_table, - ["User_ID", "VAR_1", "VAR_2"], - ), - "B": (ref_secondary_table_1, ["User_ID", "VAR_1"]), - "E": ( + "main_table": (features_ref_main_table, ["User_ID"]), + "additional_data_tables": { + "B": (ref_secondary_table_1, ["User_ID", "VAR_1"], False), + "B/D": (ref_tertiary_table, ["User_ID", "VAR_1", "VAR_2"], False), + "B/D/E": ( ref_quaternary_table, ["User_ID", "VAR_1", "VAR_2", "VAR_3"], ), - "C": (ref_secondary_table_2, ["User_ID"]), - "A": (features_ref_main_table, "User_ID"), + "C": (ref_secondary_table_2, ["User_ID"], True), }, - "relations": [ - ("B", "D", False), - ("A", "C", True), - ("D", "E"), - ("A", "B", False), - ], } label = ref_main_table["class"] @@ -319,7 +287,7 @@ def create_fixture_ds_spec(self, multitable, schema): def get_ref_var_types(self, multitable, schema=None): ref_var_types = {} if not multitable: - ref_var_types["Reviews"] = { + ref_var_types["main_table"] = { "User_ID": "Categorical", "Age": "Numerical", "Clothing ID": "Numerical", @@ -331,7 +299,7 @@ def get_ref_var_types(self, multitable, schema=None): "class": "Categorical", } elif schema == "star": - ref_var_types["id_class"] = { + ref_var_types["main_table"] = { "User_ID": "Categorical", "class": "Categorical", "logs": "Table", @@ -347,7 +315,7 @@ def get_ref_var_types(self, multitable, schema=None): assert ( schema == "snowflake" ), f"'schema' should be 'snowflake' not '{schema}'" - ref_var_types["A"] = { + ref_var_types["main_table"] = { "User_ID": "Categorical", "class": "Categorical", "B": "Table", @@ -385,6 +353,102 @@ def get_ref_var_types(self, multitable, schema=None): return ref_var_types + def test_dataset_of_deprecated_mt_mapping(self): + """Test deprecated multi-table specification handling""" + ( + ref_main_table, + ref_secondary_table_1, + ref_secondary_table_2, + ref_tertiary_table, + ref_quaternary_table, + ) = self.create_multitable_snowflake_dataframes() + + features_ref_main_table = ref_main_table.drop("class", axis=1) + expected_ds_spec = { + "main_table": (features_ref_main_table, ["User_ID"]), + "additional_data_tables": { + "B": (ref_secondary_table_1, ["User_ID", "VAR_1"], False), + "B/D": (ref_tertiary_table, ["User_ID", "VAR_1", "VAR_2"], False), + "B/D/E": ( + ref_quaternary_table, + ["User_ID", "VAR_1", "VAR_2", "VAR_3"], + ), + "C": (ref_secondary_table_2, ["User_ID"], True), + }, + } + deprecated_ds_spec = { + "main_table": "A", + "tables": { + "A": (features_ref_main_table, "User_ID"), + "B": (ref_secondary_table_1, ["User_ID", "VAR_1"]), + "C": (ref_secondary_table_2, "User_ID"), + "D": (ref_tertiary_table, ["User_ID", "VAR_1", "VAR_2"]), + "E": ( + ref_quaternary_table, + ["User_ID", "VAR_1", "VAR_2", "VAR_3"], + ), + }, + "relations": { + ("A", "B", False), + ("B", "D", False), + ("D", "E"), + ("A", "C", True), + }, + } + + label = ref_main_table["class"] + + # Test that deprecation warning is issued when creating a dataset + # according to the deprecated spec + with warnings.catch_warnings(record=True) as warning_list: + _ = Dataset(deprecated_ds_spec, label) + self.assertTrue(len(warning_list) > 0) + deprecation_warning_found = False + for warning in warning_list: + warning_message = warning.message + if ( + issubclass(warning.category, UserWarning) + and len(warning_message.args) == 1 + and "multi-table dataset specification format" + in warning_message.args[0] + and "deprecated" in warning_message.args[0] + ): + deprecation_warning_found = True + break + self.assertTrue(deprecation_warning_found) + + # Test that a deprecated dataset spec is upgraded to the new format + ds_spec = _upgrade_mapping_spec(deprecated_ds_spec) + self.assertEqual(ds_spec.keys(), expected_ds_spec.keys()) + main_table = ds_spec["main_table"] + expected_main_table = expected_ds_spec["main_table"] + + # Test that main table keys are identical + self.assertEqual(main_table[1], expected_main_table[1]) + + # Test that main table data frame are equal + assert_frame_equal(main_table[0], expected_main_table[0]) + + # Test that additional data tables keys are identical + additional_data_tables = ds_spec["additional_data_tables"] + expected_additional_data_tables = expected_ds_spec["additional_data_tables"] + self.assertEqual( + additional_data_tables.keys(), expected_additional_data_tables.keys() + ) + + for table_path, expected_table_data in expected_additional_data_tables.items(): + table_data = additional_data_tables[table_path] + + # Test that secondary table keys are identical + self.assertEqual(table_data[1], expected_table_data[1]) + + # Test that the secondary table data frames are identical + assert_frame_equal(table_data[0], expected_table_data[0]) + + # Test that the secondary table entity statuses are identical if True + if len(expected_table_data) > 2 and expected_table_data[2] is True: + self.assertEqual(table_data[2], expected_table_data[2]) + def test_dataset_is_correctly_built(self): """Test that the dataset structure is consistent with the input spec""" ds_spec, label = self.create_fixture_ds_spec( @@ -392,21 +456,25 @@ def test_dataset_is_correctly_built(self): ) dataset = Dataset(ds_spec, label) - self.assertEqual(dataset.main_table.name, "A") - self.assertEqual(len(dataset.secondary_tables), 4) + self.assertEqual(dataset.main_table.name, "main_table") + self.assertEqual(len(dataset.additional_data_tables), 4) dataset_secondary_table_names = { - secondary_table.name for secondary_table in dataset.secondary_tables + secondary_table.name + for _, secondary_table, _ in dataset.additional_data_tables } self.assertEqual(dataset_secondary_table_names, {"B", "C", "D", "E"}) - self.assertEqual(len(dataset.relations), 4) - spec_relations = ds_spec["relations"] - for relation, spec_relation in zip(dataset.relations, spec_relations): - self.assertEqual(relation[:2], spec_relation[:2]) - if len(spec_relation) == 3: - self.assertEqual(relation[2], spec_relation[2]) + table_specs = ds_spec["additional_data_tables"].items() + for (ds_table_path, _, ds_is_one_to_one), ( + table_path, + table_spec, + ) in zip(dataset.additional_data_tables, table_specs): + # The relation holds the table name, not the table path + self.assertEqual(ds_table_path, table_path) + if len(table_spec) == 3: + self.assertEqual(ds_is_one_to_one, table_spec[2]) else: - self.assertFalse(relation[2]) + self.assertFalse(ds_is_one_to_one) def test_out_file_from_dataframe_monotable(self): """Test consistency of the created data file with the input dataframe @@ -424,7 +492,7 @@ def test_out_file_from_dataframe_monotable(self): # Cast "Date" columns to datetime as we don't automatically recognize dates out_table["Date"] = out_table["Date"].astype("datetime64[ns]") - ref_table = spec["tables"]["Reviews"][0] + ref_table = spec["main_table"][0] ref_table["class"] = y # Check that the dataframes are equal @@ -437,7 +505,7 @@ def test_out_file_from_numpy_array_monotable(self): """Test consistency of the created data file with the input numpy array""" # Create a monotable dataset from a numpy array iris = datasets.load_iris() - spec = {"tables": {"iris": (iris.data, None)}} + spec = {"main_table": (iris.data, None)} dataset = Dataset(spec, y=iris.target, categorical_target=True) # Create and load the intermediary Khiops file @@ -534,7 +602,7 @@ def test_out_file_from_sparse_matrix_monotable_specification(self): ) = self._create_test_sparse_matrix_with_target() # Create monotable dataset from input mapping with the sparse matrix - spec = {"tables": {"example_sparse_matrix": (input_sparse_matrix, None)}} + spec = {"main_table": (input_sparse_matrix, None)} dataset = Dataset(spec, y=input_target, categorical_target=True) # Create and load the intermediary Khiops file @@ -580,9 +648,9 @@ def test_out_files_from_dataframes_multitable_star(self): out_main_table = pd.read_csv(main_table_path, sep="\t") out_secondary_table = pd.read_csv(secondary_table_path, sep="\t") - ref_main_table = ds_spec["tables"]["id_class"][0] + ref_main_table = ds_spec["main_table"][0] ref_main_table["class"] = label - ref_secondary_table = ds_spec["tables"]["logs"][0] + ref_secondary_table = ds_spec["additional_data_tables"]["logs"][0] # Clean created test data assert_frame_equal( @@ -618,7 +686,7 @@ def test_out_files_from_dataframes_multitable_snowflake(self): ) = dataset.create_table_files_for_khiops(self.output_dir) out_main_table = pd.read_csv(main_table_path, sep="\t") - ref_main_table = ds_spec["tables"]["A"][0] + ref_main_table = ds_spec["main_table"][0] ref_main_table["class"] = label # assertions @@ -629,11 +697,11 @@ def test_out_files_from_dataframes_multitable_snowflake(self): out_main_table, ) - additional_table_names = list(additional_table_paths.keys()) - for name in additional_table_names: - additional_table_path = additional_table_paths[name] + additional_table_data_paths = list(additional_table_paths.keys()) + for table_path in additional_table_data_paths: + additional_table_path = additional_table_paths[table_path] out_additional_table = pd.read_csv(additional_table_path, sep="\t") - ref_additional_table = ds_spec["tables"][name][0] + ref_additional_table = ds_spec["additional_data_tables"][table_path][0] assert_frame_equal( ref_additional_table.sort_values( by=ref_additional_table.columns.tolist(), ascending=True @@ -674,11 +742,15 @@ def _test_domain_coherence(self, ds, ref_var_types): out_domain = ds.create_khiops_dictionary_domain() # Check that the domain has the same number of tables as the dataset - self.assertEqual(len(out_domain.dictionaries), 1 + len(ds.secondary_tables)) + self.assertEqual( + len(out_domain.dictionaries), 1 + len(ds.additional_data_tables) + ) # Check that the domain has the same table names as the reference ref_table_names = { - table.name for table in [ds.main_table] + ds.secondary_tables + table.name + for table in [ds.main_table] + + [table for _, table, _ in ds.additional_data_tables] } out_table_names = {dictionary.name for dictionary in out_domain.dictionaries} self.assertEqual(ref_table_names, out_table_names) @@ -691,7 +763,9 @@ def _test_domain_coherence(self, ds, ref_var_types): # Check that: # - the table keys are the same as the dataset # - the domain has the same variable names as the reference - for table in [ds.main_table] + ds.secondary_tables: + for table in [ds.main_table] + [ + table for _, table, _ in ds.additional_data_tables + ]: with self.subTest(table=table.name): self.assertEqual(table.key, out_domain.get_dictionary(table.name).key) out_dictionary_var_types = { diff --git a/tests/test_dataset_errors.py b/tests/test_dataset_errors.py index 7749bbbe..41b39efa 100644 --- a/tests/test_dataset_errors.py +++ b/tests/test_dataset_errors.py @@ -65,8 +65,7 @@ def create_fixture_dataset_spec(self, multitable=True, schema="snowflake"): reference_table = self.create_monotable_dataframe() features = reference_table.drop(["class"], axis=1) dataset_spec = { - "main_table": "Reviews", - "tables": {"Reviews": (features, "User_ID")}, + "main_table": (features, ["User_ID"]), } label = reference_table["class"] @@ -77,10 +76,9 @@ def create_fixture_dataset_spec(self, multitable=True, schema="snowflake"): ) = self.create_multitable_star_dataframes() features_reference_main_table = reference_main_table.drop("class", axis=1) dataset_spec = { - "main_table": "id_class", - "tables": { - "id_class": (features_reference_main_table, "User_ID"), - "logs": (reference_secondary_table, "User_ID"), + "main_table": (features_reference_main_table, ["User_ID"]), + "additional_data_tables": { + "logs": (reference_secondary_table, ["User_ID"]), }, } label = reference_main_table["class"] @@ -96,26 +94,21 @@ def create_fixture_dataset_spec(self, multitable=True, schema="snowflake"): features_reference_main_table = reference_main_table.drop("class", axis=1) dataset_spec = { - "main_table": "A", - "tables": { - "D": ( + "main_table": (features_reference_main_table, ["User_ID"]), + "additional_data_tables": { + "B/D": ( reference_tertiary_table, ["User_ID", "VAR_1", "VAR_2"], + False, ), "B": (reference_secondary_table_1, ["User_ID", "VAR_1"]), - "E": ( + "B/D/E": ( reference_quaternary_table, ["User_ID", "VAR_1", "VAR_2", "VAR_3"], + False, ), - "C": (reference_secondary_table_2, "User_ID"), - "A": (features_reference_main_table, "User_ID"), + "C": (reference_secondary_table_2, ["User_ID"], True), }, - "relations": [ - ("B", "D", False), - ("A", "C", True), - ("D", "E", False), - ("A", "B"), - ], } label = reference_main_table["class"] @@ -309,27 +302,6 @@ def create_multitable_snowflake_dataframes(self): quaternary_table, ) - def create_multitable_snowflake_data_files( - self, - main_table_path, - secondary_table_path_1, - secondary_table_path_2, - tertiary_table_path, - quaternary_table_path, - ): - ( - main_table, - secondary_table_1, - secondary_table_2, - tertiary_table, - quaternary_table, - ) = self.create_multitable_snowflake_dataframes() - main_table.to_csv(main_table_path, sep="\t", index=False) - secondary_table_1.to_csv(secondary_table_path_1, sep="\t", index=False) - secondary_table_2.to_csv(secondary_table_path_2, sep="\t", index=False) - tertiary_table.to_csv(tertiary_table_path, sep="\t", index=False) - quaternary_table.to_csv(quaternary_table_path, sep="\t", index=False) - def assert_dataset_fails( self, dataset_spec, y, expected_exception_type, expected_msg ): @@ -365,7 +337,7 @@ def test_y_type_must_be_str_or_array_like_1d(self): def test_df_dataset_fails_if_target_column_is_already_in_the_features(self): """Test in-memory table failing when the target is already in the features""" spec, _ = self.create_fixture_dataset_spec(multitable=False, schema=None) - features_table = spec["tables"]["Reviews"][0] + features_table = spec["main_table"][0] bad_y = features_table["Recommended IND"] with self.assertRaises(ValueError) as context: Dataset(spec, bad_y) @@ -379,57 +351,48 @@ def test_df_dataset_fails_if_target_column_is_already_in_the_features(self): # Tests for dictionary dataset spec # ##################################### - def test_dict_spec_key_tables_must_be_present(self): + def test_dict_spec_key_main_table_must_be_present(self): """Test Dataset raising ValueError if the 'tables' key is missing""" bad_spec, y = self.create_fixture_dataset_spec() - del bad_spec["tables"] - expected_msg = "'tables' entry missing from dataset dict spec" - self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) - - def test_dict_spec_key_tables_must_be_mapping(self): - """Test Dataset raising TypeError if the 'tables' key is not a mapping""" - bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["tables"] = AnotherType() - expected_msg = type_error_message("'tables' entry", bad_spec["tables"], Mapping) - self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) - - def test_dict_spec_table_list_cannot_be_empty(self): - """Test Dataset raising ValueError if the 'tables' key is empty""" - bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["tables"] = {} - expected_msg = "'tables' dictionary cannot be empty" + del bad_spec["main_table"] + expected_msg = "'main_table' entry missing from dataset dict spec" self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) - def test_dict_spec_table_input_type_must_be_a_tuple(self): - """Test Dataset raising TypeError when a relation tuple is a list""" + def test_dict_spec_main_table_input_type_must_be_a_tuple(self): + """Test Dataset raising TypeError when the main table spec is a list""" bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["tables"]["D"] = list(bad_spec["tables"]["D"]) + bad_spec["main_table"] = list(bad_spec["main_table"]) expected_msg = type_error_message( - "'D' table entry", bad_spec["tables"]["D"], tuple + "'main_table' entry", bad_spec["main_table"], tuple ) self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) def test_dict_spec_source_table_type_must_be_adequate(self): """Test Dataset raising TypeError when a table entry is not str nor DataFrame""" bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["tables"]["D"] = (AnotherType(), bad_spec["tables"]["D"][-1]) + bad_spec["additional_data_tables"]["B/D"] = ( + AnotherType(), + bad_spec["additional_data_tables"]["B/D"][-1], + ) expected_msg = type_error_message( - "'D' table's source", - bad_spec["tables"]["D"][0], + "Source of table at data path 'B/D'", + bad_spec["additional_data_tables"]["B/D"][0], "array-like", "scipy.sparse.spmatrix", str, ) self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) - def test_dict_spec_table_key_must_be_str_or_sequence(self): - """Test Dataset raising TypeError when a table's key is not str or Sequence""" + def test_dict_spec_table_key_must_sequence(self): + """Test Dataset raising TypeError when a table's key is not a Sequence""" bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["tables"]["D"] = (bad_spec["tables"]["D"][0], AnotherType()) + bad_spec["additional_data_tables"]["B/D"] = ( + bad_spec["additional_data_tables"]["B/D"][0], + AnotherType(), + ) expected_msg = type_error_message( - "'D' table's key", - bad_spec["tables"]["D"][1], - str, + "'B/D' table's key", + bad_spec["additional_data_tables"]["B/D"][1], Sequence, ) self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) @@ -437,11 +400,11 @@ def test_dict_spec_table_key_must_be_str_or_sequence(self): def test_dict_spec_table_key_column_type_must_be_str(self): """Test Dataset raising TypeError when a table key contains a non-string""" bad_spec, y = self.create_fixture_dataset_spec() - dataframe, _ = bad_spec["tables"]["D"] + dataframe, _, _ = bad_spec["additional_data_tables"]["B/D"] bad_key = ["User_ID", AnotherType(), "VAR_2"] - bad_spec["tables"]["D"] = (dataframe, bad_key) + bad_spec["additional_data_tables"]["B/D"] = (dataframe, bad_key) expected_msg = type_error_message( - "'D' table's key column name", bad_key[1], str + "'B/D' table's key column name", bad_key[1], str ) self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) @@ -449,35 +412,25 @@ def test_dict_spec_main_table_must_be_specified_for_multitable_datasets(self): """Test Dataset raising ValueError if 'main_table' is not a key in an MT spec""" bad_spec, y = self.create_fixture_dataset_spec() del bad_spec["main_table"] - expected_msg = "'main_table' entry must be specified for multi-table datasets" + expected_msg = "'main_table' entry missing from dataset dict spec" self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) def test_dict_spec_main_table_must_be_str(self): - """Test Dataset raising ValueError when 'main_table' is not a str""" + """Test Dataset raising ValueError when 'main_table' is not a tuple""" bad_spec, y = self.create_fixture_dataset_spec() bad_spec["main_table"] = 1 expected_msg = type_error_message( - "'main_table' entry", bad_spec["main_table"], str + "'main_table' entry", bad_spec["main_table"], tuple ) self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) - def test_dict_spec_main_table_not_declared_in_tables(self): - """Test Dataset raising ValueError if the main table is not in the table list""" - bad_spec, y = self.create_fixture_dataset_spec() - del bad_spec["tables"][bad_spec["main_table"]] - expected_msg = ( - "A table entry with the main table's name ('A') " - "must be present in the 'tables' dictionary" - ) - self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) - def test_dict_spec_main_table_key_must_be_specified(self): """Test Dataset raise ValueError if an MT spec doesn't have a main table key""" bad_spec, y = self.create_fixture_dataset_spec() - dataframe, _ = bad_spec["tables"][bad_spec["main_table"]] - bad_spec["tables"][bad_spec["main_table"]] = (dataframe, None) + dataframe, _ = bad_spec["main_table"] + bad_spec["main_table"] = (dataframe, None) expected_msg = ( - "key of main table 'A' is 'None': " + "The key of the main table is 'None': " "table keys must be specified in multi-table datasets" ) self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) @@ -485,61 +438,25 @@ def test_dict_spec_main_table_key_must_be_specified(self): def test_dict_spec_table_key_must_be_non_empty_for_multitable_datasets(self): """Test Dataset raising ValueError if an MT spec have an empty table key""" bad_spec, y = self.create_fixture_dataset_spec() - dataframe, _ = bad_spec["tables"][bad_spec["main_table"]] - bad_spec["tables"][bad_spec["main_table"]] = (dataframe, []) - expected_msg = f"'{bad_spec['main_table']}' table's key is empty" + dataframe, _ = bad_spec["main_table"] + bad_spec["main_table"] = (dataframe, []) + expected_msg = ( + "The key of the main table is empty: " + "table keys must be specified in multi-table datasets" + ) self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) def test_dict_spec_secondary_table_key_must_be_specified(self): """Test Dataset raise ValueError if an MT spec doesn't have a sec. table key""" bad_spec, y = self.create_fixture_dataset_spec() - dataframe, _ = bad_spec["tables"]["D"] - bad_spec["tables"]["D"] = (dataframe, None) + dataframe, _, _ = bad_spec["additional_data_tables"]["B/D"] + bad_spec["additional_data_tables"]["B/D"] = (dataframe, None) expected_msg = ( - "key of secondary table 'D' is 'None': " + "Key of secondary table at path 'B/D' is 'None': " "table keys must be specified in multi-table datasets" ) self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) - def test_dict_spec_format_must_be_tuple(self): - """Test Dataset raising a TypeError if the format field is not a tuple""" - bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["format"] = AnotherType() - expected_msg = type_error_message("'format' entry", bad_spec["format"], tuple) - self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) - - def test_dict_spec_format_must_have_size_2(self): - """Test Dataset raising a ValueError if its 'format' entry is not of size 2""" - bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["format"] = (",", True, AnotherType(), AnotherType(), AnotherType()) - expected_msg = "'format' entry must be a tuple of size 2, not 5" - self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) - - def test_dict_spec_format_tuple_1st_element_must_be_str(self): - """Test Dataset raising a TypeError if any of the format fields are not str""" - bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["format"] = (AnotherType(), True) - expected_msg = type_error_message( - "'format' tuple's 1st element (separator)", bad_spec["format"][0], str - ) - self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) - - def test_dict_spec_format_tuple_2nd_element_must_be_bool(self): - """Test Dataset raising a TypeError if any of the format fields are not bool""" - bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["format"] = (",", AnotherType()) - expected_msg = type_error_message( - "'format' tuple's 2nd element (header)", bad_spec["format"][1], bool - ) - self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) - - def test_dict_spec_format_tuple_1st_element_must_be_a_single_character(self): - """Test Dataset raising a ValueError if the format sep. is not a single char""" - bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["format"] = (";;", True) - expected_msg = "'format' separator must be a single char, got ';;'" - self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) - def test_dict_spec_y_type_must_be_series_or_df_when_x_is_df_spec(self): """Test Dataset raising TypeError if X a is ds-spec and y isn't array-like""" spec, _ = self.create_fixture_dataset_spec(multitable=False, schema=None) @@ -550,20 +467,7 @@ def test_dict_spec_y_type_must_be_series_or_df_when_x_is_df_spec(self): ) self.assert_dataset_fails(spec, bad_y, TypeError, expected_msg) - def test_dict_spec_table_name_must_be_str(self): - """Test Dataset raising TypeError when a table name is not a str""" - spec, _ = self.create_fixture_dataset_spec(multitable=False, schema=None) - features_table = spec["tables"]["Reviews"][0] - with self.assertRaises(TypeError) as context: - PandasTable( - AnotherType(), - features_table, - ) - output_error_msg = str(context.exception) - expected_msg = type_error_message("name", AnotherType(), str) - self.assertEqual(output_error_msg, expected_msg) - - def test_dict_spec_table_nameis_empty_string(self): + def test_pandas_table_name_must_not_be_the_empty_string(self): """Test Dataset raising ValueError when a table name is empty""" spec, _ = self.create_fixture_dataset_spec(multitable=False, schema=None) with self.assertRaises(ValueError) as context: @@ -575,13 +479,13 @@ def test_dict_spec_table_nameis_empty_string(self): def test_dict_spec_key_type_must_be_str_or_list_like(self): """Test Dataset raising TypeError when a key is not of the proper type""" bad_key = AnotherType() - expected_error_msg = type_error_message("key", bad_key, str, int, "list-like") + expected_error_msg = type_error_message("key", bad_key, "list-like") dataset_spec, _ = self.create_fixture_dataset_spec( multitable=False, schema=None ) - features_table = dataset_spec["tables"]["Reviews"][0] + features_table = dataset_spec["main_table"][0] with self.assertRaises(TypeError) as context: - PandasTable("reviews", features_table, key=bad_key) + PandasTable(name="reviews", dataframe=features_table, key=bad_key) output_error_msg = str(context.exception) self.assertEqual(output_error_msg, expected_error_msg) @@ -595,131 +499,72 @@ def test_dict_spec_key_column_type_must_be_str_or_int(self): dataset_spec, _ = self.create_fixture_dataset_spec( multitable=False, schema=None ) - features_table = dataset_spec["tables"]["Reviews"][0] + features_table = dataset_spec["main_table"][0] with self.assertRaises(TypeError) as context: - PandasTable("reviews", features_table, key=bad_key) + PandasTable(name="reviews", dataframe=features_table, key=bad_key) output_error_msg = str(context.exception) self.assertEqual(expected_error_msg, output_error_msg) - def test_dict_spec_relations_must_be_list_like(self): - """Test Dataset raising TypeError when dict spec "relations" is a dict-like""" + def test_dict_spec_additional_data_tables_must_be_dict(self): + """Test Dataset raising TypeError when additional_data_tables is not dict""" bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["relations"] = AnotherType() + bad_spec["additional_data_tables"] = AnotherType() expected_msg = type_error_message( - "'relations' entry", - bad_spec["relations"], - "list-like", + "'additional_data_tables' entry", + bad_spec["additional_data_tables"], + Mapping, ) self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) - def test_dict_spec_relations_must_be_tuple(self): - """Test Dataset raising TypeError when a relation is not a tuple""" + def test_dict_spec_additional_data_tables_item_must_be_tuple(self): + """Test Dataset raising TypeError when a secondary table spec is not a tuple""" bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["relations"][0] = AnotherType() - expected_msg = type_error_message("Relation", bad_spec["relations"][0], "tuple") + bad_spec["additional_data_tables"]["B"] = AnotherType() + expected_msg = type_error_message( + "'B' table entry", bad_spec["additional_data_tables"]["B"], tuple + ) self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) - def test_dict_spec_relations_must_be_of_size_2_or_3(self): - """Test Dataset raising ValueError when a relation is not of size 2 or 3""" + def test_dict_spec_additional_data_tables_item_must_be_of_size_2_or_3(self): + """Test Dataset raising ValueError when a secondary table spec is not of + size 2 or 3 + """ bad_spec, y = self.create_fixture_dataset_spec() for size in [0, 1, 4, 5]: - bad_spec["relations"][0] = tuple((f"Table{i}" for i in range(size))) - expected_msg = f"A relation must be of size 2 or 3, not {size}" + bad_spec["additional_data_tables"]["B"] = tuple( + (f"Table{i}" for i in range(size)) + ) + expected_msg = f"'B' table entry must have size 2 or 3, not {size}" with self.subTest(tuple_size=size): self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) - def test_dict_spec_table_relation_must_be_str(self): - """Test Dataset raising TypeError when a relation table is not a str""" + def test_dict_spec_secondary_table_data_path_must_be_str(self): + """Test Dataset raising TypeError when a secondary table data path is + not a str + """ # Test the error in the left table bad_spec, y = self.create_fixture_dataset_spec() - first_relation = bad_spec["relations"][0] - bad_spec["relations"][0] = (AnotherType(), "D") - expected_msg = type_error_message( - "Relation #1's parent table", bad_spec["relations"][0][0], str - ) + first_relation = bad_spec["additional_data_tables"]["B"] + bad_spec["additional_data_tables"][AnotherType()] = first_relation + expected_msg = type_error_message("Table path", AnotherType(), str) self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) - # Test the error in the right table - bad_spec["relations"][0] = first_relation - bad_spec["relations"][1] = ("A", AnotherType()) - expected_msg = type_error_message( - "Relation #2's child table", bad_spec["relations"][1][1], str - ) - self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) - - def test_dict_spec_entity_flag_relation_must_be_bool(self): + def test_dict_spec_entity_flag_must_be_bool(self): """Test Dataset raising TypeError when the entity flag is not boolean""" bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["relations"][0] = ("B", "D", AnotherType()) + original_bad_spec = bad_spec["additional_data_tables"]["B/D"] + bad_spec["additional_data_tables"]["B/D"] = ( + original_bad_spec[0], + original_bad_spec[1], + AnotherType(), + ) expected_msg = type_error_message( - "Relation #1 (B, D) 1-1 flag", bad_spec["relations"][0][2], bool + "Table at data path B/D 1-1 flag", + bad_spec["additional_data_tables"]["B/D"][2], + bool, ) self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg) - def test_dict_spec_relation_tables_must_not_be_the_same(self): - """Test Dataset raising ValueError when tables of a relation are equal""" - bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["relations"][0] = ("Table", "Table") - expected_msg = ( - "Relation #1's tables are equal: (Table, Table). They must be different." - ) - self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) - - def test_dict_spec_relation_table_must_be_in_table_list(self): - """Test Dataset raising ValueError when a rel. table is not in the table list""" - bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["relations"][0] = ("NonExistentTable", "D") - expected_msg = ( - "Relation #1 (NonExistentTable, D) contains " - "non-existent table 'NonExistentTable'. " - "All relation tables must exist in the 'tables' entry." - ) - self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) - - def test_dict_spec_relation_must_appear_once(self): - """Test Dataset raising ValueError if a relation appears more than once""" - bad_spec, y = self.create_fixture_dataset_spec() - bad_spec["relations"].append(("B", "D")) - expected_msg = ( - "Relation #1 (B, D) occurs 2 times. Each relation must be unique." - ) - self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) - - def test_dict_spec_relation_non_hierarchical_key(self): - """Test Dataset raising ValueError on non-hierarchical table keys""" - ref_spec, y = self.create_fixture_dataset_spec() - bad_spec = { - "main_table": "B", - "tables": { - "A": ref_spec["tables"]["A"], - "B": ref_spec["tables"]["B"], - "C": ref_spec["tables"]["C"], - }, - "relations": [("A", "C"), ("B", "A")], - } - expected_msg = ( - "Relation #2 child table 'A' key ([User_ID]) " - "does not contain that of parent table 'B' ([User_ID, VAR_1])." - ) - self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) - - def test_dict_spec_relation_cycle(self): - """Test Dataset raising ValueError when there is a relation cycle""" - ref_spec, y = self.create_fixture_dataset_spec() - bad_spec = { - "main_table": "A", - "tables": { - "A": ref_spec["tables"]["A"], - "B": ref_spec["tables"]["B"], - "C": ref_spec["tables"]["C"], - }, - "relations": [("A", "C"), ("A", "B"), ("C", "A")], - } - expected_msg = ( - "'relations' entry contains a cycle that includes " "the relation (C, A)." - ) - self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg) - ############################ # Tests for DatasetTable's # ############################ @@ -727,7 +572,7 @@ def test_dict_spec_relation_cycle(self): def test_pandas_table_input_type_must_be_dataframe(self): """Test PandasTable raising TypeError if dataframe is not a pandas.DataFrame""" with self.assertRaises(TypeError) as context: - PandasTable("reviews", AnotherType()) + PandasTable(name="reviews", dataframe=AnotherType()) output_error_msg = str(context.exception) expected_msg = type_error_message("dataframe", AnotherType(), pd.DataFrame) self.assertEqual(output_error_msg, expected_msg) @@ -735,7 +580,7 @@ def test_pandas_table_input_type_must_be_dataframe(self): def test_pandas_table_input_table_must_not_be_empty(self): """Test PandasTable raising ValueError if the input dataframe is empty""" with self.assertRaises(ValueError) as context: - PandasTable("reviews", pd.DataFrame()) + PandasTable(name="reviews", dataframe=pd.DataFrame()) output_error_msg = str(context.exception) expected_msg = "'dataframe' is empty" self.assertEqual(output_error_msg, expected_msg) @@ -743,10 +588,10 @@ def test_pandas_table_input_table_must_not_be_empty(self): def test_pandas_table_column_ids_must_all_be_int_or_str(self): """Test that in-memory dataset all columns ids must be int or str""" spec, _ = self.create_fixture_dataset_spec(multitable=False, schema=None) - features_table = spec["tables"]["Reviews"][0] + features_table = spec["main_table"][0] features_table.rename(columns={"User_ID": 1}, inplace=True) with self.assertRaises(TypeError) as context: - PandasTable("reviews", features_table) + PandasTable(name="reviews", dataframe=features_table) output_error_msg = str(context.exception) expected_msg = ( "Dataframe column ids must be either all integers or all " diff --git a/tests/test_estimator_attributes.py b/tests/test_estimator_attributes.py index 0efb09b3..958cb50e 100644 --- a/tests/test_estimator_attributes.py +++ b/tests/test_estimator_attributes.py @@ -54,21 +54,15 @@ def _create_multitable_input(self, size=None): # Create the multi-table dataset spec X = { - "main_table": "Accidents", - "tables": { - "Accidents": ( - accidents_df.drop("Gravity", axis=1)[:size], - "AccidentId", - ), + "main_table": ( + accidents_df.drop("Gravity", axis=1)[:size], + ["AccidentId"], + ), + "additional_data_tables": { "Vehicles": (vehicles_df, ["AccidentId", "VehicleId"]), - "Users": (users_df, ["AccidentId", "VehicleId"]), - "Places": (places_df, ["AccidentId"]), + "Vehicles/Users": (users_df, ["AccidentId", "VehicleId"]), + "Places": (places_df, ["AccidentId"], True), }, - "relations": [ - ("Accidents", "Vehicles"), - ("Vehicles", "Users"), - ("Accidents", "Places", True), - ], } y = accidents_df["Gravity"][:size] @@ -195,7 +189,7 @@ def test_classifier_attributes_multitable(self): X, y = self._create_multitable_input() khc_accidents = KhiopsClassifier(n_trees=0, n_pairs=10) khc_accidents.fit(X, y) - self.assert_attribute_values_ok(khc_accidents, X["tables"]["Accidents"][0], y) + self.assert_attribute_values_ok(khc_accidents, X["main_table"][0], y) self.assertTrue(khc_accidents.is_multitable_model_) def test_regressor_attributes_monotable(self): @@ -229,8 +223,8 @@ def test_regressor_attributes_multitable(self): by Khiops post training. """ X, _ = self._create_multitable_input(750) - y = X["tables"]["Accidents"][0]["Commune"] - X["tables"]["Accidents"][0].drop("Commune", axis=1, inplace=True) + y = X["main_table"][0]["Commune"] + X["main_table"][0].drop("Commune", axis=1, inplace=True) khr_accidents = KhiopsRegressor(n_trees=0) with warnings.catch_warnings(): warnings.filterwarnings( @@ -240,9 +234,7 @@ def test_regressor_attributes_multitable(self): ) khr_accidents.fit(X, y) - self.assert_attribute_values_ok( - khr_accidents, X["tables"]["Accidents"][0], None - ) + self.assert_attribute_values_ok(khr_accidents, X["main_table"][0], None) self.assertTrue(khr_accidents.is_multitable_model_) def test_encoder_attributes_monotable(self): diff --git a/tests/test_helper_functions.py b/tests/test_helper_functions.py index 7c8b34ae..1cd4bbe3 100644 --- a/tests/test_helper_functions.py +++ b/tests/test_helper_functions.py @@ -107,13 +107,11 @@ def test_train_test_split_dataset_dataframe(self): calls_df = pd.read_csv(io.StringIO(CALLS_CSV)) connections_df = pd.read_csv(io.StringIO(CONNECTIONS_CSV)) ds_spec = { - "main_table": "clients", - "tables": { - "clients": (clients_df.drop("class", axis=1), ["id"]), - "calls": (calls_df, ["id", "call_id"]), - "connections": (connections_df, ["id", "call_id"]), + "main_table": (clients_df.drop("class", axis=1), ["id"]), + "additional_data_tables": { + "calls": (calls_df, ["id", "call_id"], False), + "calls/connections": (connections_df, ["id", "call_id"], False), }, - "relations": [("clients", "calls", False), ("calls", "connections", False)], } y = clients_df["class"] @@ -150,13 +148,18 @@ def test_train_test_split_dataset_dataframe(self): }, } for split, ref_tables in ref_table_dfs.items(): - for table_name in ds_spec["tables"]: - with self.subTest(split=split, table_name=table_name): + for table_path in ds_spec["additional_data_tables"]: + with self.subTest(split=split, table_path=table_path): self._assert_frame_equal( - split_ds_specs[split]["tables"][table_name][0].reset_index( - drop=True - ), - ref_tables[table_name].reset_index(drop=True), + split_ds_specs[split]["additional_data_tables"][table_path][ + 0 + ].reset_index(drop=True), + ref_tables[table_path.split("/")[-1]].reset_index(drop=True), + ) + + self._assert_frame_equal( + split_ds_specs[split]["main_table"][0].reset_index(drop=True), + ref_tables["clients"].reset_index(drop=True), ) def _assert_dataset_keeps_structure(self, ds_spec, ref_ds_spec): @@ -166,18 +169,18 @@ def _assert_dataset_keeps_structure(self, ds_spec, ref_ds_spec): """ # Check that the spec dictionary is the same excluding the tables self.assertIn("main_table", ref_ds_spec) - self.assertIn("tables", ref_ds_spec) - self.assertIn("relations", ref_ds_spec) - self.assertEqual(ds_spec["main_table"], ref_ds_spec["main_table"]) - self.assertEqual(ds_spec["relations"], ref_ds_spec["relations"]) - self.assertEqual(ds_spec["tables"].keys(), ref_ds_spec["tables"].keys()) - if "format" in ref_ds_spec: - self.assertIn("format", ds_spec) - self.assertEqual(ds_spec["format"], ref_ds_spec["format"]) + self.assertIn("additional_data_tables", ref_ds_spec) + self.assertEqual( + ds_spec["additional_data_tables"].keys(), + ref_ds_spec["additional_data_tables"].keys(), + ) # Check that the table keys are equal - for table_name, table_spec in ds_spec["tables"].items(): - self.assertEqual(table_spec[1], ref_ds_spec["tables"][table_name][1]) + self.assertEqual(ds_spec["main_table"][1], ref_ds_spec["main_table"][1]) + for table_path, table_spec in ds_spec["additional_data_tables"].items(): + self.assertEqual( + table_spec[1], ref_ds_spec["additional_data_tables"][table_path][1] + ) def _assert_frame_equal(self, ref_df, out_df): """Wrapper for the assert_frame_equal pandas function diff --git a/tests/test_khiops_integrations.py b/tests/test_khiops_integrations.py index e126718a..15b765dc 100644 --- a/tests/test_khiops_integrations.py +++ b/tests/test_khiops_integrations.py @@ -294,15 +294,11 @@ def test_estimator_multiple_create_and_fit_does_not_raise_exception(self): secondary_table_data, "SampleId", primary_table=splice_junction_df ) dataset = { - "main_table": "SpliceJunction", - "tables": { - "SpliceJunction": ( - splice_junction_df, - "SampleId", - ), + "main_table": (splice_junction_df, ["SampleId"]), + "additional_data_tables": { "SpliceJunctionDNA": ( splice_junction_dna_df, - "SampleId", + ["SampleId"], ), }, } diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py index e36df908..d43f4ce8 100644 --- a/tests/test_sklearn.py +++ b/tests/test_sklearn.py @@ -365,13 +365,15 @@ def setUpClass(cls): "deploy_model": { "expected_n_dictionaries": 2, "expected_main_table_key": "SampleId", - "expected_main_dictionary_name": "CC_Keys_main_table", - "expected_additional_data_table_names": ["CC_main_table"], + "expected_main_dictionary_name": "CC_main_table", + "expected_additional_data_table_names": [ + "CC_original_main_table" + ], }, "extract_keys_from_data_table": { "expected_n_dictionaries": 1, "expected_main_table_key": "SampleId", - "expected_main_dictionary_name": "main_table", + "expected_main_dictionary_name": "CC_main_table", "expected_additional_data_table_names": [], }, } @@ -415,7 +417,7 @@ def setUpClass(cls): "train_predictor": { "expected_n_dictionaries": 2, "expected_main_table_key": "SampleId", - "expected_main_dictionary_name": "SpliceJunction", + "expected_main_dictionary_name": "main_table", "expected_additional_data_table_names": [ "SpliceJunctionDNA" ], @@ -423,7 +425,7 @@ def setUpClass(cls): "deploy_model": { "expected_n_dictionaries": 2, "expected_main_table_key": "SampleId", - "expected_main_dictionary_name": "SNB_SpliceJunction", + "expected_main_dictionary_name": "SNB_main_table", "expected_additional_data_table_names": [ "SNB_SpliceJunctionDNA" ], @@ -433,7 +435,7 @@ def setUpClass(cls): "train_recoder": { "expected_n_dictionaries": 2, "expected_main_table_key": "SampleId", - "expected_main_dictionary_name": "SpliceJunction", + "expected_main_dictionary_name": "main_table", "expected_additional_data_table_names": [ "SpliceJunctionDNA" ], @@ -441,7 +443,7 @@ def setUpClass(cls): "deploy_model": { "expected_n_dictionaries": 2, "expected_main_table_key": "SampleId", - "expected_main_dictionary_name": "R_SpliceJunction", + "expected_main_dictionary_name": "R_main_table", "expected_additional_data_table_names": [ "R_SpliceJunctionDNA" ], @@ -485,10 +487,10 @@ def setUpClass(cls): 2: os.path.join( cls.output_dir, "main_table_Coclustering.khcj" ), - 3: "CC_main_table", + 3: "CC_original_main_table", 4: "SampleId", 5: os.path.join( - cls.output_dir, "CC_Keys_main_table_deployed.kdic" + cls.output_dir, "CC_main_table_deployed.kdic" ), }, ("khiops.core", "read_coclustering_results_file"): { @@ -511,20 +513,20 @@ def setUpClass(cls): 2: os.path.join( cls.output_dir, "main_table_Coclustering.khcj" ), - 3: "CC_main_table", + 3: "CC_original_main_table", 4: "SampleId", 5: os.path.join( - cls.output_dir, "CC_Keys_main_table_deployed.kdic" + cls.output_dir, "CC_main_table_deployed.kdic" ), }, }, "predict": { ("khiops.core", "deploy_model"): { - 1: "CC_Keys_main_table", + 1: "CC_main_table", 3: cls.output_dir, }, ("khiops.core", "extract_keys_from_data_table"): { - 1: "main_table", + 1: "CC_original_main_table", 2: "copy_main_table.txt", 3: "keys_main_table.txt", }, @@ -587,14 +589,14 @@ def setUpClass(cls): KhiopsRegressor: { "fit": { ("khiops.core", "train_predictor"): { - 1: "SpliceJunction", + 1: "main_table", 3: "Class", } }, "predict": { ("khiops.core", "deploy_model"): { - 1: "SNB_SpliceJunction", - 2: "SpliceJunction.txt", + 1: "SNB_main_table", + 2: "main_table.txt", 3: cls.output_dir, } }, @@ -602,14 +604,14 @@ def setUpClass(cls): KhiopsClassifier: { "fit": { ("khiops.core", "train_predictor"): { - 1: "SpliceJunction", + 1: "main_table", 3: "Class", } }, "predict": { ("khiops.core", "deploy_model"): { - 1: "SNB_SpliceJunction", - 2: "SpliceJunction.txt", + 1: "SNB_main_table", + 2: "main_table.txt", 3: cls.output_dir, } }, @@ -617,15 +619,15 @@ def setUpClass(cls): KhiopsEncoder: { "fit": { ("khiops.core", "train_recoder"): { - 1: "SpliceJunction", + 1: "main_table", 3: "Class", 4: cls.output_dir, } }, "predict": { ("khiops.core", "deploy_model"): { - 1: "R_SpliceJunction", - 2: "SpliceJunction.txt", + 1: "R_main_table", + 2: "main_table.txt", 3: cls.output_dir, } }, @@ -724,7 +726,7 @@ def setUpClass(cls): ("khiops.core", "deploy_model"): { "detect_format": False, "header_line": True, - "additional_data_tables": {"CC_main_table"}, + "additional_data_tables": {"CC_original_main_table"}, "log_file_path": os.path.join( cls.output_dir, "khiops.log" ), @@ -1117,18 +1119,16 @@ def _create_train_test_multitable_dataframe(cls, transform_for_regression=False) secondary_table_data, "SampleId", primary_table=root_test_data[0] ) X_train_data = { - "main_table": "SpliceJunction", - "tables": { - "SpliceJunction": (root_train_data[0], "SampleId"), - "SpliceJunctionDNA": (secondary_train_data[0], "SampleId"), + "main_table": (root_train_data[0], ["SampleId"]), + "additional_data_tables": { + "SpliceJunctionDNA": (secondary_train_data[0], ["SampleId"]), }, } y_train_data = root_train_data[1] X_test_data = { - "main_table": "SpliceJunction", - "tables": { - "SpliceJunction": (root_test_data[0], "SampleId"), - "SpliceJunctionDNA": (secondary_test_data[0][0], "SampleId"), + "main_table": (root_test_data[0], ["SampleId"]), + "additional_data_tables": { + "SpliceJunctionDNA": (secondary_test_data[0][0], ["SampleId"]), }, } return (X_train_data, y_train_data, X_test_data) @@ -1252,11 +1252,15 @@ def _test_template( ) # choose train and test sets according to the source type assert source_type == "dataframe" - X_train_data = data["X_train"]["tables"]["SpliceJunctionDNA"][ + X_train_data = data["X_train"]["additional_data_tables"][ + "SpliceJunctionDNA" + ][ 0 ] # XXX leaky y_train_data = None - X_test_data = data["X_test"]["tables"]["SpliceJunctionDNA"][0] # XXX leaky + X_test_data = data["X_test"]["additional_data_tables"]["SpliceJunctionDNA"][ + 0 + ] # XXX leaky else: assert issubclass(estimator_type, KhiopsSupervisedEstimator) data = self._retrieve_data( diff --git a/tests/test_sklearn_output_types.py b/tests/test_sklearn_output_types.py index 5d75e3af..40235bee 100644 --- a/tests/test_sklearn_output_types.py +++ b/tests/test_sklearn_output_types.py @@ -61,12 +61,10 @@ def test_classifier_output_types(self): X, y = create_iris() raw_X_main_mt, raw_X_sec_mt, _ = create_iris_mt() X_mt = { - "main_table": "iris_main", - "tables": { - "iris_main": (raw_X_main_mt, "Id"), - "iris_sec": (raw_X_sec_mt, "Id"), + "main_table": (raw_X_main_mt, ["Id"]), + "additional_data_tables": { + "iris_sec": (raw_X_sec_mt, ["Id"]), }, - "relations": [("iris_main", "iris_sec")], } khc = KhiopsClassifier(n_trees=0) khc.fit(X, y) @@ -220,10 +218,9 @@ def test_regression_output_types(self): "Xs": { "mono": X, "multi": { - "main_table": "iris_main", - "tables": { - "iris_main": (X_mt, "Id"), - "iris_sec": (X_sec_mt, "Id"), + "main_table": (X_mt, ["Id"]), + "additional_data_tables": { + "iris_sec": (X_sec_mt, ["Id"]), }, }, },