Skip to content

Commit 980bbf1

Browse files
committed
Add support for instance-variable coclustering to the CoclusteringResults object
This includes: (1) parsing the `innerVariables` field from the .khcj file into "sub"-dimensions; (2) full JSON serialization support, including the serialization of the `innerVariables` variable-part "sub"-dimensions.
1 parent 7624906 commit 980bbf1

3 files changed

Lines changed: 472 additions & 5 deletions

File tree

khiops/core/coclustering_results.py

Lines changed: 73 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,9 @@ class CoclusteringDimension:
589589
Maximum value of a numerical dimension/variable.
590590
parts : list of `CoclusteringDimensionPart`
591591
Partition of this dimension.
592+
variable_part_dimensions : list of `CoclusteringDimension`
593+
Variable part instance-variable coclustering dimensions. ``None`` for
594+
variable-variable coclustering.
592595
clusters : list of `CoclusteringCluster`
593596
Clusters of this dimension's hierarchy. Note that this includes intermediary
594597
clusters.
@@ -628,6 +631,9 @@ def __init__(self):
628631
# Clusters internal dictionary
629632
self._clusters_by_name = {}
630633

634+
# Variable part dimensions
635+
self.variable_part_dimensions = None
636+
631637
def init_summary(self, json_data=None):
632638
"""Initializes the summary attributes from a Python JSON object
633639
@@ -741,6 +747,42 @@ def init_partition(self, json_data=None):
741747
self.default_group = self.parts[default_group_index]
742748
self.default_group.is_default_part = True
743749

750+
# Instance-variable coclustering: initialize inner variables
751+
if self.is_variable_part:
752+
753+
# Create inner variables dimensions (subpartition)
754+
if "innerVariables" not in json_data:
755+
raise KhiopsJSONError("'innerVariables' key not found")
756+
self.variable_part_dimensions = []
757+
json_inner_variables = json_data["innerVariables"]
758+
if "dimensionSummaries" in json_inner_variables:
759+
for json_dimension_summary in json_inner_variables[
760+
"dimensionSummaries"
761+
]:
762+
dimension = CoclusteringDimension().init_summary(
763+
json_dimension_summary
764+
)
765+
self.variable_part_dimensions.append(dimension)
766+
767+
# Initialize inner variables dimensions' partitions
768+
if "dimensionPartitions" in json_inner_variables:
769+
json_dimension_partitions = json_inner_variables[
770+
"dimensionPartitions"
771+
]
772+
if len(self.variable_part_dimensions) != len(
773+
json_dimension_partitions
774+
):
775+
raise KhiopsJSONError(
776+
"'ineerVariables/dimensionPartitions' list has length "
777+
f"{len(json_dimension_partitions)} instead of "
778+
f"{len(self.variable_part_dimensions)}"
779+
)
780+
for i, json_dimension_partition in enumerate(
781+
json_dimension_partitions
782+
):
783+
dimension = self.variable_part_dimensions[i]
784+
dimension.init_partition(json_dimension_partition)
785+
744786
return self
745787

746788
def init_hierarchy(self, json_data):
@@ -884,10 +926,23 @@ def to_json(self, report_type):
884926

885927
# Get default group index
886928
for i, part in enumerate(self.parts):
887-
if part.is_default_part is True:
929+
if part.is_default_part:
888930
default_group_index = i
889931
break
890932
report["defaultGroupIndex"] = default_group_index
933+
934+
# Inner variables dimensions for instance-variable coclustering
935+
if self.is_variable_part:
936+
report["innerVariables"] = {
937+
"dimensionSummaries": [
938+
dimension.to_json(report_type="summary")
939+
for dimension in self.variable_part_dimensions
940+
],
941+
"dimensionPartitions": [
942+
dimension.to_json(report_type="partition")
943+
for dimension in self.variable_part_dimensions
944+
],
945+
}
891946
return report
892947
elif report_type == "hierarchy":
893948
report = {
@@ -1204,7 +1259,9 @@ def __init__(self, json_data=None):
12041259
json_data = {}
12051260
# Otherwise raise an error if the relevant keys are not found
12061261
else:
1207-
mandatory_keys = ("values", "valueFrequencies", "valueTypicalities")
1262+
# Value typicalities are absent for variable parts dimensions in
1263+
# instance-variable coclustering
1264+
mandatory_keys = ("values", "valueFrequencies")
12081265
for key in mandatory_keys:
12091266
if key not in json_data:
12101267
raise KhiopsJSONError(f"'{key}' key not found")
@@ -1225,7 +1282,12 @@ def __init__(self, json_data=None):
12251282
self.values.append(value)
12261283
value.value = json_value
12271284
value.frequency = json_value_frequencies[i]
1228-
value.typicality = json_value_typicalities[i]
1285+
1286+
# valueTypicalities are absent for variable part dimension parts,
1287+
# as used in instance-variable coclustering
1288+
value.typicality = (
1289+
json_value_typicalities[i] if i < len(json_value_typicalities) else None
1290+
)
12291291

12301292
# Initialize default values (set for real from another class)
12311293
self.is_default_part = False
@@ -1244,12 +1306,18 @@ def __str__(self):
12441306
return label
12451307

12461308
def to_json(self):
1247-
return {
1309+
"""Serialize object instance to the Khiops JSON format"""
1310+
report = {
12481311
"cluster": self.cluster_name,
12491312
"values": [value.value for value in self.values],
12501313
"valueFrequencies": [value.frequency for value in self.values],
1251-
"valueTypicalities": [value.typicality for value in self.values],
12521314
}
1315+
typicalities = [
1316+
value.typicality for value in self.values if value.typicality is not None
1317+
]
1318+
if typicalities:
1319+
report["valueTypicalities"] = typicalities
1320+
return report
12531321

12541322
def part_type(self):
12551323
"""Part type of this instance

0 commit comments

Comments
 (0)