Skip to content

Commit 13c10c2

Browse files
committed
Dump coclustering JSON report keys in the expected order
With this change, the `write_khiops_json_file` method dumps the JSON report with its keys in exactly the order expected by MODL_Coclustering. As a result, coclustering reports are now verbatim-identical to the reference reports.
1 parent b296bdb commit 13c10c2

File tree

3 files changed

+251
-10
lines changed

3 files changed

+251
-10
lines changed

khiops/core/coclustering_results.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,127 @@ class CoclusteringResults(KhiopsJSONObject):
7777
Coclustering modeling report.
7878
"""
7979

80+
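# Key order specification for the coclustering JSON report.
# Keys are written in the order in which they are listed here. For each key:
# - ``None`` means that the value is copied as-is,
# - a dict is the specification of a nested JSON object,
# - a one-element list holds the specification applied to each JSON object
#   of a list value.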
81+
# pylint: disable=line-too-long
82+
json_key_sort_spec = {
83+
"tool": None,
84+
"version": None,
85+
"shortDescription": None,
86+
"coclusteringReport": {
87+
"summary": {
88+
"instances": None,
89+
"cells": None,
90+
"nullCost": None,
91+
"cost": None,
92+
"level": None,
93+
"initialDimensions": None,
94+
"frequencyVariable": None,
95+
"dictionary": None,
96+
"database": None,
97+
"samplePercentage": None,
98+
"samplingMode": None,
99+
"selectionVariable": None,
100+
"selectionValue": None,
101+
},
102+
"dimensionSummaries": [
103+
{
104+
"name": None,
105+
"isVarPart": None,
106+
"type": None,
107+
"parts": None,
108+
"initialParts": None,
109+
"values": None,
110+
"interest": None,
111+
"description": None,
112+
"min": None,
113+
"max": None,
114+
},
115+
],
116+
"dimensionPartitions": [
117+
{
118+
"name": None,
119+
"type": None,
120+
"innerVariables": {
121+
"dimensionSummaries": [
122+
{
123+
"name": None,
124+
"type": None,
125+
"parts": None,
126+
"initialParts": None,
127+
"values": None,
128+
"interest": None,
129+
"description": None,
130+
"min": None,
131+
"max": None,
132+
}
133+
],
134+
"dimensionPartitions": [
135+
{
136+
"name": None,
137+
"type": None,
138+
"intervals": [
139+
{
140+
"cluster": None,
141+
"bounds": None,
142+
}
143+
],
144+
"valueGroups": [
145+
{
146+
"cluster": None,
147+
"values": None,
148+
"valueFrequencies": None,
149+
}
150+
],
151+
"defaultGroupIndex": None,
152+
}
153+
],
154+
},
155+
"intervals": [
156+
{
157+
"cluster": None,
158+
"bounds": None,
159+
}
160+
],
161+
"valueGroups": [
162+
{
163+
"cluster": None,
164+
"values": None,
165+
"valueFrequencies": None,
166+
"valueTypicalities": None,
167+
}
168+
],
169+
"defaultGroupIndex": None,
170+
},
171+
],
172+
"dimensionHierarchies": [
173+
{
174+
"name": None,
175+
"type": None,
176+
"clusters": [
177+
{
178+
"cluster": None,
179+
"parentCluster": None,
180+
"frequency": None,
181+
"interest": None,
182+
"hierarchicalLevel": None,
183+
"rank": None,
184+
"hierarchicalRank": None,
185+
"isLeaf": None,
186+
"shortDescription": None,
187+
"description": None,
188+
}
189+
],
190+
}
191+
],
192+
"cellPartIndexes": None,
193+
"cellFrequencies": None,
194+
},
195+
"khiops_encoding": None,
196+
"ansi_chars": None,
197+
"colliding_utf8_chars": None,
198+
}
199+
# pylint: enable=line-too-long
200+
80201
def __init__(self, json_data=None):
81202
"""See class docstring"""
82203
# Initialize super class

khiops/core/internals/io.py

Lines changed: 82 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
from khiops.core.exceptions import KhiopsJSONError
1717
from khiops.core.internals.common import (
1818
deprecation_message,
19+
is_dict_like,
20+
is_list_like,
1921
is_string_like,
2022
type_error_message,
2123
)
@@ -150,8 +152,20 @@ class KhiopsJSONObject:
150152
**Deprecated** will be removed in Khiops 12.
151153
"""
152154

155+
# Set default JSON key sort specification attribute
156+
# Can be set in classes that specialize this class
157+
json_key_sort_spec = None
158+
153159
def __init__(self, json_data=None):
154160
"""See class docstring"""
161+
# Check the type of the json_key_sort_spec class attribute
162+
if self.json_key_sort_spec is not None and not isinstance(
163+
self.json_key_sort_spec, dict
164+
):
165+
raise TypeError(
166+
type_error_message("key_sort_spec", self.json_key_sort_spec, dict)
167+
)
168+
155169
# Check the type of json_data
156170
if json_data is not None and not isinstance(json_data, dict):
157171
raise TypeError(type_error_message("json_data", json_data, dict))
@@ -261,17 +275,83 @@ def to_dict(self):
261275
report["subTool"] = self.sub_tool
262276
return report
263277

264-
def write_khiops_json_file(self, json_file_path):
278+
def _json_key_sort_by_spec(self, jdict, key_sort_spec=None):
279+
sorted_jdict = {}
280+
if key_sort_spec is None:
281+
key_sort_spec = self.json_key_sort_spec
282+
for spec_key, spec_value in key_sort_spec.items():
283+
if not (spec_value is None or isinstance(spec_value, (dict, list))):
284+
raise ValueError(
285+
type_error_message(
286+
"specification value",
287+
spec_value,
288+
"'None' or dict or list",
289+
)
290+
)
291+
if spec_key in jdict:
292+
json_value = jdict[spec_key]
293+
294+
# If value is not a dict, then:
295+
# - if not list-like, then add it as such to the output dict
296+
# - else, iterate on the list-like value
297+
# else, recurse on the dict structure
298+
if not is_dict_like(json_value):
299+
if not is_list_like(json_value):
300+
sorted_jdict[spec_key] = json_value
301+
else:
302+
sorted_jdict[spec_key] = []
303+
for json_el in json_value:
304+
if not is_dict_like(json_el):
305+
sorted_jdict[spec_key].append(json_el)
306+
else:
307+
if is_list_like(spec_value):
308+
sorted_jdict[spec_key].append(
309+
self._json_key_sort_by_spec(
310+
json_el, key_sort_spec=spec_value[0]
311+
)
312+
)
313+
else:
314+
sorted_jdict[spec_key] = self._json_key_sort_by_spec(
315+
json_value, key_sort_spec=spec_value
316+
)
317+
return sorted_jdict
318+
319+
def write_khiops_json_file(
320+
self, json_file_path, _sort_keys=False, _ensure_ascii=False
321+
):
265322
"""Write the JSON data of the object to a Khiops JSON file
266323
324+
The JSON keys are sorted according to the ``.KhiopsJSONObject.json_key_sort_spec``
325+
class attribute, if set.
326+
267327
Parameters
268328
----------
269329
json_file_path : str
270330
Path to the Khiops JSON file.
331+
_sort_keys : bool, default False
332+
Ignored if the ``.KhiopsJSONObject.json_key_sort_spec`` class
333+
attribute is set.
334+
Otherwise, if True, then sort the keys lexicographically and
335+
recursively.
336+
_ensure_ascii : bool, default False
337+
If True, then non-ASCII characters in the report are escaped. Otherwise,
338+
they are dumped as-is.
271339
"""
272340
# Serialize JSON data to string
273341
# Do not escape non-ASCII Unicode characters
274-
json_string = json.dumps(self.to_dict(), ensure_ascii=False)
342+
json_dict = self.to_dict()
343+
if self.json_key_sort_spec is not None:
344+
json_dict = self._json_key_sort_by_spec(json_dict)
345+
json_string = json.dumps(json_dict, indent=4, ensure_ascii=_ensure_ascii)
346+
elif _sort_keys:
347+
json_string = json.dumps(
348+
json_dict,
349+
indent=4,
350+
ensure_ascii=_ensure_ascii,
351+
sort_keys=_sort_keys,
352+
)
353+
else:
354+
json_string = json.dumps(json_dict, indent=4, ensure_ascii=_ensure_ascii)
275355
with io.BytesIO() as json_stream:
276356
writer = self.create_output_file_writer(json_stream)
277357
writer.write(json_string)

tests/test_core.py

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import json
1111
import os
1212
import shutil
13+
import tempfile
1314
import textwrap
1415
import unittest
1516
import warnings
@@ -33,7 +34,47 @@
3334
class KhiopsCoreIOTests(unittest.TestCase):
3435
"""Tests the reading/writing of files for the core module classes/functions"""
3536

36-
def _assert_report_is_dumped_to_correct_json(self, report, ref_json_report):
37+
def _assert_coclustering_report_is_written_to_sorted_json_file(
    self, cc_report, ref_json_report
):
    """Fail unless the written coclustering report matches its reference

    The report is written with ``write_khiops_json_file`` to a temporary
    directory, then both the written file and the reference file are loaded
    and re-dumped with the same indentation before being compared as strings.

    Parameters
    ----------
    cc_report
        Report object providing ``write_khiops_json_file``.
    ref_json_report : str
        Path to the reference JSON report.
    """
    # The temporary directory is removed on exit of the context manager, even
    # if writing or loading the report raises
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Write the coclustering report to a JSON file, sorted according to
        # the spec defined in the CoclusteringResults class
        # Set ensure_ascii, as non-ASCII characters are escaped in the reference
        # reports
        output_report = os.path.join(tmp_dir, "TestCoclustering.khcj")
        cc_report.write_khiops_json_file(
            output_report, _sort_keys=True, _ensure_ascii=True
        )

        # Load JSON Khiops reports into Python dictionaries
        with open(ref_json_report, encoding="utf-8") as ref_json_file:
            ref_json = json.load(ref_json_file)
        with open(output_report, encoding="utf-8") as output_json_file:
            output_json = json.load(output_json_file)

    # Dump reports with consistent indentation
    ref_json_string = json.dumps(ref_json, indent=4)
    output_json_string = json.dumps(output_json, indent=4)

    # Succeed if the dumped reports are equal
    if output_json_string == ref_json_string:
        return

    # On failure print the differences
    # The strings differ at this point, so the diff cannot be empty
    output_json_lines = output_json_string.splitlines(keepends=True)
    ref_json_lines = ref_json_string.splitlines(keepends=True)
    out_ref_diff = "".join(unified_diff(ref_json_lines, output_json_lines))
    self.fail(
        "CoclusteringResults JSON dump differs from reference "
        f"'{ref_json_report}':\n{out_ref_diff}"
    )
74+
75+
def _assert_analysis_report_is_dumped_to_correct_json(
76+
self, report, ref_json_report
77+
):
3778
# Dump the report as JSON (4-space indented and keys sorted in
3879
# lexicographic order)
3980
output_json = report.to_dict()
@@ -54,11 +95,8 @@ def _assert_report_is_dumped_to_correct_json(self, report, ref_json_report):
5495
ref_json_lines = ref_json_string.splitlines(keepends=True)
5596
out_ref_diff = "".join(unified_diff(ref_json_lines, output_json_lines))
5697
if out_ref_diff:
57-
report_type = (
58-
"Analysis" if ref_json_report.endswith(".khj") else "Coclustering"
59-
)
6098
self.fail(
61-
f"{report_type}Results JSON dump differs from reference "
99+
f"AnalysisResults JSON dump differs from reference "
62100
f"'{ref_json_report}':\n{out_ref_diff}"
63101
)
64102

@@ -111,12 +149,12 @@ def test_analysis_results(self):
111149
elif report in reports_warn:
112150
with self.assertWarns(UserWarning):
113151
results = kh.read_analysis_results_file(ref_json_report)
114-
self._assert_report_is_dumped_to_correct_json(
152+
self._assert_analysis_report_is_dumped_to_correct_json(
115153
results, ref_json_report
116154
)
117155
else:
118156
results = kh.read_analysis_results_file(ref_json_report)
119-
self._assert_report_is_dumped_to_correct_json(
157+
self._assert_analysis_report_is_dumped_to_correct_json(
120158
results, ref_json_report
121159
)
122160

@@ -152,7 +190,9 @@ def test_coclustering_results(self):
152190
results = kh.read_coclustering_results_file(ref_json_report)
153191
else:
154192
results = kh.read_coclustering_results_file(ref_json_report)
155-
self._assert_report_is_dumped_to_correct_json(results, ref_json_report)
193+
self._assert_coclustering_report_is_written_to_sorted_json_file(
194+
results, ref_json_report
195+
)
156196

157197
def test_binary_dictionary_domain(self):
158198
"""Test binary dictionary write"""

0 commit comments

Comments
 (0)