Skip to content

Commit ce5f03d

Browse files
authored
Fix interval length check in coclustering report parsing (#484)
Systematically check that there is at least one interval. Check that there are at least two intervals only if the first interval contains only missing values.
1 parent: babb123 · commit: ce5f03d

3 files changed

Lines changed: 178 additions & 3 deletions

File tree

khiops/core/coclustering_results.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -432,7 +432,7 @@ def __init__(self, json_data=None):
432432
# Check minimum consistency of input data
433433
if "cellFrequencies" not in json_data:
434434
raise KhiopsJSONError(
435-
"'cellFrequencies' key not found " "but 'cellPartIndexes' found."
435+
"'cellFrequencies' key not found but 'cellPartIndexes' found."
436436
)
437437
json_cell_frequencies = json_data["cellFrequencies"]
438438
json_cell_part_indexes = json_data["cellPartIndexes"]
@@ -909,8 +909,8 @@ def init_partition(self, json_data=None):
909909
)
910910
elif not isinstance(json_data["intervals"], list):
911911
raise KhiopsJSONError("'intervals' key must be a list")
912-
elif len(json_data["intervals"]) < 2:
913-
raise KhiopsJSONError("'intervals' key must have length at least 2")
912+
elif len(json_data["intervals"]) < 1:
913+
raise KhiopsJSONError("'intervals' key must have length at least 1")
914914

915915
# Initialize the intervals data
916916
for json_interval in json_data["intervals"]:
@@ -921,6 +921,11 @@ def init_partition(self, json_data=None):
921921
# Initialize open interval flags
922922
first_interval = self.parts[0]
923923
if first_interval.is_missing:
924+
if len(json_data["intervals"]) < 2:
925+
raise KhiopsJSONError(
926+
"'intervals' key must have at least 2 elements "
927+
"when one element contains missing values."
928+
)
924929
first_interval = self.parts[1]
925930
first_interval.is_left_open = True
926931
last_interval = self.parts[-1]
Lines changed: 169 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,169 @@
1+
{
2+
"tool": "Khiops Coclustering",
3+
"version": "VERSION",
4+
"shortDescription": "",
5+
"coclusteringReport": {
6+
"summary": {
7+
"instances": 150,
8+
"cells": 1,
9+
"nullCost": 1379.668177,
10+
"cost": 1164.273132,
11+
"level": 0.1561209056,
12+
"initialDimensions": 3,
13+
"frequencyVariable": "",
14+
"dictionary": "Iris",
15+
"database": "..\/..\/..\/datasets\/Iris\/Iris.txt",
16+
"samplePercentage": 100,
17+
"samplingMode": "Include sample",
18+
"selectionVariable": "",
19+
"selectionValue": ""
20+
},
21+
"dimensionSummaries": [
22+
{
23+
"name": "PetalLength",
24+
"type": "Numerical",
25+
"parts": 1,
26+
"initialParts": 1,
27+
"values": 150,
28+
"interest": 1,
29+
"description": "",
30+
"min": 1,
31+
"max": 6.9
32+
},
33+
{
34+
"name": "PetalWidth",
35+
"type": "Numerical",
36+
"parts": 1,
37+
"initialParts": 1,
38+
"values": 150,
39+
"interest": 1,
40+
"description": "",
41+
"min": 0.1,
42+
"max": 2.5
43+
},
44+
{
45+
"name": "Class",
46+
"type": "Categorical",
47+
"parts": 1,
48+
"initialParts": 1,
49+
"values": 3,
50+
"interest": 1,
51+
"description": ""
52+
}
53+
],
54+
"dimensionPartitions": [
55+
{
56+
"name": "PetalLength",
57+
"type": "Numerical",
58+
"intervals": [
59+
{
60+
"cluster": "]-inf;+inf[",
61+
"bounds": [
62+
1,
63+
6.9
64+
]
65+
}
66+
]
67+
},
68+
{
69+
"name": "PetalWidth",
70+
"type": "Numerical",
71+
"intervals": [
72+
{
73+
"cluster": "]-inf;+inf[",
74+
"bounds": [
75+
0.1,
76+
2.5
77+
]
78+
}
79+
]
80+
},
81+
{
82+
"name": "Class",
83+
"type": "Categorical",
84+
"valueGroups": [
85+
{
86+
"cluster": "C1",
87+
"values": [
88+
"Iris-setosa",
89+
"Iris-versicolor",
90+
"Iris-virginica"
91+
],
92+
"valueFrequencies": [
93+
50,
94+
50,
95+
50
96+
],
97+
"valueTypicalities": [
98+
1,
99+
1,
100+
1
101+
]
102+
}
103+
],
104+
"defaultGroupIndex": 0
105+
}
106+
],
107+
"dimensionHierarchies": [
108+
{
109+
"name": "PetalLength",
110+
"type": "Numerical",
111+
"clusters": [
112+
{
113+
"cluster": "]-inf;+inf[",
114+
"parentCluster": "",
115+
"frequency": 150,
116+
"interest": 0.648091,
117+
"hierarchicalLevel": -0.0145257,
118+
"rank": 2,
119+
"hierarchicalRank": 4,
120+
"isLeaf": true
121+
}
122+
]
123+
},
124+
{
125+
"name": "PetalWidth",
126+
"type": "Numerical",
127+
"clusters": [
128+
{
129+
"cluster": "]-inf;+inf[",
130+
"parentCluster": "",
131+
"frequency": 150,
132+
"interest": 0.681865,
133+
"hierarchicalLevel": 0.374198,
134+
"rank": 2,
135+
"hierarchicalRank": 5,
136+
"isLeaf": true
137+
}
138+
]
139+
},
140+
{
141+
"name": "Class",
142+
"type": "Categorical",
143+
"clusters": [
144+
{
145+
"cluster": "C1",
146+
"parentCluster": "",
147+
"frequency": 150,
148+
"interest": 0.941775,
149+
"hierarchicalLevel": 0,
150+
"rank": 2,
151+
"hierarchicalRank": 3,
152+
"isLeaf": true
153+
}
154+
]
155+
}
156+
],
157+
"cellPartIndexes": [
158+
[
159+
0,
160+
0,
161+
0
162+
]
163+
],
164+
"cellFrequencies": [
165+
150
166+
]
167+
},
168+
"khiops_encoding": "ascii"
169+
}

tests/test_core.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,7 @@ def test_coclustering_results(self):
167167
reports = [
168168
"Adult",
169169
"Iris",
170+
"IrisSimplified",
170171
"IrisIV",
171172
"Ansi_Coclustering",
172173
"AnsiGreek_Coclustering",

0 commit comments

Comments
 (0)