Skip to content

Commit 07598f6

Browse files
committed
Fix interval length check in coclustering report parsing
Always check that there is at least one interval. Require at least two intervals only when the first interval contains only missing values, because in that case the left-open flag is set on the second interval.
1 parent babb123 commit 07598f6

3 files changed

Lines changed: 178 additions & 3 deletions

File tree

khiops/core/coclustering_results.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ def __init__(self, json_data=None):
432432
# Check minimum consistency of input data
433433
if "cellFrequencies" not in json_data:
434434
raise KhiopsJSONError(
435-
"'cellFrequencies' key not found " "but 'cellPartIndexes' found."
435+
"'cellFrequencies' key not found but 'cellPartIndexes' found."
436436
)
437437
json_cell_frequencies = json_data["cellFrequencies"]
438438
json_cell_part_indexes = json_data["cellPartIndexes"]
@@ -909,8 +909,8 @@ def init_partition(self, json_data=None):
909909
)
910910
elif not isinstance(json_data["intervals"], list):
911911
raise KhiopsJSONError("'intervals' key must be a list")
912-
elif len(json_data["intervals"]) < 2:
913-
raise KhiopsJSONError("'intervals' key must have length at least 2")
912+
elif len(json_data["intervals"]) < 1:
913+
raise KhiopsJSONError("'intervals' key must have length at least 1")
914914

915915
# Initialize the intervals data
916916
for json_interval in json_data["intervals"]:
@@ -921,6 +921,11 @@ def init_partition(self, json_data=None):
921921
# Initialize open interval flags
922922
first_interval = self.parts[0]
923923
if first_interval.is_missing:
924+
if len(json_data["intervals"]) < 2:
925+
raise KhiopsJSONError(
926+
"'intervals' key must have at least 2 elements "
927+
"when one element contains missing values."
928+
)
924929
first_interval = self.parts[1]
925930
first_interval.is_left_open = True
926931
last_interval = self.parts[-1]
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
{
2+
"tool": "Khiops Coclustering",
3+
"version": "VERSION",
4+
"shortDescription": "",
5+
"coclusteringReport": {
6+
"summary": {
7+
"instances": 150,
8+
"cells": 1,
9+
"nullCost": 1379.668177,
10+
"cost": 1164.273132,
11+
"level": 0.1561209056,
12+
"initialDimensions": 3,
13+
"frequencyVariable": "",
14+
"dictionary": "Iris",
15+
"database": "..\/..\/..\/datasets\/Iris\/Iris.txt",
16+
"samplePercentage": 100,
17+
"samplingMode": "Include sample",
18+
"selectionVariable": "",
19+
"selectionValue": ""
20+
},
21+
"dimensionSummaries": [
22+
{
23+
"name": "PetalLength",
24+
"type": "Numerical",
25+
"parts": 1,
26+
"initialParts": 1,
27+
"values": 150,
28+
"interest": 1,
29+
"description": "",
30+
"min": 1,
31+
"max": 6.9
32+
},
33+
{
34+
"name": "PetalWidth",
35+
"type": "Numerical",
36+
"parts": 1,
37+
"initialParts": 1,
38+
"values": 150,
39+
"interest": 1,
40+
"description": "",
41+
"min": 0.1,
42+
"max": 2.5
43+
},
44+
{
45+
"name": "Class",
46+
"type": "Categorical",
47+
"parts": 1,
48+
"initialParts": 1,
49+
"values": 3,
50+
"interest": 1,
51+
"description": ""
52+
}
53+
],
54+
"dimensionPartitions": [
55+
{
56+
"name": "PetalLength",
57+
"type": "Numerical",
58+
"intervals": [
59+
{
60+
"cluster": "]-inf;+inf[",
61+
"bounds": [
62+
1,
63+
6.9
64+
]
65+
}
66+
]
67+
},
68+
{
69+
"name": "PetalWidth",
70+
"type": "Numerical",
71+
"intervals": [
72+
{
73+
"cluster": "]-inf;+inf[",
74+
"bounds": [
75+
0.1,
76+
2.5
77+
]
78+
}
79+
]
80+
},
81+
{
82+
"name": "Class",
83+
"type": "Categorical",
84+
"valueGroups": [
85+
{
86+
"cluster": "C1",
87+
"values": [
88+
"Iris-setosa",
89+
"Iris-versicolor",
90+
"Iris-virginica"
91+
],
92+
"valueFrequencies": [
93+
50,
94+
50,
95+
50
96+
],
97+
"valueTypicalities": [
98+
1,
99+
1,
100+
1
101+
]
102+
}
103+
],
104+
"defaultGroupIndex": 0
105+
}
106+
],
107+
"dimensionHierarchies": [
108+
{
109+
"name": "PetalLength",
110+
"type": "Numerical",
111+
"clusters": [
112+
{
113+
"cluster": "]-inf;+inf[",
114+
"parentCluster": "",
115+
"frequency": 150,
116+
"interest": 0.648091,
117+
"hierarchicalLevel": -0.0145257,
118+
"rank": 2,
119+
"hierarchicalRank": 4,
120+
"isLeaf": true
121+
}
122+
]
123+
},
124+
{
125+
"name": "PetalWidth",
126+
"type": "Numerical",
127+
"clusters": [
128+
{
129+
"cluster": "]-inf;+inf[",
130+
"parentCluster": "",
131+
"frequency": 150,
132+
"interest": 0.681865,
133+
"hierarchicalLevel": 0.374198,
134+
"rank": 2,
135+
"hierarchicalRank": 5,
136+
"isLeaf": true
137+
}
138+
]
139+
},
140+
{
141+
"name": "Class",
142+
"type": "Categorical",
143+
"clusters": [
144+
{
145+
"cluster": "C1",
146+
"parentCluster": "",
147+
"frequency": 150,
148+
"interest": 0.941775,
149+
"hierarchicalLevel": 0,
150+
"rank": 2,
151+
"hierarchicalRank": 3,
152+
"isLeaf": true
153+
}
154+
]
155+
}
156+
],
157+
"cellPartIndexes": [
158+
[
159+
0,
160+
0,
161+
0
162+
]
163+
],
164+
"cellFrequencies": [
165+
150
166+
]
167+
},
168+
"khiops_encoding": "ascii"
169+
}

tests/test_core.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ def test_coclustering_results(self):
167167
reports = [
168168
"Adult",
169169
"Iris",
170+
"IrisSimplified",
170171
"IrisIV",
171172
"Ansi_Coclustering",
172173
"AnsiGreek_Coclustering",

0 commit comments

Comments
 (0)