Skip to content

Commit 6e23a3d

Browse files
committed
Add comments and internal comments
- add comments to Dictionary, Variable and VariableBlock objects; - add internal comments to Dictionary and VariableBlock objects. Note: the position of comments that are interspersed with dictionary information (e.g. before/after the key, before/after the metadata, etc.) cannot be reconstructed, because kdic file parsing correctly *attaches* each comment to the entity it applies to, but does not record its textual position relative to that entity. The attachments are nevertheless sufficient to build the JSON dictionary representations. Hence, no semantic information is lost upon parsing.
1 parent b29aac3 commit 6e23a3d

File tree

8 files changed

+496
-16
lines changed

8 files changed

+496
-16
lines changed

khiops/core/dictionary.py

Lines changed: 63 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -548,18 +548,22 @@ class Dictionary:
548548
----------
549549
name : str
550550
Dictionary name.
551-
label : str
552-
Dictionary label/comment.
553551
root : bool
554552
True if the dictionary is the root of a dictionary hierarchy.
555553
key : list of str
556554
Names of the key variables.
557-
meta_data : `MetaData`
558-
MetaData object of the dictionary.
559555
variables : list of `Variable`
560556
The dictionary variables.
561557
variable_blocks : list of `VariableBlock`
562558
The dictionary variable blocks.
559+
label : str
560+
Dictionary label.
561+
comments : list of str
562+
List of dictionary comments.
563+
internal_comments : list of str
564+
List of internal dictionary comments.
565+
meta_data : `MetaData`
566+
MetaData object of the dictionary.
563567
"""
564568

565569
def __init__(self, json_data=None):
@@ -579,6 +583,8 @@ def __init__(self, json_data=None):
579583
# Initialize main attributes
580584
self.name = json_data.get("name", "")
581585
self.label = json_data.get("label", "")
586+
self.comments = json_data.get("comments", [])
587+
self.internal_comments = json_data.get("internalComments", [])
582588
self.root = json_data.get("root", False)
583589

584590
# Initialize names of key variable
@@ -634,6 +640,8 @@ def copy(self):
634640
# Copy dictionary main features
635641
dictionary_copy.name = self.name
636642
dictionary_copy.label = self.label
643+
dictionary_copy.comments = self.comments.copy()
644+
dictionary_copy.internal_comments = self.internal_comments.copy()
637645
dictionary_copy.root = self.root
638646
dictionary_copy.key = self.key.copy()
639647
dictionary_copy.meta_data = self.meta_data.copy()
@@ -653,6 +661,10 @@ def copy(self):
653661
variable_block_copy = VariableBlock()
654662
variable_block_copy.name = variable.variable_block.name
655663
variable_block_copy.label = variable.variable_block.label
664+
variable_block_copy.comments = variable.variable_block.comments.copy()
665+
variable_block_copy.internal_comments = (
666+
variable.variable_block.internal_comments.copy()
667+
)
656668
variable_block_copy.rule = variable.variable_block.rule
657669
variable_block_copy.meta_data = variable_block.meta_data.copy()
658670

@@ -900,6 +912,10 @@ def write(self, writer):
900912
if self.label:
901913
writer.write("// ")
902914
writer.writeln(self.label)
915+
if self.comments:
916+
for comment in self.comments:
917+
writer.write("// ")
918+
writer.writeln(comment)
903919
if self.root:
904920
writer.write("Root\t")
905921
writer.write("Dictionary\t")
@@ -929,6 +945,11 @@ def write(self, writer):
929945
else:
930946
variable.variable_block.write(writer)
931947
i += len(variable.variable_block.variables)
948+
949+
# Write internal comments if available
950+
for comment in self.internal_comments:
951+
writer.write("// ")
952+
writer.writeln(comment)
932953
writer.writeln("};")
933954

934955

@@ -946,8 +967,6 @@ class Variable:
946967
----------
947968
name : str
948969
Variable name.
949-
label : str
950-
Variable label/comment.
951970
used : bool
952971
True if the variable is used.
953972
type : str
@@ -958,11 +977,15 @@ class Variable:
958977
Type complement for the ``Structure`` type. Set to "" for other types.
959978
rule : str
960979
Derivation rule. Set to "" if there is no rule associated to this variable.
961-
meta_data : `MetaData`
962-
Variable metadata.
963980
variable_block : `VariableBlock`
964981
Block to which the variable belongs. Not set if the variable does not belong to
965982
a block.
983+
label : str
984+
Variable label.
985+
comments : list of str
986+
List of variable comments.
987+
meta_data : `MetaData`
988+
Variable metadata.
966989
"""
967990

968991
def __init__(self, json_data=None):
@@ -974,6 +997,7 @@ def __init__(self, json_data=None):
974997
# Main attributes
975998
self.name = ""
976999
self.label = ""
1000+
self.comments = []
9771001
self.used = True
9781002
self.type = ""
9791003

@@ -1007,6 +1031,7 @@ def __init__(self, json_data=None):
10071031
# Initialize main attributes
10081032
self.name = json_data.get("name")
10091033
self.label = json_data.get("label", "")
1034+
self.comments = json_data.get("comments", [])
10101035
self.used = json_data.get("used", True)
10111036
self.type = json_data.get("type")
10121037

@@ -1045,6 +1070,7 @@ def copy(self):
10451070
variable = Variable()
10461071
variable.name = self.name
10471072
variable.label = self.label
1073+
variable.comments = self.comments.copy()
10481074
variable.used = self.used
10491075
variable.type = self.type
10501076
variable.object_type = self.object_type
@@ -1142,6 +1168,11 @@ def write(self, writer):
11421168
if not isinstance(writer, KhiopsOutputWriter):
11431169
raise TypeError(type_error_message("writer", writer, KhiopsOutputWriter))
11441170

1171+
# Write comments if available
1172+
for comment in self.comments:
1173+
writer.write("\t// ")
1174+
writer.writeln(comment)
1175+
11451176
# Write "Unused" flag if variable not used
11461177
if not self.used:
11471178
writer.write("Unused")
@@ -1167,7 +1198,7 @@ def write(self, writer):
11671198
self.meta_data.write(writer)
11681199
writer.write("\t")
11691200

1170-
# Write label/commentary if available
1201+
# Write label if available
11711202
if self.label:
11721203
writer.write("// ")
11731204
writer.write(self.label)
@@ -1186,16 +1217,20 @@ class VariableBlock:
11861217
11871218
Attributes
11881219
----------
1189-
name :
1220+
name : str
11901221
Block name.
1191-
label :
1192-
Block label/commentary.
11931222
rule :
11941223
Block derivation rule.
1195-
meta_data :
1196-
Metadata object of the block.
11971224
variables :
11981225
List of the Variable objects of the block.
1226+
label : str
1227+
Block label.
1228+
comments : list of str
1229+
List of block comments.
1230+
internal_comments : list of str
1231+
List of internal block comments.
1232+
meta_data :
1233+
Metadata object of the block.
11991234
"""
12001235

12011236
def __init__(self, json_data=None):
@@ -1215,6 +1250,8 @@ def __init__(self, json_data=None):
12151250
# Initialize main attributes
12161251
self.name = json_data.get("blockName", "")
12171252
self.label = json_data.get("label", "")
1253+
self.comments = json_data.get("comments", [])
1254+
self.internal_comments = json_data.get("internalComments", [])
12181255

12191256
# Initialize derivation rule
12201257
self.rule = json_data.get("rule", "")
@@ -1305,10 +1342,21 @@ def write(self, writer):
13051342
# Check file object type
13061343
if not isinstance(writer, KhiopsOutputWriter):
13071344
raise TypeError(type_error_message("writer", writer, KhiopsOutputWriter))
1345+
1346+
# Write comments if available
1347+
for comment in self.comments:
1348+
writer.write("\t// ")
1349+
writer.writeln(comment)
1350+
13081351
# Write variables
13091352
writer.writeln("\t{")
13101353
for variable in self.variables:
13111354
variable.write(writer)
1355+
1356+
# Write internal comments if available
1357+
for comment in self.internal_comments:
1358+
writer.write("\t// ")
1359+
writer.writeln(comment)
13121360
writer.write("\t}")
13131361

13141362
# Write block's name
@@ -1328,7 +1376,7 @@ def write(self, writer):
13281376
self.meta_data.write(writer)
13291377
writer.write("\t")
13301378

1331-
# Write label/commentary if available
1379+
# Write label if available
13321380
if self.label:
13331381
writer.write("// ")
13341382
writer.write(self.label)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#Khiops VERSION
2+
3+
// Label Iris
4+
// Comment Iris 1
5+
// Comment Iris 2
6+
// Comment Iris 3
7+
// Comment Iris 4
8+
// Comment Iris 5
9+
Dictionary Iris
10+
{
11+
// Comment SepalLength 1
12+
// Comment SepalLength 2
13+
Numerical SepalLength ; // Label SepalLength
14+
Numerical SepalWidth ;
15+
Numerical PetalLength ;
16+
// Comment PetalWidth
17+
Numerical PetalWidth ; // Label PetalWidth
18+
// Comment Class
19+
Categorical Class ;
20+
// Comment NoSetosa 1
21+
// Comment NoSetosa 2
22+
Unused Numerical NoSetosa = NEQc(Class, "Iris-setosa") ; // Label NoSetosa
23+
// Comment internal Iris 1
24+
// Comment internal Iris 2
25+
};
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#Khiops VERSION
2+
3+
Dictionary Address (id_customer)
4+
{
5+
Categorical id_customer ;
6+
Numerical StreetNumber ;
7+
Categorical StreetName ;
8+
Categorical City ;
9+
};
10+
11+
Root Dictionary Customer (id_customer)
12+
{
13+
Categorical id_customer ;
14+
Categorical id = CopyC(id_customer) ;
15+
Categorical Name ;
16+
Entity(Address) Address ;
17+
Table(Usage) Usages ;
18+
// Partition comment
19+
Structure(Partition) partition1 = Partition(ValueSetC("Mobile", "Tel", " * ")) ; // Partition label
20+
// TablePartition comment 1
21+
// TablePartition comment 2
22+
{
23+
// Usages_S_M_Nb comment
24+
Unused Table(Usage) Usages_S_M_Nb ; <VarKey=1> // Usages_S_M_Nb label
25+
// Usages_S_T_Nb comment
26+
Unused Table(Usage) Usages_S_T_Nb ; <VarKey=2> // Usages_S_T_Nb label
27+
} tablePartition1 = TablePartition(Usages, partition1, Service) ; // TablePartition label
28+
{
29+
Numerical S_M_Nb ; <VarKey=1>
30+
Numerical S_T_Nb ; <VarKey=2>
31+
// Internal variable block comment
32+
} PartCounts = TablePartitionCount(tablePartition1) ; // TablePartitionCount label
33+
// S_Total comment
34+
Numerical S_Total = Sum(S_M_Nb, S_T_Nb) ;
35+
Table(Usage) UsagesMobile = TableSelection(Usages, EQc(Service, "Mobile")) ;
36+
Table(Usage) UsagesTel = TableSelection(Usages, EQc(Service, "Tel")) ;
37+
Numerical D_M_Nb = TableCount(UsagesMobile) ;
38+
Numerical D_T_Nb = TableCount(UsagesTel) ;
39+
Numerical D_Total = Sum(D_M_Nb, D_T_Nb) ;
40+
};
41+
42+
Dictionary Usage (id_customer)
43+
{
44+
Categorical id_customer ;
45+
Categorical Service ;
46+
Numerical Duration ;
47+
Numerical Price ;
48+
};
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#Khiops VERSION
2+
3+
// Label SpliceJunction
4+
// Comment SpliceJunction key before
5+
// Comment SpliceJunction key after
6+
// Comment end
7+
Root Dictionary SpliceJunction (SampleId)
8+
{
9+
Categorical SampleId ;
10+
Categorical Class ;
11+
// Comment DNA
12+
Table(SpliceJunctionDNA) DNA ; // Label DNA
13+
};
14+
15+
// Label SpliceJunction
16+
// Comment SpliceJunction key before
17+
// Comment SpliceJunction key after
18+
// Comment SpliceJunction meta-data before
19+
// Comment SpliceJunction meta-data after
20+
// Comment end
21+
Dictionary SpliceJunctionDNA (SampleId)
22+
<TestComment>
23+
{
24+
Categorical SampleId ;
25+
Numerical Pos ;
26+
Categorical Char ;
27+
// Categorical LowerChar = ToLower(Char) ;
28+
};
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
{
2+
"tool": "Khiops Dictionary",
3+
"version": "VERSION",
4+
"dictionaries": [
5+
{
6+
"name": "Iris",
7+
"label": "Label Iris",
8+
"comments": [
9+
"Comment Iris 1",
10+
"Comment Iris 2",
11+
"Comment Iris 3",
12+
"Comment Iris 4",
13+
"Comment Iris 5"
14+
],
15+
"variables": [
16+
{
17+
"name": "SepalLength",
18+
"label": "Label SepalLength",
19+
"comments": [
20+
"Comment SepalLength 1",
21+
"Comment SepalLength 2"
22+
],
23+
"type": "Numerical"
24+
},
25+
{
26+
"name": "SepalWidth",
27+
"type": "Numerical"
28+
},
29+
{
30+
"name": "PetalLength",
31+
"type": "Numerical"
32+
},
33+
{
34+
"name": "PetalWidth",
35+
"label": "Label PetalWidth",
36+
"comments": [
37+
"Comment PetalWidth"
38+
],
39+
"type": "Numerical"
40+
},
41+
{
42+
"name": "Class",
43+
"comments": [
44+
"Comment Class"
45+
],
46+
"type": "Categorical"
47+
},
48+
{
49+
"name": "NoSetosa",
50+
"label": "Label NoSetosa",
51+
"comments": [
52+
"Comment NoSetosa 1",
53+
"Comment NoSetosa 2"
54+
],
55+
"used": false,
56+
"type": "Numerical",
57+
"rule": "NEQc(Class, \"Iris-setosa\")"
58+
}
59+
],
60+
"internalComments": [
61+
"Comment internal Iris 1",
62+
"Comment internal Iris 2"
63+
]
64+
}
65+
],
66+
"khiops_encoding": "ascii"
67+
}

0 commit comments

Comments
 (0)