Skip to content

Commit 6f90c47

Browse files
committed
WIP: Start updating the report API + references
- add missing_number for Categorical as well
- add sparse_missing_number for both Numerical and Categorical
- drop detailed statistics
- add number of text features
1 parent 645223c commit 6f90c47

11 files changed

Lines changed: 63 additions & 276 deletions

File tree

khiops/core/analysis_results.py

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ class AnalysisResults(KhiopsJSONObject):
9292
specified it returns an empty instance.
9393
9494
.. note::
95-
See also the `.read_analysis_results_file` function from the core API to
96-
obtain an instance of this class from a Khiops JSON file.
95+
See also the `.read_analysis_results_file` function to obtain an instance
96+
of this class from a Khiops JSON file.
9797
9898
Attributes
9999
----------
@@ -325,7 +325,10 @@ class PreparationReport:
325325
target_stats_std_dev : float
326326
Standard deviation of a numerical target variable.
327327
target_stats_missing_number : int
328-
Number of missing values for a numerical target variable.
328+
Number of missing values for a numerical or categorical target variable.
329+
target_stats_sparse_missing_number : int
330+
Number of missing values for a sparse block of numerical or categorical target
331+
variables.
329332
target_stats_mode : str
330333
Mode of a categorical target variable.
331334
target_stats_mode_frequency : int
@@ -340,6 +343,8 @@ class PreparationReport:
340343
*Supervised analysis only:* Number of informative variables.
341344
max_constructed_variables : int
342345
Maximum number of constructed variables specified for the analysis.
346+
max_text_features : int
347+
Maximum number of text features specified for the analysis.
343348
max_trees : int
344349
Maximum number of constructed trees specified for the analysis.
345350
max_pairs : int
@@ -403,13 +408,14 @@ def __init__(self, json_data=None):
403408
json_target_values = json_summary.get("targetValues", {})
404409
self.target_values = json_target_values.get("values")
405410
self.target_value_frequencies = json_target_values.get("frequencies")
411+
self.target_stats_missing_number = json_stats.get("missingNumber")
412+
self.target_stats_sparse_missing_number = json_stats.get("sparseMissingNumber")
406413

407414
# Initialize regression only target stats
408415
self.target_stats_min = json_stats.get("min")
409416
self.target_stats_max = json_stats.get("max")
410417
self.target_stats_mean = json_stats.get("mean")
411418
self.target_stats_std_dev = json_stats.get("stdDev")
412-
self.target_stats_missing_number = json_stats.get("missingNumber")
413419

414420
# Initialize classification only target stats
415421
self.main_target_value = json_summary.get("mainTargetValue")
@@ -423,6 +429,7 @@ def __init__(self, json_data=None):
423429
self.max_constructed_variables = json_feature_eng.get(
424430
"maxNumberOfConstructedVariables"
425431
)
432+
self.max_text_features = json_feature_eng.get("maxNumberOfTextFeatures")
426433
self.max_trees = json_feature_eng.get("maxNumberOfTrees")
427434
self.max_pairs = json_feature_eng.get("maxNumberOfVariablePairs")
428435
self.discretization = json_summary.get("discretization", "")
@@ -513,6 +520,12 @@ def write_report(self, writer):
513520
writer.writeln(f"Instances\t{self.instance_number}")
514521
writer.writeln(f"Learning task\t{self.learning_task}")
515522

523+
# Write common attributes for classification and regression
524+
if self.target_stats_missing_number is not None:
525+
writer.writeln(f"\tMissing number\t{self.target_stats_missing_number}")
526+
if self.target_stats_sparse_missing_number is not None:
527+
writer.writeln(f"\tSparse missing number\t{self.target_stats_sparse_missing_number}")
528+
516529
# Write classification specific attributes
517530
if "Classification" in self.learning_task:
518531
writer.writeln(f"Target variable\t{self.target_variable}")
@@ -536,7 +549,6 @@ def write_report(self, writer):
536549
writer.writeln(f"\tMax\t{self.target_stats_max}")
537550
writer.writeln(f"\tMean\t{self.target_stats_mean}")
538551
writer.writeln(f"\tStd dev\t{self.target_stats_std_dev}")
539-
writer.writeln(f"\tMissing number\t{self.target_stats_missing_number}")
540552
# Write variable preparation summary attributes
541553
if len(self.variable_types) > 0 and self.instance_number > 0:
542554
writer.writeln(f"Evaluated variables\t{self.evaluated_variable_number}")
@@ -546,6 +558,11 @@ def write_report(self, writer):
546558
"Max number of constructed variables\t"
547559
f"{self.max_constructed_variables}"
548560
)
561+
if self.max_text_features is not None:
562+
writer.writeln(
563+
"Max number of text features\t"
564+
f"{self.max_text_features}"
565+
)
549566
if self.max_trees is not None:
550567
writer.writeln(f"Max number of trees\t{self.max_trees}")
551568
if self.max_pairs is not None:
@@ -1458,6 +1475,8 @@ class VariableStatistics:
14581475
Standard deviation of the variable.
14591476
missing_number : int
14601477
Number of missing values of the variable.
1478+
sparse_missing_number : int
1479+
Number of missing values of the sparse block.
14611480
mode : float
14621481
Most common value.
14631482
mode_frequency : int
@@ -1499,13 +1518,14 @@ def __init__(self, json_data=None):
14991518
self.target_part_number = json_data.get("targetParts")
15001519
self.part_number = json_data.get("parts")
15011520
self.value_number = json_data.get("values", 0)
1521+
self.missing_number = json_data.get("missingNumber")
1522+
self.sparse_missing_number = json_data.get("sparseMissingNumber")
15021523

15031524
# Initialize numerical variable attributes
15041525
self.min = json_data.get("min")
15051526
self.max = json_data.get("max")
15061527
self.mean = json_data.get("mean")
15071528
self.std_dev = json_data.get("stdDev")
1508-
self.missing_number = json_data.get("missingNumber")
15091529

15101530
# Initialize categorical variable attributes
15111531
self.mode = json_data.get("mode")
@@ -1593,6 +1613,7 @@ def write_report_header_line(self, writer):
15931613
writer.write("Mean\t")
15941614
writer.write("StdDev\t")
15951615
writer.write("Missing number\t")
1616+
writer.write("Sparse missing number\t")
15961617
writer.write("Mode\t")
15971618
writer.write("Mode frequency\t")
15981619
writer.write("Construction cost\t")
@@ -1639,15 +1660,19 @@ def write_report_line(self, writer):
16391660
writer.write(f"{self.mean}\t")
16401661
writer.write(f"{self.std_dev}\t")
16411662
writer.write(f"{self.missing_number}\t")
1663+
writer.write(f"{self.sparse_missing_number}\t")
16421664
else:
1643-
writer.write("\t" * 5)
1665+
writer.write("\t" * 6)
16441666

16451667
# Write attributes available only for categorical variables
16461668
if self.type == "Categorical":
1669+
writer.write(f"{self.missing_number}\t")
1670+
writer.write(f"{self.sparse_missing_number}\t")
16471671
writer.write(f"{self.mode}\t")
16481672
writer.write(f"{self.mode_frequency}\t")
16491673
else:
1650-
writer.write("\t\t")
1674+
writer.write("\t" * 2)
1675+
16511676
writer.write(f"{self.construction_cost}\t")
16521677

16531678
# Write preparation cost only for the supervised case
@@ -2465,18 +2490,16 @@ class TrainedPredictor:
24652490
24662491
Attributes
24672492
----------
2468-
type : str
2469-
Predictor type. Valid values are found in the ``predictor_types`` class
2470-
attribute. They are:
2493+
family : str
2494+
Predictor family name. Valid values are found in the ``predictor_families``
2495+
class variable. They are:
24712496
2472-
- "Selective Naive Bayes"
2473-
- "MAP Naive Bayes" **Deprecated**
2474-
- "Naive Bayes"
2475-
- "Univariate"
2497+
- "Baseline": for regression only,
2498+
- "Selective Naive Bayes": in all other cases.
24762499
2477-
family : "Classifier" or "Regressor"
2478-
Predictor family name. Valid values are found in the ``predictor_families``
2479-
class variable.
2500+
type : "Classifier" or "Regressor"
2501+
Predictor type. Valid values are found in the ``predictor_types`` class
2502+
attribute.
24802503
name : str
24812504
Human readable predictor name.
24822505
variable_number : int
@@ -2489,9 +2512,6 @@ class variable.
24892512
predictor_types = ["Classifier", "Regressor"]
24902513
predictor_families = [
24912514
"Selective Naive Bayes",
2492-
"MAP Naive Bayes",
2493-
"Naive Bayes",
2494-
"Univariate",
24952515
]
24962516

24972517
def __init__(self, json_data=None):

tests/resources/analysis_results/ref_json_reports/Adult.khj

Lines changed: 0 additions & 41 deletions
Large diffs are not rendered by default.

tests/resources/analysis_results/ref_json_reports/AdultEvaluation.khj

Lines changed: 0 additions & 17 deletions
Large diffs are not rendered by default.

tests/resources/analysis_results/ref_json_reports/Iris2D.khj

Lines changed: 0 additions & 49 deletions
Large diffs are not rendered by default.

tests/resources/analysis_results/ref_json_reports/IrisC.khj

Lines changed: 0 additions & 49 deletions
Large diffs are not rendered by default.

tests/resources/analysis_results/ref_json_reports/IrisG.khj

Lines changed: 0 additions & 49 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)