@@ -92,8 +92,8 @@ class AnalysisResults(KhiopsJSONObject):
9292 specified it returns an empty instance.
9393
9494 .. note::
95- See also the `.read_analysis_results_file` function from the core API to
96- obtain an instance of this class from a Khiops JSON file.
95+ See also the `.read_analysis_results_file` function to obtain an instance
96+ of this class from a Khiops JSON file.
9797
9898 Attributes
9999 ----------
@@ -325,7 +325,10 @@ class PreparationReport:
325325 target_stats_std_dev : float
326326 Standard deviation of a numerical target variable.
327327 target_stats_missing_number : int
328- Number of missing values for a numerical target variable.
328+ Number of missing values for a numerical or categorical target variable.
329+ target_stats_sparse_missing_number : int
330+ Number of missing values for a sparse block of numerical or categorical target
331+ variables.
329332 target_stats_mode : str
330333 Mode of a categorical target variable.
331334 target_stats_mode_frequency : int
@@ -340,6 +343,8 @@ class PreparationReport:
340343 *Supervised analysis only:* Number of informative variables.
341344 max_constructed_variables : int
342345 Maximum number of constructed variables specified for the analysis.
346+ max_text_features : int
347+ Maximum number of text features specified for the analysis.
343348 max_trees : int
344349 Maximum number of constructed trees specified for the analysis.
345350 max_pairs : int
@@ -403,13 +408,14 @@ def __init__(self, json_data=None):
403408 json_target_values = json_summary .get ("targetValues" , {})
404409 self .target_values = json_target_values .get ("values" )
405410 self .target_value_frequencies = json_target_values .get ("frequencies" )
411+ self .target_stats_missing_number = json_stats .get ("missingNumber" )
412+ self .target_stats_sparse_missing_number = json_stats .get ("sparseMissingNumber" )
406413
407414 # Initialize regression only target stats
408415 self .target_stats_min = json_stats .get ("min" )
409416 self .target_stats_max = json_stats .get ("max" )
410417 self .target_stats_mean = json_stats .get ("mean" )
411418 self .target_stats_std_dev = json_stats .get ("stdDev" )
412- self .target_stats_missing_number = json_stats .get ("missingNumber" )
413419
414420 # Initialize classification only target stats
415421 self .main_target_value = json_summary .get ("mainTargetValue" )
@@ -423,6 +429,7 @@ def __init__(self, json_data=None):
423429 self .max_constructed_variables = json_feature_eng .get (
424430 "maxNumberOfConstructedVariables"
425431 )
432+ self .max_text_features = json_feature_eng .get ("maxNumberOfTextFeatures" )
426433 self .max_trees = json_feature_eng .get ("maxNumberOfTrees" )
427434 self .max_pairs = json_feature_eng .get ("maxNumberOfVariablePairs" )
428435 self .discretization = json_summary .get ("discretization" , "" )
@@ -513,6 +520,12 @@ def write_report(self, writer):
513520 writer .writeln (f"Instances\t { self .instance_number } " )
514521 writer .writeln (f"Learning task\t { self .learning_task } " )
515522
523+ # Write common attributes for classification and regression
524+ if self .target_stats_missing_number is not None :
525+ writer .writeln (f"\t Missing number\t { self .target_stats_missing_number } " )
526+ if self .target_stats_sparse_missing_number is not None :
527+ writer .writeln (f"\t Sparse missing number\t { self .target_stats_sparse_missing_number } " )
528+
516529 # Write classification specific attributes
517530 if "Classification" in self .learning_task :
518531 writer .writeln (f"Target variable\t { self .target_variable } " )
@@ -536,7 +549,6 @@ def write_report(self, writer):
536549 writer .writeln (f"\t Max\t { self .target_stats_max } " )
537550 writer .writeln (f"\t Mean\t { self .target_stats_mean } " )
538551 writer .writeln (f"\t Std dev\t { self .target_stats_std_dev } " )
539- writer .writeln (f"\t Missing number\t { self .target_stats_missing_number } " )
540552 # Write variable preparation summary attributes
541553 if len (self .variable_types ) > 0 and self .instance_number > 0 :
542554 writer .writeln (f"Evaluated variables\t { self .evaluated_variable_number } " )
@@ -546,6 +558,11 @@ def write_report(self, writer):
546558 "Max number of constructed variables\t "
547559 f"{ self .max_constructed_variables } "
548560 )
561+ if self .max_text_features is not None :
562+ writer .writeln (
563+ "Max number of text features\t "
564+ f"{ self .max_text_features } "
565+ )
549566 if self .max_trees is not None :
550567 writer .writeln (f"Max number of trees\t { self .max_trees } " )
551568 if self .max_pairs is not None :
@@ -1458,6 +1475,8 @@ class VariableStatistics:
14581475 Standard deviation of the variable.
14591476 missing_number : int
14601477 Number of missing values of the variable.
1478+ sparse_missing_number : int
1479+ Number of missing values of the sparse block.
14611480 mode : float
14621481 Most common value.
14631482 mode_frequency : int
@@ -1499,13 +1518,14 @@ def __init__(self, json_data=None):
14991518 self .target_part_number = json_data .get ("targetParts" )
15001519 self .part_number = json_data .get ("parts" )
15011520 self .value_number = json_data .get ("values" , 0 )
1521+ self .missing_number = json_data .get ("missingNumber" )
1522+ self .sparse_missing_number = json_data .get ("sparseMissingNumber" )
15021523
15031524 # Initialize numerical variable attributes
15041525 self .min = json_data .get ("min" )
15051526 self .max = json_data .get ("max" )
15061527 self .mean = json_data .get ("mean" )
15071528 self .std_dev = json_data .get ("stdDev" )
1508- self .missing_number = json_data .get ("missingNumber" )
15091529
15101530 # Initialize categorical variable attributes
15111531 self .mode = json_data .get ("mode" )
@@ -1593,6 +1613,7 @@ def write_report_header_line(self, writer):
15931613 writer .write ("Mean\t " )
15941614 writer .write ("StdDev\t " )
15951615 writer .write ("Missing number\t " )
1616+ writer .write ("Sparse missing number\t " )
15961617 writer .write ("Mode\t " )
15971618 writer .write ("Mode frequency\t " )
15981619 writer .write ("Construction cost\t " )
@@ -1639,15 +1660,19 @@ def write_report_line(self, writer):
16391660 writer .write (f"{ self .mean } \t " )
16401661 writer .write (f"{ self .std_dev } \t " )
16411662 writer .write (f"{ self .missing_number } \t " )
1663+ writer .write (f"{ self .sparse_missing_number } \t " )
16421664 else :
1643- writer .write ("\t " * 5 )
1665+ writer .write ("\t " * 6 )
16441666
16451667 # Write attributes available only for categorical variables
16461668 if self .type == "Categorical" :
1669+ writer .write (f"{ self .missing_number } \t " )
1670+ writer .write (f"{ self .sparse_missing_number } \t " )
16471671 writer .write (f"{ self .mode } \t " )
16481672 writer .write (f"{ self .mode_frequency } \t " )
16491673 else :
1650- writer .write ("\t \t " )
1674+ writer .write ("\t " * 2 )
1675+
16511676 writer .write (f"{ self .construction_cost } \t " )
16521677
16531678 # Write preparation cost only for the supervised case
@@ -2465,18 +2490,16 @@ class TrainedPredictor:
24652490
24662491 Attributes
24672492 ----------
2468- type : str
2469- Predictor type . Valid values are found in the ``predictor_types`` class
2470- attribute . They are:
2493+ family : str
2494+ Predictor family name. Valid values are found in the ``predictor_families``
2495+ class variable. They are:
24712496
2472- - "Selective Naive Bayes"
2473- - "MAP Naive Bayes" **Deprecated**
2474- - "Naive Bayes"
2475- - "Univariate"
2497+ - "Baseline": for regression only,
2498+ - "Selective Naive Bayes": in all other cases.
24762499
2477- family : "Classifier" or "Regressor"
2478- Predictor family name . Valid values are found in the ``predictor_families``
2479- class variable .
2500+ type : "Classifier" or "Regressor"
2501+ Predictor type. Valid values are found in the ``predictor_types`` class
2502+ attribute.
24802503 name : str
24812504 Human readable predictor name.
24822505 variable_number : int
@@ -2489,9 +2512,6 @@ class variable.
24892512 predictor_types = ["Classifier" , "Regressor" ]
24902513 predictor_families = [
24912514 "Selective Naive Bayes" ,
2492- "MAP Naive Bayes" ,
2493- "Naive Bayes" ,
2494- "Univariate" ,
24952515 ]
24962516
24972517 def __init__ (self , json_data = None ):
0 commit comments