77import tarfile
88from abc import abstractmethod
99from enum import Enum
10+ from typing import TYPE_CHECKING
1011from typing import Any
1112from typing import ClassVar
1213from typing import Dict
3738
3839EVIDENTLY_DATASET_EXT = "evidently_dataset"
3940
41+ if TYPE_CHECKING :
42+ from evidently .core .container import MetricOrContainer
43+
4044
4145class ColumnRole (Enum ):
4246 Unset = "Unset"
@@ -175,6 +179,19 @@ class LLMClassification:
175179 name : str = "llm_default"
176180
177181
class SpecialColumnInfo(AutoAliasMixin, EvidentlyBaseModel):
    """Base model describing columns that need handling beyond plain typing.

    The base implementation is neutral: it contributes no metrics and claims
    no column. Subclasses override the hooks they need.
    """

    __alias_type__: ClassVar = "special_column_info"

    class Config:
        # NOTE(review): presumably marks this class as a polymorphic base for
        # the alias machinery -- confirm against EvidentlyBaseModel.
        is_base_type = True

    def get_metrics(self) -> List["MetricOrContainer"]:
        """Return the metrics associated with this info (empty in the base class)."""
        no_metrics: List["MetricOrContainer"] = []
        return no_metrics

    def get_column_type(self, column_name: str) -> Optional[ColumnType]:
        """Return the type of ``column_name`` if this info describes it.

        The base implementation describes no column and always returns
        ``None``.
        """
        return None
193+
194+
178195LLMDefinition = Union [Completion , RAG , LLMClassification ]
179196
180197
@@ -200,6 +217,7 @@ class DataDefinition(BaseModel):
200217 categorical_descriptors : List [str ] = []
201218 test_descriptors : Optional [List [str ]] = None
202219 ranking : Optional [List [Recsys ]] = None
220+ special_columns : List [SpecialColumnInfo ] = []
203221
204222 def __init__ (
205223 self ,
@@ -259,6 +277,10 @@ def get_column_type(self, column_name: str) -> ColumnType:
259277 return ColumnType .Date
260278 if column_name == self .id_column :
261279 return ColumnType .Id
280+ for special_column in self .special_columns :
281+ ct = special_column .get_column_type (column_name )
282+ if ct is not None :
283+ return ct
262284 return ColumnType .Unknown
263285
264286 def get_classification (self , classification_id : str ) -> Optional [Classification ]:
@@ -393,6 +415,12 @@ def list_input_columns(self) -> Optional[List[str]]: # todo: make not optional
def get_sub_descriptors(self) -> List["Descriptor"]:
    """Convert every entry of ``self.tests`` into a descriptor bound to ``self``.

    Returns:
        One descriptor per test, in the same order as ``self.tests``.
    """
    converted = []
    for test in self.tests:
        converted.append(test.to_descriptor(self))
    return converted
395417
def get_special_columns_info(self, rename: Dict[str, str]) -> List[SpecialColumnInfo]:
    """Return special-column descriptions for the columns this descriptor generated.

    Args:
        rename: mapping from the column key produced by the descriptor to the
            name under which the column was stored in the dataset.

    Returns:
        An empty list; the default implementation ignores ``rename``.
    """
    nothing_special: List[SpecialColumnInfo] = []
    return nothing_special
420+
def add_to_descriptors_list(self) -> bool:
    """Tell the dataset whether generated columns should be listed as descriptors.

    The default is ``True``; subclasses may override to opt out.
    """
    should_register = True
    return should_register
423+
396424
397425class SingleInputDescriptor (Descriptor , abc .ABC ):
398426 column : str
@@ -425,13 +453,55 @@ def generate_data(
425453 return DatasetColumn (ColumnType .Categorical , res )
426454
427455
class TestSummaryInfo(SpecialColumnInfo):
    """Dataset column names of the outputs produced by a test summary.

    Each ``*_column`` field holds the name of the dataset column for one
    summary output, or ``None`` when that output is absent.
    """

    # Column with the "all tests passed" flag.
    all_column: Optional[str] = None
    # Column with the "any test passed" flag.
    any_column: Optional[str] = None
    # Column with the count summary.
    count_column: Optional[str] = None
    # Column with the rate summary.
    rate_column: Optional[str] = None
    # Column with the score.
    score_column: Optional[str] = None
    # Weights forwarded from the originating summary, if any.
    score_weights: Optional[Dict[str, float]] = None

    @property
    def has_all(self) -> bool:
        """Whether an "all" column is present."""
        # Fixed: this previously inspected ``any_column`` (copy-paste slip),
        # so ``has_all`` mirrored ``has_any`` instead of its own field.
        return self.all_column is not None

    @property
    def has_any(self) -> bool:
        """Whether an "any" column is present."""
        return self.any_column is not None

    @property
    def has_count(self) -> bool:
        """Whether a count column is present."""
        return self.count_column is not None

    @property
    def has_rate(self) -> bool:
        """Whether a rate column is present."""
        return self.rate_column is not None

    @property
    def has_score(self) -> bool:
        """Whether a score column is present."""
        return self.score_column is not None

    def get_metrics(self) -> List["MetricOrContainer"]:
        """Return a single ``TestSummaryInfoPreset`` built from this info."""
        # Imported at call time rather than module level.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from evidently.presets.special import TestSummaryInfoPreset

        return [TestSummaryInfoPreset(column_info=self)]

    def get_column_type(self, column_name: str) -> Optional[ColumnType]:
        """Return the column type for ``column_name`` if it is a summary column.

        The all/any columns are categorical; the count, rate and score
        columns are numerical. Any other name yields ``None``.
        """
        if column_name in (self.all_column, self.any_column):
            return ColumnType.Categorical
        if column_name in (self.count_column, self.rate_column, self.score_column):
            return ColumnType.Numerical
        return None
495+
496+
428497class TestSummary (Descriptor ):
429498 success_all : bool = True
430499 success_any : bool = False
431500 success_count : bool = False
432501 success_rate : bool = False
433502 score : bool = False
434503 score_weights : Optional [Dict [str , float ]] = None
504+ normalize_scores : bool = True
435505
436506 def __init__ (
437507 self ,
@@ -442,6 +512,7 @@ def __init__(
442512 score : bool = False ,
443513 score_weights : Optional [Dict [str , float ]] = None ,
444514 alias : Optional [str ] = None ,
515+ normalize_scores : bool = True ,
445516 ** data : Any ,
446517 ):
447518 self .success_all = success_all
@@ -450,6 +521,7 @@ def __init__(
450521 self .success_rate = success_rate
451522 self .score = score
452523 self .score_weights = score_weights
524+ self .normalize_scores = normalize_scores
453525 super ().__init__ (alias = alias or "summary" , ** data )
454526
455527 def generate_data (
@@ -470,7 +542,7 @@ def generate_data(
470542 summary_columns ["success_any" ] = (ColumnType .Categorical , test_results .any (axis = 1 ))
471543 if self .score :
472544 weights = self .score_weights or {t : 1 for t in tests }
473- total_weight = sum (weights .values ())
545+ total_weight = sum (weights .values ()) if self . normalize_scores else 1
474546 summary_columns ["score" ] = ( # type: ignore[assignment]
475547 ColumnType .Numerical ,
476548 sum (test_results [col ] * weight / total_weight for col , weight in weights .items ()),
@@ -488,6 +560,33 @@ def list_input_columns(self) -> Optional[List[str]]:
488560 return list (self .score_weights .keys ())
489561 return None
490562
def get_special_columns_info(self, rename: Dict[str, str]) -> List[SpecialColumnInfo]:
    """Describe the summary columns this descriptor added to the dataset.

    Args:
        rename: mapping from the column key produced by this descriptor to
            the name under which the column was stored in the dataset.

    Returns:
        A one-element list holding a ``TestSummaryInfo`` whose ``*_column``
        fields are set for every enabled output and ``None`` otherwise.

    Raises:
        KeyError: if an enabled output has no matching key in ``rename``.
    """
    alias = self.alias or "summary"
    # With exactly one generated column the key is the alias itself;
    # otherwise each output is keyed as "<alias>_<output>".
    single_column = len(rename) == 1

    def resolve(enabled: bool, output: str) -> Optional[str]:
        # Look up the stored name only for outputs that were requested.
        if not enabled:
            return None
        return rename[alias] if single_column else rename[f"{alias}_{output}"]

    return [
        TestSummaryInfo(
            all_column=resolve(self.success_all, "success_all"),
            any_column=resolve(self.success_any, "success_any"),
            count_column=resolve(self.success_count, "success_count"),
            rate_column=resolve(self.success_rate, "success_rate"),
            score_column=resolve(self.score, "score"),
            # Forwarded in both cases; the single-column case used to drop it.
            score_weights=self.score_weights,
        )
    ]
586+
def add_to_descriptors_list(self) -> bool:
    """Opt out of descriptor-list registration for this descriptor's columns.

    Always returns ``False``.
    """
    should_register = False
    return should_register
589+
491590
492591class FeatureDescriptor (Descriptor ):
493592 feature : GeneratedFeatures
@@ -918,27 +1017,30 @@ def _generate_data_definition(
9181017 def stats (self ) -> DatasetStats :
9191018 return self ._dataset_stats
9201019
def add_column(self, key: str, data: DatasetColumn, add_to_descriptor_list: bool = True):
    """Store a new column and update the dataset statistics for it.

    Args:
        key: name under which the column is stored.
        data: the column; ``data.type`` selects the statistics and the
            descriptor list, ``data.data`` holds the values.
        add_to_descriptor_list: when ``False`` the column is stored and
            counted but not appended to the numerical/categorical
            descriptor lists of the data definition.
    """
    dataset_stats = self._dataset_stats
    dataset_stats.column_count += 1
    dataset_stats.column_stats[key] = self._collect_stats(data.type, data.data)
    self._data[key] = data.data

    # Nothing more to do when the caller opted out of descriptor registration.
    if not add_to_descriptor_list:
        return
    if data.type == ColumnType.Numerical:
        self._data_definition.numerical_descriptors.append(key)
    if data.type == ColumnType.Categorical:
        self._data_definition.categorical_descriptors.append(key)
9291028
def add_descriptor(self, descriptor: Descriptor, options: AnyOptions = None):
    """Compute a descriptor, store its columns, then process its sub-descriptors.

    Args:
        descriptor: descriptor to validate against the data definition and run.
        options: passed through ``Options.from_any_options`` for data
            generation and forwarded unchanged to sub-descriptors.
    """
    descriptor.validate_input(self._data_definition)
    generated = descriptor.generate_data(self, Options.from_any_options(options))
    # A single column is normalized to a one-entry mapping keyed by the alias.
    if isinstance(generated, DatasetColumn):
        generated = {descriptor.alias: generated}

    is_column_test = isinstance(descriptor, ColumnTest)
    rename = {}  # generated key -> name actually used in the dataset
    for generated_key, column in generated.items():
        # Existing names are re-read on every pass because add_column below
        # inserts into self._data.
        stored_name = _determine_descriptor_column_name(generated_key, self._data.columns.tolist())
        rename[generated_key] = stored_name
        self.add_column(stored_name, column, descriptor.add_to_descriptors_list())
        # NOTE(review): indentation is not visible in the reviewed text; the
        # test-descriptor bookkeeping is assumed to run per column because it
        # uses the per-column name -- confirm.
        if not is_column_test:
            continue
        if self._data_definition.test_descriptors is None:
            self._data_definition.test_descriptors = []
        self._data_definition.test_descriptors.append(stored_name)

    special_columns = descriptor.get_special_columns_info(rename)
    self.data_definition.special_columns.extend(special_columns)
    for sub_descriptor in descriptor.get_sub_descriptors():
        self.add_descriptor(sub_descriptor, options)
9441046
0 commit comments