Skip to content

Commit 95f689e

Browse files
authored
Render row tests (#1695)
* Render row tests * fix lint and tests * fix lint
1 parent 3e57b55 commit 95f689e

9 files changed

Lines changed: 268 additions & 6 deletions

File tree

src/evidently/core/datasets.py

Lines changed: 107 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import tarfile
88
from abc import abstractmethod
99
from enum import Enum
10+
from typing import TYPE_CHECKING
1011
from typing import Any
1112
from typing import ClassVar
1213
from typing import Dict
@@ -37,6 +38,9 @@
3738

3839
EVIDENTLY_DATASET_EXT = "evidently_dataset"
3940

41+
if TYPE_CHECKING:
42+
from evidently.core.container import MetricOrContainer
43+
4044

4145
class ColumnRole(Enum):
4246
Unset = "Unset"
@@ -175,6 +179,19 @@ class LLMClassification:
175179
name: str = "llm_default"
176180

177181

182+
class SpecialColumnInfo(AutoAliasMixin, EvidentlyBaseModel):
    """Base model describing columns that need dedicated handling.

    Subclasses override the two hooks below; the defaults contribute
    no metrics and claim no columns.
    """

    __alias_type__: ClassVar = "special_column_info"

    class Config:
        is_base_type = True

    def get_metrics(self) -> List["MetricOrContainer"]:
        """Return the metrics (or metric containers) for these columns.

        The base implementation contributes nothing.
        """
        no_metrics: List["MetricOrContainer"] = []
        return no_metrics

    def get_column_type(self, column_name: str) -> Optional[ColumnType]:
        """Return the type of ``column_name`` if this info describes it.

        The base implementation describes no columns, so it always
        answers ``None``.
        """
        return None
193+
194+
178195
LLMDefinition = Union[Completion, RAG, LLMClassification]
179196

180197

@@ -200,6 +217,7 @@ class DataDefinition(BaseModel):
200217
categorical_descriptors: List[str] = []
201218
test_descriptors: Optional[List[str]] = None
202219
ranking: Optional[List[Recsys]] = None
220+
special_columns: List[SpecialColumnInfo] = []
203221

204222
def __init__(
205223
self,
@@ -259,6 +277,10 @@ def get_column_type(self, column_name: str) -> ColumnType:
259277
return ColumnType.Date
260278
if column_name == self.id_column:
261279
return ColumnType.Id
280+
for special_column in self.special_columns:
281+
ct = special_column.get_column_type(column_name)
282+
if ct is not None:
283+
return ct
262284
return ColumnType.Unknown
263285

264286
def get_classification(self, classification_id: str) -> Optional[Classification]:
@@ -393,6 +415,12 @@ def list_input_columns(self) -> Optional[List[str]]: # todo: make not optional
393415
def get_sub_descriptors(self) -> List["Descriptor"]:
394416
return [t.to_descriptor(self) for t in self.tests]
395417

418+
def get_special_columns_info(self, rename: Dict[str, str]) -> List[SpecialColumnInfo]:
    """Describe the special columns this descriptor produced.

    :param rename: mapping from the column keys the descriptor generated
        to the names under which they were added to the dataset.
    :return: an empty list by default; subclasses may override.
    """
    nothing_special: List[SpecialColumnInfo] = []
    return nothing_special
420+
421+
def add_to_descriptors_list(self) -> bool:
    """Tell the dataset whether generated columns should be listed as descriptors.

    The default is ``True``; subclasses may override to opt out.
    """
    should_register = True
    return should_register
423+
396424

397425
class SingleInputDescriptor(Descriptor, abc.ABC):
398426
column: str
@@ -425,13 +453,55 @@ def generate_data(
425453
return DatasetColumn(ColumnType.Categorical, res)
426454

427455

456+
class TestSummaryInfo(SpecialColumnInfo):
    """Names of the dataset columns that hold test-summary outputs.

    Every ``*_column`` field is either the name of the corresponding
    column or ``None`` when that output is absent.
    """

    # Column names for the individual summary outputs (None = not present).
    all_column: Optional[str] = None
    any_column: Optional[str] = None
    count_column: Optional[str] = None
    rate_column: Optional[str] = None
    score_column: Optional[str] = None
    # Optional weights associated with the score output.
    score_weights: Optional[Dict[str, float]] = None

    @property
    def has_all(self) -> bool:
        """Whether an "all" column is recorded."""
        # Fixed: this previously tested ``any_column``, so it mirrored
        # ``has_any`` instead of reflecting ``all_column``.
        return self.all_column is not None

    @property
    def has_any(self) -> bool:
        """Whether an "any" column is recorded."""
        return self.any_column is not None

    @property
    def has_count(self) -> bool:
        """Whether a count column is recorded."""
        return self.count_column is not None

    @property
    def has_rate(self) -> bool:
        """Whether a rate column is recorded."""
        return self.rate_column is not None

    @property
    def has_score(self) -> bool:
        """Whether a score column is recorded."""
        return self.score_column is not None

    def get_metrics(self) -> List["MetricOrContainer"]:
        """Return a single ``TestSummaryInfoPreset`` bound to this info."""
        # NOTE(review): imported locally, presumably to avoid an import
        # cycle with the presets package -- confirm before hoisting.
        from evidently.presets.special import TestSummaryInfoPreset

        return [TestSummaryInfoPreset(column_info=self)]

    def get_column_type(self, column_name: str) -> Optional[ColumnType]:
        """Return the column type for ``column_name``.

        The all/any columns are categorical and the count/rate/score
        columns are numerical. Any other name yields ``None``.
        """
        if column_name in (self.all_column, self.any_column):
            return ColumnType.Categorical
        if column_name in (self.count_column, self.rate_column, self.score_column):
            return ColumnType.Numerical
        return None
495+
496+
428497
class TestSummary(Descriptor):
429498
success_all: bool = True
430499
success_any: bool = False
431500
success_count: bool = False
432501
success_rate: bool = False
433502
score: bool = False
434503
score_weights: Optional[Dict[str, float]] = None
504+
normalize_scores: bool = True
435505

436506
def __init__(
437507
self,
@@ -442,6 +512,7 @@ def __init__(
442512
score: bool = False,
443513
score_weights: Optional[Dict[str, float]] = None,
444514
alias: Optional[str] = None,
515+
normalize_scores: bool = True,
445516
**data: Any,
446517
):
447518
self.success_all = success_all
@@ -450,6 +521,7 @@ def __init__(
450521
self.success_rate = success_rate
451522
self.score = score
452523
self.score_weights = score_weights
524+
self.normalize_scores = normalize_scores
453525
super().__init__(alias=alias or "summary", **data)
454526

455527
def generate_data(
@@ -470,7 +542,7 @@ def generate_data(
470542
summary_columns["success_any"] = (ColumnType.Categorical, test_results.any(axis=1))
471543
if self.score:
472544
weights = self.score_weights or {t: 1 for t in tests}
473-
total_weight = sum(weights.values())
545+
total_weight = sum(weights.values()) if self.normalize_scores else 1
474546
summary_columns["score"] = ( # type: ignore[assignment]
475547
ColumnType.Numerical,
476548
sum(test_results[col] * weight / total_weight for col, weight in weights.items()),
@@ -488,6 +560,33 @@ def list_input_columns(self) -> Optional[List[str]]:
488560
return list(self.score_weights.keys())
489561
return None
490562

563+
def get_special_columns_info(self, rename: Dict[str, str]) -> List[SpecialColumnInfo]:
    """Build the ``TestSummaryInfo`` describing this summary's columns.

    :param rename: mapping from the column keys this descriptor generated
        to the names under which they were added to the dataset.
    :return: a one-element list with the ``TestSummaryInfo``.
    :raises KeyError: if ``rename`` lacks the key of an enabled output.
    """
    alias = self.alias or "summary"
    # A lone generated column is keyed by the bare alias; several columns
    # are keyed as "<alias>_<suffix>".
    single_column = len(rename) == 1

    def column_for(enabled: bool, suffix: str) -> Optional[str]:
        # Only look up outputs that are switched on, so disabled outputs
        # never raise KeyError.
        if not enabled:
            return None
        return rename[alias if single_column else f"{alias}_{suffix}"]

    return [
        TestSummaryInfo(
            all_column=column_for(self.success_all, "success_all"),
            any_column=column_for(self.success_any, "success_any"),
            count_column=column_for(self.success_count, "success_count"),
            rate_column=column_for(self.success_rate, "success_rate"),
            score_column=column_for(self.score, "score"),
            # Forwarded in both cases; the single-column case used to drop it.
            score_weights=self.score_weights,
        )
    ]
586+
587+
def add_to_descriptors_list(self) -> bool:
    """Opt the summary columns out of the descriptor lists.

    Always returns ``False``.
    """
    should_register = False
    return should_register
589+
491590

492591
class FeatureDescriptor(Descriptor):
493592
feature: GeneratedFeatures
@@ -918,27 +1017,30 @@ def _generate_data_definition(
9181017
def stats(self) -> DatasetStats:
9191018
return self._dataset_stats
9201019

921-
def add_column(self, key: str, data: DatasetColumn, add_to_descriptor_list: bool = True):
    """Store a new column and update the dataset bookkeeping.

    :param key: name under which the column is stored.
    :param data: the column's type and values.
    :param add_to_descriptor_list: when true, numerical and categorical
        columns are also appended to the matching descriptor list of the
        data definition.
    """
    stats = self._dataset_stats
    stats.column_count += 1
    stats.column_stats[key] = self._collect_stats(data.type, data.data)
    self._data[key] = data.data

    if not add_to_descriptor_list:
        return

    definition = self._data_definition
    if data.type == ColumnType.Numerical:
        definition.numerical_descriptors.append(key)
    if data.type == ColumnType.Categorical:
        definition.categorical_descriptors.append(key)
9291028

9301029
def add_descriptor(self, descriptor: Descriptor, options: AnyOptions = None):
9311030
descriptor.validate_input(self._data_definition)
9321031
new_columns = descriptor.generate_data(self, Options.from_any_options(options))
9331032
if isinstance(new_columns, DatasetColumn):
9341033
new_columns = {descriptor.alias: new_columns}
1034+
rename = {}
9351035
for col, value in new_columns.items():
9361036
name = _determine_descriptor_column_name(col, self._data.columns.tolist())
937-
self.add_column(name, value)
1037+
rename[col] = name
1038+
self.add_column(name, value, descriptor.add_to_descriptors_list())
9381039
if isinstance(descriptor, ColumnTest):
9391040
if self._data_definition.test_descriptors is None:
9401041
self._data_definition.test_descriptors = []
9411042
self._data_definition.test_descriptors.append(name)
1043+
self.data_definition.special_columns.extend(descriptor.get_special_columns_info(rename))
9421044
for sub in descriptor.get_sub_descriptors():
9431045
self.add_descriptor(sub, options)
9441046

src/evidently/core/registries/descriptors.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# ruff: noqa: E501
22
# fmt: off
33
from evidently.core.datasets import Descriptor
4+
from evidently.core.datasets import SpecialColumnInfo
45
from evidently.pydantic_utils import register_type_alias
56

67
register_type_alias(Descriptor, "evidently.core.datasets.FeatureDescriptor", "evidently:descriptor_v2:FeatureDescriptor")
@@ -14,3 +15,5 @@
1415
register_type_alias(Descriptor, "evidently.core.datasets.TestSummary", "evidently:descriptor_v2:TestSummary")
1516

1617
register_type_alias(Descriptor, "evidently.descriptors.llm_judges.GenericLLMDescriptor", "evidently:descriptor_v2:GenericLLMDescriptor")
18+
19+
register_type_alias(SpecialColumnInfo, "evidently.core.datasets.TestSummaryInfo", "evidently:special_column_info:TestSummaryInfo")

src/evidently/core/registries/presets.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,5 @@
1919
register_type_alias(MetricContainer, "evidently.presets.regression.RegressionPreset", "evidently:metric_container:RegressionPreset")
2020
register_type_alias(MetricContainer, "evidently.presets.regression.RegressionQuality", "evidently:metric_container:RegressionQuality")
2121
register_type_alias(MetricContainer, "evidently.metrics.row_test_summary.RowTestSummary", "evidently:metric_container:RowTestSummary")
22+
23+
register_type_alias(MetricContainer, "evidently.presets.special.TestSummaryInfoPreset", "evidently:metric_container:TestSummaryInfoPreset")

src/evidently/legacy/renderers/html_widgets.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828

2929
class WidgetSize(int, Enum):
30+
SMALL = 0
3031
HALF = 1
3132
FULL = 2
3233

src/evidently/presets/dataset_stats.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,8 @@ def generate_metrics(self, context: Context) -> Sequence[MetricOrContainer]:
476476
metrics: List[MetricOrContainer] = [RowTestSummary(), RowCount(tests=self._get_tests(self.row_count_tests))]
477477
value_stats = self.get_value_stats(context)
478478
metrics.extend(list(chain(*[vs.metrics(context)[1:] for vs in value_stats])))
479+
for column_info in context.data_definition.special_columns:
480+
metrics.extend(column_info.get_metrics())
479481
return metrics
480482

481483
def render(
@@ -484,7 +486,14 @@ def render(
484486
child_widgets: Optional[List[Tuple[Optional[MetricId], List[BaseWidgetInfo]]]] = None,
485487
) -> List[BaseWidgetInfo]:
486488
value_stats = self.get_value_stats(context)
487-
return list(chain(*([RowTestSummary().render(context)] + [vs.render(context) for vs in value_stats])))
489+
result = list(chain(*([RowTestSummary().render(context)] + [vs.render(context) for vs in value_stats])))
490+
for column_info in context.data_definition.special_columns:
491+
for metric in column_info.get_metrics():
492+
if isinstance(metric, MetricContainer):
493+
result.extend(metric.render(context))
494+
else:
495+
result.extend(context.get_metric_result(metric).widget or [])
496+
return result
488497

489498

490499
class DataSummaryPreset(MetricContainer):

0 commit comments

Comments
 (0)