|
4 | 4 |
|
5 | 5 | from vectordb_bench import config |
6 | 6 | from vectordb_bench.backend.clients.api import MetricType |
7 | | -from vectordb_bench.backend.filter import Filter, FilterOp, IntFilter, LabelFilter, NonFilter, non_filter |
| 7 | +from vectordb_bench.backend.filter import Filter, FilterOp, IntFilter, LabelFilter, NewIntFilter, NonFilter, non_filter |
8 | 8 | from vectordb_bench.base import BaseModel |
9 | 9 | from vectordb_bench.frontend.components.custom.getCustomConfig import CustomDatasetConfig |
10 | 10 |
|
@@ -52,6 +52,8 @@ class CaseType(Enum): |
52 | 52 |
|
53 | 53 | StreamingPerformanceCase = 200 |
54 | 54 |
|
| 55 | + NewIntFilterPerformanceCase = 250 |
| 56 | + |
55 | 57 | LabelFilterPerformanceCase = 300 |
56 | 58 |
|
57 | 59 | def case_cls(self, custom_configs: dict | None = None) -> type["Case"]: |
@@ -130,6 +132,7 @@ class PerformanceCase(Case): |
130 | 132 | filter_rate: float | None = None |
131 | 133 | load_timeout: float | int = config.LOAD_TIMEOUT_DEFAULT |
132 | 134 | optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_DEFAULT |
| 135 | + int_value: float | None = None |
133 | 136 |
|
134 | 137 |
|
135 | 138 | class CapacityDim960(CapacityCase): |
@@ -471,6 +474,46 @@ def __init__( |
471 | 474 | ) |
472 | 475 |
|
473 | 476 |
|
class NewIntFilterPerformanceCase(PerformanceCase):
    """Performance case whose ``filters`` is a ``NewIntFilter`` on the train id field.

    The case is configured by a dataset/size selector and a filter rate; the
    filter's integer value is computed from the dataset size each time
    ``filters`` is read.
    """

    case_id: CaseType = CaseType.NewIntFilterPerformanceCase
    dataset_with_size_type: DatasetWithSizeType
    filter_rate: float

    def __init__(
        self,
        dataset_with_size_type: DatasetWithSizeType | str,
        filter_rate: float,
        int_value: float | None = 0,
        **kwargs,
    ):
        """Build the case from a dataset/size selector and a filter rate.

        Args:
            dataset_with_size_type: A ``DatasetWithSizeType`` member, or a value
                that ``DatasetWithSizeType(...)`` accepts.
            filter_rate: Rate shown in the case name/description (as a
                percentage) and used to build the ``IntFilter`` whose
                ``filter_rate`` is handed to the base class.
            int_value: Passed to ``IntFilter`` and forwarded to the base class.
            **kwargs: Forwarded unchanged to ``PerformanceCase.__init__``.
        """
        # Accept either the enum member itself or something convertible to it.
        size_type = (
            dataset_with_size_type
            if isinstance(dataset_with_size_type, DatasetWithSizeType)
            else DatasetWithSizeType(dataset_with_size_type)
        )
        super().__init__(
            # Name and description use the rate exactly as it was passed in.
            name=f"Int-Filter-{filter_rate*100:.1f}% - {size_type.value}",
            description=f"Int-Filter-{filter_rate*100:.1f}% Performance Test ({size_type.value})",
            dataset=size_type.get_manager(),
            load_timeout=size_type.get_load_timeout(),
            optimize_timeout=size_type.get_optimize_timeout(),
            # The stored rate is read back from an IntFilter built from the arguments.
            filter_rate=IntFilter(filter_rate=filter_rate, int_value=int_value).filter_rate,
            int_value=int_value,
            dataset_with_size_type=size_type,
            **kwargs,
        )

    @property
    def filters(self) -> Filter:
        """Return the ``NewIntFilter`` for this case.

        The filter targets the dataset's ``train_id_field`` and uses
        ``int(dataset size * filter_rate)`` as its integer value.
        NOTE(review): the ``int_value`` stored on the case is not read here.
        """
        data = self.dataset.data
        id_field = data.train_id_field
        threshold = int(data.size * self.filter_rate)
        return NewIntFilter(
            filter_rate=self.filter_rate,
            int_field=id_field,
            int_value=threshold,
        )
| 515 | + |
| 516 | + |
474 | 517 | class LabelFilterPerformanceCase(PerformanceCase): |
475 | 518 | case_id: CaseType = CaseType.LabelFilterPerformanceCase |
476 | 519 | dataset_with_size_type: DatasetWithSizeType |
@@ -529,5 +572,6 @@ def filters(self) -> Filter: |
529 | 572 | CaseType.Performance1536D50K: Performance1536D50K, |
530 | 573 | CaseType.PerformanceCustomDataset: PerformanceCustomDataset, |
531 | 574 | CaseType.StreamingPerformanceCase: StreamingPerformanceCase, |
| 575 | + CaseType.NewIntFilterPerformanceCase: NewIntFilterPerformanceCase, |
532 | 576 | CaseType.LabelFilterPerformanceCase: LabelFilterPerformanceCase, |
533 | 577 | } |
0 commit comments