|
4 | 4 |
|
5 | 5 | from vectordb_bench import config
|
6 | 6 | from vectordb_bench.backend.clients.api import MetricType
|
7 |
| -from vectordb_bench.backend.filter import Filter, FilterOp, IntFilter, LabelFilter, NonFilter, non_filter |
| 7 | +from vectordb_bench.backend.filter import Filter, FilterOp, IntFilter, LabelFilter, NewIntFilter, NonFilter, non_filter |
8 | 8 | from vectordb_bench.base import BaseModel
|
9 | 9 | from vectordb_bench.frontend.components.custom.getCustomConfig import CustomDatasetConfig
|
10 | 10 |
|
@@ -54,6 +54,8 @@ class CaseType(Enum):
|
54 | 54 |
|
55 | 55 | LabelFilterPerformanceCase = 300
|
56 | 56 |
|
| 57 | + NewIntFilterPerformanceCase = 400 |
| 58 | + |
57 | 59 | def case_cls(self, custom_configs: dict | None = None) -> type["Case"]:
|
58 | 60 | if custom_configs is None:
|
59 | 61 | return type2case.get(self)()
|
@@ -130,6 +132,7 @@ class PerformanceCase(Case):
|
130 | 132 | filter_rate: float | None = None
|
131 | 133 | load_timeout: float | int = config.LOAD_TIMEOUT_DEFAULT
|
132 | 134 | optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_DEFAULT
|
| 135 | + int_value: float | None = None |
133 | 136 |
|
134 | 137 |
|
135 | 138 | class CapacityDim960(CapacityCase):
|
@@ -471,6 +474,46 @@ def __init__(
|
471 | 474 | )
|
472 | 475 |
|
473 | 476 |
|
class NewIntFilterPerformanceCase(PerformanceCase):
    """Performance case that searches with an integer filter on the train-id field.

    The filter threshold is not stored; it is recomputed on every access of
    ``filters`` as ``int(dataset size * filter_rate)``.
    """

    case_id: CaseType = CaseType.NewIntFilterPerformanceCase
    dataset_with_size_type: DatasetWithSizeType
    filter_rate: float

    def __init__(
        self,
        dataset_with_size_type: DatasetWithSizeType | str,
        filter_rate: float,
        int_value: float | None = 0,
        **kwargs,
    ):
        """Build the case from a dataset/size selector and a filter rate.

        Args:
            dataset_with_size_type: Enum member, or a raw value that
                ``DatasetWithSizeType(...)`` can convert into one.
            filter_rate: Fraction used for the case name/description
                (rendered as a percentage) and for the filter threshold.
            int_value: Forwarded to the parent initializer and to the
                intermediate ``IntFilter``; the ``filters`` property does
                not read it.
            **kwargs: Passed through unchanged to the parent initializer.
        """
        size_type = (
            dataset_with_size_type
            if isinstance(dataset_with_size_type, DatasetWithSizeType)
            else DatasetWithSizeType(dataset_with_size_type)
        )

        # Name and description share the same "Int-Filter-<pct>%" prefix.
        label_prefix = f"Int-Filter-{filter_rate*100:.1f}%"
        case_name = f"{label_prefix} - {size_type.value}"
        case_description = f"{label_prefix} Performance Test ({size_type.value})"

        manager = size_type.get_manager()
        load_limit = size_type.get_load_timeout()
        optimize_limit = size_type.get_optimize_timeout()

        # The rate is read back from an IntFilter instance rather than used
        # directly. NOTE(review): presumably this relies on IntFilter's own
        # validation/coercion of filter_rate -- confirm against IntFilter.
        legacy_filter = IntFilter(filter_rate=filter_rate, int_value=int_value)

        super().__init__(
            name=case_name,
            description=case_description,
            dataset=manager,
            load_timeout=load_limit,
            optimize_timeout=optimize_limit,
            filter_rate=legacy_filter.filter_rate,
            int_value=int_value,
            dataset_with_size_type=size_type,
            **kwargs,
        )

    @property
    def filters(self) -> Filter:
        """Return a ``NewIntFilter`` on the dataset's train-id field.

        The threshold is ``int(dataset size * filter_rate)``.
        """
        data = self.dataset.data
        id_field = data.train_id_field
        threshold = int(data.size * self.filter_rate)
        return NewIntFilter(filter_rate=self.filter_rate, int_field=id_field, int_value=threshold)
| 515 | + |
| 516 | + |
474 | 517 | class LabelFilterPerformanceCase(PerformanceCase):
|
475 | 518 | case_id: CaseType = CaseType.LabelFilterPerformanceCase
|
476 | 519 | dataset_with_size_type: DatasetWithSizeType
|
@@ -529,5 +572,6 @@ def filters(self) -> Filter:
|
529 | 572 | CaseType.Performance1536D50K: Performance1536D50K,
|
530 | 573 | CaseType.PerformanceCustomDataset: PerformanceCustomDataset,
|
531 | 574 | CaseType.StreamingPerformanceCase: StreamingPerformanceCase,
|
| 575 | + CaseType.NewIntFilterPerformanceCase: NewIntFilterPerformanceCase, |
532 | 576 | CaseType.LabelFilterPerformanceCase: LabelFilterPerformanceCase,
|
533 | 577 | }
|
0 commit comments