|
19 | 19 | from haystack.dataclasses.document import Document |
20 | 20 | from haystack.errors import FilterError |
21 | 21 | from haystack.testing.document_store import ( |
| 22 | + CountDocumentsByFilterTest, |
22 | 23 | CountDocumentsTest, |
| 24 | + CountUniqueMetadataByFilterTest, |
23 | 25 | DeleteAllTest, |
24 | 26 | DeleteByFilterTest, |
25 | 27 | DeleteDocumentsTest, |
26 | 28 | FilterableDocsFixtureMixin, |
27 | 29 | FilterDocumentsTest, |
| 30 | + GetMetadataFieldMinMaxTest, |
| 31 | + GetMetadataFieldsInfoTest, |
| 32 | + GetMetadataFieldUniqueValuesTest, |
28 | 33 | UpdateByFilterTest, |
29 | 34 | WriteDocumentsTest, |
30 | 35 | ) |
@@ -256,80 +261,6 @@ def _build_mock_document_store_with_schema(index_fields): |
256 | 261 | return store, search_client, index_client |
257 | 262 |
|
258 | 263 |
|
259 | | -def test_count_documents_by_filter(): |
260 | | - index_fields = [ |
261 | | - SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True), |
262 | | - SearchableField(name="content", type=SearchFieldDataType.String), |
263 | | - SimpleField(name="category", type=SearchFieldDataType.String, filterable=True), |
264 | | - ] |
265 | | - document_store, search_client, _ = _build_mock_document_store_with_schema(index_fields) |
266 | | - count_result = Mock() |
267 | | - count_result.get_count.return_value = 3 |
268 | | - search_client.search.return_value = count_result |
269 | | - |
270 | | - count = document_store.count_documents_by_filter({"field": "meta.category", "operator": "==", "value": "news"}) |
271 | | - |
272 | | - assert count == 3 |
273 | | - search_client.search.assert_called_once() |
274 | | - assert search_client.search.call_args.kwargs["include_total_count"] is True |
275 | | - |
276 | | - |
277 | | -def test_count_unique_metadata_by_filter(): |
278 | | - index_fields = [ |
279 | | - SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True), |
280 | | - SearchableField(name="content", type=SearchFieldDataType.String), |
281 | | - SimpleField(name="category", type=SearchFieldDataType.String, filterable=True), |
282 | | - SimpleField(name="status", type=SearchFieldDataType.String, filterable=True), |
283 | | - ] |
284 | | - document_store, search_client, _ = _build_mock_document_store_with_schema(index_fields) |
285 | | - search_client.search.return_value = [ |
286 | | - {"category": "news", "status": "draft"}, |
287 | | - {"category": "docs", "status": "draft"}, |
288 | | - {"category": "news", "status": "published"}, |
289 | | - ] |
290 | | - |
291 | | - counts = document_store.count_unique_metadata_by_filter( |
292 | | - filters={"field": "meta.status", "operator": "!=", "value": "archived"}, |
293 | | - metadata_fields=["meta.category", "status"], |
294 | | - ) |
295 | | - |
296 | | - assert counts == {"category": 2, "status": 2} |
297 | | - |
298 | | - |
299 | | -def test_get_metadata_fields_info(): |
300 | | - index_fields = [ |
301 | | - SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True), |
302 | | - SearchableField(name="content", type=SearchFieldDataType.String), |
303 | | - SimpleField(name="category", type=SearchFieldDataType.String, filterable=True), |
304 | | - SimpleField(name="status", type=SearchFieldDataType.String, filterable=True), |
305 | | - SimpleField(name="priority", type=SearchFieldDataType.Int32, filterable=True), |
306 | | - ] |
307 | | - document_store, _, _ = _build_mock_document_store_with_schema(index_fields) |
308 | | - |
309 | | - info = document_store.get_metadata_fields_info() |
310 | | - |
311 | | - assert info == { |
312 | | - "content": {"type": "text"}, |
313 | | - "category": {"type": "keyword"}, |
314 | | - "status": {"type": "keyword"}, |
315 | | - "priority": {"type": "long"}, |
316 | | - } |
317 | | - |
318 | | - |
319 | | -def test_get_metadata_field_min_max(): |
320 | | - index_fields = [ |
321 | | - SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True), |
322 | | - SearchableField(name="content", type=SearchFieldDataType.String), |
323 | | - SimpleField(name="priority", type=SearchFieldDataType.Int32, filterable=True), |
324 | | - ] |
325 | | - document_store, search_client, _ = _build_mock_document_store_with_schema(index_fields) |
326 | | - search_client.search.return_value = [{"priority": 10}, {"priority": 2}, {"priority": 7}] |
327 | | - |
328 | | - result = document_store.get_metadata_field_min_max("meta.priority") |
329 | | - |
330 | | - assert result == {"min": 2, "max": 10} |
331 | | - |
332 | | - |
333 | 264 | def test_get_metadata_field_unique_values(): |
334 | 265 | index_fields = [ |
335 | 266 | SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True), |
@@ -397,6 +328,11 @@ class TestDocumentStore( |
397 | 328 | FilterableDocsFixtureMixin, |
398 | 329 | WriteDocumentsTest, |
399 | 330 | UpdateByFilterTest, |
| 331 | + CountDocumentsByFilterTest, |
| 332 | + CountUniqueMetadataByFilterTest, |
| 333 | + GetMetadataFieldsInfoTest, |
| 334 | + GetMetadataFieldMinMaxTest, |
| 335 | + GetMetadataFieldUniqueValuesTest, |
400 | 336 | ): |
401 | 337 | def assert_documents_are_equal(self, received: list[Document], expected: list[Document]): |
402 | 338 | _assert_documents_are_equal(received, expected) |
@@ -526,6 +462,135 @@ def test_update_by_filter_invalid_field(self, document_store: AzureAISearchDocum |
526 | 462 | assert "nonexistent_field" in str(exc_info.value) |
527 | 463 | assert "not defined in index schema" in str(exc_info.value) |
528 | 464 |
|
| 465 | + @pytest.mark.parametrize( |
| 466 | + "document_store", |
| 467 | + [{"metadata_fields": {"category": str, "status": str}}], |
| 468 | + indirect=True, |
| 469 | + ) |
| 470 | + def test_count_documents_by_filter_simple(self, document_store: AzureAISearchDocumentStore): |
| 471 | + """Override to use a document_store with required metadata fields.""" |
| 472 | + CountDocumentsByFilterTest.test_count_documents_by_filter_simple(document_store) |
| 473 | + |
| 474 | + @pytest.mark.parametrize( |
| 475 | + "document_store", |
| 476 | + [{"metadata_fields": {"category": str, "status": str}}], |
| 477 | + indirect=True, |
| 478 | + ) |
| 479 | + def test_count_documents_by_filter_compound(self, document_store: AzureAISearchDocumentStore): |
| 480 | + """Override to use a document_store with required metadata fields.""" |
| 481 | + CountDocumentsByFilterTest.test_count_documents_by_filter_compound(document_store) |
| 482 | + |
| 483 | + @pytest.mark.parametrize( |
| 484 | + "document_store", |
| 485 | + [{"metadata_fields": {"category": str}}], |
| 486 | + indirect=True, |
| 487 | + ) |
| 488 | + def test_count_documents_by_filter_no_matches(self, document_store: AzureAISearchDocumentStore): |
| 489 | + """Override to use a document_store with required metadata fields.""" |
| 490 | + CountDocumentsByFilterTest.test_count_documents_by_filter_no_matches(document_store) |
| 491 | + |
| 492 | + @pytest.mark.parametrize( |
| 493 | + "document_store", |
| 494 | + [{"metadata_fields": {"category": str}}], |
| 495 | + indirect=True, |
| 496 | + ) |
| 497 | + def test_count_documents_by_filter_empty_collection(self, document_store: AzureAISearchDocumentStore): |
| 498 | + """Override to use a document_store with required metadata fields.""" |
| 499 | + CountDocumentsByFilterTest.test_count_documents_by_filter_empty_collection(document_store) |
| 500 | + |
| 501 | + @pytest.mark.parametrize( |
| 502 | + "document_store", |
| 503 | + [{"metadata_fields": {"category": str, "status": str, "priority": int}}], |
| 504 | + indirect=True, |
| 505 | + ) |
| 506 | + def test_count_unique_metadata_by_filter_all_documents(self, document_store: AzureAISearchDocumentStore): |
| 507 | + """Override to use a document_store with required metadata fields.""" |
| 508 | + CountUniqueMetadataByFilterTest.test_count_unique_metadata_by_filter_all_documents(document_store) |
| 509 | + |
| 510 | + @pytest.mark.parametrize( |
| 511 | + "document_store", |
| 512 | + [{"metadata_fields": {"category": str, "status": str, "priority": int}}], |
| 513 | + indirect=True, |
| 514 | + ) |
| 515 | + def test_count_unique_metadata_by_filter_with_filter(self, document_store: AzureAISearchDocumentStore): |
| 516 | + """Override to use a document_store with required metadata fields.""" |
| 517 | + CountUniqueMetadataByFilterTest.test_count_unique_metadata_by_filter_with_filter(document_store) |
| 518 | + |
| 519 | + @pytest.mark.parametrize( |
| 520 | + "document_store", |
| 521 | + [{"metadata_fields": {"category": str, "year": int}}], |
| 522 | + indirect=True, |
| 523 | + ) |
| 524 | + def test_count_unique_metadata_by_filter_with_multiple_filters(self, document_store: AzureAISearchDocumentStore): |
| 525 | + """Override to use a document_store with required metadata fields.""" |
| 526 | + CountUniqueMetadataByFilterTest.test_count_unique_metadata_by_filter_with_multiple_filters(document_store) |
| 527 | + |
| 528 | + @pytest.mark.parametrize( |
| 529 | + "document_store", |
| 530 | + [{"metadata_fields": {"category": str, "status": str, "priority": int, "rating": float}}], |
| 531 | + indirect=True, |
| 532 | + ) |
| 533 | + def test_get_metadata_fields_info(self, document_store: AzureAISearchDocumentStore): |
| 534 | + """Override to use a document_store with required metadata fields.""" |
| 535 | + GetMetadataFieldsInfoTest.test_get_metadata_fields_info(document_store) |
| 536 | + |
| 537 | + @pytest.mark.skip(reason="Azure AI Search returns index schema fields even on empty collections.") |
| 538 | + def test_get_metadata_fields_info_empty_collection(self, document_store: AzureAISearchDocumentStore): ... |
| 539 | + |
| 540 | + @pytest.mark.parametrize( |
| 541 | + "document_store", |
| 542 | + [{"metadata_fields": {"priority": int}}], |
| 543 | + indirect=True, |
| 544 | + ) |
| 545 | + def test_get_metadata_field_min_max_numeric(self, document_store: AzureAISearchDocumentStore): |
| 546 | + """Override to use a document_store with required metadata fields.""" |
| 547 | + GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_numeric(document_store) |
| 548 | + |
| 549 | + @pytest.mark.parametrize( |
| 550 | + "document_store", |
| 551 | + [{"metadata_fields": {"rating": float}}], |
| 552 | + indirect=True, |
| 553 | + ) |
| 554 | + def test_get_metadata_field_min_max_float(self, document_store: AzureAISearchDocumentStore): |
| 555 | + """Override to use a document_store with required metadata fields.""" |
| 556 | + GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_float(document_store) |
| 557 | + |
| 558 | + @pytest.mark.parametrize( |
| 559 | + "document_store", |
| 560 | + [{"metadata_fields": {"priority": int}}], |
| 561 | + indirect=True, |
| 562 | + ) |
| 563 | + def test_get_metadata_field_min_max_single_value(self, document_store: AzureAISearchDocumentStore): |
| 564 | + """Override to use a document_store with required metadata fields.""" |
| 565 | + GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_single_value(document_store) |
| 566 | + |
| 567 | + @pytest.mark.parametrize( |
| 568 | + "document_store", |
| 569 | + [{"metadata_fields": {"priority": int}}], |
| 570 | + indirect=True, |
| 571 | + ) |
| 572 | + def test_get_metadata_field_min_max_empty_collection(self, document_store: AzureAISearchDocumentStore): |
| 573 | + """Override to use a document_store with required metadata fields.""" |
| 574 | + GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_empty_collection(document_store) |
| 575 | + |
| 576 | + @pytest.mark.parametrize( |
| 577 | + "document_store", |
| 578 | + [{"metadata_fields": {"priority": int, "age": int, "rating": float}}], |
| 579 | + indirect=True, |
| 580 | + ) |
| 581 | + def test_get_metadata_field_min_max_meta_prefix(self, document_store: AzureAISearchDocumentStore): |
| 582 | + """Override to use a document_store with required metadata fields.""" |
| 583 | + GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_meta_prefix(document_store) |
| 584 | + |
| 585 | + @pytest.mark.parametrize( |
| 586 | + "document_store", |
| 587 | + [{"metadata_fields": {"category": str}}], |
| 588 | + indirect=True, |
| 589 | + ) |
| 590 | + def test_get_metadata_field_unique_values_basic(self, document_store: AzureAISearchDocumentStore): |
| 591 | + """Override to use a document_store with required metadata fields.""" |
| 592 | + GetMetadataFieldUniqueValuesTest.test_get_metadata_field_unique_values_basic(document_store) |
| 593 | + |
529 | 594 | @pytest.mark.parametrize( |
530 | 595 | "document_store", |
531 | 596 | [{"metadata_fields": {"category": str, "status": str, "priority": int}}], |
|
0 commit comments