|
10 | 10 |
|
11 | 11 | import pytest |
12 | 12 | from azure.core.credentials import TokenCredential |
| 13 | +from azure.core.exceptions import ResourceNotFoundError |
13 | 14 | from azure.search.documents.indexes.models import ( |
14 | 15 | CustomAnalyzer, |
15 | 16 | SearchableField, |
@@ -336,6 +337,128 @@ def test_query_sql_raises_not_implemented(): |
336 | 337 | document_store.query_sql("SELECT * FROM test-index") |
337 | 338 |
|
338 | 339 |
|
@pytest.mark.parametrize(
    "metadata_fields, expected_error_match",
    [
        pytest.param(
            {"Title": SearchField(name="mismatched", type="Edm.String", filterable=True)},
            "Name of SearchField",
        ),
        pytest.param({"Pages": object}, "Unsupported field type"),
    ],
)
def test_normalize_metadata_index_fields_raises(metadata_fields, expected_error_match):
    """
    Check that `_normalize_metadata_index_fields` rejects invalid metadata field definitions.

    Each case passes a mapping that must trigger a `ValueError` whose message matches `expected_error_match`:
    a `SearchField` whose `name` differs from its mapping key, and a value (`object`) that is used as a field type.
    """
    normalize = AzureAISearchDocumentStore._normalize_metadata_index_fields
    with pytest.raises(ValueError, match=expected_error_match):
        normalize(metadata_fields)
| 353 | + |
| 354 | + |
def test_normalize_metadata_index_fields_skips_non_alpha_keys(caplog):
    """
    Check that the key "1invalid" is left out of the normalized fields while "valid" is kept.

    The test also expects a record containing "Invalid key" to be captured at WARNING level or above.
    """
    raw_fields = {"1invalid": str, "valid": int}
    with caplog.at_level(logging.WARNING):
        result = AzureAISearchDocumentStore._normalize_metadata_index_fields(raw_fields)

    assert "valid" in result
    assert "1invalid" not in result
    assert "Invalid key" in caplog.text
| 361 | + |
| 362 | + |
def test_normalize_metadata_index_fields_returns_empty_for_none():
    """
    Check that normalizing `None` metadata fields yields an empty dict.
    """
    normalized = AzureAISearchDocumentStore._normalize_metadata_index_fields(None)
    assert normalized == {}
| 365 | + |
| 366 | + |
@pytest.mark.parametrize(
    "method, kwargs, expected_match",
    [
        pytest.param("_bm25_retrieval", {"query": None}, "query must not be None"),
        pytest.param("_hybrid_retrieval", {"query": None, "query_embedding": [0.1]}, "query must not be None"),
        pytest.param(
            "_hybrid_retrieval",
            {"query": "q", "query_embedding": []},
            "query_embedding must be a non-empty",
        ),
        pytest.param("_embedding_retrieval", {"query_embedding": []}, "query_embedding must be a non-empty"),
    ],
)
def test_internal_retrieval_validates_inputs(method, kwargs, expected_match):
    """
    Check that the internal retrieval methods raise `ValueError` for a `None` query or an empty embedding.

    `method` is the name of the retrieval method looked up on the store, `kwargs` are the keyword arguments
    it is called with, and `expected_match` is the pattern the error message must match.
    """
    store = AzureAISearchDocumentStore(
        api_key=Secret.from_token("fake-api-key"),
        azure_endpoint=Secret.from_token("fake-endpoint"),
        index_name="test-index",
    )
    retrieval = getattr(store, method)
    with pytest.raises(ValueError, match=expected_match):
        retrieval(**kwargs)
| 384 | + |
| 385 | + |
def test_collect_unique_values_combines_lists_and_scalars():
    """
    Check that `_collect_unique_values` merges list and scalar values of a field into one set.

    The input mixes list values, a scalar value and `None`; the expected set holds each string once
    and does not contain `None`.
    """
    tag_values = (["a", "b"], "c", None, ["a", "d"])
    documents = [{"tags": value} for value in tag_values]

    unique_tags = AzureAISearchDocumentStore._collect_unique_values(documents, "tags")

    assert unique_tags == {"a", "b", "c", "d"}
| 394 | + |
| 395 | + |
@pytest.mark.parametrize(
    "docs, expected",
    [
        pytest.param([], {"min": None, "max": None}),
        pytest.param([{"x": None}, {"x": [1, 2]}], {"min": None, "max": None}),
        pytest.param([{"x": 3}, {"x": 1}, {"x": 2}], {"min": 1, "max": 3}),
    ],
)
def test_get_min_max_from_documents(docs, expected):
    """
    Check the min/max computed for field "x" over a list of raw documents.

    Covers an empty document list, documents whose "x" is `None` or a list (both expected to give
    `None` bounds), and documents with plain integer values.
    """
    min_max = AzureAISearchDocumentStore._get_min_max_from_documents(docs, "x")
    assert min_max == expected
| 406 | + |
| 407 | + |
@pytest.mark.parametrize(
    "field, expected_type",
    [
        pytest.param(SimpleField(name="cat", type=SearchFieldDataType.String, filterable=True), "keyword"),
        pytest.param(SearchableField(name="content", type=SearchFieldDataType.String), "text"),
        pytest.param(SearchableField(name="title", type=SearchFieldDataType.String), "text"),
        pytest.param(SimpleField(name="year", type=SearchFieldDataType.Int32, filterable=True), "long"),
        pytest.param(SimpleField(name="rating", type=SearchFieldDataType.Double, filterable=True), "double"),
        pytest.param(
            SearchField(
                name="tags",
                type=SearchFieldDataType.Collection(SearchFieldDataType.String),
                filterable=True,
            ),
            "keyword",
        ),
        pytest.param(SimpleField(name="when", type=SearchFieldDataType.DateTimeOffset, filterable=True), "date"),
    ],
)
def test_map_azure_field_type_variants(field, expected_type):
    """
    Check the type name that `_map_azure_field_type` returns for several Azure field definitions.

    Covers simple string, searchable string, Int32, Double, string collection and DateTimeOffset fields.
    """
    mapped_type = AzureAISearchDocumentStore._map_azure_field_type(field)
    assert mapped_type == expected_type
| 429 | + |
| 430 | + |
def test_map_azure_field_type_without_type_attribute():
    """
    Check that a field object without a `type` attribute is mapped to "keyword".
    """
    # An empty spec makes the mock raise AttributeError for every attribute that is not set explicitly,
    # so only `name` is available on this field.
    typeless_field = Mock(spec=[])
    typeless_field.name = "custom"

    mapped_type = AzureAISearchDocumentStore._map_azure_field_type(typeless_field)

    assert mapped_type == "keyword"
| 435 | + |
| 436 | + |
def test_index_exists_raises_without_index_name():
    """
    Check that `_index_exists` raises a `ValueError` when it is called with `None` as the index name.
    """
    store = AzureAISearchDocumentStore(
        api_key=Secret.from_token("fake-api-key"),
        azure_endpoint=Secret.from_token("fake-endpoint"),
        index_name="test-index",
    )
    # Swap in a mock index client before calling the method under test.
    store._index_client = Mock()

    with pytest.raises(ValueError, match="Index name is required"):
        store._index_exists(None)
| 446 | + |
| 447 | + |
def test_get_raw_documents_by_id_skips_not_found(caplog):
    """
    Check that `_get_raw_documents_by_id` leaves out ids whose lookup raises `ResourceNotFoundError`.

    The mocked search client returns a document for the first lookup and raises for the second; the test
    expects only the found document back and the text "missing" in the captured log output.
    """
    id_field = SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True)
    store, search_client, _ = _build_mock_document_store_with_schema([id_field])

    # Consecutive `get_document` calls consume these entries in order; an exception instance is raised.
    lookup_outcomes = [{"id": "1", "content": "c1"}, ResourceNotFoundError("not found")]
    search_client.get_document.side_effect = lookup_outcomes

    with caplog.at_level(logging.WARNING):
        raw_documents = store._get_raw_documents_by_id(["1", "missing"])

    assert raw_documents == [{"id": "1", "content": "c1"}]
    assert "missing" in caplog.text
| 460 | + |
| 461 | + |
339 | 462 | def _assert_documents_are_equal(received: list[Document], expected: list[Document]): |
340 | 463 | """ |
341 | 464 | Assert that two lists of Documents are equal. |
|
0 commit comments