1212from haystack .components .preprocessors import DocumentSplitter
1313from haystack .components .retrievers import SentenceWindowRetriever
1414from haystack .testing .document_store import (
15+ CountDocumentsByFilterTest ,
1516 CountDocumentsTest ,
17+ CountUniqueMetadataByFilterTest ,
1618 DeleteAllTest ,
1719 DeleteByFilterTest ,
1820 DeleteDocumentsTest ,
1921 FilterableDocsFixtureMixin ,
22+ GetMetadataFieldMinMaxTest ,
23+ GetMetadataFieldsInfoTest ,
24+ GetMetadataFieldUniqueValuesTest ,
2025 UpdateByFilterTest ,
2126 WriteDocumentsTest ,
2227)
@@ -276,6 +281,11 @@ class TestDocumentStore(
276281 UpdateByFilterTest ,
277282 DeleteAllTest ,
278283 DeleteByFilterTest ,
284+ CountDocumentsByFilterTest ,
285+ CountUniqueMetadataByFilterTest ,
286+ GetMetadataFieldsInfoTest ,
287+ GetMetadataFieldMinMaxTest ,
288+ GetMetadataFieldUniqueValuesTest ,
279289):
280290 def test_write_documents (self , document_store : PineconeDocumentStore ):
281291 docs = [Document (id = "1" )]
@@ -353,108 +363,6 @@ def test_sentence_window_retriever(self, document_store: PineconeDocumentStore):
353363
354364 assert len (result ["context_windows" ]) == 1
355365
356- def test_count_documents_by_filter (self , document_store : PineconeDocumentStore ):
357- docs = [
358- Document (content = "Doc 1" , meta = {"category" : "A" , "status" : "draft" }),
359- Document (content = "Doc 2" , meta = {"category" : "B" , "status" : "published" }),
360- Document (content = "Doc 3" , meta = {"category" : "A" , "status" : "published" }),
361- Document (content = "Doc 4" , meta = {"category" : "A" , "status" : "draft" }),
362- ]
363- document_store .write_documents (docs )
364-
365- # Count documents with category="A"
366- count = document_store .count_documents_by_filter (
367- filters = {"field" : "meta.category" , "operator" : "==" , "value" : "A" }
368- )
369- assert count == 3
370-
371- # Count documents with status="published"
372- count = document_store .count_documents_by_filter (
373- filters = {"field" : "meta.status" , "operator" : "==" , "value" : "published" }
374- )
375- assert count == 2
376-
377- # Count with complex filter
378- count = document_store .count_documents_by_filter (
379- filters = {
380- "operator" : "AND" ,
381- "conditions" : [
382- {"field" : "meta.category" , "operator" : "==" , "value" : "A" },
383- {"field" : "meta.status" , "operator" : "==" , "value" : "draft" },
384- ],
385- }
386- )
387- assert count == 2
388-
389- def test_count_unique_metadata_by_filter (self , document_store : PineconeDocumentStore ):
390- docs = [
391- Document (content = "Doc 1" , meta = {"category" : "A" , "author" : "Alice" , "priority" : 1 }),
392- Document (content = "Doc 2" , meta = {"category" : "B" , "author" : "Bob" , "priority" : 2 }),
393- Document (content = "Doc 3" , meta = {"category" : "A" , "author" : "Alice" , "priority" : 1 }),
394- Document (content = "Doc 4" , meta = {"category" : "C" , "author" : "Charlie" , "priority" : 3 }),
395- Document (content = "Doc 5" , meta = {"category" : "A" , "author" : "Bob" , "priority" : 2 }),
396- ]
397- document_store .write_documents (docs )
398-
399- # Count unique values without filter
400- counts = document_store .count_unique_metadata_by_filter (
401- filters = {}, metadata_fields = ["category" , "author" , "priority" ]
402- )
403- assert counts ["category" ] == 3 # A, B, C
404- assert counts ["author" ] == 3 # Alice, Bob, Charlie
405- assert counts ["priority" ] == 3 # 1, 2, 3
406-
407- # Count unique values with filter
408- counts = document_store .count_unique_metadata_by_filter (
409- filters = {"field" : "meta.category" , "operator" : "==" , "value" : "A" },
410- metadata_fields = ["author" , "priority" ],
411- )
412- assert counts ["author" ] == 2 # Alice, Bob
413- assert counts ["priority" ] == 2 # 1, 2
414-
415- def test_get_metadata_fields_info (self , document_store : PineconeDocumentStore ):
416- docs = [
417- Document (
418- content = "Doc 1" ,
419- meta = {
420- "category" : "A" ,
421- "author" : "Alice" ,
422- "priority" : 1 ,
423- "is_published" : True ,
424- "tags" : ["tag1" , "tag2" ],
425- },
426- ),
427- Document (content = "Doc 2" , meta = {"category" : "B" , "author" : "Bob" , "priority" : 2 , "is_published" : False }),
428- ]
429- document_store .write_documents (docs )
430-
431- field_info = document_store .get_metadata_fields_info ()
432-
433- # Check content field
434- assert "content" in field_info
435- assert field_info ["content" ]["type" ] == "text"
436-
437- # Check metadata fields
438- assert "category" in field_info
439- assert field_info ["category" ]["type" ] == "keyword"
440-
441- assert "author" in field_info
442- assert field_info ["author" ]["type" ] == "keyword"
443-
444- assert "priority" in field_info
445- assert field_info ["priority" ]["type" ] == "long"
446-
447- assert "is_published" in field_info
448- assert field_info ["is_published" ]["type" ] == "boolean"
449-
450- assert "tags" in field_info
451- assert field_info ["tags" ]["type" ] == "keyword"
452-
453- def test_get_metadata_fields_info_empty (self , document_store : PineconeDocumentStore ):
454- # Test with no documents
455- field_info = document_store .get_metadata_fields_info ()
456- assert field_info == {}
457-
458366 def test_get_metadata_fields_info_consistent_types (self , document_store : PineconeDocumentStore ):
459367 # Test that all documents are checked for type consistency
460368 docs = [
@@ -468,7 +376,7 @@ def test_get_metadata_fields_info_consistent_types(self, document_store: Pinecon
468376 assert "score" in field_info
469377 assert field_info ["score" ]["type" ] == "long"
470378
471- def test_get_metadata_field_min_max (self , document_store : PineconeDocumentStore ):
379+ def test_get_metadata_field_min_max_boolean_and_string (self , document_store : PineconeDocumentStore ):
472380 docs = [
473381 Document (content = "Doc 1" , meta = {"priority" : 1 , "score" : 85.5 , "active" : True , "category" : "Zebra" }),
474382 Document (content = "Doc 2" , meta = {"priority" : 5 , "score" : 92.3 , "active" : False , "category" : "Alpha" }),
@@ -477,16 +385,6 @@ def test_get_metadata_field_min_max(self, document_store: PineconeDocumentStore)
477385 ]
478386 document_store .write_documents (docs )
479387
480- # Get min/max for numeric field (int)
481- min_max = document_store .get_metadata_field_min_max ("priority" )
482- assert min_max ["min" ] == 1
483- assert min_max ["max" ] == 7
484-
485- # Get min/max for numeric field (float)
486- min_max = document_store .get_metadata_field_min_max ("score" )
487- assert min_max ["min" ] == 78.9
488- assert min_max ["max" ] == 95.1
489-
490388 # Get min/max for boolean field
491389 min_max = document_store .get_metadata_field_min_max ("active" )
492390 assert min_max ["min" ] is False
@@ -497,6 +395,11 @@ def test_get_metadata_field_min_max(self, document_store: PineconeDocumentStore)
497395 assert min_max ["min" ] == "Alpha"
498396 assert min_max ["max" ] == "Zebra"
499397
398+ def test_get_metadata_field_min_max_empty_collection (self , document_store : PineconeDocumentStore ):
399+ assert document_store .count_documents () == 0
400+ with pytest .raises (ValueError , match = "No values found" ):
401+ document_store .get_metadata_field_min_max ("priority" )
402+
500403 def test_get_metadata_field_min_max_no_values (self , document_store : PineconeDocumentStore ):
501404 docs = [
502405 Document (content = "Doc 1" , meta = {"tags" : ["tag1" , "tag2" ]}),
0 commit comments