@@ -464,29 +464,32 @@ def test_comparison_less_than_equal_with_iso_date(self, document_store, filterab
464464 ],
465465 )
466466
467- def test_meta_split_overlap_is_skipped (self , document_store ):
467+ def test_split_overlap_preserved (self , document_store ):
468+ """Split overlap meta is written and read back correctly."""
469+ overlap = [
470+ {"range" : [3.0 , 13.0 ], "doc_id" : "34326b7e6be489cb4c031152fc378cb50479ca5fcc3861e7e61dfb2e4e4e968b" },
471+ {"range" : [0.0 , 13.0 ], "doc_id" : "780f791c09d499c0bf01f87bce047b45c44224d36c79f0c9d8c1405a3197fc1a" },
472+ ]
468473 doc = Document (
469- content = "The moonlight shimmered " ,
474+ id = "6edd24e8b01f3cd6e4b71fef7d57b52f17664e14db5ab01b8ef429f97add3620" ,
475+ content = "an eighth test. " ,
470476 meta = {
471- "source_id" : "62049ba1d1e1d5ebb1f6230b0b00c5356b8706c56e0b9c36b1dfc86084cd75f0" ,
472- "page_number" : 1 ,
473- "split_id" : 0 ,
474- "split_idx_start" : 0 ,
475- "_split_overlap" : [
476- {"doc_id" : "68ed48ba830048c5d7815874ed2de794722e6d10866b6c55349a914fd9a0df65" , "range" : (0 , 20 )}
477- ],
477+ "_split_overlap" : overlap ,
478+ "page_number" : 1.0 ,
479+ "split_id" : 33.0 ,
480+ "split_idx_start" : 159.0 ,
481+ "source_id" : "fdbde6d217f04d3dd60c01f36541794f3153a61f13b4ca669655f4c5610c1664" ,
478482 },
479483 )
480484 document_store .write_documents ([doc ])
481-
482485 written_doc = document_store .filter_documents ()[0 ]
483-
484- assert written_doc . content == "The moonlight shimmered "
485- assert written_doc . meta [ "source_id" ] == "62049ba1d1e1d5ebb1f6230b0b00c5356b8706c56e0b9c36b1dfc86084cd75f0"
486- assert written_doc . meta [ "page_number" ] == 1.0
487- assert written_doc . meta [ "split_id" ] == 0.0
488- assert written_doc . meta [ "split_idx_start" ] == 0.0
489- assert "_split_overlap" not in written_doc . meta
486+ assert "_split_overlap" in written_doc . meta
487+ written_overlap = written_doc . meta [ "_split_overlap" ]
488+ assert len ( written_overlap ) == 2
489+ assert written_overlap [ 0 ][ "doc_id" ] == overlap [ 0 ][ "doc_id" ]
490+ assert list ( written_overlap [ 0 ][ "range" ]) == [ 3 , 13 ]
491+ assert written_overlap [ 1 ][ "doc_id" ] == overlap [ 1 ][ "doc_id" ]
492+ assert list ( written_overlap [ 1 ][ "range" ]) == [ 0 , 13 ]
490493
491494 def test_bm25_retrieval (self , document_store ):
492495 document_store .write_documents (
0 commit comments