@@ -31,20 +31,15 @@ def test_semantic_double_merging_logic(mock_embed_model, mocker):
3131 min_chunk_size = 100
3232 )
3333
34- # Mocking the base class behavior (Pass 1)
35- # We simulate that the base splitter returned 3 nodes: two small ones and one normal
36- node1 = MagicMock (spec = Document )
37- node1 .get_content .return_value = "Short text." # 11 chars
38-
39- node2 = MagicMock (spec = Document )
40- node2 .get_content .return_value = "Another short bit." # 18 chars
41-
42- node3 = MagicMock (spec = Document )
43- node3 .get_content .return_value = "This is a much longer text that should exceed the minimum chunk size threshold for merging logic." # ~100 chars
34+ # Use real Document objects to ensure content updates (set_content) work as expected
35+ node1 = Document (text = "A" * 60 ) # 60 chars
36+ node2 = Document (text = "B" * 50 ) # 50 chars. node1 + node2 = 110 chars of text (111 with the joining newline), which is > 100
37+ node3 = Document (text = "C" * 110 ) # 110 chars. Already big enough.
4438
45- # Inject mock into super().get_nodes_from_documents via mocker
39+ # Inject mock into the base class method.
40+ # We patch it in the splitter module where it's imported.
4641 mocker .patch (
47- 'llama_index.core.node_parser .SemanticSplitterNodeParser.get_nodes_from_documents' ,
42+ 'src.processing.splitter .SemanticSplitterNodeParser.get_nodes_from_documents' ,
4843 return_value = [node1 , node2 , node3 ]
4944 )
5045
@@ -53,11 +48,13 @@ def test_semantic_double_merging_logic(mock_embed_model, mocker):
5348 nodes = list (splitter .get_nodes_generator ([doc ]))
5449
5550 # Validation
56- # node1 and node2 should have been merged because len(node1) < 100
57- # Final result should have 2 nodes (Merge of 1+2 and the isolated 3)
51+ # node1 and node2 should have been merged because len(node1) < 100.
52+ # After node2 is merged into node1, node1's length becomes 111 (60 + 1 for the joining newline + 50).
53+ # Since 111 > 100, the next iteration (node3) will yield the merged node1 and start fresh.
54+ # Final result should have 2 nodes: (node1+node2) and (node3)
5855 assert len (nodes ) == 2
59- assert "Short text." in nodes [0 ].get_content ()
60- assert "Another short bit." in nodes [0 ].get_content ()
56+ assert "A" * 60 in nodes [0 ].get_content ()
57+ assert "B" * 50 in nodes [0 ].get_content ()
6158 assert nodes [1 ].get_content () == node3 .get_content ()
6259
6360def test_generator_memory_efficiency (mock_embed_model ):
0 commit comments