File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -14,6 +14,9 @@ class SemanticDoubleMergingSplitter(SemanticSplitterNodeParser):
1414 semantically similar chunks to ensure optimal context window usage.
1515 Refactored to use generators for memory efficiency.
1616 """
17+ min_chunk_size : int = 200
18+ similarity_threshold : float = 0.85
19+
1720 def __init__ (
1821 self ,
1922 embed_model : BaseEmbedding ,
Original file line number Diff line number Diff line change 11import pytest
2+ from typing import List
23from unittest .mock import MagicMock
34from llama_index .core .schema import Document
5+ from llama_index .core .embeddings import BaseEmbedding
46from src .processing .splitter import SemanticDoubleMergingSplitter
57
@pytest.fixture
def mock_embed_model():
    """Provide a deterministic fake embedding model (no API cost, no latency).

    Returns a concrete ``BaseEmbedding`` subclass instance so that any
    isinstance/validation checks inside ``SemanticSplitterNodeParser``
    pass, unlike a bare ``MagicMock``. Every embedding call yields the
    same constant 384-dim vector, making splitter behavior reproducible.
    """
    dim = 384

    class _ConstantEmbedding(BaseEmbedding):
        # Minimal concrete implementation: each abstract hook returns a
        # fixed vector so cosine similarities are fully deterministic.
        def __init__(self, **kwargs):
            super().__init__(model_name="mock-model", **kwargs)

        def _get_query_embedding(self, query: str):
            return [0.1] * dim

        def _get_text_embedding(self, text: str):
            return [0.1] * dim

        def _get_text_embeddings(self, texts: List[str]):
            return [[0.1] * dim for _ in texts]

        async def _aget_query_embedding(self, query: str):
            return [0.1] * dim

        async def _aget_text_embedding(self, text: str):
            return [0.1] * dim

    return _ConstantEmbedding()
1422
1523def test_semantic_double_merging_logic (mock_embed_model , mocker ):
You can’t perform that action at this time.
0 commit comments