Skip to content

Commit 1029da0

Browse files
committed
fix(test): update mock for LlamaIndex compatibility and fix splitter initialization
1 parent af7f67e commit 1029da0

2 files changed

Lines changed: 15 additions & 4 deletions

File tree

src/processing/splitter.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,9 @@ class SemanticDoubleMergingSplitter(SemanticSplitterNodeParser):
1414
semantically similar chunks to ensure optimal context window usage.
1515
Refactored to use generators for memory efficiency.
1616
"""
17+
min_chunk_size: int = 200
18+
similarity_threshold: float = 0.85
19+
1720
def __init__(
1821
self,
1922
embed_model: BaseEmbedding,

tests/test_splitter.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,23 @@
11
import pytest
2+
from typing import List
23
from unittest.mock import MagicMock
34
from llama_index.core.schema import Document
5+
from llama_index.core.embeddings import BaseEmbedding
46
from src.processing.splitter import SemanticDoubleMergingSplitter
57

68
@pytest.fixture
79
def mock_embed_model():
810
"""Creates a mock for the embedding model to avoid costs and latency."""
9-
embed_model = MagicMock()
10-
# Mocking necessary methods for SemanticSplitterNodeParser
11-
embed_model.get_text_embedding.side_effect = lambda x: [0.1] * 384
12-
embed_model.get_text_embedding_batch.side_effect = lambda x: [[0.1] * 384 for _ in x]
11+
class MockEmbedding(BaseEmbedding):
12+
def __init__(self, **kwargs):
13+
super().__init__(model_name="mock-model", **kwargs)
14+
def _get_query_embedding(self, query: str): return [0.1] * 384
15+
def _get_text_embedding(self, text: str): return [0.1] * 384
16+
def _get_text_embeddings(self, texts: List[str]): return [[0.1] * 384 for _ in texts]
17+
async def _aget_query_embedding(self, query: str): return [0.1] * 384
18+
async def _aget_text_embedding(self, text: str): return [0.1] * 384
19+
20+
embed_model = MockEmbedding()
1321
return embed_model
1422

1523
def test_semantic_double_merging_logic(mock_embed_model, mocker):

0 commit comments

Comments
 (0)