1212from typing import Any
1313
1414import numpy as np
15- import pytest
1615from cocoindex .connectors import sqlite as coco_sqlite
1716from cocoindex .resources .schema import VectorSchema
1817from example_toml_chunker import toml_chunker
1918
20- import cocoindex_code .shared as _shared
2119from cocoindex_code .chunking import CHUNKER_REGISTRY , Chunk , TextPosition
2220from cocoindex_code .project import Project
2321from cocoindex_code .settings import ProjectSettings
@@ -44,14 +42,11 @@ async def embed(self, text: str) -> np.ndarray:
4442
4543async def _index_project (
4644 project_root : Path ,
47- monkeypatch : pytest .MonkeyPatch ,
4845 ** create_kwargs : Any ,
4946) -> Project :
5047 """Create a Project and run a full index pass."""
5148 settings = ProjectSettings (include_patterns = ["**/*.*" ], exclude_patterns = ["**/.cocoindex_code" ])
5249 stub = _StubEmbedder ()
53- # shared.embedder is read by CodeChunk.embedding at schema resolution time.
54- monkeypatch .setattr (_shared , "embedder" , stub )
5550 from cocoindex_code .settings import save_project_settings
5651
5752 save_project_settings (project_root , settings )
@@ -104,40 +99,36 @@ def _pos(line: int) -> TextPosition:
10499# ---------------------------------------------------------------------------
105100
106101
107- async def test_default_registry_is_empty (tmp_path : Path , monkeypatch : pytest . MonkeyPatch ) -> None :
102+ async def test_default_registry_is_empty (tmp_path : Path ) -> None :
108103 """CHUNKER_REGISTRY is an empty dict when no registry is passed."""
109104 (tmp_path / ".git" ).mkdir ()
110105 (tmp_path / "hello.py" ).write_text ("x = 1\n " )
111106
112- project = await _index_project (tmp_path , monkeypatch )
107+ project = await _index_project (tmp_path )
113108 registry = project .env .get_context (CHUNKER_REGISTRY )
114109 assert isinstance (registry , dict )
115110 assert registry == {}
116111
117112
118- async def test_unregistered_suffix_uses_splitter (
119- tmp_path : Path , monkeypatch : pytest .MonkeyPatch
120- ) -> None :
113+ async def test_unregistered_suffix_uses_splitter (tmp_path : Path ) -> None :
121114 """Files with no registered chunker are processed by RecursiveSplitter."""
122115 (tmp_path / ".git" ).mkdir ()
123116 (tmp_path / "sample.py" ).write_text ("def foo():\n return 1\n " )
124117
125- await _index_project (tmp_path , monkeypatch )
118+ await _index_project (tmp_path )
126119 chunks = _query_chunks (tmp_path )
127120
128121 assert len (chunks ) >= 1
129122 assert all (c ["language" ] == "python" for c in chunks )
130123 assert any ("foo" in c ["content" ] for c in chunks )
131124
132125
133- async def test_registered_chunker_is_called (
134- tmp_path : Path , monkeypatch : pytest .MonkeyPatch
135- ) -> None :
126+ async def test_registered_chunker_is_called (tmp_path : Path ) -> None :
136127 """A registered ChunkerFn splits files and may override the language."""
137128 (tmp_path / ".git" ).mkdir ()
138129 (tmp_path / "config.toml" ).write_text (_TOML_CONTENT )
139130
140- await _index_project (tmp_path , monkeypatch , chunker_registry = {".toml" : toml_chunker })
131+ await _index_project (tmp_path , chunker_registry = {".toml" : toml_chunker })
141132 chunks = _query_chunks (tmp_path )
142133
143134 assert len (chunks ) == 2
@@ -147,9 +138,7 @@ async def test_registered_chunker_is_called(
147138 assert all (c ["language" ] == "toml" for c in chunks )
148139
149140
150- async def test_chunker_language_none_preserves_detected (
151- tmp_path : Path , monkeypatch : pytest .MonkeyPatch
152- ) -> None :
141+ async def test_chunker_language_none_preserves_detected (tmp_path : Path ) -> None :
153142 """When ChunkerFn returns language=None, detect_code_language() is used."""
154143
155144 def _passthrough_chunker (path : Path , content : str ) -> tuple [str | None , list [Chunk ]]:
@@ -159,21 +148,19 @@ def _passthrough_chunker(path: Path, content: str) -> tuple[str | None, list[Chu
159148 (tmp_path / ".git" ).mkdir ()
160149 (tmp_path / "script.py" ).write_text ("x = 1\n " )
161150
162- await _index_project (tmp_path , monkeypatch , chunker_registry = {".py" : _passthrough_chunker })
151+ await _index_project (tmp_path , chunker_registry = {".py" : _passthrough_chunker })
163152 chunks = _query_chunks (tmp_path )
164153
165154 assert all (c ["language" ] == "python" for c in chunks )
166155
167156
168- async def test_registry_does_not_affect_other_suffixes (
169- tmp_path : Path , monkeypatch : pytest .MonkeyPatch
170- ) -> None :
157+ async def test_registry_does_not_affect_other_suffixes (tmp_path : Path ) -> None :
171158 """Registering a chunker for .toml does not affect .py files."""
172159 (tmp_path / ".git" ).mkdir ()
173160 (tmp_path / "config.toml" ).write_text (_TOML_CONTENT )
174161 (tmp_path / "code.py" ).write_text ("def bar():\n pass\n " )
175162
176- await _index_project (tmp_path , monkeypatch , chunker_registry = {".toml" : toml_chunker })
163+ await _index_project (tmp_path , chunker_registry = {".toml" : toml_chunker })
177164 chunks = _query_chunks (tmp_path )
178165
179166 toml_chunks = [c for c in chunks if c ["language" ] == "toml" ]
0 commit comments