Skip to content

Commit da886d5

Browse files
authored
refactor: drop unused shared.embedder global (#149)
The module-level `shared.embedder` global was written by `create_embedder()` but never read by production code — the embedder flows through the `EMBEDDER` ContextKey via `context.provide` / `use_context`. Drop the global along with the two test readers that kept it alive:

- `tests/test_daemon.py` — the `daemon_sock` fixture pre-loaded an embedder and monkeypatched `dm.create_embedder` to reuse it. That was only useful as a cross-module cache via the now-dead global; with a session-scoped fixture, `run_daemon()` loads the embedder once from the saved settings regardless.
- `tests/test_chunker_registry.py` — removed a stale `monkeypatch.setattr(_shared, "embedder", stub)` whose comment claimed CodeChunk.embedding read the global at schema resolution time. It reads the EMBEDDER ContextKey instead; the stub is already wired through `Project.create(..., stub, ...)`.
1 parent a264038 commit da886d5

3 files changed

Lines changed: 13 additions & 44 deletions

File tree

src/cocoindex_code/shared.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -35,10 +35,7 @@
3535
SQLITE_DB = coco.ContextKey[sqlite.ManagedConnection]("index_db")
3636
CODEBASE_DIR = coco.ContextKey[pathlib.Path]("codebase")
3737

38-
# Module-level variable — set by daemon at startup (needed for CodeChunk annotation).
39-
embedder: Embedder | None = None
40-
41-
# Query prompt name — set alongside embedder by create_embedder().
38+
# Query prompt name — set by create_embedder().
4239
query_prompt_name: str | None = None
4340

4441

@@ -80,9 +77,9 @@ async def check_embedding(embedder: Embedder) -> EmbeddingCheckResult:
8077
def create_embedder(settings: EmbeddingSettings) -> Embedder:
8178
"""Create and return an embedder instance based on settings.
8279
83-
Also sets the module-level ``embedder`` and ``query_prompt_name`` variables.
80+
Also sets the module-level ``query_prompt_name`` variable.
8481
"""
85-
global embedder, query_prompt_name
82+
global query_prompt_name
8683

8784
if settings.provider == "sentence-transformers":
8885
from cocoindex.ops.sentence_transformers import SentenceTransformerEmbedder
@@ -118,7 +115,6 @@ def create_embedder(settings: EmbeddingSettings) -> Embedder:
118115
min_interval_ms,
119116
)
120117

121-
embedder = instance
122118
return instance
123119

124120

tests/test_chunker_registry.py

Lines changed: 10 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,10 @@
1212
from typing import Any
1313

1414
import numpy as np
15-
import pytest
1615
from cocoindex.connectors import sqlite as coco_sqlite
1716
from cocoindex.resources.schema import VectorSchema
1817
from example_toml_chunker import toml_chunker
1918

20-
import cocoindex_code.shared as _shared
2119
from cocoindex_code.chunking import CHUNKER_REGISTRY, Chunk, TextPosition
2220
from cocoindex_code.project import Project
2321
from cocoindex_code.settings import ProjectSettings
@@ -44,14 +42,11 @@ async def embed(self, text: str) -> np.ndarray:
4442

4543
async def _index_project(
4644
project_root: Path,
47-
monkeypatch: pytest.MonkeyPatch,
4845
**create_kwargs: Any,
4946
) -> Project:
5047
"""Create a Project and run a full index pass."""
5148
settings = ProjectSettings(include_patterns=["**/*.*"], exclude_patterns=["**/.cocoindex_code"])
5249
stub = _StubEmbedder()
53-
# shared.embedder is read by CodeChunk.embedding at schema resolution time.
54-
monkeypatch.setattr(_shared, "embedder", stub)
5550
from cocoindex_code.settings import save_project_settings
5651

5752
save_project_settings(project_root, settings)
@@ -104,40 +99,36 @@ def _pos(line: int) -> TextPosition:
10499
# ---------------------------------------------------------------------------
105100

106101

107-
async def test_default_registry_is_empty(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
102+
async def test_default_registry_is_empty(tmp_path: Path) -> None:
108103
"""CHUNKER_REGISTRY is an empty dict when no registry is passed."""
109104
(tmp_path / ".git").mkdir()
110105
(tmp_path / "hello.py").write_text("x = 1\n")
111106

112-
project = await _index_project(tmp_path, monkeypatch)
107+
project = await _index_project(tmp_path)
113108
registry = project.env.get_context(CHUNKER_REGISTRY)
114109
assert isinstance(registry, dict)
115110
assert registry == {}
116111

117112

118-
async def test_unregistered_suffix_uses_splitter(
119-
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
120-
) -> None:
113+
async def test_unregistered_suffix_uses_splitter(tmp_path: Path) -> None:
121114
"""Files with no registered chunker are processed by RecursiveSplitter."""
122115
(tmp_path / ".git").mkdir()
123116
(tmp_path / "sample.py").write_text("def foo():\n return 1\n")
124117

125-
await _index_project(tmp_path, monkeypatch)
118+
await _index_project(tmp_path)
126119
chunks = _query_chunks(tmp_path)
127120

128121
assert len(chunks) >= 1
129122
assert all(c["language"] == "python" for c in chunks)
130123
assert any("foo" in c["content"] for c in chunks)
131124

132125

133-
async def test_registered_chunker_is_called(
134-
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
135-
) -> None:
126+
async def test_registered_chunker_is_called(tmp_path: Path) -> None:
136127
"""A registered ChunkerFn splits files and may override the language."""
137128
(tmp_path / ".git").mkdir()
138129
(tmp_path / "config.toml").write_text(_TOML_CONTENT)
139130

140-
await _index_project(tmp_path, monkeypatch, chunker_registry={".toml": toml_chunker})
131+
await _index_project(tmp_path, chunker_registry={".toml": toml_chunker})
141132
chunks = _query_chunks(tmp_path)
142133

143134
assert len(chunks) == 2
@@ -147,9 +138,7 @@ async def test_registered_chunker_is_called(
147138
assert all(c["language"] == "toml" for c in chunks)
148139

149140

150-
async def test_chunker_language_none_preserves_detected(
151-
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
152-
) -> None:
141+
async def test_chunker_language_none_preserves_detected(tmp_path: Path) -> None:
153142
"""When ChunkerFn returns language=None, detect_code_language() is used."""
154143

155144
def _passthrough_chunker(path: Path, content: str) -> tuple[str | None, list[Chunk]]:
@@ -159,21 +148,19 @@ def _passthrough_chunker(path: Path, content: str) -> tuple[str | None, list[Chu
159148
(tmp_path / ".git").mkdir()
160149
(tmp_path / "script.py").write_text("x = 1\n")
161150

162-
await _index_project(tmp_path, monkeypatch, chunker_registry={".py": _passthrough_chunker})
151+
await _index_project(tmp_path, chunker_registry={".py": _passthrough_chunker})
163152
chunks = _query_chunks(tmp_path)
164153

165154
assert all(c["language"] == "python" for c in chunks)
166155

167156

168-
async def test_registry_does_not_affect_other_suffixes(
169-
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
170-
) -> None:
157+
async def test_registry_does_not_affect_other_suffixes(tmp_path: Path) -> None:
171158
"""Registering a chunker for .toml does not affect .py files."""
172159
(tmp_path / ".git").mkdir()
173160
(tmp_path / "config.toml").write_text(_TOML_CONTENT)
174161
(tmp_path / "code.py").write_text("def bar():\n pass\n")
175162

176-
await _index_project(tmp_path, monkeypatch, chunker_registry={".toml": toml_chunker})
163+
await _index_project(tmp_path, chunker_registry={".toml": toml_chunker})
177164
chunks = _query_chunks(tmp_path)
178165

179166
toml_chunks = [c for c in chunks if c["language"] == "toml"]

tests/test_daemon.py

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -56,14 +56,6 @@ def calculate_fibonacci(n: int) -> int:
5656
def daemon_sock() -> Iterator[str]:
5757
"""Start a daemon once per session and return the socket path."""
5858
import cocoindex_code.daemon as dm
59-
from cocoindex_code.shared import create_embedder
60-
from cocoindex_code.shared import embedder as shared_emb
61-
62-
emb = (
63-
shared_emb
64-
if shared_emb is not None
65-
else create_embedder(make_test_user_settings().embedding)
66-
)
6759

6860
# Use a short path to stay within AF_UNIX limit
6961
user_dir = Path(tempfile.mkdtemp(prefix="ccc_d_"))
@@ -75,10 +67,6 @@ def daemon_sock() -> Iterator[str]:
7567
old_env = os.environ.get("COCOINDEX_CODE_DIR")
7668
os.environ["COCOINDEX_CODE_DIR"] = str(user_dir)
7769

78-
# Patch create_embedder to reuse the already-loaded embedder (performance)
79-
_orig_create_embedder = dm.create_embedder
80-
dm.create_embedder = lambda settings: emb
81-
8270
save_user_settings(make_test_user_settings())
8371

8472
thread = threading.Thread(target=dm.run_daemon, daemon=True)
@@ -108,8 +96,6 @@ def daemon_sock() -> Iterator[str]:
10896
pass
10997
thread.join(timeout=5)
11098

111-
# Restore patches and env var
112-
dm.create_embedder = _orig_create_embedder
11399
if old_env is None:
114100
os.environ.pop("COCOINDEX_CODE_DIR", None)
115101
else:

0 commit comments

Comments
 (0)