Skip to content

Commit e602332

Browse files
authored
refactor: create separate Project to encapsulate coco.Environment (#63)
1 parent f5dccfa commit e602332

7 files changed

Lines changed: 110 additions & 74 deletions

File tree

src/cocoindex_code/indexer.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ async def process(
129129

130130

131131
@coco.fn
132-
async def app_main() -> None:
132+
async def indexer_main() -> None:
133133
"""Main indexing function - walks files and processes each."""
134134
db = coco.use_context(SQLITE_DB)
135135

@@ -159,10 +159,3 @@ async def app_main() -> None:
159159
# Process each file
160160
with coco.component_subpath(coco.Symbol("process_file")):
161161
await coco.mount_each(process_file, files.items(), table)
162-
163-
164-
# Create the app
165-
app = coco.App(
166-
coco.AppConfig(name="CocoIndexCode"),
167-
app_main,
168-
)

src/cocoindex_code/project.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
from __future__ import annotations
2+
3+
import asyncio
4+
5+
import cocoindex as coco
6+
from cocoindex.connectors import sqlite
7+
from cocoindex.connectors.localfs import register_base_dir
8+
9+
from .config import config
10+
from .indexer import indexer_main
11+
from .shared import CODEBASE_DIR, SQLITE_DB
12+
13+
14+
class Project:
15+
_env: coco.Environment
16+
_app: coco.App[[], None]
17+
_index_lock: asyncio.Lock
18+
_initial_index_done: bool = False
19+
20+
async def update_index(self, *, report_to_stdout: bool = False) -> None:
21+
"""Update the index, serializing concurrent calls via lock."""
22+
async with self._index_lock:
23+
try:
24+
await self._app.update(report_to_stdout=report_to_stdout)
25+
finally:
26+
self._initial_index_done = True
27+
28+
@property
29+
def env(self) -> coco.Environment:
30+
return self._env
31+
32+
@property
33+
def is_initial_index_done(self) -> bool:
34+
return self._initial_index_done
35+
36+
@staticmethod
37+
async def create() -> Project:
38+
# Ensure index directory exists
39+
config.index_dir.mkdir(parents=True, exist_ok=True)
40+
41+
# Set CocoIndex state database path
42+
settings = coco.Settings.from_env(config.cocoindex_db_path)
43+
44+
context = coco.ContextProvider()
45+
46+
# Provide codebase root directory to environment
47+
context.provide(CODEBASE_DIR, register_base_dir("codebase", config.codebase_root_path))
48+
# Connect to SQLite with vector extension
49+
conn = sqlite.connect(str(config.target_sqlite_db_path), load_vec="auto")
50+
context.provide(SQLITE_DB, sqlite.register_db("index_db", conn))
51+
52+
env = coco.Environment(settings, context_provider=context)
53+
app = coco.App(
54+
coco.AppConfig(
55+
name="CocoIndexCode",
56+
environment=env,
57+
),
58+
indexer_main,
59+
)
60+
61+
result = Project.__new__(Project)
62+
result._env = env
63+
result._app = app
64+
result._index_lock = asyncio.Lock()
65+
return result
66+
67+
68+
# Process-wide Project instance; stays None until default_project() first runs.
_project: Project | None = None


async def default_project() -> Project:
    """Return the shared Project, building it on the first call.

    The instance is stored in the module-level ``_project`` and reused by
    every later call.
    """
    global _project
    if _project is not None:
        return _project
    _project = await Project.create()
    return _project

src/cocoindex_code/query.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44
import sqlite3
55
from typing import Any
66

7-
import cocoindex as coco
8-
97
from .config import config
8+
from .project import default_project
109
from .schema import QueryResult
1110
from .shared import SQLITE_DB, embedder, query_prompt_name
1211

@@ -102,8 +101,8 @@ async def query_codebase(
102101
"Please run a query with refresh_index=True first."
103102
)
104103

105-
coco_env = await coco.default_env()
106-
db = coco_env.get_context(SQLITE_DB)
104+
coco_proj = await default_project()
105+
db = coco_proj.env.get_context(SQLITE_DB)
107106

108107
# Generate query embedding.
109108
query_embedding = await embedder.embed(query, query_prompt_name)

src/cocoindex_code/server.py

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,11 @@
33
import argparse
44
import asyncio
55

6-
import cocoindex as coco
76
from mcp.server.fastmcp import FastMCP
87
from pydantic import BaseModel, Field
98

109
from .config import config
11-
from .indexer import app as indexer_app
10+
from .project import default_project
1211
from .query import query_codebase
1312
from .shared import SQLITE_DB
1413

@@ -27,17 +26,11 @@
2726
),
2827
)
2928

30-
# Lock to prevent concurrent index updates
31-
_index_lock = asyncio.Lock()
32-
33-
# Event set once the initial background index is ready
34-
_initial_index_done = asyncio.Event()
35-
3629

3730
async def _refresh_index() -> None:
3831
"""Refresh the index. Uses lock to prevent concurrent updates."""
39-
async with _index_lock:
40-
await indexer_app.update(report_to_stdout=False)
32+
proj = await default_project()
33+
await proj.update_index()
4134

4235

4336
# === Pydantic Models for Tool Inputs/Outputs ===
@@ -126,7 +119,8 @@ async def search(
126119
),
127120
) -> SearchResultModel:
128121
"""Query the codebase index."""
129-
if not _initial_index_done.is_set():
122+
proj = await default_project()
123+
if not proj.is_initial_index_done:
130124
return SearchResultModel(
131125
success=False,
132126
message=(
@@ -182,19 +176,14 @@ async def _async_serve() -> None:
182176
"""Async entry point for the MCP server."""
183177

184178
# Refresh index in background so startup isn't blocked
185-
async def _initial_index() -> None:
186-
try:
187-
await _refresh_index()
188-
finally:
189-
_initial_index_done.set()
190-
191-
asyncio.create_task(_initial_index())
179+
asyncio.create_task(_refresh_index())
192180
await mcp.run_stdio_async()
193181

194182

195183
async def _async_index() -> None:
196184
"""Async entry point for the index command."""
197-
await indexer_app.update(report_to_stdout=True)
185+
proj = await default_project()
186+
await proj.update_index(report_to_stdout=True)
198187
await _print_index_stats()
199188

200189

@@ -205,8 +194,8 @@ async def _print_index_stats() -> None:
205194
print("No index database found.")
206195
return
207196

208-
coco_env = await coco.default_env()
209-
db = coco_env.get_context(SQLITE_DB)
197+
proj = await default_project()
198+
db = proj.env.get_context(SQLITE_DB)
210199

211200
with db.value.readonly() as conn:
212201
total_chunks = conn.execute("SELECT COUNT(*) FROM code_chunks_vec").fetchone()[0]

src/cocoindex_code/shared.py

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,12 @@
33
from __future__ import annotations
44

55
import logging
6-
from collections.abc import Iterator
76
from dataclasses import dataclass
87
from typing import TYPE_CHECKING, Annotated
98

109
import cocoindex as coco
1110
from cocoindex.connectors import sqlite
12-
from cocoindex.connectors.localfs import FilePath, register_base_dir
11+
from cocoindex.connectors.localfs import FilePath
1312
from numpy.typing import NDArray
1413

1514
if TYPE_CHECKING:
@@ -54,27 +53,6 @@
5453
CODEBASE_DIR = coco.ContextKey[FilePath]("codebase_dir")
5554

5655

57-
@coco.lifespan
58-
def coco_lifespan(builder: coco.EnvironmentBuilder) -> Iterator[None]:
59-
"""Set up database connection."""
60-
# Ensure index directory exists
61-
config.index_dir.mkdir(parents=True, exist_ok=True)
62-
63-
# Set CocoIndex state database path
64-
builder.settings.db_path = config.cocoindex_db_path
65-
66-
# Provide codebase root directory to environment
67-
builder.provide(CODEBASE_DIR, register_base_dir("codebase", config.codebase_root_path))
68-
69-
# Connect to SQLite with vector extension
70-
conn = sqlite.connect(str(config.target_sqlite_db_path), load_vec="auto")
71-
builder.provide(SQLITE_DB, sqlite.register_db("index_db", conn))
72-
73-
yield
74-
75-
conn.close()
76-
77-
7856
@dataclass
7957
class CodeChunk:
8058
"""Schema for storing code chunks in SQLite."""

tests/conftest.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from collections.abc import AsyncIterator
66
from pathlib import Path
77

8-
import cocoindex as coco
98
import pytest
109
import pytest_asyncio
1110

@@ -24,10 +23,12 @@ def test_codebase_root() -> Path:
2423
@pytest_asyncio.fixture(scope="session", loop_scope="session")
2524
async def coco_runtime() -> AsyncIterator[None]:
2625
"""
27-
Set up CocoIndex runtime context for the entire test session.
26+
Set up CocoIndex project for the entire test session.
2827
2928
Uses session-scoped event loop to ensure CocoIndex environment
3029
persists across all tests.
3130
"""
32-
async with coco.runtime():
33-
yield
31+
from cocoindex_code.project import default_project
32+
33+
await default_project()
34+
yield

tests/test_e2e.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
from cocoindex_code.config import _discover_codebase_root
9-
from cocoindex_code.indexer import app
9+
from cocoindex_code.project import default_project
1010
from cocoindex_code.query import query_codebase
1111

1212
pytest_plugins = ("pytest_asyncio",)
@@ -194,7 +194,7 @@ async def test_index_and_query_codebase(
194194
) -> None:
195195
"""Should index a codebase and return relevant query results."""
196196
setup_base_codebase(test_codebase_root)
197-
await app.update(report_to_stdout=False)
197+
await (await default_project()).update_index()
198198

199199
# Verify index was created
200200
index_dir = test_codebase_root / ".cocoindex_code"
@@ -218,7 +218,7 @@ async def test_incremental_update_add_file(
218218
) -> None:
219219
"""Should reflect newly added files after re-indexing."""
220220
setup_base_codebase(test_codebase_root)
221-
await app.update(report_to_stdout=False)
221+
await (await default_project()).update_index()
222222

223223
# Query for ML content - should not find it
224224
results = await query_codebase("machine learning neural network")
@@ -232,7 +232,7 @@ async def test_incremental_update_add_file(
232232
(test_codebase_root / "ml_model.py").write_text(SAMPLE_ML_MODEL_PY)
233233

234234
# Re-index and query again
235-
await app.update(report_to_stdout=False)
235+
await (await default_project()).update_index()
236236
results = await query_codebase("neural network machine learning")
237237

238238
assert len(results) > 0
@@ -244,13 +244,13 @@ async def test_incremental_update_modify_file(
244244
) -> None:
245245
"""Should reflect file modifications after re-indexing."""
246246
setup_base_codebase(test_codebase_root)
247-
await app.update(report_to_stdout=False)
247+
await (await default_project()).update_index()
248248

249249
# Modify utils.py to add authentication
250250
(test_codebase_root / "utils.py").write_text(SAMPLE_UTILS_AUTH_PY)
251251

252252
# Re-index and query for authentication
253-
await app.update(report_to_stdout=False)
253+
await (await default_project()).update_index()
254254
results = await query_codebase("user authentication login")
255255

256256
assert len(results) > 0
@@ -264,7 +264,7 @@ async def test_incremental_update_delete_file(
264264
) -> None:
265265
"""Should no longer return results from deleted files after re-indexing."""
266266
setup_base_codebase(test_codebase_root)
267-
await app.update(report_to_stdout=False)
267+
await (await default_project()).update_index()
268268

269269
# Query for database - should find it
270270
results = await query_codebase("database connection execute query")
@@ -274,7 +274,7 @@ async def test_incremental_update_delete_file(
274274
(test_codebase_root / "lib" / "database.py").unlink()
275275

276276
# Re-index and query again - should no longer find database.py
277-
await app.update(report_to_stdout=False)
277+
await (await default_project()).update_index()
278278
results = await query_codebase("database connection execute query")
279279
assert not any("database.py" in r.file_path for r in results)
280280

@@ -335,7 +335,7 @@ class TestSearchFilters:
335335
async def test_filter_by_language(self, test_codebase_root: Path, coco_runtime: None) -> None:
336336
"""Should return only results matching the specified language."""
337337
setup_multi_lang_codebase(test_codebase_root)
338-
await app.update(report_to_stdout=False)
338+
await (await default_project()).update_index()
339339

340340
results = await query_codebase("function", limit=50, languages=["python"])
341341
assert len(results) > 0
@@ -347,7 +347,7 @@ async def test_filter_by_language_multiple(
347347
) -> None:
348348
"""Should return results matching any of the specified languages."""
349349
setup_multi_lang_codebase(test_codebase_root)
350-
await app.update(report_to_stdout=False)
350+
await (await default_project()).update_index()
351351

352352
results = await query_codebase("function", limit=50, languages=["python", "javascript"])
353353
assert len(results) > 0
@@ -363,7 +363,7 @@ async def test_filter_by_file_path_glob(
363363
) -> None:
364364
"""Should return only results matching the file path glob pattern."""
365365
setup_multi_lang_codebase(test_codebase_root)
366-
await app.update(report_to_stdout=False)
366+
await (await default_project()).update_index()
367367

368368
results = await query_codebase("function", limit=50, paths=["lib/*"])
369369
assert len(results) > 0
@@ -375,7 +375,7 @@ async def test_filter_by_file_path_wildcard_extension(
375375
) -> None:
376376
"""Should filter by file extension using glob wildcard."""
377377
setup_multi_lang_codebase(test_codebase_root)
378-
await app.update(report_to_stdout=False)
378+
await (await default_project()).update_index()
379379

380380
results = await query_codebase("function", limit=50, paths=["*.js"])
381381
assert len(results) > 0
@@ -387,7 +387,7 @@ async def test_filter_by_both_language_and_file_path(
387387
) -> None:
388388
"""Should apply both language and file path filters together."""
389389
setup_multi_lang_codebase(test_codebase_root)
390-
await app.update(report_to_stdout=False)
390+
await (await default_project()).update_index()
391391

392392
# Filter for Python files under lib/
393393
results = await query_codebase("function", limit=50, languages=["python"], paths=["lib/*"])
@@ -401,7 +401,7 @@ async def test_no_filter_returns_all_languages(
401401
) -> None:
402402
"""Should return results from all languages when no filter is applied."""
403403
setup_multi_lang_codebase(test_codebase_root)
404-
await app.update(report_to_stdout=False)
404+
await (await default_project()).update_index()
405405

406406
results = await query_codebase("function", limit=50)
407407
languages_found = {r.language for r in results}

0 commit comments

Comments
 (0)