diff --git a/src/cocoindex_code/indexer.py b/src/cocoindex_code/indexer.py index 18f0dd7..4596011 100644 --- a/src/cocoindex_code/indexer.py +++ b/src/cocoindex_code/indexer.py @@ -11,7 +11,7 @@ from cocoindex.resources.id import IdGenerator from .settings import PROJECT_SETTINGS -from .shared import CODEBASE_DIR, EMBEDDER, SQLITE_DB, CodeChunk +from .shared import CODEBASE_DIR, EMBEDDER, EXT_LANG_OVERRIDE_MAP, SQLITE_DB, CodeChunk # Chunking configuration CHUNK_SIZE = 2000 @@ -28,7 +28,6 @@ async def process_file( table: sqlite.TableTarget[CodeChunk], ) -> None: """Process a single file: chunk, embed, and store.""" - ps = coco.use_context(PROJECT_SETTINGS) embedder = coco.use_context(EMBEDDER) try: @@ -40,10 +39,9 @@ async def process_file( return suffix = file.file_path.path.suffix - # Check language overrides from project settings - override_map = {f".{lo.ext}": lo.lang for lo in ps.language_overrides} + ext_lang_override_map = coco.use_context(EXT_LANG_OVERRIDE_MAP) language = ( - override_map.get(suffix) + ext_lang_override_map.get(suffix) or detect_code_language(filename=file.file_path.path.name) or "text" ) diff --git a/src/cocoindex_code/project.py b/src/cocoindex_code/project.py index 7876f7d..3e9cf7f 100644 --- a/src/cocoindex_code/project.py +++ b/src/cocoindex_code/project.py @@ -12,7 +12,7 @@ from .indexer import indexer_main from .protocol import IndexingProgress from .settings import PROJECT_SETTINGS, ProjectSettings -from .shared import CODEBASE_DIR, EMBEDDER, SQLITE_DB, Embedder +from .shared import CODEBASE_DIR, EMBEDDER, EXT_LANG_OVERRIDE_MAP, SQLITE_DB, Embedder class Project: @@ -86,6 +86,10 @@ async def create( context.provide(SQLITE_DB, sqlite.connect(str(target_sqlite_db_path), load_vec=True)) context.provide(EMBEDDER, embedder) context.provide(PROJECT_SETTINGS, project_settings) + context.provide( + EXT_LANG_OVERRIDE_MAP, + {f".{lo.ext}": lo.lang for lo in project_settings.language_overrides}, + ) env = coco.Environment(settings, context_provider=context) app = coco.App( diff --git a/src/cocoindex_code/shared.py b/src/cocoindex_code/shared.py index 714224d..7882ddb 100644 --- a/src/cocoindex_code/shared.py +++ b/src/cocoindex_code/shared.py @@ -31,6 +31,7 @@ EMBEDDER = coco.ContextKey[Embedder]("embedder") SQLITE_DB = coco.ContextKey[sqlite.ManagedConnection]("index_db", tracked=False) CODEBASE_DIR = coco.ContextKey[pathlib.Path]("codebase", tracked=False) +EXT_LANG_OVERRIDE_MAP = coco.ContextKey[dict[str, str]]("ext_lang_override_map") # Module-level variable — set by daemon at startup (needed for CodeChunk annotation). embedder: Embedder | None = None