Skip to content

Commit 33334bc

Browse files
georgeh0 and claude authored
feat: add COCOINDEX_CODE_DB_PATH_MAPPING for custom database locations (#116)
* feat: add COCOINDEX_CODE_DB_PATH_MAPPING for custom database locations Allow remapping database file locations via environment variable, enabling Docker deployments where databases live on the container's native filesystem while source code is mounted from the host. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: use = delimiter for DB path mapping (avoid Windows colon conflict) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: use platform-agnostic tmp_path in DB mapping tests Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent e1a66b0 commit 33334bc

10 files changed

Lines changed: 317 additions & 12 deletions

File tree

CLAUDE.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ We distinguish between **internal modules** (under packages with `_` prefix, e.g
3232
* Standard library and internal imports don't need underscore prefix
3333
* Only prefix symbols that are truly private to the module itself (e.g. `_context_var` for a module-private ContextVar)
3434

35+
### Imports
36+
37+
Prefer top-level imports. Only use local (in-function) imports when truly necessary — e.g. to break circular dependencies or to defer a heavy import that isn't always needed.
38+
3539
### Type Annotations
3640

3741
Avoid `Any` whenever feasible. Use specific types — including concrete types from third-party libraries. Only use `Any` when the type is truly generic and no downstream code needs to downcast it.

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,26 @@ embedding:
418418
| xml | | `.xml` |
419419
| yaml | | `.yaml`, `.yml` |
420420

421+
### Custom Database Location
422+
423+
By default, index databases (`cocoindex.db` and `target_sqlite.db`) live alongside settings in `<project>/.cocoindex_code/`. When running in Docker, you may want the databases on the container's native filesystem for performance (LMDB doesn't work well on mounted volumes) while keeping the source code and settings on a mounted volume.
424+
425+
Set `COCOINDEX_CODE_DB_PATH_MAPPING` to remap database locations by path prefix:
426+
427+
```bash
428+
COCOINDEX_CODE_DB_PATH_MAPPING=/workspace=/db-files
429+
```
430+
431+
With this mapping, a project at `/workspace/myrepo` stores its databases in `/db-files/myrepo/` instead of `/workspace/myrepo/.cocoindex_code/`. Settings files remain in the original location.
432+
433+
Multiple mappings are comma-separated and resolved in order (first match wins):
434+
435+
```bash
436+
COCOINDEX_CODE_DB_PATH_MAPPING=/workspace=/db-files,/workspace2=/db-files2
437+
```
438+
439+
Both source and target must be absolute paths. If no mapping matches, the default location is used.
440+
421441
## Troubleshooting
422442

423443
Run `ccc doctor` to diagnose common issues. It checks your settings, daemon health, embedding model, file matching, and index status — all in one command.

src/cocoindex_code/cli.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
default_user_settings,
1717
find_parent_with_marker,
1818
find_project_root,
19+
resolve_db_dir,
1920
save_project_settings,
2021
save_user_settings,
2122
user_settings_path,
@@ -389,10 +390,11 @@ def reset(
389390
"""Reset project databases and optionally remove settings."""
390391
project_root = require_project_root()
391392
cocoindex_dir = project_root / ".cocoindex_code"
393+
db_dir = resolve_db_dir(project_root)
392394

393395
db_files = [
394-
cocoindex_dir / "cocoindex.db",
395-
cocoindex_dir / "target_sqlite.db",
396+
db_dir / "cocoindex.db",
397+
db_dir / "target_sqlite.db",
396398
]
397399
settings_file = cocoindex_dir / "settings.yml"
398400

@@ -436,6 +438,12 @@ def reset(
436438
f.unlink(missing_ok=True)
437439

438440
if all_:
441+
# Remove db_dir if empty and different from cocoindex_dir
442+
if db_dir != cocoindex_dir:
443+
try:
444+
db_dir.rmdir()
445+
except OSError:
446+
pass # Not empty or doesn't exist
439447
# Remove .cocoindex_code/ if empty
440448
try:
441449
cocoindex_dir.rmdir()
@@ -539,6 +547,10 @@ def doctor() -> None:
539547
other_keys = [k for k in env_resp.env_names if k not in settings_keys]
540548
if other_keys:
541549
_typer.echo(f" Other env vars in daemon: {', '.join(sorted(other_keys))}")
550+
if env_resp.db_path_mappings:
551+
_typer.echo(" DB path mappings:")
552+
for m in env_resp.db_path_mappings:
553+
_typer.echo(f" {m.source} \u2192 {m.target}")
542554
except Exception as e:
543555
_print_error(f"Failed to get daemon env: {e}")
544556

src/cocoindex_code/config.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from dataclasses import dataclass
88
from pathlib import Path
99

10+
from .settings import resolve_db_dir
11+
1012
_DEFAULT_MODEL = "sbert/sentence-transformers/all-MiniLM-L6-v2"
1113

1214

@@ -96,8 +98,8 @@ def from_env(cls) -> Config:
9698
_DEFAULT_MODEL,
9799
)
98100

99-
# Index directory is always under the root
100-
index_dir = root / ".cocoindex_code"
101+
# Index directory: apply DB path mapping if configured
102+
index_dir = resolve_db_dir(root)
101103

102104
# Device: auto-detect CUDA or use env override
103105
device = os.environ.get("COCOINDEX_CODE_DEVICE")

src/cocoindex_code/daemon.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from .settings import (
4949
global_settings_mtime_us,
5050
load_user_settings,
51+
resolve_db_dir,
5152
user_settings_dir,
5253
)
5354
from .shared import Embedder, create_embedder
@@ -345,7 +346,7 @@ async def _check_index_status(project_root_str: str) -> DoctorCheckResult:
345346
from cocoindex.connectors import sqlite as coco_sqlite
346347

347348
project_root = Path(project_root_str)
348-
db_path = project_root / ".cocoindex_code" / "target_sqlite.db"
349+
db_path = resolve_db_dir(project_root) / "target_sqlite.db"
349350
details = [f"Index: {db_path}"]
350351

351352
if not db_path.exists():
@@ -441,9 +442,16 @@ async def _dispatch(
441442
return StopResponse(ok=True)
442443

443444
if isinstance(req, DaemonEnvRequest):
445+
from .protocol import DbPathMappingEntry
446+
from .settings import get_db_path_mappings
447+
444448
return DaemonEnvResponse(
445449
env_names=sorted(os.environ.keys()),
446450
settings_env_names=settings_env_names,
451+
db_path_mappings=[
452+
DbPathMappingEntry(source=str(m.source), target=str(m.target))
453+
for m in get_db_path_mappings()
454+
],
447455
)
448456

449457
if isinstance(req, DoctorRequest):

src/cocoindex_code/project.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
SearchResult,
2222
)
2323
from .query import query_codebase
24+
from .settings import resolve_db_dir
2425
from .shared import (
2526
CODEBASE_DIR,
2627
EMBEDDER,
@@ -170,7 +171,7 @@ async def search(
170171
offset: int = 0,
171172
) -> list[SearchResult]:
172173
"""Search within this project."""
173-
target_db = self._project_root / ".cocoindex_code" / "target_sqlite.db"
174+
target_db = resolve_db_dir(self._project_root) / "target_sqlite.db"
174175
results = await query_codebase(
175176
query=query,
176177
target_sqlite_db_path=target_db,
@@ -254,11 +255,14 @@ async def create(
254255
indexer loads them fresh from disk on every run so that user edits
255256
take effect without restarting the daemon.
256257
"""
257-
index_dir = project_root / ".cocoindex_code"
258-
index_dir.mkdir(parents=True, exist_ok=True)
258+
settings_dir = project_root / ".cocoindex_code"
259+
settings_dir.mkdir(parents=True, exist_ok=True)
259260

260-
cocoindex_db_path = index_dir / "cocoindex.db"
261-
target_sqlite_db_path = index_dir / "target_sqlite.db"
261+
db_dir = resolve_db_dir(project_root)
262+
db_dir.mkdir(parents=True, exist_ok=True)
263+
264+
cocoindex_db_path = db_dir / "cocoindex.db"
265+
target_sqlite_db_path = db_dir / "target_sqlite.db"
262266

263267
settings = coco.Settings.from_env(cocoindex_db_path)
264268

src/cocoindex_code/protocol.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,15 @@ class DoctorResponse(_msgspec.Struct, tag="doctor"):
158158
final: bool = False
159159

160160

161+
class DbPathMappingEntry(_msgspec.Struct):
    """One DB path mapping as carried in protocol messages.

    Both fields are plain strings; the daemon fills them by calling ``str()``
    on the ``source`` and ``target`` paths of each configured mapping.
    """

    source: str  # path prefix that project roots are matched against
    target: str  # directory prefix the databases are relocated under
164+
165+
161166
class DaemonEnvResponse(_msgspec.Struct, tag="daemon_env"):
    """Daemon reply describing the environment it is running with."""

    # Names of the environment variables visible to the daemon process.
    env_names: list[str]
    # NOTE(review): presumably the env var names that come from settings —
    # confirm against the daemon code that populates it.
    settings_env_names: list[str]
    # Parsed ``COCOINDEX_CODE_DB_PATH_MAPPING`` entries; empty when not supplied.
    # NOTE(review): relies on msgspec's handling of an empty-list default — confirm.
    db_path_mappings: list[DbPathMappingEntry] = []
164170

165171

166172
class ErrorResponse(_msgspec.Struct, tag="error"):

src/cocoindex_code/settings.py

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import os
56
from dataclasses import dataclass, field
67
from pathlib import Path
78
from typing import Any
@@ -115,14 +116,89 @@ def default_project_settings() -> ProjectSettings:
115116
_SETTINGS_FILE_NAME = "settings.yml" # project-level
116117
_USER_SETTINGS_FILE_NAME = "global_settings.yml" # user-level
117118

119+
_ENV_DB_PATH_MAPPING = "COCOINDEX_CODE_DB_PATH_MAPPING"
120+
121+
122+
@dataclass
123+
class DbPathMapping:
124+
source: Path
125+
target: Path
126+
127+
128+
_db_path_mapping: list[DbPathMapping] | None = None
129+
130+
131+
def _parse_db_path_mapping() -> list[DbPathMapping]:
132+
"""Parse ``COCOINDEX_CODE_DB_PATH_MAPPING`` env var.
133+
134+
Format: ``/src1=/dst1,/src2=/dst2``
135+
Both source and target must be absolute paths.
136+
"""
137+
raw = os.environ.get(_ENV_DB_PATH_MAPPING, "")
138+
if not raw.strip():
139+
return []
140+
141+
mappings: list[DbPathMapping] = []
142+
for entry in raw.split(","):
143+
entry = entry.strip()
144+
if not entry:
145+
continue
146+
parts = entry.split("=", 1)
147+
if len(parts) != 2 or not parts[0] or not parts[1]:
148+
raise ValueError(
149+
f"{_ENV_DB_PATH_MAPPING}: invalid entry {entry!r}, expected format 'source=target'"
150+
)
151+
source = Path(parts[0])
152+
target = Path(parts[1])
153+
if not source.is_absolute():
154+
raise ValueError(
155+
f"{_ENV_DB_PATH_MAPPING}: source path must be absolute, got {source!r}"
156+
)
157+
if not target.is_absolute():
158+
raise ValueError(
159+
f"{_ENV_DB_PATH_MAPPING}: target path must be absolute, got {target!r}"
160+
)
161+
mappings.append(DbPathMapping(source=source.resolve(), target=target.resolve()))
162+
return mappings
163+
164+
165+
def resolve_db_dir(project_root: Path) -> Path:
166+
"""Return the directory for database files given a project root.
167+
168+
Applies ``COCOINDEX_CODE_DB_PATH_MAPPING`` if set, otherwise falls back
169+
to ``project_root / ".cocoindex_code"``.
170+
"""
171+
global _db_path_mapping # noqa: PLW0603
172+
if _db_path_mapping is None:
173+
_db_path_mapping = _parse_db_path_mapping()
174+
175+
resolved = project_root.resolve()
176+
for mapping in _db_path_mapping:
177+
if resolved == mapping.source or resolved.is_relative_to(mapping.source):
178+
rel = resolved.relative_to(mapping.source)
179+
return mapping.target / rel
180+
return project_root / _SETTINGS_DIR_NAME
181+
182+
183+
def get_db_path_mappings() -> list[DbPathMapping]:
184+
"""Return the parsed DB path mappings from ``COCOINDEX_CODE_DB_PATH_MAPPING``."""
185+
global _db_path_mapping # noqa: PLW0603
186+
if _db_path_mapping is None:
187+
_db_path_mapping = _parse_db_path_mapping()
188+
return list(_db_path_mapping)
189+
190+
191+
def _reset_db_path_mapping_cache() -> None:
192+
"""Reset the cached mapping (for tests)."""
193+
global _db_path_mapping # noqa: PLW0603
194+
_db_path_mapping = None
195+
118196

119197
def user_settings_dir() -> Path:
120198
"""Return ``~/.cocoindex_code/``.
121199
122200
Respects ``COCOINDEX_CODE_DIR`` env var for overriding the base directory.
123201
"""
124-
import os
125-
126202
override = os.environ.get("COCOINDEX_CODE_DIR")
127203
if override:
128204
return Path(override)

tests/test_e2e.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from cocoindex_code.cli import app
2121
from cocoindex_code.client import stop_daemon
2222
from cocoindex_code.settings import (
23+
_reset_db_path_mapping_cache,
2324
default_project_settings,
2425
find_parent_with_marker,
2526
save_project_settings,
@@ -580,6 +581,87 @@ def test_session_daemon_restart_missing_global_settings() -> None:
580581
assert "User settings not found" in result.output
581582

582583

584+
# ---------------------------------------------------------------------------
585+
# DB path mapping tests
586+
# ---------------------------------------------------------------------------
587+
588+
589+
@pytest.fixture()
def e2e_project_with_db_mapping() -> Iterator[tuple[Path, Path]]:
    """Set up a project with COCOINDEX_CODE_DB_PATH_MAPPING pointing to a separate db dir.

    Creates ``<tmp>/workspace/myproject`` (with a sample ``main.py`` and a
    ``.git`` marker) and ``<tmp>/db-files``, maps ``<tmp>/workspace`` to
    ``<tmp>/db-files`` through the env var, and changes into the project.

    Yields (project_dir, db_base_dir).

    On teardown the project is reset, the daemon stopped, and the cwd,
    environment, mapping cache and temporary directory are restored/removed
    even if the reset or daemon shutdown fails.
    """
    # Local import: only this fixture's cleanup needs it.
    import shutil

    base_dir = Path(tempfile.mkdtemp(prefix="ccc_e2e_"))
    project_dir = base_dir / "workspace" / "myproject"
    project_dir.mkdir(parents=True)
    db_base_dir = base_dir / "db-files"
    db_base_dir.mkdir()

    (project_dir / "main.py").write_text(SAMPLE_MAIN_PY)
    (project_dir / ".git").mkdir()

    old_env = {
        k: os.environ.get(k) for k in ("COCOINDEX_CODE_DIR", "COCOINDEX_CODE_DB_PATH_MAPPING")
    }
    os.environ["COCOINDEX_CODE_DIR"] = str(base_dir)
    workspace = str(base_dir / "workspace")
    os.environ["COCOINDEX_CODE_DB_PATH_MAPPING"] = f"{workspace}={db_base_dir}"
    # Drop any mapping parsed earlier in this process so the new env var is read.
    _reset_db_path_mapping_cache()
    old_cwd = os.getcwd()
    os.chdir(project_dir)

    try:
        yield project_dir, db_base_dir
    finally:
        try:
            # The test may have changed directory; reset must run in the project.
            os.chdir(project_dir)
            runner.invoke(app, ["reset", "--all", "-f"])
            stop_daemon()
        finally:
            # Always restore process-wide state so later tests are unaffected.
            os.chdir(old_cwd)
            for k, v in old_env.items():
                if v is None:
                    os.environ.pop(k, None)
                else:
                    os.environ[k] = v
            _reset_db_path_mapping_cache()
            # Best-effort removal of the temp tree created by mkdtemp above.
            shutil.rmtree(base_dir, ignore_errors=True)
627+
628+
629+
def test_session_db_path_mapping(
    e2e_project_with_db_mapping: tuple[Path, Path],
) -> None:
    """Init → index → verify databases are in the mapped directory → search works."""
    project_dir, db_base_dir = e2e_project_with_db_mapping
    # The fixture maps <tmp>/workspace to db_base_dir, so the project at
    # <tmp>/workspace/myproject keeps its databases in db_base_dir/myproject.
    mapped_db_dir = db_base_dir / "myproject"

    # Init
    result = runner.invoke(app, ["init"], catch_exceptions=False)
    assert result.exit_code == 0, result.output

    # Settings should be in the project dir, NOT the mapped dir
    assert (project_dir / ".cocoindex_code" / "settings.yml").exists()

    # Index
    result = runner.invoke(app, ["index"], catch_exceptions=False)
    assert result.exit_code == 0, result.output

    # Databases should be in the mapped directory
    assert (mapped_db_dir / "target_sqlite.db").exists()
    # Databases should NOT be in the project's .cocoindex_code dir
    assert not (project_dir / ".cocoindex_code" / "target_sqlite.db").exists()

    # Search should work
    result = runner.invoke(app, ["search", "fibonacci"], catch_exceptions=False)
    assert result.exit_code == 0, result.output
    assert "main.py" in result.output

    # Reset should clean databases from the mapped dir
    result = runner.invoke(app, ["reset", "-f"], catch_exceptions=False)
    # Include the CLI output on failure, like the other exit-code checks.
    assert result.exit_code == 0, result.output
    assert not (mapped_db_dir / "target_sqlite.db").exists()
    # Settings still in place
    assert (project_dir / ".cocoindex_code" / "settings.yml").exists()
663+
664+
583665
# ---------------------------------------------------------------------------
584666
# Unit tests (not session-based)
585667
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)