Skip to content

Commit 8015afe

Browse files
committed
feat: auto generate config file on first run
1 parent f0635d8 commit 8015afe

2 files changed

Lines changed: 117 additions & 2 deletions

File tree

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,17 @@ export CODE_RAG_CHUNK_SIZE="2048"
107107
export CODE_RAG_ADDITIONAL_IGNORE_PATTERNS="*.tmp,*.bak,logs/"
108108
```
109109

110+
111+
### Configuration Files
112+
113+
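Code-RAG reads configuration from the following sources, in this order. Every config file that exists is loaded, and for each individual setting the first file that defines it wins: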
114+
115+
1. `CODE_RAG_CONFIG_FILE` environment variable
116+
2. `code-rag.config` in the current directory (project-specific)
117+
3. `~/.config/code-rag/config` (user-global)
118+
- **Auto-created with default values** if it doesn't exist.
119+
4. Shell environment variables (not a file: they always have the highest priority and override values from any config file)
120+
110121
Full configuration options in [docs/IMPLEMENTATION.md](docs/IMPLEMENTATION.md#configuration-system).
111122

112123
## How It Works

src/config/config.py

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,65 @@
55

66
from dotenv import load_dotenv
77

8-
env_path = Path(__file__).resolve().parent.parent.parent / ".env"
9-
load_dotenv(env_path)
8+
# Template written verbatim to the user-global config file when that file
# does not exist yet. It uses dotenv syntax: "#" comment lines and KEY=VALUE
# settings.
DEFAULT_CONFIG = """# Configuration for code-rag
# This file is automatically generated. Modify as needed.

# Embedding model to use for generating vector embeddings
# Options:
# - nomic-ai/CodeRankEmbed (Recommended for code)
# - text-embedding-3-small (Requires OPENAI_API_KEY)
CODE_RAG_EMBEDDING_MODEL=nomic-ai/CodeRankEmbed

# Database type: 'chroma' or 'qdrant'
CODE_RAG_DATABASE_TYPE=chroma

# Database path (optional, defaults to OS-specific cache directory)
# Windows: %LOCALAPPDATA%\\code-rag
# macOS: ~/Library/Caches/code-rag
# Linux: ~/.cache/code-rag
# CODE_RAG_DATABASE_PATH=

# Chunk size in characters for splitting files
CODE_RAG_CHUNK_SIZE=1024

# Batch size for processing documents
CODE_RAG_BATCH_SIZE=32

# Whether to include file headers in chunks (true/false)
CODE_RAG_INCLUDE_FILE_HEADER=true

# Whether to exclude test files from indexing (true/false)
CODE_RAG_EXCLUDE_TESTS=false

# Additional ignore patterns (comma-separated list)
# Examples: *.log,*.tmp,custom_ignore_dir/
CODE_RAG_ADDITIONAL_IGNORE_PATTERNS=

# Whether reranking is enabled (true/false)
CODE_RAG_RERANKER_ENABLED=false

# Reranker model to use
CODE_RAG_RERANKER_MODEL=mixedbread-ai/mxbai-rerank-xsmall-v1

# Multiplier used by the reranker (integer)
CODE_RAG_RERANKER_MULTIPLIER=2

# Security / Verification
CODE_RAG_VERIFY_CHANGES_WITH_HASH=true

# Shared Embedding Server
# Enables reusing the model across multiple calls to save memory
CODE_RAG_SHARED_SERVER=true
CODE_RAG_SHARED_SERVER_PORT=8199
"""
1059

1160

1261
class Config:
1362
"""Configuration handler for the code-rag application."""
1463

1564
def __init__(self):
1665
"""Initialize configuration from environment variables or defaults."""
66+
self._load_config_files()
1767
self.embedding_model = os.getenv(
1868
"CODE_RAG_EMBEDDING_MODEL", "nomic-ai/CodeRankEmbed"
1969
)
@@ -80,6 +130,60 @@ def __init__(self):
80130
).lower() in ("true", "1", "yes")
81131
self.shared_server_port = self._get_int_env("CODE_RAG_SHARED_SERVER_PORT", 8199)
82132

133+
def _load_config_files(self) -> None:
    """
    Load configuration from config files in precedence order.

    Candidate files, highest priority first:

    1. Custom config file (path given by CODE_RAG_CONFIG_FILE)
    2. Current directory (code-rag.config)
    3. User config (~/.config/code-rag/config) - created from
       DEFAULT_CONFIG if missing
    4. ``.env`` three directories above this module (dev fallback)

    Every candidate that exists is loaded with ``override=False``, so a
    variable that is already set is never overwritten. Variables from the
    shell therefore beat every file, and between files the one loaded
    first wins, which is why candidates are loaded highest priority first.
    """
    paths_to_check = []

    # 1. Direct path via env var (highest file priority)
    if custom_path := os.getenv("CODE_RAG_CONFIG_FILE"):
        paths_to_check.append(Path(custom_path).expanduser())

    # 2. Local directory project config
    paths_to_check.append(Path.cwd() / "code-rag.config")

    # 3. User configuration (XDG-style): ~/.config/code-rag/config
    try:
        user_config_dir = Path.home() / ".config" / "code-rag"
    except RuntimeError:
        # The home directory cannot be resolved; skip the user-level
        # config instead of failing configuration loading altogether.
        user_config_dir = None

    if user_config_dir is not None:
        user_config_file = user_config_dir / "config"

        # Auto-create with defaults on first run. Creation is best-effort
        # and deliberately silent: nothing is printed, so stdout stays
        # clean (it may be carrying MCP traffic).
        if not user_config_file.exists():
            try:
                user_config_dir.mkdir(parents=True, exist_ok=True)
                user_config_file.write_text(DEFAULT_CONFIG, encoding="utf-8")
            except OSError:
                # Read-only filesystem or permission error: skip creation.
                pass

        paths_to_check.append(user_config_file)

    # 4. Package source fallback (for dev envs; .env is gitignored)
    paths_to_check.append(Path(__file__).resolve().parent.parent.parent / ".env")

    # override=False: the first source to define a variable keeps it.
    for path in paths_to_check:
        if path.is_file():
            load_dotenv(path, override=False)
186+
83187
@staticmethod
84188
def _get_default_database_path() -> str:
85189
"""Get the default database path in the user's cache directory."""

0 commit comments

Comments
 (0)