|
5 | 5 |
|
6 | 6 | from dotenv import load_dotenv |
7 | 7 |
|
8 | | -env_path = Path(__file__).resolve().parent.parent.parent / ".env" |
9 | | -load_dotenv(env_path) |
| 8 | +DEFAULT_CONFIG = """# Configuration for code-rag |
| 9 | +# This file is automatically generated. Modify as needed. |
| 10 | +
|
| 11 | +# Embedding model to use for generating vector embeddings |
| 12 | +# Options: |
| 13 | +# - nomic-ai/CodeRankEmbed (Recommended for code) |
| 14 | +# - text-embedding-3-small (Requires OPENAI_API_KEY) |
| 15 | +CODE_RAG_EMBEDDING_MODEL=nomic-ai/CodeRankEmbed |
| 16 | +
|
| 17 | +# Database type: 'chroma' or 'qdrant' |
| 18 | +CODE_RAG_DATABASE_TYPE=chroma |
| 19 | +
|
| 20 | +# Database path (optional, defaults to OS-specific cache directory) |
| 21 | +# Windows: %LOCALAPPDATA%\\code-rag |
| 22 | +# macOS: ~/Library/Caches/code-rag |
| 23 | +# Linux: ~/.cache/code-rag |
| 24 | +# CODE_RAG_DATABASE_PATH= |
| 25 | +
|
| 26 | +# Chunk size in characters for splitting files |
| 27 | +CODE_RAG_CHUNK_SIZE=1024 |
| 28 | +
|
| 29 | +# Batch size for processing documents |
| 30 | +CODE_RAG_BATCH_SIZE=32 |
| 31 | +
|
| 32 | +# Whether to include file headers in chunks (true/false) |
| 33 | +CODE_RAG_INCLUDE_FILE_HEADER=true |
| 34 | +
|
| 35 | +# Whether to exclude test files from indexing (true/false) |
| 36 | +CODE_RAG_EXCLUDE_TESTS=false |
| 37 | +
|
| 38 | +# Additional ignore patterns (comma-separated list) |
| 39 | +# Examples: *.log,*.tmp,custom_ignore_dir/ |
| 40 | +CODE_RAG_ADDITIONAL_IGNORE_PATTERNS= |
| 41 | +
|
| 42 | +# Whether reranking is enabled (true/false) |
| 43 | +CODE_RAG_RERANKER_ENABLED=false |
| 44 | +
|
| 45 | +# Reranker model to use |
| 46 | +CODE_RAG_RERANKER_MODEL=mixedbread-ai/mxbai-rerank-xsmall-v1 |
| 47 | +
|
| 48 | +# Multiplier applied by the reranker (integer) |
| 49 | +CODE_RAG_RERANKER_MULTIPLIER=2 |
| 50 | +
|
| 51 | +# Security / Verification |
| 52 | +CODE_RAG_VERIFY_CHANGES_WITH_HASH=true |
| 53 | +
|
| 54 | +# Shared Embedding Server |
| 55 | +# Enables reusing the model across multiple calls to save memory |
| 56 | +CODE_RAG_SHARED_SERVER=true |
| 57 | +CODE_RAG_SHARED_SERVER_PORT=8199 |
| 58 | +""" |
10 | 59 |
|
11 | 60 |
|
12 | 61 | class Config: |
13 | 62 | """Configuration handler for the code-rag application.""" |
14 | 63 |
|
15 | 64 | def __init__(self): |
16 | 65 | """Initialize configuration from environment variables or defaults.""" |
| 66 | + self._load_config_files() |
17 | 67 | self.embedding_model = os.getenv( |
18 | 68 | "CODE_RAG_EMBEDDING_MODEL", "nomic-ai/CodeRankEmbed" |
19 | 69 | ) |
@@ -80,6 +130,60 @@ def __init__(self): |
80 | 130 | ).lower() in ("true", "1", "yes") |
81 | 131 | self.shared_server_port = self._get_int_env("CODE_RAG_SHARED_SERVER_PORT", 8199) |
82 | 132 |
|
def _load_config_files(self) -> None:
    """
    Load configuration files into the process environment.

    Candidate files are loaded in precedence order (highest first):

    1. Custom config file named by ``CODE_RAG_CONFIG_FILE``
    2. ``code-rag.config`` in the current working directory
    3. User config (``~/.config/code-rag/config``), created from
       ``DEFAULT_CONFIG`` when it does not exist yet
    4. ``.env`` three directories above this module (development fallback)

    Every file is loaded with ``load_dotenv``'s default ``override=False``,
    so a variable that is already set (by the shell, or by a file loaded
    earlier in this list) is never overwritten: the first source to define
    a variable wins. That is why the highest-priority file is loaded first.

    Failing to resolve the home directory or to create the user config is
    not an error; that candidate is simply skipped or left absent.
    """
    paths_to_check = []

    # 1. Direct path via env var (highest file priority).
    if custom_path := os.getenv("CODE_RAG_CONFIG_FILE"):
        try:
            paths_to_check.append(Path(custom_path).expanduser())
        except RuntimeError:
            # "~" could not be expanded because no home directory is
            # resolvable; fall back to the literal path.
            paths_to_check.append(Path(custom_path))

    # 2. Project-local config in the current working directory.
    paths_to_check.append(Path.cwd() / "code-rag.config")

    # 3. User configuration (XDG-style location under the home directory).
    try:
        user_config_file = Path.home() / ".config" / "code-rag" / "config"
    except RuntimeError:
        # No resolvable home directory: there is no user config to consider.
        user_config_file = None

    if user_config_file is not None:
        # Auto-create the file with the documented defaults on first use.
        # This is deliberately silent: Config may be built at import time
        # and stdout may be carrying MCP traffic.
        try:
            if not user_config_file.exists():
                user_config_file.parent.mkdir(parents=True, exist_ok=True)
                # Exclusive creation ("x") closes the gap between the
                # existence check and the write, so a file created in the
                # meantime by another process is never overwritten.
                with user_config_file.open("x", encoding="utf-8") as handle:
                    handle.write(DEFAULT_CONFIG)
        except OSError:
            # Read-only file system, permission error, or a concurrent
            # creator won the race: skip creation.
            pass

        # NOTE(review): the generated file assigns most keys explicitly, so
        # once it exists it takes precedence over the package .env below
        # for those keys.
        paths_to_check.append(user_config_file)

    # 4. Package source fallback (dev environments keep a gitignored .env).
    paths_to_check.append(Path(__file__).resolve().parent.parent.parent / ".env")

    for path in paths_to_check:
        if path.is_file():
            load_dotenv(path)
83 | 187 | @staticmethod |
84 | 188 | def _get_default_database_path() -> str: |
85 | 189 | """Get the default database path in the user's cache directory.""" |
|
0 commit comments