Skip to content

Commit bdac42e

Browse files
committed
fix: support commas in custom exclude patterns
1 parent a7f5c3c commit bdac42e

3 files changed

Lines changed: 94 additions & 12 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ Use the cocoindex-code MCP server for semantic code search when:
130130
| `COCOINDEX_CODE_EMBEDDING_MODEL` | Embedding model (see below) | `sbert/sentence-transformers/all-MiniLM-L6-v2` |
131131
| `COCOINDEX_CODE_BATCH_SIZE` | Max batch size for local embedding model | `16` |
132132
| `COCOINDEX_CODE_EXTRA_EXTENSIONS` | Additional file extensions to index (comma-separated, e.g. `"inc:php,yaml,toml"` — use `ext:lang` to override language detection) | _(none)_ |
133-
| `COCOINDEX_CODE_EXCLUDED_PATTERNS` | Additional glob patterns to exclude from indexing (comma-separated, e.g. `"**/migration.sql,**/*.d.ts"`) | _(none)_ |
133+
| `COCOINDEX_CODE_EXCLUDED_PATTERNS` | Additional glob patterns to exclude from indexing, given as a JSON array of strings (e.g. `'["**/migration.sql", "{**/*.md,**/*.txt}"]'`). The older comma-separated form is no longer accepted and raises an error. | _(none)_ |
134134

135135

136136
### Root Path Discovery

src/cocoindex_code/config.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import json
56
import os
67
from dataclasses import dataclass
78
from pathlib import Path
@@ -42,6 +43,33 @@ def _discover_codebase_root() -> Path:
4243
return root if root is not None else cwd
4344

4445

46+
def _parse_json_string_list_env(var_name: str) -> list[str]:
47+
"""Parse an environment variable as a JSON array of strings."""
48+
raw_value = os.environ.get(var_name, "")
49+
if not raw_value.strip():
50+
return []
51+
52+
try:
53+
parsed = json.loads(raw_value)
54+
except json.JSONDecodeError as exc:
55+
raise ValueError(
56+
f"{var_name} must be a JSON array of strings, got invalid JSON"
57+
) from exc
58+
59+
if not isinstance(parsed, list):
60+
raise ValueError(f"{var_name} must be a JSON array of strings")
61+
62+
result: list[str] = []
63+
for item in parsed:
64+
if not isinstance(item, str):
65+
raise ValueError(f"{var_name} must be a JSON array of strings")
66+
item = item.strip()
67+
if item:
68+
result.append(item)
69+
70+
return result
71+
72+
4573
@dataclass
4674
class Config:
4775
"""Configuration loaded from environment variables."""
@@ -101,13 +129,9 @@ def from_env(cls) -> Config:
101129
extra_extensions[f".{token}"] = None
102130

103131
# Excluded file glob patterns
104-
raw_excluded_patterns = os.environ.get("COCOINDEX_CODE_EXCLUDED_PATTERNS", "")
105-
excluded_patterns: list[str] = []
106-
for pattern in raw_excluded_patterns.split(","):
107-
pattern = pattern.strip()
108-
if not pattern:
109-
continue
110-
excluded_patterns.append(pattern)
132+
excluded_patterns = _parse_json_string_list_env(
133+
"COCOINDEX_CODE_EXCLUDED_PATTERNS"
134+
)
111135

112136
return cls(
113137
codebase_root_path=root,

tests/test_config.py

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from pathlib import Path
77
from unittest.mock import patch
88

9+
import pytest
10+
911
from cocoindex_code.config import Config
1012

1113

@@ -168,23 +170,34 @@ def test_empty_by_default(self, tmp_path: Path) -> None:
168170
config = Config.from_env()
169171
assert config.excluded_patterns == []
170172

171-
def test_parses_comma_separated(self, tmp_path: Path) -> None:
173+
def test_parses_json_array(self, tmp_path: Path) -> None:
172174
with patch.dict(
173175
os.environ,
174176
{
175177
"COCOINDEX_CODE_ROOT_PATH": str(tmp_path),
176-
"COCOINDEX_CODE_EXCLUDED_PATTERNS": "**/migration.sql,**/*.d.ts",
178+
"COCOINDEX_CODE_EXCLUDED_PATTERNS": '["**/migration.sql", "**/*.d.ts"]',
177179
},
178180
):
179181
config = Config.from_env()
180182
assert config.excluded_patterns == ["**/migration.sql", "**/*.d.ts"]
181183

182-
def test_trims_whitespace(self, tmp_path: Path) -> None:
184+
def test_preserves_commas_inside_globs(self, tmp_path: Path) -> None:
183185
with patch.dict(
184186
os.environ,
185187
{
186188
"COCOINDEX_CODE_ROOT_PATH": str(tmp_path),
187-
"COCOINDEX_CODE_EXCLUDED_PATTERNS": " **/migration.sql , **/*.d.ts , ",
189+
"COCOINDEX_CODE_EXCLUDED_PATTERNS": '["{**/*.md,**/*.txt}"]',
190+
},
191+
):
192+
config = Config.from_env()
193+
assert config.excluded_patterns == ["{**/*.md,**/*.txt}"]
194+
195+
def test_trims_whitespace_and_ignores_empty_entries(self, tmp_path: Path) -> None:
196+
with patch.dict(
197+
os.environ,
198+
{
199+
"COCOINDEX_CODE_ROOT_PATH": str(tmp_path),
200+
"COCOINDEX_CODE_EXCLUDED_PATTERNS": '[" **/migration.sql ", " ", "**/*.d.ts"]',
188201
},
189202
):
190203
config = Config.from_env()
@@ -200,3 +213,48 @@ def test_empty_string_gives_empty_list(self, tmp_path: Path) -> None:
200213
):
201214
config = Config.from_env()
202215
assert config.excluded_patterns == []
216+
217+
def test_rejects_invalid_json(self, tmp_path: Path) -> None:
218+
with patch.dict(
219+
os.environ,
220+
{
221+
"COCOINDEX_CODE_ROOT_PATH": str(tmp_path),
222+
"COCOINDEX_CODE_EXCLUDED_PATTERNS": "**/migration.sql,**/*.d.ts",
223+
},
224+
):
225+
with pytest.raises(
226+
ValueError,
227+
match=(
228+
"COCOINDEX_CODE_EXCLUDED_PATTERNS must be a JSON array of strings, "
229+
"got invalid JSON"
230+
),
231+
):
232+
Config.from_env()
233+
234+
def test_rejects_valid_json_non_list(self, tmp_path: Path) -> None:
235+
with patch.dict(
236+
os.environ,
237+
{
238+
"COCOINDEX_CODE_ROOT_PATH": str(tmp_path),
239+
"COCOINDEX_CODE_EXCLUDED_PATTERNS": "{}",
240+
},
241+
):
242+
with pytest.raises(
243+
ValueError,
244+
match="COCOINDEX_CODE_EXCLUDED_PATTERNS must be a JSON array of strings",
245+
):
246+
Config.from_env()
247+
248+
def test_rejects_non_string_entries(self, tmp_path: Path) -> None:
249+
with patch.dict(
250+
os.environ,
251+
{
252+
"COCOINDEX_CODE_ROOT_PATH": str(tmp_path),
253+
"COCOINDEX_CODE_EXCLUDED_PATTERNS": '["**/*.py", 1]',
254+
},
255+
):
256+
with pytest.raises(
257+
ValueError,
258+
match="COCOINDEX_CODE_EXCLUDED_PATTERNS must be a JSON array of strings",
259+
):
260+
Config.from_env()

0 commit comments

Comments
 (0)