Skip to content

Commit c662652

Browse files
committed
Add JSON search, JSON-RPC bridge, and repo-scoped vector partitions to ccc
1 parent cc55f32 commit c662652

13 files changed

Lines changed: 579 additions & 26 deletions

File tree

src/cocoindex_code/cli.py

Lines changed: 210 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
from __future__ import annotations
44

55
import functools
6+
import json as _json
67
import os
78
import sys
89
from collections.abc import Callable
910
from pathlib import Path
10-
from typing import TYPE_CHECKING, TypeVar
11+
from typing import TYPE_CHECKING, Protocol, TextIO, TypeVar
1112

1213
import typer as _typer
1314

@@ -102,6 +103,20 @@ def require_project_root() -> Path:
102103
_F = TypeVar("_F", bound=Callable[..., object])
103104

104105

106+
class _SearchCallable(Protocol):
    """Structural type of the search callable accepted by the JSON-RPC bridge.

    Any callable that takes these arguments (with these defaults) and returns
    a ``SearchResponse`` satisfies the protocol.
    """

    def __call__(
        self,
        project_root: str,
        query: str,
        languages: list[str] | None = None,
        paths: list[str] | None = None,
        repo_keys: list[str] | None = None,
        limit: int = 5,
        offset: int = 0,
        on_waiting: Callable[[], None] | None = None,
    ) -> SearchResponse:
        """Run one search and return its response."""
        ...
118+
119+
105120
def _catch_daemon_start_error(func: _F) -> _F:
106121
"""Decorator that catches ``DaemonStartError`` and exits with a clean message.
107122
@@ -181,6 +196,176 @@ def print_search_results(response: SearchResponse) -> None:
181196
_typer.echo(r.content)
182197

183198

199+
def search_response_json_payload(response: SearchResponse) -> dict[str, object]:
    """Convert a search response into a plain dict suitable for JSON output.

    The top-level keys are ``success``, ``results``, ``total_returned``,
    ``offset`` and ``message``; each entry of ``results`` carries the fields
    of one hit copied from the response.
    """
    succeeded = response.success

    hits: list[dict[str, object]] = []
    for hit in response.results:
        entry: dict[str, object] = {
            "file_path": hit.file_path,
            "repo_key": hit.repo_key,
            "language": hit.language,
            "content": hit.content,
            "start_line": hit.start_line,
            "end_line": hit.end_line,
            "score": hit.score,
        }
        hits.append(entry)

    payload: dict[str, object] = {
        "success": succeeded,
        "results": hits,
        "total_returned": response.total_returned,
        "offset": response.offset,
        "message": response.message,
    }
    return payload
219+
220+
221+
def print_search_results_json(response: SearchResponse) -> None:
    """Echo the search response as JSON indented by two spaces."""
    rendered = _json.dumps(search_response_json_payload(response), indent=2)
    _typer.echo(rendered)
225+
226+
227+
def _jsonrpc_id(value: object) -> str | int | None:
    """Validate a JSON-RPC request id and return it unchanged.

    ``None``, strings and integers are accepted. Booleans are rejected even
    though ``bool`` is a subclass of ``int``.

    Raises:
        ValueError: If ``value`` is of any other type.
    """
    problem = "JSON-RPC id must be a string, integer, or null"
    if isinstance(value, bool):
        raise ValueError(problem)
    if value is None or isinstance(value, (str, int)):
        return value
    raise ValueError(problem)
233+
234+
235+
def _jsonrpc_success(request_id: str | int | None, result: object) -> dict[str, object]:
    """Build a JSON-RPC 2.0 success envelope carrying ``result`` for ``request_id``."""
    envelope: dict[str, object] = {"jsonrpc": "2.0", "id": request_id}
    envelope["result"] = result
    return envelope
241+
242+
243+
def _jsonrpc_error(
    request_id: str | int | None,
    code: int,
    message: str,
) -> dict[str, object]:
    """Build a JSON-RPC 2.0 error envelope for ``request_id``.

    The ``error`` member holds the numeric ``code`` and the ``message`` text.
    """
    detail: dict[str, object] = {"code": code, "message": message}
    envelope: dict[str, object] = {"jsonrpc": "2.0", "id": request_id}
    envelope["error"] = detail
    return envelope
256+
257+
258+
def _required_str(params: dict[str, object], name: str) -> str:
    """Return ``params[name]`` when it is a non-empty string.

    Raises:
        ValueError: If the entry is missing, not a string, or empty.
    """
    candidate = params.get(name)
    if isinstance(candidate, str) and candidate:
        return candidate
    raise ValueError(f"params.{name} must be a non-empty string")
263+
264+
265+
def _optional_str_list(params: dict[str, object], name: str) -> list[str] | None:
    """Return ``params[name]`` as a new list of strings, or ``None`` if absent.

    Raises:
        ValueError: If the entry is present but is not a list, or if any of
            its items is not a string.
    """
    raw = params.get(name)
    if raw is None:
        return None
    if not isinstance(raw, list) or not all(isinstance(entry, str) for entry in raw):
        raise ValueError(f"params.{name} must be a list of strings")
    # Hand back a copy so the caller never aliases the request's own list.
    return list(raw)
277+
278+
279+
def _positive_int_param(params: dict[str, object], name: str, default: int) -> int:
    """Return ``params[name]`` as a strictly positive integer.

    A missing or ``None`` entry yields ``default``. Booleans are rejected
    even though ``bool`` is a subclass of ``int``.

    Raises:
        ValueError: If the entry is not an integer greater than zero.
    """
    raw = params.get(name)
    if raw is None:
        return default
    if isinstance(raw, bool) or not isinstance(raw, int) or raw <= 0:
        raise ValueError(f"params.{name} must be a positive integer")
    return raw
286+
287+
288+
def _non_negative_int_param(params: dict[str, object], name: str, default: int) -> int:
    """Return ``params[name]`` as an integer that is zero or greater.

    A missing or ``None`` entry yields ``default``. Booleans are rejected
    even though ``bool`` is a subclass of ``int``.

    Raises:
        ValueError: If the entry is not an integer, or is negative.
    """
    raw = params.get(name)
    if raw is None:
        return default
    if isinstance(raw, bool) or not isinstance(raw, int) or raw < 0:
        raise ValueError(f"params.{name} must be a non-negative integer")
    return raw
295+
296+
297+
def handle_bridge_jsonrpc_request(
    request: object,
    search_func: _SearchCallable,
) -> tuple[dict[str, object], bool]:
    """Handle one decoded JSON-RPC bridge request.

    Supported methods are ``ping``, ``shutdown`` and ``search``.

    Args:
        request: The decoded JSON value of a single request.
        search_func: Callable used to execute ``search`` requests.

    Returns:
        A ``(response, should_exit)`` pair. ``response`` is the JSON-RPC
        success or error envelope; ``should_exit`` is true only for a
        ``shutdown`` request.

    Error codes used: -32600 for a malformed request envelope, -32601 for an
    unknown method, -32602 for bad parameters (including ``ValueError`` raised
    by ``search_func``), and -32000 for a ``RuntimeError`` raised while
    searching.
    """
    if not isinstance(request, dict):
        return _jsonrpc_error(None, -32600, "Invalid Request"), False

    # A malformed id is a defect of the request envelope, not of the method
    # parameters, so report it as -32600 (Invalid Request) with a null id
    # instead of letting it fall into the -32602 handler further down.
    try:
        request_id = _jsonrpc_id(request.get("id"))
    except ValueError as e:
        return _jsonrpc_error(None, -32600, f"Invalid Request: {e}"), False

    if request.get("jsonrpc") != "2.0":
        return _jsonrpc_error(request_id, -32600, "Invalid Request"), False
    method = request.get("method")
    if not isinstance(method, str):
        return _jsonrpc_error(request_id, -32600, "Invalid Request"), False
    params_obj = request.get("params", {})
    if not isinstance(params_obj, dict):
        return _jsonrpc_error(request_id, -32602, "Invalid params"), False
    params = {str(k): v for k, v in params_obj.items()}

    if method == "ping":
        return _jsonrpc_success(request_id, {"ok": True}), False
    if method == "shutdown":
        return _jsonrpc_success(request_id, {"ok": True}), True
    if method != "search":
        return _jsonrpc_error(request_id, -32601, f"Method not found: {method}"), False

    try:
        response = search_func(
            project_root=_required_str(params, "project_root"),
            query=_required_str(params, "query"),
            languages=_optional_str_list(params, "languages"),
            paths=_optional_str_list(params, "paths"),
            repo_keys=_optional_str_list(params, "repo_keys"),
            limit=_positive_int_param(params, "limit", 10),
            offset=_non_negative_int_param(params, "offset", 0),
        )
        return _jsonrpc_success(request_id, search_response_json_payload(response)), False
    except ValueError as e:
        return _jsonrpc_error(request_id, -32602, str(e)), False
    except RuntimeError as e:
        return _jsonrpc_error(request_id, -32000, str(e)), False
339+
340+
341+
def run_jsonrpc_bridge(
    input_stream: TextIO | None = None,
    output_stream: TextIO | None = None,
    search_func: _SearchCallable | None = None,
) -> None:
    """Run the JSON-RPC bridge over newline-delimited stdin/stdout.

    Each non-blank input line is decoded as one JSON request and answered
    with exactly one compact JSON line, flushed immediately. The loop ends
    when the input is exhausted or a request asks the bridge to exit.

    Args:
        input_stream: Source of request lines; defaults to ``sys.stdin``.
        output_stream: Destination for response lines; defaults to
            ``sys.stdout``.
        search_func: Callable used for ``search`` requests; defaults to
            ``client.search`` from this package.
    """
    # Resolve the standard streams at call time. Binding ``sys.stdin`` /
    # ``sys.stdout`` as parameter defaults would capture whatever objects
    # existed when this module was imported and ignore later redirection.
    source = sys.stdin if input_stream is None else input_stream
    sink = sys.stdout if output_stream is None else output_stream

    if search_func is None:
        from . import client as _client

        search_func = _client.search

    for line in source:
        stripped = line.strip()
        if not stripped:
            continue
        try:
            request = _json.loads(stripped)
        except _json.JSONDecodeError:
            response = _jsonrpc_error(None, -32700, "Parse error")
            should_exit = False
        else:
            response, should_exit = handle_bridge_jsonrpc_request(request, search_func)
        sink.write(_json.dumps(response, separators=(",", ":")) + "\n")
        # Flush per response so a peer waiting on the reply is not blocked
        # by output buffering.
        sink.flush()
        if should_exit:
            break
367+
368+
184369
def _run_index_with_progress(project_root: str) -> None:
185370
"""Run indexing with streaming progress display. Exits on failure."""
186371
from rich.console import Console as _Console
@@ -231,6 +416,7 @@ def _search_with_wait_spinner(
231416
query: str,
232417
languages: list[str] | None = None,
233418
paths: list[str] | None = None,
419+
repo_keys: list[str] | None = None,
234420
limit: int = 10,
235421
offset: int = 0,
236422
) -> SearchResponse:
@@ -256,6 +442,7 @@ def _on_waiting() -> None:
256442
query=query,
257443
languages=languages,
258444
paths=paths,
445+
repo_keys=repo_keys,
259446
limit=limit,
260447
offset=offset,
261448
on_waiting=_on_waiting,
@@ -549,9 +736,11 @@ def search(
549736
query: list[str] = _typer.Argument(..., help="Search query"),
550737
lang: list[str] = _typer.Option([], "--lang", help="Filter by language"),
551738
path: str | None = _typer.Option(None, "--path", help="Filter by file path glob"),
739+
repo_key: list[str] = _typer.Option([], "--repo-key", help="Filter by indexed repo key"),
552740
offset: int = _typer.Option(0, "--offset", help="Number of results to skip"),
553741
limit: int = _typer.Option(10, "--limit", help="Maximum results to return"),
554742
refresh: bool = _typer.Option(False, "--refresh", help="Refresh index before searching"),
743+
json_output: bool = _typer.Option(False, "--json", help="Print results as JSON"),
555744
) -> None:
556745
"""Semantic search across the codebase."""
557746
project_root = str(require_project_root())
@@ -574,10 +763,29 @@ def search(
574763
query=query_str,
575764
languages=lang or None,
576765
paths=paths,
766+
repo_keys=repo_key or None,
577767
limit=limit,
578768
offset=offset,
579769
)
580-
print_search_results(resp)
770+
if json_output:
771+
print_search_results_json(resp)
772+
else:
773+
print_search_results(resp)
774+
775+
776+
@app.command()
def bridge(
    jsonrpc: bool = _typer.Option(
        False,
        "--jsonrpc",
        help="Run a JSON-RPC bridge over stdin/stdout",
    ),
) -> None:
    """Run a long-lived bridge for external tools."""
    # JSON-RPC is the only protocol offered, so the flag must be given.
    if jsonrpc:
        run_jsonrpc_bridge()
        return

    _typer.echo("Error: pass --jsonrpc to select the bridge protocol.", err=True)
    raise _typer.Exit(code=1)
581789

582790

583791
@app.command()

src/cocoindex_code/client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ def search(
278278
query: str,
279279
languages: list[str] | None = None,
280280
paths: list[str] | None = None,
281+
repo_keys: list[str] | None = None,
281282
limit: int = 5,
282283
offset: int = 0,
283284
on_waiting: Callable[[], None] | None = None,
@@ -298,6 +299,7 @@ def search(
298299
query=query,
299300
languages=languages,
300301
paths=paths,
302+
repo_keys=repo_keys,
301303
limit=limit,
302304
offset=offset,
303305
)

src/cocoindex_code/daemon.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ async def _search_with_wait(
275275
query=req.query,
276276
languages=req.languages,
277277
paths=req.paths,
278+
repo_keys=req.repo_keys,
278279
limit=req.limit,
279280
offset=req.offset,
280281
)
@@ -488,6 +489,7 @@ async def _dispatch(
488489
query=req.query,
489490
languages=req.languages,
490491
paths=req.paths,
492+
repo_keys=req.repo_keys,
491493
limit=req.limit,
492494
offset=req.offset,
493495
)

src/cocoindex_code/indexer.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,22 @@
3333
splitter = RecursiveSplitter()
3434

3535

36+
def repo_key_for_path(file_path: PurePath, project_root: Path) -> str:
    """Return the repo key of the Git repository that contains ``file_path``.

    Walks from the file's directory up through its ancestors and returns the
    POSIX form of the first directory that has a ``.git`` entry when joined
    onto ``project_root`` (``"."`` for the project root itself). If no such
    directory exists, the first path component is used as the key, or ``"."``
    for a path with a single component.

    Args:
        file_path: Path of the file. NOTE(review): presumably relative to
            ``project_root``; confirm against the caller.
        project_root: Directory the candidate directories are joined onto.

    Returns:
        The repo key string.
    """
    start = file_path.parent
    # ``parents`` is a finite sequence for every path, so the walk always
    # terminates, including for an absolute path whose root is its own parent.
    for directory in (start, *start.parents):
        if (project_root / directory / ".git").exists():
            return directory.as_posix()

    parts = file_path.parts
    return parts[0] if len(parts) > 1 else "."
50+
51+
3652
def _normalize_gitignore_lines(lines: Iterable[str], directory: PurePath) -> list[str]:
3753
"""Normalize .gitignore lines to root-relative gitignore patterns."""
3854
if directory in (PurePath("."), PurePath("")):
@@ -151,8 +167,9 @@ async def process_file(
151167
if not content.strip():
152168
return
153169

154-
suffix = file.file_path.path.suffix
155170
project_root = coco.use_context(CODEBASE_DIR)
171+
suffix = file.file_path.path.suffix
172+
repo_key = repo_key_for_path(file.file_path.path, project_root)
156173
ps = load_project_settings(project_root)
157174
ext_lang_map = {f".{lo.ext}": lo.lang for lo in ps.language_overrides}
158175
language = (
@@ -183,6 +200,7 @@ async def process(chunk: Chunk) -> None:
183200
row=CodeChunk(
184201
id=await id_gen.next_id(chunk.text),
185202
file_path=file.file_path.path.as_posix(),
203+
repo_key=repo_key,
186204
language=language,
187205
content=chunk.text,
188206
start_line=chunk.start.line,
@@ -209,7 +227,7 @@ async def indexer_main() -> None:
209227
primary_key=["id"],
210228
),
211229
virtual_table_def=Vec0TableDef(
212-
partition_key_columns=["language"],
230+
partition_key_columns=["repo_key", "language"],
213231
auxiliary_columns=["file_path", "content", "start_line", "end_line"],
214232
),
215233
)

src/cocoindex_code/project.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ async def search(
179179
query: str,
180180
languages: list[str] | None = None,
181181
paths: list[str] | None = None,
182+
repo_keys: list[str] | None = None,
182183
limit: int = 5,
183184
offset: int = 0,
184185
) -> list[SearchResult]:
@@ -192,10 +193,12 @@ async def search(
192193
offset=offset,
193194
languages=languages,
194195
paths=paths,
196+
repo_keys=repo_keys,
195197
)
196198
return [
197199
SearchResult(
198200
file_path=r.file_path,
201+
repo_key=r.repo_key,
199202
language=r.language,
200203
content=r.content,
201204
start_line=r.start_line,

src/cocoindex_code/protocol.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ class SearchRequest(_msgspec.Struct, tag="search"):
2222
query: str
2323
languages: list[str] | None = None
2424
paths: list[str] | None = None
25+
repo_keys: list[str] | None = None
2526
limit: int = 5
2627
offset: int = 0
2728

@@ -111,6 +112,7 @@ class SearchResult(_msgspec.Struct):
111112
start_line: int
112113
end_line: int
113114
score: float
115+
repo_key: str | None = None
114116

115117

116118
class SearchResponse(_msgspec.Struct, tag="search"):

0 commit comments

Comments
 (0)