Skip to content

Commit 10fa244

Browse files
committed
Merge scaling phases 2-12 into main
Completes the Scaling + Token Efficiency initiative (Phases 2-12). Phase 1 was already merged via PR #115.
2 parents bdc681e + 9b22f83 commit 10fa244

31 files changed

Lines changed: 3613 additions & 284 deletions

CLAUDE.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@
44

55
**code-review-graph** is a persistent, incrementally-updated knowledge graph for token-efficient code reviews with Claude Code. It parses codebases using Tree-sitter, builds a structural graph in SQLite, and exposes it via MCP tools and prompts.
66

7+
## Graph Tool Usage (Token-Efficient)
8+
When using code-review-graph MCP tools, follow these rules:
9+
1. First call: `get_minimal_context(task="<description>")` — costs ~100 tokens, gives you the full picture.
10+
2. All subsequent calls: use `detail_level="minimal"` unless you need more.
11+
3. Prefer `query_graph` with a specific target over broad `list_*` calls.
12+
4. The `next_tool_suggestions` field in every response tells you the optimal next step.
13+
5. Target: ≤5 tool calls per task, ≤800 total tokens of graph context.
14+
715
## Architecture
816

917
- **Core Package**: `code_review_graph/` (Python 3.10+)

code_review_graph/cli.py

Lines changed: 88 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ def _print_banner() -> None:
8686
{g}register{r} Register a repository in the multi-repo registry
8787
{g}unregister{r} Remove a repository from the registry
8888
{g}repos{r} List registered repositories
89+
{g}postprocess{r} Run post-processing {d}(flows, communities, FTS){r}
8990
{g}eval{r} Run evaluation benchmarks
9091
{g}serve{r} Start MCP server
9192
@@ -226,11 +227,37 @@ def main() -> None:
226227
# build
227228
build_cmd = sub.add_parser("build", help="Full graph build (re-parse all files)")
228229
build_cmd.add_argument("--repo", default=None, help="Repository root (auto-detected)")
230+
build_cmd.add_argument(
231+
"--skip-flows", action="store_true",
232+
help="Skip flow/community detection (signatures + FTS only)",
233+
)
234+
build_cmd.add_argument(
235+
"--skip-postprocess", action="store_true",
236+
help="Skip all post-processing (raw parse only)",
237+
)
229238

230239
# update
231240
update_cmd = sub.add_parser("update", help="Incremental update (only changed files)")
232241
update_cmd.add_argument("--base", default="HEAD~1", help="Git diff base (default: HEAD~1)")
233242
update_cmd.add_argument("--repo", default=None, help="Repository root (auto-detected)")
243+
update_cmd.add_argument(
244+
"--skip-flows", action="store_true",
245+
help="Skip flow/community detection (signatures + FTS only)",
246+
)
247+
update_cmd.add_argument(
248+
"--skip-postprocess", action="store_true",
249+
help="Skip all post-processing (raw parse only)",
250+
)
251+
252+
# postprocess
253+
pp_cmd = sub.add_parser(
254+
"postprocess",
255+
help="Run post-processing on existing graph (flows, communities, FTS)",
256+
)
257+
pp_cmd.add_argument("--repo", default=None, help="Repository root (auto-detected)")
258+
pp_cmd.add_argument("--no-flows", action="store_true", help="Skip flow detection")
259+
pp_cmd.add_argument("--no-communities", action="store_true", help="Skip community detection")
260+
pp_cmd.add_argument("--no-fts", action="store_true", help="Skip FTS rebuild")
234261

235262
# watch
236263
watch_cmd = sub.add_parser("watch", help="Watch for changes and auto-update")
@@ -243,6 +270,12 @@ def main() -> None:
243270
# visualize
244271
vis_cmd = sub.add_parser("visualize", help="Generate interactive HTML graph visualization")
245272
vis_cmd.add_argument("--repo", default=None, help="Repository root (auto-detected)")
273+
vis_cmd.add_argument(
274+
"--mode",
275+
choices=["auto", "full", "community", "file"],
276+
default="auto",
277+
help="Rendering mode: auto (default), full, community, or file",
278+
)
246279
vis_cmd.add_argument(
247280
"--serve", action="store_true",
248281
help="Start a local HTTP server to view the visualization (localhost:8765)",
@@ -402,6 +435,30 @@ def main() -> None:
402435
watch,
403436
)
404437

438+
if args.command == "postprocess":
439+
repo_root = Path(args.repo) if args.repo else find_project_root()
440+
db_path = get_db_path(repo_root)
441+
store = GraphStore(db_path)
442+
try:
443+
from .tools.build import run_postprocess
444+
result = run_postprocess(
445+
flows=not getattr(args, "no_flows", False),
446+
communities=not getattr(args, "no_communities", False),
447+
fts=not getattr(args, "no_fts", False),
448+
repo_root=str(repo_root),
449+
)
450+
parts = []
451+
if result.get("flows_detected"):
452+
parts.append(f"{result['flows_detected']} flows")
453+
if result.get("communities_detected"):
454+
parts.append(f"{result['communities_detected']} communities")
455+
if result.get("fts_indexed"):
456+
parts.append(f"{result['fts_indexed']} FTS entries")
457+
print(f"Post-processing: {', '.join(parts) or 'done'}")
458+
finally:
459+
store.close()
460+
return
461+
405462
if args.command in ("update", "detect-changes"):
406463
# update and detect-changes require git for diffing
407464
repo_root = Path(args.repo) if args.repo else find_repo_root()
@@ -420,19 +477,40 @@ def main() -> None:
420477

421478
try:
422479
if args.command == "build":
423-
result = full_build(repo_root, store)
480+
pp = "none" if getattr(args, "skip_postprocess", False) else (
481+
"minimal" if getattr(args, "skip_flows", False) else "full"
482+
)
483+
from .tools.build import build_or_update_graph
484+
result = build_or_update_graph(
485+
full_rebuild=True, repo_root=str(repo_root), postprocess=pp,
486+
)
487+
parsed = result.get("files_parsed", 0)
488+
nodes = result.get("total_nodes", 0)
489+
edges = result.get("total_edges", 0)
424490
print(
425-
f"Full build: {result['files_parsed']} files, "
426-
f"{result['total_nodes']} nodes, {result['total_edges']} edges"
491+
f"Full build: {parsed} files, "
492+
f"{nodes} nodes, {edges} edges"
493+
f" (postprocess={pp})"
427494
)
428-
if result["errors"]:
495+
if result.get("errors"):
429496
print(f"Errors: {len(result['errors'])}")
430497

431498
elif args.command == "update":
432-
result = incremental_update(repo_root, store, base=args.base)
499+
pp = "none" if getattr(args, "skip_postprocess", False) else (
500+
"minimal" if getattr(args, "skip_flows", False) else "full"
501+
)
502+
from .tools.build import build_or_update_graph
503+
result = build_or_update_graph(
504+
full_rebuild=False, repo_root=str(repo_root),
505+
base=args.base, postprocess=pp,
506+
)
507+
updated = result.get("files_updated", 0)
508+
nodes = result.get("total_nodes", 0)
509+
edges = result.get("total_edges", 0)
433510
print(
434-
f"Incremental: {result['files_updated']} files updated, "
435-
f"{result['total_nodes']} nodes, {result['total_edges']} edges"
511+
f"Incremental: {updated} files updated, "
512+
f"{nodes} nodes, {edges} edges"
513+
f" (postprocess={pp})"
436514
)
437515

438516
elif args.command == "status":
@@ -464,8 +542,9 @@ def main() -> None:
464542
elif args.command == "visualize":
465543
from .visualization import generate_html
466544
html_path = repo_root / ".code-review-graph" / "graph.html"
467-
generate_html(store, html_path)
468-
print(f"Visualization: {html_path}")
545+
vis_mode = getattr(args, "mode", "auto") or "auto"
546+
generate_html(store, html_path, mode=vis_mode)
547+
print(f"Visualization ({vis_mode}): {html_path}")
469548
if getattr(args, "serve", False):
470549
import functools
471550
import http.server

code_review_graph/communities.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,46 @@ def detect_communities(
433433
return results
434434

435435

436+
def incremental_detect_communities(
    store: GraphStore,
    changed_files: list[str],
    min_size: int = 2,
) -> int:
    """Re-detect communities only when changed files touch an existing one.

    Looks for at least one node that belongs to a community
    (``community_id IS NOT NULL``) and lives in one of ``changed_files``.
    If none is found, detection is skipped entirely; otherwise full
    community detection is re-run and the result is stored.

    NOTE(review): files whose nodes have no community yet (for example
    newly added files) never match the lookup and therefore do not trigger
    re-detection on their own -- confirm this is the intended behaviour.

    Args:
        store: The GraphStore instance.
        changed_files: List of file paths that have changed.
        min_size: Minimum number of nodes for a community to be included.

    Returns:
        The value returned by ``store_communities`` after re-detection,
        or 0 if re-detection was skipped.
    """
    if not changed_files:
        return 0

    conn = store._conn

    # Duplicate paths only waste bound parameters; keep first occurrences.
    unique_files = list(dict.fromkeys(changed_files))

    # SQLite caps the number of bound parameters per statement (999 in
    # builds older than 3.32, 32766 by default afterwards).  Querying in
    # batches keeps very large change sets from raising
    # "too many SQL variables".
    batch_size = 500
    affected = False
    for start in range(0, len(unique_files), batch_size):
        batch = unique_files[start:start + batch_size]
        placeholders = ",".join("?" * len(batch))
        # Only "?" markers are interpolated; values are bound separately.
        row = conn.execute(
            "SELECT 1 FROM nodes "  # nosec B608
            f"WHERE community_id IS NOT NULL AND file_path IN ({placeholders}) "
            "LIMIT 1",
            batch,
        ).fetchone()
        if row is not None:
            # One hit is enough to know re-detection is required.
            affected = True
            break

    if not affected:
        return 0  # No communities affected, skip

    # Re-run full community detection (correct and fast enough)
    communities = detect_communities(store, min_size=min_size)
    return store_communities(store, communities)
474+
475+
436476
def store_communities(
437477
store: GraphStore, communities: list[dict[str, Any]]
438478
) -> int:

code_review_graph/constants.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,22 @@
22

33
from __future__ import annotations
44

5+
import os
6+
57
# Keyword fragments grouped under the "security" label.  How they are
# matched is decided by the modules that import this set.
SECURITY_KEYWORDS: frozenset[str] = frozenset(
    """
    auth login password token session crypt secret
    credential permission sql query execute connect
    socket request http sanitize validate encrypt
    decrypt hash sign verify admin privilege
    """.split()
)
13+
14+
# ---------------------------------------------------------------------------
# Configurable limits (override via environment variables)
# ---------------------------------------------------------------------------


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int.

    Falls back to *default* when the variable is unset, blank, or not a
    valid integer, so a malformed value cannot make this module fail at
    import time.  An invalid value emits a ``RuntimeWarning`` instead of
    being ignored silently.
    """
    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        import warnings

        warnings.warn(
            f"Ignoring invalid integer {name}={raw!r}; using default {default}",
            RuntimeWarning,
            stacklevel=2,
        )
        return default


MAX_IMPACT_NODES = _env_int("CRG_MAX_IMPACT_NODES", 500)
MAX_IMPACT_DEPTH = _env_int("CRG_MAX_IMPACT_DEPTH", 2)
MAX_BFS_DEPTH = _env_int("CRG_MAX_BFS_DEPTH", 15)
MAX_SEARCH_RESULTS = _env_int("CRG_MAX_SEARCH_RESULTS", 20)

# BFS engine: "sql" (SQLite recursive CTE) or "networkx" (Python-side BFS)
BFS_ENGINE = os.environ.get("CRG_BFS_ENGINE", "sql")

0 commit comments

Comments
 (0)