From 1cb471b74a4338bba7da15779cc6c98a54f501e6 Mon Sep 17 00:00:00 2001 From: RaghavChamadiya Date: Mon, 13 Apr 2026 14:13:00 +0530 Subject: [PATCH 1/2] feat: improve PreToolUse hook relevance with multi-signal search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace FTS-only file retrieval with a 3-signal ranking system: - Symbol name match (weight 2.0) — most precise - File path match (weight 1.5) — catches path-based searches - FTS on wiki content (weight 1.0) — broadest, lowest priority Files ranked by signal score then PageRank, top 3 returned. Remove git signals (HOTSPOT, bus-factor, owner) from enrichment — that info belongs in get_risk, not every search. Remove Bash command interception (fragile regex on grep/rg commands). Keep: symbols (3), importers (3), dependencies (2) per file. --- .../src/repowise/cli/commands/augment_cmd.py | 271 ++++++++---------- packages/cli/src/repowise/cli/mcp_config.py | 2 +- 2 files changed, 120 insertions(+), 153 deletions(-) diff --git a/packages/cli/src/repowise/cli/commands/augment_cmd.py b/packages/cli/src/repowise/cli/commands/augment_cmd.py index 4463db7..3915b05 100644 --- a/packages/cli/src/repowise/cli/commands/augment_cmd.py +++ b/packages/cli/src/repowise/cli/commands/augment_cmd.py @@ -1,11 +1,13 @@ """``repowise augment`` — hook-driven context enrichment for AI coding agents. Reads a Claude Code hook payload from stdin (JSON), queries the local wiki.db -for graph context (importers, dependencies, symbols), and writes enriched -context back to stdout as the hook response. +for graph context (importers, symbols), and writes enriched context back to +stdout as the hook response. -Also handles PostToolUse events: detects git commits and notifies the agent -when the wiki is stale. +PreToolUse: enriches Grep/Glob calls with up to 3 related files, their key +symbols, importers, and dependencies. 
+ +PostToolUse: detects git commits and notifies the agent when the wiki is stale. Design goals: - Cold start < 500ms (lazy imports, minimal work) @@ -49,7 +51,7 @@ def _run_augment() -> None: tool_input = payload.get("tool_input", {}) cwd = payload.get("cwd", "") - if event == "PreToolUse" and tool_name in ("Grep", "Glob", "Bash"): + if event == "PreToolUse" and tool_name in ("Grep", "Glob"): result = _handle_pre_tool_use(tool_name, tool_input, cwd) if result: _emit_response(event, result) @@ -80,22 +82,8 @@ def _emit_response(event: str, context: str) -> None: def _extract_search_pattern(tool_name: str, tool_input: dict) -> str | None: """Extract the search pattern from the tool input.""" - if tool_name == "Grep": - return tool_input.get("pattern") - if tool_name == "Glob": + if tool_name in ("Grep", "Glob"): return tool_input.get("pattern") - if tool_name == "Bash": - cmd = tool_input.get("command", "") - # Only augment grep/rg/find commands, not arbitrary bash - import re - - m = re.search(r"\b(?:grep|rg|find|ag|ack)\b.*?['\"]([^'\"]+)['\"]", cmd) - if m: - return m.group(1) - # Also match: grep pattern (unquoted, first non-flag arg) - m = re.search(r"\b(?:grep|rg)\s+(?:-\S+\s+)*(\S+)", cmd) - if m and not m.group(1).startswith("-"): - return m.group(1) return None @@ -117,14 +105,30 @@ def _handle_pre_tool_use(tool_name: str, tool_input: dict, cwd: str) -> str | No async def _query_graph_context(repo_path: "Path", pattern: str) -> str | None: - """Run FTS + graph queries and format the enrichment context.""" - from pathlib import Path as _Path + """Multi-signal search + graph enrichment. + + Phase 1 — find relevant files via three signals (merged, deduped): + a) Symbol name match (wiki_symbols.name LIKE pattern) + b) File path match (graph_nodes.node_id LIKE pattern) + c) FTS on wiki page content (fallback for conceptual queries) + Results ranked by summed signal weight (symbol 2.0, path 1.5, FTS 1.0), then by PageRank. 
+ + Phase 2 — enrich top 3 files with symbols, importers, dependencies. + """ + import re from repowise.core.persistence import ( FullTextSearch, + GraphEdge, + GraphNode, + WikiSymbol, create_engine, + create_session_factory, + get_session, ) + from repowise.core.persistence.crud import get_repository_by_path from repowise.core.persistence.database import resolve_db_url + from sqlalchemy import select db_path = repo_path / ".repowise" / "wiki.db" if not db_path.exists(): @@ -134,62 +138,6 @@ async def _query_graph_context(repo_path: "Path", pattern: str) -> str | None: engine = create_engine(url) try: - # Phase 1: FTS search to find relevant files - fts = FullTextSearch(engine) - try: - results = await fts.search(pattern, limit=5) - except Exception: - results = [] - - if not results: - # Fallback: try matching graph nodes by path pattern - results = await _search_nodes_by_path(engine, repo_path, pattern) - if not results: - return None - - # Collect file paths from search results — prefer file-level pages - file_paths = [] - for r in results: - target = getattr(r, "target_path", None) or getattr(r, "node_id", None) - if not target: - continue - # Skip non-file pages — they don't map to graph nodes - page_type = getattr(r, "page_type", "") - if page_type and page_type not in ("file", "file_page", "infra_page", "api_contract"): - continue - # Symbol spotlight pages have target_path like "file.py::SymbolName" - if "::" in target: - target = target.split("::")[0] - file_paths.append(target) - - # Deduplicate while preserving order - seen: set[str] = set() - unique_paths = [] - for fp in file_paths: - if fp not in seen: - seen.add(fp) - unique_paths.append(fp) - file_paths = unique_paths[:5] - - if not file_paths: - # All FTS results were module-level — try path-based fallback - results = await _search_nodes_by_path(engine, repo_path, pattern) - file_paths = [r.node_id for r in results if hasattr(r, "node_id")][:5] - - if not file_paths: - return None - - # Phase 2: 
Batch graph queries for importers, dependencies, and symbols - from repowise.core.persistence import ( - GraphEdge, - GraphNode, - WikiSymbol, - create_session_factory, - get_session, - ) - from repowise.core.persistence.crud import get_repository_by_path - from sqlalchemy import select - sf = create_session_factory(engine) async with get_session(sf) as session: repo = await get_repository_by_path(session, str(repo_path)) @@ -197,10 +145,94 @@ async def _query_graph_context(repo_path: "Path", pattern: str) -> str | None: return None repo_id = repo.id - # Importers: who imports these files? (limit 3 per file) + # Clean pattern for SQL LIKE — strip regex syntax + clean = re.sub(r"[^\w/._-]", "", pattern) + + # Track how many signals each file matched + its PageRank + file_scores: dict[str, float] = {} # path -> score + file_ranks: dict[str, float] = {} # path -> pagerank + + # Signal 1: symbol name match — most precise + if clean: + sym_stmt = ( + select(WikiSymbol.file_path) + .where( + WikiSymbol.repository_id == repo_id, + WikiSymbol.name.like(f"%{clean}%"), + ) + .distinct() + .limit(5) + ) + sym_result = await session.execute(sym_stmt) + for (fp,) in sym_result.all(): + file_scores[fp] = file_scores.get(fp, 0) + 2.0 + + # Signal 2: file path match + if clean: + path_stmt = ( + select(GraphNode.node_id, GraphNode.pagerank) + .where( + GraphNode.repository_id == repo_id, + GraphNode.node_type == "file", + GraphNode.node_id.like(f"%{clean}%"), + ) + .order_by(GraphNode.pagerank.desc()) + .limit(5) + ) + path_result = await session.execute(path_stmt) + for node_id, pr in path_result.all(): + file_scores[node_id] = file_scores.get(node_id, 0) + 1.5 + file_ranks[node_id] = pr + + # Signal 3: FTS on wiki content — broadest, lowest weight + fts = FullTextSearch(engine) + try: + fts_results = await fts.search(pattern, limit=5) + except Exception: + fts_results = [] + + for r in fts_results: + target = getattr(r, "target_path", None) or "" + page_type = getattr(r, 
"page_type", "") + if page_type and page_type not in ( + "file", "file_page", "infra_page", "api_contract", + ): + continue + if "::" in target: + target = target.split("::")[0] + if target: + file_scores[target] = file_scores.get(target, 0) + 1.0 + + if not file_scores: + return None + + # Fetch PageRank for files we don't have it for yet + missing_pr = [fp for fp in file_scores if fp not in file_ranks] + if missing_pr: + pr_stmt = select(GraphNode.node_id, GraphNode.pagerank).where( + GraphNode.repository_id == repo_id, + GraphNode.node_type == "file", + GraphNode.node_id.in_(missing_pr), + ) + pr_result = await session.execute(pr_stmt) + for node_id, pr in pr_result.all(): + file_ranks[node_id] = pr + + # Rank: primary by signal score, secondary by PageRank + ranked = sorted( + file_scores.keys(), + key=lambda fp: (file_scores[fp], file_ranks.get(fp, 0)), + reverse=True, + ) + file_paths = ranked[:3] + + # Phase 2: enrich with symbols, importers, dependencies + + # Importers: who uses these files? (limit 3 per file) importers_stmt = select(GraphEdge).where( GraphEdge.repository_id == repo_id, GraphEdge.target_node_id.in_(file_paths), + GraphEdge.edge_type == "imports", ) importers_result = await session.execute(importers_stmt) importers_by_file: dict[str, list[str]] = {} @@ -209,19 +241,20 @@ async def _query_graph_context(repo_path: "Path", pattern: str) -> str | None: if len(lst) < 3: lst.append(edge.source_node_id) - # Dependencies: what do these files import? + # Dependencies: what does this file use? 
(limit 2 per file) deps_stmt = select(GraphEdge).where( GraphEdge.repository_id == repo_id, GraphEdge.source_node_id.in_(file_paths), + GraphEdge.edge_type == "imports", ) deps_result = await session.execute(deps_stmt) deps_by_file: dict[str, list[str]] = {} for edge in deps_result.scalars().all(): lst = deps_by_file.setdefault(edge.source_node_id, []) - if len(lst) < 3: + if len(lst) < 2: lst.append(edge.target_node_id) - # Symbols: key symbols in these files + # Symbols: key symbols (limit 3 per file) symbols_stmt = ( select(WikiSymbol) .where( @@ -234,58 +267,27 @@ async def _query_graph_context(repo_path: "Path", pattern: str) -> str | None: symbols_by_file: dict[str, list] = {} for sym in symbols_result.scalars().all(): lst = symbols_by_file.setdefault(sym.file_path, []) - if len(lst) < 5: + if len(lst) < 3: lst.append(sym) - # Hotspot info (best-effort — DB schema may not have all columns) - git_by_file: dict = {} - try: - from repowise.core.persistence.models import GitMetadata - - git_stmt = select(GitMetadata).where( - GitMetadata.repository_id == repo_id, - GitMetadata.file_path.in_(file_paths), - ) - git_result = await session.execute(git_stmt) - for gm in git_result.scalars().all(): - git_by_file[gm.file_path] = gm - except Exception: - pass # git metadata is optional enrichment - - # Phase 3: Format the enrichment text + # Phase 3: Format lines = [f"[repowise] {len(file_paths)} related file(s) found:\n"] - for fp in file_paths[:5]: + for fp in file_paths: lines.append(f" {fp}") - # Symbols syms = symbols_by_file.get(fp, []) if syms: sym_strs = [f"{s.kind}:{s.name}" for s in syms] lines.append(f" Symbols: {', '.join(sym_strs)}") - # Importers imps = importers_by_file.get(fp, []) if imps: lines.append(f" Imported by: {', '.join(imps)}") - # Dependencies deps = deps_by_file.get(fp, []) if deps: - lines.append(f" Depends on: {', '.join(deps)}") - - # Git signals - gm = git_by_file.get(fp) - if gm: - signals = [] - if gm.is_hotspot: - 
signals.append("HOTSPOT") - if gm.bus_factor and gm.bus_factor <= 1: - signals.append(f"bus-factor={gm.bus_factor}") - if gm.primary_owner_name: - signals.append(f"owner={gm.primary_owner_name}") - if signals: - lines.append(f" Git: {', '.join(signals)}") + lines.append(f" Uses: {', '.join(deps)}") lines.append("") @@ -295,41 +297,6 @@ async def _query_graph_context(repo_path: "Path", pattern: str) -> str | None: await engine.dispose() -async def _search_nodes_by_path(engine, repo_path: "Path", pattern: str) -> list: - """Fallback: search GraphNode.node_id by LIKE pattern.""" - from repowise.core.persistence import ( - GraphNode, - create_session_factory, - get_session, - ) - from repowise.core.persistence.crud import get_repository_by_path - from sqlalchemy import select - - sf = create_session_factory(engine) - async with get_session(sf) as session: - repo = await get_repository_by_path(session, str(repo_path)) - if repo is None: - return [] - # Clean pattern for LIKE query: strip regex chars, use as substring - import re - - clean = re.sub(r"[^\w/._-]", "", pattern) - if not clean: - return [] - - stmt = ( - select(GraphNode) - .where( - GraphNode.repository_id == repo.id, - GraphNode.node_id.like(f"%{clean}%"), - ) - .order_by(GraphNode.pagerank.desc()) - .limit(5) - ) - result = await session.execute(stmt) - return list(result.scalars().all()) - - # --------------------------------------------------------------------------- # PostToolUse — detect git commits and flag stale wiki # --------------------------------------------------------------------------- diff --git a/packages/cli/src/repowise/cli/mcp_config.py b/packages/cli/src/repowise/cli/mcp_config.py index 685fa34..6d2944f 100644 --- a/packages/cli/src/repowise/cli/mcp_config.py +++ b/packages/cli/src/repowise/cli/mcp_config.py @@ -135,7 +135,7 @@ def install_claude_code_hooks() -> Path | None: """Register PreToolUse and PostToolUse hooks in ~/.claude/settings.json. 
PreToolUse: enriches Grep/Glob searches with graph context (importers, - dependencies, symbols) from the local wiki.db. + symbols, dependencies) from the local wiki.db. PostToolUse: detects git commits and notifies the agent when the wiki is stale. From c546d7b2966cff949bcf50dd07f3d9c8c4a94d29 Mon Sep 17 00:00:00 2001 From: RaghavChamadiya Date: Sun, 26 Apr 2026 16:38:09 +0530 Subject: [PATCH 2/2] release: v0.3.1 Bumps repowise to 0.3.1 across pyproject.toml and the three sub-package __init__.py files. Highlights since 0.3.0: - Output language support for generated wiki content (#99) - Luau / Roblox language support (#89) - OpenRouter LLM and embedding provider (#56) - base_url plus per-provider env vars for OpenAI / Anthropic / Gemini / Ollama / LiteLLM (#85) - SQLite WAL plus busy_timeout plus FK constraints, fixing concurrent 'repowise update' database is locked errors (#101) - CLAUDE.md opt-out prompt now asked in both full and advanced modes and the answer is honoured (#102) - repowise init no longer silently overwrites unparseable user JSON configs (#94) - pyproject packages list resynced with the language-support refactor so editable installs and CI build cleanly (#97) - uv workflow documented and dev deps migrated to PEP 735 dependency-groups, silencing the tool.uv.dev-dependencies deprecation warning (#100) - Five Dependabot security bumps (dompurify, gitpython, mako, litellm, python-multipart) Also flips the project URLs and serve_cmd's _GITHUB_REPO constant from RaghavChamadiya/repowise to repowise-dev/repowise so 'repowise serve' can locate the published web UI tarball. 
--- .github/CONTRIBUTING.md | 4 ++-- docs/QUICKSTART.md | 2 +- packages/cli/src/repowise/cli/__init__.py | 2 +- packages/cli/src/repowise/cli/commands/serve_cmd.py | 2 +- packages/core/src/repowise/core/__init__.py | 2 +- packages/server/src/repowise/server/__init__.py | 2 +- pyproject.toml | 10 +++++----- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index e5fb849..e92ace9 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -15,7 +15,7 @@ Thanks for your interest in contributing to Repowise! This guide will help you g ```bash # Clone the repo -git clone https://github.com/RaghavChamadiya/repowise.git +git clone https://github.com/repowise-dev/repowise.git cd repowise # Install Python dependencies @@ -94,7 +94,7 @@ repowise/ ## Reporting Issues -- Use [GitHub Issues](https://github.com/RaghavChamadiya/repowise/issues) for bugs and feature requests +- Use [GitHub Issues](https://github.com/repowise-dev/repowise/issues) for bugs and feature requests - For security vulnerabilities, see [SECURITY.md](SECURITY.md) ## License diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index 18d0adf..5756bd1 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -156,7 +156,7 @@ docker run -p 7337:7337 -p 3000:3000 \ ### From source (for development) ```bash -git clone https://github.com/RaghavChamadiya/repowise.git +git clone https://github.com/repowise-dev/repowise.git cd repowise && npm install # Terminal 1: API diff --git a/packages/cli/src/repowise/cli/__init__.py b/packages/cli/src/repowise/cli/__init__.py index 9283444..1b3c4b9 100644 --- a/packages/cli/src/repowise/cli/__init__.py +++ b/packages/cli/src/repowise/cli/__init__.py @@ -6,4 +6,4 @@ AI-generated documentation. 
""" -__version__ = "0.3.0" +__version__ = "0.3.1" diff --git a/packages/cli/src/repowise/cli/commands/serve_cmd.py b/packages/cli/src/repowise/cli/commands/serve_cmd.py index 9da2e3c..f10274b 100644 --- a/packages/cli/src/repowise/cli/commands/serve_cmd.py +++ b/packages/cli/src/repowise/cli/commands/serve_cmd.py @@ -155,7 +155,7 @@ def _save_global_embedder(embedder: str, api_key: str) -> None: pass # Non-fatal — user just gets prompted again next time. -_GITHUB_REPO = "RaghavChamadiya/repowise" +_GITHUB_REPO = "repowise-dev/repowise" _WEB_CACHE_DIR = Path.home() / ".repowise" / "web" _MARKER_FILE = _WEB_CACHE_DIR / ".version" diff --git a/packages/core/src/repowise/core/__init__.py b/packages/core/src/repowise/core/__init__.py index 5120e79..60ec9c6 100644 --- a/packages/core/src/repowise/core/__init__.py +++ b/packages/core/src/repowise/core/__init__.py @@ -6,4 +6,4 @@ Namespace package: repowise.core is part of the repowise namespace. """ -__version__ = "0.3.0" +__version__ = "0.3.1" diff --git a/packages/server/src/repowise/server/__init__.py b/packages/server/src/repowise/server/__init__.py index af5ac95..918e31e 100644 --- a/packages/server/src/repowise/server/__init__.py +++ b/packages/server/src/repowise/server/__init__.py @@ -7,4 +7,4 @@ - Background job scheduler (APScheduler) """ -__version__ = "0.3.0" +__version__ = "0.3.1" diff --git a/pyproject.toml b/pyproject.toml index e798110..ef8208d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta" [project] name = "repowise" -version = "0.3.0" +version = "0.3.1" description = "Codebase intelligence for developers and AI — generates and maintains a structured wiki for any codebase" readme = "README.md" requires-python = ">=3.11" @@ -113,10 +113,10 @@ dev = [ repowise = "repowise.cli.main:cli" [project.urls] -Homepage = "https://github.com/RaghavChamadiya/repowise" -Repository = "https://github.com/RaghavChamadiya/repowise" -Issues = 
"https://github.com/RaghavChamadiya/repowise/issues" -Documentation = "https://github.com/RaghavChamadiya/repowise/blob/main/docs/USER_GUIDE.md" +Homepage = "https://github.com/repowise-dev/repowise" +Repository = "https://github.com/repowise-dev/repowise" +Issues = "https://github.com/repowise-dev/repowise/issues" +Documentation = "https://github.com/repowise-dev/repowise/blob/main/docs/USER_GUIDE.md" # --------------------------------------------------------------------------- # setuptools — explicit package-dir mapping across three src/ directories