Skip to content

Commit 0d614d8

Browse files
RecoDemoclaude
andcommitted
Add git-aware incremental re-indexing, bump to 0.3.0
Before every query the MCP server now checks git for changes (~1-2ms) and only re-parses modified/added/deleted files instead of rebuilding the entire index. Full rebuilds only happen on first startup or when >50% of files change (e.g. branch switches). New module: git_tracker.py (git diff/status change detection). New methods: ProjectIndexer.remove_file(), rebuild_graphs(). New param: reindex_file(skip_graph_rebuild=True) for batching. 24 new tests (unit + integration with real temp git repos). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 3c258c7 commit 0d614d8

9 files changed

Lines changed: 621 additions & 8 deletions

File tree

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ A structural codebase indexer with an [MCP](https://modelcontextprotocol.io) ser
1414

1515
Indexes codebases by parsing source files into structural metadata -- functions, classes, imports, dependency graphs, and cross-file call chains -- then exposes 17 query tools via the Model Context Protocol, enabling Claude Code and other MCP clients to navigate codebases efficiently without reading entire files.
1616

17+
**Automatic incremental re-indexing:** In git repositories, the index stays up to date automatically. Before every query, the server checks `git diff` and `git status` (~1-2ms). If files changed, only those files are re-parsed and the dependency graph is rebuilt. No need to manually call `reindex` after edits, branch switches, or pulls.
18+
1719
## Language Support
1820

1921
| Language | Method | Extracts |
@@ -166,7 +168,7 @@ This ensures the AI reaches for surgical indexed queries first, which saves toke
166168
| `get_file_dependencies` | Files imported by a given file |
167169
| `get_file_dependents` | Files that import from a given file |
168170
| `search_codebase` | Regex search across all files (max 100 results) |
169-
| `reindex` | Re-index the project after file changes (MCP server only) |
171+
| `reindex` | Force full re-index (rarely needed — incremental updates happen automatically in git repos) |
170172

171173
## Benchmarks
172174

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "mcp-codebase-index"
7-
version = "0.2.2"
7+
version = "0.3.0"
88
description = "Structural codebase indexer with MCP server for AI-assisted development"
99
requires-python = ">=3.11"
1010
readme = "README.md"

src/mcp_codebase_index/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@
1818

1919
"""Structural codebase indexer with MCP server for AI-assisted development."""
2020

21-
__version__ = "0.2.2"
21+
__version__ = "0.3.0"
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
# mcp-codebase-index - Structural codebase indexer with MCP server
2+
# Copyright (C) 2026 Michael Doyle
3+
#
4+
# This program is free software: you can redistribute it and/or modify
5+
# it under the terms of the GNU Affero General Public License as published by
6+
# the Free Software Foundation, either version 3 of the License, or
7+
# (at your option) any later version.
8+
#
9+
# This program is distributed in the hope that it will be useful,
10+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
# GNU Affero General Public License for more details.
13+
#
14+
# You should have received a copy of the GNU Affero General Public License
15+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
16+
#
17+
# Commercial licensing available. See COMMERCIAL-LICENSE.md for details.
18+
19+
"""Git change detection for incremental re-indexing."""
20+
21+
from __future__ import annotations
22+
23+
import subprocess
24+
from dataclasses import dataclass, field
25+
26+
27+
@dataclass
class GitChangeSet:
    """Paths reported as changed relative to a git ref, grouped by change type."""

    # Paths reported as modified.
    modified: list[str] = field(default_factory=list)
    # Paths reported as newly added.
    added: list[str] = field(default_factory=list)
    # Paths reported as deleted.
    deleted: list[str] = field(default_factory=list)

    @property
    def is_empty(self) -> bool:
        """Return True when none of the three path lists has any entry."""
        return not (self.modified or self.added or self.deleted)
38+
39+
40+
def is_git_repo(root_path: str) -> bool:
    """Check if the given path is inside a git work tree.

    Args:
        root_path: Directory in which ``git rev-parse`` is executed.

    Returns:
        True only when git exits with status 0 and prints ``true``.
        False when git reports otherwise, cannot be launched, cannot use
        ``root_path`` as its working directory, or exceeds the 10s timeout.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--is-inside-work-tree"],
            cwd=root_path,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers a missing git binary (FileNotFoundError) as well as
        # an unusable cwd: missing, not a directory, or permission denied.
        return False
    return result.returncode == 0 and result.stdout.strip() == "true"
53+
54+
55+
def get_head_commit(root_path: str) -> str | None:
56+
"""Get the current HEAD commit hash."""
57+
try:
58+
result = subprocess.run(
59+
["git", "rev-parse", "HEAD"],
60+
cwd=root_path,
61+
capture_output=True,
62+
text=True,
63+
timeout=10,
64+
)
65+
if result.returncode == 0:
66+
return result.stdout.strip()
67+
return None
68+
except (FileNotFoundError, subprocess.TimeoutExpired):
69+
return None
70+
71+
72+
def get_changed_files(root_path: str, since_ref: str | None) -> GitChangeSet:
    """Get files changed since a given git ref.

    Combines committed changes (since_ref..HEAD), staged changes,
    unstaged changes, and untracked files into a single GitChangeSet.
    All returned paths are relative to ``root_path``.

    Args:
        root_path: Directory inside the git work tree; every git command
            runs with this as its working directory.
        since_ref: Git ref to compare against. When None, an empty
            GitChangeSet is returned without running git.

    Returns:
        A GitChangeSet with sorted path lists. Each path appears in exactly
        one list: ``deleted`` if it no longer exists on disk, ``modified`` if
        it exists and git reported it as deleted (i.e. it was recreated) or
        only as modified, ``added`` otherwise. Git commands that fail or
        time out contribute nothing.
    """
    import os  # Local import: only needed for the on-disk existence check.

    if since_ref is None:
        return GitChangeSet()

    modified: set[str] = set()
    added: set[str] = set()
    deleted: set[str] = set()

    # core.quotePath=false keeps non-ASCII paths verbatim instead of C-quoted.
    git = ["git", "-c", "core.quotePath=false"]
    # --relative makes `git diff` print paths relative to cwd (root_path),
    # matching `git ls-files`, and limits output to that directory.
    diff = [*git, "diff", "--name-status", "--relative"]

    # 1. Committed changes since the ref
    _parse_diff_output(root_path, [*diff, since_ref, "HEAD"], modified, added, deleted)

    # 2. Unstaged changes
    _parse_diff_output(root_path, diff, modified, added, deleted)

    # 3. Staged changes
    _parse_diff_output(root_path, [*diff, "--cached"], modified, added, deleted)

    # 4. Untracked files
    try:
        result = subprocess.run(
            [*git, "ls-files", "--others", "--exclude-standard"],
            cwd=root_path,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, subprocess.TimeoutExpired):
        result = None  # Best effort: untracked files are simply not reported.
    if result is not None and result.returncode == 0:
        for line in result.stdout.strip().splitlines():
            path = line.strip()
            if path:
                added.add(path)

    # The four sources can disagree about one path (e.g. committed "A" then
    # deleted in the work tree, or committed "M" then deleted). The work tree
    # is what gets parsed, so let on-disk existence decide the final bucket.
    final_modified: set[str] = set()
    final_added: set[str] = set()
    final_deleted: set[str] = set()
    for path in modified | added | deleted:
        if not os.path.lexists(os.path.join(root_path, path)):
            final_deleted.add(path)
        elif path in deleted or path not in added:
            # Reported deleted yet present (recreated), or plainly modified.
            final_modified.add(path)
        else:
            final_added.add(path)

    return GitChangeSet(
        modified=sorted(final_modified),
        added=sorted(final_added),
        deleted=sorted(final_deleted),
    )
125+
126+
127+
def _parse_diff_output(
128+
root_path: str,
129+
cmd: list[str],
130+
modified: set[str],
131+
added: set[str],
132+
deleted: set[str],
133+
) -> None:
134+
"""Parse git diff --name-status output into modified/added/deleted sets."""
135+
try:
136+
result = subprocess.run(
137+
cmd,
138+
cwd=root_path,
139+
capture_output=True,
140+
text=True,
141+
timeout=10,
142+
)
143+
if result.returncode != 0:
144+
return
145+
except (FileNotFoundError, subprocess.TimeoutExpired):
146+
return
147+
148+
for line in result.stdout.strip().splitlines():
149+
parts = line.split("\t")
150+
if len(parts) < 2:
151+
continue
152+
status = parts[0]
153+
path = parts[1]
154+
155+
if status == "M":
156+
modified.add(path)
157+
elif status == "A":
158+
added.add(path)
159+
elif status == "D":
160+
deleted.add(path)
161+
elif status.startswith("R"):
162+
# Rename: delete old path, add new path
163+
deleted.add(path)
164+
if len(parts) >= 3:
165+
added.add(parts[2])

src/mcp_codebase_index/models.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,6 @@ class ProjectIndex:
126126
total_classes: int = 0
127127
index_build_time_seconds: float = 0.0
128128
index_memory_bytes: int = 0
129+
130+
# Git tracking
131+
last_indexed_git_ref: str | None = None

src/mcp_codebase_index/project_indexer.py

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -161,11 +161,14 @@ def index(self) -> ProjectIndex:
161161

162162
return self._project_index
163163

164-
def reindex_file(self, file_path: str) -> None:
164+
def reindex_file(self, file_path: str, skip_graph_rebuild: bool = False) -> None:
165165
"""Re-index a single file. Updates the existing ProjectIndex in place.
166166
167167
Args:
168168
file_path: Path to the file (absolute or relative to root_path).
169+
skip_graph_rebuild: If True, skip rebuilding cross-file graphs.
170+
Use when batching multiple reindex calls, then call
171+
rebuild_graphs() once at the end.
169172
"""
170173
if self._project_index is None:
171174
raise RuntimeError("Cannot reindex before initial index() call.")
@@ -240,10 +243,78 @@ def reindex_file(self, file_path: str) -> None:
240243
else:
241244
idx.import_graph.pop(rel_path, None)
242245

243-
# Rebuild reverse import graph
244-
idx.reverse_import_graph = self._build_reverse_graph(idx.import_graph)
246+
if not skip_graph_rebuild:
247+
# Rebuild reverse import graph
248+
idx.reverse_import_graph = self._build_reverse_graph(idx.import_graph)
249+
250+
# Rebuild global dependency graphs (full rebuild is simplest for correctness)
251+
idx.global_dependency_graph = self._build_global_dependency_graph(
252+
idx.files, idx.symbol_table
253+
)
254+
idx.reverse_dependency_graph = self._build_reverse_graph(
255+
idx.global_dependency_graph
256+
)
257+
258+
def remove_file(self, file_path: str) -> None:
    """Drop a single file's entries from the existing index, in place.

    Cross-file graphs are NOT rebuilt here. After a batch of
    remove/reindex operations, call rebuild_graphs() once.

    Args:
        file_path: Path to the file (absolute or relative to root_path).

    Raises:
        RuntimeError: If index() has not been called yet.
    """
    idx = self._project_index
    if idx is None:
        raise RuntimeError("Cannot remove_file before initial index() call.")

    # Resolve the argument to a path relative to the project root.
    if os.path.isabs(file_path):
        abs_path = os.path.abspath(file_path)
    else:
        abs_path = os.path.join(self.root_path, file_path)
    rel_path = os.path.relpath(abs_path, self.root_path)

    old_metadata = idx.files.get(rel_path)
    if old_metadata is None:
        # Not indexed: nothing to remove.
        return

    # Collect every symbol name this file may have registered, then delete
    # only the entries that still point at this file.
    candidate_names = []
    for func in old_metadata.functions:
        candidate_names.extend((func.qualified_name, func.name))
    candidate_names.extend(cls.name for cls in old_metadata.classes)

    symbols = idx.symbol_table
    for symbol_name in candidate_names:
        if symbols.get(symbol_name) == rel_path:
            del symbols[symbol_name]

    # Detach the file from the import graph and from every reverse-graph set.
    idx.import_graph.pop(rel_path, None)
    for members in idx.reverse_import_graph.values():
        members.discard(rel_path)

    # Keep the aggregate counters in step with the removal.
    idx.total_lines -= old_metadata.total_lines
    idx.total_functions -= len(old_metadata.functions)
    idx.total_classes -= len(old_metadata.classes)

    # Finally drop the file entry itself and refresh the file count.
    del idx.files[rel_path]
    idx.total_files = len(idx.files)
305+
306+
def rebuild_graphs(self) -> None:
307+
"""Rebuild all cross-file graphs from current file data.
308+
309+
Call after batching multiple remove_file() / reindex_file(skip_graph_rebuild=True)
310+
operations.
311+
"""
312+
if self._project_index is None:
313+
raise RuntimeError("Cannot rebuild_graphs before initial index() call.")
314+
315+
idx = self._project_index
316+
idx.import_graph = self._build_import_graph(idx.files)
317+
idx.reverse_import_graph = self._build_reverse_graph(idx.import_graph)
247318
idx.global_dependency_graph = self._build_global_dependency_graph(
248319
idx.files, idx.symbol_table
249320
)

0 commit comments

Comments
 (0)