Skip to content

Commit 0cbbc54

Browse files
cdeust and claude committed
release: v3.14.9 — ingest_codebase: no caps + Rust-style qn fallback
Fix three bugs that collapsed the upstream graph projection from 197k nodes / 95k edges to 98 memories / 98 entities / 3 edges on the Cortex codebase:

- Remove hardcoded `top_symbols=50` / `top_processes=10` from the FastMCP wrapper. The schema documented `null = unlimited` but the wrapper signature always passed 50/10. Public tool now has no caps.
- Decouple `fetch_files` from the symbol cap. Files were truncated to the same slice, so the File→symbol containment join collapsed to near-zero edges. Files now pulled unconditionally.
- Rewrite `file_path_from_qn` to handle Rust-style `a::b::c::sym` qualified names (used by first-party Python in this codebase). Previously returned the head segment as a path, which was never a real file. Now returns priority-ordered candidates covering <file.py>::<sym>, <dotted.module>::<sym>, and Rust-style module paths; caller picks the first present in known_files.

Diagnostic message also corrected — no longer blames "non-Python indexer" when the real cause is qn-format mismatch.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 2f525ab commit 0cbbc54

7 files changed

Lines changed: 127 additions & 52 deletions

File tree

.claude-plugin/plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "cortex",
33
"description": "Persistent memory for Claude Code — remembers across sessions automatically. Install and forget. Scientific retrieval backed by 41 published papers.",
4-
"version": "3.14.8",
4+
"version": "3.14.9",
55
"author": {
66
"name": "Clement Deust",
77
"email": "admin@ai-architect.tools"

CHANGELOG.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,47 @@ adheres to [Semantic Versioning](https://semver.org/).
66

77
## [Unreleased]
88

9+
## [3.14.9] — ingest_codebase: no caps + Rust-style qn fallback
10+
11+
### Fixed
12+
13+
- **Hardcoded `top_symbols=50` / `top_processes=10` caps in the FastMCP
14+
wrapper** (`mcp_server/tool_registry_ingest.py`) silently truncated
15+
every ingest to the longest 50 symbols across Function/Method/Struct,
16+
regardless of the schema's documented `null = unlimited` default. On
17+
the Cortex codebase this collapsed an upstream graph of 197 646
18+
nodes / 95 185 edges to **98 memories / 98 entities / 3 edges**.
19+
Removed both parameters from the tool wrapper signature; the
20+
composition root now always passes `None` so the handler pulls every
21+
Function/Method/Struct/process the upstream graph holds.
22+
- **`fetch_files` shared the symbol cap.**
23+
`cypher.fetch_files(graph_path, limit=top_symbols)` truncated File
24+
nodes to the same slice as the symbol cap. With `top_symbols=50`,
25+
only 50 of thousands of files came back; the
26+
`(:File)-[]->(:symbol)` containment join filtered by
27+
`known_files` and dropped every edge whose file wasn't in that
28+
50-file slice. Decoupled: files are pulled unconditionally
29+
(`limit=None`); only symbols may be capped (and even that path is
30+
no longer reachable from the public tool).
31+
- **`file_path_from_qn` couldn't resolve Rust-style qualified names.**
32+
First-party Python in this codebase emits
33+
`mcp_server::handlers::ingest_codebase::handler`, which the previous
34+
fallback split on `::` and returned `"mcp_server"` — not a real
35+
file path, so containment failed and the diagnostic blamed a
36+
"non-Python indexer". Rewritten to return a priority-ordered list
37+
of candidates covering three qn formats:
38+
`<file.py>::<sym>`, `<dotted.module>::<sym>`, and
39+
`<a::b::c>::<sym>` (Rust-style module paths). The handler picks the
40+
first candidate present in `known_files`; the diagnostic now
41+
describes the actual cause when no candidate matches.
42+
43+
### Changed
44+
45+
- `ingest_codebase` MCP schema no longer advertises `top_symbols` or
46+
`top_processes` properties. The handler still accepts them as
47+
programmatic kwargs for tests, but they are not part of the public
48+
tool surface.
49+
950
## [3.14.8] — ingest_codebase full-chain extraction + audit fixes
1051

1152
### Fixed

mcp_server/handlers/ingest_codebase.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -114,14 +114,17 @@ def _attribute_files_to_symbols(
114114
sym["file"] = authoritative
115115
continue
116116
# No containment edge — try the qn-split fallback, but only
117-
# accept it when the result corresponds to an actual File node.
118-
candidate = cypher.file_path_from_qn(qn)
119-
if candidate and candidate in known_files:
120-
sym["file"] = candidate
117+
# accept candidates that correspond to actual File nodes.
118+
# ``file_path_from_qn`` returns a priority-ordered list; pick
119+
# the first candidate present in ``known_files``.
120+
candidates = cypher.file_path_from_qn(qn)
121+
match = next((c for c in candidates if c in known_files), None)
122+
if match is not None:
123+
sym["file"] = match
121124
fallback_used += 1
122125
else:
123126
sym["file"] = None
124-
if candidate:
127+
if candidates:
125128
fallback_unverified += 1
126129
diagnostics: list[str] = []
127130
if fallback_used:
@@ -133,8 +136,9 @@ def _attribute_files_to_symbols(
133136
if fallback_unverified:
134137
diagnostics.append(
135138
f"file-attribution: {fallback_unverified} symbols had no "
136-
f"containment edge AND the qn-split fallback didn't match a "
137-
f"known file (likely non-Python indexer); file=None"
139+
f"containment edge AND no qn-split candidate matched a "
140+
f"known file; file=None (orphan symbols or qn format the "
141+
f"fallback heuristics don't cover)"
138142
)
139143
return diagnostics
140144

@@ -155,7 +159,13 @@ async def _pull_symbols_and_files(
155159
diagnostics: list[str] = []
156160
symbols, sym_diag = await cypher.fetch_top_symbols(graph_path, top_symbols)
157161
diagnostics.extend(sym_diag)
158-
files, file_diag = await cypher.fetch_files(graph_path, limit=top_symbols)
162+
# Files are pulled UNCAPPED regardless of ``top_symbols``. The
163+
# File→symbol containment join (cypher.fetch_file_containment)
164+
# filters by the known-files set; if files are truncated to a slice
165+
# smaller than the symbols' file population, the join collapses to
166+
# near-zero edges. Decouple: always pull every File node so
167+
# containment edges resolve. ``top_symbols`` only caps symbols.
168+
files, file_diag = await cypher.fetch_files(graph_path, limit=None)
159169
diagnostics.extend(file_diag)
160170
call_edges: list[tuple[str, str]] = []
161171
file_edges: list[tuple[str, str]] = []

mcp_server/handlers/ingest_codebase_cypher.py

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -50,25 +50,68 @@
5050
)
5151

5252

53-
def file_path_from_qn(qn: str) -> str | None:
54-
"""Last-resort heuristic: derive a file-path-shaped string from a
55-
qualified_name when the graph has no (:File)-[]->(:symbol) edge
56-
for it.
57-
58-
Many language indexers emit ``qualified_name = "<file>::<sym>"``
59-
(Python via the AP indexer does this), in which case splitting on
60-
``::`` and taking the first segment yields the file path. Other
61-
languages (e.g., Rust ``crate::module::Type::method``) do NOT
62-
encode a file path here — the head segment will be a crate or
63-
module name, not a real path. Callers should prefer the
64-
containment-edge mapping; only use this fallback when no edge
65-
exists, and then validate against the known-files set before
66-
trusting the result.
53+
def file_path_from_qn(qn: str) -> list[str]:
54+
"""Last-resort heuristic: derive plausible file-path candidates
55+
from a qualified_name when the graph has no (:File)-[]->(:symbol)
56+
edge for it.
57+
58+
Returns a list of candidate paths (zero or more), in priority
59+
order. Callers MUST validate each candidate against the
60+
known-files set before trusting it — the qn alone cannot
61+
distinguish a Python module from a Rust crate path.
62+
63+
Heuristics applied (priority order):
64+
1. ``<path/with/extension>::<sym>`` — head already a file path
65+
(Python via `<file>::<sym>`, e.g. ``deps/aiofile/aio.py::AIOFile``).
66+
2. ``<dotted.module>::<sym>`` — convert dots to slashes and
67+
append ``.py`` (e.g. ``my.pkg.mod::C`` ⇒ ``my/pkg/mod.py``).
68+
3. ``<a::b::c>::<sym>`` — convert ``::`` separators to slashes
69+
and append ``.py`` (Rust-style module path used by some
70+
Python indexers, e.g. ``mcp_server::handlers::x::handler``
71+
⇒ ``mcp_server/handlers/x.py``; the symbol segment is always dropped).
72+
4. Same as (3) but treating the trailing segment as a method
73+
on a class — drop the last two segments and use ``.py``.
74+
75+
Returns an empty list when the qn is empty or has no ``::``.
6776
"""
6877
if not qn or "::" not in qn:
69-
return None
78+
return []
79+
candidates: list[str] = []
80+
code_exts = (".py", ".ts", ".tsx", ".rs", ".js")
7081
head = qn.split("::", 1)[0]
71-
return head or None
82+
head_is_path = bool(head) and ("/" in head or head.endswith(code_exts))
83+
head_is_dotted_module = (
84+
bool(head)
85+
and "." in head
86+
and "/" not in head
87+
and not head.endswith(code_exts)
88+
)
89+
90+
# (1) head already looks like a file path — trust it as-is.
91+
if head_is_path:
92+
candidates.append(head)
93+
return candidates
94+
95+
# (2) dotted-module head (classic Python ``pkg.mod::Sym``).
96+
if head_is_dotted_module:
97+
candidates.append(head.replace(".", "/") + ".py")
98+
return candidates
99+
100+
# (3,4) Rust-style ``a::b::c::sym`` module path. Try progressively
101+
# shorter prefixes so both ``module::function`` (drop 1) and
102+
# ``module::Class::method`` (drop 2) resolve.
103+
parts = qn.split("::")
104+
for drop in (1, 2, 3):
105+
if len(parts) - drop < 1:
106+
break
107+
prefix = parts[: len(parts) - drop]
108+
if not prefix:
109+
continue
110+
cand = "/".join(prefix) + ".py"
111+
if cand not in candidates:
112+
candidates.append(cand)
113+
114+
return candidates
72115

73116

74117
async def _run_query(

mcp_server/handlers/ingest_codebase_schema.py

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -59,27 +59,6 @@
5959
),
6060
"default": False,
6161
},
62-
"top_symbols": {
63-
"type": ["integer", "null"],
64-
"description": (
65-
"Optional explicit cap on symbols materialised as memories "
66-
"+ KG nodes. Default null = pull every Function/Method/"
67-
"Struct in the graph (full chain hierarchy)."
68-
),
69-
"default": None,
70-
"minimum": 0,
71-
"examples": [None, 200, 1000],
72-
},
73-
"top_processes": {
74-
"type": ["integer", "null"],
75-
"description": (
76-
"Optional explicit cap on processes materialised as wiki "
77-
"pages. Default null = pull every entry-point process."
78-
),
79-
"default": None,
80-
"minimum": 0,
81-
"examples": [None, 25, 100],
82-
},
8362
},
8463
},
8564
}

mcp_server/tool_registry_ingest.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,21 @@ async def tool_ingest_codebase(
3131
output_dir: str | None = None,
3232
language: str = "auto",
3333
force_reindex: bool = False,
34-
top_symbols: int = 50,
35-
top_processes: int = 10,
3634
) -> str:
37-
"""Ingest upstream codebase analysis into Cortex."""
35+
"""Ingest upstream codebase analysis into Cortex.
36+
37+
No caps. Pulls every Function/Method/Struct/process the upstream
38+
graph holds, projects them all into Cortex memories + KG.
39+
"""
3840
return await safe_handler(
3941
ingest_codebase.handler,
4042
{
4143
"project_path": project_path,
4244
"output_dir": output_dir,
4345
"language": language,
4446
"force_reindex": force_reindex,
45-
"top_symbols": top_symbols,
46-
"top_processes": top_processes,
47+
"top_symbols": None,
48+
"top_processes": None,
4749
},
4850
tool_name="ingest_codebase",
4951
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "neuro-cortex-memory"
7-
version = "3.14.8"
7+
version = "3.14.9"
88
description = "Scientifically-grounded memory system based on computational neuroscience research"
99
readme = "README.md"
1010
license = "MIT"

0 commit comments

Comments
 (0)