Skip to content

Commit 12e23a4

Browse files
rodion-m and claude committed
fix(fetch): surface not-found identifiers in skill fetch script
Mirror the MCP fetch_artifacts fix: format_artifacts no longer silently skips identifiers with no content. It now lists concrete missing identifiers and tells the agent to re-check and retry them, with graceful degradation on the backend `found` flag and a requested-vs-returned backstop. Adds tests/test_fetch_format.py. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 1c14f1e commit 12e23a4

3 files changed

Lines changed: 150 additions & 13 deletions

File tree

skills/codealive-context-engine/SKILL.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,11 @@ preview** (up to 3 outgoing/incoming calls per direction). To see the full
233233
call graph, inheritance, or references, run `relationships.py` with the
234234
artifact's identifier.
235235

236+
**Missing identifiers.** If an identifier cannot be resolved (or is outside your access
237+
scope), `fetch.py` does not drop it silently — it prints a "not found" section listing each
238+
concrete identifier, with a hint to re-check those ids and retry the problematic ones. Tell
239+
the user which artifacts could not be fetched instead of omitting them.
240+
236241
**Disambiguating an identifier that lives in more than one data source.** Artifact
237242
identifiers are unique only per data source, so the same identifier can belong to
238243
more than one data source. If you fetch such an identifier without `--data-source`,

skills/codealive-context-engine/scripts/fetch.py

Lines changed: 62 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -102,32 +102,67 @@ def _data_source_miss_hint(data_source: str) -> str:
102102
)
103103

104104

105-
def format_artifacts(data: dict, data_source: str = None) -> str:
106-
"""Format fetched artifacts for display."""
105+
def _not_found_lines(not_found: list) -> list:
106+
"""Lines listing requested identifiers the backend could not resolve or that are
107+
outside the caller's access scope, with a re-check/retry hint."""
108+
lines = [
109+
f"\n{'='*60}",
110+
f"⚠️ {len(not_found)} requested identifier(s) not found or inaccessible:",
111+
]
112+
for identifier in not_found:
113+
lines.append(f" • {identifier}")
114+
lines.append(f"{'='*60}")
115+
lines.append(
116+
"💡 Do NOT silently omit these. A not-found entry means the identifier did not "
117+
"resolve, or points outside the data sources this key can read — it is NOT proof "
118+
"the code is absent. Re-check those exact identifiers, re-run search.py or grep.py "
119+
"to get fresh ids, then re-fetch the problematic ones; if they still cannot be "
120+
"retrieved, tell the user which artifacts could not be fetched."
121+
)
122+
return lines
123+
124+
125+
def format_artifacts(data: dict, data_source: str = None, requested: list = None) -> str:
126+
"""Format fetched artifacts for display.
127+
128+
Requested identifiers the backend could not resolve — or that are outside the caller's
129+
access scope — come back with ``found: false`` (older backends omit the flag and return
130+
``content: null``). They are NOT dropped silently: each concrete identifier is listed in
131+
a "not found" section with a hint to re-check the ids and retry the problematic ones.
132+
A ``found: true`` artifact with empty content is still shown (it was located).
133+
``requested`` is the original identifier list; it backstops the diff so an id the
134+
backend never echoed back is still surfaced as not-found.
135+
"""
107136
artifacts = data.get("artifacts", [])
108-
if not artifacts:
109-
msg = "No artifacts returned."
110-
return msg + _data_source_miss_hint(data_source) if data_source else msg
111137

112138
output = []
113139
count = 0
114140
has_any_relationships = False
141+
returned_identifiers = set()
142+
not_found = []
115143

116144
for artifact in artifacts:
145+
identifier = artifact.get("identifier", "unknown")
146+
returned_identifiers.add(identifier)
147+
117148
content = artifact.get("content")
118-
if content is None:
149+
# Prefer the backend's explicit `found` flag; fall back to content-is-null for
150+
# older backends that don't emit it yet.
151+
found = artifact.get("found")
152+
is_missing = (found is False) if found is not None else (content is None)
153+
if is_missing:
154+
not_found.append(identifier)
119155
continue
120156

121157
count += 1
122-
identifier = artifact.get("identifier", "unknown")
123158
content_byte_size = artifact.get("contentByteSize")
124159

125160
size_str = f" ({content_byte_size} bytes)" if content_byte_size else ""
126161
output.append(f"\n{'='*60}")
127162
output.append(f"📄 {identifier}{size_str}")
128163
output.append(f"{'='*60}")
129164
start_line = artifact.get("startLine") or 1
130-
output.append(_add_line_numbers(content, start_line))
165+
output.append(_add_line_numbers(content or "", start_line))
131166

132167
relationships = artifact.get("relationships")
133168
if relationships is not None:
@@ -138,11 +173,14 @@ def format_artifacts(data: dict, data_source: str = None) -> str:
138173
if _has_any_calls(relationships):
139174
has_any_relationships = True
140175

141-
if not output:
142-
msg = "No artifacts found."
143-
return msg + _data_source_miss_hint(data_source) if data_source else msg
176+
# Backstop: any requested identifier the backend never echoed back is also missing.
177+
if requested:
178+
for identifier in requested:
179+
if identifier not in returned_identifiers and identifier not in not_found:
180+
not_found.append(identifier)
144181

145-
output.append(f"\n({count} artifact(s))")
182+
if count > 0:
183+
output.append(f"\n({count} artifact(s))")
146184

147185
if has_any_relationships:
148186
output.append(
@@ -154,6 +192,17 @@ def format_artifacts(data: dict, data_source: str = None) -> str:
154192
"[--profile callsOnly|inheritanceOnly|allRelevant|referencesOnly]"
155193
)
156194

195+
if not_found:
196+
output.extend(_not_found_lines(not_found))
197+
198+
if count == 0:
199+
# Nothing was actually fetched. Keep the data-source-specific recovery hint when a
200+
# selector was supplied; the not-found section above already lists the ids.
201+
if data_source:
202+
output.append(_data_source_miss_hint(data_source))
203+
if not output:
204+
return "No artifacts returned."
205+
157206
return "\n".join(output)
158207

159208

@@ -200,7 +249,7 @@ def main():
200249

201250
result = client.fetch_artifacts(identifiers=identifiers, data_source=data_source)
202251

203-
print(format_artifacts(result, data_source=data_source))
252+
print(format_artifacts(result, data_source=data_source, requested=identifiers))
204253

205254
except Exception as e:
206255
print(f"❌ Error: {e}", file=sys.stderr)

tests/test_fetch_format.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""Unit tests for fetch.py artifact formatting — not-found surfacing.
2+
3+
A requested identifier the backend cannot resolve (or that is outside the caller's scope)
4+
must never be dropped silently: it has to appear in an explicit "not found" section with the
5+
concrete identifier and a re-check/retry hint.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
import importlib.util
11+
from pathlib import Path
12+
13+
REPO_ROOT = Path(__file__).resolve().parents[1]
14+
FETCH_PATH = REPO_ROOT / "skills" / "codealive-context-engine" / "scripts" / "fetch.py"
15+
16+
17+
def _load_fetch():
    """Load ``fetch.py`` from ``FETCH_PATH`` and return the executed module.

    The script is imported by file path under the module name
    ``codealive_fetch`` rather than through a package import.

    Returns:
        The module object produced by executing the script.

    Raises:
        ImportError: If no import spec or loader can be built for FETCH_PATH.
    """
    spec = importlib.util.spec_from_file_location("codealive_fetch", FETCH_PATH)
    # Validate before module_from_spec: a missing spec would otherwise surface
    # as an opaque AttributeError, and a bare assert disappears under -O.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load fetch script from {FETCH_PATH}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
23+
24+
25+
fetch = _load_fetch()
26+
format_artifacts = fetch.format_artifacts
27+
28+
29+
def test_found_artifact_is_rendered():
    """A located artifact is rendered with its size and no not-found section."""
    identifier = "org/repo::a.py::F"
    # The 4-space body indent makes the literal 17 bytes long, which is what
    # contentByteSize declares.
    data = {"artifacts": [
        {"identifier": identifier, "found": True, "content": "def f():\n    pass", "contentByteSize": 17},
    ]}
    out = format_artifacts(data, requested=[identifier])
    assert identifier in out
    # The byte size reported by the backend is shown next to the identifier.
    assert "(17 bytes)" in out
    assert "not found or inaccessible" not in out
36+
37+
38+
def test_explicit_not_found_is_surfaced_with_concrete_id_and_hint():
    """An artifact flagged ``found: False`` is listed by id together with the retry hint."""
    located = {"identifier": "org/repo::a.py::F", "found": True, "content": "x"}
    unresolved = {"identifier": "org/repo::missing.py::G", "found": False, "content": None}
    payload = {"artifacts": [located, unresolved]}

    rendered = format_artifacts(
        payload,
        requested=["org/repo::a.py::F", "org/repo::missing.py::G"],
    )

    # The missing identifier is counted, named, and accompanied by the hint.
    assert "1 requested identifier(s) not found" in rendered
    assert "org/repo::missing.py::G" in rendered
    assert "Do NOT silently omit" in rendered
    # The artifact that was located is still rendered as usual.
    assert "📄 org/repo::a.py::F" in rendered
49+
50+
51+
def test_legacy_backend_without_found_flag_falls_back_to_null_content():
    """With no ``found`` flag, ``content: None`` alone marks the identifier as missing."""
    legacy_entry = {"identifier": "org/repo::missing.py::G", "content": None}
    payload = {"artifacts": [legacy_entry]}

    rendered = format_artifacts(payload, requested=["org/repo::missing.py::G"])

    assert "not found or inaccessible" in rendered
    assert "org/repo::missing.py::G" in rendered
58+
59+
60+
def test_found_but_empty_content_is_rendered_not_missing():
    """A ``found: True`` artifact with empty content is shown, not reported as missing."""
    empty_but_found = {"identifier": "org/repo::a.py::F", "found": True, "content": ""}
    payload = {"artifacts": [empty_but_found]}

    rendered = format_artifacts(payload, requested=["org/repo::a.py::F"])

    assert "📄 org/repo::a.py::F" in rendered
    assert "not found or inaccessible" not in rendered
67+
68+
69+
def test_all_found_has_no_not_found_section():
    """When every requested identifier is located, no not-found section is printed."""
    only_entry = {"identifier": "org/repo::a.py::F", "found": True, "content": "x"}
    payload = {"artifacts": [only_entry]}

    rendered = format_artifacts(payload, requested=["org/repo::a.py::F"])

    assert "not found or inaccessible" not in rendered
75+
76+
77+
def test_backstop_surfaces_id_backend_never_echoed():
    """A requested identifier absent from the response is still reported as not found."""
    echoed = {"identifier": "org/repo::a.py::F", "found": True, "content": "x"}
    payload = {"artifacts": [echoed]}

    # The second identifier is requested but never appears in the response.
    rendered = format_artifacts(
        payload,
        requested=["org/repo::a.py::F", "org/repo::ghost.py::Z"],
    )

    assert "org/repo::ghost.py::Z" in rendered
    assert "1 requested identifier(s) not found" in rendered

0 commit comments

Comments
 (0)