Skip to content

Commit b7871a4

Browse files
fix(scan): stop attributing host-wide findings to per-target scans
A targeted `scan <target>` currently surfaces two classes of finding whose `file_path` is outside the scan target. When the Mobb Agent Security dashboard groups scans by target, those host-wide findings make every skill look suspicious even though only one is. This fixes both leaks. Layer 2 — hidden-unicode (and every other rule-file detector): when the scan target is itself inside the user's home directory (for example a single skill at `~/.codex/skills/<name>`), user-scope wildcard patterns like `~/.agents/skills/*/SKILL.md` were walking the whole home tree and picking up sibling skills. Add a `shouldKeepUserScopeCandidate` guard that drops user-scope matches outside the scan target in exactly this case; project-root scans outside the home directory keep the existing host-wide context behavior unchanged. Layer 3 — `layer3OutcomesToFindings`: a successful outcome with no `metadata.findings[]` / `metadata.tools[]` was emitting a LOW `layer3-network_error` PARSE_ERROR finding whose `file_path` was the remote MCP URL (e.g. `https://mcp.linear.app/mcp`). The default resource executor deliberately makes no outbound calls, so this "schema mismatch" was the norm, not an anomaly, and the resulting finding polluted every per-target scan report with host-level noise. Treat all resource kinds the same way registry resources were already handled: silently skip when no actionable metadata is present. Tests added: - `tests/layer2/cross-scan-attribution.test.ts` — sibling skill inside the same home is no longer attributed to the scanned skill, but is reported when the scan target is their parent; project scans outside the home still see user-scope files. - `tests/layer3/network-error-suppression.test.ts` — HTTP/SSE schema mismatches produce no findings while timeout / auth_failure / command_error / skipped_without_consent still do. `tests/layer3/layer3-integration.test.ts` is updated to match: the case that asserted the old schema-mismatch finding now asserts its absence.
1 parent effcaae commit b7871a4

5 files changed

Lines changed: 384 additions & 19 deletions

File tree

src/pipeline.ts

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -314,12 +314,6 @@ function layer3ErrorFinding(
314314
});
315315
}
316316

317-
function isRegistryMetadataResource(resourceId: string): boolean {
318-
return (
319-
resourceId.startsWith("npm:") || resourceId.startsWith("pypi:") || resourceId.startsWith("git:")
320-
);
321-
}
322-
323317
export function layer3OutcomesToFindings(
324318
outcomes: DeepScanOutcome[],
325319
options: { unicodeAnalysis?: boolean } = {},
@@ -353,17 +347,17 @@ export function layer3OutcomesToFindings(
353347
const derived = deriveLayer3ToolFindings(outcome.resourceId, outcome.result.metadata, options);
354348
const combined = [...parsed, ...derived];
355349

350+
// If a Layer 3 resource was fetched successfully but carries no
351+
// actionable metadata (no `findings[]`, no `tools[]`), that is not an
352+
// issue with the scan target itself — it usually means the default
353+
// no-outbound-call resource executor recorded only a URL stub, or that
354+
// a host-configured MCP endpoint simply returned an unrecognised
355+
// payload. Previously we emitted a LOW `layer3-network_error`
356+
// "schema mismatch" finding whose `file_path` was the remote URL,
357+
// which leaked host-level noise into every per-target scan report.
358+
// Fetch-level anomalies that are unrelated to the scan target are now
359+
// dropped silently for all resource kinds.
356360
if (combined.length === 0) {
357-
if (isRegistryMetadataResource(outcome.resourceId)) {
358-
continue;
359-
}
360-
findings.push(
361-
layer3ErrorFinding(
362-
outcome.resourceId,
363-
"network_error",
364-
"Deep scan response schema mismatch: expected metadata.findings[] or metadata.tools[]",
365-
),
366-
);
367361
continue;
368362
}
369363
findings.push(...combined);

src/scan.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,53 @@ function isRegularFile(path: string): boolean {
232232
}
233233
}
234234

235+
/** True when `candidatePath` resolves at or below `root`. */
236+
function isPathInside(root: string, candidatePath: string): boolean {
237+
const resolvedCandidate = resolve(candidatePath);
238+
const resolvedRoot = resolve(root);
239+
if (resolvedCandidate === resolvedRoot) {
240+
return true;
241+
}
242+
const rel = relative(resolvedRoot, resolvedCandidate);
243+
if (rel === "" || rel === ".") {
244+
return true;
245+
}
246+
if (rel.startsWith("..")) {
247+
return false;
248+
}
249+
// On Windows, relative() may return an absolute path across drives.
250+
if (rel.includes(":")) {
251+
return false;
252+
}
253+
return true;
254+
}
255+
256+
/**
257+
* Decide whether a user-scope candidate at `candidatePath` should be attached
258+
* to a scan of `scanTarget` rooted at `homeDir`.
259+
*
260+
* User-scope patterns (e.g. `~/.agents/skills/&ast;/SKILL.md`) walk the whole
261+
* home directory, so they can match files belonging to completely unrelated
262+
* skills or agents. When the scan target is itself a specific location
263+
* **inside** the user's home — e.g. scanning a single skill directory — any
264+
* user-scope match outside that scan target belongs to a different scan and
265+
* must not be attributed here.
266+
*
267+
* When the scan target lives outside the home directory (for example a
268+
* project root in a workspace), user-scope matches are accepted as legitimate
269+
* host-wide context for that scan.
270+
*/
271+
function shouldKeepUserScopeCandidate(
272+
scanTarget: string,
273+
homeDir: string,
274+
candidatePath: string,
275+
): boolean {
276+
if (isPathInside(homeDir, scanTarget)) {
277+
return isPathInside(scanTarget, candidatePath);
278+
}
279+
return true;
280+
}
281+
235282
function toUserReportPath(pattern: string): string {
236283
const normalized = normalizeUserScopePattern(pattern);
237284
return `~/${normalized}`;
@@ -411,6 +458,14 @@ function collectSelectedCandidates(
411458
const userPattern = normalizeUserScopePattern(candidate.pattern);
412459
if (userPattern.includes("*")) {
413460
for (const match of collectUserScopeWildcardMatches(options.homeDir, userPattern)) {
461+
// A scan whose target itself lives under the user's home directory
462+
// (e.g. a single skill at `~/.codex/skills/foo`) must only report
463+
// findings about files inside that target. User-scope wildcards
464+
// walk the whole home tree, so they can match sibling skills or
465+
// other agents that belong to different scans; drop those here.
466+
if (!shouldKeepUserScopeCandidate(absoluteTarget, options.homeDir, match.absolutePath)) {
467+
continue;
468+
}
414469
const reportPath = toUserReportPath(match.relativePath);
415470
if (!matchesCollectionKinds(reportPath, options.collectKinds)) {
416471
continue;
@@ -430,6 +485,9 @@ function collectSelectedCandidates(
430485
if (!existsSync(absolutePath) || !isRegularFile(absolutePath)) {
431486
continue;
432487
}
488+
if (!shouldKeepUserScopeCandidate(absoluteTarget, options.homeDir, absolutePath)) {
489+
continue;
490+
}
433491
const reportPath = toUserReportPath(userPattern);
434492
if (!matchesCollectionKinds(reportPath, options.collectKinds)) {
435493
continue;
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs";
2+
import { tmpdir } from "node:os";
3+
import { join } from "node:path";
4+
import { describe, expect, it } from "vitest";
5+
import type { CodeGateConfig } from "../../src/config";
6+
import { runScanEngine } from "../../src/scan";
7+
8+
/**
9+
* These tests pin down the fix for the cross-scan finding-attribution bug.
10+
*
11+
* User-scope patterns (e.g. `~/.agents/skills/&ast;/SKILL.md`) walk the user's
12+
* entire home directory. When the scan target itself is a specific path
13+
* inside the home directory — for example a single skill at
14+
* `~/.codex/skills/<name>` — wildcard matches for sibling skills belong to
15+
* completely different scans and must not be attributed to the current scan.
16+
*
17+
* The scenarios below simulate scanning one skill while a sibling skill that
18+
* contains hidden Unicode also exists on the host, and assert that only the
19+
* in-target finding is reported.
20+
*/
21+
22+
const BASE_CONFIG: CodeGateConfig = {
23+
severity_threshold: "high",
24+
auto_proceed_below_threshold: true,
25+
output_format: "terminal",
26+
scan_state_path: "/tmp/codegate-cross-scan-state.json",
27+
tui: { enabled: false, colour_scheme: "default", compact_mode: false },
28+
tool_discovery: { preferred_agent: "claude", agent_paths: {}, skip_tools: [] },
29+
trusted_directories: [],
30+
blocked_commands: ["bash", "sh", "curl", "wget", "nc", "python", "node"],
31+
known_safe_mcp_servers: [],
32+
known_safe_formatters: [],
33+
known_safe_lsp_servers: [],
34+
known_safe_hooks: [],
35+
unicode_analysis: true,
36+
check_ide_settings: true,
37+
owasp_mapping: true,
38+
trusted_api_domains: [],
39+
strict_collection: false,
40+
scan_collection_modes: ["default"],
41+
persona: "regular",
42+
runtime_mode: "offline",
43+
workflow_audits: { enabled: false },
44+
suppress_findings: [],
45+
scan_user_scope: true,
46+
};
47+
48+
describe("cross-scan attribution — Layer 2 hidden-unicode rule", () => {
49+
it("does not flag sibling user-scope skills when the scan target is inside the home directory", async () => {
50+
const home = mkdtempSync(join(tmpdir(), "codegate-cross-scan-home-"));
51+
52+
// Target skill: the scan target itself lives under the fake home dir,
53+
// exactly like `~/.codex/skills/<name>` in the bug report.
54+
mkdirSync(join(home, ".agents", "skills", "foo"), { recursive: true });
55+
writeFileSync(
56+
join(home, ".agents", "skills", "foo", "SKILL.md"),
57+
"Clean skill body.\n",
58+
"utf8",
59+
);
60+
61+
// Sibling skill with hidden Unicode under the same user-scope wildcard.
62+
// Under the old behavior this file would surface as a cross-scan finding
63+
// attributed to the `foo` scan.
64+
mkdirSync(join(home, ".agents", "skills", "bar"), { recursive: true });
65+
writeFileSync(
66+
join(home, ".agents", "skills", "bar", "SKILL.md"),
67+
"Sibling skill​ with a hidden zero-width space.\n",
68+
"utf8",
69+
);
70+
71+
const scanTarget = join(home, ".agents", "skills", "foo");
72+
73+
const report = await runScanEngine({
74+
version: "0.1.0",
75+
scanTarget,
76+
config: BASE_CONFIG,
77+
homeDir: home,
78+
});
79+
80+
const hiddenUnicodeFindings = report.findings.filter(
81+
(finding) => finding.rule_id === "rule-file-hidden-unicode",
82+
);
83+
84+
// No finding should reference the sibling skill.
85+
expect(hiddenUnicodeFindings.some((finding) => finding.file_path.includes("skills/bar"))).toBe(
86+
false,
87+
);
88+
89+
// No finding in the whole report should reference the sibling skill.
90+
expect(report.findings.some((finding) => finding.file_path.includes("skills/bar"))).toBe(false);
91+
});
92+
93+
it("still flags hidden Unicode in a sibling skill when the scan target *is* the parent of both", async () => {
94+
// Sanity check: restricting sibling attribution must not hide in-target
95+
// findings. When the user scans the parent directory, both skills are
96+
// inside the scan target and both must be reported.
97+
const home = mkdtempSync(join(tmpdir(), "codegate-cross-scan-parent-home-"));
98+
mkdirSync(join(home, ".agents", "skills", "foo"), { recursive: true });
99+
writeFileSync(
100+
join(home, ".agents", "skills", "foo", "SKILL.md"),
101+
"Clean skill body.\n",
102+
"utf8",
103+
);
104+
mkdirSync(join(home, ".agents", "skills", "bar"), { recursive: true });
105+
writeFileSync(
106+
join(home, ".agents", "skills", "bar", "SKILL.md"),
107+
"Sibling skill​ with a hidden zero-width space.\n",
108+
"utf8",
109+
);
110+
111+
const scanTarget = join(home, ".agents", "skills");
112+
113+
const report = await runScanEngine({
114+
version: "0.1.0",
115+
scanTarget,
116+
config: BASE_CONFIG,
117+
homeDir: home,
118+
});
119+
120+
expect(
121+
report.findings.some(
122+
(finding) =>
123+
finding.rule_id === "rule-file-hidden-unicode" &&
124+
finding.file_path.includes("bar/SKILL.md"),
125+
),
126+
).toBe(true);
127+
});
128+
129+
it("preserves user-scope attribution when the scan target is not inside the home directory", async () => {
130+
// When scanning an arbitrary project root (outside the user's home),
131+
// user-scope files remain legitimate host-wide context and should still
132+
// be included as before.
133+
const home = mkdtempSync(join(tmpdir(), "codegate-cross-scan-external-home-"));
134+
const projectRoot = mkdtempSync(join(tmpdir(), "codegate-cross-scan-project-"));
135+
136+
mkdirSync(join(home, ".agents", "skills", "bar"), { recursive: true });
137+
writeFileSync(
138+
join(home, ".agents", "skills", "bar", "SKILL.md"),
139+
"Sibling skill​ with a hidden zero-width space.\n",
140+
"utf8",
141+
);
142+
143+
const report = await runScanEngine({
144+
version: "0.1.0",
145+
scanTarget: projectRoot,
146+
config: BASE_CONFIG,
147+
homeDir: home,
148+
});
149+
150+
expect(
151+
report.findings.some(
152+
(finding) =>
153+
finding.rule_id === "rule-file-hidden-unicode" &&
154+
finding.file_path === "~/.agents/skills/bar/SKILL.md",
155+
),
156+
).toBe(true);
157+
});
158+
});

tests/layer3/layer3-integration.test.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import { createEmptyReport } from "../../src/types/report";
88
import { planRemediation } from "../../src/layer4-remediation/remediator";
99

1010
describe("task 28 layer3 integration", () => {
11-
it("emits parse-style findings for consent refusal, timeout, and schema mismatch", () => {
11+
it("emits parse-style findings for consent refusal and timeout, but not for schema mismatch", () => {
1212
const outcomes: DeepScanOutcome[] = [
1313
{
1414
resourceId: "npm:@org/a",
@@ -39,11 +39,15 @@ describe("task 28 layer3 integration", () => {
3939
},
4040
];
4141

42+
// Schema-mismatch outcomes are no longer surfaced as findings: they
43+
// describe a host-configured endpoint's behavior, not an issue with
44+
// the scan target, and previously caused `layer3-network_error`
45+
// findings to appear on every per-target scan report.
4246
const findings = layer3OutcomesToFindings(outcomes);
43-
expect(findings).toHaveLength(3);
47+
expect(findings).toHaveLength(2);
4448
expect(findings[0]?.finding_id).toContain("skipped_without_consent");
4549
expect(findings[1]?.severity).toBe("MEDIUM");
46-
expect(findings[2]?.description).toContain("schema mismatch");
50+
expect(findings.some((finding) => finding.rule_id === "layer3-network_error")).toBe(false);
4751
});
4852

4953
it("merges valid layer3 findings into report summary and supports source-config remediation", () => {

0 commit comments

Comments
 (0)