Skip to content

Commit d5b8aea

Browse files
fix(scan): close cross-scan leak for single-file targets of any format
PR #54 disabled the user-scope walk when the CLI scanned a single local file, by gating on `explicitCandidates.length > 0`. That gate breaks for files whose extension is not in `inferTextLikeFormat` — e.g. `.idea/workspace.xml`, `.env` with unusual names, binary-ish configs — because `collectExplicitCandidates` returns `[]` for them, the guard never fires, and sibling user-scope findings (e.g. a hidden-unicode hit in `~/.agents/skills/foo/SKILL.md`) leak into the scan of the unrelated file.

Reproducer (0.14.3):

    $ npx codegate-ai scan ~/workspace/.idea/workspace.xml --format json
    scan_target: .../.idea/workspace.xml
    findings:
      - HIGH rule-file-hidden-unicode
        file_path: ~/.agents/skills/api-design-guide/domains/rest/SKILL.md

## Fix

Add a `stagedFromLocalFile: true` flag to `ResolvedScanTarget`, set from `stageLocalFile`. The CLI gate now uses this flag directly:

    scan_user_scope =
      --include-user-scope ? true
      : stagedFromLocalFile ? false
      : baseConfig.scan_user_scope

It's a signal that doesn't depend on whether the file's extension was recognisable. Covers every file type, no per-format maintenance.

## Tests

`tests/scan-target.test.ts`:

- Staged `.xml` file gets `stagedFromLocalFile=true` AND empty `explicitCandidates` (the PR #54 gate would have failed here).
- Staged `.json` file also gets `stagedFromLocalFile=true` and populated `explicitCandidates` — no regression on the happy path.

All 155 files / 722 tests pass. Lint + prettier + typecheck clean.
1 parent 7359778 commit d5b8aea

4 files changed

Lines changed: 64 additions & 10 deletions

File tree

src/cli.ts

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -493,17 +493,23 @@ function addScanCommand(program: Command, version: string, deps: CliDeps): void
493493
? true
494494
: (baseConfig.workflow_audits?.enabled ?? false),
495495
},
496-
// When the target was a single local file that got staged into a
497-
// temp dir (explicitCandidates set), walking the full user-scope
498-
// tree is off-target: the user asked to scan one file, not their
499-
// whole home. Leaving user-scope on here let sibling findings
500-
// (e.g. `~/.agents/skills/*/SKILL.md`) leak into single-file
501-
// scans of configs like `.claude/settings.json`. Explicit opt-in
502-
// via `--include-user-scope` still forces it on.
496+
// When the raw input was a single local file (now staged into a
497+
// temp dir), walking the full user-scope tree is off-target — the
498+
// user asked to scan one file, not their whole home. Without
499+
// this guard, sibling findings (e.g. `~/.agents/skills/*/SKILL.md`)
500+
// leak into scans of files like `.claude/settings.json` or
501+
// `.idea/workspace.xml`.
502+
//
503+
// Earlier we gated on `explicitCandidates.length > 0`, but that
504+
// check was never satisfied for files whose extension is not in the
505+
// text-like format list (XML, binary-ish configs, etc.) — those
506+
// produce zero explicit candidates and the guard never fired.
507+
// Using `stagedFromLocalFile` is the reliable signal.
508+
// Explicit opt-in via `--include-user-scope` still forces it on.
503509
scan_user_scope:
504510
options.includeUserScope === true
505511
? true
506-
: resolvedTarget.explicitCandidates && resolvedTarget.explicitCandidates.length > 0
512+
: resolvedTarget.stagedFromLocalFile === true
507513
? false
508514
: (baseConfig.scan_user_scope ?? false),
509515
};

src/scan-target/staging.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ export function stageLocalFile(absolutePath: string): ResolvedScanTarget {
121121
scanTarget: tempRoot,
122122
displayTarget: absolutePath,
123123
explicitCandidates: collectExplicitCandidates(tempRoot),
124+
stagedFromLocalFile: true,
124125
cleanup: () => cleanupTempDir(tempRoot),
125126
};
126127
}
@@ -134,6 +135,7 @@ export function stageLocalFile(absolutePath: string): ResolvedScanTarget {
134135
scanTarget: tempRoot,
135136
displayTarget: absolutePath,
136137
explicitCandidates: collectExplicitCandidates(tempRoot),
138+
stagedFromLocalFile: true,
137139
cleanup: () => cleanupTempDir(tempRoot),
138140
};
139141
}

src/scan-target/types.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@ export interface ResolvedScanTarget {
1212
scanTarget: string;
1313
displayTarget: string;
1414
explicitCandidates?: ExplicitScanCandidate[];
15+
/**
16+
* `true` when the raw input was a local file that got staged into a
17+
* temp directory. This is the signal the CLI uses to disable the
18+
* user-scope walk, regardless of whether `explicitCandidates` could be
19+
* inferred for the file (a file with an XML / binary / unrecognised
20+
* extension still needs the scope guard).
21+
*/
22+
stagedFromLocalFile?: boolean;
1523
cleanup?: () => Promise<void> | void;
1624
}
1725

tests/scan-target.test.ts

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
1+
import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
2+
import { tmpdir } from "node:os";
23
import { join } from "node:path";
34
import { afterEach, describe, expect, it, vi } from "vitest";
45
import { createScanDiscoveryContext } from "../src/scan";
5-
import { cloneGitRepo } from "../src/scan-target/staging";
6+
import { cloneGitRepo, stageLocalFile } from "../src/scan-target/staging";
67

78
const { cloneMock } = vi.hoisted(() => ({
89
cloneMock: vi.fn((_: string, args: string[]) => {
@@ -282,3 +283,40 @@ describe("scan target resolver", () => {
282283
expect(existsSync(destination ?? "")).toBe(false);
283284
});
284285
});
286+
287+
describe("stageLocalFile — stagedFromLocalFile flag", () => {
288+
// Regression: PR #54 gated the CLI's user-scope guard on
289+
// `explicitCandidates.length > 0`. For file types not in
290+
// `inferTextLikeFormat` (e.g. `.xml`, `.idea/workspace.xml`), that
291+
// list is empty and the guard never fired → sibling findings leaked
292+
// into single-file scans. `stagedFromLocalFile` is the reliable
293+
// signal regardless of the file's extension or format.
294+
it("flags staged local files even when the extension is unsupported for explicit-candidate inference", () => {
295+
const home = mkdtempSync(join(tmpdir(), "codegate-stage-home-"));
296+
const xmlPath = join(home, ".idea", "workspace.xml");
297+
mkdirSync(join(home, ".idea"), { recursive: true });
298+
writeFileSync(xmlPath, `<?xml version="1.0"?>\n<project/>\n`, "utf8");
299+
300+
const resolved = stageLocalFile(xmlPath);
301+
302+
expect(resolved.stagedFromLocalFile).toBe(true);
303+
expect(resolved.displayTarget).toBe(xmlPath);
304+
expect(resolved.scanTarget).not.toBe(xmlPath);
305+
// .xml is not in the text-like format list, so no explicit candidates are
306+
// inferred; that empty list is what silently defeated PR #54's scope guard.
307+
expect(resolved.explicitCandidates ?? []).toEqual([]);
308+
});
309+
310+
it("flags staged local files for known formats too (no regression in the supported path)", () => {
311+
const home = mkdtempSync(join(tmpdir(), "codegate-stage-home-json-"));
312+
const jsonPath = join(home, ".claude", "settings.json");
313+
mkdirSync(join(home, ".claude"), { recursive: true });
314+
writeFileSync(jsonPath, `{"hooks": {}}\n`, "utf8");
315+
316+
const resolved = stageLocalFile(jsonPath);
317+
318+
expect(resolved.stagedFromLocalFile).toBe(true);
319+
expect(resolved.displayTarget).toBe(jsonPath);
320+
expect(resolved.explicitCandidates?.length ?? 0).toBeGreaterThan(0);
321+
});
322+
});

0 commit comments

Comments
 (0)