Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions packages/git/src/exclude-patterns.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import { describe, expect, it } from "vitest";
import {
matchesExcludePatterns,
parseExcludePatterns,
} from "./exclude-patterns";

/**
 * Test helper: parses `content` as the text of an exclude file and reports
 * whether `entry` is excluded by the resulting pattern list.
 */
function matches(content: string, entry: string): boolean {
  const patterns = parseExcludePatterns(content);
  return matchesExcludePatterns(entry, patterns);
}

// Parser-level checks: content that carries no usable pattern must yield an
// empty pattern list.
describe("parseExcludePatterns", () => {
// Each row is [case label, exclude-file content].
it.each([
["empty content", ""],
["only comments", "# a comment\n# another"],
["only blank lines", "\n\n \n"],
// `!` and `/` are pattern modifiers; with nothing else on the line there is
// no pattern left to compile.
["a lone negation marker", "!"],
["a lone slash", "/"],
])("produces no patterns from %s", (_label, content) => {
expect(parseExcludePatterns(content)).toEqual([]);
});
});

// End-to-end checks: each case parses exclude-file content and matches a single
// entry against it through the `matches` helper.
describe("matchesExcludePatterns", () => {
// Each row is [case label, exclude-file content, entry to test, expected result].
// An entry with a trailing slash denotes a directory.
it.each([
// --- basename (slash-free) patterns match at any depth ---
["basename pattern matches at root", ".env", ".env", true],
["basename pattern matches nested", ".env", "config/sub/.env", true],
[
"basename pattern does not match other names",
".env",
".env.local",
false,
],
["comment lines never match", "# .env", ".env", false],
// --- single-segment wildcards ---
["star glob within a segment", "*.local", ".env.local", true],
[
"star glob matches a nested file via the non-anchored prefix",
"*.local",
"a/b.local",
true,
],
["star does not span slashes", "a*b", "a/b", false],
["question mark matches one char", ".env?", ".envX", true],
["question mark does not match slash", ".env?", ".env/", false],
// --- anchoring: a leading or interior slash pins the pattern to the root ---
["anchored pattern matches from root only", "/build", "build", true],
["anchored pattern rejects nested path", "/build", "sub/build", false],
[
"middle-slash pattern anchors to root",
"config/secrets",
"config/secrets",
true,
],
[
"middle-slash pattern rejects nested",
"config/secrets",
"app/config/secrets",
false,
],
// --- directory handling: trailing-slash patterns and parent-directory matches ---
["dir-only pattern matches directory entry", ".flox/", ".flox/", true],
["dir-only pattern rejects plain file", ".flox/", ".flox", false],
["dir pattern matches files beneath it", ".flox", ".flox/cache/data", true],
[
"dir-only pattern matches files beneath it",
".flox/",
".flox/cache/data",
true,
],
// --- double star ---
["double star prefix matches any depth", "**/logs", "a/b/logs", true],
["double star suffix matches contents", "logs/**", "logs/a/b.txt", true],
[
"double star suffix does not match the dir itself",
"logs/**",
"logs",
false,
],
["middle double star spans directories", "a/**/b", "a/x/y/b", true],
["middle double star matches zero directories", "a/**/b", "a/b", true],
// --- bracket character classes ---
["character class matches", ".env.[ab]", ".env.a", true],
["negated character class rejects", ".env.[!ab]", ".env.a", false],
// --- negation: the last matching pattern decides ---
[
"negation un-matches an earlier pattern",
".env*\n!.env.example",
".env.example",
false,
],
[
"negation only affects matching entries",
".env*\n!.env.example",
".env",
true,
],
["later pattern wins over earlier negation", "!.env\n.env", ".env", true],
// --- escapes and line-level whitespace ---
["escaped bang matches literal bang", "\\!important", "!important", true],
["escaped hash matches literal hash", "\\#file", "#file", true],
["trailing spaces are trimmed", ".env ", ".env", true],
["CRLF line endings do not defeat matching", ".env\r\n", ".env", true],
[
"consecutive double-star segments collapse",
"**/**/logs",
"a/b/logs",
true,
],
])("%s", (_label, content, entry, expected) => {
expect(matches(content, entry)).toBe(expected);
});

it("skips a malformed pattern line instead of dropping the whole file", () => {
// An unterminated char class on one line must not throw out the valid ones.
const patterns = parseExcludePatterns(".env\n[\n.envrc");
expect(matchesExcludePatterns(".env", patterns)).toBe(true);
expect(matchesExcludePatterns(".envrc", patterns)).toBe(true);
});

it("matches a pathological consecutive-double-star pattern in bounded time", () => {
// Regression for ReDoS: a run of `**/` used to compile to that many
// overlapping backtracking groups, blowing up exponentially with path depth.
// The pattern is thirty `**` segments followed by a literal that never matches.
const pattern = `${Array(30).fill("**").join("/")}/NOMATCH`;
const patterns = parseExcludePatterns(pattern);
// A 24-level path of single-letter segments ("a/b/c/…"), with a trailing
// slash so it is treated as a directory entry.
const deepPath = `${Array.from({ length: 24 }, (_, i) => String.fromCharCode(97 + (i % 26))).join("/")}/`;
const start = performance.now();
expect(matchesExcludePatterns(deepPath, patterns)).toBe(false);
// Generous wall-clock ceiling (milliseconds): the point is "not exponential",
// not a precise benchmark.
expect(performance.now() - start).toBeLessThan(1000);
});

it("never matches entries only reachable through unrelated names", () => {
// A `.env` pattern must not exclude directories whose names are unrelated.
expect(matches(".env", "node_modules/")).toBe(false);
expect(matches(".env", "dist/")).toBe(false);
});
});
162 changes: 162 additions & 0 deletions packages/git/src/exclude-patterns.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/**
 * One compiled line of a gitignore-style exclude file, as produced by
 * `parseExcludePatterns`.
 */
export interface ExcludePattern {
/** True when the line began with `!`: a match re-includes the entry instead of excluding it. */
negated: boolean;
/** True when the line ended with `/`: the entry itself only matches if it is a directory. */
dirOnly: boolean;
/** The compiled glob, tested against an entry path with any trailing slash removed. */
regex: RegExp;
}

/**
 * Turns the text of a gitignore-style exclude file into compiled patterns.
 *
 * Understands comments (`#`), negation (`!`), directory-only patterns
 * (trailing `/`), root anchoring (a leading or interior `/`) and the
 * `*` / `?` / `**` / `[...]` globs. The result lets callers re-apply the
 * exclude rules in-process to paths git has already listed, instead of asking
 * git to walk huge ignored trees.
 *
 * @param content Raw exclude-file text; lines are separated by `\n`.
 * @returns One entry per usable line, in file order. Blank lines, comments,
 *   lines left empty once their markers are stripped, and lines whose glob
 *   fails to compile are omitted.
 */
export function parseExcludePatterns(content: string): ExcludePattern[] {
  const compiled: ExcludePattern[] = [];
  const lines = content.split("\n");

  for (const rawLine of lines) {
    const line = trimUnescapedTrailingSpaces(rawLine);
    if (line === "" || line.startsWith("#")) {
      continue;
    }

    // A leading `!` negates the pattern; a leading `\!` or `\#` is an escaped
    // literal. Either way exactly one leading character is dropped.
    const negated = line.startsWith("!");
    const dropsLeadingChar =
      negated || line.startsWith("\\!") || line.startsWith("\\#");
    let glob = dropsLeadingChar ? line.slice(1) : line;

    // A trailing slash restricts the pattern to directories.
    const dirOnly = glob.endsWith("/");
    if (dirOnly) {
      glob = glob.slice(0, -1);
    }
    if (glob === "") {
      continue;
    }

    // Any slash left at this point (leading or interior) anchors the pattern
    // to the root; the leading one itself is not part of the glob.
    const anchored = glob.includes("/");
    if (glob.startsWith("/")) {
      glob = glob.slice(1);
    }

    // One malformed line must not discard the whole exclude file: if the glob
    // cannot be compiled, skip just that line.
    try {
      const regex = globToRegExp(glob, anchored);
      compiled.push({ negated, dirOnly, regex });
    } catch {
      continue;
    }
  }

  return compiled;
}

/**
 * Decides whether `entry` is excluded by `patterns`.
 *
 * Patterns are evaluated in order and the last one that matches decides the
 * outcome (gitignore semantics): a plain pattern excludes, a negated one
 * re-includes. A pattern that matches one of the entry's parent directories
 * counts as matching the entry.
 *
 * @param entry Path to test. A trailing slash marks it as a directory, as in
 *   `git ls-files --directory` output.
 * @param patterns Compiled patterns, in exclude-file order.
 * @returns `true` when the entry ends up excluded; `false` when nothing
 *   matched or the last match was a negation.
 */
export function matchesExcludePatterns(
  entry: string,
  patterns: ExcludePattern[],
): boolean {
  const isDir = entry.endsWith("/");
  const entryPath = isDir ? entry.slice(0, -1) : entry;

  // Fold over the list so each matching pattern overrides the verdict of the
  // ones before it.
  return patterns.reduce<boolean>(
    (excluded, pattern) =>
      patternMatches(pattern, entryPath, isDir) ? !pattern.negated : excluded,
    false,
  );
}

/**
 * Tests a single pattern against an entry, either directly or through one of
 * the entry's parent directories.
 *
 * @param pattern Compiled pattern to test.
 * @param entryPath Entry path with any trailing slash already removed.
 * @param isDir Whether the entry itself is a directory.
 * @returns `true` if the pattern matches the entry or any proper directory
 *   prefix of it.
 */
function patternMatches(
  pattern: ExcludePattern,
  entryPath: string,
  isDir: boolean,
): boolean {
  const { regex, dirOnly } = pattern;

  // The entry itself can only satisfy a directory-only pattern if it is a
  // directory.
  const entryEligible = isDir || !dirOnly;
  if (entryEligible && regex.test(entryPath)) {
    return true;
  }

  // Every proper prefix that ends at a slash is a parent directory, so
  // directory-only patterns apply to it without a further check.
  for (
    let cut = entryPath.indexOf("/");
    cut !== -1;
    cut = entryPath.indexOf("/", cut + 1)
  ) {
    if (regex.test(entryPath.slice(0, cut))) {
      return true;
    }
  }
  return false;
}

/**
 * Normalises the end of one exclude-file line: drops a trailing carriage
 * return, then drops trailing spaces that are not escaped with a backslash.
 *
 * The line is scanned left to right so that every backslash is paired with the
 * character it escapes. That keeps `foo\ ` intact (escaped space) while still
 * trimming `foo\\ `, where the backslash is itself escaped and the space is an
 * ordinary trailing one.
 *
 * @param line One raw line, without its `\n` terminator.
 * @returns The line with the trailing CR and unescaped trailing spaces removed.
 */
function trimUnescapedTrailingSpaces(line: string): string {
  // Drop a trailing CR first so CRLF-terminated exclude files don't bake a \r
  // into every pattern (which would make the compiled regex match nothing).
  const text = line.endsWith("\r") ? line.slice(0, -1) : line;

  // Index where the current run of unescaped spaces began, or -1 if the last
  // character seen was not part of such a run.
  let trailingRunStart = -1;
  for (let i = 0; i < text.length; i += 1) {
    const char = text.charAt(i);
    if (char === " ") {
      if (trailingRunStart === -1) {
        trailingRunStart = i;
      }
      continue;
    }
    trailingRunStart = -1;
    if (char === "\\") {
      // Skip the escaped character so an escaped space never starts a run and
      // an escaped backslash cannot shield the space that follows it.
      i += 1;
    }
  }

  return trailingRunStart === -1 ? text : text.slice(0, trailingRunStart);
}

/**
 * Compiles one gitignore-style glob into a regular expression that must match
 * a whole slash-separated path.
 *
 * Supported syntax:
 * - `*` matches any run of characters within one path segment.
 * - `?` matches exactly one character other than `/`.
 * - `[...]` matches one character from the class (`[!...]` negates it); like
 *   every other wildcard it never matches `/`.
 * - `**` as a whole leading or interior segment (`**` + `/`) matches zero or
 *   more directories; as a whole trailing segment it matches everything
 *   beneath. Any other run of asterisks (e.g. `a**b`) behaves like a single `*`.
 * - `\x` matches the character `x` literally.
 *
 * @param pattern Glob text with negation marker, trailing slash and leading
 *   slash already stripped.
 * @param anchored When `false`, the pattern may match at any depth, so an
 *   optional directory prefix is allowed in front of it.
 * @returns The compiled expression, anchored at both ends.
 * @throws SyntaxError if the generated source is not a valid regular
 *   expression (for example a bracket class with a dangling backslash).
 */
function globToRegExp(pattern: string, anchored: boolean): RegExp {
  let source = anchored ? "^" : "^(?:.*/)?";
  let i = 0;

  while (i < pattern.length) {
    const char = pattern.charAt(i);
    if (char === "*") {
      // Consume the whole run of asterisks at once.
      let runEnd = i + 1;
      while (pattern.charAt(runEnd) === "*") {
        runEnd += 1;
      }
      // `**` is only special when it begins a path segment; elsewhere the run
      // is an ordinary single-segment wildcard.
      const beginsSegment = i === 0 || pattern.charAt(i - 1) === "/";
      const isDoubleStar = runEnd - i >= 2 && beginsSegment;

      if (isDoubleStar && pattern.charAt(runEnd) === "/") {
        // Collapse a run of consecutive `**/` into one `(?:.*/)?`. They are
        // semantically equivalent, and emitting one group per segment would
        // stack overlapping backtracking `.*` groups — catastrophic on a
        // slash-heavy path that fails the final literal (ReDoS).
        source += "(?:.*/)?";
        i = runEnd + 1;
        while (pattern.startsWith("**/", i)) {
          i += 3;
        }
      } else if (isDoubleStar && runEnd === pattern.length) {
        // Trailing `/**`: everything beneath the directory, at any depth.
        source += ".*";
        i = runEnd;
      } else {
        source += "[^/]*";
        i = runEnd;
      }
    } else if (char === "?") {
      source += "[^/]";
      i += 1;
    } else if (char === "[") {
      // Start the search at i + 2 so the class holds at least one character.
      const classEnd = pattern.indexOf("]", i + 2);
      if (classEnd === -1) {
        // Unterminated class: treat the bracket as a literal character.
        source += "\\[";
        i += 1;
      } else {
        let charClass = pattern.slice(i + 1, classEnd);
        if (charClass.startsWith("!")) {
          charClass = `^${charClass.slice(1)}`;
        }
        // The lookahead stops the class from ever matching a path separator,
        // which a negated class such as `[^b]` would otherwise do.
        source += `(?!/)[${charClass}]`;
        i = classEnd + 1;
      }
    } else if (char === "\\" && i + 1 < pattern.length) {
      source += escapeRegExp(pattern.charAt(i + 1));
      i += 2;
    } else {
      source += escapeRegExp(char);
      i += 1;
    }
  }

  return new RegExp(`${source}$`);
}

/**
 * Escapes a single character so it is matched literally inside a regular
 * expression; characters with no special meaning are returned unchanged.
 */
function escapeRegExp(char: string): string {
  return /[.*+?^${}()|[\]\\/]/.test(char) ? `\\${char}` : char;
}
Loading
Loading