Skip to content

Commit 2376a08

Browse files
committed
feat: add .gitignore parsing to scanner and CI test job
- Add self-contained gitignore parser (no external deps) with 22 tests
- Scanner now reads .gitignore files (root + nested) and skips ignored paths
- Add skipGitignore option to ScanOptions for opting out
- Export gitignore utilities from package index for potential upstreaming to dev-utils
- Add documents-loader-tests CI job (no database required)
1 parent ec92af4 commit 2376a08

5 files changed

Lines changed: 458 additions & 2 deletions

File tree

.github/workflows/integration-test.yml

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -421,6 +421,33 @@ jobs:
421421
cd packages/agentic-db
422422
pnpm test -- --forceExit --detectOpenHandles __tests__/rag.test.ts __tests__/rag-unified-search.test.ts __tests__/cli-search-integration.test.ts
423423
424+
# Runs the documents-loader package's test suite on a fresh checkout.
# No service containers are declared for this job.
documents-loader-tests:
  runs-on: ubuntu-latest
  timeout-minutes: 10

  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Setup pnpm
      uses: pnpm/action-setup@v2
      with:
        version: 10.22.0

    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: '22'
        cache: 'pnpm'

    # --frozen-lockfile makes the install fail instead of rewriting the lockfile.
    - name: Install dependencies
      run: pnpm install --frozen-lockfile

    - name: Run documents-loader tests
      working-directory: packages/documents-loader
      run: pnpm test
424451
cli-e2e-tests:
425452
runs-on: ubuntu-latest
426453
timeout-minutes: 25
Lines changed: 209 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,209 @@
1+
import { mkdirSync, writeFileSync, rmSync } from 'fs';
2+
import { join } from 'path';
3+
import { tmpdir } from 'os';
4+
5+
import {
6+
parseGitignoreLine,
7+
parseGitignore,
8+
createGitignoreMatcher,
9+
loadGitignore,
10+
} from '../src/gitignore';
11+
12+
/**
 * Specs for `parseGitignoreLine`: blank and comment lines produce `null`,
 * while pattern lines produce a rule exposing `negated`, `directoryOnly`
 * and `pattern`.
 */
describe('parseGitignoreLine', () => {
  it('returns null for empty lines', () => {
    for (const blankLine of ['', ' ']) {
      expect(parseGitignoreLine(blankLine)).toBeNull();
    }
  });

  it('returns null for comments', () => {
    const parsed = parseGitignoreLine('# this is a comment');
    expect(parsed).toBeNull();
  });

  it('parses a simple pattern', () => {
    const parsed = parseGitignoreLine('*.log');
    expect(parsed).not.toBeNull();
    // The assertion above has already failed the test if `parsed` is null,
    // so optional chaining never masks a missing rule here.
    expect(parsed?.negated).toBe(false);
    expect(parsed?.directoryOnly).toBe(false);
  });

  it('parses negated patterns', () => {
    const parsed = parseGitignoreLine('!important.log');
    expect(parsed).not.toBeNull();
    expect(parsed?.negated).toBe(true);
  });

  it('parses directory-only patterns', () => {
    const parsed = parseGitignoreLine('build/');
    expect(parsed).not.toBeNull();
    expect(parsed?.directoryOnly).toBe(true);
  });

  it('strips trailing whitespace', () => {
    const parsed = parseGitignoreLine('*.log ');
    expect(parsed).not.toBeNull();
    expect(parsed?.pattern).toBe('*.log');
  });
});
47+
48+
/**
 * Specs for `parseGitignore`: whole-file parsing, checking how many rules
 * come back and the flags/pattern recorded on selected rules.
 */
describe('parseGitignore', () => {
  it('parses multiple lines with blanks and comments', () => {
    // Leading and trailing '' entries reproduce the newline at each end of the fixture.
    const fixtureLines = [
      '',
      '# Build artifacts',
      'dist/',
      'build/',
      '',
      '# Logs',
      '*.log',
      '',
      '# But keep error logs',
      '!error.log',
      '',
    ];
    const rules = parseGitignore(fixtureLines.join('\n'));

    // Four pattern lines in the fixture; the comments and blanks are expected to yield nothing.
    expect(rules).toHaveLength(4);

    const firstRule = rules[0];
    const lastRule = rules[3];
    expect(firstRule.directoryOnly).toBe(true);
    expect(firstRule.pattern).toBe('dist');
    expect(lastRule.negated).toBe(true);
  });

  it('handles Windows line endings', () => {
    expect(parseGitignore('*.log\r\ndist/\r\n')).toHaveLength(2);
  });
});
74+
75+
/**
 * Specs for `createGitignoreMatcher`: each case builds a matcher from
 * gitignore text and checks the verdict of `ignores()` for a list of paths.
 */
describe('createGitignoreMatcher', () => {
  type Matcher = ReturnType<typeof createGitignoreMatcher>;

  /** A single `ignores()` call and the verdict it must return. */
  interface Probe {
    readonly path: string;
    /** Forwarded as the second argument only when present. */
    readonly isDirectory?: boolean;
    readonly ignored: boolean;
  }

  /** Asserts every probe, in order, against the given matcher. */
  const expectVerdicts = (matcher: Matcher, probes: readonly Probe[]): void => {
    for (const { path, isDirectory, ignored } of probes) {
      // Keep the one-argument call shape when no directory flag was given.
      const verdict =
        isDirectory === undefined ? matcher.ignores(path) : matcher.ignores(path, isDirectory);
      expect(verdict).toBe(ignored);
    }
  };

  it('matches simple glob patterns', () => {
    expectVerdicts(createGitignoreMatcher('*.log'), [
      { path: 'debug.log', ignored: true },
      { path: 'src/debug.log', ignored: true },
      { path: 'debug.txt', ignored: false },
    ]);
  });

  it('matches directory-only patterns only for directories', () => {
    expectVerdicts(createGitignoreMatcher('build/'), [
      { path: 'build', isDirectory: true, ignored: true },
      { path: 'build', isDirectory: false, ignored: false },
    ]);
  });

  it('handles negation (un-ignore)', () => {
    expectVerdicts(createGitignoreMatcher('*.log\n!important.log'), [
      { path: 'debug.log', ignored: true },
      { path: 'important.log', ignored: false },
    ]);
  });

  it('matches double-star patterns for nested paths', () => {
    expectVerdicts(createGitignoreMatcher('**/logs'), [
      { path: 'logs', ignored: true },
      { path: 'src/logs', ignored: true },
      { path: 'src/deep/logs', ignored: true },
    ]);
  });

  it('matches double-star in the middle', () => {
    expectVerdicts(createGitignoreMatcher('src/**/test.ts'), [
      { path: 'src/test.ts', ignored: true },
      { path: 'src/utils/test.ts', ignored: true },
      { path: 'src/a/b/test.ts', ignored: true },
      { path: 'lib/test.ts', ignored: false },
    ]);
  });

  it('matches anchored patterns (leading /)', () => {
    expectVerdicts(createGitignoreMatcher('/TODO'), [
      { path: 'TODO', ignored: true },
      { path: 'src/TODO', ignored: false },
    ]);
  });

  it('matches ? wildcard', () => {
    expectVerdicts(createGitignoreMatcher('file?.txt'), [
      { path: 'file1.txt', ignored: true },
      { path: 'fileA.txt', ignored: true },
      { path: 'file.txt', ignored: false },
      { path: 'file12.txt', ignored: false },
    ]);
  });

  it('matches character classes', () => {
    expectVerdicts(createGitignoreMatcher('file[0-9].txt'), [
      { path: 'file0.txt', ignored: true },
      { path: 'file9.txt', ignored: true },
      { path: 'fileA.txt', ignored: false },
    ]);
  });

  it('anchors patterns with / in the middle', () => {
    expectVerdicts(createGitignoreMatcher('doc/generated'), [
      { path: 'doc/generated', ignored: true },
      { path: 'src/doc/generated', ignored: false },
    ]);
  });

  it('handles the add() method for incremental rules', () => {
    const matcher = createGitignoreMatcher('*.log');
    // Same path is probed before and after the extra rule is added.
    expect(matcher.ignores('test.tmp')).toBe(false);
    matcher.add('*.tmp');
    expect(matcher.ignores('test.tmp')).toBe(true);
  });

  it('handles a realistic .gitignore', () => {
    // Leading and trailing '' entries reproduce the newline at each end of the fixture.
    const fixtureLines = [
      '',
      'node_modules/',
      'dist/',
      '.env',
      '.env.local',
      '*.log',
      'coverage/',
      '.DS_Store',
      '*.tsbuildinfo',
      '',
    ];
    const matcher = createGitignoreMatcher(fixtureLines.join('\n'));

    expectVerdicts(matcher, [
      // Directories
      { path: 'node_modules', isDirectory: true, ignored: true },
      { path: 'dist', isDirectory: true, ignored: true },
      { path: 'coverage', isDirectory: true, ignored: true },

      // Files
      { path: '.env', ignored: true },
      { path: '.env.local', ignored: true },
      { path: 'debug.log', ignored: true },
      { path: 'src/app.log', ignored: true },
      { path: '.DS_Store', ignored: true },
      { path: 'tsconfig.tsbuildinfo', ignored: true },

      // Should NOT be ignored
      { path: 'src/index.ts', ignored: false },
      { path: 'package.json', ignored: false },
      { path: 'README.md', ignored: false },
    ]);
  });
});
176+
177+
/**
 * Specs for `loadGitignore`, run against a scratch directory under the OS
 * temp dir that is created before and removed after every test.
 */
describe('loadGitignore', () => {
  let scratchDir: string;

  /** Writes `contents` to a `.gitignore` at the root of the scratch directory. */
  const writeRootGitignore = (contents: string): void => {
    writeFileSync(join(scratchDir, '.gitignore'), contents);
  };

  beforeEach(() => {
    // Timestamp plus random suffix keeps directory names distinct between tests.
    const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    scratchDir = join(tmpdir(), `gitignore-test-${uniqueSuffix}`);
    mkdirSync(scratchDir, { recursive: true });
  });

  afterEach(() => {
    // `force` makes removal a no-op if the directory is already gone.
    rmSync(scratchDir, { recursive: true, force: true });
  });

  it('loads .gitignore from root directory', () => {
    writeRootGitignore('*.log\ndist/\n');
    const matcher = loadGitignore(scratchDir);

    expect(matcher.ignores('debug.log')).toBe(true);
    expect(matcher.ignores('dist', true)).toBe(true);
    expect(matcher.ignores('src/index.ts')).toBe(false);
  });

  it('returns empty matcher when no .gitignore exists', () => {
    // No file is written, so the scratch directory is empty.
    const matcher = loadGitignore(scratchDir);

    expect(matcher.ignores('anything.log')).toBe(false);
    expect(matcher.ignores('node_modules', true)).toBe(false);
  });

  it('handles empty .gitignore', () => {
    writeRootGitignore('');
    const matcher = loadGitignore(scratchDir);

    expect(matcher.ignores('anything')).toBe(false);
  });
});

0 commit comments

Comments
 (0)