Skip to content

Commit 1cbd5a8

Browse files
colbymchenry and claude authored
fix(extraction): recurse into git submodules when listing files (#150)
`git ls-files -co --exclude-standard` only sees the submodule pointer in the main repo's index, so projects using submodules indexed 0 files.

Now the tracked list runs with `-c --recurse-submodules` so submodule contents are included; untracked files are gathered with a separate `-o --exclude-standard` call (the two flags can't be combined — git only supports --recurse-submodules with --cached/--stage).

Fixes #147.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent b47c956 commit 1cbd5a8

2 files changed

Lines changed: 71 additions & 7 deletions

File tree

__tests__/extraction.test.ts

Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3080,6 +3080,58 @@ describe('Directory Exclusion', () => {
30803080
});
30813081
});
30823082

3083+
describe('Git Submodules', () => {
3084+
let tempDir: string;
3085+
3086+
beforeEach(() => {
3087+
tempDir = createTempDir();
3088+
});
3089+
3090+
afterEach(() => {
3091+
cleanupTempDir(tempDir);
3092+
});
3093+
3094+
it('should index files inside git submodules (issue #147)', async () => {
3095+
const { execFileSync } = await import('child_process');
3096+
const git = (cwd: string, ...args: string[]) =>
3097+
execFileSync('git', args, { cwd, stdio: 'pipe' });
3098+
3099+
// Build a separate "library" repo to use as a submodule source.
3100+
const libDir = path.join(tempDir, '_lib');
3101+
fs.mkdirSync(libDir, { recursive: true });
3102+
git(libDir, 'init', '-q');
3103+
git(libDir, 'config', 'user.email', 'test@test.com');
3104+
git(libDir, 'config', 'user.name', 'Test');
3105+
fs.writeFileSync(path.join(libDir, 'lib.ts'), 'export const fromSubmodule = 1;');
3106+
git(libDir, 'add', '-A');
3107+
git(libDir, 'commit', '-q', '-m', 'lib init');
3108+
3109+
// Build the main repo and add the lib repo as a submodule.
3110+
const mainDir = path.join(tempDir, 'main');
3111+
fs.mkdirSync(mainDir, { recursive: true });
3112+
git(mainDir, 'init', '-q');
3113+
git(mainDir, 'config', 'user.email', 'test@test.com');
3114+
git(mainDir, 'config', 'user.name', 'Test');
3115+
fs.writeFileSync(path.join(mainDir, 'app.ts'), 'export const app = 1;');
3116+
git(mainDir, 'add', '-A');
3117+
git(mainDir, 'commit', '-q', '-m', 'app init');
3118+
// protocol.file.allow=always is required to add a local-path submodule on
3119+
// recent git versions (CVE-2022-39253 mitigation).
3120+
execFileSync(
3121+
'git',
3122+
['-c', 'protocol.file.allow=always', 'submodule', 'add', '-q', libDir, 'libs/lib'],
3123+
{ cwd: mainDir, stdio: 'pipe' }
3124+
);
3125+
git(mainDir, 'commit', '-q', '-m', 'add submodule');
3126+
3127+
const config = { ...DEFAULT_CONFIG, rootDir: mainDir };
3128+
const files = scanDirectory(mainDir, config);
3129+
3130+
expect(files).toContain('app.ts');
3131+
expect(files).toContain('libs/lib/lib.ts');
3132+
});
3133+
});
3134+
30833135
// =============================================================================
30843136
// Scala
30853137
// =============================================================================

src/extraction/index.ts

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -156,19 +156,31 @@ function getGitVisibleFiles(rootDir: string): Set<string> | null {
156156
}
157157
}
158158

159-
// -c = cached (tracked), -o = others (untracked), --exclude-standard = respect .gitignore
160-
const output = execFileSync(
161-
'git',
162-
['ls-files', '-co', '--exclude-standard'],
163-
{ cwd: rootDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] }
164-
);
165159
const files = new Set<string>();
166-
for (const line of output.split('\n')) {
160+
const gitOpts = { cwd: rootDir, encoding: 'utf-8' as const, timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] as ['pipe', 'pipe', 'pipe'] };
161+
162+
// Tracked files. --recurse-submodules pulls in files from active submodules,
163+
// which the main repo's index would otherwise represent only as a commit pointer.
164+
// Without this, monorepos using submodules index 0 files. (See issue #147.)
165+
// Note: --recurse-submodules only supports -c/--cached and --stage modes — it
166+
// can't be combined with -o, so untracked files are gathered separately below.
167+
const tracked = execFileSync('git', ['ls-files', '-c', '--recurse-submodules'], gitOpts);
168+
for (const line of tracked.split('\n')) {
167169
const trimmed = line.trim();
168170
if (trimmed) {
169171
files.add(normalizePath(trimmed));
170172
}
171173
}
174+
175+
// Untracked files in the main repo (submodules manage their own untracked state).
176+
const untracked = execFileSync('git', ['ls-files', '-o', '--exclude-standard'], gitOpts);
177+
for (const line of untracked.split('\n')) {
178+
const trimmed = line.trim();
179+
if (trimmed) {
180+
files.add(normalizePath(trimmed));
181+
}
182+
}
183+
172184
return files;
173185
} catch {
174186
return null;

0 commit comments

Comments
 (0)