Skip to content

Commit d315292

Browse files
wip
1 parent 32c68e7 commit d315292

9 files changed

Lines changed: 96 additions & 94 deletions

File tree

packages/backend/src/constants.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import { env } from "./env.js";
12
import { Settings } from "./types.js";
3+
import path from "path";
24

35
/**
46
* Default settings.
@@ -22,4 +24,7 @@ export const DEFAULT_SETTINGS: Settings = {
2224

2325
export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES = [
2426
'github',
25-
];
27+
];
28+
29+
export const REPOS_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'repos');
30+
export const INDEX_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'index');

packages/backend/src/env.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ export const env = createEnv({
4444
LOGTAIL_TOKEN: z.string().optional(),
4545
LOGTAIL_HOST: z.string().url().optional(),
4646
SOURCEBOT_LOG_LEVEL: z.enum(["info", "debug", "warn", "error"]).default("info"),
47+
DEBUG_ENABLE_GROUPMQ_LOGGING: booleanSchema.default('false'),
4748

4849
DATABASE_URL: z.string().url().default("postgresql://postgres:postgres@localhost:5432/postgres"),
4950
CONFIG_PATH: z.string().optional(),

packages/backend/src/git.ts

Lines changed: 48 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,47 @@
11
import { CheckRepoActions, GitConfigScope, simpleGit, SimpleGitProgressEvent } from 'simple-git';
22
import { mkdir } from 'node:fs/promises';
33
import { env } from './env.js';
4+
import { dirname, resolve } from 'node:path';
5+
import { existsSync } from 'node:fs';
46

57
type onProgressFn = (event: SimpleGitProgressEvent) => void;
68

9+
/**
10+
* Creates a simple-git client that has its working directory
11+
* set to the given path.
12+
*/
13+
const createGitClientForPath = (path: string, onProgress?: onProgressFn) => {
14+
if (!existsSync(path)) {
15+
throw new Error(`Path ${path} does not exist`);
16+
}
17+
18+
const parentPath = resolve(dirname(path));
19+
20+
const git = simpleGit({
21+
progress: onProgress,
22+
})
23+
.env({
24+
...process.env,
25+
/**
26+
* @note some inside baseball on why this is necessary: The specific
27+
* issue we saw was that a `git clone` would fail without throwing, and
28+
* then a subsequent `git config` command would run, but since the clone
29+
* failed, it wouldn't be running in a git directory. Git would then walk
30+
* up the directory tree until it either found a git directory (in the case
31+
* of the development env) or it would hit a GIT_DISCOVERY_ACROSS_FILESYSTEM
32+
* error when trying to cross a filesystem boundary (in the prod case).
33+
* GIT_CEILING_DIRECTORIES ensures that this walk stops before ascending into
34+
* the parent directory, so only the given path itself is checked.
35+
*/
36+
GIT_CEILING_DIRECTORIES: parentPath,
37+
})
38+
.cwd({
39+
path,
40+
});
41+
42+
return git;
43+
}
44+
745
export const cloneRepository = async (
846
{
947
cloneUrl,
@@ -20,11 +58,7 @@ export const cloneRepository = async (
2058
try {
2159
await mkdir(path, { recursive: true });
2260

23-
const git = simpleGit({
24-
progress: onProgress,
25-
}).cwd({
26-
path,
27-
})
61+
const git = createGitClientForPath(path, onProgress);
2862

2963
const cloneArgs = [
3064
"--bare",
@@ -62,11 +96,7 @@ export const fetchRepository = async (
6296
}
6397
) => {
6498
try {
65-
const git = simpleGit({
66-
progress: onProgress,
67-
}).cwd({
68-
path: path,
69-
})
99+
const git = createGitClientForPath(path, onProgress);
70100

71101
if (authHeader) {
72102
await git.addConfig("http.extraHeader", authHeader);
@@ -108,9 +138,7 @@ export const fetchRepository = async (
108138
* present in gitConfig.
109139
*/
110140
export const upsertGitConfig = async (path: string, gitConfig: Record<string, string>, onProgress?: onProgressFn) => {
111-
const git = simpleGit({
112-
progress: onProgress,
113-
}).cwd(path);
141+
const git = createGitClientForPath(path, onProgress);
114142

115143
try {
116144
for (const [key, value] of Object.entries(gitConfig)) {
@@ -130,9 +158,7 @@ export const upsertGitConfig = async (path: string, gitConfig: Record<string, st
130158
* If a key is not set, this is a no-op.
131159
*/
132160
export const unsetGitConfig = async (path: string, keys: string[], onProgress?: onProgressFn) => {
133-
const git = simpleGit({
134-
progress: onProgress,
135-
}).cwd(path);
161+
const git = createGitClientForPath(path, onProgress);
136162

137163
try {
138164
const configList = await git.listConfig();
@@ -156,9 +182,7 @@ export const unsetGitConfig = async (path: string, keys: string[], onProgress?:
156182
* Returns true if `path` is the _root_ of a git repository.
157183
*/
158184
export const isPathAValidGitRepoRoot = async (path: string, onProgress?: onProgressFn) => {
159-
const git = simpleGit({
160-
progress: onProgress,
161-
}).cwd(path);
185+
const git = createGitClientForPath(path, onProgress);
162186

163187
try {
164188
return git.checkIsRepo(CheckRepoActions.IS_REPO_ROOT);
@@ -184,7 +208,7 @@ export const isUrlAValidGitRepo = async (url: string) => {
184208
}
185209

186210
export const getOriginUrl = async (path: string) => {
187-
const git = simpleGit().cwd(path);
211+
const git = createGitClientForPath(path);
188212

189213
try {
190214
const remotes = await git.getConfig('remote.origin.url', GitConfigScope.local);
@@ -199,18 +223,13 @@ export const getOriginUrl = async (path: string) => {
199223
}
200224

201225
export const getBranches = async (path: string) => {
202-
const git = simpleGit();
203-
const branches = await git.cwd({
204-
path,
205-
}).branch();
206-
226+
const git = createGitClientForPath(path);
227+
const branches = await git.branch();
207228
return branches.all;
208229
}
209230

210231
export const getTags = async (path: string) => {
211-
const git = simpleGit();
212-
const tags = await git.cwd({
213-
path,
214-
}).tags();
232+
const git = createGitClientForPath(path);
233+
const tags = await git.tags();
215234
return tags.all;
216235
}

packages/backend/src/index.ts

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,14 @@ import { hasEntitlement, loadConfig } from '@sourcebot/shared';
66
import { existsSync } from 'fs';
77
import { mkdir } from 'fs/promises';
88
import { Redis } from 'ioredis';
9-
import path from 'path';
109
import { ConnectionManager } from './connectionManager.js';
11-
import { DEFAULT_SETTINGS } from './constants.js';
12-
import { env } from "./env.js";
10+
import { DEFAULT_SETTINGS, INDEX_CACHE_DIR, REPOS_CACHE_DIR } from './constants.js';
1311
import { RepoPermissionSyncer } from './ee/repoPermissionSyncer.js';
14-
import { PromClient } from './promClient.js';
15-
import { RepoManager } from './repoManager.js';
16-
import { AppContext } from "./types.js";
1712
import { UserPermissionSyncer } from "./ee/userPermissionSyncer.js";
13+
import { env } from "./env.js";
1814
import { IndexSyncer } from "./indexSyncer.js";
15+
import { PromClient } from './promClient.js';
16+
import { RepoManager } from './repoManager.js';
1917

2018

2119
const logger = createLogger('backend-entrypoint');
@@ -34,9 +32,8 @@ const getSettings = async (configPath?: string) => {
3432
}
3533

3634

37-
const cacheDir = env.DATA_CACHE_DIR;
38-
const reposPath = path.join(cacheDir, 'repos');
39-
const indexPath = path.join(cacheDir, 'index');
35+
const reposPath = REPOS_CACHE_DIR;
36+
const indexPath = INDEX_CACHE_DIR;
4037

4138
if (!existsSync(reposPath)) {
4239
await mkdir(reposPath, { recursive: true });
@@ -45,12 +42,6 @@ if (!existsSync(indexPath)) {
4542
await mkdir(indexPath, { recursive: true });
4643
}
4744

48-
const context: AppContext = {
49-
indexPath,
50-
reposPath,
51-
cachePath: cacheDir,
52-
}
53-
5445
const prisma = new PrismaClient();
5546

5647
const redis = new Redis(env.REDIS_URL, {
@@ -69,10 +60,10 @@ const promClient = new PromClient();
6960
const settings = await getSettings(env.CONFIG_PATH);
7061

7162
const connectionManager = new ConnectionManager(prisma, settings, redis);
72-
const repoManager = new RepoManager(prisma, settings, redis, promClient, context);
63+
const repoManager = new RepoManager(prisma, settings, redis, promClient);
7364
const repoPermissionSyncer = new RepoPermissionSyncer(prisma, settings, redis);
7465
const userPermissionSyncer = new UserPermissionSyncer(prisma, settings, redis);
75-
const indexSyncer = new IndexSyncer(prisma, settings, redis, context);
66+
const indexSyncer = new IndexSyncer(prisma, settings, redis);
7667

7768
// await repoManager.validateIndexedReposHaveShards();
7869

packages/backend/src/indexSyncer.ts

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@ import { existsSync } from 'fs';
55
import { readdir, rm } from 'fs/promises';
66
import { Job, Queue, ReservedJob, Worker } from "groupmq";
77
import { Redis } from 'ioredis';
8+
import { INDEX_CACHE_DIR } from './constants.js';
89
import { env } from './env.js';
910
import { cloneRepository, fetchRepository, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
10-
import { AppContext, repoMetadataSchema, RepoWithConnections, Settings } from "./types.js";
11+
import { repoMetadataSchema, RepoWithConnections, Settings } from "./types.js";
1112
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure } from './utils.js';
1213
import { indexGitRepository } from './zoekt.js';
1314

@@ -33,25 +34,22 @@ export class IndexSyncer {
3334
private db: PrismaClient,
3435
private settings: Settings,
3536
redis: Redis,
36-
private ctx: AppContext,
3737
) {
3838
this.queue = new Queue<JobPayload>({
3939
redis,
4040
namespace: 'index-sync-queue',
4141
jobTimeoutMs: JOB_TIMEOUT_MS,
4242
maxAttempts: 3,
43-
...(env.SOURCEBOT_LOG_LEVEL === 'debug' ? {
44-
logger,
45-
}: {}),
43+
logger: env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true',
4644
});
4745

4846
this.worker = new Worker<JobPayload>({
4947
queue: this.queue,
5048
maxStalledCount: 1,
5149
handler: this.runJob.bind(this),
5250
concurrency: this.settings.maxRepoIndexingJobConcurrency,
53-
...(env.SOURCEBOT_LOG_LEVEL === 'debug' ? {
54-
logger,
51+
...(env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true' ? {
52+
logger: true,
5553
}: {}),
5654
});
5755

@@ -62,6 +60,7 @@ export class IndexSyncer {
6260
}
6361

6462
public async startScheduler() {
63+
logger.debug('Starting scheduler');
6564
this.interval = setInterval(async () => {
6665
await this.scheduleIndexJobs();
6766
await this.scheduleCleanupJobs();
@@ -240,7 +239,7 @@ export class IndexSyncer {
240239
}
241240

242241
private async indexRepository(repo: RepoWithConnections, logger: Logger) {
243-
const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx);
242+
const { path: repoPath, isReadOnly } = getRepoPath(repo);
244243

245244
const metadata = repoMetadataSchema.parse(repo.metadata);
246245

@@ -304,22 +303,22 @@ export class IndexSyncer {
304303
}
305304

306305
logger.info(`Indexing ${repo.name} (id: ${repo.id})...`);
307-
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, this.ctx));
306+
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings));
308307
const indexDuration_s = durationMs / 1000;
309308
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
310309
}
311310

312311
private async cleanupRepository(repo: Repo, logger: Logger) {
313-
const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx);
312+
const { path: repoPath, isReadOnly } = getRepoPath(repo);
314313
if (existsSync(repoPath) && !isReadOnly) {
315314
logger.info(`Deleting repo directory ${repoPath}`);
316315
await rm(repoPath, { recursive: true, force: true });
317316
}
318317

319318
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
320-
const files = (await readdir(this.ctx.indexPath)).filter(file => file.startsWith(shardPrefix));
319+
const files = (await readdir(INDEX_CACHE_DIR)).filter(file => file.startsWith(shardPrefix));
321320
for (const file of files) {
322-
const filePath = `${this.ctx.indexPath}/${file}`;
321+
const filePath = `${INDEX_CACHE_DIR}/${file}`;
323322
logger.info(`Deleting shard file ${filePath}`);
324323
await rm(filePath, { force: true });
325324
}

packages/backend/src/repoManager.ts

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@ import { createLogger } from "@sourcebot/logger";
44
import { Job, Queue, Worker } from 'bullmq';
55
import { existsSync, promises, readdirSync } from 'fs';
66
import { Redis } from 'ioredis';
7+
import { INDEX_CACHE_DIR } from "./constants.js";
78
import { env } from './env.js';
89
import { cloneRepository, fetchRepository, unsetGitConfig, upsertGitConfig } from "./git.js";
910
import { PromClient } from './promClient.js';
10-
import { AppContext, RepoWithConnections, Settings, repoMetadataSchema } from "./types.js";
11+
import { RepoWithConnections, Settings, repoMetadataSchema } from "./types.js";
1112
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, measure } from "./utils.js";
1213
import { indexGitRepository } from "./zoekt.js";
1314

@@ -36,7 +37,6 @@ export class RepoManager {
3637
private settings: Settings,
3738
redis: Redis,
3839
private promClient: PromClient,
39-
private ctx: AppContext,
4040
) {
4141
// Repo indexing
4242
this.indexQueue = new Queue<RepoIndexingPayload>(REPO_INDEXING_QUEUE, {
@@ -162,7 +162,7 @@ export class RepoManager {
162162
}
163163

164164
private async syncGitRepository(repo: RepoWithConnections, repoAlreadyInIndexingState: boolean) {
165-
const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx);
165+
const { path: repoPath, isReadOnly } = getRepoPath(repo);
166166

167167
const metadata = repoMetadataSchema.parse(repo.metadata);
168168

@@ -225,7 +225,7 @@ export class RepoManager {
225225
}
226226

227227
logger.info(`Indexing ${repo.displayName}...`);
228-
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, this.ctx));
228+
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings));
229229
const indexDuration_s = durationMs / 1000;
230230
logger.info(`Indexed ${repo.displayName} in ${indexDuration_s}s`);
231231
}
@@ -422,17 +422,17 @@ export class RepoManager {
422422
});
423423

424424
// delete cloned repo
425-
const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx);
425+
const { path: repoPath, isReadOnly } = getRepoPath(repo);
426426
if (existsSync(repoPath) && !isReadOnly) {
427427
logger.info(`Deleting repo directory ${repoPath}`);
428428
await promises.rm(repoPath, { recursive: true, force: true });
429429
}
430430

431431
// delete shards
432432
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
433-
const files = readdirSync(this.ctx.indexPath).filter(file => file.startsWith(shardPrefix));
433+
const files = readdirSync(INDEX_CACHE_DIR).filter(file => file.startsWith(shardPrefix));
434434
for (const file of files) {
435-
const filePath = `${this.ctx.indexPath}/${file}`;
435+
const filePath = `${INDEX_CACHE_DIR}/${file}`;
436436
logger.info(`Deleting shard file ${filePath}`);
437437
await promises.rm(filePath, { force: true });
438438
}
@@ -493,7 +493,7 @@ export class RepoManager {
493493
return;
494494
}
495495

496-
const files = readdirSync(this.ctx.indexPath);
496+
const files = readdirSync(INDEX_CACHE_DIR);
497497
const reposToReindex: number[] = [];
498498
for (const repo of indexedRepos) {
499499
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
@@ -504,7 +504,7 @@ export class RepoManager {
504504
try {
505505
hasShards = files.some(file => file.startsWith(shardPrefix));
506506
} catch (error) {
507-
logger.error(`Failed to read index directory ${this.ctx.indexPath}: ${error}`);
507+
logger.error(`Failed to read index directory ${INDEX_CACHE_DIR}: ${error}`);
508508
continue;
509509
}
510510

0 commit comments

Comments
 (0)