Skip to content

Commit f05f21f

Browse files
fixes
1 parent 96235b7 commit f05f21f

2 files changed

Lines changed: 222 additions & 2 deletions

File tree

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
import { promises as fs } from 'node:fs';
2+
import path from 'node:path';
3+
import { v4 as uuidv4 } from 'uuid';
4+
import { logger } from '#o11y/logger';
5+
import { execCommand, failOnError } from '#utils/exec';
6+
7+
/**
 * One SWE-bench task instance, with the same field names as the dataset rows.
 *
 * Field descriptions below follow the public SWE-bench dataset card; every value is carried as a string
 * exactly as it appears in the dataset.
 */
export interface SWEInstance {
  /** Identifier of the task instance; used here to select the Docker image and to name containers. */
  instance_id: string;
  // text: string;
  /** Repository the task was collected from (presumably `owner/name` — confirm against the dataset). */
  repo: string;
  /** Commit of the repository that the task starts from. */
  base_commit: string;
  /** Issue text that is handed to the agent as its prompt. */
  problem_statement: string;
  /** Additional issue discussion that may hint at the solution. */
  hints_text: string;
  /** Creation timestamp of the originating pull request, as a string. */
  created_at: string;
  /** Reference ("gold") patch that resolved the issue. */
  patch: string;
  /** Patch containing the tests contributed alongside the reference solution. */
  test_patch: string;
  /** Version label used by the benchmark to choose the installation recipe. */
  version: string;
  /** Tests expected to go from failing to passing; presumably a JSON-encoded list — verify before parsing. */
  FAIL_TO_PASS: string;
  /** Tests expected to pass both before and after the fix; presumably a JSON-encoded list — verify before parsing. */
  PASS_TO_PASS: string;
  /** Commit used by the benchmark for environment setup and installation. */
  environment_setup_commit: string;
}
22+
23+
// Memory limit handed to `docker run --memory` when the per-problem container is started.
const CONTAINER_MEMORY = '8g';
24+
25+
// https://epoch.ai/blog/swebench-docker
/**
 * Builds the reference of the pre-built x86_64 SWE-bench evaluation image for a single problem.
 *
 * @param problemId SWE-bench instance id (e.g. `django__django-11099`).
 * @returns The image reference in the `ghcr.io/epoch-research` namespace, pinned to the `latest` tag.
 */
function getIssueImageName(problemId: string): string {
  // Docker refuses image references whose repository part contains upper-case characters
  // ("repository name must be lowercase"), so normalise the id before embedding it.
  const repository = `ghcr.io/epoch-research/swe-bench.eval.x86_64.${problemId.toLowerCase()}`;
  return `${repository}:latest`;
}
29+
30+
/**
 * Stops a Docker container and, by default, removes it afterwards.
 *
 * This is a best-effort cleanup helper: errors thrown by either docker command are deliberately ignored
 * because the container may not exist (or may already be stopped).
 *
 * @param containerIdOrName Container id or name passed verbatim to the docker CLI.
 * @param removeContainer When true (the default) the container is also removed with `docker rm`.
 */
export async function stopContainer(containerIdOrName: string, removeContainer = true): Promise<void> {
  // Only claim removal in the log when removal is actually going to happen.
  logger.info(removeContainer ? `Stopping and removing container ${containerIdOrName}` : `Stopping container ${containerIdOrName}`);

  try {
    await execCommand(`docker stop ${containerIdOrName}`);
  } catch {
    // Ignored on purpose: the container might not exist.
  }

  if (!removeContainer) return;

  try {
    await execCommand(`docker rm ${containerIdOrName}`);
  } catch {
    // Ignored on purpose: the container might not exist.
  }
}
42+
43+
/** Repository location inside the container: the source of the initial `docker cp` and the bind-mount target. */
export const CONTAINER_PATH = '/testbed';
44+
45+
/**
 * Starts a long-lived Docker container for a SWE-bench problem, with the repository bind-mounted from the host.
 *
 * Steps:
 *  1. Pull the problem's image.
 *  2. Recreate `<workspacePath>/<problemId>` on the host and fill it with the image's CONTAINER_PATH contents,
 *     copied out of a throw-away container.
 *  3. Run the main container detached with that host directory mounted over CONTAINER_PATH. Its command
 *     configures git, creates a baseline commit and then sleeps for 7200 seconds.
 *
 * @param workspacePath Host directory under which the repository copy is created.
 * @param problemId SWE-bench instance id; selects the image and is embedded in the container name.
 * @returns The container id printed by `docker run -d` and the absolute host path of the repository copy.
 * @throws Whatever `failOnError` throws when one of the docker commands fails.
 */
export async function startContainer(workspacePath: string, problemId: string): Promise<{ containerId: string; repoPathOnHost: string }> {
  // Single-quote a value for a POSIX shell (assumes execCommand runs its command through one — confirm).
  const shellQuote = (value: string): string => `'${value.replace(/'/g, "'\\''")}'`;

  // The names carry a fresh random suffix, so no container from an earlier run can share them;
  // a "clean up previous runs" pass over these names would only ever issue no-op docker calls.
  const containerName = `sweb.typedai.${problemId}_${uuidv4().slice(0, 8)}`;
  const tempContainerName = `${containerName}.temp`;

  const imageName = getIssueImageName(problemId);
  logger.info(`Pulling image ${imageName}`);
  failOnError(`Failed to pull image ${imageName}`, await execCommand(`docker pull ${imageName}`));

  // This function uses a bind mount to avoid host permission issues with Docker volumes.
  // `docker run -v` only treats the source as a host directory when it is an absolute path,
  // so resolve it up front (a relative value would be interpreted as a volume name).
  const repoPathOnHost = path.resolve(workspacePath, problemId);
  await fs.rm(repoPathOnHost, { recursive: true, force: true }); // Clean up from previous runs
  await fs.mkdir(repoPathOnHost, { recursive: true });

  // Create a temporary container to copy the initial repository state from.
  logger.info(`Creating temporary container ${tempContainerName} to copy repo contents...`);
  const createResult = await execCommand(`docker create --name ${tempContainerName} ${imageName}`);
  failOnError(`Failed to create temporary container for ${problemId}`, createResult);

  try {
    // Copy the repository from the temporary container to the host directory.
    logger.info(`Copying from ${tempContainerName}:${CONTAINER_PATH}/. to ${repoPathOnHost}`);
    const cpResult = await execCommand(`docker cp ${tempContainerName}:${CONTAINER_PATH}/. ${shellQuote(repoPathOnHost)}`);
    failOnError(`Failed to copy repo contents for ${problemId}`, cpResult);
  } finally {
    // Always remove the temporary container, including when the copy failed.
    await stopContainer(tempContainerName);
  }

  // Start the main container, bind mounting the host directory into the container.
  logger.info(`Starting container for ${problemId} with name ${containerName}`);
  const bindMount = shellQuote(`${repoPathOnHost}:${CONTAINER_PATH}`);
  const runResult = await execCommand(
    `docker run --name ${containerName} --memory="${CONTAINER_MEMORY}" -d -v ${bindMount} ${imageName} bash -c "git config --global user.email a && git config --global user.name a && git config --global --add safe.directory ${CONTAINER_PATH} && git commit --allow-empty -am typedai && sleep 7200"`,
  );
  try {
    failOnError(`Failed to start container for ${problemId}`, runResult);
  } catch (e) {
    // A failed `docker run` can still leave a created container behind; do not leak it.
    await stopContainer(containerName);
    throw e;
  }
  const containerId = runResult.stdout.trim();

  // Fixed delay rather than a readiness probe: gives the startup git commands time to finish.
  await new Promise<void>((resolve) => setTimeout(resolve, 10000));

  return { containerId, repoPathOnHost };
}
88+
89+
/**
 * Produces a unified diff (5 lines of context, no colour, no pager) of the working tree against HEAD.
 *
 * @param repoPath Directory in which the git command is executed.
 * @returns The stdout of the `git diff` command.
 * @throws Whatever `failOnError` throws when the git command fails.
 */
async function generatePatch(repoPath: string): Promise<string> {
  logger.info(`Generating patch in ${repoPath}`);

  const diffCommand = 'git --no-pager diff -U5 --no-color HEAD';
  const diffResult = await execCommand(diffCommand, { workingDirectory: repoPath });
  failOnError(`Failed to generate patch in ${repoPath}`, diffResult);

  const { stdout: patchText } = diffResult;
  return patchText;
}
95+
96+
/**
 * Runs the SWE-bench evaluation harness (`swebench.harness.run_evaluation`) on a predictions file.
 *
 * The harness is executed from the directory containing the predictions file, so the report is written there
 * too. A failing harness run is logged but NOT rethrown.
 *
 * @param predictionsFile Path to the JSON predictions file; its first entry supplies the instance id.
 * @param dataset Dataset name passed as `--dataset_name`.
 * @param runId Run identifier passed as `--run_id`.
 * @param swebenchVenvPath Root of the Python virtualenv whose `bin/python` has swebench installed.
 * @throws Error when the predictions file cannot be read or parsed, or has no usable first `instance_id`.
 */
async function runEvaluation(predictionsFile: string, dataset: string, runId: string, swebenchVenvPath: string): Promise<void> {
  // Single-quote a value for a POSIX shell (assumes execCommand runs its command through one — confirm).
  const shellQuote = (value: string): string => `'${value.replace(/'/g, "'\\''")}'`;

  const reportDir = path.dirname(predictionsFile);

  // Validate the parsed JSON instead of blindly indexing into it.
  const predictions: unknown = JSON.parse(await fs.readFile(predictionsFile, 'utf-8'));
  const firstPrediction: unknown = Array.isArray(predictions) ? predictions[0] : undefined;
  const instanceId =
    typeof firstPrediction === 'object' && firstPrediction !== null ? (firstPrediction as { instance_id?: unknown }).instance_id : undefined;
  if (typeof instanceId !== 'string' || instanceId.length === 0) {
    throw new Error(`No instance_id found in the first entry of ${predictionsFile}`);
  }

  // Remove a container left over from an earlier evaluation of the same instance and run.
  // The harness is started with `--run_id runId` below, so the stale container carries that run id
  // (naming scheme `sweb.eval.<lower-cased instance id>.<run id>` per the SWE-bench harness —
  // NOTE(review): confirm against the installed swebench version).
  await stopContainer(`sweb.eval.${instanceId.toLowerCase()}.${runId}`);

  const cmdParts = [
    path.join(swebenchVenvPath, 'bin', 'python'),
    '-m',
    'swebench.harness.run_evaluation',
    '--dataset_name',
    dataset,
    '--predictions_path',
    path.basename(predictionsFile),
    '--run_id',
    runId,
    '--report_dir',
    '.', // report dir is relative to cwd
    '--cache_level',
    'instance',
    '--namespace',
    'epoch-research/swe-bench',
    '--instance_image_tag',
    'latest',
  ];
  // Quote every argument so paths or ids containing spaces or shell metacharacters stay single arguments.
  const cmd = cmdParts.map(shellQuote).join(' ');

  logger.info(`Running evaluation: ${cmd} in ${reportDir}`);
  try {
    // The original python script has complex retry logic which is simplified here.
    // Using execCommand which doesn't stream stdio. Output will be logged after completion.
    const result = await execCommand(cmd, { workingDirectory: reportDir });
    logger.info(result.stdout);
    if (result.stderr) logger.error(result.stderr);
    failOnError('Evaluation failed', result);
  } catch (e) {
    // Swallowed on purpose: the function returns normally after logging the failure.
    logger.error('Evaluation failed', e);
  }
}
135+
136+
/**
 * Evaluates the predictions stored in `<workspacePath>/predictions.json` and reports whether the problem was resolved.
 *
 * The harness is run with the problem id as run id, and the resulting report
 * `<workspacePath>/typedai-agent.<problemId>.json` is read back. Any failure along the way is logged and
 * reported as `is_success: false`; this function does not throw.
 *
 * @param problemId SWE-bench instance id being evaluated.
 * @param workspacePath Directory holding `predictions.json` and receiving the evaluation report.
 * @returns `{ is_success: true }` only when the report's `resolved_ids` contains `problemId`.
 */
async function runEvalOnSingleProblem(problemId: string, workspacePath: string): Promise<{ is_success: boolean }> {
  const predictionsFile = path.join(workspacePath, 'predictions.json');
  const evalOutcomes = { is_success: false };

  try {
    // Prefer the explicit setting; otherwise fall back to the default location under the home directory.
    let swebenchVenvPath = process.env.SWEBENCH_VENV_PATH;
    if (!swebenchVenvPath) {
      const homeDir = process.env.HOME;
      if (!homeDir) {
        // path.join(undefined, ...) would fail with an opaque TypeError; say what is actually missing.
        throw new Error('SWEBENCH_VENV_PATH is not set and HOME is unavailable to derive the default swebench virtualenv path');
      }
      swebenchVenvPath = path.join(homeDir, 'swebench_eval_tools_env');
    }
    await runEvaluation(predictionsFile, 'princeton-nlp/SWE-bench', problemId, swebenchVenvPath);

    const evalFile = path.join(workspacePath, `typedai-agent.${problemId}.json`);
    const report: unknown = JSON.parse(await fs.readFile(evalFile, 'utf-8'));
    const resolvedIds = typeof report === 'object' && report !== null ? (report as { resolved_ids?: unknown }).resolved_ids : undefined;
    if (!Array.isArray(resolvedIds)) {
      throw new Error(`Evaluation report ${evalFile} does not contain a resolved_ids array`);
    }
    if (resolvedIds.includes(problemId)) {
      evalOutcomes.is_success = true;
    }
    logger.info(`Evaluated ${problemId} successfully.`);
  } catch (e) {
    logger.error(`Failed to report results for ${problemId}`, e);
  }
  return evalOutcomes;
}
155+
156+
/**
 * Runs the agent on one SWE-bench problem and evaluates the patch it produces.
 *
 * Flow: create `<workspaceBasePath>/<instance id>/rollout_<rolloutIdx>`, start the problem container, write the
 * problem as `instance.json` into the repository copy, run the agent CLI from that directory, capture the
 * resulting git diff into `predictions.json`, stop the container, and finally run the evaluation.
 *
 * @param problem The SWE-bench instance to solve.
 * @param rolloutIdx Index of this attempt; used in logs and in the workspace directory name.
 * @param workspaceBasePath Base directory under which per-problem, per-rollout workspaces are created.
 * @returns The generated diff, the agent run duration in seconds, and the evaluation outcome.
 * @throws Propagates errors from starting the container, running the agent or generating the patch
 *         (the container is still stopped in that case).
 */
export async function runAgentOnSingleProblem(
  problem: SWEInstance,
  rolloutIdx: number,
  workspaceBasePath: string,
): Promise<{ diff: string; duration: number; evalOutcomes: { is_success: boolean } }> {
  // Single-quote a value for a POSIX shell (assumes execCommand runs its command through one — confirm).
  const shellQuote = (value: string): string => `'${value.replace(/'/g, "'\\''")}'`;

  const { instance_id: problemId } = problem;
  const logsPrefix = `[${problemId}]`;
  logger.info(`${logsPrefix} Starting rollout ${rolloutIdx}`);

  const workspacePath = path.join(workspaceBasePath, problemId, `rollout_${rolloutIdx}`);
  await fs.mkdir(workspacePath, { recursive: true });

  // Stays undefined when startContainer throws, which the finally block relies on.
  let containerId: string | undefined;
  let diff: string;
  let duration: number;

  try {
    const started = await startContainer(workspacePath, problemId);
    containerId = started.containerId;
    const { repoPathOnHost } = started;
    logger.info(`${logsPrefix} Docker container started with ID: ${containerId}`);

    await fs.writeFile(path.join(repoPathOnHost, 'instance.json'), JSON.stringify(problem, null, 2));

    logger.info(`${logsPrefix} Starting Node.js agent run...`);
    const startTime = Date.now();

    // The agent command runs with the repository copy as its working directory, so every project-relative
    // path must be made absolute against the current process directory first.
    const agentEntrypoint = path.resolve(process.cwd(), 'src/cli/swe-bench-agent.ts');
    const envFile = path.resolve(process.cwd(), 'variables/local.env');
    // NOTE(review): `-r esbuild-register` is still resolved by node from the command's working directory — confirm it is reachable from there.

    // The problem statement is free-form issue text: it is single-quoted so that `$(...)`, backticks,
    // backslashes and double quotes reach the agent literally instead of being interpreted by the shell.
    const agentCmd = [
      'node',
      shellQuote(`--env-file=${envFile}`),
      '-r',
      'esbuild-register',
      shellQuote(agentEntrypoint),
      '--container-id',
      shellQuote(containerId),
      shellQuote(problem.problem_statement),
    ].join(' ');
    logger.info(`Executing agent: ${agentCmd} in ${repoPathOnHost}`);
    // Note: execCommand buffers output. For long-running agents, live output is not available.
    const agentResult = await execCommand(agentCmd, { workingDirectory: repoPathOnHost });
    logger.info(agentResult.stdout);
    if (agentResult.stderr) {
      logger.error(agentResult.stderr);
    }
    failOnError('Agent run failed', agentResult);

    duration = (Date.now() - startTime) / 1000;
    logger.info(`${logsPrefix} Agent run completed in ${duration.toFixed(2)}s.`);

    diff = await generatePatch(repoPathOnHost);
    const predictions = [
      {
        instance_id: problemId,
        model_name_or_path: 'typedai-agent',
        model_patch: diff,
      },
    ];
    await fs.writeFile(path.join(workspacePath, 'predictions.json'), JSON.stringify(predictions, null, 2));
  } finally {
    // Stop the container whether or not the agent run succeeded.
    if (containerId) {
      logger.info(`${logsPrefix} Stopping Docker container...`);
      await stopContainer(containerId);
      logger.info(`${logsPrefix} Docker container stopped`);
    }
  }

  logger.info(`${logsPrefix} Evaluating the generated diff...`);
  const evalStartTime = Date.now();
  const evalOutcomes = await runEvalOnSingleProblem(problemId, workspacePath);
  const evalDuration = (Date.now() - evalStartTime) / 1000;
  logger.info(`${logsPrefix} Evaluation completed in ${evalDuration.toFixed(2)}s.`);

  return { diff, duration, evalOutcomes };
}

src/functions/scm/gitlab.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ export class GitLab extends AbstractSCM implements SourceControlManagement {
420420
}
421421

422422
/**
423-
*
423+
* @see https://docs.gitlab.com/api/events/#get-contribution-events-for-a-user
424424
* @param date The day to get activity for.
425425
* @returns the activity for the user on the given day
426426
*/
@@ -433,7 +433,6 @@ export class GitLab extends AbstractSCM implements SourceControlManagement {
433433
const endOfDay = new Date(date);
434434
endOfDay.setHours(23, 59, 59, 999);
435435

436-
437436
const user = await this.api().Users.showCurrentUser();
438437

439438
const activity = await this.api().Users.allEvents(user.id, { after: startOfDay.toISOString(), before: endOfDay.toISOString() });

0 commit comments

Comments
 (0)