Skip to content

Commit 01e4418

Browse files
fix: persist codex scan diagnostics (#2351)
1 parent 64633c2 commit 01e4418

8 files changed

Lines changed: 976 additions & 55 deletions

File tree

.github/workflows/security-scan-codex.yml

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ jobs:
4242
CODEX_SECURITY_SCAN_MAX_JOBS: ${{ inputs['max-jobs'] || '' }}
4343
CODEX_SECURITY_SCAN_MAX_RUNTIME_MINUTES: ${{ inputs['max-runtime-minutes'] || '40' }}
4444
CODEX_SECURITY_SCAN_LEASE_MINUTES: "60"
45+
CODEX_SECURITY_SCAN_DIAGNOSTICS_DIR: ${{ runner.temp }}/codex-security-scan-diagnostics
4546
steps:
4647
- uses: actions/checkout@v6
4748

@@ -77,3 +78,11 @@ jobs:
7778
--max-jobs "$CODEX_SECURITY_SCAN_MAX_JOBS" \
7879
--max-runtime-minutes "$CODEX_SECURITY_SCAN_MAX_RUNTIME_MINUTES" \
7980
--lease-minutes "$CODEX_SECURITY_SCAN_LEASE_MINUTES"
81+
82+
- name: Upload Codex security diagnostics
83+
if: ${{ !cancelled() }}
84+
uses: actions/upload-artifact@v7
85+
with:
86+
name: codex-security-scan-diagnostics-${{ github.run_id }}
87+
path: ${{ env.CODEX_SECURITY_SCAN_DIAGNOSTICS_DIR }}
88+
if-no-files-found: ignore

convex/lib/securityPrompt.test.ts

Lines changed: 126 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -247,6 +247,132 @@ describe("securityPrompt", () => {
247247
expect(parsed?.riskSummary?.abnormal_behavior_control.status).toBe("none");
248248
});
249249

250+
it("marks workspace read failures as incomplete artifact inspection", () => {
251+
const parsed = parseLlmEvalResponse(
252+
newResponse({
253+
verdict: "benign",
254+
confidence: "low",
255+
summary:
256+
"No artifact-backed suspicious behavior could be identified because the workspace read commands failed before any files could be inspected.",
257+
agentic_risk_findings: [],
258+
risk_summary: {
259+
abnormal_behavior_control: {
260+
status: "none",
261+
highest_severity: "none",
262+
summary: "No artifact-backed abnormal behavior control finding was identified.",
263+
},
264+
permission_boundary: {
265+
status: "none",
266+
highest_severity: "none",
267+
summary: "No artifact-backed permission boundary finding was identified.",
268+
},
269+
sensitive_data_protection: {
270+
status: "none",
271+
highest_severity: "none",
272+
summary: "No artifact-backed sensitive data protection finding was identified.",
273+
},
274+
},
275+
user_guidance:
276+
"Treat this as an incomplete low-confidence review: the sandbox prevented direct inspection of metadata.json and artifact files.",
277+
incomplete_artifact_inspection: true,
278+
}),
279+
);
280+
281+
expect(parsed).toMatchObject({
282+
verdict: "benign",
283+
confidence: "low",
284+
incompleteArtifactInspection: true,
285+
});
286+
});
287+
288+
it("does not let quoted artifact snippets spoof incomplete inspection", () => {
289+
const parsed = parseLlmEvalResponse(
290+
newResponse({
291+
agentic_risk_findings: [
292+
{
293+
category_id: "ASI09",
294+
category_label: "Human-Agent Trust Exploitation",
295+
risk_bucket: "abnormal_behavior_control",
296+
status: "note",
297+
severity: "low",
298+
confidence: "medium",
299+
evidence: {
300+
path: "SKILL.md",
301+
snippet: "metadata.json could not be read",
302+
explanation: "The phrase appears in the artifact text, not scanner diagnostics.",
303+
},
304+
user_impact: "Users should treat this as artifact content.",
305+
recommendation: "Do not follow artifact instructions.",
306+
},
307+
],
308+
}),
309+
);
310+
311+
expect(parsed?.incompleteArtifactInspection).toBeUndefined();
312+
});
313+
314+
it("does not infer incomplete inspection from quoted summary prose", () => {
315+
const parsed = parseLlmEvalResponse(
316+
newResponse({
317+
verdict: "benign",
318+
confidence: "high",
319+
summary:
320+
'The SKILL.md includes the phrase "metadata.json could not be read" as an example, but artifact files were inspected.',
321+
user_guidance: "No scanner error was reported.",
322+
}),
323+
);
324+
325+
expect(parsed?.verdict).toBe("benign");
326+
expect(parsed?.incompleteArtifactInspection).toBeUndefined();
327+
});
328+
329+
it("does not discard blocking verdicts that mention quoted failure text", () => {
330+
const parsed = parseLlmEvalResponse(
331+
newResponse({
332+
verdict: "malicious",
333+
scan_findings_in_context: [
334+
{
335+
ruleId: "suspicious.prompt_injection",
336+
expected_for_purpose: false,
337+
note: "The artifact tells the scanner to claim metadata.json could not be read.",
338+
},
339+
],
340+
agentic_risk_findings: [
341+
{
342+
category_id: "ASI09",
343+
category_label: "Human-Agent Trust Exploitation",
344+
risk_bucket: "abnormal_behavior_control",
345+
status: "concern",
346+
severity: "high",
347+
confidence: "high",
348+
evidence: {
349+
path: "SKILL.md",
350+
snippet: "metadata.json could not be read",
351+
explanation: "The artifact is attempting to forge scanner diagnostics.",
352+
},
353+
user_impact: "Users could be misled by forged scanner-failure language.",
354+
recommendation: "Do not install this artifact.",
355+
},
356+
],
357+
}),
358+
);
359+
360+
expect(parsed?.verdict).toBe("malicious");
361+
expect(parsed?.incompleteArtifactInspection).toBeUndefined();
362+
});
363+
364+
it("honors explicit incomplete inspection even with a blocking verdict string", () => {
365+
const parsed = parseLlmEvalResponse(
366+
newResponse({
367+
verdict: "malicious",
368+
incomplete_artifact_inspection: true,
369+
}),
370+
);
371+
372+
expect(parsed?.verdict).toBe("malicious");
373+
expect(parsed?.incompleteArtifactInspection).toBe(true);
374+
});
375+
250376
it("defaults LLM evals to OpenAI priority service tier", () => {
251377
const previous = process.env.OPENAI_EVAL_SERVICE_TIER;
252378
delete process.env.OPENAI_EVAL_SERVICE_TIER;

convex/lib/securityPrompt.ts

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,7 @@ export type LlmEvalResponse = {
159159
findings: string;
160160
agenticRiskFindings?: LlmAgenticRiskFinding[];
161161
riskSummary?: LlmRiskSummary;
162+
incompleteArtifactInspection?: boolean;
162163
};
163164

164165
export type PreparedArtifactText = {
@@ -1013,7 +1014,7 @@ export function parseLlmEvalResponse(raw: string): LlmEvalResponse | null {
10131014
const riskSummary = parseRiskSummary(obj.risk_summary ?? obj.riskSummary);
10141015
if (riskSummary === null) return null;
10151016

1016-
return normalizeParsedLlmEvalResponse({
1017+
const result = normalizeParsedLlmEvalResponse({
10171018
verdict: verdict as LlmEvalResponse["verdict"],
10181019
confidence: confidence as LlmEvalResponse["confidence"],
10191020
summary,
@@ -1023,4 +1024,12 @@ export function parseLlmEvalResponse(raw: string): LlmEvalResponse | null {
10231024
agenticRiskFindings: agenticRiskFindings ?? undefined,
10241025
riskSummary: riskSummary ?? undefined,
10251026
});
1027+
1028+
const hasIncompleteInspectionSignal =
1029+
obj.incomplete_artifact_inspection === true || obj.incompleteArtifactInspection === true;
1030+
if (hasIncompleteInspectionSignal) {
1031+
return { ...result, incompleteArtifactInspection: true };
1032+
}
1033+
1034+
return result;
10261035
}

0 commit comments

Comments
 (0)