Skip to content

Commit 7727aae

Browse files
committed
Harden orchestrator review verdict prompt
1 parent 18361db commit 7727aae

3 files changed

Lines changed: 27 additions & 6 deletions

File tree

.github/AGENTS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ State lives in the master issue body as a hidden HTML comment:
8686
### Phases
8787

8888
- **DEV**: Initial agent run. Agent creates a PR. Transition to `REVIEW`.
89-
- **REVIEW**: Agent reviews the PR. If approved and no `auto-merge` label, transition to `HUMAN_REVIEW`. If `auto-merge`, transition to `MERGE`. If changes requested, transition to `FEEDBACK`.
89+
- **REVIEW**: Agent reviews the PR. The orchestrator posts `/oc review` with explicit instructions requiring the final line to be exactly `Verdict: approve` or `Verdict: request-changes`. If approved and no `auto-merge` label, transition to `HUMAN_REVIEW`. If `auto-merge`, transition to `MERGE`. If changes requested, transition to `FEEDBACK`.
9090
- **FEEDBACK**: Agent addresses review comments. Transition to `REVIEW`.
9191
- **HUMAN_REVIEW**: Orchestrator waits for native GitHub PR review from a human maintainer. Approval → `MERGE`, Request changes → `FEEDBACK`.
9292
- **MERGE**: Orchestrator squashes the PR and deletes the branch inline.

.github/scripts/orchestrator.js

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ module.exports = async ({ github, context, core }) => {
4646
return null;
4747
}
4848

49+
function getReviewPrompt() {
50+
return '/oc review\n\nYou are reviewing a pull request for the orchestrator. Your final line must be exactly one of:\nVerdict: approve\nVerdict: request-changes\n\nUse "Verdict: request-changes" if the PR needs code changes. Use "Verdict: approve" only if the PR is correct and ready for the next orchestrator phase.';
51+
}
52+
4953
async function getAgentReviewVerdict(prNumber) {
5054
const comments = await github.rest.issues.listComments({
5155
owner: context.repo.owner,
@@ -410,7 +414,7 @@ module.exports = async ({ github, context, core }) => {
410414
core.info(`Orchestrator already in progress. Retriggering current task #${getTaskIssueNumber(state.current_task)}`);
411415
const target = isMultiStepTask(state.current_task) && state.current_task.pr ? state.current_task.pr : getTaskIssueNumber(state.current_task);
412416
const msg = isMultiStepTask(state.current_task) && state.current_task.phase === 'FEEDBACK' ? "/oc fix Address review feedback."
413-
: isMultiStepTask(state.current_task) && state.current_task.phase === 'REVIEW' ? "/oc review"
417+
: isMultiStepTask(state.current_task) && state.current_task.phase === 'REVIEW' ? getReviewPrompt()
414418
: "/oc Please proceed with this task.";
415419
await triggerTask(target, msg);
416420
}
@@ -491,7 +495,7 @@ module.exports = async ({ github, context, core }) => {
491495
task.pr = foundPr;
492496
task.phase = 'REVIEW';
493497
task.retries = 0;
494-
await triggerTask(task.pr, "/oc review");
498+
await triggerTask(task.pr, getReviewPrompt());
495499
} else {
496500
// No PR found, treat as completed
497501
state.completed.push(task.issue);
@@ -559,7 +563,7 @@ module.exports = async ({ github, context, core }) => {
559563
} else {
560564
task.phase = 'REVIEW';
561565
task.retries = 0;
562-
await triggerTask(task.pr, "/oc review");
566+
await triggerTask(task.pr, getReviewPrompt());
563567
}
564568
}
565569
break;
@@ -576,7 +580,7 @@ module.exports = async ({ github, context, core }) => {
576580
isDone = true;
577581
} else {
578582
const target = task.pr ? task.pr : task.issue;
579-
const msg = task.phase === 'FEEDBACK' ? "/oc fix Address review feedback." : task.phase === 'REVIEW' ? "/oc review" : "/oc Please proceed with this task.";
583+
const msg = task.phase === 'FEEDBACK' ? "/oc fix Address review feedback." : task.phase === 'REVIEW' ? getReviewPrompt() : "/oc Please proceed with this task.";
580584
await triggerTask(target, msg);
581585
await updateMasterIssue(masterIssueNumber, state);
582586
return;

.github/scripts/tests/orchestrator.test.js

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ function createMocks(eventName = 'issue_comment', commentBody = '/orchestrate')
1111
};
1212

1313
const calls = { getIssue: [], updateIssue: [], createComment: [], listComments: [], addLabels: [], removeLabel: [] };
14+
const updateIssueDetails = [];
1415
let masterIssueBody = `- [ ] #2\n- [ ] #3`;
1516
let currentTaskData = { labels: [], state: 'open' };
1617
let prData = { labels: [], state: 'open', head: { ref: 'opencode/issue2-fix' } };
@@ -26,6 +27,7 @@ function createMocks(eventName = 'issue_comment', commentBody = '/orchestrate')
2627
},
2728
update: async ({ issue_number, body, state }) => {
2829
calls.updateIssue.push(issue_number);
30+
updateIssueDetails.push({ issue_number, body, state });
2931
if (issue_number === 1 && body) masterIssueBody = body;
3032
},
3133
createComment: async ({ issue_number, body }) => {
@@ -69,7 +71,7 @@ function createMocks(eventName = 'issue_comment', commentBody = '/orchestrate')
6971
};
7072
}
7173

72-
return { core, github, context, logs, calls, setMasterBody: (b) => masterIssueBody = b, setTaskData: (d) => currentTaskData = d };
74+
return { core, github, context, logs, calls, updateIssueDetails, setMasterBody: (b) => masterIssueBody = b, setTaskData: (d) => currentTaskData = d };
7375
}
7476

7577
test('orchestrator.js test suite', async (t) => {
@@ -105,6 +107,21 @@ test('orchestrator.js test suite', async (t) => {
105107
assert.strictEqual(calls.createComment.length, 1, 'Should trigger review phase');
106108
assert.strictEqual(calls.createComment[0].issue_number, 99, 'Triggers on PR #99');
107109
assert.ok(calls.createComment[0].body.includes('/oc review'));
110+
assert.ok(calls.createComment[0].body.includes('Verdict: approve'));
111+
assert.ok(calls.createComment[0].body.includes('Verdict: request-changes'));
112+
});
113+
114+
await t.test('Review phase accepts explicit approve verdict', async () => {
115+
const { core, github, context, calls, updateIssueDetails, setMasterBody, setTaskData } = createMocks('workflow_run');
116+
117+
setMasterBody(`<!-- ORCHESTRATOR_STATE:\n{ "status": "IN_PROGRESS", "current_task": { "issue": 2, "pr": 99, "phase": "REVIEW", "turn": 0, "max_turns": 3, "retries": 0 }, "queue": [3], "completed": [], "failed": [], "order": [2, 3] }\n-->`);
118+
setTaskData({ labels: [], state: 'open' });
119+
github.rest.issues.listComments = async () => ({ data: [{ body: 'Verdict: approve' }] });
120+
121+
await runOrchestrator({ github, context, core });
122+
123+
assert.ok(calls.createComment.some(c => c.issue_number === 99 && c.body.includes('Agent review passed')));
124+
assert.ok(updateIssueDetails.some(u => u.issue_number === 1 && u.body.includes('HUMAN_REVIEW')));
108125
});
109126

110127
await t.test('Workflow run handles task failure (retries)', async () => {

0 commit comments

Comments
 (0)