diff --git a/.github/codex/prompts/pr-labels.md b/.github/codex/prompts/pr-labels.md index 4e47c93612..d1f3d73a5e 100644 --- a/.github/codex/prompts/pr-labels.md +++ b/.github/codex/prompts/pr-labels.md @@ -3,11 +3,12 @@ You are Codex running in CI to propose labels for a pull request in the openai-agents-python repository. Inputs: +- PR context: .tmp/pr-labels/pr-context.json - PR diff: .tmp/pr-labels/changes.diff - Changed files: .tmp/pr-labels/changed-files.txt Task: -- Inspect the diff and changed files. +- Inspect the PR context, diff, and changed files. - Output JSON with a single top-level key: "labels" (array of strings). - Only use labels from the allowed list. - Prefer false negatives over false positives. If you are unsure, leave the label out. @@ -53,7 +54,7 @@ Label rules: - feature:voice: Voice pipeline behavior is a primary deliverable of the PR. Decision process: -1. Determine the PR's primary intent in one sentence from the title and dominant runtime diff. +1. Determine the PR's primary intent in one sentence from the PR title/body and dominant runtime diff. 2. Start with zero labels. 3. Add `bug` or `enhancement` conservatively. 4. Add only the minimum `feature:*` labels needed to describe the primary surface area. diff --git a/.github/codex/schemas/pr-labels.json b/.github/codex/schemas/pr-labels.json new file mode 100644 index 0000000000..5a1249a48a --- /dev/null +++ b/.github/codex/schemas/pr-labels.json @@ -0,0 +1,29 @@ +{ + "type": "object", + "additionalProperties": false, + "required": ["labels"], + "properties": { + "labels": { + "type": "array", + "uniqueItems": true, + "items": { + "type": "string", + "enum": [ + "documentation", + "project", + "bug", + "enhancement", + "dependencies", + "feature:chat-completions", + "feature:core", + "feature:lite-llm", + "feature:mcp", + "feature:realtime", + "feature:sessions", + "feature:tracing", + "feature:voice" + ] + } + } + } +} diff --git a/.github/scripts/pr_labels.py b/.github/scripts/pr_labels.py index b1d7e720d9..037f62f9be 100644 --- a/.github/scripts/pr_labels.py +++ b/.github/scripts/pr_labels.py @@ -26,6 +26,19 @@ "feature:voice", } +DETERMINISTIC_LABELS: Final[set[str]] = { + "documentation", + "project", + "dependencies", +} + +MODEL_ONLY_LABELS: Final[set[str]] = { + "bug", + "enhancement", +} + +FEATURE_LABELS: Final[set[str]] = ALLOWED_LABELS - DETERMINISTIC_LABELS - MODEL_ONLY_LABELS + SOURCE_FEATURE_PREFIXES: Final[dict[str, tuple[str, ...]]] = { "feature:realtime": ("src/agents/realtime/",), "feature:voice": ("src/agents/voice/",), @@ -201,27 +214,30 @@ def load_json(path: pathlib.Path) -> Any: return json.loads(path.read_text()) -def load_codex_labels(path: pathlib.Path) -> list[str]: +def load_codex_labels(path: pathlib.Path) -> tuple[list[str], bool]: if not path.exists(): - return [] + return [], False raw = path.read_text().strip() if not raw: - return [] + return [], False try: payload = load_json(path) except json.JSONDecodeError: - return [] + return [], False if not isinstance(payload, dict): - return [] + return [], False - labels = payload.get("labels", []) + labels = payload.get("labels") if not isinstance(labels, list): - return [] + return [], False - return [label for label in labels if isinstance(label, str)] + if not all(isinstance(label, str) for label in labels): + return [], False + + return list(labels), True def fetch_existing_labels(pr_number: str) -> set[str]: @@ -237,6 +253,7 @@ def compute_desired_labels( changed_files: Sequence[str], diff_text: str, codex_ran: bool, + codex_output_valid: bool, codex_labels: Sequence[str], base_sha: str | None, head_sha: str | None, @@ -257,7 +274,7 @@ def compute_desired_labels( if dependencies_allowed: desired.add("dependencies") - if codex_ran: + if codex_ran and codex_output_valid: for label in codex_labels: if label == "dependencies" and not dependencies_allowed: continue @@ -269,6 +286,13 @@ def compute_desired_labels( return desired +def compute_managed_labels(*, codex_ran: bool, codex_output_valid: bool) -> set[str]: + managed = DETERMINISTIC_LABELS | FEATURE_LABELS + if codex_ran and codex_output_valid: + managed |= MODEL_ONLY_LABELS + return managed + + def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace: parser = argparse.ArgumentParser() parser.add_argument("--pr-number", default=os.environ.get("PR_NUMBER", "")) @@ -308,18 +332,29 @@ def main(argv: Sequence[str] | None = None) -> int: ] diff_text = changes_diff_path.read_text() if changes_diff_path.exists() else "" + codex_labels, codex_output_valid = load_codex_labels(codex_output_path) + if codex_ran and not codex_output_valid: + print( + "Codex output missing or invalid; using fallback feature labels and preserving " + "model-only labels." + ) desired = compute_desired_labels( changed_files=changed_files, diff_text=diff_text, codex_ran=codex_ran, - codex_labels=load_codex_labels(codex_output_path), + codex_output_valid=codex_output_valid, + codex_labels=codex_labels, base_sha=args.base_sha or None, head_sha=args.head_sha or None, ) existing = fetch_existing_labels(args.pr_number) + managed_labels = compute_managed_labels( + codex_ran=codex_ran, + codex_output_valid=codex_output_valid, + ) to_add = sorted(desired - existing) - to_remove = sorted((existing & ALLOWED_LABELS) - desired) + to_remove = sorted((existing & managed_labels) - desired) if not to_add and not to_remove: print("Labels already up to date.") diff --git a/.github/workflows/pr-labels.yml b/.github/workflows/pr-labels.yml index 6a3bf52c8b..0d73ade160 100644 --- a/.github/workflows/pr-labels.yml +++ b/.github/workflows/pr-labels.yml @@ -65,6 +65,8 @@ jobs: core.setOutput('head_sha', pr.head.sha); core.setOutput('head_repo', headRepo); core.setOutput('is_fork', headRepo !== repoFullName); + core.setOutput('title', pr.title || ''); + core.setOutput('body', pr.body || ''); - name: Checkout base uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd @@ -81,14 +83,36 @@ jobs: "https://github.com/${PR_HEAD_REPO}.git" \ "${PR_HEAD_SHA}" - name: Collect PR diff + id: diff env: PR_BASE_SHA: ${{ steps.pr.outputs.base_sha }} PR_HEAD_SHA: ${{ steps.pr.outputs.head_sha }} + PR_TITLE: ${{ steps.pr.outputs.title }} + PR_BODY: ${{ steps.pr.outputs.body }} run: | set -euo pipefail mkdir -p .tmp/pr-labels - git diff --name-only "$PR_BASE_SHA" "$PR_HEAD_SHA" > .tmp/pr-labels/changed-files.txt - git diff "$PR_BASE_SHA" "$PR_HEAD_SHA" > .tmp/pr-labels/changes.diff + diff_base_sha="$(git merge-base "$PR_BASE_SHA" "$PR_HEAD_SHA")" + echo "diff_base_sha=${diff_base_sha}" >> "$GITHUB_OUTPUT" + git diff --name-only "$diff_base_sha" "$PR_HEAD_SHA" > .tmp/pr-labels/changed-files.txt + git diff "$diff_base_sha" "$PR_HEAD_SHA" > .tmp/pr-labels/changes.diff + python - <<'PY' + import json + import os + import pathlib + + pathlib.Path(".tmp/pr-labels/pr-context.json").write_text( + json.dumps( + { + "title": os.environ.get("PR_TITLE", ""), + "body": os.environ.get("PR_BODY", ""), + }, + ensure_ascii=False, + indent=2, + ) + + "\n" + ) + PY - name: Prepare Codex output id: codex-output run: | @@ -105,13 +129,14 @@ jobs: openai-api-key: ${{ secrets.PROD_OPENAI_API_KEY }} prompt-file: .github/codex/prompts/pr-labels.md output-file: ${{ steps.codex-output.outputs.output_file }} + output-schema-file: .github/codex/schemas/pr-labels.json safety-strategy: drop-sudo sandbox: read-only - name: Apply labels env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} PR_NUMBER: ${{ steps.pr.outputs.pr_number }} - PR_BASE_SHA: ${{ steps.pr.outputs.base_sha }} + PR_BASE_SHA: ${{ steps.diff.outputs.diff_base_sha }} PR_HEAD_SHA: ${{ steps.pr.outputs.head_sha }} CODEX_OUTPUT_PATH: ${{ steps.codex-output.outputs.output_file }} CODEX_CONCLUSION: ${{ steps.run_codex.conclusion }} diff --git a/tests/test_pr_labels.py b/tests/test_pr_labels.py index fa834bc8bc..fd0b164d8f 100644 --- a/tests/test_pr_labels.py +++ b/tests/test_pr_labels.py @@ -43,9 +43,32 @@ def test_compute_desired_labels_removes_stale_fallback_labels() -> None: changed_files=["src/agents/models/chatcmpl_converter.py"], diff_text="", codex_ran=False, + codex_output_valid=False, codex_labels=[], base_sha=None, head_sha=None, ) assert desired == {"feature:chat-completions"} + + +def test_compute_desired_labels_falls_back_when_codex_output_is_invalid() -> None: + desired = pr_labels.compute_desired_labels( + changed_files=["src/agents/run_internal/approvals.py"], + diff_text="", + codex_ran=True, + codex_output_valid=False, + codex_labels=[], + base_sha=None, + head_sha=None, + ) + + assert desired == {"feature:core"} + + +def test_compute_managed_labels_preserves_model_only_labels_without_valid_codex_output() -> None: + managed = pr_labels.compute_managed_labels(codex_ran=True, codex_output_valid=False) + + assert "bug" not in managed + assert "enhancement" not in managed + assert "feature:core" in managed