Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/codex/prompts/pr-labels.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
You are Codex running in CI to propose labels for a pull request in the openai-agents-python repository.

Inputs:
- PR context: .tmp/pr-labels/pr-context.json
- PR diff: .tmp/pr-labels/changes.diff
- Changed files: .tmp/pr-labels/changed-files.txt

Task:
- Inspect the PR context, diff, and changed files.
- Output JSON with a single top-level key: "labels" (array of strings).
- Only use labels from the allowed list.
- Prefer false negatives over false positives. If you are unsure, leave the label out.
Expand Down Expand Up @@ -53,7 +54,7 @@ Label rules:
- feature:voice: Voice pipeline behavior is a primary deliverable of the PR.

Decision process:
1. Determine the PR's primary intent in one sentence from the PR title/body and dominant runtime diff.
2. Start with zero labels.
3. Add `bug` or `enhancement` conservatively.
4. Add only the minimum `feature:*` labels needed to describe the primary surface area.
Expand Down
29 changes: 29 additions & 0 deletions .github/codex/schemas/pr-labels.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"type": "object",
"additionalProperties": false,
"required": ["labels"],
"properties": {
"labels": {
"type": "array",
"uniqueItems": true,
"items": {
"type": "string",
"enum": [
"documentation",
"project",
"bug",
"enhancement",
"dependencies",
"feature:chat-completions",
"feature:core",
"feature:lite-llm",
"feature:mcp",
"feature:realtime",
"feature:sessions",
"feature:tracing",
"feature:voice"
]
}
}
}
}
57 changes: 46 additions & 11 deletions .github/scripts/pr_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@
"feature:voice",
}

# Labels that compute_managed_labels always treats as managed, i.e. they may be
# removed from a PR even when Codex did not run or produced unusable output.
# NOTE(review): presumably these are derived from the changed files/diff alone;
# confirm against compute_desired_labels.
DETERMINISTIC_LABELS: Final[set[str]] = {
    "documentation",
    "project",
    "dependencies",
}

# Labels that are only managed when Codex ran AND its output validated.
# Without valid Codex output they are excluded from the managed set, so any
# such label already on the PR is left in place rather than removed.
MODEL_ONLY_LABELS: Final[set[str]] = {
    "bug",
    "enhancement",
}

# Every allowed label that is neither deterministic nor model-only.
# These are always part of the managed set (see compute_managed_labels).
FEATURE_LABELS: Final[set[str]] = ALLOWED_LABELS - DETERMINISTIC_LABELS - MODEL_ONLY_LABELS

SOURCE_FEATURE_PREFIXES: Final[dict[str, tuple[str, ...]]] = {
"feature:realtime": ("src/agents/realtime/",),
"feature:voice": ("src/agents/voice/",),
Expand Down Expand Up @@ -201,27 +214,30 @@ def load_json(path: pathlib.Path) -> Any:
return json.loads(path.read_text())


def load_codex_labels(path: pathlib.Path) -> tuple[list[str], bool]:
    """Read the label list Codex wrote to *path* and report whether it is usable.

    Args:
        path: Location of the Codex output file (expected to hold a JSON
            object with a ``"labels"`` array of strings).

    Returns:
        A ``(labels, valid)`` pair. ``valid`` is ``True`` only when the file
        exists, is non-empty, decodes as UTF-8, parses as a JSON object, and
        its ``"labels"`` value is a list made up exclusively of strings.
        In every other case the result is ``([], False)`` so callers can tell
        "Codex said no labels" (``([], True)``) apart from "Codex output is
        missing or malformed".
    """
    if not path.exists():
        return [], False

    try:
        raw = path.read_text(encoding="utf-8").strip()
    except UnicodeDecodeError:
        # Undecodable bytes are just another form of malformed output.
        return [], False
    if not raw:
        return [], False

    try:
        # Parse the text we already hold instead of reading the file again.
        payload = json.loads(raw)
    except json.JSONDecodeError:
        return [], False

    if not isinstance(payload, dict):
        return [], False

    labels = payload.get("labels")
    if not isinstance(labels, list):
        return [], False

    # A single non-string entry invalidates the whole payload rather than
    # being silently dropped, so partial output is never trusted.
    if not all(isinstance(label, str) for label in labels):
        return [], False

    return list(labels), True


def fetch_existing_labels(pr_number: str) -> set[str]:
Expand All @@ -237,6 +253,7 @@ def compute_desired_labels(
changed_files: Sequence[str],
diff_text: str,
codex_ran: bool,
codex_output_valid: bool,
codex_labels: Sequence[str],
base_sha: str | None,
head_sha: str | None,
Expand All @@ -257,7 +274,7 @@ def compute_desired_labels(
if dependencies_allowed:
desired.add("dependencies")

if codex_ran:
if codex_ran and codex_output_valid:
for label in codex_labels:
if label == "dependencies" and not dependencies_allowed:
continue
Expand All @@ -269,6 +286,13 @@ def compute_desired_labels(
return desired


def compute_managed_labels(*, codex_ran: bool, codex_output_valid: bool) -> set[str]:
    """Return the labels this script is allowed to add or remove on a PR.

    Deterministic and feature labels are always managed. The model-only
    labels join the set only when Codex both ran and produced valid output;
    otherwise they are left out so existing ones on the PR stay untouched.
    """
    model_output_trusted = codex_ran and codex_output_valid
    if not model_output_trusted:
        return DETERMINISTIC_LABELS | FEATURE_LABELS
    return DETERMINISTIC_LABELS | FEATURE_LABELS | MODEL_ONLY_LABELS


def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
parser = argparse.ArgumentParser()
parser.add_argument("--pr-number", default=os.environ.get("PR_NUMBER", ""))
Expand Down Expand Up @@ -308,18 +332,29 @@ def main(argv: Sequence[str] | None = None) -> int:
]

diff_text = changes_diff_path.read_text() if changes_diff_path.exists() else ""
codex_labels, codex_output_valid = load_codex_labels(codex_output_path)
if codex_ran and not codex_output_valid:
print(
"Codex output missing or invalid; using fallback feature labels and preserving "
"model-only labels."
)
desired = compute_desired_labels(
changed_files=changed_files,
diff_text=diff_text,
codex_ran=codex_ran,
codex_labels=load_codex_labels(codex_output_path),
codex_output_valid=codex_output_valid,
codex_labels=codex_labels,
base_sha=args.base_sha or None,
head_sha=args.head_sha or None,
)

existing = fetch_existing_labels(args.pr_number)
managed_labels = compute_managed_labels(
codex_ran=codex_ran,
codex_output_valid=codex_output_valid,
)
to_add = sorted(desired - existing)
to_remove = sorted((existing & ALLOWED_LABELS) - desired)
to_remove = sorted((existing & managed_labels) - desired)

if not to_add and not to_remove:
print("Labels already up to date.")
Expand Down
31 changes: 28 additions & 3 deletions .github/workflows/pr-labels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ jobs:
core.setOutput('head_sha', pr.head.sha);
core.setOutput('head_repo', headRepo);
core.setOutput('is_fork', headRepo !== repoFullName);
core.setOutput('title', pr.title || '');
core.setOutput('body', pr.body || '');

- name: Checkout base
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
Expand All @@ -81,14 +83,36 @@ jobs:
"https://github.com/${PR_HEAD_REPO}.git" \
"${PR_HEAD_SHA}"
- name: Collect PR diff
id: diff
env:
PR_BASE_SHA: ${{ steps.pr.outputs.base_sha }}
PR_HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
PR_TITLE: ${{ steps.pr.outputs.title }}
PR_BODY: ${{ steps.pr.outputs.body }}
run: |
set -euo pipefail
mkdir -p .tmp/pr-labels
git diff --name-only "$PR_BASE_SHA" "$PR_HEAD_SHA" > .tmp/pr-labels/changed-files.txt
git diff "$PR_BASE_SHA" "$PR_HEAD_SHA" > .tmp/pr-labels/changes.diff
diff_base_sha="$(git merge-base "$PR_BASE_SHA" "$PR_HEAD_SHA")"
echo "diff_base_sha=${diff_base_sha}" >> "$GITHUB_OUTPUT"
git diff --name-only "$diff_base_sha" "$PR_HEAD_SHA" > .tmp/pr-labels/changed-files.txt
git diff "$diff_base_sha" "$PR_HEAD_SHA" > .tmp/pr-labels/changes.diff
python - <<'PY'
import json
import os
import pathlib

# Persist the PR title/body (passed in through the environment) as JSON.
# ensure_ascii=False keeps non-ASCII characters verbatim in the output, so the
# file is written explicitly as UTF-8 instead of relying on the runner's
# locale-dependent default encoding.
pathlib.Path(".tmp/pr-labels/pr-context.json").write_text(
    json.dumps(
        {
            "title": os.environ.get("PR_TITLE", ""),
            "body": os.environ.get("PR_BODY", ""),
        },
        ensure_ascii=False,
        indent=2,
    )
    + "\n",
    encoding="utf-8",
)
PY
- name: Prepare Codex output
id: codex-output
run: |
Expand All @@ -105,13 +129,14 @@ jobs:
openai-api-key: ${{ secrets.PROD_OPENAI_API_KEY }}
prompt-file: .github/codex/prompts/pr-labels.md
output-file: ${{ steps.codex-output.outputs.output_file }}
output-schema-file: .github/codex/schemas/pr-labels.json
safety-strategy: drop-sudo
sandbox: read-only
- name: Apply labels
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ steps.pr.outputs.pr_number }}
PR_BASE_SHA: ${{ steps.pr.outputs.base_sha }}
PR_BASE_SHA: ${{ steps.diff.outputs.diff_base_sha }}
PR_HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
CODEX_OUTPUT_PATH: ${{ steps.codex-output.outputs.output_file }}
CODEX_CONCLUSION: ${{ steps.run_codex.conclusion }}
Expand Down
23 changes: 23 additions & 0 deletions tests/test_pr_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,32 @@ def test_compute_desired_labels_removes_stale_fallback_labels() -> None:
changed_files=["src/agents/models/chatcmpl_converter.py"],
diff_text="",
codex_ran=False,
codex_output_valid=False,
codex_labels=[],
base_sha=None,
head_sha=None,
)

assert desired == {"feature:chat-completions"}


def test_compute_desired_labels_falls_back_when_codex_output_is_invalid() -> None:
    """Codex ran but its output was unusable, so path-based fallback labels apply."""
    expected = {"feature:core"}

    computed = pr_labels.compute_desired_labels(
        changed_files=["src/agents/run_internal/approvals.py"],
        diff_text="",
        codex_ran=True,
        codex_output_valid=False,
        codex_labels=[],
        base_sha=None,
        head_sha=None,
    )

    assert computed == expected


def test_compute_managed_labels_preserves_model_only_labels_without_valid_codex_output() -> None:
    """Model-only labels must be unmanaged (never removed) when Codex output is invalid."""
    managed_set = pr_labels.compute_managed_labels(codex_ran=True, codex_output_valid=False)

    for model_only in ("bug", "enhancement"):
        assert model_only not in managed_set
    assert "feature:core" in managed_set
Loading