Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
9fad948
Convert module cleanup to GitHub agentic workflow
trask May 6, 2026
163ae89
Fork-test tweaks: workflow_dispatch only, fork repo guard, drop otelb…
trask May 6, 2026
898a366
Test: disable AWF sandbox to bypass api-proxy gpt-5.5 routing issue
trask May 6, 2026
39082bd
Make create_pull_request mandatory whenever commits exist
trask May 6, 2026
f1a99b0
Module cleanup: simplify finalize, drop retry loops + FORCE_MODULE
trask May 7, 2026
f94b42c
Move MODULE env from frontmatter to GITHUB_ENV in pre-LLM step
trask May 7, 2026
928329c
Fix: absolute patch path before cd into wip worktree
trask May 7, 2026
836e47a
finalize: don't abort on gh pr create failure
trask May 7, 2026
ddd36e5
Per-chain wip branch (module-cleanup-wip-<chain_id>)
trask May 7, 2026
434666a
Remove stray invalid.yml artifact
trask May 7, 2026
3395506
up
trask May 7, 2026
d6d086a
Adopt orphan wip branches; delete wip on PR open
trask May 7, 2026
302e9d2
Switch to fixed module-cleanup-wip branch (atomic rename on flush)
trask May 7, 2026
e6dff9d
Cleanup for apache-dubbo-2.7:testing
otelbot[bot] May 8, 2026
3efd266
Cleanup for apache-httpclient-2.0:javaagent
otelbot[bot] May 8, 2026
0773a6f
Cleanup for apache-httpclient-5.0:javaagent
otelbot[bot] May 8, 2026
649d185
Cleanup for armeria-1.3:library
otelbot[bot] May 8, 2026
029ae50
Cleanup for aws-lambda-core-1.0:javaagent
otelbot[bot] May 8, 2026
b2a3a03
Cleanup for aws-lambda-events-2.2:javaagent
otelbot[bot] May 8, 2026
cbf4685
Cleanup for aws-lambda-events-common-2.2:library
otelbot[bot] May 8, 2026
2875f05
Cleanup for aws-sdk-1.11:library
otelbot[bot] May 8, 2026
bfa120c
Cleanup for aws-sdk-1.11:library-autoconfigure
otelbot[bot] May 8, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
*.cmd text eol=crlf

licenses/** linguist-generated

.github/workflows/*.lock.yml linguist-generated=true merge=ours
14 changes: 14 additions & 0 deletions .github/aw/actions-lock.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"entries": {
"actions/github-script@v9.0.0": {
"repo": "actions/github-script",
"version": "v9.0.0",
"sha": "3a2844b7e9c422d3c10d287c895573f7108da1b3"
},
"github/gh-aw-actions/setup@v0.71.5": {
"repo": "github/gh-aw-actions/setup",
"version": "v0.71.5",
"sha": "b8068426813005612b960b5ab0b8bd2c27142323"
}
}
}
84 changes: 42 additions & 42 deletions .github/scripts/module-cleanup/build-cleanup-matrix.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,60 @@
#!/usr/bin/env python3
"""Build the ordered list of instrumentation modules for this review run.
"""Pick the next instrumentation module for this cleanup run.

Reads module list from settings.gradle.kts, filters out already-reviewed
modules (read from the otelbot/module-cleanup-progress branch by the workflow
and passed via REVIEW_PROGRESS), respects the open-PR cap, and writes a
`modules` JSON array + `has_work` flag to $GITHUB_OUTPUT.
Reads the module list from settings.gradle.kts, filters out already-processed
modules (passed via REVIEW_PROGRESS), and emits a single module to walk this
run plus a count of how many unprocessed modules remain after it.

The review job processes modules sequentially on a single branch, stopping
after it accumulates at least `FILE_THRESHOLD` modified files, so the list
emitted here is an upper-bound slice the job is allowed to walk through.
The workflow chains itself one module at a time. The finalize step uses
`queue_remaining` to decide whether to self-dispatch or flush the pending
queue into a PR.

Environment variables:
GITHUB_OUTPUT - path to the GitHub Actions output file
GH_TOKEN - token for `gh` CLI (set automatically by the workflow)
REVIEW_PROGRESS - newline-separated list of reviewed module names
(contents of reviewed.txt on the progress branch)
GITHUB_OUTPUT - path to the GitHub Actions output file
GH_TOKEN - token for `gh` CLI (set automatically by the workflow)
REVIEW_PROGRESS - newline-separated list of processed module names
(contents of processed.txt on the memory branch, plus
shorts already in inflight module-cleanup PR bodies)

Outputs (to $GITHUB_OUTPUT):
has_work - "true" if a module was picked, "false" otherwise
short_name - picked module's gradle short name (e.g. "akka-actor:javaagent")
module_dir - picked module's repo-relative directory
queue_remaining - count of unprocessed modules left AFTER this one
"""

import json
import os
import re
import subprocess
from pathlib import Path

# Gradle settings file scanned by parse_modules(); the path is relative, so it
# is resolved against the process working directory.
SETTINGS_FILE = "settings.gradle.kts"
# Skip the run entirely if at least this many automated review PRs are already open.
# Skip the run entirely if at least this many module-cleanup PRs are already open.
MAX_OPEN_PRS = 5
# Upper bound on modules the review job will walk through in a single run,
# even if the file-count threshold is never reached. Keeps one run bounded.
MODULE_LIMIT_PER_RUN = 50


def parse_modules(settings_file: "str | None" = None) -> list[tuple[str, str]]:
    """Return (gradle_name, module_dir) pairs parsed from the Gradle settings file.

    Args:
        settings_file: Path of the settings file to scan. When omitted, the
            module-level ``SETTINGS_FILE`` is used, so existing zero-argument
            callers behave as before.

    Returns:
        One pair per ``include(":instrumentation:...")`` entry that has at
        least two colon-separated segments, ordered by the sorted entry text.
        ``gradle_name`` is the last two segments joined by ":" and
        ``module_dir`` is "instrumentation/" followed by all segments joined
        by "/".

    Raises:
        OSError: If the settings file cannot be read.
    """
    source = SETTINGS_FILE if settings_file is None else settings_file
    text = Path(source).read_text(encoding="utf-8")
    # Match include(":instrumentation:activej-http:6.0:javaagent")
    raw = re.findall(r'include\(":instrumentation:([^"]+)"\)', text)
    pairs = []
    for entry in sorted(raw):
        parts = entry.split(":")
        # Skip shared/helper modules (e.g. "cdi-testing") that don't follow the
        # <library>:<variant> layout used for real instrumentation modules.
        if len(parts) < 2:
            continue
        module_dir = "instrumentation/" + "/".join(parts)
        # Gradle module name: second-to-last:last
        gradle_name = f"{parts[-2]}:{parts[-1]}"
        pairs.append((gradle_name, module_dir))
    return pairs


def load_reviewed() -> set[str]:
"""Load already-reviewed module names from the REVIEW_PROGRESS env var."""
def load_processed() -> set[str]:
"""Load already-processed module names from the REVIEW_PROGRESS env var."""
# An unset REVIEW_PROGRESS is treated the same as an empty list.
progress = os.environ.get("REVIEW_PROGRESS", "")
# One name per line: surrounding whitespace is trimmed and blank lines are
# dropped; the set collapses any duplicate names.
return {line.strip() for line in progress.splitlines() if line.strip()}


def count_open_prs() -> int:
"""Count open PRs with the module cleanup label."""
result = subprocess.run(
["gh", "pr", "list", "--label", "module cleanup",
"--state", "open", "--json", "number", "--jq", "length"],
Expand All @@ -67,46 +64,49 @@ def count_open_prs() -> int:


def write_output(key: str, value: str) -> None:
    """Append one ``key=value`` line to the file named by ``$GITHUB_OUTPUT``.

    Args:
        key: Name of the step output.
        value: Value of the step output; must fit on a single line because
            each output is written as exactly one ``key=value`` line.

    Raises:
        ValueError: If ``value`` contains a newline.
        KeyError: If ``GITHUB_OUTPUT`` is not set in the environment.
    """
    # Explicit raise instead of ``assert``: assertions are removed when Python
    # runs with -O, which would let a multi-line value corrupt the output file.
    if "\n" in value:
        raise ValueError(f"multi-line $GITHUB_OUTPUT value not supported: {value!r}")
    with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f:
        f.write(f"{key}={value}\n")


def emit_no_work() -> None:
    """Write the "nothing to do" set of outputs.

    Emits ``has_work=false``, empty ``short_name`` and ``module_dir``, and
    ``queue_remaining=0``, in that order, through ``write_output``.
    """
    idle_outputs = (
        ("has_work", "false"),
        ("short_name", ""),
        ("module_dir", ""),
        ("queue_remaining", "0"),
    )
    for output_key, output_value in idle_outputs:
        write_output(output_key, output_value)


def main() -> None:
# Entry point: choose at most one unprocessed module and publish the result
# through write_output()/emit_no_work().
all_modules = parse_modules()
print(f"Total instrumentation modules: {len(all_modules)}")

reviewed = load_reviewed()
print(f"Already reviewed: {len(reviewed)}")
processed = load_processed()
print(f"Already processed: {len(processed)}")

remaining = [(name, d) for name, d in all_modules if name not in reviewed]
# Keep only modules whose short name is absent from the processed set;
# the order produced by parse_modules() is preserved.
remaining = [(n, d) for n, d in all_modules if n not in processed]
print(f"Remaining modules: {len(remaining)}")

if not remaining:
print("All modules have been reviewed!")
write_output("has_work", "false")
write_output("modules", "[]")
print("All modules have been processed!")
emit_no_work()
return

# The open-PR lookup runs only when modules remain, so an exhausted queue
# returns above without calling count_open_prs().
open_prs = count_open_prs()
print(f"Open review PRs: {open_prs}")

print(f"Open module-cleanup PRs: {open_prs}")
# At or above the cap, report "no work" instead of picking a module.
if open_prs >= MAX_OPEN_PRS:
print(f"PR cap reached ({open_prs} open >= {MAX_OPEN_PRS}). Skipping this cycle.")
write_output("has_work", "false")
write_output("modules", "[]")
emit_no_work()
return

batch = remaining[:MODULE_LIMIT_PER_RUN]
print(f"Dispatching {len(batch)} modules (upper bound for this run)")

modules = [{"short_name": name, "module_dir": d} for name, d in batch]
modules_json = json.dumps(modules)
print(json.dumps(modules, indent=2))
# parse_modules() walks the settings entries in sorted order, so the first
# remaining pair is the earliest unprocessed entry in that order.
short_name, module_dir = remaining[0]
# Exclude the module picked above from the remaining count.
queue_remaining = len(remaining) - 1
print(f"Picked: {short_name} ({module_dir})")
print(f"Queue remaining after this run: {queue_remaining}")

write_output("has_work", "true")
write_output("modules", modules_json)
write_output("short_name", short_name)
write_output("module_dir", module_dir)
# write_output() takes string values, hence the explicit str().
write_output("queue_remaining", str(queue_remaining))


if __name__ == "__main__":
Expand Down
Loading