Skip to content

Commit f1a99b0

Browse files
committed
Module cleanup: simplify finalize, drop retry loops + FORCE_MODULE
1 parent 39082bd commit f1a99b0

5 files changed

Lines changed: 546 additions & 527 deletions

File tree

.github/scripts/module-cleanup/build-cleanup-matrix.py

Lines changed: 42 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,60 @@
11
#!/usr/bin/env python3
2-
"""Build the ordered list of instrumentation modules for this review run.
2+
"""Pick the next instrumentation module for this cleanup run.
33
4-
Reads module list from settings.gradle.kts, filters out already-reviewed
5-
modules (read from the otelbot/module-cleanup-progress branch by the workflow
6-
and passed via REVIEW_PROGRESS), respects the open-PR cap, and writes a
7-
`modules` JSON array + `has_work` flag to $GITHUB_OUTPUT.
4+
Reads the module list from settings.gradle.kts, filters out already-processed
5+
modules (passed via REVIEW_PROGRESS), and emits a single module to walk this
6+
run plus a count of how many unprocessed modules remain after it.
87
9-
The review job processes modules sequentially on a single branch, stopping
10-
after it accumulates at least `FILE_THRESHOLD` modified files, so the list
11-
emitted here is an upper-bound slice the job is allowed to walk through.
8+
The workflow chains itself one module at a time. The finalize step uses
9+
`queue_remaining` to decide whether to self-dispatch or flush the pending
10+
queue into a PR.
1211
1312
Environment variables:
14-
GITHUB_OUTPUT - path to the GitHub Actions output file
15-
GH_TOKEN - token for `gh` CLI (set automatically by the workflow)
16-
REVIEW_PROGRESS - newline-separated list of reviewed module names
17-
(contents of reviewed.txt on the progress branch)
13+
GITHUB_OUTPUT - path to the GitHub Actions output file
14+
GH_TOKEN - token for `gh` CLI (set automatically by the workflow)
15+
REVIEW_PROGRESS - newline-separated list of processed module names
16+
(contents of processed.txt on the memory branch, plus
17+
short names already listed in in-flight module-cleanup PR bodies)
18+
19+
Outputs (to $GITHUB_OUTPUT):
20+
has_work - "true" if a module was picked, "false" otherwise
21+
short_name - picked module's gradle short name (e.g. "akka-actor:javaagent")
22+
module_dir - picked module's repo-relative directory
23+
queue_remaining - count of unprocessed modules left AFTER this one
1824
"""
1925

20-
import json
2126
import os
2227
import re
2328
import subprocess
2429
from pathlib import Path
2530

2631
SETTINGS_FILE = "settings.gradle.kts"
27-
# Skip the run entirely if at least this many automated review PRs are already open.
32+
# Skip the run entirely if at least this many module-cleanup PRs are already open.
2833
MAX_OPEN_PRS = 5
29-
# Upper bound on modules the review job will walk through in a single run,
30-
# even if the file-count threshold is never reached. Keeps one run bounded.
31-
MODULE_LIMIT_PER_RUN = 50
3234

3335

3436
def parse_modules() -> list[tuple[str, str]]:
3537
"""Return list of (gradle_name, module_dir) from settings.gradle.kts."""
3638
text = Path(SETTINGS_FILE).read_text(encoding="utf-8")
37-
# Match include(":instrumentation:activej-http:6.0:javaagent")
3839
raw = re.findall(r'include\(":instrumentation:([^"]+)"\)', text)
3940
pairs = []
4041
for entry in sorted(raw):
4142
parts = entry.split(":")
42-
# Skip shared/helper modules (e.g. "cdi-testing") that don't follow the
43-
# <library>:<variant> layout used for real instrumentation modules.
4443
if len(parts) < 2:
4544
continue
4645
module_dir = "instrumentation/" + "/".join(parts)
47-
# Gradle module name: second-to-last:last
4846
gradle_name = f"{parts[-2]}:{parts[-1]}"
4947
pairs.append((gradle_name, module_dir))
5048
return pairs
5149

5250

53-
def load_reviewed() -> set[str]:
54-
"""Load already-reviewed module names from the REVIEW_PROGRESS env var."""
51+
def load_processed() -> set[str]:
52+
"""Load already-processed module names from the REVIEW_PROGRESS env var."""
5553
progress = os.environ.get("REVIEW_PROGRESS", "")
5654
return {line.strip() for line in progress.splitlines() if line.strip()}
5755

5856

5957
def count_open_prs() -> int:
60-
"""Count open PRs with the module cleanup label."""
6158
result = subprocess.run(
6259
["gh", "pr", "list", "--label", "module cleanup",
6360
"--state", "open", "--json", "number", "--jq", "length"],
@@ -67,46 +64,49 @@ def count_open_prs() -> int:
6764

6865

6966
def write_output(key: str, value: str) -> None:
70-
"""Append a key=value to $GITHUB_OUTPUT. Values must not contain newlines."""
7167
assert "\n" not in value, f"multi-line $GITHUB_OUTPUT value not supported: {value!r}"
7268
with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f:
7369
f.write(f"{key}={value}\n")
7470

7571

72+
def emit_no_work() -> None:
73+
write_output("has_work", "false")
74+
write_output("short_name", "")
75+
write_output("module_dir", "")
76+
write_output("queue_remaining", "0")
77+
78+
7679
def main() -> None:
7780
all_modules = parse_modules()
7881
print(f"Total instrumentation modules: {len(all_modules)}")
7982

80-
reviewed = load_reviewed()
81-
print(f"Already reviewed: {len(reviewed)}")
83+
processed = load_processed()
84+
print(f"Already processed: {len(processed)}")
8285

83-
remaining = [(name, d) for name, d in all_modules if name not in reviewed]
86+
remaining = [(n, d) for n, d in all_modules if n not in processed]
8487
print(f"Remaining modules: {len(remaining)}")
8588

8689
if not remaining:
87-
print("All modules have been reviewed!")
88-
write_output("has_work", "false")
89-
write_output("modules", "[]")
90+
print("All modules have been processed!")
91+
emit_no_work()
9092
return
9193

9294
open_prs = count_open_prs()
93-
print(f"Open review PRs: {open_prs}")
94-
95+
print(f"Open module-cleanup PRs: {open_prs}")
9596
if open_prs >= MAX_OPEN_PRS:
9697
print(f"PR cap reached ({open_prs} open >= {MAX_OPEN_PRS}). Skipping this cycle.")
97-
write_output("has_work", "false")
98-
write_output("modules", "[]")
98+
emit_no_work()
9999
return
100100

101-
batch = remaining[:MODULE_LIMIT_PER_RUN]
102-
print(f"Dispatching {len(batch)} modules (upper bound for this run)")
103-
104-
modules = [{"short_name": name, "module_dir": d} for name, d in batch]
105-
modules_json = json.dumps(modules)
106-
print(json.dumps(modules, indent=2))
101+
short_name, module_dir = remaining[0]
102+
queue_remaining = len(remaining) - 1
103+
print(f"Picked: {short_name} ({module_dir})")
104+
print(f"Queue remaining after this run: {queue_remaining}")
107105

108106
write_output("has_work", "true")
109-
write_output("modules", modules_json)
107+
write_output("short_name", short_name)
108+
write_output("module_dir", module_dir)
109+
write_output("queue_remaining", str(queue_remaining))
110110

111111

112112
if __name__ == "__main__":
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/bin/bash
2+
# Final action invoked by the LLM agent: format-patch the cleanup commit
3+
# range into /tmp/gh-aw/agent/cleanup.patch so gh-aw's auto-uploader
4+
# includes it in the `agent` workflow artifact. The finalize job then
5+
# downloads that artifact and applies the patch onto module-cleanup-wip.
6+
#
7+
# Idempotent; only writes the patch file under $OUT_DIR (default /tmp/gh-aw/agent) and may fetch origin/main. Does NOT push anything.
8+
#
9+
# Args:
10+
# $1 - module short_name (used for logging only)
11+
12+
set -euo pipefail
13+
14+
SHORT="${1:?short_name argument required}"
15+
OUT_DIR="${OUT_DIR:-/tmp/gh-aw/agent}"
16+
mkdir -p "$OUT_DIR"
17+
18+
if ! git rev-parse --verify origin/main >/dev/null 2>&1; then
19+
git fetch origin main --depth=1 || true
20+
fi
21+
22+
if [ -z "$(git log origin/main..HEAD --oneline 2>/dev/null || true)" ]; then
23+
echo "No commit produced by agent for $SHORT; nothing to export."
24+
exit 0
25+
fi
26+
27+
# Capture every commit the persona made on top of main. The persona is
28+
# expected to produce exactly one commit per its Phase 5 contract, but
29+
# format-patch range-form is robust if it makes more than one.
30+
git format-patch origin/main..HEAD --stdout > "$OUT_DIR/cleanup.patch"
31+
echo "Wrote cleanup patch for $SHORT to $OUT_DIR/cleanup.patch"
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
#!/bin/bash
2+
# Finalize: single writer for both module-cleanup-wip and the
3+
# memory/module-cleanup branch. Runs after the agent job (regardless of
4+
# whether the agent succeeded, no-oped, or failed).
5+
#
6+
# Steps:
7+
# 1. Append <short> to memory/module-cleanup:processed.txt; if the
8+
# agent failed, also append to failed.txt. This guarantees a failing
9+
# module is recorded as "processed" (so it isn't retried in a loop)
10+
# AND logged as a failure for diagnostics.
11+
# 2. If the agent produced a cleanup patch, apply it onto
12+
# module-cleanup-wip and push.
13+
# 3. If wip is ahead of origin/main and either its diff has reached FLUSH_THRESHOLD files OR
14+
# the queue is empty, cut a batch branch from wip, open the PR,
15+
# and reset wip back to origin/main.
16+
# 4. Self-dispatch the workflow unless we just opened a PR or the
17+
# queue is empty (cron will pick up later).
18+
#
19+
# No rebase-retry loops on push: the workflow uses
20+
# concurrency.group=module-cleanup with cancel-in-progress=false, so this
21+
# job is the only writer of either branch and runs serialized across
22+
# workflow runs.
23+
#
24+
# Required env:
25+
# GH_TOKEN - token with contents:write, pull-requests:write,
26+
# and actions:write
27+
# GITHUB_REPOSITORY - owner/repo
28+
# SHORT_NAME - the module short_name processed this run
29+
# AGENT_RESULT - needs.agent.result ('success'|'failure'|'cancelled'|'skipped')
30+
# ARTIFACT_DIR - directory of the downloaded `agent` artifact
31+
# (may or may not contain cleanup.patch)
32+
# QUEUE_REMAINING - count of unprocessed modules left after this one
33+
#
34+
# Optional env:
35+
# FLUSH_THRESHOLD - file count that triggers a PR (default 10)
36+
# WORKFLOW_FILE - workflow file name for self-dispatch (default: module-cleanup.lock.yml)
37+
# MEMORY_BRANCH - default: memory/module-cleanup
38+
# WIP_BRANCH - default: module-cleanup-wip
39+
40+
set -euo pipefail
41+
42+
MEMORY_BRANCH="${MEMORY_BRANCH:-memory/module-cleanup}"
43+
WIP_BRANCH="${WIP_BRANCH:-module-cleanup-wip}"
44+
THRESHOLD="${FLUSH_THRESHOLD:-10}"
45+
QUEUE_REMAINING="${QUEUE_REMAINING:-0}"
46+
REPO="${GITHUB_REPOSITORY:?GITHUB_REPOSITORY required}"
47+
WORKFLOW_FILE="${WORKFLOW_FILE:-module-cleanup.lock.yml}"
48+
SHORT="${SHORT_NAME:?SHORT_NAME required}"
49+
AGENT_RESULT="${AGENT_RESULT:-failure}"
50+
ARTIFACT_DIR="${ARTIFACT_DIR:-./agent-artifact}"
51+
52+
git fetch origin main --depth=1
53+
git fetch origin "$MEMORY_BRANCH" --depth=1 2>/dev/null || true
54+
git fetch origin "$WIP_BRANCH" --depth=1 2>/dev/null || true
55+
56+
# ---- 1. Update processed.txt (and failed.txt on failure) ----
57+
58+
MEM_WT=/tmp/memory-wt
59+
rm -rf "$MEM_WT"
60+
if git rev-parse --verify "origin/$MEMORY_BRANCH" >/dev/null 2>&1; then
61+
git worktree add -B "$MEMORY_BRANCH" "$MEM_WT" "origin/$MEMORY_BRANCH"
62+
else
63+
git worktree add --orphan -B "$MEMORY_BRANCH" "$MEM_WT"
64+
rm -rf "$MEM_WT"/*
65+
fi
66+
67+
PROCESSED="$MEM_WT/processed.txt"
68+
FAILED="$MEM_WT/failed.txt"
69+
70+
touch "$PROCESSED"
71+
if ! grep -Fxq "$SHORT" "$PROCESSED"; then
72+
echo "$SHORT" >> "$PROCESSED"
73+
fi
74+
75+
if [ "$AGENT_RESULT" != "success" ]; then
76+
ts=$(date -u +%Y-%m-%dT%H:%M:%SZ)
77+
echo -e "$SHORT\t$ts\tagent_result=$AGENT_RESULT" >> "$FAILED"
78+
fi
79+
80+
(
81+
cd "$MEM_WT"
82+
git add -A
83+
if ! git diff --cached --quiet; then
84+
git commit -m "Mark $SHORT processed (agent_result=$AGENT_RESULT)"
85+
git push origin "$MEMORY_BRANCH"
86+
fi
87+
)
88+
89+
# ---- 2. Apply cleanup patch (if any) onto wip ----
90+
91+
PATCH_SRC=""
92+
for candidate in \
93+
"$ARTIFACT_DIR/agent/cleanup.patch" \
94+
"$ARTIFACT_DIR/tmp/gh-aw/agent/cleanup.patch" \
95+
"$ARTIFACT_DIR/cleanup.patch"; do
96+
if [ -f "$candidate" ]; then
97+
PATCH_SRC="$candidate"
98+
echo "Found cleanup patch at $candidate"
99+
break
100+
fi
101+
done
102+
if [ -z "$PATCH_SRC" ]; then
103+
echo "No cleanup.patch (no-op or agent failed before commit)."
104+
fi
105+
106+
WIP_WT=/tmp/wip-wt
107+
rm -rf "$WIP_WT"
108+
if git rev-parse --verify "origin/$WIP_BRANCH" >/dev/null 2>&1; then
109+
git worktree add -B "$WIP_BRANCH" "$WIP_WT" "origin/$WIP_BRANCH"
110+
else
111+
git worktree add -B "$WIP_BRANCH" "$WIP_WT" origin/main
112+
fi
113+
114+
if [ -n "$PATCH_SRC" ]; then
115+
(
116+
cd "$WIP_WT"
117+
if git am --3way "$PATCH_SRC"; then
118+
echo "Applied cleanup for $SHORT to $WIP_BRANCH"
119+
git push origin "$WIP_BRANCH"
120+
else
121+
git am --abort 2>/dev/null || true
122+
echo "FAILED to apply cleanup for $SHORT (rebase conflict)."
123+
ts=$(date -u +%Y-%m-%dT%H:%M:%SZ)
124+
(
125+
cd "$MEM_WT"
126+
echo -e "$SHORT\t$ts\tgit am failed (rebase conflict)" >> "$FAILED"
127+
git add -A
128+
git commit -m "Record $SHORT as patch-conflict failure"
129+
git push origin "$MEMORY_BRANCH" || true
130+
)
131+
fi
132+
)
133+
fi
134+
135+
git fetch origin "$WIP_BRANCH" --depth=50 2>/dev/null || true
136+
137+
# ---- 3. Decide flush ----
138+
139+
if git rev-parse --verify "origin/$WIP_BRANCH" >/dev/null 2>&1; then
140+
FILE_COUNT=$(git diff --name-only origin/main "origin/$WIP_BRANCH" | wc -l)
141+
AHEAD=$(git rev-list --count "origin/main..origin/$WIP_BRANCH")
142+
else
143+
FILE_COUNT=0
144+
AHEAD=0
145+
fi
146+
147+
echo "wip ahead of main: $AHEAD commit(s), $FILE_COUNT file(s)"
148+
echo "queue remaining: $QUEUE_REMAINING"
149+
echo "threshold: $THRESHOLD"
150+
151+
SHOULD_FLUSH=false
152+
if [ "$AHEAD" -gt 0 ]; then
153+
if [ "$FILE_COUNT" -ge "$THRESHOLD" ]; then
154+
SHOULD_FLUSH=true
155+
echo "Flushing: file count >= threshold."
156+
elif [ "$QUEUE_REMAINING" -eq 0 ]; then
157+
SHOULD_FLUSH=true
158+
echo "Flushing: queue exhausted."
159+
fi
160+
fi
161+
162+
OPENED_PR=false
163+
if [ "$SHOULD_FLUSH" = "true" ]; then
164+
RUN_ID="${GITHUB_RUN_ID:-$(date -u +%Y%m%d%H%M%S)}"
165+
BATCH_BRANCH="module-cleanup-batch-$RUN_ID"
166+
167+
git push origin "refs/remotes/origin/$WIP_BRANCH:refs/heads/$BATCH_BRANCH"
168+
169+
BODY_FILE=$(mktemp)
170+
{
171+
echo "Automated module-cleanup batch."
172+
echo
173+
echo "## Modules in this batch"
174+
echo
175+
git -C "$WIP_WT" log "origin/main..origin/$WIP_BRANCH" \
176+
--reverse --format='- `%s`' \
177+
| sed 's|^- `Cleanup for |- `|'
178+
echo
179+
echo "---"
180+
echo
181+
git -C "$WIP_WT" log "origin/main..origin/$WIP_BRANCH" \
182+
--reverse --format='## %s%n%n%b%n'
183+
} > "$BODY_FILE"
184+
185+
gh pr create \
186+
--repo "$REPO" \
187+
--base main \
188+
--head "$BATCH_BRANCH" \
189+
--title "Module cleanup: batch (run $RUN_ID)" \
190+
--body-file "$BODY_FILE" \
191+
--label "module cleanup"
192+
193+
git push --force origin "origin/main:refs/heads/$WIP_BRANCH"
194+
195+
OPENED_PR=true
196+
fi
197+
198+
# ---- 4. Self-dispatch ----
199+
200+
if [ "$OPENED_PR" = "true" ]; then
201+
echo "Opened a PR; cron will resume the chain on its next tick."
202+
elif [ "$QUEUE_REMAINING" -le 0 ]; then
203+
echo "Queue empty; nothing to dispatch."
204+
else
205+
echo "Self-dispatching workflow for next module."
206+
gh workflow run "$WORKFLOW_FILE" --repo "$REPO" --ref main
207+
fi
208+
209+
git worktree remove --force "$MEM_WT" 2>/dev/null || true
210+
git worktree remove --force "$WIP_WT" 2>/dev/null || true

0 commit comments

Comments
 (0)