Skip to content

Commit cc0e33d

Browse files
authored
fix(usage-metrics): replace the per-version inline retry with a multi-pass approach (#3620)
* fix(usage-metrics): replace the per-version inline retry with a multi-pass approach

  Try all versions in a first pass, collect failures, wait for the rate limit window to reset, then retry only the failed versions. Repeat until all succeed or a max number of passes is hit. This avoids cascading failures where retries for one version burn rate budget for the next.

* chore(usage-data): manually triggering the workflow contributes to an existing PR
* fix: deduplicate versions and only retry retryable errors
* fix: preserve collected metrics when checking out existing branch
1 parent 440ee5c commit cc0e33d

2 files changed

Lines changed: 158 additions & 71 deletions

File tree

.github/workflows/usage-metrics.yml

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ jobs:
6464
# Query all versions in a single command
6565
go run collect-metrics.go $VERSION_FLAGS -csv "../docs/usage-metrics.csv"
6666
67-
- name: Create Pull Request
67+
- name: Create or update Pull Request
6868
run: |
6969
git config user.name "github-actions[bot]"
7070
git config user.email "github-actions[bot]@users.noreply.github.com"
@@ -75,16 +75,41 @@ jobs:
7575
exit 0
7676
fi
7777
78-
# Create a new branch for the PR
7978
DATE=$(date +%Y-%m-%d)
80-
BRANCH_NAME="chore/update-usage-metrics-$DATE"
81-
git checkout -b "$BRANCH_NAME"
79+
MONTH=$(date +%Y-%m)
80+
BRANCH_NAME="chore/update-usage-metrics-$MONTH"
81+
82+
# Check if the branch already exists on the remote
83+
if git ls-remote --heads origin "$BRANCH_NAME" | grep -q "$BRANCH_NAME"; then
84+
echo "Branch $BRANCH_NAME already exists, pushing new commit to it"
85+
# Preserve the newly collected metrics before switching branches
86+
cp docs/usage-metrics.csv /tmp/usage-metrics.csv
87+
git fetch origin "$BRANCH_NAME"
88+
git checkout "$BRANCH_NAME"
89+
# Restore the newly collected metrics on top of the existing branch
90+
cp /tmp/usage-metrics.csv docs/usage-metrics.csv
91+
git add docs/usage-metrics.csv
92+
# Check again after checkout — the diff might be empty if the
93+
# branch already has identical data
94+
if git diff --staged --quiet; then
95+
echo "No changes to commit after checking out existing branch"
96+
exit 0
97+
fi
98+
else
99+
echo "Creating new branch $BRANCH_NAME"
100+
git checkout -b "$BRANCH_NAME"
101+
fi
82102
83103
git commit -m "chore(metrics): update usage metrics ($DATE)"
84104
git push -u origin "$BRANCH_NAME"
85105
86-
# Create PR using gh CLI
87-
gh pr create \
88-
--title "chore: update usage metrics ($DATE)" \
89-
--body "Automated update of usage metrics data. This PR updates the usage metrics CSV file with the latest GitHub usage data for testcontainers-go versions." \
90-
--base main
106+
# Create a PR only if one doesn't already exist for this branch
107+
EXISTING_PR=$(gh pr list --head "$BRANCH_NAME" --state open --json number --jq '.[0].number')
108+
if [ -n "$EXISTING_PR" ]; then
109+
echo "PR #$EXISTING_PR already exists for branch $BRANCH_NAME, updated with new commit"
110+
else
111+
gh pr create \
112+
--title "chore: update usage metrics ($MONTH)" \
113+
--body "Automated update of usage metrics data. This PR updates the usage metrics CSV file with the latest GitHub usage data for testcontainers-go versions." \
114+
--base main
115+
fi

usage-metrics/collect-metrics.go

Lines changed: 124 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -55,43 +55,80 @@ func main() {
5555

5656
func collectMetrics(versions []string, csvPath string) error {
5757
date := time.Now().Format("2006-01-02")
58-
metrics := make([]usageMetric, 0, len(versions))
58+
metrics := make(map[string]usageMetric)
5959

60-
// Query all versions sequentially
60+
// Build a unique, non-empty list of versions to query
61+
pending := make([]string, 0, len(versions))
62+
seen := make(map[string]struct{}, len(versions))
6163
for _, version := range versions {
6264
version = strings.TrimSpace(version)
6365
if version == "" {
6466
continue
6567
}
66-
67-
// Add delay BEFORE querying to avoid rate limiting
68-
if len(metrics) > 0 {
69-
log.Printf("Waiting 7 seconds before querying next version...")
70-
time.Sleep(7 * time.Second) // 10 requests per 60 seconds = 6 seconds minimum
68+
if _, ok := seen[version]; ok {
69+
continue
7170
}
71+
seen[version] = struct{}{}
72+
pending = append(pending, version)
73+
}
74+
if len(pending) == 0 {
75+
return errors.New("at least one non-empty version is required")
76+
}
7277

73-
count, err := queryGitHubUsageWithRetry(version)
74-
if err != nil {
75-
log.Printf("Warning: Failed to query version %s after retries: %v", version, err)
76-
continue
78+
const (
79+
maxPasses = 5
80+
interRequestWait = 7 * time.Second // 10 requests per 60 seconds = 6 seconds minimum
81+
passCooldown = 120 * time.Second // wait for rate limit window to fully reset between passes
82+
)
83+
84+
for pass := 0; pass < maxPasses && len(pending) > 0; pass++ {
85+
if pass > 0 {
86+
log.Printf("Pass %d: waiting %v for rate limit window to reset before retrying %d failed version(s)...",
87+
pass+1, passCooldown, len(pending))
88+
time.Sleep(passCooldown)
89+
} else {
90+
log.Printf("Pass 1: querying %d version(s)...", len(pending))
7791
}
7892

79-
metric := usageMetric{
80-
Date: date,
81-
Version: version,
82-
Count: count,
93+
var failed []string
94+
queriesMade := 0
95+
for _, version := range pending {
96+
// Add delay before querying to avoid rate limiting
97+
if queriesMade > 0 {
98+
log.Printf("Waiting %v before querying next version...", interRequestWait)
99+
time.Sleep(interRequestWait)
100+
}
101+
102+
count, err := queryGitHubUsage(version)
103+
queriesMade++
104+
if err != nil {
105+
log.Printf("Pass %d: failed to query version %s: %v", pass+1, version, err)
106+
if isRetryableError(err) {
107+
failed = append(failed, version)
108+
continue
109+
}
110+
return fmt.Errorf("query %s: %w", version, err)
111+
}
112+
113+
metrics[version] = usageMetric{
114+
Date: date,
115+
Version: version,
116+
Count: count,
117+
}
118+
fmt.Printf("Successfully queried: %s has %d usages on %s\n", version, count, date)
83119
}
84120

85-
metrics = append(metrics, metric)
86-
fmt.Printf("Successfully queried: %s has %d usages on %s\n", version, count, metric.Date)
121+
pending = failed
122+
if len(pending) == 0 {
123+
log.Printf("All versions queried successfully after %d pass(es).", pass+1)
124+
}
87125
}
88126

89-
// Sort metrics by version
90-
sort.Slice(metrics, func(i, j int) bool {
91-
return metrics[i].Version < metrics[j].Version
92-
})
127+
if len(pending) > 0 {
128+
log.Printf("Warning: %d version(s) still failed after %d passes: %s", len(pending), maxPasses, strings.Join(pending, ", "))
129+
}
93130

94-
// Write all metrics to CSV
131+
// Append new metrics to CSV
95132
for _, metric := range metrics {
96133
if err := appendToCSV(csvPath, metric); err != nil {
97134
log.Printf("Warning: Failed to write metric for %s: %v", metric.Version, err)
@@ -100,49 +137,25 @@ func collectMetrics(versions []string, csvPath string) error {
100137
fmt.Printf("Successfully recorded: %s has %d usages on %s\n", metric.Version, metric.Count, metric.Date)
101138
}
102139

103-
return nil
104-
}
105-
106-
func queryGitHubUsageWithRetry(version string) (int, error) {
107-
var lastErr error
108-
// Backoff intervals: wait longer for rate limit to reset (rolling window)
109-
backoffIntervals := []time.Duration{
110-
60 * time.Second, // Wait for rolling window
111-
60 * time.Second,
112-
60 * time.Second,
140+
// Sort the entire CSV so rows are ordered by (date, version) regardless
141+
// of the order they were appended across multiple runs.
142+
if err := sortCSV(csvPath); err != nil {
143+
return fmt.Errorf("sort csv: %w", err)
113144
}
114145

115-
// maxRetries includes the initial attempt plus one retry per backoff interval
116-
maxRetries := len(backoffIntervals) + 1
117-
118-
for attempt := 0; attempt < maxRetries; attempt++ {
119-
if attempt > 0 {
120-
// Use predefined backoff intervals
121-
waitTime := backoffIntervals[attempt-1]
122-
log.Printf("Retrying version %s in %v (attempt %d/%d)", version, waitTime, attempt+1, maxRetries)
123-
time.Sleep(waitTime)
124-
}
125-
126-
count, err := queryGitHubUsage(version)
127-
if err == nil {
128-
return count, nil
129-
}
130-
131-
lastErr = err
132-
133-
// Check if it's a rate limit error
134-
if strings.Contains(err.Error(), "rate limit") ||
135-
strings.Contains(err.Error(), "403") ||
136-
strings.Contains(err.Error(), "429") {
137-
log.Printf("Rate limit hit for version %s, will retry with backoff", version)
138-
continue
139-
}
140-
141-
// For non-rate-limit errors, retry but with shorter backoff
142-
log.Printf("Error querying version %s: %v", version, err)
143-
}
146+
return nil
147+
}
144148

145-
return 0, fmt.Errorf("max retries reached: %w", lastErr)
149+
// isRetryableError reports whether err looks like a rate-limit or transient
// HTTP failure that is worth retrying in a subsequent pass.
//
// Detection is a substring heuristic over err.Error(): the message is
// retryable when it mentions "rate limit" or one of the status codes 403, 429,
// 500, 502, 503 or 504. Because the match is purely textual, an unrelated
// number in the message that happens to contain one of these codes is also
// treated as retryable. A nil error is never retryable.
func isRetryableError(err error) bool {
	// Guard against a nil error so callers cannot trigger a nil-pointer
	// panic through err.Error().
	if err == nil {
		return false
	}

	msg := err.Error()
	for _, marker := range []string{
		"rate limit",
		"403", // GitHub reports some rate-limit rejections as 403
		"429",
		"500",
		"502",
		"503",
		"504", // gateway timeout is as transient as 502/503
	} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
147160

148161
func queryGitHubUsage(version string) (int, error) {
@@ -173,6 +186,55 @@ func queryGitHubUsage(version string) (int, error) {
173186
return resp.TotalCount, nil
174187
}
175188

189+
// sortCSV rewrites the CSV file at csvPath in place so that its data rows are
// ordered by the first column and then by the second column (date, then
// version), both compared as plain strings in ascending order. The first
// record is treated as the header and always stays on top. Rows that compare
// equal keep their relative order.
//
// A file that is empty or contains only a header is left untouched. An error
// is returned when the path cannot be resolved, when the file cannot be
// opened, parsed, or rewritten, or when the records have fewer than two
// columns (the sort keys would not exist).
func sortCSV(csvPath string) (err error) {
	absPath, err := filepath.Abs(csvPath)
	if err != nil {
		return fmt.Errorf("resolve path: %w", err)
	}

	file, err := os.Open(absPath)
	if err != nil {
		return fmt.Errorf("open file: %w", err)
	}

	records, err := csv.NewReader(file).ReadAll()
	// The file was only read, so a close failure cannot lose data; close it
	// right away because the same path is re-created for writing below.
	file.Close()
	if err != nil {
		return fmt.Errorf("read csv: %w", err)
	}

	if len(records) <= 1 {
		return nil // nothing to sort (header only or empty)
	}

	header := records[0]
	data := records[1:]

	// The reader's default settings require every record to have as many
	// fields as the first one, so validating the header covers all rows.
	// Check before the file is truncated so malformed input is never
	// destroyed and the comparisons below cannot index out of range.
	if len(header) < 2 {
		return fmt.Errorf("unexpected csv layout: need at least 2 columns, got %d", len(header))
	}

	sort.SliceStable(data, func(i, j int) bool {
		if data[i][0] != data[j][0] {
			return data[i][0] < data[j][0] // date ascending
		}
		return data[i][1] < data[j][1] // version ascending
	})

	out, err := os.Create(absPath)
	if err != nil {
		return fmt.Errorf("create file: %w", err)
	}
	// Surface close errors too: for a file that was just written, a failed
	// Close can mean the data never reached the disk.
	defer func() {
		if cerr := out.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("close file: %w", cerr)
		}
	}()

	writer := csv.NewWriter(out)
	if err := writer.Write(header); err != nil {
		return fmt.Errorf("write header: %w", err)
	}
	// WriteAll flushes the writer and reports any flush error.
	if err := writer.WriteAll(data); err != nil {
		return fmt.Errorf("write records: %w", err)
	}

	return nil
}
237+
176238
func appendToCSV(csvPath string, metric usageMetric) error {
177239
absPath, err := filepath.Abs(csvPath)
178240
if err != nil {

0 commit comments

Comments
 (0)