Skip to content

Commit 059c0ff

Browse files
committed
perf(github): cache check-run lookups with retry
Cache GitHub check-run API responses to avoid repeated paginated API calls during status updates. Use a mutex+channel pattern so concurrent goroutines share a single in-flight fetch. Failed fetches are not cached, allowing retries with exponential backoff on transient API errors. Signed-off-by: Akshay Pant <akpant@redhat.com> Assisted-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 532790b commit 059c0ff

3 files changed

Lines changed: 262 additions & 41 deletions

File tree

pkg/provider/github/github.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,25 @@ type Provider struct {
7272
pacUserLogin string // user/bot login used by PAC
7373
clock clockwork.Clock
7474
graphQLClient *graphQLClient
75+
checkRunsCache checkRunsCache
7576
}
7677

7778
type skippedRun struct {
7879
mutex *sync.Mutex
7980
checkRunID int64
8081
}
8182

83+
type checkRunsCache struct {
84+
mu sync.Mutex
85+
entries map[string]*checkRunsCacheEntry
86+
}
87+
88+
type checkRunsCacheEntry struct {
89+
runs []*github.CheckRun
90+
loading bool
91+
done chan struct{}
92+
}
93+
8294
func New() *Provider {
8395
return &Provider{
8496
APIURL: github.Ptr(keys.PublicGithubAPIURL),
@@ -87,6 +99,9 @@ func New() *Provider {
8799
mutex: &sync.Mutex{},
88100
},
89101
clock: clockwork.NewRealClock(),
102+
checkRunsCache: checkRunsCache{
103+
entries: map[string]*checkRunsCacheEntry{},
104+
},
90105
}
91106
}
92107

pkg/provider/github/status.go

Lines changed: 108 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ import (
2323
)
2424

2525
const (
26-
botType = "Bot"
27-
pendingApproval = "Pending approval, waiting for an /ok-to-test"
26+
botType = "Bot"
27+
pendingApproval = "Pending approval, waiting for an /ok-to-test"
28+
checkRunsFetchMaxRetries = 2
29+
checkRunsFetchInitialBackoff = 200 * time.Millisecond
2830
)
2931

3032
const taskStatusTemplate = `
@@ -42,7 +44,9 @@ const taskStatusTemplate = `
4244
{{- end }}
4345
</table>`
4446

45-
func (v *Provider) getExistingCheckRunID(ctx context.Context, runevent *info.Event, status providerstatus.StatusOpts) (*int64, error) {
47+
// fetchAllCheckRunPages retrieves every page of check runs for the event SHA.
48+
func (v *Provider) fetchAllCheckRunPages(ctx context.Context, runevent *info.Event) ([]*github.CheckRun, error) {
49+
var all []*github.CheckRun
4650
opt := github.ListOptions{PerPage: v.PaginedNumber}
4751
for {
4852
res, resp, err := wrapAPI(v, "list_check_runs_for_ref", func() (*github.ListCheckRunsResults, *github.Response, error) {
@@ -55,25 +59,114 @@ func (v *Provider) getExistingCheckRunID(ctx context.Context, runevent *info.Eve
5559
if err != nil {
5660
return nil, err
5761
}
62+
all = append(all, res.CheckRuns...)
63+
if resp.NextPage == 0 {
64+
break
65+
}
66+
opt.Page = resp.NextPage
67+
}
68+
return all, nil
69+
}
5870

59-
for _, checkrun := range res.CheckRuns {
60-
// if it is a Pending approval CheckRun then overwrite it
61-
if isPendingApprovalCheckrun(checkrun) || isFailedCheckrun(checkrun) {
62-
if v.canIUseCheckrunID(checkrun.ID) {
63-
return checkrun.ID, nil
64-
}
71+
func (v *Provider) searchCheckRuns(runs []*github.CheckRun, status providerstatus.StatusOpts) *int64 {
72+
for _, checkrun := range runs {
73+
if isPendingApprovalCheckrun(checkrun) || isFailedCheckrun(checkrun) {
74+
if v.canIUseCheckrunID(checkrun.ID) {
75+
return checkrun.ID
6576
}
66-
if *checkrun.ExternalID == status.PipelineRunName {
67-
return checkrun.ID, nil
77+
}
78+
if checkrun.ExternalID != nil && *checkrun.ExternalID == status.PipelineRunName {
79+
return checkrun.ID
80+
}
81+
}
82+
return nil
83+
}
84+
85+
func (v *Provider) getExistingCheckRunID(ctx context.Context, runevent *info.Event, status providerstatus.StatusOpts) (*int64, error) {
86+
appID := "none"
87+
if v.ApplicationID != nil {
88+
appID = strconv.FormatInt(*v.ApplicationID, 10)
89+
}
90+
cacheKey := fmt.Sprintf("%s/%s/%s/%s", runevent.Organization, runevent.Repository, appID, runevent.SHA)
91+
92+
// Loop handles the wait from channel and fetch of the check runs from the API
93+
for {
94+
// Check if the check runs are already cached
95+
v.checkRunsCache.mu.Lock()
96+
entry, ok := v.checkRunsCache.entries[cacheKey]
97+
if ok {
98+
// Wait for fetch to complete if the check runs are still loading.
99+
if entry.loading {
100+
done := entry.done
101+
v.checkRunsCache.mu.Unlock()
102+
<-done
103+
continue
68104
}
105+
// Return the check run ID if the check runs are loaded.
106+
runs := entry.runs
107+
v.checkRunsCache.mu.Unlock()
108+
return v.searchCheckRuns(runs, status), nil
69109
}
70-
if resp.NextPage == 0 {
71-
break
110+
111+
// Create a new "loading" entry if the check runs are not cached.
112+
entry = &checkRunsCacheEntry{
113+
loading: true,
114+
done: make(chan struct{}),
72115
}
73-
opt.Page = resp.NextPage
116+
v.checkRunsCache.entries[cacheKey] = entry
117+
v.checkRunsCache.mu.Unlock()
118+
119+
// Fetch check runs from the API.
120+
runs, err := v.fetchAllCheckRunPagesWithRetry(ctx, runevent)
121+
122+
v.checkRunsCache.mu.Lock()
123+
// Delete the entry if fetch failed.
124+
if err != nil {
125+
delete(v.checkRunsCache.entries, cacheKey)
126+
} else {
127+
// Update the entry if fetch succeeded.
128+
entry.runs = runs
129+
entry.loading = false
130+
}
131+
close(entry.done)
132+
v.checkRunsCache.mu.Unlock()
133+
134+
// Return the error if fetch failed.
135+
if err != nil {
136+
return nil, err
137+
}
138+
// Return the check run ID if fetch succeeded.
139+
return v.searchCheckRuns(runs, status), nil
74140
}
141+
}
142+
143+
func (v *Provider) fetchAllCheckRunPagesWithRetry(ctx context.Context, runevent *info.Event) ([]*github.CheckRun, error) {
144+
var lastErr error
145+
for attempt := range checkRunsFetchMaxRetries + 1 {
146+
runs, err := v.fetchAllCheckRunPages(ctx, runevent)
147+
if err == nil {
148+
return runs, nil
149+
}
150+
lastErr = err
151+
152+
if attempt == checkRunsFetchMaxRetries {
153+
return nil, err
154+
}
155+
156+
backoff := time.Duration(1<<uint(attempt)) * checkRunsFetchInitialBackoff
157+
if v.Logger != nil {
158+
v.Logger.Debugf("check-runs lookup failed for %s/%s@%s (attempt %d/%d): %v; retrying in %v",
159+
runevent.Organization, runevent.Repository, runevent.SHA,
160+
attempt+1, checkRunsFetchMaxRetries+1, err, backoff)
161+
}
75162

76-
return nil, nil
163+
select {
164+
case <-ctx.Done():
165+
return nil, ctx.Err()
166+
case <-v.getClock().After(backoff):
167+
}
168+
}
169+
return nil, lastErr
77170
}
78171

79172
func isPendingApprovalCheckrun(run *github.CheckRun) bool {

0 commit comments

Comments
 (0)