Skip to content

Commit 7c46558

Browse files
committed
perf(github): cache check-run lookups with retry
Cache GitHub check-run API responses to avoid repeated paginated API calls during status updates. Use a mutex+channel pattern so concurrent goroutines share a single in-flight fetch. Failed fetches are not cached, allowing retries with exponential backoff on transient API errors. Signed-off-by: Akshay Pant <akpant@redhat.com> Assisted-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 68c5f1d commit 7c46558

3 files changed

Lines changed: 188 additions & 17 deletions

File tree

pkg/provider/github/github.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,21 +72,31 @@ type Provider struct {
7272
pacUserLogin string // user/bot login used by PAC
7373
clock clockwork.Clock
7474
graphQLClient *graphQLClient
75+
checkRunsCache checkRunsCache
7576
}
7677

7778
type skippedRun struct {
7879
mutex *sync.Mutex
7980
checkRunID int64
8081
}
8182

83+
type checkRunsCache struct {
84+
mu sync.Mutex
85+
runs []*github.CheckRun
86+
loading bool
87+
done chan struct{}
88+
populated bool
89+
}
90+
8291
func New() *Provider {
8392
return &Provider{
8493
APIURL: github.Ptr(keys.PublicGithubAPIURL),
8594
PaginedNumber: defaultPaginedNumber,
8695
skippedRun: skippedRun{
8796
mutex: &sync.Mutex{},
8897
},
89-
clock: clockwork.NewRealClock(),
98+
clock: clockwork.NewRealClock(),
99+
checkRunsCache: checkRunsCache{},
90100
}
91101
}
92102

pkg/provider/github/status.go

Lines changed: 90 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ import (
2323
)
2424

2525
const (
26-
botType = "Bot"
27-
pendingApproval = "Pending approval, waiting for an /ok-to-test"
26+
botType = "Bot"
27+
pendingApproval = "Pending approval, waiting for an /ok-to-test"
28+
checkRunsFetchMaxRetries = 2
29+
checkRunsFetchInitialBackoff = 200 * time.Millisecond
2830
)
2931

3032
const taskStatusTemplate = `
@@ -42,7 +44,9 @@ const taskStatusTemplate = `
4244
{{- end }}
4345
</table>`
4446

45-
func (v *Provider) getExistingCheckRunID(ctx context.Context, runevent *info.Event, status providerstatus.StatusOpts) (*int64, error) {
47+
// fetchAllCheckRunPages retrieves every page of check runs for the event SHA.
48+
func (v *Provider) fetchAllCheckRunPages(ctx context.Context, runevent *info.Event) ([]*github.CheckRun, error) {
49+
var all []*github.CheckRun
4650
opt := github.ListOptions{PerPage: v.PaginedNumber}
4751
for {
4852
res, resp, err := wrapAPI(v, "list_check_runs_for_ref", func() (*github.ListCheckRunsResults, *github.Response, error) {
@@ -55,25 +59,95 @@ func (v *Provider) getExistingCheckRunID(ctx context.Context, runevent *info.Eve
5559
if err != nil {
5660
return nil, err
5761
}
58-
59-
for _, checkrun := range res.CheckRuns {
60-
// if it is a Pending approval CheckRun then overwrite it
61-
if isPendingApprovalCheckrun(checkrun) || isFailedCheckrun(checkrun) {
62-
if v.canIUseCheckrunID(checkrun.ID) {
63-
return checkrun.ID, nil
64-
}
65-
}
66-
if *checkrun.ExternalID == status.PipelineRunName {
67-
return checkrun.ID, nil
68-
}
69-
}
62+
all = append(all, res.CheckRuns...)
7063
if resp.NextPage == 0 {
7164
break
7265
}
7366
opt.Page = resp.NextPage
7467
}
68+
return all, nil
69+
}
70+
71+
func (v *Provider) searchCheckRuns(runs []*github.CheckRun, status providerstatus.StatusOpts) *int64 {
72+
for _, checkrun := range runs {
73+
if isPendingApprovalCheckrun(checkrun) || isFailedCheckrun(checkrun) {
74+
if v.canIUseCheckrunID(checkrun.ID) {
75+
return checkrun.ID
76+
}
77+
}
78+
if checkrun.ExternalID != nil && *checkrun.ExternalID == status.PipelineRunName {
79+
return checkrun.ID
80+
}
81+
}
82+
return nil
83+
}
84+
85+
func (v *Provider) getExistingCheckRunID(ctx context.Context, runevent *info.Event, status providerstatus.StatusOpts) (*int64, error) {
86+
for {
87+
v.checkRunsCache.mu.Lock()
88+
if v.checkRunsCache.loading {
89+
done := v.checkRunsCache.done
90+
v.checkRunsCache.mu.Unlock()
91+
<-done
92+
continue
93+
}
94+
if v.checkRunsCache.populated {
95+
runs := v.checkRunsCache.runs
96+
v.checkRunsCache.mu.Unlock()
97+
return v.searchCheckRuns(runs, status), nil
98+
}
99+
100+
v.checkRunsCache.loading = true
101+
v.checkRunsCache.done = make(chan struct{})
102+
v.checkRunsCache.mu.Unlock()
103+
104+
runs, err := v.fetchAllCheckRunPagesWithRetry(ctx, runevent)
105+
106+
v.checkRunsCache.mu.Lock()
107+
if err != nil {
108+
v.checkRunsCache.loading = false
109+
} else {
110+
v.checkRunsCache.runs = runs
111+
v.checkRunsCache.loading = false
112+
v.checkRunsCache.populated = true
113+
}
114+
close(v.checkRunsCache.done)
115+
v.checkRunsCache.mu.Unlock()
116+
117+
if err != nil {
118+
return nil, err
119+
}
120+
return v.searchCheckRuns(runs, status), nil
121+
}
122+
}
123+
124+
func (v *Provider) fetchAllCheckRunPagesWithRetry(ctx context.Context, runevent *info.Event) ([]*github.CheckRun, error) {
125+
var lastErr error
126+
for attempt := range checkRunsFetchMaxRetries + 1 {
127+
runs, err := v.fetchAllCheckRunPages(ctx, runevent)
128+
if err == nil {
129+
return runs, nil
130+
}
131+
lastErr = err
132+
133+
if attempt == checkRunsFetchMaxRetries {
134+
return nil, err
135+
}
136+
137+
backoff := time.Duration(1<<uint(attempt)) * checkRunsFetchInitialBackoff
138+
if v.Logger != nil {
139+
v.Logger.Debugf("check-runs lookup failed for %s/%s@%s (attempt %d/%d): %v; retrying in %v",
140+
runevent.Organization, runevent.Repository, runevent.SHA,
141+
attempt+1, checkRunsFetchMaxRetries+1, err, backoff)
142+
}
75143

76-
return nil, nil
144+
select {
145+
case <-ctx.Done():
146+
return nil, ctx.Err()
147+
case <-v.getClock().After(backoff):
148+
}
149+
}
150+
return nil, lastErr
77151
}
78152

79153
func isPendingApprovalCheckrun(run *github.CheckRun) bool {

pkg/provider/github/status_test.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ import (
88
"reflect"
99
"strconv"
1010
"strings"
11+
"sync"
12+
"sync/atomic"
1113
"testing"
1214

1315
"github.com/google/go-github/v84/github"
@@ -686,3 +688,88 @@ func TestProviderGetExistingCheckRunID(t *testing.T) {
686688
})
687689
}
688690
}
691+
692+
func TestGetExistingCheckRunIDCache(t *testing.T) {
693+
tests := []struct {
694+
name string
695+
jsonret string
696+
failFirstN int
697+
goroutines int
698+
secondLookup string
699+
expectedID int64
700+
expectedAPIHits int64
701+
}{
702+
{
703+
name: "second call serves from cache",
704+
jsonret: `{"total_count": 2, "check_runs": [{"id": 55555, "external_id": "mypr"}, {"id": 55556, "external_id": "mypr2"}]}`,
705+
secondLookup: "mypr2",
706+
expectedID: 55555,
707+
expectedAPIHits: 1,
708+
},
709+
{
710+
name: "concurrent calls share single fetch",
711+
jsonret: `{"total_count": 2, "check_runs": [{"id": 55555, "external_id": "mypr"}, {"id": 55556, "external_id": "mypr2"}]}`,
712+
goroutines: 10,
713+
expectedAPIHits: 1,
714+
},
715+
{
716+
name: "retries on transient error",
717+
jsonret: `{"total_count": 1, "check_runs": [{"id": 77777, "external_id": "mypr"}]}`,
718+
failFirstN: 1,
719+
expectedID: 77777,
720+
expectedAPIHits: 2,
721+
},
722+
}
723+
for _, tt := range tests {
724+
t.Run(tt.name, func(t *testing.T) {
725+
ctx, _ := rtesting.SetupFakeContext(t)
726+
client, mux, _, teardown := ghtesthelper.SetupGH()
727+
defer teardown()
728+
729+
event := &info.Event{
730+
Organization: "owner",
731+
Repository: "repository",
732+
SHA: "sha",
733+
}
734+
735+
var apiHits atomic.Int64
736+
mux.HandleFunc(fmt.Sprintf("/repos/%v/%v/commits/%v/check-runs", event.Organization, event.Repository, event.SHA), func(w http.ResponseWriter, _ *http.Request) {
737+
hit := apiHits.Add(1)
738+
if int(hit) <= tt.failFirstN {
739+
w.WriteHeader(http.StatusBadGateway)
740+
return
741+
}
742+
fmt.Fprint(w, tt.jsonret)
743+
})
744+
745+
cnx := New()
746+
cnx.SetGithubClient(client)
747+
748+
if tt.goroutines > 1 {
749+
var wg sync.WaitGroup
750+
wg.Add(tt.goroutines)
751+
for range tt.goroutines {
752+
go func() {
753+
defer wg.Done()
754+
_, _ = cnx.getExistingCheckRunID(ctx, event, providerstatus.StatusOpts{PipelineRunName: "mypr"})
755+
}()
756+
}
757+
wg.Wait()
758+
} else {
759+
id, err := cnx.getExistingCheckRunID(ctx, event, providerstatus.StatusOpts{PipelineRunName: "mypr"})
760+
assert.NilError(t, err)
761+
if tt.expectedID != 0 {
762+
assert.Assert(t, id != nil)
763+
assert.Equal(t, *id, tt.expectedID)
764+
}
765+
if tt.secondLookup != "" {
766+
id2, err := cnx.getExistingCheckRunID(ctx, event, providerstatus.StatusOpts{PipelineRunName: tt.secondLookup})
767+
assert.NilError(t, err)
768+
assert.Assert(t, id2 != nil)
769+
}
770+
}
771+
772+
assert.Equal(t, apiHits.Load(), tt.expectedAPIHits)
773+
})
774+
}
775+
}

0 commit comments

Comments
 (0)