Skip to content

Commit 2a8c9de

Browse files
committed
perf(github): cache check-run lookups with retry
Cache GitHub check-run API responses to avoid repeated paginated API calls during status updates. Use a mutex+channel pattern so concurrent goroutines share a single in-flight fetch. Failed fetches are not cached, allowing retries with exponential backoff on transient API errors. Signed-off-by: Akshay Pant <akpant@redhat.com> Assisted-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c51c085 commit 2a8c9de

3 files changed

Lines changed: 219 additions & 17 deletions

File tree

pkg/provider/github/github.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,21 +72,31 @@ type Provider struct {
7272
pacUserLogin string // user/bot login used by PAC
7373
clock clockwork.Clock
7474
graphQLClient *graphQLClient
75+
checkRunsCache checkRunsCache
7576
}
7677

7778
type skippedRun struct {
7879
mutex *sync.Mutex
7980
checkRunID int64
8081
}
8182

83+
type checkRunsCache struct {
84+
mu sync.Mutex
85+
runs []*github.CheckRun
86+
loading bool
87+
done chan struct{}
88+
populated bool
89+
}
90+
8291
func New() *Provider {
8392
return &Provider{
8493
APIURL: github.Ptr(keys.PublicGithubAPIURL),
8594
PaginedNumber: defaultPaginedNumber,
8695
skippedRun: skippedRun{
8796
mutex: &sync.Mutex{},
8897
},
89-
clock: clockwork.NewRealClock(),
98+
clock: clockwork.NewRealClock(),
99+
checkRunsCache: checkRunsCache{},
90100
}
91101
}
92102

pkg/provider/github/status.go

Lines changed: 90 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ import (
2323
)
2424

2525
const (
26-
botType = "Bot"
27-
pendingApproval = "Pending approval, waiting for an /ok-to-test"
26+
botType = "Bot"
27+
pendingApproval = "Pending approval, waiting for an /ok-to-test"
28+
checkRunsFetchMaxRetries = 2
29+
checkRunsFetchInitialBackoff = 200 * time.Millisecond
2830
)
2931

3032
const taskStatusTemplate = `
@@ -42,7 +44,9 @@ const taskStatusTemplate = `
4244
{{- end }}
4345
</table>`
4446

45-
func (v *Provider) getExistingCheckRunID(ctx context.Context, runevent *info.Event, status providerstatus.StatusOpts) (*int64, error) {
47+
// fetchAllCheckRunPages retrieves every page of check runs for the event SHA.
48+
func (v *Provider) fetchAllCheckRunPages(ctx context.Context, runevent *info.Event) ([]*github.CheckRun, error) {
49+
var all []*github.CheckRun
4650
opt := github.ListOptions{PerPage: v.PaginedNumber}
4751
for {
4852
res, resp, err := wrapAPI(v, "list_check_runs_for_ref", func() (*github.ListCheckRunsResults, *github.Response, error) {
@@ -55,25 +59,95 @@ func (v *Provider) getExistingCheckRunID(ctx context.Context, runevent *info.Eve
5559
if err != nil {
5660
return nil, err
5761
}
58-
59-
for _, checkrun := range res.CheckRuns {
60-
// if it is a Pending approval CheckRun then overwrite it
61-
if isPendingApprovalCheckrun(checkrun) || isFailedCheckrun(checkrun) {
62-
if v.canIUseCheckrunID(checkrun.ID) {
63-
return checkrun.ID, nil
64-
}
65-
}
66-
if *checkrun.ExternalID == status.PipelineRunName {
67-
return checkrun.ID, nil
68-
}
69-
}
62+
all = append(all, res.CheckRuns...)
7063
if resp.NextPage == 0 {
7164
break
7265
}
7366
opt.Page = resp.NextPage
7467
}
68+
return all, nil
69+
}
70+
71+
func (v *Provider) searchCheckRuns(runs []*github.CheckRun, status providerstatus.StatusOpts) *int64 {
72+
for _, checkrun := range runs {
73+
if isPendingApprovalCheckrun(checkrun) || isFailedCheckrun(checkrun) {
74+
if v.canIUseCheckrunID(checkrun.ID) {
75+
return checkrun.ID
76+
}
77+
}
78+
if checkrun.ExternalID != nil && *checkrun.ExternalID == status.PipelineRunName {
79+
return checkrun.ID
80+
}
81+
}
82+
return nil
83+
}
84+
85+
func (v *Provider) getExistingCheckRunID(ctx context.Context, runevent *info.Event, status providerstatus.StatusOpts) (*int64, error) {
86+
for {
87+
v.checkRunsCache.mu.Lock()
88+
if v.checkRunsCache.loading {
89+
done := v.checkRunsCache.done
90+
v.checkRunsCache.mu.Unlock()
91+
<-done
92+
continue
93+
}
94+
if v.checkRunsCache.populated {
95+
runs := v.checkRunsCache.runs
96+
v.checkRunsCache.mu.Unlock()
97+
return v.searchCheckRuns(runs, status), nil
98+
}
99+
100+
v.checkRunsCache.loading = true
101+
v.checkRunsCache.done = make(chan struct{})
102+
v.checkRunsCache.mu.Unlock()
103+
104+
runs, err := v.fetchAllCheckRunPagesWithRetry(ctx, runevent)
105+
106+
v.checkRunsCache.mu.Lock()
107+
if err != nil {
108+
v.checkRunsCache.loading = false
109+
} else {
110+
v.checkRunsCache.runs = runs
111+
v.checkRunsCache.loading = false
112+
v.checkRunsCache.populated = true
113+
}
114+
close(v.checkRunsCache.done)
115+
v.checkRunsCache.mu.Unlock()
116+
117+
if err != nil {
118+
return nil, err
119+
}
120+
return v.searchCheckRuns(runs, status), nil
121+
}
122+
}
123+
124+
func (v *Provider) fetchAllCheckRunPagesWithRetry(ctx context.Context, runevent *info.Event) ([]*github.CheckRun, error) {
125+
var lastErr error
126+
for attempt := range checkRunsFetchMaxRetries + 1 {
127+
runs, err := v.fetchAllCheckRunPages(ctx, runevent)
128+
if err == nil {
129+
return runs, nil
130+
}
131+
lastErr = err
132+
133+
if attempt == checkRunsFetchMaxRetries {
134+
return nil, err
135+
}
136+
137+
backoff := time.Duration(1<<uint(attempt)) * checkRunsFetchInitialBackoff
138+
if v.Logger != nil {
139+
v.Logger.Debugf("check-runs lookup failed for %s/%s@%s (attempt %d/%d): %v; retrying in %v",
140+
runevent.Organization, runevent.Repository, runevent.SHA,
141+
attempt+1, checkRunsFetchMaxRetries+1, err, backoff)
142+
}
75143

76-
return nil, nil
144+
select {
145+
case <-ctx.Done():
146+
return nil, ctx.Err()
147+
case <-v.getClock().After(backoff):
148+
}
149+
}
150+
return nil, lastErr
77151
}
78152

79153
func isPendingApprovalCheckrun(run *github.CheckRun) bool {

pkg/provider/github/status_test.go

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,13 @@ import (
88
"reflect"
99
"strconv"
1010
"strings"
11+
"sync"
12+
"sync/atomic"
1113
"testing"
14+
"time"
1215

1316
"github.com/google/go-github/v84/github"
17+
"github.com/jonboulle/clockwork"
1418
"github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode/keys"
1519
"github.com/openshift-pipelines/pipelines-as-code/pkg/params"
1620
"github.com/openshift-pipelines/pipelines-as-code/pkg/params/clients"
@@ -686,3 +690,117 @@ func TestProviderGetExistingCheckRunID(t *testing.T) {
686690
})
687691
}
688692
}
693+
694+
func TestGetExistingCheckRunIDCacheHit(t *testing.T) {
695+
ctx, _ := rtesting.SetupFakeContext(t)
696+
client, mux, _, teardown := ghtesthelper.SetupGH()
697+
defer teardown()
698+
699+
event := &info.Event{
700+
Organization: "owner",
701+
Repository: "repository",
702+
SHA: "sha",
703+
}
704+
705+
apiHits := 0
706+
mux.HandleFunc(fmt.Sprintf("/repos/%v/%v/commits/%v/check-runs", event.Organization, event.Repository, event.SHA), func(w http.ResponseWriter, _ *http.Request) {
707+
apiHits++
708+
fmt.Fprint(w, `{"total_count": 2, "check_runs": [{"id": 55555, "external_id": "mypr"}, {"id": 55556, "external_id": "mypr2"}]}`)
709+
})
710+
711+
cnx := New()
712+
cnx.SetGithubClient(client)
713+
714+
// First call should hit the API and populate the cache.
715+
id, err := cnx.getExistingCheckRunID(ctx, event, providerstatus.StatusOpts{PipelineRunName: "mypr"})
716+
assert.NilError(t, err)
717+
assert.Assert(t, id != nil)
718+
719+
// Second call should serve from cache.
720+
id2, err := cnx.getExistingCheckRunID(ctx, event, providerstatus.StatusOpts{PipelineRunName: "mypr2"})
721+
assert.NilError(t, err)
722+
assert.Assert(t, id2 != nil)
723+
assert.Equal(t, apiHits, 1)
724+
}
725+
726+
func TestGetExistingCheckRunIDConcurrent(t *testing.T) {
727+
ctx, _ := rtesting.SetupFakeContext(t)
728+
client, mux, _, teardown := ghtesthelper.SetupGH()
729+
defer teardown()
730+
731+
event := &info.Event{
732+
Organization: "owner",
733+
Repository: "repository",
734+
SHA: "sha",
735+
}
736+
737+
var apiHits atomic.Int64
738+
mux.HandleFunc(fmt.Sprintf("/repos/%v/%v/commits/%v/check-runs", event.Organization, event.Repository, event.SHA), func(w http.ResponseWriter, _ *http.Request) {
739+
apiHits.Add(1)
740+
fmt.Fprint(w, `{"total_count": 2, "check_runs": [{"id": 55555, "external_id": "mypr"}, {"id": 55556, "external_id": "mypr2"}]}`)
741+
})
742+
743+
cnx := New()
744+
cnx.SetGithubClient(client)
745+
746+
const goroutines = 10
747+
var wg sync.WaitGroup
748+
wg.Add(goroutines)
749+
for range goroutines {
750+
go func() {
751+
defer wg.Done()
752+
_, _ = cnx.getExistingCheckRunID(ctx, event, providerstatus.StatusOpts{PipelineRunName: "mypr"})
753+
}()
754+
}
755+
wg.Wait()
756+
757+
// Only one goroutine should have fetched from the API; the rest hit the cache.
758+
assert.Equal(t, apiHits.Load(), int64(1))
759+
}
760+
761+
func TestGetExistingCheckRunIDRetryOnError(t *testing.T) {
762+
ctx, _ := rtesting.SetupFakeContext(t)
763+
client, mux, _, teardown := ghtesthelper.SetupGH()
764+
defer teardown()
765+
766+
event := &info.Event{
767+
Organization: "owner",
768+
Repository: "repository",
769+
SHA: "sha",
770+
}
771+
772+
var apiHits atomic.Int64
773+
mux.HandleFunc(fmt.Sprintf("/repos/%v/%v/commits/%v/check-runs", event.Organization, event.Repository, event.SHA), func(w http.ResponseWriter, _ *http.Request) {
774+
hit := apiHits.Add(1)
775+
if hit == 1 {
776+
// First call: simulate a transient GitHub 502.
777+
w.WriteHeader(http.StatusBadGateway)
778+
return
779+
}
780+
// Second call: succeed.
781+
fmt.Fprint(w, `{"total_count": 1, "check_runs": [{"id": 77777, "external_id": "mypr"}]}`)
782+
})
783+
784+
cnx := New()
785+
cnx.SetGithubClient(client)
786+
clock := clockwork.NewFakeClock()
787+
cnx.clock = clock
788+
789+
errCh := make(chan error, 1)
790+
var id *int64
791+
go func() {
792+
var err error
793+
id, err = cnx.getExistingCheckRunID(ctx, event, providerstatus.StatusOpts{PipelineRunName: "mypr"})
794+
errCh <- err
795+
}()
796+
797+
// Wait for the first attempt to fail and backoff timer to start
798+
_ = clock.BlockUntilContext(ctx, 1)
799+
clock.Advance(200 * time.Millisecond)
800+
801+
err := <-errCh
802+
assert.NilError(t, err)
803+
assert.Assert(t, id != nil)
804+
assert.Equal(t, *id, int64(77777))
805+
assert.Equal(t, apiHits.Load(), int64(2))
806+
}

0 commit comments

Comments
 (0)