Skip to content

Commit 58b0ea9

Browse files
authored
fix: use adaptive retry for AWS API calls to handle rate limiting (#757)
* fix: use adaptive retry for AWS API calls to handle rate limiting

  Switch from the default AWS SDK retry strategy (3 attempts, basic exponential backoff) to adaptive retry mode with up to 10 attempts. Adaptive mode maintains a client-side token bucket that tracks throttling responses over time and proactively slows the request rate. Standard mode only backs off and retries; adaptive mode avoids hammering a rate-limited API by draining tokens on 429s.

* docs: add comment explaining adaptive retry config

* revert: restore test parallelism to -p=8

  Reverts #747, which reduced parallelism from 8 to 6 as a workaround for AWS rate limiting. The adaptive retry config now handles this properly, so we can restore the original parallelism.
1 parent 1625f55 commit 58b0ea9

2 files changed

Lines changed: 25 additions & 8 deletions

File tree

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ setup_test_to_use_staging_server_image:
121121

122122
test_integration: deps vet ensure_network test_setup ## Run tests except the too slow ones
123123
@[ -e ~/.kosli.yml ] && mv ~/.kosli.yml ~/.kosli-renamed.yml || true
124-
@export KOSLI_TESTS=true $(FAKE_CI_ENV) && $(GOTESTSUM) -- --short -p=6 -coverprofile=cover.out ./...
124+
@export KOSLI_TESTS=true $(FAKE_CI_ENV) && $(GOTESTSUM) -- --short -p=8 -coverprofile=cover.out ./...
125125
@go tool cover -func=cover.out | grep total:
126126
@go tool cover -html=cover.out
127127
@[ -e ~/.kosli-renamed.yml ] && mv ~/.kosli-renamed.yml ~/.kosli.yml || true
@@ -130,14 +130,14 @@ test_integration: deps vet ensure_network test_setup ## Run tests except the too
130130
test_integration_full: deps vet ensure_network test_setup ## Run all tests
131131
@[ -e ~/.kosli.yml ] && mv ~/.kosli.yml ~/.kosli-renamed.yml || true
132132
@mkdir -p junit-test-results
133-
@export KOSLI_TESTS=true $(FAKE_CI_ENV) && $(GOTESTSUM) --junitfile junit-test-results/junit.xml -- -p=6 -coverprofile=cover.out ./...
133+
@export KOSLI_TESTS=true $(FAKE_CI_ENV) && $(GOTESTSUM) --junitfile junit-test-results/junit.xml -- -p=8 -coverprofile=cover.out ./...
134134
@go tool cover -func=cover.out
135135
@[ -e ~/.kosli-renamed.yml ] && mv ~/.kosli-renamed.yml ~/.kosli.yml || true
136136

137137

138138
test_integration_restart_server: test_setup_restart_server
139139
@[ -e ~/.kosli.yml ] && mv ~/.kosli.yml ~/.kosli-renamed.yml || true
140-
@export KOSLI_TESTS=true $(FAKE_CI_ENV) && $(GOTESTSUM) -- --short -p=6 -coverprofile=cover.out ./...
140+
@export KOSLI_TESTS=true $(FAKE_CI_ENV) && $(GOTESTSUM) -- --short -p=8 -coverprofile=cover.out ./...
141141
@go tool cover -html=cover.out
142142
@[ -e ~/.kosli-renamed.yml ] && mv ~/.kosli-renamed.yml ~/.kosli.yml || true
143143

internal/aws/aws.go

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"time"
1313

1414
"github.com/aws/aws-sdk-go-v2/aws"
15+
"github.com/aws/aws-sdk-go-v2/aws/retry"
1516
"github.com/aws/aws-sdk-go-v2/config"
1617
"github.com/aws/aws-sdk-go-v2/credentials"
1718
s3manager "github.com/aws/aws-sdk-go-v2/feature/s3/manager"
@@ -95,13 +96,29 @@ func (s *AWSStaticCreds) GetConfigOptFns() []func(*config.LoadOptions) error {
9596

9697
// NewAWSConfigFromEnvOrFlags returns an AWS config that can be used to construct
9798
// AWS service clients.
98-
// Credentials for config can be sourced from multiple sources, in this order:
99+
//
100+
// Credentials are sourced in this order:
99101
// 1) static credentials (from CLI flags or KOSLI env vars), if provided
100-
// 2) AWS Environment variables
101-
// 3) Shared AWS Configuration/Credentials files (see https://docs.aws.amazon.com/sdkref/latest/guide/file-format.html)
102-
// more details can be found here: https://aws.github.io/aws-sdk-go-v2/docs/configuring-sdk/#specifying-credentials
102+
// 2) AWS environment variables
103+
// 3) shared AWS configuration/credentials files (see https://docs.aws.amazon.com/sdkref/latest/guide/file-format.html)
104+
//
105+
// Retry: uses adaptive mode (MaxAttempts is set to 10) with a shared in-memory token
106+
// bucket. Commands like "kosli snapshot lambda" fetch functions concurrently
107+
// (see GetLambdaPackageData), which can trigger AWS rate limits (HTTP 429).
108+
// The shared token bucket slows down the entire batch of goroutines when
109+
// throttling is detected, rather than each goroutine retrying independently.
110+
//
111+
// More details: https://pkg.go.dev/github.com/aws/aws-sdk-go-v2/aws/retry
103112
func (staticCreds *AWSStaticCreds) NewAWSConfigFromEnvOrFlags() (aws.Config, error) {
104-
return config.LoadDefaultConfig(context.TODO(), staticCreds.GetConfigOptFns()...)
113+
optFns := staticCreds.GetConfigOptFns()
114+
optFns = append(optFns, config.WithRetryer(func() aws.Retryer {
115+
return retry.NewAdaptiveMode(func(o *retry.AdaptiveModeOptions) {
116+
o.StandardOptions = append(o.StandardOptions, func(so *retry.StandardOptions) {
117+
so.MaxAttempts = 10
118+
})
119+
})
120+
}))
121+
return config.LoadDefaultConfig(context.TODO(), optFns...)
105122
}
106123

107124
// NewS3Client returns a new S3 API client

0 commit comments

Comments
 (0)