Skip to content

Commit c0ee91a

Browse files
committed
impr(CLDSRV-796): Handle redis unavailability during rate limit token refill
1 parent 8121e6d commit c0ee91a

File tree

7 files changed

+150
-28
lines changed

7 files changed

+150
-28
lines changed

.github/actions/setup-ci/action.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,5 @@ runs:
5050
- name: enable rate limiting in config and set the lisa account as the service user
5151
shell: bash
5252
run: |
53-
cat config.json | jq -r '.rateLimiting = {enabled: true, serviceUserArn: "arn:aws:iam::123456789013:root"}' > config.json.new
53+
cat config.json | jq -r '.rateLimiting = {enabled: true, nodes: 6, serviceUserArn: "arn:aws:iam::123456789013:root"}' > config.json.new
5454
mv config.json.new config.json

lib/api/apiUtils/rateLimit/client.js

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,14 @@ class RateLimitClient {
6565
}
6666
);
6767
}
68+
69+
/**
70+
* Returns whether the client is ready for operations
71+
* @returns {boolean}
72+
*/
73+
isReady() {
74+
return this.redis.status === 'ready' || this.redis.status === 'wait';
75+
}
6876
}
6977

7078
let instance;

lib/api/apiUtils/rateLimit/tokenBucket.js

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,24 @@ class WorkerTokenBucket {
107107
config.rateLimiting.bucket?.defaultConfig?.requestsPerSecond?.burstCapacity || 1;
108108
const burstCapacity = burstCapacitySeconds * 1000;
109109

110-
// Request tokens from Redis (atomic GCRA enforcement)
111-
const granted = await util.promisify(redisClient.grantTokens.bind(redisClient))(
112-
this.bucketName,
113-
requested,
114-
interval,
115-
burstCapacity,
116-
);
110+
let granted = requested;
111+
if (redisClient.isReady()) {
112+
// Request tokens from Redis (atomic GCRA enforcement)
113+
granted = await util.promisify(redisClient.grantTokens.bind(redisClient))(
114+
this.bucketName,
115+
requested,
116+
interval,
117+
burstCapacity,
118+
);
119+
} else {
120+
// Connection to redis has failed in some way.
121+
// Client will be reconnecting in the background.
122+
// We grant the requested amount of tokens anyway to avoid degrading service availability.
123+
this.log.warn(
124+
'rate limit redis client not connected. granting tokens anyway to avoid service degradation',
125+
{ bucketName: this.bucketName },
126+
);
127+
}
117128

118129
// Add granted tokens to buffer
119130
this.tokens += granted;

lib/api/bucketPutRateLimit.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ function validateRateLimitConfig(requestConfig, callback) {
3232
return callback(errorInstances.InvalidArgument
3333
.customizeDescription(
3434
`RequestsPerSecond (${limit}) must be >= ` +
35-
`(nodes × workers = ${nodes} × ${workers} = ${minLimit}) or 0 (unlimited). ` +
35+
`(nodes x workers = ${nodes} x ${workers} = ${minLimit}) or 0 (unlimited). ` +
3636
'Each worker enforces limit/nodes/workers locally. ' +
37-
`With limit < ${minLimit}, per-worker rate would be < 1 req/s, ` +
37+
`With limit less than ${minLimit}, per-worker rate would be less than 1 req/s, ` +
3838
'effectively blocking traffic.'
3939
));
4040
}

lib/server.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -376,9 +376,9 @@ class S3Server {
376376

377377
// Start rate limit background job
378378
if (this.config.rateLimiting?.enabled) {
379-
startCleanupJob(log);
379+
startCleanupJob(logger);
380380
// Start token refill job for token reservation system
381-
startRefillJob(log);
381+
startRefillJob(logger);
382382
}
383383

384384
// Start API server(s)

tests/functional/aws-node-sdk/test/bucket/putBucketRateLimit.js

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -113,26 +113,30 @@ skipIfRateLimitDisabled('Test put bucket rate limit', () => {
113113
});
114114

115115
describe('validation against node and worker count', () => {
116-
it.skip('should reject limits less than (nodes × workers)', async () => {
117-
const nodes = config.rateLimiting?.nodes || 1;
118-
const workers = config.clusters || 1;
119-
const minLimit = nodes * workers;
120116

117+
const nodes = config.rateLimiting?.nodes || 1;
118+
const workers = config.clusters || 1;
119+
const minLimit = nodes * workers;
120+
const skipIfSingleNode = nodes === 1 ? it.skip : it;
121+
122+
// Test requires multiple nodes to pass
123+
// With only 1 node and 1 worker the minLimit is 1.
124+
// This leaves no invalid values to test as 0 is also a valid setting (unlimited)
125+
skipIfSingleNode('should reject limits less than (nodes x workers)', async () => {
126+
let error;
121127
try {
122128
const invalidConfig = { RequestsPerSecond: minLimit - 1 };
123129
await sendRateLimitRequest('PUT', '127.0.0.1:8000',
124130
`/${bucket}/?rate-limit`, JSON.stringify(invalidConfig));
125-
assert.fail('Should have thrown an error');
126131
} catch (err) {
127-
assert.strictEqual(err.Error.Code[0], 'InvalidArgument');
132+
error = err;
133+
} finally {
134+
assert(error !== undefined, 'error expected');
135+
assert.strictEqual(error.Error.Code[0], 'InvalidArgument');
128136
}
129137
});
130138

131-
it.skip('should accept limits equal to (nodes × workers)', async () => {
132-
const nodes = config.rateLimiting?.nodes || 1;
133-
const workers = config.clusters || 1;
134-
const minLimit = nodes * workers;
135-
139+
it('should accept limits equal to (nodes x workers)', async () => {
136140
try {
137141
const validConfig = { RequestsPerSecond: minLimit };
138142
await sendRateLimitRequest('PUT', '127.0.0.1:8000',
@@ -146,11 +150,7 @@ skipIfRateLimitDisabled('Test put bucket rate limit', () => {
146150
}
147151
});
148152

149-
it('should accept limits greater than (nodes × workers)', async () => {
150-
const nodes = config.rateLimiting?.nodes || 1;
151-
const workers = config.clusters || 1;
152-
const minLimit = nodes * workers;
153-
153+
it('should accept limits greater than (nodes x workers)', async () => {
154154
try {
155155
const validConfig = { RequestsPerSecond: minLimit + 1000 };
156156
await sendRateLimitRequest('PUT', '127.0.0.1:8000',
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
const assert = require('assert');
2+
const { RateLimitClient } = require('../../../../../lib/api/apiUtils/rateLimit/client');
3+
const { config } = require('../../../../../lib/Config');
4+
const { skipIfRateLimitDisabled } = require('./tooling');
5+
6+
const testBucket = 'rate-limit-test-bucket';
7+
8+
skipIfRateLimitDisabled('RateLimitClient', () => {
9+
let client;
10+
11+
before(async () => {
12+
// Create a test client using the same config as the application
13+
client = new RateLimitClient(config.localCache);
14+
15+
// Connect the client (since lazyConnect is true)
16+
await client.redis.connect();
17+
});
18+
19+
after(async () => client.redis.quit().catch(() => {}));
20+
21+
beforeEach(async () => {
22+
const keys = await client.redis.keys('ratelimit:bucket:*');
23+
if (keys.length > 0) {
24+
await client.redis.del(...keys);
25+
}
26+
});
27+
28+
describe('isReady', () => {
29+
it('should return true when client is connected to redis', () => {
30+
assert.strictEqual(client.isReady(), true);
31+
});
32+
33+
it('should return true when the client is waiting to connect or the first time', () => {
34+
const client = new RateLimitClient(config.localCache);
35+
assert.strictEqual(client.isReady(), true);
36+
});
37+
});
38+
39+
describe('grantTokens', () => {
40+
it('should grant requested tokens when quota is available', done => {
41+
const requested = 5;
42+
const interval = 100; // 100ms per request = 10 req/s
43+
const burstCapacity = 1000; // 1000ms burst capacity
44+
45+
client.grantTokens(testBucket, requested, interval, burstCapacity, (err, granted) => {
46+
assert.ifError(err);
47+
assert.strictEqual(granted, requested);
48+
done();
49+
});
50+
});
51+
52+
it('should grant tokens multiple times within burst capacity', done => {
53+
const requested = 2;
54+
const interval = 100; // 100ms per request
55+
const burstCapacity = 1000; // 1000ms burst capacity
56+
57+
// First request
58+
client.grantTokens(testBucket, requested, interval, burstCapacity, (err, granted1) => {
59+
assert.ifError(err);
60+
assert.strictEqual(granted1, requested);
61+
62+
// Second request immediately after
63+
client.grantTokens(testBucket, requested, interval, burstCapacity, (err, granted2) => {
64+
assert.ifError(err);
65+
assert.strictEqual(granted2, requested);
66+
done();
67+
});
68+
});
69+
});
70+
71+
it('should grant partial tokens when request exceeds available capacity', done => {
72+
const interval = 100; // 100ms per request
73+
const burstCapacity = 500; // 500ms burst capacity = max 5 tokens
74+
75+
// Request more tokens than available in burst
76+
client.grantTokens(testBucket, 10, interval, burstCapacity, (err, granted) => {
77+
assert.ifError(err);
78+
// Should grant partial tokens (5 tokens max with 500ms burst)
79+
assert(granted > 0, 'Should grant at least some tokens');
80+
assert(granted <= 5, 'Should not grant more than burst capacity allows');
81+
done();
82+
});
83+
});
84+
85+
it('should deny tokens (return 0) when quota is exhausted', done => {
86+
const interval = 100; // 100ms per request
87+
const burstCapacity = 100; // 100ms burst capacity = max 1 token
88+
89+
// First request consumes the burst capacity
90+
client.grantTokens(testBucket, 1, interval, burstCapacity, (err, granted1) => {
91+
assert.ifError(err);
92+
assert.strictEqual(granted1, 1);
93+
94+
// Second request immediately after should be denied
95+
client.grantTokens(testBucket, 1, interval, burstCapacity, (err, granted2) => {
96+
assert.ifError(err);
97+
assert.strictEqual(granted2, 0, 'Should deny tokens when quota exhausted');
98+
done();
99+
});
100+
});
101+
});
102+
});
103+
});

0 commit comments

Comments
 (0)