Skip to content

Commit a1ef0e5

Browse files
authored
fix(retry): split read/write defaults, require idempotency key for write DEADLINE_EXCEEDED retries
Writes retried on DEADLINE_EXCEEDED can double-apply because the server may have already committed the change before the client's timeout fired. This change makes the safe behavior the default and puts the opt-in cost on the caller. - Read operations retry on UNAVAILABLE, DEADLINE_EXCEEDED, and RESOURCE_EXHAUSTED — safe since reads have no side effects. - Write operations retry on UNAVAILABLE only by default. - Write retries on DEADLINE_EXCEEDED are unlocked by passing an `idempotencyKey` option, signalling the operation is safe to repeat. - `retryableCodes` in ClientOptions still overrides both defaults when set explicitly. Closes #46
1 parent bfc9cf8 commit a1ef0e5

5 files changed

Lines changed: 129 additions & 16 deletions

File tree

docs/configuration.md

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,22 @@ certificate store.
9393

9494
The SDK retries transient gRPC errors with exponential backoff and jitter.
9595

96+
### Read vs write retry defaults
97+
98+
Read operations (`get`, `getAll`) retry on `UNAVAILABLE`, `DEADLINE_EXCEEDED`, and `RESOURCE_EXHAUSTED`. These are safe to retry because reads have no side effects.
99+
100+
Write operations (`set`, `setMany`, `setNull`) retry only on `UNAVAILABLE` by default. `DEADLINE_EXCEEDED` is excluded because the server may have already applied the write — retrying without a guarantee of idempotency can cause duplicate mutations.
101+
102+
To enable `DEADLINE_EXCEEDED` retries for a write, pass an `idempotencyKey`. This signals that the caller has ensured the write is safe to repeat (e.g. the value is the same as what the server would have written):
103+
104+
```typescript
105+
await client.set('tenant-id', 'payments.fee', '0.05', { idempotencyKey: 'set-fee-2026-05-20' });
106+
await client.setMany('tenant-id', { 'payments.fee': '0.05' }, { idempotencyKey: 'batch-1' });
107+
await client.setNull('tenant-id', 'payments.fee', { idempotencyKey: 'clear-fee' });
108+
```
109+
110+
If you set `retryableCodes` in `ClientOptions.retry`, that list overrides both read and write defaults for all operations.
111+
96112
### RetryConfig
97113

98114
| Option | Type | Default | Description |
@@ -101,7 +117,7 @@ The SDK retries transient gRPC errors with exponential backoff and jitter.
101117
| `initialBackoff` | `number` | `100` | Initial backoff in milliseconds |
102118
| `maxBackoff` | `number` | `5000` | Maximum backoff in milliseconds |
103119
| `multiplier` | `number` | `2` | Backoff multiplier between attempts |
104-
| `retryableCodes` | `GrpcStatus[]` | `[UNAVAILABLE, DEADLINE_EXCEEDED]` | gRPC codes that trigger a retry |
120+
| `retryableCodes` | `GrpcStatus[]` | see above | gRPC codes that trigger a retry (overrides read/write split) |
105121

106122
### Examples
107123

src/client.ts

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,12 @@ import {
2626
type GetServerInfoResponse,
2727
ServerServiceClient as GrpcServerServiceClient,
2828
} from "./generated/centralconfig/v1/server_service.js";
29-
import { withRetry } from "./retry.js";
29+
import {
30+
READ_RETRYABLE_CODES,
31+
WRITE_IDEMPOTENT_RETRYABLE_CODES,
32+
WRITE_RETRYABLE_CODES,
33+
withRetry,
34+
} from "./retry.js";
3035
import type { ClientOptions, RetryConfig, ServerInfo } from "./types.js";
3136
import { ConfigWatcher } from "./watcher.js";
3237

@@ -232,7 +237,7 @@ export class ConfigClient {
232237
tenantId: string,
233238
fieldPath: string,
234239
value: string,
235-
options?: { timeout?: number },
240+
options?: { timeout?: number; idempotencyKey?: string },
236241
): Promise<void> {
237242
const fn = async () => {
238243
await this.callSetField(
@@ -241,19 +246,22 @@ export class ConfigClient {
241246
);
242247
};
243248

244-
return this.withRetryAndMap(fn);
249+
const codes = options?.idempotencyKey
250+
? WRITE_IDEMPOTENT_RETRYABLE_CODES
251+
: WRITE_RETRYABLE_CODES;
252+
return this.withRetryAndMap(fn, codes);
245253
}
246254

247255
/**
248256
* Atomically set multiple config values.
249257
*
250258
* @param values - Record mapping field paths to string values.
251-
* @param options - Optional description for the audit log.
259+
* @param options - Optional description for the audit log and idempotency key for safe DEADLINE_EXCEEDED retries.
252260
*/
253261
async setMany(
254262
tenantId: string,
255263
values: Record<string, string>,
256-
options?: { description?: string; timeout?: number },
264+
options?: { description?: string; timeout?: number; idempotencyKey?: string },
257265
): Promise<void> {
258266
const fn = async () => {
259267
const updates = Object.entries(values).map(([fieldPath, v]) => ({
@@ -266,7 +274,10 @@ export class ConfigClient {
266274
);
267275
};
268276

269-
return this.withRetryAndMap(fn);
277+
const codes = options?.idempotencyKey
278+
? WRITE_IDEMPOTENT_RETRYABLE_CODES
279+
: WRITE_RETRYABLE_CODES;
280+
return this.withRetryAndMap(fn, codes);
270281
}
271282

272283
/**
@@ -275,13 +286,16 @@ export class ConfigClient {
275286
async setNull(
276287
tenantId: string,
277288
fieldPath: string,
278-
options?: { timeout?: number },
289+
options?: { timeout?: number; idempotencyKey?: string },
279290
): Promise<void> {
280291
const fn = async () => {
281292
await this.callSetField({ tenantId, fieldPath, value: undefined }, options?.timeout);
282293
};
283294

284-
return this.withRetryAndMap(fn);
295+
const codes = options?.idempotencyKey
296+
? WRITE_IDEMPOTENT_RETRYABLE_CODES
297+
: WRITE_RETRYABLE_CODES;
298+
return this.withRetryAndMap(fn, codes);
285299
}
286300

287301
/**
@@ -329,9 +343,16 @@ export class ConfigClient {
329343
return { version: resp.version, commit: resp.commit, features: resp.features };
330344
}
331345

332-
private async withRetryAndMap<T>(fn: () => Promise<T>): Promise<T> {
346+
private async withRetryAndMap<T>(
347+
fn: () => Promise<T>,
348+
defaultCodes: readonly number[] = READ_RETRYABLE_CODES,
349+
): Promise<T> {
350+
let config: RetryConfig | false = this.retryConfig;
351+
if (config !== false && !config.retryableCodes) {
352+
config = { ...config, retryableCodes: [...defaultCodes] };
353+
}
333354
try {
334-
return await withRetry(this.retryConfig, fn);
355+
return await withRetry(config, fn);
335356
} catch (err) {
336357
if (isServiceError(err)) {
337358
throw mapGrpcError(err);

src/retry.ts

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
/**
22
* Retry logic with exponential backoff and jitter.
33
*
4-
* Retries on UNAVAILABLE and DEADLINE_EXCEEDED by default.
4+
* Read operations retry on UNAVAILABLE, DEADLINE_EXCEEDED, and RESOURCE_EXHAUSTED.
5+
* Write operations retry only on UNAVAILABLE by default to avoid double-apply.
6+
* Write retries on DEADLINE_EXCEEDED require an idempotency key.
57
*/
68

79
import { type ServiceError, status } from "@grpc/grpc-js";
@@ -11,7 +13,19 @@ const DEFAULT_MAX_ATTEMPTS = 3;
1113
const DEFAULT_INITIAL_BACKOFF = 100; // ms
1214
const DEFAULT_MAX_BACKOFF = 5000; // ms
1315
const DEFAULT_MULTIPLIER = 2;
14-
const DEFAULT_RETRYABLE_CODES: readonly number[] = [status.UNAVAILABLE, status.DEADLINE_EXCEEDED];
16+
17+
export const READ_RETRYABLE_CODES: readonly number[] = [
18+
status.UNAVAILABLE,
19+
status.DEADLINE_EXCEEDED,
20+
status.RESOURCE_EXHAUSTED,
21+
];
22+
23+
export const WRITE_RETRYABLE_CODES: readonly number[] = [status.UNAVAILABLE];
24+
25+
export const WRITE_IDEMPOTENT_RETRYABLE_CODES: readonly number[] = [
26+
status.UNAVAILABLE,
27+
status.DEADLINE_EXCEEDED,
28+
];
1529

1630
function isServiceError(err: unknown): err is ServiceError {
1731
return err instanceof Error && typeof (err as ServiceError).code === "number";
@@ -39,7 +53,7 @@ export async function withRetry<T>(
3953
const initialBackoff = config.initialBackoff ?? DEFAULT_INITIAL_BACKOFF;
4054
const maxBackoff = config.maxBackoff ?? DEFAULT_MAX_BACKOFF;
4155
const multiplier = config.multiplier ?? DEFAULT_MULTIPLIER;
42-
const retryableCodes = config.retryableCodes ?? DEFAULT_RETRYABLE_CODES;
56+
const retryableCodes = config.retryableCodes ?? READ_RETRYABLE_CODES;
4357

4458
let lastErr: Error | undefined;
4559
let backoff = initialBackoff;

src/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ export interface RetryConfig {
4545
readonly maxBackoff?: number;
4646
/** Backoff multiplier between attempts. Default: 2. */
4747
readonly multiplier?: number;
48-
/** gRPC status codes that trigger a retry. Default: [UNAVAILABLE, DEADLINE_EXCEEDED]. */
48+
/** gRPC status codes that trigger a retry. Overrides the read/write split defaults. */
4949
readonly retryableCodes?: (typeof GrpcStatus)[keyof typeof GrpcStatus][];
5050
}
5151

test/retry.test.ts

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import { Metadata, type ServiceError, status } from "@grpc/grpc-js";
22
import { describe, expect, it, vi } from "vitest";
3-
import { withRetry } from "../src/retry.js";
3+
import {
4+
READ_RETRYABLE_CODES,
5+
WRITE_IDEMPOTENT_RETRYABLE_CODES,
6+
WRITE_RETRYABLE_CODES,
7+
withRetry,
8+
} from "../src/retry.js";
49

510
function makeServiceError(code: number, details: string): ServiceError {
611
const err = new Error(details) as ServiceError;
@@ -93,4 +98,61 @@ describe("withRetry", () => {
9398
await expect(withRetry({ maxAttempts: 1, initialBackoff: 1 }, fn)).rejects.toThrow("down");
9499
expect(fn).toHaveBeenCalledTimes(1);
95100
});
101+
102+
it("default config retries on RESOURCE_EXHAUSTED (read default)", async () => {
103+
const fn = vi
104+
.fn()
105+
.mockRejectedValueOnce(makeServiceError(status.RESOURCE_EXHAUSTED, "quota"))
106+
.mockResolvedValue("ok");
107+
108+
const result = await withRetry({ maxAttempts: 3, initialBackoff: 1 }, fn);
109+
expect(result).toBe("ok");
110+
expect(fn).toHaveBeenCalledTimes(2);
111+
});
112+
});
113+
114+
describe("retry code sets", () => {
115+
it("READ_RETRYABLE_CODES includes UNAVAILABLE, DEADLINE_EXCEEDED, RESOURCE_EXHAUSTED", () => {
116+
expect(READ_RETRYABLE_CODES).toContain(status.UNAVAILABLE);
117+
expect(READ_RETRYABLE_CODES).toContain(status.DEADLINE_EXCEEDED);
118+
expect(READ_RETRYABLE_CODES).toContain(status.RESOURCE_EXHAUSTED);
119+
});
120+
121+
it("WRITE_RETRYABLE_CODES includes only UNAVAILABLE", () => {
122+
expect(WRITE_RETRYABLE_CODES).toContain(status.UNAVAILABLE);
123+
expect(WRITE_RETRYABLE_CODES).not.toContain(status.DEADLINE_EXCEEDED);
124+
expect(WRITE_RETRYABLE_CODES).not.toContain(status.RESOURCE_EXHAUSTED);
125+
});
126+
127+
it("WRITE_IDEMPOTENT_RETRYABLE_CODES includes UNAVAILABLE and DEADLINE_EXCEEDED", () => {
128+
expect(WRITE_IDEMPOTENT_RETRYABLE_CODES).toContain(status.UNAVAILABLE);
129+
expect(WRITE_IDEMPOTENT_RETRYABLE_CODES).toContain(status.DEADLINE_EXCEEDED);
130+
expect(WRITE_IDEMPOTENT_RETRYABLE_CODES).not.toContain(status.RESOURCE_EXHAUSTED);
131+
});
132+
133+
it("withRetry uses WRITE_RETRYABLE_CODES: does not retry DEADLINE_EXCEEDED", async () => {
134+
const fn = vi.fn().mockRejectedValue(makeServiceError(status.DEADLINE_EXCEEDED, "timeout"));
135+
136+
await expect(
137+
withRetry(
138+
{ maxAttempts: 3, initialBackoff: 1, retryableCodes: [...WRITE_RETRYABLE_CODES] },
139+
fn,
140+
),
141+
).rejects.toThrow("timeout");
142+
expect(fn).toHaveBeenCalledTimes(1);
143+
});
144+
145+
it("withRetry uses WRITE_IDEMPOTENT_RETRYABLE_CODES: retries DEADLINE_EXCEEDED", async () => {
146+
const fn = vi
147+
.fn()
148+
.mockRejectedValueOnce(makeServiceError(status.DEADLINE_EXCEEDED, "timeout"))
149+
.mockResolvedValue("ok");
150+
151+
const result = await withRetry(
152+
{ maxAttempts: 3, initialBackoff: 1, retryableCodes: [...WRITE_IDEMPOTENT_RETRYABLE_CODES] },
153+
fn,
154+
);
155+
expect(result).toBe("ok");
156+
expect(fn).toHaveBeenCalledTimes(2);
157+
});
96158
});

0 commit comments

Comments
 (0)