diff --git a/docs/configuration.md b/docs/configuration.md index 3bbfe03..ed39c4d 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -93,6 +93,22 @@ certificate store. The SDK retries transient gRPC errors with exponential backoff and jitter. +### Read vs write retry defaults + +Read operations (`get`, `getAll`) retry on `UNAVAILABLE`, `DEADLINE_EXCEEDED`, and `RESOURCE_EXHAUSTED`. These are safe to retry because reads have no side effects. + +Write operations (`set`, `setMany`, `setNull`) retry only on `UNAVAILABLE` by default. `DEADLINE_EXCEEDED` is excluded because the server may have already applied the write — retrying without a guarantee of idempotency can cause duplicate mutations. + +To enable `DEADLINE_EXCEEDED` retries for a write, pass an `idempotencyKey`. This signals that the caller has ensured the write is safe to repeat (e.g. the value is the same as what the server would have written): + +```typescript +await client.set('tenant-id', 'payments.fee', '0.05', { idempotencyKey: 'set-fee-2026-05-20' }); +await client.setMany('tenant-id', { 'payments.fee': '0.05' }, { idempotencyKey: 'batch-1' }); +await client.setNull('tenant-id', 'payments.fee', { idempotencyKey: 'clear-fee' }); +``` + +If you set `retryableCodes` in `ClientOptions.retry`, that list overrides both read and write defaults for all operations. + ### RetryConfig | Option | Type | Default | Description | @@ -101,7 +117,7 @@ The SDK retries transient gRPC errors with exponential backoff and jitter. | `initialBackoff` | `number` | `100` | Initial backoff in milliseconds | | `maxBackoff` | `number` | `5000` | Maximum backoff in milliseconds | | `multiplier` | `number` | `2` | Backoff multiplier between attempts | -| `retryableCodes` | `GrpcStatus[]` | `[UNAVAILABLE, DEADLINE_EXCEEDED]` | gRPC codes that trigger a retry | +| `retryableCodes` | `GrpcStatus[]` | see above | gRPC codes that trigger a retry (overrides read/write split) | ### Examples diff --git a/src/client.ts b/src/client.ts index f014e88..678ebdc 100644 --- a/src/client.ts +++ b/src/client.ts @@ -26,7 +26,12 @@ import { type GetServerInfoResponse, ServerServiceClient as GrpcServerServiceClient, } from "./generated/centralconfig/v1/server_service.js"; -import { withRetry } from "./retry.js"; +import { + READ_RETRYABLE_CODES, + WRITE_IDEMPOTENT_RETRYABLE_CODES, + WRITE_RETRYABLE_CODES, + withRetry, +} from "./retry.js"; import type { ClientOptions, RetryConfig, ServerInfo } from "./types.js"; import { ConfigWatcher } from "./watcher.js"; @@ -232,7 +237,7 @@ export class ConfigClient { tenantId: string, fieldPath: string, value: string, - options?: { timeout?: number }, + options?: { timeout?: number; idempotencyKey?: string }, ): Promise { const fn = async () => { await this.callSetField( @@ -241,19 +246,22 @@ export class ConfigClient { ); }; - return this.withRetryAndMap(fn); + const codes = options?.idempotencyKey + ? WRITE_IDEMPOTENT_RETRYABLE_CODES + : WRITE_RETRYABLE_CODES; + return this.withRetryAndMap(fn, codes); } /** * Atomically set multiple config values. * * @param values - Record mapping field paths to string values. - * @param options - Optional description for the audit log. + * @param options - Optional description for the audit log and idempotency key for safe DEADLINE_EXCEEDED retries. */ async setMany( tenantId: string, values: Record, - options?: { description?: string; timeout?: number }, + options?: { description?: string; timeout?: number; idempotencyKey?: string }, ): Promise { const fn = async () => { const updates = Object.entries(values).map(([fieldPath, v]) => ({ @@ -266,7 +274,10 @@ export class ConfigClient { ); }; - return this.withRetryAndMap(fn); + const codes = options?.idempotencyKey + ? WRITE_IDEMPOTENT_RETRYABLE_CODES + : WRITE_RETRYABLE_CODES; + return this.withRetryAndMap(fn, codes); } /** @@ -275,13 +286,16 @@ export class ConfigClient { async setNull( tenantId: string, fieldPath: string, - options?: { timeout?: number }, + options?: { timeout?: number; idempotencyKey?: string }, ): Promise { const fn = async () => { await this.callSetField({ tenantId, fieldPath, value: undefined }, options?.timeout); }; - return this.withRetryAndMap(fn); + const codes = options?.idempotencyKey + ? WRITE_IDEMPOTENT_RETRYABLE_CODES + : WRITE_RETRYABLE_CODES; + return this.withRetryAndMap(fn, codes); } /** @@ -329,9 +343,16 @@ export class ConfigClient { return { version: resp.version, commit: resp.commit, features: resp.features }; } - private async withRetryAndMap(fn: () => Promise): Promise { + private async withRetryAndMap( + fn: () => Promise, + defaultCodes: readonly number[] = READ_RETRYABLE_CODES, + ): Promise { + let config: RetryConfig | false = this.retryConfig; + if (config !== false && !config.retryableCodes) { + config = { ...config, retryableCodes: [...defaultCodes] }; + } try { - return await withRetry(this.retryConfig, fn); + return await withRetry(config, fn); } catch (err) { if (isServiceError(err)) { throw mapGrpcError(err); diff --git a/src/retry.ts b/src/retry.ts index 7c686d9..f8c9daa 100644 --- a/src/retry.ts +++ b/src/retry.ts @@ -1,7 +1,9 @@ /** * Retry logic with exponential backoff and jitter. * - * Retries on UNAVAILABLE and DEADLINE_EXCEEDED by default. + * Read operations retry on UNAVAILABLE, DEADLINE_EXCEEDED, and RESOURCE_EXHAUSTED. + * Write operations retry only on UNAVAILABLE by default to avoid double-apply. + * Write retries on DEADLINE_EXCEEDED require an idempotency key. */ import { type ServiceError, status } from "@grpc/grpc-js"; @@ -11,7 +13,19 @@ const DEFAULT_MAX_ATTEMPTS = 3; const DEFAULT_INITIAL_BACKOFF = 100; // ms const DEFAULT_MAX_BACKOFF = 5000; // ms const DEFAULT_MULTIPLIER = 2; -const DEFAULT_RETRYABLE_CODES: readonly number[] = [status.UNAVAILABLE, status.DEADLINE_EXCEEDED]; + +export const READ_RETRYABLE_CODES: readonly number[] = [ + status.UNAVAILABLE, + status.DEADLINE_EXCEEDED, + status.RESOURCE_EXHAUSTED, +]; + +export const WRITE_RETRYABLE_CODES: readonly number[] = [status.UNAVAILABLE]; + +export const WRITE_IDEMPOTENT_RETRYABLE_CODES: readonly number[] = [ + status.UNAVAILABLE, + status.DEADLINE_EXCEEDED, +]; function isServiceError(err: unknown): err is ServiceError { return err instanceof Error && typeof (err as ServiceError).code === "number"; @@ -39,7 +53,7 @@ export async function withRetry( const initialBackoff = config.initialBackoff ?? DEFAULT_INITIAL_BACKOFF; const maxBackoff = config.maxBackoff ?? DEFAULT_MAX_BACKOFF; const multiplier = config.multiplier ?? DEFAULT_MULTIPLIER; - const retryableCodes = config.retryableCodes ?? DEFAULT_RETRYABLE_CODES; + const retryableCodes = config.retryableCodes ?? READ_RETRYABLE_CODES; let lastErr: Error | undefined; let backoff = initialBackoff; diff --git a/src/types.ts b/src/types.ts index b27d44d..946b632 100644 --- a/src/types.ts +++ b/src/types.ts @@ -45,7 +45,7 @@ export interface RetryConfig { readonly maxBackoff?: number; /** Backoff multiplier between attempts. Default: 2. */ readonly multiplier?: number; - /** gRPC status codes that trigger a retry. Default: [UNAVAILABLE, DEADLINE_EXCEEDED]. */ + /** gRPC status codes that trigger a retry. Overrides the read/write split defaults. */ readonly retryableCodes?: (typeof GrpcStatus)[keyof typeof GrpcStatus][]; } diff --git a/test/retry.test.ts b/test/retry.test.ts index 595a229..4d336a9 100644 --- a/test/retry.test.ts +++ b/test/retry.test.ts @@ -1,6 +1,11 @@ import { Metadata, type ServiceError, status } from "@grpc/grpc-js"; import { describe, expect, it, vi } from "vitest"; -import { withRetry } from "../src/retry.js"; +import { + READ_RETRYABLE_CODES, + WRITE_IDEMPOTENT_RETRYABLE_CODES, + WRITE_RETRYABLE_CODES, + withRetry, +} from "../src/retry.js"; function makeServiceError(code: number, details: string): ServiceError { const err = new Error(details) as ServiceError; @@ -93,4 +98,61 @@ describe("withRetry", () => { await expect(withRetry({ maxAttempts: 1, initialBackoff: 1 }, fn)).rejects.toThrow("down"); expect(fn).toHaveBeenCalledTimes(1); }); + + it("default config retries on RESOURCE_EXHAUSTED (read default)", async () => { + const fn = vi + .fn() + .mockRejectedValueOnce(makeServiceError(status.RESOURCE_EXHAUSTED, "quota")) + .mockResolvedValue("ok"); + + const result = await withRetry({ maxAttempts: 3, initialBackoff: 1 }, fn); + expect(result).toBe("ok"); + expect(fn).toHaveBeenCalledTimes(2); + }); +}); + +describe("retry code sets", () => { + it("READ_RETRYABLE_CODES includes UNAVAILABLE, DEADLINE_EXCEEDED, RESOURCE_EXHAUSTED", () => { + expect(READ_RETRYABLE_CODES).toContain(status.UNAVAILABLE); + expect(READ_RETRYABLE_CODES).toContain(status.DEADLINE_EXCEEDED); + expect(READ_RETRYABLE_CODES).toContain(status.RESOURCE_EXHAUSTED); + }); + + it("WRITE_RETRYABLE_CODES includes only UNAVAILABLE", () => { + expect(WRITE_RETRYABLE_CODES).toContain(status.UNAVAILABLE); + expect(WRITE_RETRYABLE_CODES).not.toContain(status.DEADLINE_EXCEEDED); + expect(WRITE_RETRYABLE_CODES).not.toContain(status.RESOURCE_EXHAUSTED); + }); + + it("WRITE_IDEMPOTENT_RETRYABLE_CODES includes UNAVAILABLE and DEADLINE_EXCEEDED", () => { + expect(WRITE_IDEMPOTENT_RETRYABLE_CODES).toContain(status.UNAVAILABLE); + expect(WRITE_IDEMPOTENT_RETRYABLE_CODES).toContain(status.DEADLINE_EXCEEDED); + expect(WRITE_IDEMPOTENT_RETRYABLE_CODES).not.toContain(status.RESOURCE_EXHAUSTED); + }); + + it("withRetry uses WRITE_RETRYABLE_CODES: does not retry DEADLINE_EXCEEDED", async () => { + const fn = vi.fn().mockRejectedValue(makeServiceError(status.DEADLINE_EXCEEDED, "timeout")); + + await expect( + withRetry( + { maxAttempts: 3, initialBackoff: 1, retryableCodes: [...WRITE_RETRYABLE_CODES] }, + fn, + ), + ).rejects.toThrow("timeout"); + expect(fn).toHaveBeenCalledTimes(1); + }); + + it("withRetry uses WRITE_IDEMPOTENT_RETRYABLE_CODES: retries DEADLINE_EXCEEDED", async () => { + const fn = vi + .fn() + .mockRejectedValueOnce(makeServiceError(status.DEADLINE_EXCEEDED, "timeout")) + .mockResolvedValue("ok"); + + const result = await withRetry( + { maxAttempts: 3, initialBackoff: 1, retryableCodes: [...WRITE_IDEMPOTENT_RETRYABLE_CODES] }, + fn, + ); + expect(result).toBe("ok"); + expect(fn).toHaveBeenCalledTimes(2); + }); });