Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,22 @@ certificate store.

The SDK retries transient gRPC errors with exponential backoff and jitter.

### Read vs write retry defaults

Read operations (`get`, `getAll`) retry on `UNAVAILABLE`, `DEADLINE_EXCEEDED`, and `RESOURCE_EXHAUSTED`. These are safe to retry because reads have no side effects.

Write operations (`set`, `setMany`, `setNull`) retry only on `UNAVAILABLE` by default. `DEADLINE_EXCEEDED` is excluded because the server may have already applied the write — retrying without a guarantee of idempotency can cause duplicate mutations.

To enable `DEADLINE_EXCEEDED` retries for a write, pass an `idempotencyKey`. This signals that the caller has ensured the write is safe to repeat (e.g. the value is the same as what the server would have written):

```typescript
await client.set('tenant-id', 'payments.fee', '0.05', { idempotencyKey: 'set-fee-2026-05-20' });
await client.setMany('tenant-id', { 'payments.fee': '0.05' }, { idempotencyKey: 'batch-1' });
await client.setNull('tenant-id', 'payments.fee', { idempotencyKey: 'clear-fee' });
```

If you set `retryableCodes` in `ClientOptions.retry`, that list overrides both read and write defaults for all operations.

### RetryConfig

| Option | Type | Default | Description |
Expand All @@ -101,7 +117,7 @@ The SDK retries transient gRPC errors with exponential backoff and jitter.
| `initialBackoff` | `number` | `100` | Initial backoff in milliseconds |
| `maxBackoff` | `number` | `5000` | Maximum backoff in milliseconds |
| `multiplier` | `number` | `2` | Backoff multiplier between attempts |
| `retryableCodes` | `GrpcStatus[]` | `[UNAVAILABLE, DEADLINE_EXCEEDED]` | gRPC codes that trigger a retry |
| `retryableCodes` | `GrpcStatus[]` | see above | gRPC codes that trigger a retry (overrides read/write split) |

### Examples

Expand Down
41 changes: 31 additions & 10 deletions src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@ import {
type GetServerInfoResponse,
ServerServiceClient as GrpcServerServiceClient,
} from "./generated/centralconfig/v1/server_service.js";
import { withRetry } from "./retry.js";
import {
READ_RETRYABLE_CODES,
WRITE_IDEMPOTENT_RETRYABLE_CODES,
WRITE_RETRYABLE_CODES,
withRetry,
} from "./retry.js";
import type { ClientOptions, RetryConfig, ServerInfo } from "./types.js";
import { ConfigWatcher } from "./watcher.js";

Expand Down Expand Up @@ -232,7 +237,7 @@ export class ConfigClient {
tenantId: string,
fieldPath: string,
value: string,
options?: { timeout?: number },
options?: { timeout?: number; idempotencyKey?: string },
): Promise<void> {
const fn = async () => {
await this.callSetField(
Expand All @@ -241,19 +246,22 @@ export class ConfigClient {
);
};

return this.withRetryAndMap(fn);
const codes = options?.idempotencyKey
? WRITE_IDEMPOTENT_RETRYABLE_CODES
: WRITE_RETRYABLE_CODES;
return this.withRetryAndMap(fn, codes);
}

/**
* Atomically set multiple config values.
*
* @param values - Record mapping field paths to string values.
* @param options - Optional description for the audit log.
* @param options - Optional description for the audit log and idempotency key for safe DEADLINE_EXCEEDED retries.
*/
async setMany(
tenantId: string,
values: Record<string, string>,
options?: { description?: string; timeout?: number },
options?: { description?: string; timeout?: number; idempotencyKey?: string },
): Promise<void> {
const fn = async () => {
const updates = Object.entries(values).map(([fieldPath, v]) => ({
Expand All @@ -266,7 +274,10 @@ export class ConfigClient {
);
};

return this.withRetryAndMap(fn);
const codes = options?.idempotencyKey
? WRITE_IDEMPOTENT_RETRYABLE_CODES
: WRITE_RETRYABLE_CODES;
return this.withRetryAndMap(fn, codes);
}

/**
Expand All @@ -275,13 +286,16 @@ export class ConfigClient {
async setNull(
tenantId: string,
fieldPath: string,
options?: { timeout?: number },
options?: { timeout?: number; idempotencyKey?: string },
): Promise<void> {
const fn = async () => {
await this.callSetField({ tenantId, fieldPath, value: undefined }, options?.timeout);
};

return this.withRetryAndMap(fn);
const codes = options?.idempotencyKey
? WRITE_IDEMPOTENT_RETRYABLE_CODES
: WRITE_RETRYABLE_CODES;
return this.withRetryAndMap(fn, codes);
}

/**
Expand Down Expand Up @@ -329,9 +343,16 @@ export class ConfigClient {
return { version: resp.version, commit: resp.commit, features: resp.features };
}

private async withRetryAndMap<T>(fn: () => Promise<T>): Promise<T> {
private async withRetryAndMap<T>(
fn: () => Promise<T>,
defaultCodes: readonly number[] = READ_RETRYABLE_CODES,
): Promise<T> {
let config: RetryConfig | false = this.retryConfig;
if (config !== false && !config.retryableCodes) {
config = { ...config, retryableCodes: [...defaultCodes] };
}
try {
return await withRetry(this.retryConfig, fn);
return await withRetry(config, fn);
} catch (err) {
if (isServiceError(err)) {
throw mapGrpcError(err);
Expand Down
20 changes: 17 additions & 3 deletions src/retry.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
/**
* Retry logic with exponential backoff and jitter.
*
* Retries on UNAVAILABLE and DEADLINE_EXCEEDED by default.
* Read operations retry on UNAVAILABLE, DEADLINE_EXCEEDED, and RESOURCE_EXHAUSTED.
* Write operations retry only on UNAVAILABLE by default to avoid double-apply.
* Write retries on DEADLINE_EXCEEDED require an idempotency key.
*/

import { type ServiceError, status } from "@grpc/grpc-js";
Expand All @@ -11,7 +13,19 @@ const DEFAULT_MAX_ATTEMPTS = 3;
const DEFAULT_INITIAL_BACKOFF = 100; // ms
const DEFAULT_MAX_BACKOFF = 5000; // ms
const DEFAULT_MULTIPLIER = 2;
const DEFAULT_RETRYABLE_CODES: readonly number[] = [status.UNAVAILABLE, status.DEADLINE_EXCEEDED];

export const READ_RETRYABLE_CODES: readonly number[] = [
status.UNAVAILABLE,
status.DEADLINE_EXCEEDED,
status.RESOURCE_EXHAUSTED,
];

export const WRITE_RETRYABLE_CODES: readonly number[] = [status.UNAVAILABLE];

export const WRITE_IDEMPOTENT_RETRYABLE_CODES: readonly number[] = [
status.UNAVAILABLE,
status.DEADLINE_EXCEEDED,
];

function isServiceError(err: unknown): err is ServiceError {
return err instanceof Error && typeof (err as ServiceError).code === "number";
Expand Down Expand Up @@ -39,7 +53,7 @@ export async function withRetry<T>(
const initialBackoff = config.initialBackoff ?? DEFAULT_INITIAL_BACKOFF;
const maxBackoff = config.maxBackoff ?? DEFAULT_MAX_BACKOFF;
const multiplier = config.multiplier ?? DEFAULT_MULTIPLIER;
const retryableCodes = config.retryableCodes ?? DEFAULT_RETRYABLE_CODES;
const retryableCodes = config.retryableCodes ?? READ_RETRYABLE_CODES;

let lastErr: Error | undefined;
let backoff = initialBackoff;
Expand Down
2 changes: 1 addition & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ export interface RetryConfig {
readonly maxBackoff?: number;
/** Backoff multiplier between attempts. Default: 2. */
readonly multiplier?: number;
/** gRPC status codes that trigger a retry. Default: [UNAVAILABLE, DEADLINE_EXCEEDED]. */
/** gRPC status codes that trigger a retry. Overrides the read/write split defaults. */
readonly retryableCodes?: (typeof GrpcStatus)[keyof typeof GrpcStatus][];
}

Expand Down
64 changes: 63 additions & 1 deletion test/retry.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import { Metadata, type ServiceError, status } from "@grpc/grpc-js";
import { describe, expect, it, vi } from "vitest";
import { withRetry } from "../src/retry.js";
import {
READ_RETRYABLE_CODES,
WRITE_IDEMPOTENT_RETRYABLE_CODES,
WRITE_RETRYABLE_CODES,
withRetry,
} from "../src/retry.js";

function makeServiceError(code: number, details: string): ServiceError {
const err = new Error(details) as ServiceError;
Expand Down Expand Up @@ -93,4 +98,61 @@ describe("withRetry", () => {
await expect(withRetry({ maxAttempts: 1, initialBackoff: 1 }, fn)).rejects.toThrow("down");
expect(fn).toHaveBeenCalledTimes(1);
});

it("default config retries on RESOURCE_EXHAUSTED (read default)", async () => {
const fn = vi
.fn()
.mockRejectedValueOnce(makeServiceError(status.RESOURCE_EXHAUSTED, "quota"))
.mockResolvedValue("ok");

const result = await withRetry({ maxAttempts: 3, initialBackoff: 1 }, fn);
expect(result).toBe("ok");
expect(fn).toHaveBeenCalledTimes(2);
});
});

describe("retry code sets", () => {
it("READ_RETRYABLE_CODES includes UNAVAILABLE, DEADLINE_EXCEEDED, RESOURCE_EXHAUSTED", () => {
expect(READ_RETRYABLE_CODES).toContain(status.UNAVAILABLE);
expect(READ_RETRYABLE_CODES).toContain(status.DEADLINE_EXCEEDED);
expect(READ_RETRYABLE_CODES).toContain(status.RESOURCE_EXHAUSTED);
});

it("WRITE_RETRYABLE_CODES includes only UNAVAILABLE", () => {
expect(WRITE_RETRYABLE_CODES).toContain(status.UNAVAILABLE);
expect(WRITE_RETRYABLE_CODES).not.toContain(status.DEADLINE_EXCEEDED);
expect(WRITE_RETRYABLE_CODES).not.toContain(status.RESOURCE_EXHAUSTED);
});

it("WRITE_IDEMPOTENT_RETRYABLE_CODES includes UNAVAILABLE and DEADLINE_EXCEEDED", () => {
expect(WRITE_IDEMPOTENT_RETRYABLE_CODES).toContain(status.UNAVAILABLE);
expect(WRITE_IDEMPOTENT_RETRYABLE_CODES).toContain(status.DEADLINE_EXCEEDED);
expect(WRITE_IDEMPOTENT_RETRYABLE_CODES).not.toContain(status.RESOURCE_EXHAUSTED);
});

it("withRetry uses WRITE_RETRYABLE_CODES: does not retry DEADLINE_EXCEEDED", async () => {
const fn = vi.fn().mockRejectedValue(makeServiceError(status.DEADLINE_EXCEEDED, "timeout"));

await expect(
withRetry(
{ maxAttempts: 3, initialBackoff: 1, retryableCodes: [...WRITE_RETRYABLE_CODES] },
fn,
),
).rejects.toThrow("timeout");
expect(fn).toHaveBeenCalledTimes(1);
});

it("withRetry uses WRITE_IDEMPOTENT_RETRYABLE_CODES: retries DEADLINE_EXCEEDED", async () => {
const fn = vi
.fn()
.mockRejectedValueOnce(makeServiceError(status.DEADLINE_EXCEEDED, "timeout"))
.mockResolvedValue("ok");

const result = await withRetry(
{ maxAttempts: 3, initialBackoff: 1, retryableCodes: [...WRITE_IDEMPOTENT_RETRYABLE_CODES] },
fn,
);
expect(result).toBe("ok");
expect(fn).toHaveBeenCalledTimes(2);
});
});