refactor(ymax-resolver): update resolver retry helper#12676
Conversation
gibson042
left a comment
There was a problem hiding this comment.
Pretty good! I'm just calling for a handful of tweaks.
| @@ -0,0 +1,132 @@ | |||
| /** | |||
| * Retry helper for chain submissions (resolver settlements, plan submissions). | |||
There was a problem hiding this comment.
I don't see anything in this file that is specific to chain submissions.
| * Retry helper for chain submissions (resolver settlements, plan submissions). | |
| * Helpers for automatic retries with exponential backoff. |
| * Ops alerting filters match on this prefix. | ||
| */ | ||
| alertingPrefix: string; | ||
| signal?: AbortSignal; |
There was a problem hiding this comment.
signal is generic and should be in WithRetriesOpts for consumption by withRetries as well (even though withRetriesForAlerting will probably handle it directly rather than passing it along).
| const elapsedMs = now() - start; | ||
| const shouldAlert = now() - lastAlertAt >= alertIntervalMs; | ||
| if (shouldAlert) lastAlertAt = now(); |
There was a problem hiding this comment.
| const elapsedMs = now() - start; | |
| const shouldAlert = now() - lastAlertAt >= alertIntervalMs; | |
| if (shouldAlert) lastAlertAt = now(); | |
| const t = now(); | |
| const elapsedMs = t - start; | |
| const shouldAlert = t - lastAlertAt >= alertIntervalMs; | |
| if (shouldAlert) lastAlertAt = t; |
| * | ||
| * Unbounded retries with exponential backoff. Every failed attempt is logged, | ||
| * but only the first failure and then one every `alertIntervalMs` carry the | ||
| * caller-supplied `errorCode` prefix — the ops alerting filter matches on that | ||
| * code, so this in-code throttling keeps pages to one per interval while the | ||
| * submission stays stuck. | ||
| * | ||
| * Each retry is a fresh call through the sequencing wallet's queue, so other | ||
| * in-flight submissions interleave normally between attempts. |
There was a problem hiding this comment.
| * | |
| * Unbounded retries with exponential backoff. Every failed attempt is logged, | |
| * but only the first failure and then one every `alertIntervalMs` carry the | |
| * caller-supplied `errorCode` prefix — the ops alerting filter matches on that | |
| * code, so this in-code throttling keeps pages to one per interval while the | |
| * submission stays stuck. | |
| * | |
| * Each retry is a fresh call through the sequencing wallet's queue, so other | |
| * in-flight submissions interleave normally between attempts. |
| const makeHarness = () => { | ||
| const logs: unknown[][] = []; | ||
| const sleeps: number[] = []; | ||
| let nowMs = 1_000_000; | ||
| const log = (...args: unknown[]) => logs.push(args); | ||
| const setTimeout = ((fn: () => void, ms: number) => { | ||
| sleeps.push(ms); | ||
| nowMs += ms; | ||
| queueMicrotask(fn); | ||
| return 0 as unknown as NodeJS.Timeout; | ||
| }) as typeof globalThis.setTimeout; | ||
| const now = () => nowMs; | ||
| return { logs, sleeps, log, setTimeout, now }; | ||
| }; | ||
|
|
||
| const baseOpts = ( | ||
| h: ReturnType<typeof makeHarness>, | ||
| ): WithRetriesForAlertingOpts => ({ | ||
| log: h.log, | ||
| setTimeout: h.setTimeout, | ||
| now: h.now, | ||
| alertingPrefix: TEST_ALERTING_PREFIX, | ||
| }); |
There was a problem hiding this comment.
AFAICT, baseOpts is only called with the output from makeHarness... let's just combine them.
| const makeHarness = () => { | |
| const logs: unknown[][] = []; | |
| const sleeps: number[] = []; | |
| let nowMs = 1_000_000; | |
| const log = (...args: unknown[]) => logs.push(args); | |
| const setTimeout = ((fn: () => void, ms: number) => { | |
| sleeps.push(ms); | |
| nowMs += ms; | |
| queueMicrotask(fn); | |
| return 0 as unknown as NodeJS.Timeout; | |
| }) as typeof globalThis.setTimeout; | |
| const now = () => nowMs; | |
| return { logs, sleeps, log, setTimeout, now }; | |
| }; | |
| const baseOpts = ( | |
| h: ReturnType<typeof makeHarness>, | |
| ): WithRetriesForAlertingOpts => ({ | |
| log: h.log, | |
| setTimeout: h.setTimeout, | |
| now: h.now, | |
| alertingPrefix: TEST_ALERTING_PREFIX, | |
| }); | |
| const makeHarness = () => { | |
| const logs: unknown[][] = []; | |
| const sleeps: number[] = []; | |
| let nowMs = 1_000_000; | |
| const log = (...args: unknown[]) => logs.push(args); | |
| const setTimeout = ((fn: () => void, ms: number) => { | |
| sleeps.push(ms); | |
| nowMs += ms; | |
| queueMicrotask(fn); | |
| return 0 as unknown as NodeJS.Timeout; | |
| }) as typeof globalThis.setTimeout; | |
| const now = () => nowMs; | |
| const baseOpts: WithRetriesForAlertingOpts = { | |
| log, | |
| setTimeout, | |
| now, | |
| alertingPrefix: TEST_ALERTING_PREFIX, | |
| }; | |
| return { logs, sleeps, log, setTimeout, now, baseOpts }; | |
| }; |
| expectCalls: number; | ||
| expectResult: unknown; | ||
| expectSleeps: number[]; | ||
| expectLogs: unknown[][]; |
There was a problem hiding this comment.
Nit: expectSleeps and expectLogs are used for t.deepEqual, implying that expectCalls would be as well. Let's rename it to expectCallCount.
| expectCalls: number; | |
| expectResult: unknown; | |
| expectSleeps: number[]; | |
| expectLogs: unknown[][]; | |
| expectCallCount: number; | |
| expectResult: unknown; | |
| expectSleeps: number[]; | |
| expectLogs: unknown[][]; |
| expectLogs: [ | ||
| [ | ||
| `${TEST_ALERTING_PREFIX} [tx4] settlement attempt 1 failed after 0ms, retrying in 5000ms`, | ||
| failure, | ||
| ], | ||
| [ | ||
| '[tx4] settlement attempt 2 failed after 5000ms, retrying in 10000ms', | ||
| failure, | ||
| ], |
There was a problem hiding this comment.
These tests would be easier to read if they used makeExpectedLog as well.
| ]); | ||
| }); | ||
|
|
||
| test('withRetries: returns the callback result on first success', async t => { |
There was a problem hiding this comment.
Please move the withRetries tests up to the top; I initially commented on them being redundant.
5cb3784 to
514c7d0
Compare
| const makeExpectedLogFor = | ||
| (label: string) => | ||
| ( | ||
| attempt: number, | ||
| elapsedMs: number, | ||
| nextDelayMs?: number, | ||
| errLevel?: 'quiet' | 'noisy', | ||
| ) => { | ||
| if (nextDelayMs === undefined) { | ||
| return [`${label} succeeded after ${attempt} attempt(s)`]; | ||
| } | ||
| const baseMsg = `${label} attempt ${attempt} failed after ${elapsedMs}ms, retrying in ${nextDelayMs}ms`; | ||
| return [ | ||
| errLevel === 'noisy' ? `${TEST_ALERTING_PREFIX} ${baseMsg}` : baseMsg, | ||
| failure, | ||
| ]; | ||
| }; |
There was a problem hiding this comment.
Readability suggestion:
| const makeExpectedLogFor = | |
| (label: string) => | |
| ( | |
| attempt: number, | |
| elapsedMs: number, | |
| nextDelayMs?: number, | |
| errLevel?: 'quiet' | 'noisy', | |
| ) => { | |
| if (nextDelayMs === undefined) { | |
| return [`${label} succeeded after ${attempt} attempt(s)`]; | |
| } | |
| const baseMsg = `${label} attempt ${attempt} failed after ${elapsedMs}ms, retrying in ${nextDelayMs}ms`; | |
| return [ | |
| errLevel === 'noisy' ? `${TEST_ALERTING_PREFIX} ${baseMsg}` : baseMsg, | |
| failure, | |
| ]; | |
| }; | |
| const makeExpectLog = (label: string) => { | |
| const expectLog = ( | |
| attempt: number, | |
| elapsedMs: number, | |
| nextDelayMs?: number, | |
| errLevel?: 'quiet' | 'noisy', | |
| ) => { | |
| if (nextDelayMs === undefined) { | |
| return [`${label} succeeded after ${attempt} attempt(s)`]; | |
| } | |
| const baseMsg = `${label} attempt ${attempt} failed after ${elapsedMs}ms, retrying in ${nextDelayMs}ms`; | |
| return [ | |
| errLevel === 'noisy' ? `${TEST_ALERTING_PREFIX} ${baseMsg}` : baseMsg, | |
| failure, | |
| ]; | |
| }; | |
| return expectLog; | |
| }; |
514c7d0 to
ebe608a
Compare
closes: https://linear.app/agoric/issue/AGO-487/follow-up-for-12626-feedback
refs: #12626
Description
Follow-up to PR #12626 addressing @gibson042 review feedback.
Upgrade Considerations