-
Notifications
You must be signed in to change notification settings - Fork 236
[cli] [core] Probe deployment specVersion before CLI start #1629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8399d51
b6b6dcf
dd2699b
ddc8c31
cf3dd88
50407fe
330d70d
82dcbe2
ce9ba33
57cb399
419de89
ff8a79c
7b42568
e2d9425
7f39009
6b3c6b1
51fb2ab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| --- | ||
| "@workflow/core": patch | ||
| "@workflow/cli": patch | ||
| --- | ||
|
|
||
| CLI `start` command probes deployment specVersion via health check before choosing queue transport. Health check always uses JSON transport for compatibility with old deployments. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,12 +4,17 @@ import type { | |
| ValidQueueName, | ||
| World, | ||
| } from '@workflow/world'; | ||
| import { HealthCheckPayloadSchema, SPEC_VERSION_CURRENT } from '@workflow/world'; | ||
| import { | ||
| HealthCheckPayloadSchema, | ||
| SPEC_VERSION_CURRENT, | ||
| SPEC_VERSION_LEGACY, | ||
| } from '@workflow/world'; | ||
| import { monotonicFactory } from 'ulid'; | ||
|
|
||
| import { runtimeLogger } from '../logger.js'; | ||
| import * as Attribute from '../telemetry/semantic-conventions.js'; | ||
| import { getSpanKind, trace } from '../telemetry.js'; | ||
| import { version as workflowCoreVersion } from '../version.js'; | ||
| import { getWorld } from './world.js'; | ||
|
|
||
| /** Default timeout for health checks in milliseconds */ | ||
|
|
@@ -99,6 +104,7 @@ export async function handleHealthCheckMessage( | |
| endpoint, | ||
| correlationId: healthCheck.correlationId, | ||
| specVersion: SPEC_VERSION_CURRENT, | ||
| workflowCoreVersion, | ||
| timestamp: Date.now(), | ||
| }); | ||
| // Use a fake runId that passes validation. | ||
|
|
@@ -114,6 +120,8 @@ export type HealthCheckEndpoint = 'workflow' | 'step'; | |
| export interface HealthCheckOptions { | ||
| /** Timeout in milliseconds to wait for health check response. Default: 30000 (30s) */ | ||
| timeout?: number; | ||
| /** Deployment ID to send the health check to. Falls back to process.env.VERCEL_DEPLOYMENT_ID. */ | ||
| deploymentId?: string; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -186,6 +194,12 @@ function parseHealthCheckResponse( | |
| try { | ||
| response = JSON.parse(responseText); | ||
| } catch { | ||
| // Old deployments (specVersion < 3) return plain text like | ||
| // 'Workflow SDK "..." endpoint is healthy'. Treat any non-empty | ||
| // text response as a healthy deployment with unknown specVersion. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Non-blocking: Good catch handling the old if (responseText.includes('healthy') || responseText.includes('endpoint')) {
return { healthy: true };
}
return null;This way, random error pages don't get misidentified as healthy deployments. |
||
| if (responseText.length > 0) { | ||
| return { healthy: true }; | ||
| } | ||
| return null; | ||
| } | ||
|
|
||
|
|
@@ -225,10 +239,16 @@ export async function healthCheck( | |
| const startTime = Date.now(); | ||
|
|
||
| try { | ||
| await world.queue(queueName, { | ||
| __healthCheck: true, | ||
| correlationId, | ||
| }); | ||
| await world.queue( | ||
| queueName, | ||
| { __healthCheck: true, correlationId }, | ||
| { | ||
| // Use JSON transport so the health check works against both | ||
| // old (JSON-only) and new (dual) deployments. | ||
| specVersion: SPEC_VERSION_LEGACY, | ||
| deploymentId: options?.deploymentId, | ||
| } | ||
| ); | ||
|
|
||
| while (Date.now() - startTime < timeout) { | ||
| try { | ||
|
|
@@ -375,6 +395,7 @@ export function withHealthCheck( | |
| healthy: true, | ||
| endpoint: url.pathname, | ||
| specVersion: SPEC_VERSION_CURRENT, | ||
| workflowCoreVersion, | ||
| }), | ||
| { | ||
| status: 200, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Non-blocking: The 10s health check timeout adds latency to every CLI
startinvocation. When the deployment is warm, this is negligible (~1-2s). But on cold-start (or if the deployment is down), the user waits 10s with no feedback before the fallback kicks in.Consider:
logger.info('Probing deployment version...')so the user understands the delay