Skip to content

Commit 9bdcdb2

Browse files
feat: surface root cause analysis in checks get command [AI-190] (#1271)
Surfaces root cause analysis (RCA) information in the `checks get` command output where it is available.
1 parent fe4ada8 commit 9bdcdb2

15 files changed

Lines changed: 749 additions & 10 deletions

File tree

packages/cli/e2e/__tests__/help.spec.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ describe('help', () => {
6060
incidents Create and manage status page incidents.
6161
login Login to your Checkly account or create a new one.
6262
logout Log out and clear any local credentials.
63+
rca Trigger and retrieve root cause analyses.
6364
rules Generate a rules file to use with AI IDEs and Copilots.
6465
runtimes List all supported runtimes and dependencies.
6566
skills Show Checkly AI skills, actions and their references.

packages/cli/package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@
7979
"import": {
8080
"description": "Import existing resources from your Checkly account to your project."
8181
},
82+
"rca": {
83+
"description": "Trigger and retrieve root cause analyses."
84+
},
8285
"status-pages": {
8386
"description": "List and manage status pages in your Checkly account."
8487
}

packages/cli/src/commands/__tests__/init.spec.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { join } from 'path'
12
import { describe, it, expect, vi, beforeEach } from 'vitest'
23
import Init from '../init'
34

@@ -143,7 +144,7 @@ describe('Init command', () => {
143144
await cmd.run()
144145

145146
expect(vi.mocked(writeFileSync)).toHaveBeenCalledWith(
146-
'/tmp/My App/package.json',
147+
join('/tmp/My App', 'package.json'),
147148
expect.stringContaining('"name": "my-app"'),
148149
)
149150
} finally {

packages/cli/src/commands/checks/get.ts

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import {
1111
formatErrorGroups,
1212
} from '../../formatters/checks'
1313
import { formatResultDetail } from '../../formatters/check-result-detail'
14+
import { formatRcaDetail, formatRcaHint, transformErrorGroupForJson } from '../../formatters/rca'
1415
import { quickRangeValues, type QuickRange, type GroupBy } from '../../rest/analytics'
1516
import { formatAnalyticsSection } from '../../formatters/analytics'
1617

@@ -102,7 +103,8 @@ export default class ChecksGet extends AuthCommand {
102103

103104
if (flags.output === 'json') {
104105
const analytics = analyticsResp ?? null
105-
this.log(JSON.stringify({ check, status, results, nextId, errorGroups, analytics }, null, 2))
106+
const errorGroupsJson = errorGroups.map(transformErrorGroupForJson)
107+
this.log(JSON.stringify({ check, status, results, nextId, errorGroups: errorGroupsJson, analytics }, null, 2))
106108
return
107109
}
108110

@@ -190,7 +192,7 @@ export default class ChecksGet extends AuthCommand {
190192
])
191193

192194
if (outputFormat === 'json') {
193-
this.log(JSON.stringify(errorGroup, null, 2))
195+
this.log(JSON.stringify(transformErrorGroupForJson(errorGroup), null, 2))
194196
return
195197
}
196198

@@ -225,6 +227,12 @@ export default class ChecksGet extends AuthCommand {
225227
if (check.scriptPath) {
226228
lines.push(`| Source file | ${check.scriptPath} |`)
227229
}
230+
const rcasMd = errorGroup.rootCauseAnalyses ?? []
231+
if (rcasMd.length > 0) {
232+
lines.push('', formatRcaDetail(rcasMd[0], fmt))
233+
const hintMd = formatRcaHint(rcasMd.length, fmt)
234+
if (hintMd) lines.push('', hintMd)
235+
}
228236
this.log(lines.join('\n'))
229237
return
230238
}
@@ -250,6 +258,14 @@ export default class ChecksGet extends AuthCommand {
250258
output.push(`${chalk.dim('Source file:')} ${chalk.cyan(check.scriptPath)}`)
251259
}
252260

261+
const rcas = errorGroup.rootCauseAnalyses ?? []
262+
if (rcas.length > 0) {
263+
output.push('')
264+
output.push(formatRcaDetail(rcas[0], fmt))
265+
const hint = formatRcaHint(rcas.length, fmt)
266+
if (hint) output.push('', hint)
267+
}
268+
253269
output.push('')
254270
output.push(` ${chalk.dim('Back to check:')} checkly checks get ${checkId}`)
255271
output.push(` ${chalk.dim('Back to list:')} checkly checks list`)
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import { Args, Flags } from '@oclif/core'
2+
import { AuthCommand } from '../authCommand'
3+
import { outputFlag } from '../../helpers/flags'
4+
import * as api from '../../rest/api'
5+
import { formatRcaCompleted } from '../../formatters/rca'
6+
7+
/**
 * `rca get <id>` — retrieves a single root cause analysis (RCA) by its ID.
 *
 * A completed analysis is printed in the requested output format. When the
 * analysis is still being generated, the command either reports the pending
 * state or, with `--watch` and the default `detail` output, polls until the
 * analysis completes and then prints it.
 */
export default class RcaGet extends AuthCommand {
  static hidden = false
  static readOnly = true
  static idempotent = true
  static description = 'Retrieve a root cause analysis by ID.'

  static args = {
    id: Args.string({
      description: 'The RCA ID to retrieve.',
      required: true,
    }),
  }

  static flags = {
    watch: Flags.boolean({
      char: 'w',
      description: 'Wait for the analysis to complete if still generating.',
      default: false,
    }),
    output: outputFlag({ default: 'detail' }),
  }

  /**
   * Fetches the RCA and prints it, or reports/awaits the pending state.
   *
   * Any error raised while fetching or polling is reported through
   * `this.style.longError` and sets `process.exitCode` to 1.
   */
  async run (): Promise<void> {
    const { args, flags } = await this.parse(RcaGet)
    this.style.outputFormat = flags.output

    try {
      // A 202 response means the analysis is still generating; anything else
      // is treated as a completed analysis.
      const response = await api.rca.get(args.id)
      const stillGenerating = response.status === 202

      if (!stillGenerating) {
        let fmt: 'json' | 'md' | 'terminal' = 'terminal'
        if (flags.output === 'json') {
          fmt = 'json'
        } else if (flags.output === 'md') {
          fmt = 'md'
        }
        this.log(formatRcaCompleted(response.data, fmt))
        return
      }

      // Polling is only done for the interactive `detail` output.
      const shouldPoll = flags.watch && flags.output === 'detail'

      if (!shouldPoll) {
        if (flags.watch) {
          // --watch was requested together with a non-detail output format.
          process.stderr.write(`--watch is not supported with --output ${flags.output}, ignoring\n`)
        }

        if (flags.output === 'json') {
          this.log(JSON.stringify({ id: args.id, status: 'pending' }, null, 2))
          return
        }

        this.log('Root cause analysis is still being generated.')
        if (!flags.watch) {
          // Only suggest --watch when the user did not already pass it.
          this.log(`Use ${this.config.bin} rca get ${args.id} --watch to wait for completion.`)
        }
        return
      }

      // Watch mode: block until the analysis is complete, then render it.
      this.style.actionStart('Waiting for root cause analysis...')
      const completed = await api.rca.pollUntilComplete(args.id)
      this.style.actionSuccess()
      this.log(formatRcaCompleted(completed, 'terminal'))
    } catch (error: any) {
      this.style.longError('Failed to retrieve root cause analysis.', error)
      process.exitCode = 1
    }
  }
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import { Flags } from '@oclif/core'
2+
import chalk from 'chalk'
3+
import { AuthCommand } from '../authCommand'
4+
import { outputFlag } from '../../helpers/flags'
5+
import * as api from '../../rest/api'
6+
import { NotFoundError, InadequateEntitlementsError } from '../../rest/errors'
7+
import { formatRcaPending, formatRcaCompleted } from '../../formatters/rca'
8+
9+
/**
 * `rca run --error-group <id>` — triggers a root cause analysis (RCA) for an
 * error group.
 *
 * Without `--watch` (or with `json`/`md` output) the command prints the
 * pending-state summary and exits. With `--watch` and the default `detail`
 * output it polls until the analysis completes and prints the result.
 */
export default class RcaRun extends AuthCommand {
  static hidden = false
  static readOnly = false
  static idempotent = false
  static description = 'Trigger a root cause analysis for an error group.'

  static flags = {
    'error-group': Flags.string({
      char: 'e',
      description: 'The error group ID to analyze.',
      required: true,
    }),
    watch: Flags.boolean({
      char: 'w',
      description: 'Wait for the analysis to complete and display the result.',
      default: false,
    }),
    output: outputFlag({ default: 'detail' }),
  }

  /**
   * Triggers the analysis and prints either the pending summary or, in watch
   * mode, the completed analysis.
   *
   * Failures set `process.exitCode` to 1. Entitlement and not-found errors
   * get dedicated messages; everything else is reported generically.
   */
  async run (): Promise<void> {
    const { flags } = await this.parse(RcaRun)
    this.style.outputFormat = flags.output

    const errorGroupId = flags['error-group']

    try {
      // The error group is fetched first so its checkId can be included in
      // the navigation hints of the pending output.
      const errorGroupResponse = await api.errorGroups.get(errorGroupId)
      const errorGroup = errorGroupResponse.data

      // Trigger the RCA and keep the ID of the newly created analysis.
      const triggerResponse = await api.rca.trigger(errorGroupId)
      const rcaId = triggerResponse.data.id

      const pendingInfo = {
        rcaId,
        errorGroupId,
        checkId: errorGroup.checkId,
      }

      // Polling only happens for --watch with neither json nor md output.
      const shouldPoll = flags.watch && flags.output !== 'json' && flags.output !== 'md'

      if (!shouldPoll) {
        if (flags.watch && flags.output !== 'detail') {
          process.stderr.write(`--watch is not supported with --output ${flags.output}, ignoring\n`)
        }

        let fmt: 'json' | 'md' | 'terminal' = 'terminal'
        if (flags.output === 'json') {
          fmt = 'json'
        } else if (flags.output === 'md') {
          fmt = 'md'
        }

        this.log(formatRcaPending(pendingInfo, fmt))
        return
      }

      // Watch mode: announce the trigger, then block until completion.
      this.log(chalk.bold('Root cause analysis triggered.'))
      this.log(`${chalk.dim('RCA ID:')} ${rcaId}`)
      this.log('')
      this.style.actionStart('Waiting for root cause analysis...')

      const completed = await api.rca.pollUntilComplete(rcaId)

      this.style.actionSuccess()
      this.log(formatRcaCompleted(completed, 'terminal'))
    } catch (error: any) {
      if (error instanceof InadequateEntitlementsError) {
        this.style.longError(
          'Root cause analysis is not available on your current plan.',
          'Run `checkly account plan` to check your entitlements.',
        )
      } else if (error instanceof NotFoundError) {
        this.style.shortError(`Error group not found: ${flags['error-group']}`)
      } else {
        this.style.longError('Failed to trigger root cause analysis.', error)
      }
      // Every failure path exits non-zero, after its message has been reported.
      process.exitCode = 1
    }
  }
}

packages/cli/src/formatters/__tests__/__fixtures__/fixtures.ts

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import type { Check } from '../../rest/checks'
22
import type { CheckStatus } from '../../rest/check-statuses'
33
import type { CheckResult, ApiCheckResult, BrowserCheckResult, MultiStepCheckResult } from '../../rest/check-results'
4-
import type { ErrorGroup } from '../../rest/error-groups'
4+
import type { ErrorGroup, RootCauseAnalysis } from '../../rest/error-groups'
55
import type { CheckWithStatus } from '../checks'
66

77
// --- Check statuses ---
@@ -388,3 +388,75 @@ export const archivedErrorGroup: ErrorGroup = {
388388
lastSeen: '2025-05-15T00:00:00.000Z',
389389
archivedUntilNextEvent: true,
390390
}
391+
392+
// --- Root cause analyses ---
393+
394+
export const sampleRca: RootCauseAnalysis = {
395+
id: 'rca-1',
396+
created_at: '2025-06-15T10:00:00.000Z',
397+
analysis: {
398+
classification: 'INFRASTRUCTURE_ERROR',
399+
rootCause: 'The upstream API returned HTTP 503 Service Unavailable after a long server processing time (~28s TTFB), indicating a transient backend issue.',
400+
userImpact: 'Users in ap-south-1 cannot trigger checks via the API. Requests fail with 503 after ~28 seconds.',
401+
codeFix: 'Add retry logic with exponential backoff for transient 503 responses.',
402+
evidence: [
403+
{
404+
artifacts: [{ name: 'HTTP_REQUEST', type: 'REQUEST' }],
405+
description: 'The HTTP request completed with status 503 Service Unavailable.',
406+
},
407+
{
408+
artifacts: [{ name: 'TIMING_PHASES', type: 'TIMINGS' }],
409+
description: 'DNS and TCP times are sub-2ms while TTFB is ~28.2s.',
410+
},
411+
{
412+
artifacts: [
413+
{ name: 'TRACE_ROUTE', type: 'TRACE' },
414+
{ name: 'PACKET_CAPTURE', type: 'BINARY' },
415+
],
416+
description: 'No sustained network outage; the failure is on the application side.',
417+
},
418+
],
419+
referenceLinks: [
420+
{ url: 'https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/503', title: 'HTTP 503 Service Unavailable' },
421+
],
422+
},
423+
provider: 'openai',
424+
model: 'gpt-5.1',
425+
durationMs: 9459,
426+
userContext: [{ text: 'checkly-backend', type: 'TAG' }],
427+
}
428+
429+
export const sampleRcaMinimal: RootCauseAnalysis = {
430+
id: 'rca-2',
431+
created_at: '2025-06-14T08:00:00.000Z',
432+
analysis: {
433+
classification: 'APPLICATION_ERROR',
434+
rootCause: 'The API endpoint /users returns 404 because the route was removed in a recent deployment.',
435+
userImpact: 'User profile pages fail to load.',
436+
codeFix: null,
437+
evidence: null,
438+
referenceLinks: null,
439+
},
440+
provider: 'openai',
441+
model: 'gpt-5.1',
442+
durationMs: 5000,
443+
userContext: null,
444+
}
445+
446+
export const errorGroupWithRca: ErrorGroup = {
447+
...activeErrorGroup,
448+
id: 'eg-rca-1',
449+
rootCauseAnalyses: [sampleRca],
450+
}
451+
452+
export const errorGroupWithMultipleRcas: ErrorGroup = {
453+
...activeErrorGroup,
454+
id: 'eg-rca-2',
455+
rootCauseAnalyses: [sampleRca, sampleRcaMinimal],
456+
}
457+
458+
export const errorGroupWithoutRca: ErrorGroup = {
459+
...activeErrorGroup,
460+
id: 'eg-no-rca',
461+
rootCauseAnalyses: [],
462+
}

packages/cli/src/formatters/__tests__/__snapshots__/checks.spec.ts.snap

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,17 @@ Muted Check API passing 10m production, api
7777
exports[`formatErrorGroups > renders markdown error groups > error-groups-md 1`] = `
7878
"## Error Groups
7979
80-
| Error | First Seen | Last Seen | ID |
81-
| --- | --- | --- | --- |
82-
| TimeoutError: page.click: Timeout 30000ms exceeded | 2025-06-01T00:00:00.000Z | 2025-06-15T12:00:00.000Z | eg-1 |"
80+
| Error | First Seen | Last Seen | RCA | ID |
81+
| --- | --- | --- | --- | --- |
82+
| TimeoutError: page.click: Timeout 30000ms exceeded | 2025-06-01T00:00:00.000Z | 2025-06-15T12:00:00.000Z | - | eg-1 |"
8383
`;
8484

8585
exports[`formatErrorGroups > renders terminal error groups > error-groups-terminal 1`] = `
8686
"ERROR GROUPS
87-
ERROR FIRST SEEN LAST SEEN
88-
TimeoutError: page.click: Timeout 30000ms exceeded 14d ago 5m ago"
87+
ERROR FIRST SEEN LAST SEEN RCA ERROR GROUP ID
88+
TimeoutError: page.click: Timeout 30000ms exceeded 14d ago 5m ago - eg-1
89+
90+
Run root cause analysis: checkly rca run -e eg-1 -w"
8991
`;
9092

9193
exports[`formatResults > renders markdown table > results-table-md 1`] = `

packages/cli/src/formatters/__tests__/checks.spec.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ import {
3131
browserCheckResult,
3232
activeErrorGroup,
3333
archivedErrorGroup,
34+
errorGroupWithRca,
35+
errorGroupWithoutRca,
3436
} from './__fixtures__/fixtures'
3537

3638
// Pin time for timeAgo used in results/error groups
@@ -361,4 +363,27 @@ describe('formatErrorGroups', () => {
361363
it('returns empty string for empty array', () => {
362364
expect(formatErrorGroups([], 'terminal')).toBe('')
363365
})
366+
367+
it('shows RCA column with Yes for error groups with RCA', () => {
368+
const result = stripAnsi(formatErrorGroups([errorGroupWithRca], 'terminal'))
369+
expect(result).toContain('RCA')
370+
expect(result).toContain('Yes')
371+
})
372+
373+
it('shows RCA column with dash for error groups without RCA', () => {
374+
const result = stripAnsi(formatErrorGroups([errorGroupWithoutRca], 'terminal'))
375+
expect(result).toContain('RCA')
376+
expect(result).toContain('-')
377+
})
378+
379+
it('shows RCA column in markdown', () => {
380+
const result = formatErrorGroups([errorGroupWithRca], 'md')
381+
expect(result).toContain('| RCA |')
382+
expect(result).toContain('Yes')
383+
})
384+
385+
it('shows dash in markdown for error groups without RCA', () => {
386+
const result = formatErrorGroups([errorGroupWithoutRca], 'md')
387+
expect(result).toContain('| - |')
388+
})
364389
})

0 commit comments

Comments
 (0)