Skip to content

Commit 9a01bf9

Browse files
authored
feat(telemetry): add wire-format module and export event reader (#959)
Foundation for the telemetry export flow (#903): a single source of truth for the on-disk JSONL shape, a streaming reader for exported events, and the date-range primitives the export UI will build on.

- Add `wireFormat.ts` with zod-derived `TelemetryEvent` (DeepReadonly) and shared `serializeTelemetryEventLine` / `parseTelemetryEventLine` so writer and reader cannot drift. `TelemetryFileParseError` lets readers tell parse failures apart from IO failures.
- Add `export/files.ts`: `listTelemetryFilesForRange` (date-filtered, chronologically ordered) and `streamTelemetryEvents` (line-by-line `AsyncIterable` with deterministic fd cleanup).
- Add `export/range.ts`: preset and custom UTC ranges, UTC date validation, and a calendar-day pre-filter so out-of-range files are skipped without being opened.
- Switch `LocalJsonlSink` to the shared serializer; drop the bespoke writer.

Refs #903
1 parent 8d1352b commit 9a01bf9

10 files changed

Lines changed: 772 additions & 100 deletions

File tree

src/telemetry/event.ts

Lines changed: 8 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@ import * as vscode from "vscode";
33

44
import { toError } from "../error/errorUtils";
55

6+
export type {
7+
SessionContext,
8+
TelemetryContext,
9+
TelemetryEvent,
10+
} from "./wireFormat";
11+
12+
import type { SessionContext, TelemetryEvent } from "./wireFormat";
13+
614
/** Telemetry level, mirrors `coder.telemetry.level`. Ordered: off < local. */
715
export type TelemetryLevel = "off" | "local";
816

@@ -19,47 +27,6 @@ export type CallerMeasurements = Record<string, number> & {
1927
durationMs?: never;
2028
};
2129

22-
/** Session-stable resource attributes. Field names are inspired by OTel
23-
* resource attributes; they are camelCase TypeScript and not a 1:1 mapping. */
24-
export interface SessionContext {
25-
readonly extensionVersion: string;
26-
readonly machineId: string;
27-
readonly sessionId: string;
28-
readonly osType: string;
29-
readonly osVersion: string;
30-
readonly hostArch: string;
31-
readonly platformName: string;
32-
readonly platformVersion: string;
33-
}
34-
35-
/** Per-event context: session attributes plus the current deployment URL. */
36-
export interface TelemetryContext extends SessionContext {
37-
readonly deploymentUrl: string;
38-
}
39-
40-
export interface TelemetryEvent {
41-
readonly eventId: string;
42-
readonly eventName: string;
43-
readonly timestamp: string;
44-
readonly eventSequence: number;
45-
46-
readonly context: TelemetryContext;
47-
48-
readonly properties: Readonly<Record<string, string>>;
49-
readonly measurements: Readonly<Record<string, number>>;
50-
51-
/** Shared by all events in a trace. Maps to OTel `trace_id`. */
52-
readonly traceId?: string;
53-
/** Set on phase children only. Equals the parent event's `eventId`. Maps to OTel `parent_span_id`. */
54-
readonly parentEventId?: string;
55-
56-
readonly error?: Readonly<{
57-
message: string;
58-
type?: string;
59-
code?: string;
60-
}>;
61-
}
62-
6330
/**
6431
* Sink for telemetry events. `write` is sync and must buffer in memory; I/O
6532
* happens in `flush`/`dispose`. The service filters by `minLevel`; sinks can

src/telemetry/export/files.ts

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
import { createReadStream } from "node:fs";
2+
import * as fs from "node:fs/promises";
3+
import * as path from "node:path";
4+
import * as readline from "node:readline";
5+
6+
import { toError } from "../../error/errorUtils";
7+
import {
8+
parseTelemetryEventLine,
9+
TelemetryFileParseError,
10+
} from "../wireFormat";
11+
12+
import {
13+
fileDateCanContainRangeEvent,
14+
isTimestampInRange,
15+
type TelemetryDateRange,
16+
} from "./range";
17+
18+
import type { TelemetryEvent } from "../event";
19+
20+
/** A telemetry log file, described by the pieces parsed out of its filename. */
interface TelemetryLogFile {
  /** Directory joined with the filename. */
  readonly path: string;
  /** `YYYY-MM-DD` date segment of the filename. */
  readonly date: string;
  /** Session segment of the filename. */
  readonly session: string;
  /** Numeric part suffix; `0` when the filename has none. */
  readonly part: number;
}
26+
27+
/**
 * Matches log filenames of the form
 * `telemetry-YYYY-MM-DD-{session}[.{part}].jsonl`.
 *
 * Capture groups: 1 = date (used for range filtering), 2 = session (no dots),
 * 3 = optional numeric part. Session and part order files within one day.
 */
const TELEMETRY_FILE_PATTERN =
  /^telemetry-(\d{4}-\d{2}-\d{2})-([^.]+)(?:\.(\d+))?\.jsonl$/;
34+
35+
/**
 * Lists the telemetry log files in `telemetryDir` whose filename date could
 * hold events inside `range`.
 *
 * @param telemetryDir Directory to scan; a missing directory yields `[]`.
 * @param range Window used for the calendar-day pre-filter.
 * @returns Full file paths sorted by date, then session, then part.
 * @throws Any `readdir` failure other than `ENOENT`.
 */
export async function listTelemetryFilesForRange(
  telemetryDir: string,
  range: TelemetryDateRange,
): Promise<string[]> {
  let entries: string[];
  try {
    entries = await fs.readdir(telemetryDir);
  } catch (err) {
    // A directory that does not exist simply means nothing was recorded.
    if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
      throw err;
    }
    return [];
  }

  const candidates: TelemetryLogFile[] = [];
  for (const entry of entries) {
    const parsed = parseLogFilename(telemetryDir, entry);
    // Skip names that are not telemetry logs and days outside the range.
    if (
      parsed !== undefined &&
      fileDateCanContainRangeEvent(parsed.date, range)
    ) {
      candidates.push(parsed);
    }
  }
  candidates.sort(compareLogFiles);
  return candidates.map((candidate) => candidate.path);
}
59+
60+
/**
 * Streams the events stored in `filePaths`, one file after another, yielding
 * only events whose timestamp lies inside `range`.
 *
 * Each file is read line by line rather than loaded whole. Blank lines are
 * skipped but still counted, so reported line numbers match the file.
 *
 * @param filePaths Files to read, in the order they should be emitted.
 * @param range Window every yielded event must fall into.
 * @throws TelemetryFileParseError unchanged when a line fails to parse.
 * @throws Error wrapping any other failure, with the file name, the last line
 *   number reached (if any) and the original error as `cause`.
 */
export async function* streamTelemetryEvents(
  filePaths: readonly string[],
  range: TelemetryDateRange,
): AsyncIterable<TelemetryEvent> {
  for (const filePath of filePaths) {
    const fileName = path.basename(filePath);
    const input = createReadStream(filePath, { encoding: "utf8" });
    const reader = readline.createInterface({ input, crlfDelay: Infinity });
    let currentLine = 0;
    try {
      for await (const rawLine of reader) {
        currentLine += 1;
        if (rawLine.trim() !== "") {
          const parsed = parseTelemetryEventLine(
            rawLine,
            fileName,
            currentLine,
          );
          if (isTimestampInRange(parsed.timestamp, range)) {
            yield parsed;
          }
        }
      }
    } catch (err) {
      // Anything that is not a parse failure gets file/line context added.
      if (!(err instanceof TelemetryFileParseError)) {
        const position = currentLine > 0 ? `:${currentLine}` : "";
        throw new Error(
          `Failed to read telemetry file ${fileName}${position}: ${toError(err).message}`,
          { cause: err },
        );
      }
      throw err;
    } finally {
      // Destroy the stream even if closing the readline interface throws.
      try {
        reader.close();
      } finally {
        input.destroy();
      }
    }
  }
}
105+
106+
/**
 * Splits a directory entry name into its telemetry filename components.
 *
 * @param dir Directory the entry lives in; joined with `name` to form `path`.
 * @param name Bare filename to test against `TELEMETRY_FILE_PATTERN`.
 * @returns The parsed file, or `undefined` when the name does not match.
 */
function parseLogFilename(
  dir: string,
  name: string,
): TelemetryLogFile | undefined {
  const match = TELEMETRY_FILE_PATTERN.exec(name);
  if (match === null) {
    return undefined;
  }
  const [, date, session, partText] = match;
  // A name without a `.{part}` suffix counts as part 0.
  const part = partText === undefined ? 0 : Number(partText);
  return { path: path.join(dir, name), date, session, part };
}
121+
122+
/**
 * Sort comparator for log files: by date first, then session, then part
 * number. Returns a negative, zero or positive number as `a` sorts before,
 * equal to or after `b`.
 */
function compareLogFiles(a: TelemetryLogFile, b: TelemetryLogFile): number {
  const byDate = a.date.localeCompare(b.date);
  if (byDate !== 0) {
    return byDate;
  }
  const bySession = a.session.localeCompare(b.session);
  if (bySession !== 0) {
    return bySession;
  }
  return a.part - b.part;
}

src/telemetry/export/range.ts

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
import { z } from "zod";
2+
3+
/** Milliseconds in one 24-hour day. */
const DAY_MS = 1000 * 60 * 60 * 24;
/** Zod ISO date schema used to validate user-entered dates. */
const UtcDateSchema = z.iso.date();
5+
6+
/**
 * A window of time, in epoch milliseconds, used to filter telemetry.
 *
 * The window is half-open: `startMs` is included, `endMs` is excluded. A bound
 * left `undefined` is unbounded on that side (e.g. "all time").
 */
export interface TelemetryDateRange {
  /** Human-readable name shown in the UI. */
  readonly label: string;
  /** Fragment used when building export filenames. */
  readonly filenamePart: string;
  /** Inclusive lower bound; `undefined` means no lower bound. */
  readonly startMs?: number;
  /** Exclusive upper bound; `undefined` means no upper bound. */
  readonly endMs?: number;
}
17+
18+
/** Display information for one preset range. */
export interface TelemetryRangePreset {
  /** Key of the preset in the preset table. */
  readonly id: TelemetryRangePresetId;
  /** Short name of the preset. */
  readonly label: string;
  /** One-sentence description of what the preset exports. */
  readonly detail: string;
}
23+
24+
/** Identifier of a built-in range preset; one per key of `PRESETS`. */
export type TelemetryRangePresetId = keyof typeof PRESETS;

/**
 * Built-in range presets. Key order is the order the presets are listed in.
 * `durationMs` is how far back from "now" the range reaches; `undefined`
 * means the range has no bounds.
 */
const PRESETS = {
  // Rolling one-day window.
  last24Hours: {
    label: "Last 24 hours",
    detail: "Export telemetry from the last day.",
    filenamePart: "last-24-hours",
    durationMs: DAY_MS,
  },
  // Rolling seven-day window.
  last7Days: {
    label: "Last 7 days",
    detail: "Export telemetry from the last week.",
    filenamePart: "last-7-days",
    durationMs: 7 * DAY_MS,
  },
  // Rolling thirty-day window.
  last30Days: {
    label: "Last 30 days",
    detail: "Export telemetry from the last month.",
    filenamePart: "last-30-days",
    durationMs: 30 * DAY_MS,
  },
  // Unbounded: everything that is stored.
  allTime: {
    label: "All time",
    detail: "Export all stored telemetry.",
    filenamePart: "all-time",
    durationMs: undefined,
  },
} as const;
52+
53+
/**
 * Preset choices for the export UI, in the same order as the keys of
 * `PRESETS`. Only the id, label and detail of each preset are exposed.
 */
export const TELEMETRY_RANGE_PRESETS: readonly TelemetryRangePreset[] = (
  Object.keys(PRESETS) as TelemetryRangePresetId[]
).map((id) => {
  const { label, detail } = PRESETS[id];
  return { id, label, detail };
});
60+
61+
/**
 * Builds the date range for a preset.
 *
 * @param id Preset to resolve.
 * @param now Moment the range ends at; defaults to the current time.
 * @returns A range ending at `now` and reaching back by the preset's
 *   duration, or a range with no bounds when the preset has no duration.
 */
export function createPresetDateRange(
  id: TelemetryRangePresetId,
  now: Date = new Date(),
): TelemetryDateRange {
  const { label, filenamePart, durationMs: spanMs } = PRESETS[id];
  if (spanMs !== undefined) {
    const anchorMs = now.getTime();
    return { label, filenamePart, startMs: anchorMs - spanMs, endMs: anchorMs };
  }
  return { label, filenamePart };
}
73+
74+
/**
 * Builds a range from two `YYYY-MM-DD` UTC dates, covering both days in full.
 *
 * The range starts at UTC midnight of `startDate`. Its exclusive end is UTC
 * midnight of the day after `endDate`, so all of `endDate` is included.
 *
 * @throws Error when either date is invalid or `endDate` precedes `startDate`.
 */
export function createCustomDateRange(
  startDate: string,
  endDate: string,
): TelemetryDateRange {
  const fromMs = parseUtcDate(startDate);
  const toMs = parseUtcDate(endDate);
  if (toMs < fromMs) {
    throw new Error("End date must be on or after start date.");
  }
  const label = `${startDate} to ${endDate}`;
  const filenamePart = `${startDate}_to_${endDate}`;
  return { label, filenamePart, startMs: fromMs, endMs: toMs + DAY_MS };
}
94+
95+
/**
 * Checks a user-entered date string.
 *
 * @param value Text to check.
 * @returns `undefined` when `value` is acceptable; otherwise a short
 *   user-facing message. The `YYYY-MM-DD` shape is checked first, then the
 *   value is validated against `UtcDateSchema`.
 */
export function validateUtcDateInput(value: string): string | undefined {
  const hasDateShape = /^\d{4}-\d{2}-\d{2}$/.test(value);
  if (!hasDateShape) {
    return "Use YYYY-MM-DD.";
  }
  if (!UtcDateSchema.safeParse(value).success) {
    return "Enter a valid calendar date.";
  }
  return undefined;
}
104+
105+
/**
 * Tests whether a timestamp lies inside `range`.
 *
 * @param timestamp Date string understood by `Date.parse`.
 * @param range Half-open window: `startMs` inclusive, `endMs` exclusive; an
 *   `undefined` bound never excludes anything.
 * @returns `true` when the timestamp is inside the window.
 * @throws Error when `timestamp` cannot be parsed.
 */
export function isTimestampInRange(
  timestamp: string,
  range: TelemetryDateRange,
): boolean {
  const eventMs = Date.parse(timestamp);
  if (!Number.isFinite(eventMs)) {
    throw new Error(`Invalid telemetry timestamp '${timestamp}'.`);
  }
  const { startMs, endMs } = range;
  // Negated comparisons keep a NaN bound excluding everything.
  if (startMs !== undefined && !(eventMs >= startMs)) {
    return false;
  }
  if (endMs !== undefined && !(eventMs < endMs)) {
    return false;
  }
  return true;
}
119+
120+
/**
 * Day-granularity pre-filter that decides whether a file needs to be opened.
 *
 * Compares the file's `YYYY-MM-DD` date with the UTC calendar days of the
 * range's first and last included milliseconds.
 * NOTE(review): this assumes a file dated D only holds events from UTC day D;
 * confirm against the sink's file naming and rotation.
 *
 * @param date `YYYY-MM-DD` date taken from the filename.
 * @param range Window being exported; an `undefined` bound never excludes.
 * @returns `true` when the file's day overlaps the range.
 */
export function fileDateCanContainRangeEvent(
  date: string,
  range: TelemetryDateRange,
): boolean {
  const { startMs, endMs } = range;
  const firstDay = startMs !== undefined ? utcDateString(startMs) : undefined;
  // `endMs` is exclusive, so the last included instant is one ms earlier.
  const lastDay = endMs !== undefined ? utcDateString(endMs - 1) : undefined;
  if (firstDay !== undefined && date < firstDay) {
    return false;
  }
  if (lastDay !== undefined && date > lastDay) {
    return false;
  }
  return true;
}
137+
138+
/**
 * Converts a `YYYY-MM-DD` string to the epoch milliseconds of UTC midnight on
 * that day.
 *
 * @param value Date text; it must pass `validateUtcDateInput`.
 * @returns Epoch milliseconds of `value` at 00:00:00.000 UTC.
 * @throws Error when `value` fails validation.
 */
function parseUtcDate(value: string): number {
  const error = validateUtcDateInput(value);
  if (error !== undefined) {
    throw new Error(`Invalid date '${value}': ${error}`);
  }
  // Parse the ISO string rather than calling `Date.UTC(y, m - 1, d)`:
  // `Date.UTC` treats years 0-99 as 1900-1999, so "0050-01-01" would
  // silently become 1950.
  return Date.parse(`${value}T00:00:00.000Z`);
}
146+
147+
/**
 * Returns the first ten characters of the ISO-8601 UTC rendering of `ms`,
 * i.e. the `YYYY-MM-DD` day for four-digit years.
 */
function utcDateString(ms: number): string {
  const iso = new Date(ms).toISOString();
  return iso.substring(0, 10);
}

src/telemetry/sinks/localJsonlSink.ts

Lines changed: 2 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
cleanupFiles,
1313
type FileCleanupCandidate,
1414
} from "../../util/fileCleanup";
15+
import { serializeTelemetryEventLine } from "../wireFormat";
1516

1617
import type { Logger } from "../../logging/logger";
1718
import type { TelemetryEvent, TelemetryLevel, TelemetrySink } from "../event";
@@ -88,7 +89,7 @@ export class LocalJsonlSink implements TelemetrySink, vscode.Disposable {
8889
}
8990
let line: string;
9091
try {
91-
line = serializeEvent(event);
92+
line = serializeTelemetryEventLine(event);
9293
} catch (err) {
9394
this.#logger.warn(`Telemetry sink '${this.name}' serialize failed`, err);
9495
return;
@@ -313,32 +314,3 @@ function toSessionSlug(sessionId: string): string {
313314
const cleaned = sessionId.replace(/[^a-zA-Z0-9]/g, "");
314315
return cleaned.slice(0, 8) || "anon0000";
315316
}
316-
317-
function serializeEvent(event: TelemetryEvent): string {
318-
return (
319-
JSON.stringify({
320-
event_id: event.eventId,
321-
event_name: event.eventName,
322-
timestamp: event.timestamp,
323-
event_sequence: event.eventSequence,
324-
context: {
325-
extension_version: event.context.extensionVersion,
326-
machine_id: event.context.machineId,
327-
session_id: event.context.sessionId,
328-
os_type: event.context.osType,
329-
os_version: event.context.osVersion,
330-
host_arch: event.context.hostArch,
331-
platform_name: event.context.platformName,
332-
platform_version: event.context.platformVersion,
333-
deployment_url: event.context.deploymentUrl,
334-
},
335-
properties: event.properties,
336-
measurements: event.measurements,
337-
...(event.traceId !== undefined && { trace_id: event.traceId }),
338-
...(event.parentEventId !== undefined && {
339-
parent_event_id: event.parentEventId,
340-
}),
341-
...(event.error !== undefined && { error: event.error }),
342-
}) + "\n"
343-
);
344-
}

0 commit comments

Comments
 (0)