diff --git a/.server-changes/trace-page-payload-diet.md b/.server-changes/trace-page-payload-diet.md new file mode 100644 index 00000000000..9f84e4b22db --- /dev/null +++ b/.server-changes/trace-page-payload-diet.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Shrinks the run trace page loader payload by keeping raw span events server-side and makes large trace trees render more efficiently. Also adds an optional `TRACE_VIEW_EMERGENCY_SPAN_CAP` env var that clamps trace summary and detailed summary span limits on both event store paths. diff --git a/apps/webapp/app/components/primitives/TreeView/TreeView.tsx b/apps/webapp/app/components/primitives/TreeView/TreeView.tsx index 5f720c24fe9..39a236aef6f 100644 --- a/apps/webapp/app/components/primitives/TreeView/TreeView.tsx +++ b/apps/webapp/app/components/primitives/TreeView/TreeView.tsx @@ -1,6 +1,14 @@ import { VirtualItem, Virtualizer, useVirtualizer } from "@tanstack/react-virtual"; import { motion } from "framer-motion"; -import { MutableRefObject, RefObject, useCallback, useEffect, useReducer, useRef } from "react"; +import { + MutableRefObject, + RefObject, + useCallback, + useEffect, + useMemo, + useReducer, + useRef, +} from "react"; import { cn } from "~/utils/cn"; import { NodeState, NodesState, reducer } from "./reducer"; import { concreteStateFromInput, selectedIdFromState } from "./utils"; @@ -47,6 +55,16 @@ export function TreeView({ const virtualItems = virtualizer.getVirtualItems(); + // id -> node lookup so each virtual row resolves in O(1) instead of + // scanning the whole tree per row. + const nodesById = useMemo(() => { + const map = new Map>(); + for (const node of tree) { + map.set(node.id, node); + } + return map; + }, [tree]); + const scrollCallback = useCallback( (event: Event) => { if (!onScroll) return; @@ -99,7 +117,7 @@ export function TreeView({ }} > {virtualItems.map((virtualItem) => { - const node = tree.find((node) => node.id === virtualItem.key); + const node = nodesById.get(virtualItem.key as string); if (!node) return null; const state = nodes[node.id]; if (!state) return null; @@ -197,6 +215,16 @@ export function useTree({ concreteStateFromInput({ tree, selectedId, collapsedIds, filter }) ); + // id -> index lookup so getNodeProps resolves in O(1) instead of scanning + // the whole tree per rendered row. + const treeIndexById = useMemo(() => { + const map = new Map(); + tree.forEach((node, index) => { + map.set(node.id, index); + }); + return map; + }, [tree]); + //sync external selectedId prop into internal state useEffect(() => { const internalSelectedId = selectedIdFromState(state.nodes); @@ -497,7 +525,7 @@ export function useTree({ (id: string) => { const node = state.nodes[id]; if (!node) return {}; - const treeItemIndex = tree.findIndex((node) => node.id === id); + const treeItemIndex = treeIndexById.get(id) ?? -1; const treeItem = tree[treeItemIndex]; return { "aria-expanded": node.expanded, @@ -506,7 +534,7 @@ export function useTree({ tabIndex: node.selected ? -1 : undefined, }; }, - [state] + [state, treeIndexById] ); return { diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index f5cd30dd27b..79634926835 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -726,6 +726,9 @@ const EnvironmentSchema = z MAXIMUM_LIVE_RELOADING_EVENTS: z.coerce.number().int().default(1000), MAXIMUM_TRACE_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(25_000), MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(10_000), + // Emergency circuit breaker: when set, clamps the trace summary and detailed + // summary span limits on both event store paths to this value. Unset = disabled. + TRACE_VIEW_EMERGENCY_SPAN_CAP: z.coerce.number().int().positive().optional(), TASK_PAYLOAD_OFFLOAD_THRESHOLD: z.coerce.number().int().default(524_288), // 512KB BATCH_PAYLOAD_OFFLOAD_THRESHOLD: z.coerce.number().int().optional(), // Defaults to TASK_PAYLOAD_OFFLOAD_THRESHOLD if not set TASK_PAYLOAD_MAXIMUM_SIZE: z.coerce.number().int().default(3_145_728), // 3MB diff --git a/apps/webapp/app/presenters/v3/RunPresenter.server.ts b/apps/webapp/app/presenters/v3/RunPresenter.server.ts index d965f74a77d..365206ee75d 100644 --- a/apps/webapp/app/presenters/v3/RunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunPresenter.server.ts @@ -255,12 +255,16 @@ export class RunPresenter { linkedRunIdBySpanId[n.id] = n.runId; } + // Raw span events are only needed server-side (to derive timelineEvents); + // keep them out of the serialized loader payload. + const { events: spanEvents, ...data } = n.data; + return { ...n, data: { - ...n.data, + ...data, timelineEvents: createTimelineSpanEventsFromSpanEvents( - n.data.events, + spanEvents, user?.admin ?? false, treeRootStartTimeMs ), diff --git a/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts b/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts index 3a01f8f4397..e0e88e4dd02 100644 --- a/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts @@ -1,5 +1,6 @@ import { type PrismaClient, prisma } from "~/db.server"; import { logger } from "~/services/logger.server"; +import { requireUserId } from "~/services/session.server"; import { singleton } from "~/utils/singleton"; import { ABORT_REASON_SEND_ERROR, createSSELoader, SendFunction } from "~/utils/sse"; import { throttle } from "~/utils/throttle"; @@ -30,9 +31,23 @@ export class RunStreamPresenter { throw new Response("Missing runParam", { status: 400 }); } + const userId = await requireUserId(context.request); + + // Scope the lookup to organizations the requesting user is a member + // of, matching RunPresenter's run lookup. Unauthorized and missing + // runs are indistinguishable (both 404). const run = await prismaClient.taskRun.findFirst({ where: { friendlyId: runFriendlyId, + project: { + organization: { + members: { + some: { + userId, + }, + }, + }, + }, }, select: { traceId: true, @@ -51,7 +66,15 @@ export class RunStreamPresenter { if (buffer) { try { const entry = await buffer.getEntry(runFriendlyId); - if (entry) { + // Same membership scoping as the PG lookup above — the buffer + // entry carries the owning org's id. + const isMember = entry + ? (await prismaClient.orgMember.findFirst({ + where: { organizationId: entry.orgId, userId }, + select: { id: true }, + })) !== null + : false; + if (entry && isMember) { // Go through the webapp wrapper so this read-side module // shares a single deserialisation path with readFallback — // see the contract comment in syntheticRedirectInfo.server.ts. diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts index 794938e9807..7c20dd3a2a5 100644 --- a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts +++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts @@ -2,6 +2,7 @@ import { ClickHouse } from "@internal/clickhouse"; import { createHash } from "crypto"; import { ClickhouseEventRepository } from "~/v3/eventRepository/clickhouseEventRepository.server"; import { env } from "~/env.server"; +import { clampToEmergencySpanCap } from "~/v3/eventRepository/emergencySpanCap.server"; import { singleton } from "~/utils/singleton"; import type { OrganizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistry.server"; import { type IEventRepository } from "~/v3/eventRepository/eventRepository.types"; @@ -533,9 +534,12 @@ function buildEventRepository(store: string, clickhouse: ClickHouse): Clickhouse clickhouse, batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE, flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS, - maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT, - maximumTraceDetailedSummaryViewCount: - env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT, + maximumTraceSummaryViewCount: clampToEmergencySpanCap( + env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT + ), + maximumTraceDetailedSummaryViewCount: clampToEmergencySpanCap( + env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT + ), maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING, insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY, waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1", @@ -557,9 +561,12 @@ function buildEventRepository(store: string, clickhouse: ClickHouse): Clickhouse clickhouse: clickhouse, batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE, flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS, - maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT, - maximumTraceDetailedSummaryViewCount: - env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT, + maximumTraceSummaryViewCount: clampToEmergencySpanCap( + env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT + ), + maximumTraceDetailedSummaryViewCount: clampToEmergencySpanCap( + env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT + ), maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING, insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY, waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1", diff --git a/apps/webapp/app/utils/timelineSpanEvents.ts b/apps/webapp/app/utils/timelineSpanEvents.ts index 9e09ba0a795..596f0399a6f 100644 --- a/apps/webapp/app/utils/timelineSpanEvents.ts +++ b/apps/webapp/app/utils/timelineSpanEvents.ts @@ -117,7 +117,6 @@ export function createTimelineSpanEventsFromSpanEvents( offset, timestamp, duration, - properties: spanEvent.properties, helpText: getHelpTextForEvent(name), markerVariant, lineVariant: "light" as const, diff --git a/apps/webapp/app/v3/eventRepository/emergencySpanCap.server.ts b/apps/webapp/app/v3/eventRepository/emergencySpanCap.server.ts new file mode 100644 index 00000000000..6203e615816 --- /dev/null +++ b/apps/webapp/app/v3/eventRepository/emergencySpanCap.server.ts @@ -0,0 +1,8 @@ +import { env } from "~/env.server"; + +// Emergency circuit breaker for trace views: when TRACE_VIEW_EMERGENCY_SPAN_CAP +// is set, clamp a trace summary span limit to it. Unset = no clamping. +export function clampToEmergencySpanCap(limit: number): number { + const cap = env.TRACE_VIEW_EMERGENCY_SPAN_CAP; + return cap === undefined ? limit : Math.min(limit, cap); +} diff --git a/apps/webapp/app/v3/mollifier/syntheticTrace.server.ts b/apps/webapp/app/v3/mollifier/syntheticTrace.server.ts index ee0d518e2e7..f0660d8ef1e 100644 --- a/apps/webapp/app/v3/mollifier/syntheticTrace.server.ts +++ b/apps/webapp/app/v3/mollifier/syntheticTrace.server.ts @@ -44,11 +44,14 @@ export function buildSyntheticTraceForBufferedRun(run: SyntheticRun) { const offset = millisecondsToNanoseconds( n.data.startTime.getTime() - treeRootStartTimeMs ); + // Mirror RunPresenter: raw span events stay server-side, only + // timelineEvents ship to the client. + const { events: spanEvents, ...data } = n.data; return { ...n, data: { - ...n.data, - timelineEvents: createTimelineSpanEventsFromSpanEvents(n.data.events, false, treeRootStartTimeMs), + ...data, + timelineEvents: createTimelineSpanEventsFromSpanEvents(spanEvents, false, treeRootStartTimeMs), duration: n.data.isPartial ? null : n.data.duration, offset, isRoot: n.id === spanId, diff --git a/apps/webapp/app/v3/taskEventStore.server.ts b/apps/webapp/app/v3/taskEventStore.server.ts index ff2fa741c67..ed580b40d0e 100644 --- a/apps/webapp/app/v3/taskEventStore.server.ts +++ b/apps/webapp/app/v3/taskEventStore.server.ts @@ -2,6 +2,7 @@ import { Prisma, TaskEvent } from "@trigger.dev/database"; import type { PrismaClient, PrismaReplicaClient } from "~/db.server"; import { env } from "~/env.server"; +import { clampToEmergencySpanCap } from "~/v3/eventRepository/emergencySpanCap.server"; export type CommonTaskEvent = Omit; export type TraceEvent = Pick< @@ -192,7 +193,7 @@ export class TaskEventStore { : Prisma.empty } ORDER BY "startTime" ASC - LIMIT ${env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT} + LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT)} `; } else { return await this.readReplica.$queryRaw` @@ -220,7 +221,7 @@ export class TaskEventStore { : Prisma.empty } ORDER BY "startTime" ASC - LIMIT ${env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT} + LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT)} `; } } @@ -270,7 +271,7 @@ export class TaskEventStore { : Prisma.empty } ORDER BY "startTime" ASC - LIMIT ${env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT} + LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT)} `; } else { return await this.readReplica.$queryRaw` @@ -299,7 +300,7 @@ export class TaskEventStore { : Prisma.empty } ORDER BY "startTime" ASC - LIMIT ${env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT} + LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT)} `; } } diff --git a/apps/webapp/test/mollifierSyntheticTrace.test.ts b/apps/webapp/test/mollifierSyntheticTrace.test.ts index ac7425a8fe9..e711eb0ffe2 100644 --- a/apps/webapp/test/mollifierSyntheticTrace.test.ts +++ b/apps/webapp/test/mollifierSyntheticTrace.test.ts @@ -141,9 +141,10 @@ describe("buildSyntheticTraceForBufferedRun", () => { expect(trace.queuedDuration).toBeUndefined(); }); - it("synthesises an empty events list (no timeline events from the buffer)", () => { + it("synthesises an empty timeline and keeps raw span events out of the payload", () => { const trace = buildSyntheticTraceForBufferedRun(makeSyntheticRun()); - expect(trace.events[0].data.events).toEqual([]); + // Raw span events stay server-side (mirrors RunPresenter's payload diet). + expect(trace.events[0].data).not.toHaveProperty("events"); expect(trace.events[0].data.timelineEvents).toEqual([]); }); }); diff --git a/apps/webapp/test/timelineSpanEvents.test.ts b/apps/webapp/test/timelineSpanEvents.test.ts index 5d31a8a1f60..11b62cf5afd 100644 --- a/apps/webapp/test/timelineSpanEvents.test.ts +++ b/apps/webapp/test/timelineSpanEvents.test.ts @@ -181,6 +181,15 @@ describe("createTimelineSpanEventsFromSpanEvents", () => { ); }); + test("should not attach raw span event properties to timeline events", () => { + const result = createTimelineSpanEventsFromSpanEvents(sampleSpanEvents, true); + + expect(result.length).toBeGreaterThan(0); + for (const event of result) { + expect(event).not.toHaveProperty("properties"); + } + }); + test("should preserve duration from span events", () => { const result = createTimelineSpanEventsFromSpanEvents(sampleSpanEvents, true);