Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .server-changes/trace-page-payload-diet.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
area: webapp
type: improvement
---

Shrinks the run trace page loader payload by keeping raw span events server-side and makes large trace trees render more efficiently. Also adds an optional `TRACE_VIEW_EMERGENCY_SPAN_CAP` env var that clamps trace summary span limits on both event store paths.
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
36 changes: 32 additions & 4 deletions apps/webapp/app/components/primitives/TreeView/TreeView.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
import { VirtualItem, Virtualizer, useVirtualizer } from "@tanstack/react-virtual";
import { motion } from "framer-motion";
import { MutableRefObject, RefObject, useCallback, useEffect, useReducer, useRef } from "react";
import {
MutableRefObject,
RefObject,
useCallback,
useEffect,
useMemo,
useReducer,
useRef,
} from "react";
import { cn } from "~/utils/cn";
import { NodeState, NodesState, reducer } from "./reducer";
import { concreteStateFromInput, selectedIdFromState } from "./utils";
Expand Down Expand Up @@ -47,6 +55,16 @@ export function TreeView<TData>({

const virtualItems = virtualizer.getVirtualItems();

// id -> node lookup so each virtual row resolves in O(1) instead of
// scanning the whole tree per row.
const nodesById = useMemo(() => {
const map = new Map<string, FlatTreeItem<TData>>();
for (const node of tree) {
map.set(node.id, node);
}
return map;
}, [tree]);

const scrollCallback = useCallback(
(event: Event) => {
if (!onScroll) return;
Expand Down Expand Up @@ -99,7 +117,7 @@ export function TreeView<TData>({
}}
>
{virtualItems.map((virtualItem) => {
const node = tree.find((node) => node.id === virtualItem.key);
const node = nodesById.get(virtualItem.key as string);
if (!node) return null;
const state = nodes[node.id];
if (!state) return null;
Expand Down Expand Up @@ -197,6 +215,16 @@ export function useTree<TData, TFilterValue>({
concreteStateFromInput({ tree, selectedId, collapsedIds, filter })
);

// id -> index lookup so getNodeProps resolves in O(1) instead of scanning
// the whole tree per rendered row.
const treeIndexById = useMemo(() => {
const map = new Map<string, number>();
tree.forEach((node, index) => {
map.set(node.id, index);
});
return map;
}, [tree]);

//sync external selectedId prop into internal state
useEffect(() => {
const internalSelectedId = selectedIdFromState(state.nodes);
Expand Down Expand Up @@ -497,7 +525,7 @@ export function useTree<TData, TFilterValue>({
(id: string) => {
const node = state.nodes[id];
if (!node) return {};
const treeItemIndex = tree.findIndex((node) => node.id === id);
const treeItemIndex = treeIndexById.get(id) ?? -1;
const treeItem = tree[treeItemIndex];
return {
"aria-expanded": node.expanded,
Expand All @@ -506,7 +534,7 @@ export function useTree<TData, TFilterValue>({
tabIndex: node.selected ? -1 : undefined,
};
},
[state]
[state, treeIndexById]
);

return {
Expand Down
3 changes: 3 additions & 0 deletions apps/webapp/app/env.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,9 @@ const EnvironmentSchema = z
MAXIMUM_LIVE_RELOADING_EVENTS: z.coerce.number().int().default(1000),
MAXIMUM_TRACE_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(25_000),
MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(10_000),
// Emergency circuit breaker: when set, clamps the trace summary and detailed
// summary span limits on both event store paths to this value. Unset = disabled.
TRACE_VIEW_EMERGENCY_SPAN_CAP: z.coerce.number().int().positive().optional(),
TASK_PAYLOAD_OFFLOAD_THRESHOLD: z.coerce.number().int().default(524_288), // 512KB
BATCH_PAYLOAD_OFFLOAD_THRESHOLD: z.coerce.number().int().optional(), // Defaults to TASK_PAYLOAD_OFFLOAD_THRESHOLD if not set
TASK_PAYLOAD_MAXIMUM_SIZE: z.coerce.number().int().default(3_145_728), // 3MB
Expand Down
8 changes: 6 additions & 2 deletions apps/webapp/app/presenters/v3/RunPresenter.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,16 @@ export class RunPresenter {
linkedRunIdBySpanId[n.id] = n.runId;
}

// Raw span events are only needed server-side (to derive timelineEvents);
// keep them out of the serialized loader payload.
const { events: spanEvents, ...data } = n.data;

return {
...n,
data: {
...n.data,
...data,
timelineEvents: createTimelineSpanEventsFromSpanEvents(
n.data.events,
spanEvents,
user?.admin ?? false,
treeRootStartTimeMs
),
Expand Down
25 changes: 24 additions & 1 deletion apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { type PrismaClient, prisma } from "~/db.server";
import { logger } from "~/services/logger.server";
import { requireUserId } from "~/services/session.server";
import { singleton } from "~/utils/singleton";
import { ABORT_REASON_SEND_ERROR, createSSELoader, SendFunction } from "~/utils/sse";
import { throttle } from "~/utils/throttle";
Expand Down Expand Up @@ -30,9 +31,23 @@ export class RunStreamPresenter {
throw new Response("Missing runParam", { status: 400 });
}

const userId = await requireUserId(context.request);

// Scope the lookup to organizations the requesting user is a member
// of, matching RunPresenter's run lookup. Unauthorized and missing
// runs are indistinguishable (both 404).
const run = await prismaClient.taskRun.findFirst({
where: {
friendlyId: runFriendlyId,
project: {
organization: {
members: {
some: {
userId,
},
},
},
},
},
select: {
traceId: true,
Expand All @@ -51,7 +66,15 @@ export class RunStreamPresenter {
if (buffer) {
try {
const entry = await buffer.getEntry(runFriendlyId);
if (entry) {
// Same membership scoping as the PG lookup above — the buffer
// entry carries the owning org's id.
const isMember = entry
? (await prismaClient.orgMember.findFirst({
where: { organizationId: entry.orgId, userId },
select: { id: true },
})) !== null
: false;
if (entry && isMember) {
// Go through the webapp wrapper so this read-side module
// shares a single deserialisation path with readFallback —
// see the contract comment in syntheticRedirectInfo.server.ts.
Expand Down
19 changes: 13 additions & 6 deletions apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { ClickHouse } from "@internal/clickhouse";
import { createHash } from "crypto";
import { ClickhouseEventRepository } from "~/v3/eventRepository/clickhouseEventRepository.server";
import { env } from "~/env.server";
import { clampToEmergencySpanCap } from "~/v3/eventRepository/emergencySpanCap.server";
import { singleton } from "~/utils/singleton";
import type { OrganizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistry.server";
import { type IEventRepository } from "~/v3/eventRepository/eventRepository.types";
Expand Down Expand Up @@ -533,9 +534,12 @@ function buildEventRepository(store: string, clickhouse: ClickHouse): Clickhouse
clickhouse,
batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE,
flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS,
maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT,
maximumTraceDetailedSummaryViewCount:
env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT,
maximumTraceSummaryViewCount: clampToEmergencySpanCap(
env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT
),
maximumTraceDetailedSummaryViewCount: clampToEmergencySpanCap(
env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT
),
maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING,
insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY,
waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1",
Expand All @@ -557,9 +561,12 @@ function buildEventRepository(store: string, clickhouse: ClickHouse): Clickhouse
clickhouse: clickhouse,
batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE,
flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS,
maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT,
maximumTraceDetailedSummaryViewCount:
env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT,
maximumTraceSummaryViewCount: clampToEmergencySpanCap(
env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT
),
maximumTraceDetailedSummaryViewCount: clampToEmergencySpanCap(
env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT
),
maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING,
insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY,
waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1",
Expand Down
1 change: 0 additions & 1 deletion apps/webapp/app/utils/timelineSpanEvents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ export function createTimelineSpanEventsFromSpanEvents(
offset,
timestamp,
duration,
properties: spanEvent.properties,
helpText: getHelpTextForEvent(name),
markerVariant,
lineVariant: "light" as const,
Expand Down
8 changes: 8 additions & 0 deletions apps/webapp/app/v3/eventRepository/emergencySpanCap.server.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { env } from "~/env.server";

// Emergency circuit breaker for trace views: when TRACE_VIEW_EMERGENCY_SPAN_CAP
// is set, clamp a trace summary span limit to it. Unset = no clamping.
export function clampToEmergencySpanCap(limit: number): number {
const cap = env.TRACE_VIEW_EMERGENCY_SPAN_CAP;
return cap === undefined ? limit : Math.min(limit, cap);
}
7 changes: 5 additions & 2 deletions apps/webapp/app/v3/mollifier/syntheticTrace.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,14 @@ export function buildSyntheticTraceForBufferedRun(run: SyntheticRun) {
const offset = millisecondsToNanoseconds(
n.data.startTime.getTime() - treeRootStartTimeMs
);
// Mirror RunPresenter: raw span events stay server-side, only
// timelineEvents ship to the client.
const { events: spanEvents, ...data } = n.data;
return {
...n,
data: {
...n.data,
timelineEvents: createTimelineSpanEventsFromSpanEvents(n.data.events, false, treeRootStartTimeMs),
...data,
timelineEvents: createTimelineSpanEventsFromSpanEvents(spanEvents, false, treeRootStartTimeMs),
duration: n.data.isPartial ? null : n.data.duration,
offset,
isRoot: n.id === spanId,
Expand Down
9 changes: 5 additions & 4 deletions apps/webapp/app/v3/taskEventStore.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import { Prisma, TaskEvent } from "@trigger.dev/database";
import type { PrismaClient, PrismaReplicaClient } from "~/db.server";
import { env } from "~/env.server";
import { clampToEmergencySpanCap } from "~/v3/eventRepository/emergencySpanCap.server";

export type CommonTaskEvent = Omit<TaskEvent, "id">;
export type TraceEvent = Pick<
Expand Down Expand Up @@ -192,7 +193,7 @@ export class TaskEventStore {
: Prisma.empty
}
ORDER BY "startTime" ASC
LIMIT ${env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT}
LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT)}
`;
} else {
return await this.readReplica.$queryRaw<TraceEvent[]>`
Expand Down Expand Up @@ -220,7 +221,7 @@ export class TaskEventStore {
: Prisma.empty
}
ORDER BY "startTime" ASC
LIMIT ${env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT}
LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_SUMMARY_VIEW_COUNT)}
`;
}
}
Expand Down Expand Up @@ -270,7 +271,7 @@ export class TaskEventStore {
: Prisma.empty
}
ORDER BY "startTime" ASC
LIMIT ${env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT}
LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT)}
`;
} else {
return await this.readReplica.$queryRaw<DetailedTraceEvent[]>`
Expand Down Expand Up @@ -299,7 +300,7 @@ export class TaskEventStore {
: Prisma.empty
}
ORDER BY "startTime" ASC
LIMIT ${env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT}
LIMIT ${clampToEmergencySpanCap(env.MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT)}
`;
}
}
Expand Down
5 changes: 3 additions & 2 deletions apps/webapp/test/mollifierSyntheticTrace.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,10 @@ describe("buildSyntheticTraceForBufferedRun", () => {
expect(trace.queuedDuration).toBeUndefined();
});

it("synthesises an empty events list (no timeline events from the buffer)", () => {
it("synthesises an empty timeline and keeps raw span events out of the payload", () => {
const trace = buildSyntheticTraceForBufferedRun(makeSyntheticRun());
expect(trace.events[0].data.events).toEqual([]);
// Raw span events stay server-side (mirrors RunPresenter's payload diet).
expect(trace.events[0].data).not.toHaveProperty("events");
expect(trace.events[0].data.timelineEvents).toEqual([]);
});
});
9 changes: 9 additions & 0 deletions apps/webapp/test/timelineSpanEvents.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,15 @@ describe("createTimelineSpanEventsFromSpanEvents", () => {
);
});

test("should not attach raw span event properties to timeline events", () => {
const result = createTimelineSpanEventsFromSpanEvents(sampleSpanEvents, true);

expect(result.length).toBeGreaterThan(0);
for (const event of result) {
expect(event).not.toHaveProperty("properties");
}
});

test("should preserve duration from span events", () => {
const result = createTimelineSpanEventsFromSpanEvents(sampleSpanEvents, true);

Expand Down