Skip to content

Commit 0c1cfa8

Browse files
committed
Refactor OpenTelemetry integration and cleanup
- Removed deprecated OpenTelemetry SDK dependencies from package.json and pnpm-lock.yaml.
- Updated instrumentation registration to use the @vercel/otel package for improved performance and compatibility.
- Improved error handling in the backend URL configuration so that invalid JSON in STACK_BACKEND_URLS_CONFIG raises a descriptive error.
- Removed the shutdownOTel function and its call in the Prisma client's SIGTERM handler.

These changes streamline the OpenTelemetry setup and improve the overall resilience of the backend API.
1 parent cd6e7e4 commit 0c1cfa8

7 files changed

Lines changed: 35 additions & 407 deletions

File tree

.github/workflows/e2e-fallback-tests.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ jobs:
9191
- name: Start stack-backend on fallback port (8110)
9292
uses: JarvusInnovations/background-action@v1.0.7
9393
with:
94-
run: pnpm -C apps/backend run with-env:test next start --port 8110 --log-order=stream &
94+
run: pnpm -C apps/backend run with-env:test next start --port 8110 &
9595
wait-on: |
9696
http://localhost:8110
9797
tail: true

apps/backend/package.json

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,6 @@
6969
"@opentelemetry/instrumentation": "^0.53.0",
7070
"@opentelemetry/resources": "^1.26.0",
7171
"@opentelemetry/sdk-logs": "^0.53.0",
72-
"@opentelemetry/sdk-node": "^0.214.0",
7372
"@opentelemetry/sdk-trace-base": "^1.26.0",
7473
"@opentelemetry/sdk-trace-node": "^1.26.0",
7574
"@opentelemetry/semantic-conventions": "^1.27.0",

apps/backend/src/app/api/latest/internal/backend-urls/route.tsx

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,9 +47,17 @@ let cachedEntries: ReturnType<typeof parseAndValidateConfig> | undefined;
4747
function getCachedConfig() {
4848
if (!cachedEntries) {
4949
const rawEnv = getEnvVariable("STACK_BACKEND_URLS_CONFIG", "");
50-
cachedEntries = rawEnv
51-
? parseAndValidateConfig(JSON.parse(rawEnv))
52-
: [{ probability: 1, urls: getDefaultApiUrls(getEnvVariable("NEXT_PUBLIC_STACK_API_URL")) }];
50+
if (rawEnv) {
51+
let parsed;
52+
try {
53+
parsed = JSON.parse(rawEnv);
54+
} catch (e) {
55+
throw new StackAssertionError(`STACK_BACKEND_URLS_CONFIG is not valid JSON: ${e}`);
56+
}
57+
cachedEntries = parseAndValidateConfig(parsed);
58+
} else {
59+
cachedEntries = [{ probability: 1, urls: getDefaultApiUrls(getEnvVariable("NEXT_PUBLIC_STACK_API_URL")) }];
60+
}
5361
}
5462
return cachedEntries;
5563
}

apps/backend/src/instrumentation.ts

Lines changed: 17 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -5,70 +5,31 @@ import * as Sentry from "@sentry/nextjs";
55
import { getEnvVariable, getNextRuntime, getNodeEnvironment } from "@stackframe/stack-shared/dist/utils/env";
66
import { sentryBaseConfig } from "@stackframe/stack-shared/dist/utils/sentry";
77
import { nicify } from "@stackframe/stack-shared/dist/utils/strings";
8+
import { registerOTel } from '@vercel/otel';
89
import { initPerfStats } from "./lib/dev-perf-stats";
910
import "./polyfills";
1011

1112
// this is a hack for making prisma instrumentation work
1213
// somehow prisma instrumentation accesses global and it makes edge instrumentation complain
1314
globalThis.global = globalThis;
1415

15-
function getOTelInstrumentations() {
16-
return [
17-
new PrismaInstrumentation(),
18-
...getNextRuntime() === "nodejs" ? getNodeAutoInstrumentations({
19-
'@opentelemetry/instrumentation-http': {
20-
enabled: false,
21-
},
22-
}) : [],
23-
];
24-
}
25-
26-
function getDevTraceExporter() {
27-
if (getNodeEnvironment() === "development" && getNextRuntime() === "nodejs") {
28-
return new OTLPTraceExporter({
29-
url: `http://localhost:${getEnvVariable("NEXT_PUBLIC_STACK_PORT_PREFIX", "81")}31/v1/traces`,
30-
});
31-
}
32-
return undefined;
33-
}
34-
35-
let otelSdk: { shutdown(): Promise<void> } | undefined;
36-
37-
export async function shutdownOTel() {
38-
await otelSdk?.shutdown();
39-
}
40-
41-
async function registerOTelProvider() {
42-
const instrumentations = getOTelInstrumentations();
43-
const devExporter = getDevTraceExporter();
44-
45-
if (getEnvVariable("VERCEL", "")) {
46-
// On Vercel: use @vercel/otel which wraps the standard OTEL SDK with Vercel-specific defaults
47-
const { registerOTel } = await import("@vercel/otel");
48-
registerOTel({
49-
serviceName: 'stack-backend',
50-
instrumentations,
51-
...devExporter ? { traceExporter: devExporter } : {},
52-
});
53-
} else if (getNextRuntime() === "nodejs") {
54-
// On Cloud Run / self-hosted: use standard @opentelemetry/sdk-node (Node.js only)
55-
const { NodeSDK } = await import("@opentelemetry/sdk-node");
56-
const otelEndpoint = getEnvVariable("OTEL_EXPORTER_OTLP_ENDPOINT", "");
57-
const exporter = devExporter ?? (otelEndpoint ? new OTLPTraceExporter({ url: otelEndpoint }) : undefined);
58-
const sdk = new NodeSDK({
59-
serviceName: 'stack-backend',
60-
instrumentations,
61-
// Cast needed: @opentelemetry/exporter-trace-otlp-http may be a different major than sdk-node,
62-
// but the runtime interface is compatible
63-
...(exporter ? { traceExporter: exporter as any } : {}),
64-
});
65-
sdk.start();
66-
otelSdk = sdk;
67-
}
68-
}
69-
7016
export async function register() {
71-
await registerOTelProvider();
17+
registerOTel({
18+
serviceName: 'stack-backend',
19+
instrumentations: [
20+
new PrismaInstrumentation(),
21+
...getNextRuntime() === "nodejs" ? getNodeAutoInstrumentations({
22+
'@opentelemetry/instrumentation-http': {
23+
enabled: false,
24+
},
25+
}) : [],
26+
],
27+
...getNodeEnvironment() === "development" && getNextRuntime() === "nodejs" ? {
28+
traceExporter: new OTLPTraceExporter({
29+
url: `http://localhost:${getEnvVariable("NEXT_PUBLIC_STACK_PORT_PREFIX", "81")}31/v1/traces`,
30+
}),
31+
} : {},
32+
});
7233

7334
if (getNextRuntime() === "nodejs") {
7435
(globalThis as any).process.title = `stack-backend:${getEnvVariable("NEXT_PUBLIC_STACK_PORT_PREFIX", "81")} (node/nextjs)`;

apps/backend/src/prisma-client.tsx

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ import net from "node:net";
1818
import { Pool } from "pg";
1919
import { isPromise } from "util/types";
2020
import { runMigrationNeeded } from "./auto-migrations";
21-
import { shutdownOTel } from "./instrumentation";
2221
import { registerPgPool } from "./lib/dev-perf-stats";
2322
import { Tenancy } from "./lib/tenancies";
2423
import { ensurePolyfilled } from "./polyfills";
@@ -116,7 +115,6 @@ if (!getEnvVariable("VERCEL", "") && !globalVar.__stack_prisma_sigterm_registere
116115
try {
117116
console.log("[SIGTERM] Draining background tasks and database connections...");
118117
await drainInFlightPromises(8000);
119-
await shutdownOTel();
120118
for (const [, entry] of postgresPrismaClientsStore) {
121119
await entry.client.$disconnect();
122120
}
@@ -126,6 +124,7 @@ if (!getEnvVariable("VERCEL", "") && !globalVar.__stack_prisma_sigterm_registere
126124
console.log("[SIGTERM] Completed draining background tasks and database connections.");
127125
} finally {
128126
clearTimeout(keepAlive);
127+
process.exit(0);
129128
}
130129
});
131130
});

packages/stack-shared/src/interface/client-interface.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -181,7 +181,7 @@ export class StackClientInterface {
181181
} catch (probeError) {
182182
if (probeError instanceof KnownError) throw probeError;
183183
// Still down — reduce probe frequency
184-
this._currentProbeRate *= 0.5;
184+
this._currentProbeRate = Math.max(this._currentProbeRate * 0.5, 0.01);
185185
}
186186
}
187187
return await cb(apiUrls[activeIndex], { maxAttempts: 1, skipDiagnostics: false });

0 commit comments

Comments
 (0)