Skip to content

Commit f9e1102

Browse files
authored
fix(spanner): isolate OpenTelemetry background timers in ROOT_CONTEXT to prevent context and memory leaks (#8399)
This PR isolates background maintenance timers in SessionPool and MultiplexedSession within OpenTelemetry's ROOT_CONTEXT. Problem OpenTelemetry uses `AsyncLocalStorage` to propagate context. In Node.js, when a timer like `setInterval` is scheduled, the currently active context is captured and "leaked" into the timer's execution scope. If these background timers (which handle session pings, evictions, and multiplexed session maintenance) are initialized or restarted during a user request, they capture that request's context. This leads to: Memory Leaks: The request context and all associated metadata remain in memory indefinitely. Incorrect Tracing: Background logs or spans may be incorrectly attributed to the parent request that happened to trigger the timer's scheduling. Solution By wrapping the `setInterval` calls themselves in context.with(ROOT_CONTEXT, ...), we ensure that the timers are scheduled within the root context, effectively isolating them from any transient request context .
1 parent 7e6c7be commit f9e1102

6 files changed

Lines changed: 247 additions & 15 deletions

File tree

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
/*!
2+
* Copyright 2026 Google LLC. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import * as assert from 'assert';
18+
import {before, after, beforeEach, afterEach, describe, it} from 'mocha';
19+
import * as sinon from 'sinon';
20+
import {context, trace} from '@opentelemetry/api';
21+
import {NodeTracerProvider} from '@opentelemetry/sdk-trace-node';
22+
import {AlwaysOnSampler} from '@opentelemetry/sdk-trace-base';
23+
import {Database} from '../src/database';
24+
import {SessionPool} from '../src/session-pool';
25+
import {MultiplexedSession} from '../src/multiplexed-session';
26+
import {MetricsTracerFactory} from '../src/metrics/metrics-tracer-factory';
27+
import {
28+
ensureInitialContextManagerSet,
29+
_resetTracingEnabledForTest,
30+
} from '../src/instrument';
31+
32+
describe('OpenTelemetry Context Isolation Tests', () => {
33+
const sandbox = sinon.createSandbox();
34+
let provider: NodeTracerProvider;
35+
36+
const MOCK_DATABASE = {
37+
batchCreateSessions: sandbox.stub().resolves([[]]),
38+
databaseRole: 'parent_role',
39+
_observabilityOptions: {},
40+
} as unknown as Database;
41+
42+
before(() => {
43+
_resetTracingEnabledForTest();
44+
ensureInitialContextManagerSet();
45+
46+
provider = new NodeTracerProvider({
47+
sampler: new AlwaysOnSampler(),
48+
});
49+
provider.register();
50+
_resetTracingEnabledForTest();
51+
});
52+
53+
after(async () => {
54+
await provider.shutdown();
55+
});
56+
57+
afterEach(() => {
58+
sandbox.restore();
59+
});
60+
61+
describe('SessionPool background housekeeping timers', () => {
62+
let sessionPool: SessionPool;
63+
64+
beforeEach(() => {
65+
sessionPool = new SessionPool(MOCK_DATABASE, {
66+
min: 0,
67+
max: 10,
68+
idlesAfter: 10,
69+
keepAlive: 30,
70+
});
71+
});
72+
73+
afterEach(() => {
74+
sessionPool._stopHouseKeeping();
75+
});
76+
77+
it('should schedule evict and keep-alive setInterval calls in ROOT_CONTEXT', () => {
78+
const tracer = trace.getTracer('test');
79+
80+
const setIntervalStub = sandbox
81+
.stub(global, 'setInterval')
82+
.callsFake(() => {
83+
const activeSpan = trace.getSpan(context.active());
84+
85+
// Assert that the active context is ROOT_CONTEXT (i.e., no active span)
86+
assert.strictEqual(
87+
activeSpan,
88+
undefined,
89+
'setInterval scheduling must be isolated within ROOT_CONTEXT and not carry any active request span',
90+
);
91+
return {
92+
unref: () => {},
93+
} as unknown as NodeJS.Timeout;
94+
});
95+
96+
// Start an active request context
97+
tracer.startActiveSpan('request-span', span => {
98+
try {
99+
// Start housekeeping under the request context
100+
sessionPool._startHouseKeeping();
101+
} finally {
102+
span.end();
103+
}
104+
});
105+
106+
// Verify that both evict and ping intervals were scheduled
107+
assert.strictEqual(setIntervalStub.callCount, 2);
108+
});
109+
});
110+
111+
describe('MultiplexedSession background maintenance timer', () => {
112+
let multiplexedSession: MultiplexedSession;
113+
114+
beforeEach(() => {
115+
multiplexedSession = new MultiplexedSession(MOCK_DATABASE);
116+
});
117+
118+
afterEach(() => {
119+
if (multiplexedSession._refreshHandle) {
120+
clearInterval(multiplexedSession._refreshHandle);
121+
}
122+
});
123+
124+
it('should schedule multiplexed session maintenance setInterval in ROOT_CONTEXT', () => {
125+
const tracer = trace.getTracer('test');
126+
127+
const setIntervalStub = sandbox
128+
.stub(global, 'setInterval')
129+
.callsFake(() => {
130+
const activeSpan = trace.getSpan(context.active());
131+
132+
// Assert that the active context is ROOT_CONTEXT (i.e., no active span)
133+
assert.strictEqual(
134+
activeSpan,
135+
undefined,
136+
'setInterval scheduling must be isolated within ROOT_CONTEXT and not carry any active request span',
137+
);
138+
return {
139+
unref: () => {},
140+
} as unknown as NodeJS.Timeout;
141+
});
142+
143+
// Start an active request context
144+
tracer.startActiveSpan('request-span', span => {
145+
try {
146+
// Start maintenance under the request context
147+
multiplexedSession._maintain();
148+
} finally {
149+
span.end();
150+
}
151+
});
152+
153+
// Verify that the refresh interval was scheduled
154+
assert.strictEqual(setIntervalStub.callCount, 1);
155+
});
156+
});
157+
158+
describe('MetricsTracerFactory background cleanup timer', () => {
159+
beforeEach(async () => {
160+
MetricsTracerFactory.enabled = true;
161+
await MetricsTracerFactory.resetInstance();
162+
});
163+
164+
afterEach(async () => {
165+
await MetricsTracerFactory.resetInstance();
166+
});
167+
168+
it('should schedule MetricsTracerFactory cleanup setInterval in ROOT_CONTEXT', () => {
169+
const tracer = trace.getTracer('test');
170+
171+
const setIntervalStub = sandbox
172+
.stub(global, 'setInterval')
173+
.callsFake(() => {
174+
const activeSpan = trace.getSpan(context.active());
175+
176+
// Assert that the active context is ROOT_CONTEXT (i.e., no active span)
177+
assert.strictEqual(
178+
activeSpan,
179+
undefined,
180+
'setInterval scheduling must be isolated within ROOT_CONTEXT and not carry any active request span',
181+
);
182+
return {
183+
unref: () => {},
184+
} as unknown as NodeJS.Timeout;
185+
});
186+
187+
// Start an active request context
188+
tracer.startActiveSpan('request-span', span => {
189+
try {
190+
// Instantiate the singleton under a request context
191+
MetricsTracerFactory.getInstance('mock-project-id');
192+
} finally {
193+
span.end();
194+
}
195+
});
196+
197+
// Verify that the cleanup interval was scheduled
198+
assert.strictEqual(setIntervalStub.callCount, 1);
199+
});
200+
});
201+
});

handwritten/spanner/src/helper.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,5 +283,10 @@ export function isError(value: any): boolean {
283283
* @returns {Boolean} `true` if the value is a UUID, otherwise `false`.
284284
*/
285285
export function isUuid(value: any): boolean {
286-
return typeof value === 'string' && /^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i.test(value);
286+
return (
287+
typeof value === 'string' &&
288+
/^(?:[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}|00000000-0000-0000-0000-000000000000|ffffffff-ffff-ffff-ffff-ffffffffffff)$/i.test(
289+
value,
290+
)
291+
);
287292
}

handwritten/spanner/src/metrics/metrics-tracer-factory.ts

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import * as crypto from 'crypto';
1616
import * as os from 'os';
1717
import * as process from 'process';
1818
import {MeterProvider, MetricReader} from '@opentelemetry/sdk-metrics';
19-
import {Counter, Histogram} from '@opentelemetry/api';
19+
import {Counter, Histogram, context, ROOT_CONTEXT} from '@opentelemetry/api';
2020
import {detectResources, Resource} from '@opentelemetry/resources';
2121
import {GcpDetectorSync} from '@google-cloud/opentelemetry-resource-util';
2222
import * as Constants from './constants';
@@ -83,9 +83,11 @@ export class MetricsTracerFactory {
8383
);
8484

8585
// Start the Tracer cleanup task at an interval
86-
this._intervalTracerCleanup = setInterval(
87-
this._cleanMetricsTracers.bind(this),
88-
Constants.TRACER_CLEANUP_INTERVAL_MS,
86+
this._intervalTracerCleanup = context.with(ROOT_CONTEXT, () =>
87+
setInterval(
88+
this._cleanMetricsTracers.bind(this),
89+
Constants.TRACER_CLEANUP_INTERVAL_MS,
90+
),
8991
);
9092
// unref the interval to prevent it from blocking app termination
9193
// in the event loop
@@ -140,7 +142,7 @@ export class MetricsTracerFactory {
140142
/**
141143
* Resets the singleton instance of the MetricsTracerFactory.
142144
*/
143-
public static async resetInstance(projectId?: string) {
145+
public static async resetInstance() {
144146
clearInterval(MetricsTracerFactory._instance?._intervalTracerCleanup);
145147
await MetricsTracerFactory._instance?.resetMeterProvider();
146148
MetricsTracerFactory._instance = null;

handwritten/spanner/src/multiplexed-session.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616

1717
import {EventEmitter} from 'events';
18+
import {context, ROOT_CONTEXT} from '@opentelemetry/api';
1819
import {Database} from './database';
1920
import {Session} from './session';
2021
import {GetSessionCallback} from './session-factory';
@@ -177,9 +178,11 @@ export class MultiplexedSession
177178
clearInterval(this._refreshHandle);
178179
}
179180
const refreshRate = this.refreshRate! * 24 * 60 * 60000;
180-
this._refreshHandle = setInterval(async () => {
181-
await this._createSession().catch(() => {});
182-
}, refreshRate);
181+
this._refreshHandle = context.with(ROOT_CONTEXT, () =>
182+
setInterval(() => {
183+
this._createSession().catch(() => {});
184+
}, refreshRate),
185+
);
183186

184187
// Unreference the timer so it does not prevent the Node.js process from exiting.
185188
// If the application has finished all other work, this background timer shouldn't

handwritten/spanner/src/session-pool.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import {Session} from './session';
2222
import {Transaction} from './transaction';
2323
import {NormalCallback} from './common';
2424
import {GoogleError, grpc, ServiceError} from 'google-gax';
25+
import {context, ROOT_CONTEXT} from '@opentelemetry/api';
2526
import trace = require('stack-trace');
2627
import {
2728
ObservabilityOptions,
@@ -1061,12 +1062,16 @@ export class SessionPool extends EventEmitter implements SessionPoolInterface {
10611062
_startHouseKeeping(): void {
10621063
const evictRate = this.options.idlesAfter! * 60000;
10631064

1064-
this._evictHandle = setInterval(() => this._evictIdleSessions(), evictRate);
1065+
this._evictHandle = context.with(ROOT_CONTEXT, () =>
1066+
setInterval(() => this._evictIdleSessions(), evictRate),
1067+
);
10651068
this._evictHandle.unref();
10661069

10671070
const pingRate = this.options.keepAlive! * 60000;
10681071

1069-
this._pingHandle = setInterval(() => this._pingIdleSessions(), pingRate);
1072+
this._pingHandle = context.with(ROOT_CONTEXT, () =>
1073+
setInterval(() => this._pingIdleSessions(), pingRate),
1074+
);
10701075
this._pingHandle.unref();
10711076
}
10721077

handwritten/spanner/system-test/spanner.ts

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -933,7 +933,11 @@ describe('Spanner', () => {
933933
});
934934

935935
it('GOOGLE_STANDARD_SQL should write uuid array values', async () => {
936-
const values = [crypto.randomUUID(), crypto.randomUUID(), crypto.randomUUID()];
936+
const values = [
937+
crypto.randomUUID(),
938+
crypto.randomUUID(),
939+
crypto.randomUUID(),
940+
];
937941
const {row} = await insert(
938942
{UUIDArray: values},
939943
Spanner.GOOGLE_STANDARD_SQL,
@@ -942,7 +946,11 @@ describe('Spanner', () => {
942946
});
943947

944948
it.skip('POSTGRESQL should write uuid array values', async () => {
945-
const values = [crypto.randomUUID(), crypto.randomUUID(), crypto.randomUUID()];
949+
const values = [
950+
crypto.randomUUID(),
951+
crypto.randomUUID(),
952+
crypto.randomUUID(),
953+
];
946954
const {row} = await insert({UUIDArray: values}, Spanner.POSTGRESQL);
947955
assert.deepStrictEqual(row.toJSON().UUIDArray, values);
948956
});
@@ -4674,7 +4682,11 @@ describe('Spanner', () => {
46744682
});
46754683

46764684
it('GOOGLE_STANDARD_SQL should bind arrays', async () => {
4677-
const values = [crypto.randomUUID(), crypto.randomUUID(), crypto.randomUUID()];
4685+
const values = [
4686+
crypto.randomUUID(),
4687+
crypto.randomUUID(),
4688+
crypto.randomUUID(),
4689+
];
46784690

46794691
const query = {
46804692
sql: 'SELECT @v',
@@ -4714,7 +4726,11 @@ describe('Spanner', () => {
47144726
});
47154727

47164728
it.skip('POSTGRESQL should bind arrays', async () => {
4717-
const values = [crypto.randomUUID(), crypto.randomUUID(), crypto.randomUUID()];
4729+
const values = [
4730+
crypto.randomUUID(),
4731+
crypto.randomUUID(),
4732+
crypto.randomUUID(),
4733+
];
47184734

47194735
const query = {
47204736
sql: 'SELECT $1',

0 commit comments

Comments
 (0)