Skip to content

Commit 8650110

Browse files
committed
(fix): Prevent duplicate method chain wrapping
1 parent 9cd84f4 commit 8650110

7 files changed

Lines changed: 212 additions & 27 deletions

File tree

.versions

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ reactive-var@1.0.13
4646
reload@1.3.2
4747
retry@1.1.1
4848
routepolicy@1.1.2
49-
skysignal:agent@1.0.23
49+
skysignal:agent@1.0.24
5050
socket-stream-client@0.6.1
5151
tracker@1.3.4
5252
typescript@5.9.3

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -854,6 +854,13 @@ Main agent singleton instance.
854854

855855
## Changelog
856856

857+
### v1.0.25 (Stack Overflow Root Cause Fix)
858+
859+
- **Fix shared `protocol_handlers` wrapper chain buildup (ROOT CAUSE)** - In Meteor, `session.protocol_handlers` is a shared object on the Session prototype — all DDP sessions reference the same object. `_hijackMethodHandler` and `_hijackSubHandler` captured `session.protocol_handlers.method` and replaced it with a wrapper, but because the object is shared, each new session's wrap captured the PREVIOUS session's wrapper, building an N-deep call chain. After ~200 concurrent connections, calling `protocol_handlers.method` would recurse through hundreds of chained wrappers and overflow V8's stack. The fix adds a `_skySignalDDPQueueWrapped` flag on the wrapper function itself, ensuring the handler is wrapped exactly once globally regardless of how many sessions connect. (fixes [#7](https://github.com/SkySignalAPM/agent/issues/7))
860+
- **Auto-disable DDPQueueCollector when another APM agent is detected** - When `montiapm:agent` or `mdg:meteor-apm-agent` is installed alongside `skysignal:agent`, `DDPQueueCollector.start()` now automatically disables itself instead of merely logging a warning. Both agents wrap the same DDP session internals, and the compounded wrapper depth compounds the stack overflow risk. Users can force-enable via `{ forceEnable: true }` if they accept the risk.
861+
- **Skip MethodTracer unblock wrapping when another APM agent is detected** - `MethodTracer._wrapMethod()` now skips wrapping `methodInvocation.unblock` when a conflicting APM agent is present. Both agents wrapping unblock adds ~3-5 extra frames per method call. Unblock timing analysis is unavailable in this mode; all other MethodTracer features remain fully functional.
862+
- **New tests: shared prototype regression** - Added tests simulating 50 sessions sharing the same `protocol_handlers` object, verifying the handler is wrapped exactly once and does not build a deep chain.
863+
857864
### v1.0.24 (DDP Queue Stack Overflow Fix)
858865

859866
- **Fix secondary stack overflow in `wrapUnblock`** - When `_recordBlockingTime()` threw an error with a very deep stack trace (e.g. from mutual-recursion across wrapper layers), `console.error(error)` triggered `source-map-support`'s `prepareStackTrace` which re-overflowed, causing a secondary `RangeError`. All `console.error` calls in `DDPQueueCollector` now use `String(error)` to serialize the error message without triggering stack trace reprocessing. See [#12](https://github.com/SkySignalAPM/agent/pull/12).

lib/collectors/DDPQueueCollector.js

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import { Meteor } from 'meteor/meteor';
1313
export default class DDPQueueCollector {
1414
constructor(options = {}) {
1515
this.enabled = options.enabled !== false;
16+
this.forceEnable = options.forceEnable || false;
1617
this.debug = options.debug || false;
1718

1819
// Store wait lists for messages (who they're waiting for)
@@ -49,13 +50,31 @@ export default class DDPQueueCollector {
4950
}
5051

5152
try {
52-
// Detect conflicting APM agents that also wrap DDP internals
53-
if (Package['montiapm:agent'] || Package['mdg:meteor-apm-agent']) {
53+
// Detect conflicting APM agents that also wrap DDP internals.
54+
// When another agent wraps the same session methods (processMessage,
55+
// protocol_handlers, unblock), the combined wrapper depth causes stack
56+
// overflows. Auto-disable to prevent this — the user can force-enable
57+
// via `forceEnable: true` in options if needed. See #7.
58+
this._conflictingAgent = !!(
59+
typeof Package !== 'undefined' &&
60+
(Package['montiapm:agent'] || Package['mdg:meteor-apm-agent'])
61+
);
62+
63+
if (this._conflictingAgent) {
5464
console.warn(
55-
'⚠️ DDPQueueCollector: Detected another APM agent (montiapm:agent) that also ' +
56-
'wraps DDP session internals. Running multiple APM agents simultaneously can ' +
57-
'cause stack overflow errors. Consider removing one of the agents.'
65+
'⚠️ DDPQueueCollector: Detected another APM agent (montiapm:agent / mdg:meteor-apm-agent) ' +
66+
'that also wraps DDP session internals. Automatically disabling DDPQueueCollector to prevent ' +
67+
'stack overflow errors from compounded wrapper depth. DDP queue wait times will not be tracked. ' +
68+
'To fix: remove the other APM agent package. ' +
69+
'To force-enable (at your own risk): pass { forceEnable: true } to DDPQueueCollector.'
5870
);
71+
72+
if (!this.forceEnable) {
73+
this.started = false;
74+
return;
75+
}
76+
77+
console.warn('⚠️ DDPQueueCollector: Force-enabled despite conflicting agent. Stack overflows may occur.');
5978
}
6079

6180
this._hijackSessionProcessing();
@@ -237,7 +256,14 @@ export default class DDPQueueCollector {
237256
}
238257

239258
/**
240-
* Hijack method handler to track wait time
259+
* Hijack method handler to track wait time.
260+
*
261+
* CRITICAL: session.protocol_handlers is a SHARED object on the Session
262+
* prototype — all sessions reference the SAME protocol_handlers. If we
263+
* naively wrap per-session, each new session captures the previous wrapper,
264+
* building an N-deep chain that overflows the stack after ~200 sessions.
265+
* We must wrap ONCE and use a flag on the wrapper function to prevent
266+
* re-wrapping. See #7.
241267
*/
242268
_hijackMethodHandler(session) {
243269
const self = this;
@@ -247,6 +273,11 @@ export default class DDPQueueCollector {
247273
return;
248274
}
249275

276+
// If already wrapped (shared across sessions), skip.
277+
if (session.protocol_handlers.method._skySignalDDPQueueWrapped) {
278+
return;
279+
}
280+
250281
// Get the current handler (might already be wrapped by another collector)
251282
const currentMethodHandler = session.protocol_handlers.method;
252283

@@ -255,7 +286,7 @@ export default class DDPQueueCollector {
255286
session._skySignalOriginalMethodHandler = currentMethodHandler;
256287
}
257288

258-
session.protocol_handlers.method = function (msg, unblock) {
289+
const wrappedHandler = function (msg, unblock) {
259290
// Check if we're tracking this message
260291
if (msg._queueEnterTime) {
261292
// Calculate wait time
@@ -278,10 +309,15 @@ export default class DDPQueueCollector {
278309
// more than once (e.g. agent restart during hot code push). See #5.
279310
return currentMethodHandler.call(this, msg, unblock);
280311
};
312+
313+
wrappedHandler._skySignalDDPQueueWrapped = true;
314+
session.protocol_handlers.method = wrappedHandler;
281315
}
282316

283317
/**
284-
* Hijack subscription handler to track wait time
318+
* Hijack subscription handler to track wait time.
319+
*
320+
* Same shared-prototype concern as _hijackMethodHandler — wrap ONCE. See #7.
285321
*/
286322
_hijackSubHandler(session) {
287323
const self = this;
@@ -291,6 +327,11 @@ export default class DDPQueueCollector {
291327
return;
292328
}
293329

330+
// If already wrapped (shared across sessions), skip.
331+
if (session.protocol_handlers.sub._skySignalDDPQueueWrapped) {
332+
return;
333+
}
334+
294335
// Get the current handler (might already be wrapped by another collector)
295336
const currentSubHandler = session.protocol_handlers.sub;
296337

@@ -299,7 +340,7 @@ export default class DDPQueueCollector {
299340
session._skySignalOriginalSubHandler = currentSubHandler;
300341
}
301342

302-
session.protocol_handlers.sub = function (msg, unblock) {
343+
const wrappedHandler = function (msg, unblock) {
303344
// Check if we're tracking this message
304345
if (msg._queueEnterTime) {
305346
// Calculate wait time
@@ -318,6 +359,9 @@ export default class DDPQueueCollector {
318359
// Call the version we captured (chains to other wrappers if present)
319360
return currentSubHandler.call(this, msg, unblock);
320361
};
362+
363+
wrappedHandler._skySignalDDPQueueWrapped = true;
364+
session.protocol_handlers.sub = wrappedHandler;
321365
}
322366

323367
/**

lib/collectors/MethodTracer.js

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,23 @@ export default class MethodTracer {
131131
return;
132132
}
133133

134+
// Detect conflicting APM agents. When another agent wraps the same
135+
// Meteor internals (methods, unblock, DDP handlers), the combined
136+
// wrapper depth can cause stack overflows. We skip wrapping unblock
137+
// in _wrapMethod to reduce stack frame overhead. See #7.
138+
this._hasConflictingAgent = !!(
139+
typeof Package !== 'undefined' &&
140+
(Package['montiapm:agent'] || Package['mdg:meteor-apm-agent'])
141+
);
142+
143+
if (this._hasConflictingAgent) {
144+
this._warn(
145+
'Detected another APM agent (montiapm:agent / mdg:meteor-apm-agent). ' +
146+
'Reducing wrapper depth to prevent stack overflow. ' +
147+
'Unblock timing will not be tracked. Consider removing the other agent.'
148+
);
149+
}
150+
134151
// Register tracer globally for user access
135152
global.SkySignalTracer = this;
136153

@@ -321,22 +338,26 @@ export default class MethodTracer {
321338
context: context // Store reference to context for adding child method operations
322339
});
323340

324-
// Wrap this.unblock() to detect when it's called
341+
// Wrap this.unblock() to detect when it's called.
342+
// Skip when another APM agent is detected — both wrapping unblock
343+
// compounds stack depth and is the primary cause of overflow. See #7.
325344
const methodInvocation = this;
326-
const originalUnblock = methodInvocation.unblock;
327-
let unblockCalled = false;
328-
329-
if (originalUnblock && typeof originalUnblock === 'function') {
330-
methodInvocation.unblock = function() {
331-
if (!unblockCalled) {
332-
unblockCalled = true;
333-
const unblockTime = Date.now();
334-
context.unblockAnalysis.called = true;
335-
context.unblockAnalysis.timeToUnblock = unblockTime - startTime;
336-
context.unblockAnalysis.callPosition = unblockTime - startTime;
337-
}
338-
return originalUnblock.call(this);
339-
};
345+
if (!self._hasConflictingAgent) {
346+
const originalUnblock = methodInvocation.unblock;
347+
let unblockCalled = false;
348+
349+
if (originalUnblock && typeof originalUnblock === 'function') {
350+
methodInvocation.unblock = function() {
351+
if (!unblockCalled) {
352+
unblockCalled = true;
353+
const unblockTime = Date.now();
354+
context.unblockAnalysis.called = true;
355+
context.unblockAnalysis.timeToUnblock = unblockTime - startTime;
356+
context.unblockAnalysis.callPosition = unblockTime - startTime;
357+
}
358+
return originalUnblock.call(this);
359+
};
360+
}
340361
}
341362

342363
// Helper function to record this method call in the parent's operations array

lib/collectors/SystemMetricsCollector.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import v8 from "v8";
1010
const execAsync = promisify(exec);
1111

1212
// Agent version - must be updated alongside package.js on each release
13-
const AGENT_VERSION = '1.0.24';
13+
const AGENT_VERSION = '1.0.25';
1414

1515
// cgroup v1 "unlimited" sentinel: values >= 2^62 mean no limit is set
1616
const CGROUP_V1_UNLIMITED = 2 ** 62;

package.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package.describe({
22
name: "skysignal:agent",
3-
version: "1.0.24",
3+
version: "1.0.25",
44
summary:
55
"SkySignal APM agent for Meteor applications - monitors performance, errors, and system metrics",
66
git: "https://github.com/skysignalapm/agent.git",

tests/unit/collectors/DDPQueueCollector.test.js

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,60 @@ describe('DDPQueueCollector', function () {
5050
const c = new DDPQueueCollector({ enabled: false });
5151
expect(c.enabled).to.be.false;
5252
});
53+
54+
it('accepts forceEnable option', function () {
55+
const c = new DDPQueueCollector({ forceEnable: true });
56+
expect(c.forceEnable).to.be.true;
57+
});
58+
});
59+
60+
describe('conflicting APM agent detection', function () {
61+
let collector;
62+
63+
afterEach(function () {
64+
// Stop collector to clear any setInterval handles
65+
if (collector) {
66+
try { collector.stop(); } catch (_e) { /* ignore */ }
67+
collector = null;
68+
}
69+
// Clean up global Package mock
70+
delete global.Package;
71+
});
72+
73+
it('auto-disables when montiapm:agent is detected', function () {
74+
global.Package = { 'montiapm:agent': {} };
75+
collector = new DDPQueueCollector({ enabled: true });
76+
collector.start();
77+
expect(collector.started).to.be.false;
78+
expect(collector._conflictingAgent).to.be.true;
79+
});
80+
81+
it('auto-disables when mdg:meteor-apm-agent is detected', function () {
82+
global.Package = { 'mdg:meteor-apm-agent': {} };
83+
collector = new DDPQueueCollector({ enabled: true });
84+
collector.start();
85+
expect(collector.started).to.be.false;
86+
});
87+
88+
it('does not set _conflictingAgent when no conflicting agent present', function () {
89+
global.Package = {};
90+
collector = new DDPQueueCollector({ enabled: true });
91+
// We can't call start() without Meteor.server (it would try to hijack sessions),
92+
// but we can test the detection logic directly by checking what start() would set.
93+
// Calling start will error at _hijackSessionProcessing, but _conflictingAgent
94+
// is set before that call.
95+
try { collector.start(); } catch (_e) { /* expected without Meteor.server */ }
96+
expect(collector._conflictingAgent).to.be.false;
97+
});
98+
99+
it('force-enables despite conflicting agent when forceEnable is true', function () {
100+
global.Package = { 'montiapm:agent': {} };
101+
collector = new DDPQueueCollector({ enabled: true, forceEnable: true });
102+
// start() proceeds past the guard and tries _hijackSessionProcessing,
103+
// which errors without Meteor.server — but it proves it didn't early-return.
104+
try { collector.start(); } catch (_e) { /* expected without Meteor.server */ }
105+
expect(collector._conflictingAgent).to.be.true;
106+
});
53107
});
54108

55109
// ==========================================
@@ -298,6 +352,65 @@ describe('DDPQueueCollector', function () {
298352
});
299353
});
300354

355+
describe('shared protocol_handlers (Bug #7 root cause)', function () {
356+
357+
it('does NOT build N-deep wrapper chain when sessions share protocol_handlers', function () {
358+
// In Meteor, session.protocol_handlers is on the Session prototype.
359+
// All sessions share the SAME protocol_handlers object. Without the
360+
// _skySignalDDPQueueWrapped guard, each _wrapSession call adds another
361+
// wrapper layer. After N sessions, calling protocol_handlers.method
362+
// recurses N levels deep, causing stack overflow. See #7.
363+
const sharedHandlers = {
364+
method: sinon.stub(),
365+
sub: sinon.stub()
366+
};
367+
368+
// Create 50 sessions sharing the same protocol_handlers object
369+
const sessions = [];
370+
for (let i = 0; i < 50; i++) {
371+
const session = createMockSession(`shared-${i}`);
372+
session.protocol_handlers = sharedHandlers; // Shared reference
373+
sessions.push(session);
374+
}
375+
376+
// Wrap all 50 sessions
377+
sessions.forEach(s => collector._wrapSession(s));
378+
379+
// protocol_handlers.method should be wrapped exactly ONCE, not 50 times
380+
expect(sharedHandlers.method._skySignalDDPQueueWrapped).to.be.true;
381+
382+
// Calling the handler should NOT overflow — it's 1 wrapper deep, not 50
383+
const msg = { id: 'test', msg: 'method', method: 'test.method', _queueEnterTime: Date.now() };
384+
expect(() => {
385+
sharedHandlers.method.call(sessions[0], msg, () => {});
386+
}).to.not.throw();
387+
});
388+
389+
it('wraps protocol_handlers.sub only once across shared sessions', function () {
390+
const sharedHandlers = {
391+
method: sinon.stub(),
392+
sub: sinon.stub()
393+
};
394+
395+
const sessions = [];
396+
for (let i = 0; i < 20; i++) {
397+
const session = createMockSession(`shared-sub-${i}`);
398+
session.protocol_handlers = sharedHandlers;
399+
sessions.push(session);
400+
}
401+
402+
sessions.forEach(s => collector._wrapSession(s));
403+
404+
expect(sharedHandlers.sub._skySignalDDPQueueWrapped).to.be.true;
405+
406+
// Call the sub handler — should not overflow
407+
const msg = { id: 'sub-test', msg: 'sub', name: 'test.pub', _queueEnterTime: Date.now() };
408+
expect(() => {
409+
sharedHandlers.sub.call(sessions[0], msg, () => {});
410+
}).to.not.throw();
411+
});
412+
});
413+
301414
describe('_wrapSession', function () {
302415

303416
it('sets _skySignalDDPQueueWrapped flag', function () {

0 commit comments

Comments
 (0)