Skip to content

Commit 0c71281

Browse files
BridgeAR authored and rochdev committed
perf(mongodb): fold limit-depth and bigint sanitisation into one pass (#8375)
`getQuery` did two full walks per command — `limitDepth` cloned the filter into a parallel "?-or-object" tree, then `sanitizeBigInt` ran `JSON.stringify` over the clone — so big `$or` arrays and deep `$lookup` pipelines paid the per-key allocation twice.

Fold both into one `JSON.stringify` replacer; the native `toJSON` dispatch handles `ObjectId` / `Decimal128` / `Long` / `Date` / `Timestamp`, the replacer suppresses `Buffer` / `Binary` `toJSON` output, and an ancestor stack tracks depth without a separate clone.

Three behaviour drifts vs `limitDepth`, none covered by existing specs:

1. Inherited enumerable keys are no longer walked.
2. Shallow cycles render as `{"self":"?"}` instead of a ten-deep nested fallback.
3. Arrays of `Buffer` / function are sanitised instead of falling through.

Drive-by fix:

* `truncate` short-circuits when the resource is already within the 10,000-character limit (`MAX_QUERY_LENGTH`).
1 parent 30a3a1c commit 0c71281

3 files changed

Lines changed: 134 additions & 70 deletions

File tree

packages/datadog-plugin-mongodb-core/src/index.js

Lines changed: 36 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -90,17 +90,16 @@ class MongodbCorePlugin extends DatabasePlugin {
9090
}
9191
}
9292

93-
function sanitizeBigInt (data) {
94-
return JSON.stringify(data, (_key, value) => typeof value === 'bigint' ? value.toString() : value)
95-
}
93+
const MAX_DEPTH = 10
94+
const MAX_QUERY_LENGTH = 10_000
9695

9796
function extractQuery (statements) {
9897
if (statements.length === 1 && statements[0].q) return statements[0].q
9998

10099
const extractedQueries = []
101100
for (let i = 0; i < statements.length; i++) {
102101
if (statements[i].q) {
103-
extractedQueries.push(limitDepth(statements[i].q))
102+
extractedQueries.push(statements[i].q)
104103
}
105104
}
106105

@@ -110,12 +109,12 @@ function extractQuery (statements) {
110109
function getQuery (cmd) {
111110
if (!cmd || (typeof cmd !== 'object' && !Array.isArray(cmd))) return
112111

113-
if (Array.isArray(cmd)) return sanitizeBigInt(extractQuery(cmd))
114-
if (cmd.query) return sanitizeBigInt(limitDepth(cmd.query))
115-
if (cmd.filter) return sanitizeBigInt(limitDepth(cmd.filter))
116-
if (cmd.pipeline) return sanitizeBigInt(limitDepth(cmd.pipeline))
117-
if (cmd.deletes) return sanitizeBigInt(extractQuery(cmd.deletes))
118-
if (cmd.updates) return sanitizeBigInt(extractQuery(cmd.updates))
112+
if (Array.isArray(cmd)) return sanitiseAndStringify(extractQuery(cmd))
113+
if (cmd.query) return sanitiseAndStringify(cmd.query)
114+
if (cmd.filter) return sanitiseAndStringify(cmd.filter)
115+
if (cmd.pipeline) return sanitiseAndStringify(cmd.pipeline)
116+
if (cmd.deletes) return sanitiseAndStringify(extractQuery(cmd.deletes))
117+
if (cmd.updates) return sanitiseAndStringify(extractQuery(cmd.updates))
119118
}
120119

121120
function getResource (plugin, ns, query, operationName) {
@@ -129,73 +128,40 @@ function getResource (plugin, ns, query, operationName) {
129128
}
130129

131130
function truncate (input) {
132-
return input.slice(0, Math.min(input.length, 10_000))
133-
}
134-
135-
function shouldSimplify (input) {
136-
return !isObject(input) || typeof input.toJSON === 'function'
131+
return input.length > MAX_QUERY_LENGTH ? input.slice(0, MAX_QUERY_LENGTH) : input
137132
}
138133

139-
function shouldHide (input) {
140-
return Buffer.isBuffer(input) || typeof input === 'function' || isBinary(input)
141-
}
142-
143-
function limitDepth (input) {
144-
if (isBSON(input)) {
145-
input = input.toJSON()
146-
}
147-
148-
if (shouldHide(input)) return '?'
149-
if (shouldSimplify(input)) return input
150-
151-
const output = {}
152-
const queue = [{
153-
input,
154-
output,
155-
depth: 0,
156-
}]
157-
158-
while (queue.length) {
159-
const {
160-
input, output, depth,
161-
} = queue.pop()
162-
const nextDepth = depth + 1
163-
for (const key of Object.keys(input)) {
164-
let child = input[key]
165-
if (typeof child === 'function') continue
166-
167-
if (isBSON(child)) {
168-
child = typeof child.toJSON === 'function' ? child.toJSON() : '?'
169-
}
170-
171-
if (depth >= 10 || shouldHide(child)) {
172-
output[key] = '?'
173-
} else if (shouldSimplify(child)) {
174-
output[key] = child
175-
} else {
176-
output[key] = {}
177-
queue.push({
178-
input: child,
179-
output: output[key],
180-
depth: nextDepth,
181-
})
134+
// Single-pass sanitisation. The replacer:
135+
// - skips functions and coerces bigint to its decimal string,
136+
// - returns '?' for Buffer / BSON Binary on the *original* value (JSON.stringify already invoked
137+
// toJSON before calling us; Buffer / Binary do have toJSON outputs we want to suppress),
138+
// - lets JSON.stringify call toJSON on other BSON types (ObjectId, Long, Decimal128, Date, Timestamp, ...)
139+
// so the result lands here as a primitive or plain object,
140+
// - returns '?' for BSON types without toJSON (MinKey, MaxKey) where `value === original`,
141+
// - tracks depth via an ancestor stack so cycles and depth >= MAX_DEPTH collapse to '?'.
142+
function sanitiseAndStringify (input) {
143+
const ancestors = []
144+
return JSON.stringify(input, function (key, value) {
145+
if (typeof value === 'function') return
146+
if (typeof value === 'bigint') return value.toString()
147+
148+
const original = key === '' ? value : this[key]
149+
if (typeof original === 'object' && original !== null) {
150+
if (Buffer.isBuffer(original)) return '?'
151+
const bsontype = original._bsontype
152+
if (bsontype !== undefined && (bsontype === 'Binary' || value === original)) {
153+
return '?'
182154
}
183155
}
184-
}
185156

186-
return output
187-
}
157+
if (value === null || typeof value !== 'object') return value
188158

189-
function isObject (val) {
190-
return val !== null && typeof val === 'object' && !Array.isArray(val)
191-
}
192-
193-
function isBSON (val) {
194-
return val && val._bsontype && !isBinary(val)
195-
}
159+
while (ancestors.length > 0 && ancestors.at(-1) !== this) ancestors.pop()
160+
if (ancestors.length >= MAX_DEPTH || ancestors.includes(value)) return '?'
161+
ancestors.push(value)
196162

197-
function isBinary (val) {
198-
return val && val._bsontype === 'Binary'
163+
return value
164+
})
199165
}
200166

201167
function isHeartbeat (ops, config) {

packages/datadog-plugin-mongodb-core/test/limit-depth.spec.js

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,66 @@ describe('mongodb-core query depth limiter', () => {
4848

4949
assert.deepStrictEqual(JSON.parse(query), { outer: { ownNested: 'kept' } })
5050
})
51+
52+
it('extracts cmd.filter when no .query is present', () => {
53+
const query = callBindStart({
54+
ns: 'db.coll',
55+
ops: { filter: { user: 'alice' } },
56+
name: 'find',
57+
})
58+
59+
assert.deepStrictEqual(JSON.parse(query), { user: 'alice' })
60+
})
61+
62+
it('extracts cmd.pipeline when no .query / .filter is present', () => {
63+
const query = callBindStart({
64+
ns: 'db.coll',
65+
ops: { pipeline: [{ $match: { user: 'alice' } }, { $count: 'total' }] },
66+
name: 'aggregate',
67+
})
68+
69+
assert.deepStrictEqual(JSON.parse(query), [
70+
{ $match: { user: 'alice' } },
71+
{ $count: 'total' },
72+
])
73+
})
74+
75+
it('extracts the inner q from a single cmd.deletes statement', () => {
76+
const query = callBindStart({
77+
ns: 'db.coll',
78+
ops: { deletes: [{ q: { user: 'alice' }, limit: 1 }] },
79+
name: 'delete',
80+
})
81+
82+
assert.deepStrictEqual(JSON.parse(query), { user: 'alice' })
83+
})
84+
85+
it('collects every q from multi-statement cmd.updates', () => {
86+
const query = callBindStart({
87+
ns: 'db.coll',
88+
ops: {
89+
updates: [
90+
{ q: { user: 'alice' }, u: { $set: { a: 1 } } },
91+
{ q: { user: 'bob' }, u: { $set: { b: 2 } } },
92+
],
93+
},
94+
name: 'update',
95+
})
96+
97+
assert.deepStrictEqual(JSON.parse(query), [
98+
{ user: 'alice' },
99+
{ user: 'bob' },
100+
])
101+
})
102+
103+
it('renders Binary BSON values as "?"', () => {
104+
const binary = { _bsontype: 'Binary', buffer: Buffer.from('payload') }
105+
const query = callBindStart({
106+
ns: 'db.coll',
107+
ops: { query: { blob: binary } },
108+
name: 'find',
109+
})
110+
111+
assert.deepStrictEqual(JSON.parse(query), { blob: '?' })
112+
})
51113
})

packages/datadog-plugin-mongodb-core/test/mongodb.spec.js

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,42 @@ describe('Plugin', () => {
378378
}).toArray()
379379
})
380380

381+
it('should collapse beyond max depth', done => {
382+
let nested = { a: 1 }
383+
for (let i = 0; i < 12; i++) {
384+
nested = { a: nested }
385+
}
386+
387+
agent
388+
.assertSomeTraces(traces => {
389+
const span = traces[0][0]
390+
assert.strictEqual(span.resource, `find test.${collectionName}`)
391+
// 10 levels of `{"a":` then `"?"`, then 10 closing braces.
392+
assert.strictEqual(span.meta['mongodb.query'], `${'{"a":'.repeat(10)}"?"${'}'.repeat(10)}`)
393+
})
394+
.then(done)
395+
.catch(done)
396+
397+
collection.find(nested).toArray().catch(() => {})
398+
})
399+
400+
it('should collapse cyclic queries to ?', done => {
401+
const cyclic = { name: 'foo' }
402+
cyclic.self = cyclic
403+
404+
agent
405+
.assertSomeTraces(traces => {
406+
const span = traces[0][0]
407+
assert.strictEqual(span.resource, `find test.${collectionName}`)
408+
assert.strictEqual(span.meta['mongodb.query'], '{"name":"foo","self":"?"}')
409+
})
410+
.then(done)
411+
.catch(done)
412+
413+
// Driver rejects cyclic structures before the wire write; sanitisation runs before that.
414+
collection.find(cyclic).toArray().catch(() => {})
415+
})
416+
381417
it('should skip functions when sanitizing', done => {
382418
agent
383419
.assertSomeTraces(traces => {

0 commit comments

Comments
 (0)