Skip to content

Commit 4189b0a

Browse files
Copilot and neSpecc authored
Fix DataFilter false positives for UUIDs and MongoDB ObjectIds (#518)
* Initial plan

* Fix DataFilter to not filter UUIDs and MongoDB ObjectIds

Co-authored-by: neSpecc <3684889+neSpecc@users.noreply.github.com>

* Address code review feedback: improve UUID regex and test coverage

Co-authored-by: neSpecc <3684889+neSpecc@users.noreply.github.com>

* Fix tests to use values that would actually fail without UUID/ObjectId detection

Co-authored-by: neSpecc <3684889+neSpecc@users.noreply.github.com>

* upd version

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: neSpecc <3684889+neSpecc@users.noreply.github.com>
Co-authored-by: Peter Savchenko <specc.dev@gmail.com>
1 parent 300b469 commit 4189b0a

File tree

4 files changed

+147
-2
lines changed

4 files changed

+147
-2
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "hawk.workers",
33
"private": true,
4-
"version": "0.1.1",
4+
"version": "0.1.2",
55
"description": "Hawk workers",
66
"repository": "git@github.com:codex-team/hawk.workers.git",
77
"license": "BUSL-1.1",

workers/grouper/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "hawk-worker-grouper",
3-
"version": "0.0.1",
3+
"version": "0.0.2",
44
"description": "Accepts processed errors from language-workers and saves it to the DB with grouping of similar ones. ",
55
"main": "src/index.ts",
66
"repository": "https://github.com/codex-team/hawk.workers/tree/master/workers/grouper",

workers/grouper/src/data-filter.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,16 @@ export default class DataFilter {
5454
*/
5555
private bankCardRegex = /^(?:4[0-9]{12}(?:[0-9]{3})?|[25][1-7][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35\d{3})\d{11})$/g;
5656

57+
/**
58+
* MongoDB ObjectId Regex (24 hexadecimal characters)
59+
*/
60+
private objectIdRegex = /^[0-9a-fA-F]{24}$/;
61+
62+
/**
63+
* UUID Regex - matches UUIDs with all dashes (8-4-4-4-12 format) or no dashes (32 hex chars)
64+
*/
65+
private uuidRegex = /^(?:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}|[0-9a-fA-F]{32})$/;
66+
5767
/**
5868
* Accept event and process 'addons' and 'context' fields.
5969
* It mutates the original object
@@ -96,6 +106,22 @@ export default class DataFilter {
96106
return value;
97107
}
98108

109+
/**
110+
* Check if value matches MongoDB ObjectId pattern (24 hex chars)
111+
* ObjectIds should not be filtered
112+
*/
113+
if (this.objectIdRegex.test(value)) {
114+
return value;
115+
}
116+
117+
/**
118+
* Check if value matches UUID pattern (with or without dashes)
119+
* UUIDs should not be filtered
120+
*/
121+
if (this.uuidRegex.test(value)) {
122+
return value;
123+
}
124+
99125
/**
100126
* Remove all non-digit chars
101127
*/

workers/grouper/tests/data-filter.test.ts

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,5 +143,124 @@ describe('GrouperWorker', () => {
143143
expect(event.context['normalKey']).toBe(normalValue);
144144
expect(event.addons['vue']['props']['normalKey']).toBe(normalValue);
145145
});
146+
147+
test('should not filter UUID values that contain exactly 16 digits', async () => {
148+
// These UUIDs contain exactly 16 digits; once all non-digit characters are stripped, the remaining digits match PAN patterns
149+
// Without UUID detection, they would be incorrectly filtered as credit cards
150+
const uuidWithManyDigits = '4a1b2c3d-4e5f-6a7b-8c9d-0e1f2a3b4c5d'; // Cleans to 16 digits starting with 4
151+
const uuidUpperCase = '5A1B2C3D-4E5F-6A7B-8C9D-0E1F2A3B4C5D'; // Cleans to 16 digits starting with 5
152+
const uuidNoDashes = '2a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d'; // 32 hex chars without dashes
153+
154+
const event = generateEvent({
155+
context: {
156+
userId: uuidWithManyDigits,
157+
sessionId: uuidUpperCase,
158+
transactionId: uuidNoDashes,
159+
},
160+
addons: {
161+
vue: {
162+
props: {
163+
componentId: uuidWithManyDigits,
164+
},
165+
},
166+
},
167+
});
168+
169+
dataFilter.processEvent(event);
170+
171+
expect(event.context['userId']).toBe(uuidWithManyDigits);
172+
expect(event.context['sessionId']).toBe(uuidUpperCase);
173+
expect(event.context['transactionId']).toBe(uuidNoDashes);
174+
expect(event.addons['vue']['props']['componentId']).toBe(uuidWithManyDigits);
175+
});
176+
177+
test('should not filter MongoDB ObjectId values that contain exactly 16 digits', async () => {
178+
// These ObjectIds contain exactly 16 digits; once all non-digit characters are stripped, the remaining digits match PAN patterns
179+
// Without ObjectId detection, they would be incorrectly filtered as credit cards
180+
const objectIdWithManyDigits = '4111111111111111abcdefab'; // 16 digits + 8 hex letters = 24 chars, cleans to Visa pattern
181+
const objectIdUpperCase = '5111111111111111ABCDEFAB'; // Cleans to Mastercard pattern
182+
const objectIdMixedCase = '2111111111111111AbCdEfAb'; // Cleans to Maestro/Mastercard pattern
183+
184+
const event = generateEvent({
185+
context: {
186+
projectId: objectIdWithManyDigits,
187+
workspaceId: objectIdUpperCase,
188+
transactionId: objectIdMixedCase,
189+
},
190+
addons: {
191+
hawk: {
192+
projectId: objectIdWithManyDigits,
193+
},
194+
},
195+
});
196+
197+
dataFilter.processEvent(event);
198+
199+
expect(event.context['projectId']).toBe(objectIdWithManyDigits);
200+
expect(event.context['workspaceId']).toBe(objectIdUpperCase);
201+
expect(event.context['transactionId']).toBe(objectIdMixedCase);
202+
expect(event.addons['hawk']['projectId']).toBe(objectIdWithManyDigits);
203+
});
204+
205+
test('should still filter actual PAN numbers with formatting characters', async () => {
206+
// Use a well-known Mastercard test number, formatted once with spaces and once with dashes
207+
const panWithSpaces = '5500 0000 0000 0004';
208+
const panWithDashes = '5500-0000-0000-0004';
209+
210+
const event = generateEvent({
211+
context: {
212+
cardNumber: panWithSpaces,
213+
paymentCard: panWithDashes,
214+
},
215+
});
216+
217+
dataFilter.processEvent(event);
218+
219+
expect(event.context['cardNumber']).toBe('[filtered]');
220+
expect(event.context['paymentCard']).toBe('[filtered]');
221+
});
222+
223+
test('should not filter values that are not UUIDs, ObjectIds, or PANs', async () => {
224+
// These are edge cases that should NOT be filtered
225+
const shortHex = '507f1f77bcf86cd7'; // 16 hex chars (not 24)
226+
const longNumber = '67280841958304100309082499'; // 26 digits (too long for PAN)
227+
const mixedAlphaNum = 'abc123def456ghi789'; // Mixed content
228+
229+
const event = generateEvent({
230+
context: {
231+
shortId: shortHex,
232+
longId: longNumber,
233+
mixedId: mixedAlphaNum,
234+
},
235+
});
236+
237+
dataFilter.processEvent(event);
238+
239+
expect(event.context['shortId']).toBe(shortHex);
240+
expect(event.context['longId']).toBe(longNumber);
241+
expect(event.context['mixedId']).toBe(mixedAlphaNum);
242+
});
243+
244+
test('should filter UUIDs and ObjectIds when they are in sensitive key fields', async () => {
245+
// Even if the value is a valid UUID or ObjectId, it should be filtered
246+
// if the key name is in the sensitive keys list
247+
const uuid = '550e8400-e29b-41d4-a716-446655440000';
248+
const objectId = '507f1f77bcf86cd799439011';
249+
250+
const event = generateEvent({
251+
context: {
252+
password: uuid,
253+
secret: objectId,
254+
auth: '672808419583041003090824',
255+
},
256+
});
257+
258+
dataFilter.processEvent(event);
259+
260+
// All should be filtered because of sensitive key names
261+
expect(event.context['password']).toBe('[filtered]');
262+
expect(event.context['secret']).toBe('[filtered]');
263+
expect(event.context['auth']).toBe('[filtered]');
264+
});
146265
});
147266
});

0 commit comments

Comments
 (0)