diff --git a/package.json b/package.json index 506d732d..a053c5b8 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "hawk.workers", "private": true, - "version": "0.1.1", + "version": "0.1.2", "description": "Hawk workers", "repository": "git@github.com:codex-team/hawk.workers.git", "license": "BUSL-1.1", diff --git a/workers/grouper/package.json b/workers/grouper/package.json index 6e98edc1..294322a5 100644 --- a/workers/grouper/package.json +++ b/workers/grouper/package.json @@ -1,6 +1,6 @@ { "name": "hawk-worker-grouper", - "version": "0.0.1", + "version": "0.0.2", "description": "Accepts processed errors from language-workers and saves it to the DB with grouping of similar ones. ", "main": "src/index.ts", "repository": "https://github.com/codex-team/hawk.workers/tree/master/workers/grouper", diff --git a/workers/grouper/src/data-filter.ts b/workers/grouper/src/data-filter.ts index 40a4acf9..3571a1c6 100644 --- a/workers/grouper/src/data-filter.ts +++ b/workers/grouper/src/data-filter.ts @@ -54,6 +54,16 @@ export default class DataFilter { */ private bankCardRegex = /^(?:4[0-9]{12}(?:[0-9]{3})?|[25][1-7][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35\d{3})\d{11})$/g; + /** + * MongoDB ObjectId Regex (24 hexadecimal characters) + */ + private objectIdRegex = /^[0-9a-fA-F]{24}$/; + + /** + * UUID Regex - matches UUIDs with all dashes (8-4-4-4-12 format) or no dashes (32 hex chars) + */ + private uuidRegex = /^(?:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}|[0-9a-fA-F]{32})$/; + /** * Accept event and process 'addons' and 'context' fields. * It mutates the original object @@ -96,6 +106,22 @@ export default class DataFilter { return value; } + /** + * Check if value matches MongoDB ObjectId pattern (24 hex chars) + * ObjectIds should not be filtered + */ + if (this.objectIdRegex.test(value)) { + return value; + } + + /** + * Check if value matches UUID pattern (with or without dashes) + * UUIDs should not be filtered + */ + if (this.uuidRegex.test(value)) { + return value; + } + /** * Remove all non-digit chars */ diff --git a/workers/grouper/tests/data-filter.test.ts b/workers/grouper/tests/data-filter.test.ts index d0a4c3af..28ff2979 100644 --- a/workers/grouper/tests/data-filter.test.ts +++ b/workers/grouper/tests/data-filter.test.ts @@ -143,5 +143,124 @@ describe('GrouperWorker', () => { expect(event.context['normalKey']).toBe(normalValue); expect(event.addons['vue']['props']['normalKey']).toBe(normalValue); }); + + test('should not filter UUID values that contain exactly 16 digits', async () => { + // These UUIDs contain exactly 16 digits, which when cleaned match PAN patterns + // Without UUID detection, they would be incorrectly filtered as credit cards + const uuidWithManyDigits = '4a1b2c3d-4e5f-6a7b-8c9d-0e1f2a3b4c5d'; // Cleans to 16 digits starting with 4 + const uuidUpperCase = '5A1B2C3D-4E5F-6A7B-8C9D-0E1F2A3B4C5D'; // Cleans to 16 digits starting with 5 + const uuidNoDashes = '2a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d'; // 32 hex chars without dashes + + const event = generateEvent({ + context: { + userId: uuidWithManyDigits, + sessionId: uuidUpperCase, + transactionId: uuidNoDashes, + }, + addons: { + vue: { + props: { + componentId: uuidWithManyDigits, + }, + }, + }, + }); + + dataFilter.processEvent(event); + + expect(event.context['userId']).toBe(uuidWithManyDigits); + expect(event.context['sessionId']).toBe(uuidUpperCase); + expect(event.context['transactionId']).toBe(uuidNoDashes); + expect(event.addons['vue']['props']['componentId']).toBe(uuidWithManyDigits); + }); + + test('should not filter MongoDB ObjectId values that contain exactly 16 digits', async () => { + // These ObjectIds contain exactly 16 digits which when cleaned match PAN patterns + // Without ObjectId detection, they would be incorrectly filtered as credit cards + const objectIdWithManyDigits = '4111111111111111abcdefab'; // 16 digits + 8 hex letters = 24 chars, cleans to Visa pattern + const objectIdUpperCase = '5111111111111111ABCDEFAB'; // Cleans to Mastercard pattern + const objectIdMixedCase = '2111111111111111AbCdEfAb'; // Cleans to Maestro/Mastercard pattern + + const event = generateEvent({ + context: { + projectId: objectIdWithManyDigits, + workspaceId: objectIdUpperCase, + transactionId: objectIdMixedCase, + }, + addons: { + hawk: { + projectId: objectIdWithManyDigits, + }, + }, + }); + + dataFilter.processEvent(event); + + expect(event.context['projectId']).toBe(objectIdWithManyDigits); + expect(event.context['workspaceId']).toBe(objectIdUpperCase); + expect(event.context['transactionId']).toBe(objectIdMixedCase); + expect(event.addons['hawk']['projectId']).toBe(objectIdWithManyDigits); + }); + + test('should still filter actual PAN numbers with formatting characters', async () => { + // Test real Mastercard test number with spaces and dashes + const panWithSpaces = '5500 0000 0000 0004'; + const panWithDashes = '5500-0000-0000-0004'; + + const event = generateEvent({ + context: { + cardNumber: panWithSpaces, + paymentCard: panWithDashes, + }, + }); + + dataFilter.processEvent(event); + + expect(event.context['cardNumber']).toBe('[filtered]'); + expect(event.context['paymentCard']).toBe('[filtered]'); + }); + + test('should not filter values that are not UUIDs, ObjectIds, or PANs', async () => { + // These are edge cases that should NOT be filtered + const shortHex = '507f1f77bcf86cd7'; // 16 hex chars (not 24) + const longNumber = '67280841958304100309082499'; // 26 digits (too long for PAN) + const mixedAlphaNum = 'abc123def456ghi789'; // Mixed content + + const event = generateEvent({ + context: { + shortId: shortHex, + longId: longNumber, + mixedId: mixedAlphaNum, + }, + }); + + dataFilter.processEvent(event); + + expect(event.context['shortId']).toBe(shortHex); + expect(event.context['longId']).toBe(longNumber); + expect(event.context['mixedId']).toBe(mixedAlphaNum); + }); + + test('should filter UUIDs and ObjectIds when they are in sensitive key fields', async () => { + // Even if the value is a valid UUID or ObjectId, it should be filtered + // if the key name is in the sensitive keys list + const uuid = '550e8400-e29b-41d4-a716-446655440000'; + const objectId = '507f1f77bcf86cd799439011'; + + const event = generateEvent({ + context: { + password: uuid, + secret: objectId, + auth: '672808419583041003090824', + }, + }); + + dataFilter.processEvent(event); + + // All should be filtered because of sensitive key names + expect(event.context['password']).toBe('[filtered]'); + expect(event.context['secret']).toBe('[filtered]'); + expect(event.context['auth']).toBe('[filtered]'); + }); }); });