Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "hawk.workers",
"private": true,
"version": "0.1.1",
"version": "0.1.2",
"description": "Hawk workers",
"repository": "git@github.com:codex-team/hawk.workers.git",
"license": "BUSL-1.1",
Expand Down
2 changes: 1 addition & 1 deletion workers/grouper/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "hawk-worker-grouper",
"version": "0.0.1",
"version": "0.0.2",
"description": "Accepts processed errors from language-workers and saves them to the DB, grouping similar ones.",
"main": "src/index.ts",
"repository": "https://github.com/codex-team/hawk.workers/tree/master/workers/grouper",
Expand Down
26 changes: 26 additions & 0 deletions workers/grouper/src/data-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ export default class DataFilter {
*/
private bankCardRegex = /^(?:4[0-9]{12}(?:[0-9]{3})?|[25][1-7][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35\d{3})\d{11})$/g;

/**
 * MongoDB ObjectId regex: the whole string must be exactly 24 hexadecimal
 * characters (upper or lower case); shorter, longer or non-hex strings do not match.
 * Declared without the `g` flag, so `test()` keeps no `lastIndex` state between calls.
 */
private objectIdRegex = /^[0-9a-fA-F]{24}$/;

/**
 * UUID regex: the whole string must be either the fully dashed 8-4-4-4-12
 * hexadecimal form or 32 hexadecimal characters with no dashes at all
 * (upper or lower case). Partially dashed strings do not match, and the
 * UUID version/variant bits are not validated.
 * Declared without the `g` flag, so `test()` keeps no `lastIndex` state between calls.
 */
private uuidRegex = /^(?:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}|[0-9a-fA-F]{32})$/;

/**
* Accept event and process 'addons' and 'context' fields.
* It mutates the original object
Expand Down Expand Up @@ -96,6 +106,22 @@ export default class DataFilter {
return value;
}

/**
* Check if value matches MongoDB ObjectId pattern (24 hex chars)
* ObjectIds should not be filtered
*/
if (this.objectIdRegex.test(value)) {
return value;
}

/**
* Check if value matches UUID pattern (with or without dashes)
* UUIDs should not be filtered
*/
if (this.uuidRegex.test(value)) {
return value;
}
Comment thread
neSpecc marked this conversation as resolved.

/**
* Remove all non-digit chars
*/
Expand Down
119 changes: 119 additions & 0 deletions workers/grouper/tests/data-filter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,5 +143,124 @@ describe('GrouperWorker', () => {
expect(event.context['normalKey']).toBe(normalValue);
expect(event.addons['vue']['props']['normalKey']).toBe(normalValue);
});

test('should not filter UUID values that contain exactly 16 digits', async () => {
  /**
   * Every UUID below holds exactly 16 decimal digits. Once the hex letters and
   * dashes are stripped, the remaining digits look like a card number, so the
   * value would be masked unless it is recognised as a UUID first.
   */
  const dashedLowerUuid = '4a1b2c3d-4e5f-6a7b-8c9d-0e1f2a3b4c5d'; // digits: 4123456789012345
  const dashedUpperUuid = '5A1B2C3D-4E5F-6A7B-8C9D-0E1F2A3B4C5D'; // digits: 5123456789012345
  const compactUuid = '2a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d'; // 32 hex chars, digits: 2123456789012345

  const event = generateEvent({
    context: {
      userId: dashedLowerUuid,
      sessionId: dashedUpperUuid,
      transactionId: compactUuid,
    },
    addons: {
      vue: { props: { componentId: dashedLowerUuid } },
    },
  });

  dataFilter.processEvent(event);

  /**
   * Read the fields only after processing, so the assertions see the filtered state
   */
  const { context, addons } = event;

  expect(context['userId']).toBe(dashedLowerUuid);
  expect(context['sessionId']).toBe(dashedUpperUuid);
  expect(context['transactionId']).toBe(compactUuid);
  expect(addons['vue']['props']['componentId']).toBe(dashedLowerUuid);
});

test('should not filter MongoDB ObjectId values that contain exactly 16 digits', async () => {
  /**
   * 24-character hex ids built from 16 digits followed by 8 hex letters.
   * With the letters stripped, the digits look like a card number, so the
   * value would be masked unless it is recognised as an ObjectId first.
   */
  const lowerCaseId = '4111111111111111abcdefab'; // digits resemble a Visa number
  const upperCaseId = '5111111111111111ABCDEFAB'; // digits resemble a Mastercard number
  const mixedCaseId = '2111111111111111AbCdEfAb'; // digits resemble a Maestro/Mastercard number

  const event = generateEvent({
    context: {
      projectId: lowerCaseId,
      workspaceId: upperCaseId,
      transactionId: mixedCaseId,
    },
    addons: {
      hawk: { projectId: lowerCaseId },
    },
  });

  dataFilter.processEvent(event);

  /**
   * Read the fields only after processing, so the assertions see the filtered state
   */
  const { context, addons } = event;

  expect(context['projectId']).toBe(lowerCaseId);
  expect(context['workspaceId']).toBe(upperCaseId);
  expect(context['transactionId']).toBe(mixedCaseId);
  expect(addons['hawk']['projectId']).toBe(lowerCaseId);
});

test('should still filter actual PAN numbers with formatting characters', async () => {
  /**
   * The same Mastercard test number written with two different separators
   */
  const spaceSeparatedPan = '5500 0000 0000 0004';
  const dashSeparatedPan = '5500-0000-0000-0004';
  const placeholder = '[filtered]';

  const event = generateEvent({
    context: {
      cardNumber: spaceSeparatedPan,
      paymentCard: dashSeparatedPan,
    },
  });

  dataFilter.processEvent(event);

  /**
   * Separators must not prevent the card number from being masked
   */
  const { context } = event;

  expect(context['cardNumber']).toBe(placeholder);
  expect(context['paymentCard']).toBe(placeholder);
});

test('should not filter values that are not UUIDs, ObjectIds, or PANs', async () => {
  /**
   * Edge cases that match none of the recognised formats and must pass through untouched
   */
  const sixteenHexChars = '507f1f77bcf86cd7'; // too short to be a 24-char ObjectId
  const twentySixDigits = '67280841958304100309082499'; // too long to be a PAN
  const lettersAndDigits = 'abc123def456ghi789'; // mixed content

  const event = generateEvent({
    context: {
      shortId: sixteenHexChars,
      longId: twentySixDigits,
      mixedId: lettersAndDigits,
    },
  });

  dataFilter.processEvent(event);

  /**
   * Read the fields only after processing, so the assertions see the filtered state
   */
  const { context } = event;

  expect(context['shortId']).toBe(sixteenHexChars);
  expect(context['longId']).toBe(twentySixDigits);
  expect(context['mixedId']).toBe(lettersAndDigits);
});

test('should filter UUIDs and ObjectIds when they are in sensitive key fields', async () => {
  /**
   * A sensitive key name must win over the value format: even a well-formed
   * UUID or ObjectId is masked when stored under such a key.
   */
  const uuidValue = '550e8400-e29b-41d4-a716-446655440000';
  const objectIdValue = '507f1f77bcf86cd799439011';
  const digitsOnlyValue = '672808419583041003090824'; // 24 digits
  const placeholder = '[filtered]';

  const event = generateEvent({
    context: {
      password: uuidValue,
      secret: objectIdValue,
      auth: digitsOnlyValue,
    },
  });

  dataFilter.processEvent(event);

  /**
   * Every field is expected to be masked because of its key name
   */
  const { context } = event;

  expect(context['password']).toBe(placeholder);
  expect(context['secret']).toBe(placeholder);
  expect(context['auth']).toBe(placeholder);
});
});
});
Loading