Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.test
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,6 @@ REPORT_NOTIFY_URL=http://mock.com/

# Url for connecting to Redis
REDIS_URL=redis://localhost:6379

# Disable memoization in tests
MEMOIZATION_TTL=-1
107 changes: 62 additions & 45 deletions workers/grouper/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
SourceCodeLine,
ProjectEventGroupingPatternsDBScheme,
ErrorsCatcherType,
CatcherMessagePayload
} from '@hawk.so/types';
import type { RepetitionDBScheme } from '../types/repetition';
import { DatabaseReadWriteError, DiffCalculationError, ValidationError } from '../../../lib/workerErrors';
Expand All @@ -23,9 +22,16 @@
import DataFilter from './data-filter';
import RedisHelper from './redisHelper';
import { computeDelta } from './utils/repetitionDiff';
import TimeMs from '../../../lib/utils/time';
import { rightTrim } from '../../../lib/utils/string';
import { hasValue } from '../../../lib/utils/hasValue';
/* eslint-disable-next-line no-unused-vars */
import { memoize } from '../../../lib/memoize';

/**
* eslint does not count decorators as a variable usage
*/
/* eslint-disable-next-line no-unused-vars */
const MEMOIZATION_TTL = Number(process.env.MEMOIZATION_TTL ?? 0);

/**
* Error code of MongoDB key duplication error
Expand Down Expand Up @@ -110,32 +116,38 @@
}
}

/**
* Find event by group hash.
*/
let existedEvent = await this.getEvent(task.projectId, uniqueEventHash);
let existedEvent: GroupedEventDBScheme;

/**
* If we couldn't group by group hash (title), try grouping by patterns
* Find similar events by grouping pattern
*/
if (!existedEvent) {
const similarEvent = await this.findSimilarEvent(task.projectId, task.payload);
const similarEvent = await this.findSimilarEvent(task.projectId, task.payload.title);

if (similarEvent) {
this.logger.info(`similar event: ${JSON.stringify(similarEvent)}`);
/**
* Override group hash with found event's group hash
*/
uniqueEventHash = similarEvent.groupHash;
if (similarEvent) {
this.logger.info(`similar event: ${JSON.stringify(similarEvent)}`);

existedEvent = similarEvent;
}
/**
* Override group hash with found event's group hash
*/
uniqueEventHash = similarEvent.groupHash;

existedEvent = similarEvent;
}

/**
 * If we couldn't group by grouping pattern — try grouping by hash (title)
 */
else {
/**
* Find event by group hash.
*/
existedEvent = await this.getEvent(task.projectId, uniqueEventHash);
}

/**
* Event happened for the first time
*/
const isFirstOccurrence = existedEvent === null;
const isFirstOccurrence = !existedEvent && !similarEvent;

let repetitionId = null;

Expand Down Expand Up @@ -281,6 +293,10 @@
};
});
});

if (event.backtrace.length === 0) {
event.backtrace = null;
}
}

/**
Expand All @@ -296,31 +312,37 @@
});
}

/**
* Method that is used to retrieve the first original event that satisfies the grouping pattern
* @param pattern - event should satisfy this pattern
*/
@memoize({ max: 200, ttl: MEMOIZATION_TTL, strategy: 'concat' })
private async findFirstEventByPattern(pattern: string, projectId: string) {
return await this.eventsDb.getConnection()

Check failure on line 321 in workers/grouper/src/index.ts

View workflow job for this annotation

GitHub Actions / ESlint

Missing return type on function
.collection(`events:${projectId}`)
.findOne(
{ 'payload.title': { $regex: pattern } },
);
}

/**
* Tries to find events with a small Levenshtein distance of a title or by matching grouping patterns
*
* @param projectId - where to find
* @param event - event to compare
* @param title - title of the event to find similar one
*/
private async findSimilarEvent(projectId: string, event: EventData<EventAddons>): Promise<GroupedEventDBScheme | undefined> {
private async findSimilarEvent(projectId: string, title: string): Promise<GroupedEventDBScheme | undefined> {
/**
* If no match by Levenshtein, try matching by patterns
*/
const patterns = await this.getProjectPatterns(projectId);

if (patterns && patterns.length > 0) {
const matchingPattern = await this.findMatchingPattern(patterns, event);
const matchingPattern = await this.findMatchingPattern(patterns, title);

if (matchingPattern !== null && matchingPattern !== undefined) {
try {
const originalEvent = await this.cache.get(`${projectId}:${matchingPattern._id}:originalEvent`, async () => {
return await this.eventsDb.getConnection()
.collection(`events:${projectId}`)
.findOne(
{ 'payload.title': { $regex: matchingPattern.pattern } },
{ sort: { _id: 1 } }
);
});
const originalEvent = await this.findFirstEventByPattern(matchingPattern.pattern, projectId);

this.logger.info(`original event for pattern: ${JSON.stringify(originalEvent)}`);

Expand All @@ -340,12 +362,13 @@
* Method that returns matched pattern for event, if event do not match any of patterns return null
*
* @param patterns - list of the patterns of the related project
* @param event - event which title would be cheched
* @param title - title of the event to check for pattern match
* @returns {ProjectEventGroupingPatternsDBScheme | null} matched pattern object or null if no match
*/
@memoize({ max: 200, ttl: MEMOIZATION_TTL, strategy: 'hash' })
private async findMatchingPattern(
patterns: ProjectEventGroupingPatternsDBScheme[],
event: CatcherMessagePayload<ErrorsCatcherType>
title: string,
): Promise<ProjectEventGroupingPatternsDBScheme | null> {
if (!patterns || patterns.length === 0) {
return null;
Expand All @@ -354,7 +377,7 @@
return patterns.filter(pattern => {
const patternRegExp = new RegExp(pattern.pattern);

return event.title.match(patternRegExp);
return title.match(patternRegExp);
}).pop() || null;
}

Expand All @@ -364,21 +387,15 @@
* @param projectId - id of the project to find related event patterns
* @returns {ProjectEventGroupingPatternsDBScheme[]} EventPatterns object with projectId and list of patterns
*/
@memoize({ max: 200, ttl: MEMOIZATION_TTL, strategy: 'concat' })
private async getProjectPatterns(projectId: string): Promise<ProjectEventGroupingPatternsDBScheme[]> {
return this.cache.get(`project:${projectId}:patterns`, async () => {
const project = await this.accountsDb.getConnection()
.collection('projects')
.findOne({
_id: new mongodb.ObjectId(projectId),
});
const project = await this.accountsDb.getConnection()
.collection('projects')
.findOne({
_id: new mongodb.ObjectId(projectId),
});

return project?.eventGroupingPatterns || [];
},
/**
* Cache project patterns for 5 minutes since they don't change frequently
*/
/* eslint-disable-next-line @typescript-eslint/no-magic-numbers */
5 * TimeMs.MINUTE / MS_IN_SEC);
return project?.eventGroupingPatterns || [];
}

/**
Expand Down
72 changes: 72 additions & 0 deletions workers/grouper/tests/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,78 @@ describe('GrouperWorker', () => {
expect(await repetitionsCollection.find().count()).toBe(1);
});
});

describe('dynamic pattern addition', () => {
test('should group events firslty by pattern, secondly by grouphash', async () => {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
test('should group events firslty by pattern, secondly by grouphash', async () => {
test('should group events when pattern added after we received the first event', async () => {

/**
* Remove all existing patterns from the project
*/
jest.spyOn(GrouperWorker.prototype as any, 'getProjectPatterns').mockResolvedValue([]);

/**
* Two nearly identical titles that could be grouped by `New error .*` pattern
*/
const firstTitle = 'Dynamic pattern error 1111111111111111';
const secondTitle = 'Dynamic pattern error 2222222222222222';

await worker.handle(generateTask({ title: firstTitle }));
await worker.handle(generateTask({ title: secondTitle }));

const originalsBefore = await eventsCollection.find().toArray();
expect(originalsBefore.length).toBe(2);

const originalA = originalsBefore.find(e => e.payload.title === firstTitle)!;
const originalB = originalsBefore.find(e => e.payload.title === secondTitle)!;
expect(originalA).toBeTruthy();
expect(originalB).toBeTruthy();

/**
* Two events should be stored separately since grouping patterns of the project were empty
*/
expect(originalA.groupHash).not.toBe(originalB.groupHash);

jest.spyOn(GrouperWorker.prototype as any, 'getProjectPatterns').mockResolvedValue([
{
_id: new mongodb.ObjectId(),
pattern: 'Dynamic pattern error .*',
}
]);

/**
 * Second title should be grouped with first event that matches inserted grouping pattern
 * It should not be grouped with the existing event with the same title because that would violate grouping pattern logic
 */
await worker.handle(generateTask({ title: secondTitle }));

const allEvents = await eventsCollection.find().toArray();
const allRepetitions = await repetitionsCollection.find().toArray();

/**
* Should still be only 2 original event documents in the DB
*/
expect(allEvents.length).toBe(2);

const refreshedOriginalA = await eventsCollection.findOne({ _id: originalA._id });
const refreshedOriginalB = await eventsCollection.findOne({ _id: originalB._id });

// totalCount: originalA should have 2 (1 original + 1 new repetition),
// originalB should remain 1.
expect(refreshedOriginalA?.totalCount).toBe(2);
expect(refreshedOriginalB?.totalCount).toBe(1);

// Repetitions should be 1 and must reference originalA's groupHash
expect(allRepetitions.length).toBe(1);
allRepetitions.forEach(rep => {
expect(rep.groupHash).toBe(refreshedOriginalA!.groupHash);
});

/**
* Original B should have zero repetitions despite same title with latest event passed
*/
const repsForOriginalB = await repetitionsCollection.find({ groupHash: refreshedOriginalB!.groupHash }).count();
expect(repsForOriginalB).toBe(0);
});
});
});

describe('Event marks handling', () => {
Expand Down
Loading