-
Notifications
You must be signed in to change notification settings - Fork 1
fix(grouper): identify events similarity only when titles are same #410
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,7 +14,7 @@ | |
| import { MS_IN_SEC } from '../../../lib/utils/consts'; | ||
| import DataFilter from './data-filter'; | ||
| import RedisHelper from './redisHelper'; | ||
| import levenshtein from 'js-levenshtein'; | ||
|
Check warning on line 17 in workers/grouper/src/index.ts
|
||
| import { computeDelta } from './utils/repetitionDiff'; | ||
| import TimeMs from '../../../lib/utils/time'; | ||
|
|
||
|
|
@@ -244,22 +244,24 @@ | |
| */ | ||
| private async findSimilarEvent(projectId: string, event: EventDataAccepted<EventAddons>): Promise<GroupedEventDBScheme | undefined> { | ||
| const eventsCountToCompare = 60; | ||
| const diffTreshold = 0.35; | ||
|
Check warning on line 247 in workers/grouper/src/index.ts
|
||
|
|
||
| const lastUniqueEvents = await this.findLastEvents(projectId, eventsCountToCompare); | ||
|
|
||
| /** | ||
| * First try to find by Levenshtein distance | ||
| * First try to find similar event by title | ||
| */ | ||
| const similarByLevenshtein = lastUniqueEvents.filter(prevEvent => { | ||
| const distance = levenshtein(event.title, prevEvent.payload.title); | ||
| const threshold = event.title.length * diffTreshold; | ||
| const similarByTitle = lastUniqueEvents.filter(prevEvent => { | ||
| // const distance = levenshtein(event.title, prevEvent.payload.title); | ||
|
|
||
| return distance < threshold; | ||
| // const threshold = event.title.length * diffTreshold; | ||
|
|
||
| // return distance < threshold; | ||
| return event.title.toLowerCase() === prevEvent.payload.title.toLowerCase(); | ||
| }).pop(); | ||
|
Comment on lines
+253
to
260
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. lets just comment out grouping by Levenshtein and implement grouping by title below. |
||
|
|
||
| if (similarByLevenshtein) { | ||
| return similarByLevenshtein; | ||
| if (similarByTitle) { | ||
| return similarByTitle; | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -472,17 +472,17 @@ | |
| }); | ||
|
|
||
| describe('Grouping', () => { | ||
| test('should group events with partially different titles', async () => { | ||
| await worker.handle(generateTask({ title: 'Some error (but not filly identical) example' })); | ||
| await worker.handle(generateTask({ title: 'Some error (yes, it is not the identical) example' })); | ||
| await worker.handle(generateTask({ title: 'Some error (and it is not identical) example' })); | ||
| // test('should group events with partially different titles', async () => { | ||
| // await worker.handle(generateTask({ title: 'Some error (but not filly identical) example' })); | ||
| // await worker.handle(generateTask({ title: 'Some error (yes, it is not the identical) example' })); | ||
| // await worker.handle(generateTask({ title: 'Some error (and it is not identical) example' })); | ||
|
|
||
| const originalEvent = await eventsCollection.findOne({}); | ||
| // const originalEvent = await eventsCollection.findOne({}); | ||
|
|
||
| expect((await repetitionsCollection.find({ | ||
| groupHash: originalEvent.groupHash, | ||
| }).toArray()).length).toBe(2); | ||
| }); | ||
| // expect((await repetitionsCollection.find({ | ||
| // groupHash: originalEvent.groupHash, | ||
| // }).toArray()).length).toBe(2); | ||
| // }); | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add test for grouping by title |
||
| describe('Pattern matching', () => { | ||
| beforeEach(() => { | ||
|
|
@@ -490,8 +490,8 @@ | |
| }); | ||
|
|
||
| test('should group events with titles matching one pattern', async () => { | ||
| jest.spyOn(GrouperWorker.prototype as any, 'getProjectPatterns').mockResolvedValue([ 'New error .*' ]); | ||
| const findMatchingPatternSpy = jest.spyOn(GrouperWorker.prototype as any, 'findMatchingPattern'); | ||
|
|
||
| await worker.handle(generateTask({ title: 'New error 0000000000000000' })); | ||
| await worker.handle(generateTask({ title: 'New error 1111111111111111' })); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably, we can find event by title using mongo. We dont need to get 60 events for that.