Skip to content

Commit 26d29e6

Browse files
github-actions[bot], e11sy, TatianaFomina
authored
Update prod (#432)
* imp(): update node version (#429)
* chore: imp convertor script (#431)
* chore(): disable grouping by Levenshtein (#433)
* chore(): disable grouping by Levenshtein
* chore(): rm redundant test
* chore: lint fix
* fix(grouper): Remove sending duplicate key error (#438)
* Remove sending error
* Lint

---------

Co-authored-by: e11sy <130844513+e11sy@users.noreply.github.com>
Co-authored-by: Tatiana Fomina <fomina.tatianaaa@yandex.ru>
1 parent b80e5d7 commit 26d29e6

File tree

3 files changed

+84
-68
lines changed

3 files changed

+84
-68
lines changed

convertors/move-timestamp-out-of-payload.js

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
require('dotenv').config();
2+
require('process');
23
const { MongoClient } = require('mongodb');
34

45
/**
@@ -15,7 +16,12 @@ async function movePayloadTimestampToEventLevel(db, collectionName) {
1516

1617
const docsToUpdate = collection.find(
1718
{ timestamp: { $exists: false } },
18-
{ projection: { _id: 1, 'payload.timestamp': 1 } }
19+
{
20+
projection: {
21+
_id: 1,
22+
'payload.timestamp': 1,
23+
},
24+
}
1925
).limit(documentsSelectionLimit);
2026

2127
const batchedOps = [];
@@ -33,11 +39,11 @@ async function movePayloadTimestampToEventLevel(db, collectionName) {
3339
updateOne: {
3440
filter: { _id: doc._id },
3541
update: {
36-
$set: { timestamp: Number(doc.payload.timestamp)},
37-
$unset: {'payload.timestamp': ''},
38-
}
39-
}
40-
})
42+
$set: { timestamp: Number(doc.payload.timestamp) },
43+
$unset: { 'payload.timestamp': '' },
44+
},
45+
},
46+
});
4147

4248
currentCount++;
4349
}
@@ -46,7 +52,7 @@ async function movePayloadTimestampToEventLevel(db, collectionName) {
4652
await collection.bulkWrite(batchedOps);
4753
}
4854

49-
return currentCount
55+
return currentCount;
5056
}
5157
/**
5258
* @param db - mongo db instance
@@ -57,15 +63,21 @@ async function backfillTimestampsFromEvents(db, repetitionCollectionName, projec
5763
const repetitions = db.collection(repetitionCollectionName);
5864
const events = db.collection(`events:${projectId}`);
5965

60-
let bulkOps = [];
66+
const bulkOps = [];
6167
let repetitionCount = 1;
6268

6369
const repetitionsList = await repetitions.find(
6470
{
6571
timestamp: { $exists: false },
6672
},
67-
{ projection: { _id: 1, groupHash: 1 } }
68-
).limit(documentsSelectionLimit).toArray();
73+
{
74+
projection: {
75+
_id: 1,
76+
groupHash: 1,
77+
},
78+
}
79+
).limit(documentsSelectionLimit)
80+
.toArray();
6981

7082
const groupHashList = [];
7183

@@ -77,24 +89,29 @@ async function backfillTimestampsFromEvents(db, repetitionCollectionName, projec
7789

7890
const relatedEvents = await events.find(
7991
{ groupHash: { $in: groupHashList } },
80-
{ projection: { timestamp: 1, groupHash: 1 } }
92+
{
93+
projection: {
94+
timestamp: 1,
95+
groupHash: 1,
96+
},
97+
}
8198
).toArray();
8299

83-
const relatedEventsMap = new Map()
100+
const relatedEventsMap = new Map();
84101

85102
relatedEvents.forEach(e => {
86103
relatedEventsMap.set(e.groupHash, e);
87-
})
104+
});
88105

89106
for (const repetition of repetitionsList) {
90107
const relatedEvent = relatedEventsMap.get(repetition.groupHash);
91108

92109
if (!relatedEvent) {
93110
bulkOps.push({
94111
deleteOne: {
95-
filter: { _id: repetition._id }
96-
}
97-
})
112+
filter: { _id: repetition._id },
113+
},
114+
});
98115
} else if (relatedEvent?.timestamp !== null) {
99116
bulkOps.push({
100117
updateOne: {
@@ -111,11 +128,12 @@ async function backfillTimestampsFromEvents(db, repetitionCollectionName, projec
111128
const result = await repetitions.bulkWrite(bulkOps);
112129
const updated = result.modifiedCount;
113130
const deleted = result.deletedCount;
131+
114132
processed = bulkOps.length;
115133
console.log(` updates (${processed} processed, ${updated} updated, ${deleted} deleted)`);
116134

117135
if (updated + deleted === 0) {
118-
repetitionCollectionsToCheck.filter(collection => collection !== repetition)
136+
repetitionCollectionsToCheck.filter(collection => collection !== repetition);
119137
}
120138
}
121139

@@ -126,7 +144,7 @@ async function backfillTimestampsFromEvents(db, repetitionCollectionName, projec
126144
* Method that runs convertor script
127145
*/
128146
async function run() {
129-
const fullUri = 'mongodb://hawk_new:evieg9bauK0ahs2youhoh7aer7kohT@rc1d-2jltinutse1eadfs.mdb.yandexcloud.net:27018/hawk_events?authSource=admin&replicaSet=rs01&tls=true&tlsInsecure=true';
147+
const fullUri = process.env.MONGO_EVENTS_DATABASE_URI;
130148

131149
// Parse the Mongo URL manually
132150
const mongoUrl = new URL(fullUri);
@@ -174,13 +192,13 @@ async function run() {
174192

175193
// Convert events
176194
let i = 1;
177-
let documentsUpdatedCount = 1
195+
let documentsUpdatedCount = 1;
178196

179197
while (documentsUpdatedCount != 0) {
180198
documentsUpdatedCount = 0;
181199
i = 1;
182200
const collectionsToUpdateCount = eventCollectionsToCheck.length;
183-
201+
184202
for (const collectionName of eventCollectionsToCheck) {
185203
console.log(`[${i}/${collectionsToUpdateCount}] Processing ${collectionName}`);
186204
const updated = await movePayloadTimestampToEventLevel(db, collectionName);
@@ -189,10 +207,10 @@ async function run() {
189207
eventCollectionsToCheck = eventCollectionsToCheck.filter(collection => collection !== collectionName);
190208
}
191209

192-
documentsUpdatedCount += updated
210+
documentsUpdatedCount += updated;
193211
i++;
194212
}
195-
}
213+
}
196214

197215
// Convert repetitions + backfill from events
198216
documentsUpdatedCount = 1;

workers/grouper/src/index.ts

Lines changed: 35 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,10 @@ import type {
1919
import type { RepetitionDBScheme } from '../types/repetition';
2020
import { DatabaseReadWriteError, DiffCalculationError, ValidationError } from '../../../lib/workerErrors';
2121
import { decodeUnsafeFields, encodeUnsafeFields } from '../../../lib/utils/unsafeFields';
22-
import HawkCatcher from '@hawk.so/nodejs';
2322
import { MS_IN_SEC } from '../../../lib/utils/consts';
2423
import DataFilter from './data-filter';
2524
import RedisHelper from './redisHelper';
26-
import levenshtein from 'js-levenshtein';
25+
// import levenshtein from 'js-levenshtein';
2726
import { computeDelta } from './utils/repetitionDiff';
2827
import TimeMs from '../../../lib/utils/time';
2928
import { rightTrim } from '../../../lib/utils/string';
@@ -109,7 +108,7 @@ export default class GrouperWorker extends Worker {
109108
let existedEvent = await this.getEvent(task.projectId, uniqueEventHash);
110109

111110
/**
112-
* If we couldn't group by group hash (title), try grouping by Levenshtein distance or patterns
111+
* If we couldn't group by group hash (title), try grouping by patterns
113112
*/
114113
if (!existedEvent) {
115114
const similarEvent = await this.findSimilarEvent(task.projectId, task.payload);
@@ -177,7 +176,6 @@ export default class GrouperWorker extends Worker {
177176
* and we need to process this event as repetition
178177
*/
179178
if (e.code?.toString() === DB_DUPLICATE_KEY_ERROR) {
180-
HawkCatcher.send(new Error('[Grouper] MongoError: E11000 duplicate key error collection'));
181179
await this.handle(task);
182180

183181
return;
@@ -287,35 +285,35 @@ export default class GrouperWorker extends Worker {
287285
* @param event - event to compare
288286
*/
289287
private async findSimilarEvent(projectId: string, event: EventData<EventAddons>): Promise<GroupedEventDBScheme | undefined> {
290-
const eventsCountToCompare = 60;
291-
const diffTreshold = 0.35;
288+
// const eventsCountToCompare = 60;
289+
// const diffTreshold = 0.35;
292290

293-
const lastUniqueEvents = await this.findLastEvents(projectId, eventsCountToCompare);
291+
// const lastUniqueEvents = await this.findLastEvents(projectId, eventsCountToCompare);
294292

295293
/**
296294
* Trim titles to reduce CPU usage for Levenshtein comparison
297295
*/
298-
const trimmedEventTitle = hasValue(event.title) ? rightTrim(event.title, MAX_CODE_LINE_LENGTH) : '';
296+
// const trimmedEventTitle = hasValue(event.title) ? rightTrim(event.title, MAX_CODE_LINE_LENGTH) : '';
299297

300298
/**
301299
* First try to find by Levenshtein distance
302300
*/
303-
const similarByLevenshtein = lastUniqueEvents.filter(prevEvent => {
304-
const trimmedPrevTitle = hasValue(prevEvent.payload.title) ? rightTrim(prevEvent.payload.title, MAX_CODE_LINE_LENGTH) : '';
301+
// const similarByLevenshtein = lastUniqueEvents.filter(prevEvent => {
302+
// const trimmedPrevTitle = hasValue(prevEvent.payload.title) ? rightTrim(prevEvent.payload.title, MAX_CODE_LINE_LENGTH) : '';
305303

306-
if (trimmedEventTitle === '' || trimmedPrevTitle === '') {
307-
return false;
308-
}
304+
// if (trimmedEventTitle === '' || trimmedPrevTitle === '') {
305+
// return false;
306+
// }
309307

310-
const distance = levenshtein(trimmedEventTitle, trimmedPrevTitle);
311-
const threshold = trimmedEventTitle.length * diffTreshold;
308+
// const distance = levenshtein(trimmedEventTitle, trimmedPrevTitle);
309+
// const threshold = trimmedEventTitle.length * diffTreshold;
312310

313-
return distance < threshold;
314-
}).pop();
311+
// return distance < threshold;
312+
// }).pop();
315313

316-
if (similarByLevenshtein) {
317-
return similarByLevenshtein;
318-
}
314+
// if (similarByLevenshtein) {
315+
// return similarByLevenshtein;
316+
// }
319317

320318
/**
321319
* If no match by Levenshtein, try matching by patterns
@@ -402,23 +400,23 @@ export default class GrouperWorker extends Worker {
402400
* @param count - how many events to return
403401
* @returns {GroupedEventDBScheme[]} list of the last N unique events
404402
*/
405-
private findLastEvents(projectId: string, count: number): Promise<GroupedEventDBScheme[]> {
406-
return this.cache.get(`last:${count}:eventsOf:${projectId}`, async () => {
407-
return this.eventsDb.getConnection()
408-
.collection(`events:${projectId}`)
409-
.find()
410-
.sort({
411-
_id: 1,
412-
})
413-
.limit(count)
414-
.toArray();
415-
},
416-
/**
417-
* TimeMs class stores time intervals in milliseconds, however NodeCache ttl needs to be specified in seconds
418-
*/
419-
/* eslint-disable-next-line @typescript-eslint/no-magic-numbers */
420-
TimeMs.MINUTE / 1000);
421-
}
403+
// private findLastEvents(projectId: string, count: number): Promise<GroupedEventDBScheme[]> {
404+
// return this.cache.get(`last:${count}:eventsOf:${projectId}`, async () => {
405+
// return this.eventsDb.getConnection()
406+
// .collection(`events:${projectId}`)
407+
// .find()
408+
// .sort({
409+
// _id: 1,
410+
// })
411+
// .limit(count)
412+
// .toArray();
413+
// },
414+
// /**
415+
// * TimeMs class stores time intervals in milliseconds, however NodeCache ttl needs to be specified in seconds
416+
// */
417+
// /* eslint-disable-next-line @typescript-eslint/no-magic-numbers */
418+
// TimeMs.MINUTE / 1000);
419+
// }
422420

423421
/**
424422
* Decides whether to increase the number of affected users for the repetition and the daily aggregation

workers/grouper/tests/index.test.ts

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -457,17 +457,17 @@ describe('GrouperWorker', () => {
457457
});
458458

459459
describe('Grouping', () => {
460-
test('should group events with partially different titles', async () => {
461-
await worker.handle(generateTask({ title: 'Some error (but not filly identical) example' }));
462-
await worker.handle(generateTask({ title: 'Some error (yes, it is not the identical) example' }));
463-
await worker.handle(generateTask({ title: 'Some error (and it is not identical) example' }));
460+
// test('should group events with partially different titles', async () => {
461+
// await worker.handle(generateTask({ title: 'Some error (but not filly identical) example' }));
462+
// await worker.handle(generateTask({ title: 'Some error (yes, it is not the identical) example' }));
463+
// await worker.handle(generateTask({ title: 'Some error (and it is not identical) example' }));
464464

465-
const originalEvent = await eventsCollection.findOne({});
465+
// const originalEvent = await eventsCollection.findOne({});
466466

467-
expect((await repetitionsCollection.find({
468-
groupHash: originalEvent.groupHash,
469-
}).toArray()).length).toBe(2);
470-
});
467+
// expect((await repetitionsCollection.find({
468+
// groupHash: originalEvent.groupHash,
469+
// }).toArray()).length).toBe(2);
470+
// });
471471

472472
describe('Pattern matching', () => {
473473
beforeEach(() => {

0 commit comments

Comments
 (0)