diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..0f13dca4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,96 @@ +Business Source License 1.1 + +Parameters + +Licensor: CodeX (Hawk) +Licensed Work: hawk.workers (https://github.com/codex-team/hawk.workers) + The Licensed Work is © 2025 CodeX +Additional Use Grant: Self-hosted use for own/internal needs and research/evaluation is permitted. + Using this code to provide a competing error-tracking SaaS or commercial + hosted service without the Licensor’s prior permission is prohibited. + +Change Date: 2030-01-01 + +Change License: AGPL-3.0 + +------------------------------------------------------------------------------- + +License text copyright © 2024 MariaDB plc, All Rights Reserved. +“Business Source License” is a trademark of MariaDB plc. + +Terms + +The Licensor hereby grants you the right to copy, modify, create derivative +works, redistribute, and make non-production use of the Licensed Work. The +Licensor may make an Additional Use Grant, above, permitting limited +production use. + +Effective on the Change Date, or the fourth anniversary of the first publicly +available distribution of a specific version of the Licensed Work under this +License, whichever comes first, the Licensor hereby grants you rights under +the terms of the Change License, and the rights granted in the paragraph +above terminate. + +If your use of the Licensed Work does not comply with the requirements +currently in effect as described in this License, you must purchase a +commercial license from the Licensor, its affiliated entities, or authorized +resellers, or you must refrain from using the Licensed Work. + +All copies of the original and modified Licensed Work, and derivative works +of the Licensed Work, are subject to this License. This License applies +separately for each version of the Licensed Work and the Change Date may vary +for each version of the Licensed Work released by Licensor. + +You must conspicuously display this License on each original or modified copy +of the Licensed Work. If you receive the Licensed Work in original or +modified form from a third party, the terms and conditions set forth in this +License apply to your use of that work. + +Any use of the Licensed Work in violation of this License will automatically +terminate your rights under this License for the current and all other +versions of the Licensed Work. + +This License does not grant you any right in any trademark or logo of +Licensor or its affiliates (provided that you may use a trademark or logo of +Licensor as expressly required by this License). TO THE EXTENT PERMITTED BY +APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON AN “AS IS” BASIS. LICENSOR +HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS OR IMPLIED, INCLUDING +(WITHOUT LIMITATION) WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +PURPOSE, NON-INFRINGEMENT, AND TITLE. MariaDB hereby grants you permission to +use this License’s text to license your works, and to refer to it using the +trademark “Business Source License”, as long as you comply with the Covenants +of Licensor below. + +Covenants of Licensor + +In consideration of the right to use this License’s text and the “Business +Source License” name and trademark, Licensor covenants to MariaDB, and to all +other recipients of the licensed work to be provided by Licensor: + +1. To specify as the Change License the GPL Version 2.0 or any later version, + or a license that is compatible with GPL Version 2.0 or a later version, + where “compatible” means that software provided under the Change License can + be included in a program with software provided under GPL Version 2.0 or a + later version. Licensor may specify additional Change Licenses without + limitation. + +2. To either: (a) specify an additional grant of rights to use that does not + impose any additional restriction on the right granted in this License, as + the Additional Use Grant; or (b) insert the text “None”. + +3. To specify a Change Date not later than the fourth anniversary of the first + publicly available distribution of a specific version of the Licensed Work + under this License. + +4. Not to modify this License in any other way. + +Notice + +The Business Source License (this document, or the “License”) is not an Open +Source license. However, the Licensed Work will eventually be made available +under an Open Source License, as stated in this License. + +For more information on the use of the Business Source License for MariaDB +products, please visit the MariaDB Business Source License FAQ. +For more information on the use of the Business Source License generally, +please visit the Adopting and Developing Business Source License FAQ. diff --git a/README.md b/README.md index c6fd3846..371a196e 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ # Hawk Workers +![License](https://img.shields.io/badge/license-BSL--1.1-orange) + Workers are services for processing hawk's background tasks @@ -219,3 +221,8 @@ yarn migrate Refactor mongo-migrate commands to have an opportunity to create or rollback [More details](https://www.npmjs.com/package/migrate-mongo) + +## License + +Source code is available under **Business Source License 1.1 (BSL 1.1)**. +See [`LICENSE`](./LICENSE) for terms, including: diff --git a/lib/memoize/index.test.ts b/lib/memoize/index.test.ts new file mode 100644 index 00000000..359c5b1e --- /dev/null +++ b/lib/memoize/index.test.ts @@ -0,0 +1,229 @@ +/* eslint-disable + no-unused-vars, + @typescript-eslint/explicit-function-return-type, + @typescript-eslint/no-unused-vars-experimental, + jsdoc/require-param-description +*/ +/** + * Ignore eslint jsdoc rules for mocked class + * Ignore eslint unused vars rule for decorator + */ + +import { memoize } from './index'; +import Crypto from '../utils/crypto'; + +describe('memoize decorator — per-test inline classes', () => { + afterEach(() => { + jest.useRealTimers(); + jest.restoreAllMocks(); + jest.clearAllMocks(); + }); + + it('should memoize return value with concat strategy across several calls', async () => { + class Sample { + public calls = 0; + + @memoize({ strategy: 'concat', ttl: 60_000, max: 50 }) + public async run(a: number, b: string) { + this.calls += 1; + return `${a}-${b}`; + } + } + + const sample = new Sample(); + + /** + * First call should memoize the method + */ + expect(await sample.run(1, 'x')).toBe('1-x'); + /** + * In this case + */ + expect(await sample.run(1, 'x')).toBe('1-x'); + expect(await sample.run(1, 'x')).toBe('1-x'); + + expect(sample.calls).toBe(1); + }); + + it('should memoize return value with set of arguments with concat strategy across several calls', async () => { + class Sample { + public calls = 0; + + @memoize({ strategy: 'concat' }) + public async run(a: unknown, b: unknown) { + this.calls += 1; + return `${String(a)}|${String(b)}`; + } + } + + const sample = new Sample(); + + /** + * Fill the memoization cache with values + */ + await sample.run(1, 'a'); + await sample.run(2, 'a'); + await sample.run(1, 'b'); + await sample.run(true, false); + await sample.run(undefined, null); + + expect(sample.calls).toBe(5); + + /** + * Those calls should not call the original method, they should return from memoize + */ + await sample.run(1, 'a'); + await sample.run(2, 'a'); + await sample.run(1, 'b'); + await sample.run(true, false); + await sample.run(undefined, null); + + expect(sample.calls).toBe(5); + }); + + it('should memoize return value for stringified objects across several calls', async () => { + class Sample { + public calls = 0; + @memoize({ strategy: 'concat' }) + public async run(x: unknown, y: unknown) { + this.calls += 1; + return 'ok'; + } + } + const sample = new Sample(); + const o1 = { a: 1 }; + const o2 = { b: 2 }; + + await sample.run(o1, o2); + await sample.run(o1, o2); + + expect(sample.calls).toBe(1); + }); + + it('should memoize return value for method with non-default arguments (NaN, Infinity, -0, Symbol, Date, RegExp) still cache same-args', async () => { + class Sample { + public calls = 0; + @memoize({ strategy: 'concat' }) + public async run(...args: unknown[]) { + this.calls += 1; + return args.map(String).join(','); + } + } + const sample = new Sample(); + + const sym = Symbol('t'); + const d = new Date('2020-01-01T00:00:00Z'); + const re = /a/i; + + const first = await sample.run(NaN, Infinity, -0, sym, d, re); + const second = await sample.run(NaN, Infinity, -0, sym, d, re); + + expect(second).toBe(first); + expect(sample.calls).toBe(1); + }); + + it('should call crypto hash with blake2b512 algo and base64url digest, should memoize return value with hash strategy', async () => { + const hashSpy = jest.spyOn(Crypto, 'hash'); + + class Sample { + public calls = 0; + @memoize({ strategy: 'hash' }) + public async run(...args: unknown[]) { + this.calls += 1; + return 'ok'; + } + } + const sample = new Sample(); + + await sample.run({a: 1}, undefined, 0); + await sample.run({a: 1}, undefined, 0); + + expect(hashSpy).toHaveBeenCalledWith([{a: 1}, undefined, 0], 'blake2b512', 'base64url'); + expect(sample.calls).toBe(1); + }); + + it('should not memoize return value with hash strategy and different arguments', async () => { + class Sample { + public calls = 0; + @memoize({ strategy: 'hash' }) + public async run(...args: unknown[]) { + this.calls += 1; + return 'ok'; + } + } + const sample = new Sample(); + + await sample.run({ v: 1 }); + await sample.run({ v: 2 }); + await sample.run({ v: 3 }); + + expect(sample.calls).toBe(3); + }); + + it('should memoize return value with hash strategy across several calls with same args', async () => { + class Sample { + public calls = 0; + @memoize({ strategy: 'hash' }) + public async run(arg: unknown) { + this.calls += 1; + return 'ok'; + } + } + const sample = new Sample(); + + await sample.run({ a: 1 }); + await sample.run({ a: 1 }); + + expect(sample.calls).toBe(1); + }); + + it('should memoize return value exactly for passed ttl millis', async () => { + jest.resetModules(); + jest.useFakeTimers({ legacyFakeTimers: false }); + jest.setSystemTime(new Date('2025-01-01T00:00:00Z')); + + const { memoize: memoizeWithMockedTimers } = await import('../memoize/index'); + + class Sample { + public calls = 0; + @memoizeWithMockedTimers({ strategy: 'concat', ttl: 1_000 }) + public async run(x: string) { + this.calls += 1; + return x; + } + } + const sample = new Sample(); + + await sample.run('k1'); + expect(sample.calls).toBe(1); + + /** + * Skip time beyond the ttl + */ + jest.advanceTimersByTime(1_001); + + await sample.run('k1'); + expect(sample.calls).toBe(2); + + }); + + it('error calls should never be momized', async () => { + class Sample { + public calls = 0; + @memoize() + public async run(x: number) { + this.calls += 1; + if (x === 1) throw new Error('boom'); + return x * 2; + } + } + const sample = new Sample(); + + /** + * Compute with throw + */ + await expect(sample.run(1)).rejects.toThrow('boom'); + await expect(sample.run(1)).rejects.toThrow('boom'); + expect(sample.calls).toBe(2); + }); +}); diff --git a/lib/memoize/index.ts b/lib/memoize/index.ts new file mode 100644 index 00000000..10430b69 --- /dev/null +++ b/lib/memoize/index.ts @@ -0,0 +1,98 @@ +import LRUCache from 'lru-cache'; +import Crypto from '../utils/crypto'; + +/** + * Pick the strategy of cache key form + * It could be concatenated list of arguments like 'projectId:eventId' + * Or it could be hashed json object — blake2b512 algorithm + */ +export type MemoizeKeyStrategy = 'concat' | 'hash'; + +/** + * Options of the memoize decorator + */ +export interface MemoizeOptions { + /** + * Max number of values stored in LRU cache at the same time + */ + max?: number; + + /** + * TTL in milliseconds + */ + ttl?: number; + + /** + * Strategy for key generation + */ + strategy?: MemoizeKeyStrategy; +} + +/** + * Async-only, per-method LRU-backed memoization decorator. + * Cache persists for the lifetime of the class instance (e.g. worker). + * + * @param options + */ +export function memoize(options: MemoizeOptions = {}): MethodDecorator { + /* eslint-disable @typescript-eslint/no-magic-numbers */ + const { + max = 50, + ttl = 1000 * 60 * 30, + strategy = 'concat', + } = options; + /* eslint-enable */ + + return function ( + _target, + propertyKey, + descriptor: PropertyDescriptor + ): PropertyDescriptor { + const originalMethod = descriptor.value; + + if (typeof originalMethod !== 'function') { + throw new Error('@Memoize can only decorate methods'); + } + + descriptor.value = async function (...args: unknown[]): Promise { + /** + * Create a cache key for each decorated method + */ + const cacheKey = `memoizeCache:${String(propertyKey)}`; + + /** + * Create a new cache if it does not exists yet (for certain function) + */ + const cache: LRUCache = this[cacheKey] ??= new LRUCache({ + max, + maxAge: ttl, + }); + + const key = strategy === 'hash' + ? Crypto.hash(args, 'blake2b512', 'base64url') + : args.map((arg) => JSON.stringify(arg)).join('__ARG_JOIN__'); + + /** + * Check if we have a cached result + */ + const cachedResult = cache.get(key); + + if (cachedResult !== undefined) { + return cachedResult; + } + + try { + const result = await originalMethod.apply(this, args); + + cache.set(key, result); + + return result; + } catch (err) { + cache.del(key); + throw err; + } + }; + + return descriptor; + }; +} diff --git a/lib/utils/crypto.ts b/lib/utils/crypto.ts index f4785c8a..74f1a2c1 100644 --- a/lib/utils/crypto.ts +++ b/lib/utils/crypto.ts @@ -1,4 +1,4 @@ -import crypto from 'crypto'; +import crypto, { BinaryToTextEncoding } from 'crypto'; /** * Crypto helper @@ -9,12 +9,13 @@ export default class Crypto { * * @param value — data to be hashed * @param algo — type of algorithm to be used for hashing + * @param digest - type of the representation of the hashed value */ - public static hash(value: unknown, algo = 'sha256'): string { - const stringifiedValue = JSON.stringify(value); + public static hash(value: unknown, algo = 'sha256', digest: BinaryToTextEncoding = 'hex'): string { + const stringifiedValue = typeof value === 'string' ? value : JSON.stringify(value); return crypto.createHash(algo) .update(stringifiedValue) - .digest('hex'); + .digest(digest); } } diff --git a/migrations/20250917000000-create-release-project-id-index.js b/migrations/20250917000000-create-release-project-id-index.js new file mode 100644 index 00000000..9fee1934 --- /dev/null +++ b/migrations/20250917000000-create-release-project-id-index.js @@ -0,0 +1,105 @@ +const indexName = 'projectId_release_unique_idx'; +const collectionName = 'releases'; + +module.exports = { + async up(db) { + const pairs = await db.collection(collectionName).aggregate([ + { + $group: { + _id: { projectId: '$projectId', release: '$release' }, + count: { $sum: 1 }, + }, + }, + { $project: { _id: 0, projectId: '$_id.projectId', release: '$_id.release', count: 1 } }, + ]).toArray(); + + console.log(`Found ${pairs.length} unique (projectId, release) pairs to process.`); + + let processed = 0; + + for (const { projectId, release, count } of pairs) { + processed += 1; + console.log(`[${processed}/${pairs.length}] Processing projectId=${projectId}, release=${release} (docs: ${count})`); + + try { + const docs = await db.collection(collectionName) + .find({ projectId, release }, { projection: { files: 1, commits: 1 } }) + .toArray(); + + const filesByName = new Map(); + const commitsByHash = new Map(); + + for (const doc of docs) { + if (Array.isArray(doc.files)) { + for (const file of doc.files) { + /** + * Keep first occurrence if duplicates conflict + */ + if (file && typeof file === 'object' && file.mapFileName && !filesByName.has(file.mapFileName)) { + filesByName.set(file.mapFileName, file); + } + } + } + if (Array.isArray(doc.commits)) { + for (const commit of doc.commits) { + if (commit && typeof commit === 'object' && commit.hash && !commitsByHash.has(commit.hash)) { + commitsByHash.set(commit.hash, commit); + } + } + } + } + + const mergedFiles = Array.from(filesByName.values()); + const mergedCommits = Array.from(commitsByHash.values()); + + /** + * Replace all docs for this pair with a single consolidated doc + */ + const ops = [ + { deleteMany: { filter: { projectId, release } } }, + { insertOne: { document: { projectId, release, files: mergedFiles, commits: mergedCommits } } }, + ]; + + await db.collection(collectionName).bulkWrite(ops, { ordered: true }); + console.log(`Consolidated projectId=${projectId}, release=${release}: files=${mergedFiles.length}, commits=${mergedCommits.length}`); + } catch (err) { + console.error(`Error consolidating projectId=${projectId}, release=${release}:`, err); + } + } + + /** + * Create the unique compound index + */ + try { + const hasIndex = await db.collection(collectionName).indexExists(indexName); + if (!hasIndex) { + await db.collection(collectionName).createIndex( + { projectId: 1, release: 1 }, + { name: indexName, unique: true, background: true } + ); + console.log(`Index ${indexName} created on ${collectionName} (projectId, release unique).`); + } else { + console.log(`Index ${indexName} already exists on ${collectionName}.`); + } + } catch (err) { + console.error(`Error creating index ${indexName} on ${collectionName}:`, err); + } + + }, + + async down(db) { + console.log(`Dropping index ${indexName} from ${collectionName}...`); + try { + const hasIndex = await db.collection(collectionName).indexExists(indexName); + if (hasIndex) { + await db.collection(collectionName).dropIndex(indexName); + console.log(`Index ${indexName} dropped from ${collectionName}.`); + } else { + console.log(`Index ${indexName} does not exist on ${collectionName}, skipping drop.`); + } + } catch (err) { + console.error(`Error dropping index ${indexName} from ${collectionName}:`, err); + } + console.log('Down migration completed (data changes are not reverted).'); + }, +}; diff --git a/package.json b/package.json index d27a60ab..9f094dc2 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "version": "0.0.1", "description": "Hawk workers", "repository": "git@github.com:codex-team/hawk.workers.git", - "license": "UNLICENSED", + "license": "BUSL-1.1", "workspaces": [ "workers/*" ], @@ -49,7 +49,7 @@ }, "dependencies": { "@hawk.so/nodejs": "^3.1.1", - "@hawk.so/types": "^0.1.32", + "@hawk.so/types": "^0.1.35", "@types/amqplib": "^0.8.2", "@types/jest": "^29.2.3", "@types/mongodb": "^3.5.15", diff --git a/tsconfig.json b/tsconfig.json index 1f1597bb..8dbeae47 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -58,7 +58,7 @@ // "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */ /* Experimental Options */ - // "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */ + "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */ // "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */ /* Advanced Options */ diff --git a/workers/archiver/src/index.ts b/workers/archiver/src/index.ts index 39d07e41..d45c863f 100644 --- a/workers/archiver/src/index.ts +++ b/workers/archiver/src/index.ts @@ -103,13 +103,18 @@ export default class ArchiverWorker extends Worker { const finishDate = new Date(); const dbSizeOnFinish = (await this.eventsDbConnection.stats()).dataSize; - await this.sendReport({ - dbSizeOnFinish, - dbSizeOnStart, - startDate, - projectsData, - finishDate, - }); + try { + await this.sendReport({ + dbSizeOnFinish, + dbSizeOnStart, + startDate, + projectsData, + finishDate, + }); + } catch (error) { + this.logger.error('Error sending report:', error); + } + this.logger.info(`Finish archiving at ${finishDate}.`); this.logger.info(`Database size on start: ${prettysize(dbSizeOnStart)}, on finish: ${prettysize(dbSizeOnFinish)}, delta: ${prettysize(dbSizeOnStart - dbSizeOnFinish)}`); } @@ -148,11 +153,11 @@ export default class ArchiverWorker extends Worker { await this.projectCollection.updateOne({ _id: project._id, }, - { - $inc: { - archivedEventsCount: deletedCount, - }, - }); + { + $inc: { + archivedEventsCount: deletedCount, + }, + }); } /** @@ -335,11 +340,17 @@ export default class ArchiverWorker extends Worker { report += `\n\n${totalArchivedEventsCount} events and ${totalRemovedReleasesCount} releases archived in ${archivingTimeInMinutes.toFixed(DIGITS_AFTER_POINT)} min`; report += `\nDatabase size changed from ${prettysize(reportData.dbSizeOnStart)} to ${prettysize(reportData.dbSizeOnFinish)} (–${prettysize(reportData.dbSizeOnStart - reportData.dbSizeOnFinish)})`; - await axios({ + const response = await axios({ method: 'post', url: process.env.REPORT_NOTIFY_URL, data: 'message=' + report + '&parse_mode=HTML', }); + + this.logger.info('Report notification response:', { + status: response.status, + statusText: response.statusText, + data: response.data + }); } /** diff --git a/workers/grouper/src/index.ts b/workers/grouper/src/index.ts index 2a89aed8..f7621149 100644 --- a/workers/grouper/src/index.ts +++ b/workers/grouper/src/index.ts @@ -223,20 +223,30 @@ export default class GrouperWorker extends Worker { /** * Store events counter by days */ - await this.saveDailyEvents(task.projectId, uniqueEventHash, task.timestamp, repetitionId, incrementDailyAffectedUsers); + await this.saveDailyEvents( + task.projectId, + uniqueEventHash, + task.timestamp, + repetitionId, + incrementDailyAffectedUsers + ); /** - * Add task for NotifierWorker + * Add task for NotifierWorker only if event is not ignored */ if (process.env.IS_NOTIFIER_WORKER_ENABLED) { - await this.addTask(WorkerNames.NOTIFIER, { - projectId: task.projectId, - event: { - title: task.payload.title, - groupHash: uniqueEventHash, - isNew: isFirstOccurrence, - }, - }); + const isIgnored = isFirstOccurrence ? false : !!existedEvent?.marks?.ignored; + + if (!isIgnored) { + await this.addTask(WorkerNames.NOTIFIER, { + projectId: task.projectId, + event: { + title: task.payload.title, + groupHash: uniqueEventHash, + isNew: isFirstOccurrence, + }, + }); + } } } diff --git a/workers/grouper/tests/index.test.ts b/workers/grouper/tests/index.test.ts index cd410fc2..6d1c9adc 100644 --- a/workers/grouper/tests/index.test.ts +++ b/workers/grouper/tests/index.test.ts @@ -589,6 +589,78 @@ describe('GrouperWorker', () => { }); }); + describe('Event marks handling', () => { + describe('Ignored events', () => { + it('should not add task for NotifierWorker when event is marked as ignored', async () => { + const mockAddTask = jest + .spyOn(worker as any, 'addTask') + .mockImplementation(() => Promise.resolve()); + + // Create an event first + const firstTask = generateTask({ title: 'Test ignored event' }); + await worker.handle(firstTask); + + // Mark the event as ignored by updating it in database + const eventHash = await (worker as any).getUniqueEventHash(firstTask); + await eventsCollection.updateOne( + { groupHash: eventHash }, + { $set: { marks: { ignored: true } } } + ); + + // Handle the same event again (repetition) + const secondTask = generateTask({ title: 'Test ignored event' }); + await worker.handle(secondTask); + + // Verify that addTask was called only once (for the first occurrence) + expect(mockAddTask).toHaveBeenCalledTimes(1); + + mockAddTask.mockRestore(); + }); + + it('should add task for NotifierWorker when event is not marked as ignored', async () => { + const mockAddTask = jest + .spyOn(worker as any, 'addTask') + .mockImplementation(() => Promise.resolve()); + + // Create an event first + const firstTask = generateTask({ title: 'Test non-ignored event' }); + await worker.handle(firstTask); + + // Handle the same event again (repetition) - without marking as ignored + const secondTask = generateTask({ title: 'Test non-ignored event' }); + await worker.handle(secondTask); + + // Verify that addTask was called twice (for both occurrences) + expect(mockAddTask).toHaveBeenCalledTimes(2); + + mockAddTask.mockRestore(); + }); + + it('should add task for NotifierWorker for first occurrence even if marks field is undefined', async () => { + const mockAddTask = jest + .spyOn(worker as any, 'addTask') + .mockImplementation(() => Promise.resolve()); + + // Create a new event (first occurrence) + const task = generateTask({ title: 'Test new event without marks' }); + await worker.handle(task); + + // Verify that addTask was called for the first occurrence + expect(mockAddTask).toHaveBeenCalledTimes(1); + expect(mockAddTask).toHaveBeenCalledWith('notifier', { + projectId: task.projectId, + event: { + title: task.payload.title, + groupHash: expect.any(String), + isNew: true, + }, + }); + + mockAddTask.mockRestore(); + }); + }); + }); + afterAll(async () => { await redisClient.quit(); await worker.finish(); diff --git a/workers/javascript/package.json b/workers/javascript/package.json index 4cd137f0..7bec49e3 100644 --- a/workers/javascript/package.json +++ b/workers/javascript/package.json @@ -1,6 +1,6 @@ { "name": "hawk-worker-javascript", - "version": "0.0.1", + "version": "0.1.0", "description": "Handles messages from JavaScript Catcher", "main": "src/index.ts", "license": "UNLICENSED", @@ -10,7 +10,8 @@ "@types/useragent": "^2.1.1", "source-map-js": "^1.2.0", "ts-node": "^8.3.0", - "typescript": "^3.5.3" + "typescript": "^3.5.3", + "lodash.clonedeep": "^4.5.0" }, "dependencies": { "useragent": "^2.3.0" diff --git a/workers/javascript/src/index.ts b/workers/javascript/src/index.ts index cc4fd2ea..80702454 100644 --- a/workers/javascript/src/index.ts +++ b/workers/javascript/src/index.ts @@ -7,13 +7,21 @@ import { GroupWorkerTask } from '../../grouper/types/group-worker-task'; import { SourceMapsRecord } from '../../release/types'; import * as pkg from '../package.json'; import { JavaScriptEventWorkerTask } from '../types/javascript-event-worker-task'; +import { BeautifyBacktracePayload } from '../types/beautify-backtrace-payload'; import HawkCatcher from '@hawk.so/nodejs'; -import Crypto from '../../../lib/utils/crypto'; import { BacktraceFrame, CatcherMessagePayload, CatcherMessageType, ErrorsCatcherType, SourceCodeLine, SourceMapDataExtended } from '@hawk.so/types'; import { beautifyUserAgent } from './utils'; import { Collection } from 'mongodb'; import { parse } from '@babel/parser'; import traverse from '@babel/traverse'; +/* eslint-disable-next-line no-unused-vars */ +import { memoize } from '../../../lib/memoize'; + +/** + * eslint does not count decorators as a variable usage + */ +/* eslint-disable-next-line no-unused-vars */ +const MEMOIZATION_TTL = Number(process.env.MEMOIZATION_TTL ?? 0); /** * Worker for handling Javascript events @@ -69,7 +77,11 @@ export default class JavascriptEventWorker extends EventWorker { this.logger.info('beautifyBacktrace called'); try { - event.payload.backtrace = await this.beautifyBacktrace(event); + event.payload.backtrace = await this.beautifyBacktrace({ + projectId: event.projectId, + release: event.payload.release.toString(), + backtrace: event.payload.backtrace, + }); } catch (err) { this.logger.error('Error while beautifing backtrace', err); } @@ -94,21 +106,14 @@ export default class JavascriptEventWorker extends EventWorker { * @param {JavaScriptEventWorkerTask} event — js error minified * @returns {BacktraceFrame[]} - parsed backtrace */ - private async beautifyBacktrace(event: JavaScriptEventWorkerTask): Promise { - const releaseRecord: SourceMapsRecord = await this.cache.get( - `releaseRecord:${event.projectId}:${event.payload.release.toString()}`, - () => { - return this.getReleaseRecord( - event.projectId, - event.payload.release.toString() - ); - } - ); + @memoize({ max: 200, ttl: MEMOIZATION_TTL, strategy: 'hash' }) + private async beautifyBacktrace({ projectId, release, backtrace }: BeautifyBacktracePayload): Promise { + const releaseRecord: SourceMapsRecord = await this.getReleaseRecord(projectId, release); if (!releaseRecord) { this.logger.info('beautifyBacktrace: no releaseRecord found'); - return event.payload.backtrace; + return backtrace; } this.logger.info(`beautifyBacktrace: release record found: ${JSON.stringify(releaseRecord)}`); @@ -116,30 +121,23 @@ export default class JavascriptEventWorker extends EventWorker { /** * If we have a source map associated with passed release, override some values in backtrace with original line/file */ - return Promise.all(event.payload.backtrace.map(async (frame: BacktraceFrame, index: number) => { + return Promise.all(backtrace.map(async (frame: BacktraceFrame, index: number) => { /** - * Get cached (or set if the value is missing) real backtrace frame + * Consume rbacktrace frame and catch errors (send them to hawk) */ - const result = await this.cache.get( - `consumeBacktraceFrame:${event.payload.release.toString()}:${Crypto.hash(frame)}:${index}`, - () => { - return this.consumeBacktraceFrame(frame, releaseRecord) - .catch((error) => { - this.logger.error('Error while consuming ' + error.stack); - - /** - * Send error to Hawk - */ - HawkCatcher.send(error, { - payload: event.payload as unknown as Record, - }); - - return event.payload.backtrace[index]; - }); - } - ); - - return result; + return await this.consumeBacktraceFrame(frame, releaseRecord) + .catch((error) => { + this.logger.error('Error while consuming ' + error.stack); + + /** + * Send error to Hawk + */ + HawkCatcher.send(error, { + payload: backtrace as unknown as Record, + }); + + return backtrace[index]; + }); })); } @@ -189,7 +187,7 @@ export default class JavascriptEventWorker extends EventWorker { /** * Load source map content from Grid fs */ - const mapContent = await this.loadSourceMapFile(mapForFrame); + const mapContent = await this.loadSourceMapFile(mapForFrame._id); if (!mapContent) { this.logger.info(`consumeBacktraceFrame: Can't load map content for ${JSON.stringify(mapForFrame)}`); @@ -197,9 +195,6 @@ export default class JavascriptEventWorker extends EventWorker { return stackFrame; } - /** - * @todo cache source map consumer for file-keys - */ const consumer = this.consumeSourceMap(mapContent); /** @@ -234,10 +229,10 @@ export default class JavascriptEventWorker extends EventWorker { */ lines = this.readSourceLines(consumer, originalLocation); - // const originalContent = consumer.sourceContentFor(originalLocation.source); + const originalContent = consumer.sourceContentFor(originalLocation.source); - // functionContext = this.getFunctionContext(originalContent, originalLocation.line) ?? originalLocation.name; - } catch(e) { + functionContext = await this.getFunctionContext(originalContent, originalLocation.line) ?? originalLocation.name; + } catch (e) { HawkCatcher.send(e); this.logger.error('Can\'t get function context'); this.logger.error(e); @@ -260,7 +255,7 @@ export default class JavascriptEventWorker extends EventWorker { * @param line - number of the line from the stack trace * @returns {string | null} - string of the function context or null if it could not be parsed */ - private _getFunctionContext(sourceCode: string, line: number): string | null { + private getFunctionContext(sourceCode: string, line: number): string | null { let functionName: string | null = null; let className: string | null = null; let isAsync = false; @@ -361,13 +356,13 @@ export default class JavascriptEventWorker extends EventWorker { /** * Downloads source map file from Grid FS * - * @param map - saved file info without content. + * @param mapId - id of the map file in the bucket */ - private loadSourceMapFile(map: SourceMapDataExtended): Promise { + private loadSourceMapFile(mapId: SourceMapDataExtended['_id']): Promise { return new Promise((resolve, reject) => { let buf = Buffer.from(''); - const readstream = this.db.getBucket().openDownloadStream(map._id) + const readstream = this.db.getBucket().openDownloadStream(mapId) .on('data', (chunk) => { buf = Buffer.concat([buf, chunk]); }) diff --git a/workers/javascript/tests/index.test.ts b/workers/javascript/tests/index.test.ts index 7fff5c15..531826e8 100644 --- a/workers/javascript/tests/index.test.ts +++ b/workers/javascript/tests/index.test.ts @@ -4,8 +4,7 @@ import { JavaScriptEventWorkerTask } from '../types/javascript-event-worker-task import { Db, MongoClient, ObjectId } from 'mongodb'; import * as WorkerNames from '../../../lib/workerNames'; import { ReleaseDBScheme } from '@hawk.so/types'; - -const itIf = it.skip; +import cloneDeep from 'lodash.clonedeep'; describe('JavaScript event worker', () => { let connection: MongoClient; @@ -155,10 +154,14 @@ describe('JavaScript event worker', () => { useNewUrlParser: true, useUnifiedTopology: true, }); - db = connection.db('hawk'); + db = connection.db(); // Use default database from connection URI, same as worker + }); + + afterEach(() => { + jest.restoreAllMocks(); }); - itIf('should process an event without errors and add a task with correct event information to grouper', async () => { + it('should process an event without errors and add a task with correct event information to grouper', async () => { /** * Arrange */ @@ -190,7 +193,7 @@ describe('JavaScript event worker', () => { await worker.finish(); }); - itIf('should parse user agent correctly', async () => { + it('should parse user agent correctly', async () => { /** * Arrange */ @@ -229,7 +232,7 @@ describe('JavaScript event worker', () => { await worker.finish(); }); - itIf('should parse source maps correctly', async () => { + it('should parse source maps correctly', async () => { /** * Arrange */ @@ -278,7 +281,7 @@ describe('JavaScript event worker', () => { await worker.finish(); }); - itIf('should use cache while processing source maps', async () => { + it('should use cache while processing source maps', async () => { /** * Arrange */ @@ -312,7 +315,131 @@ describe('JavaScript event worker', () => { await worker.finish(); }); - afterAll(async () => { - await connection.close(); + it('should memoize beautifyBacktrace within several handle calls', async () => { + // Arrange + const worker = new JavascriptEventWorker(); + + await worker.start(); + + // Create event with two frames mapping to the same origin file + const workerEvent = { + ...createEventMock({ withBacktrace: true }), + } as JavaScriptEventWorkerTask; + + workerEvent.payload.backtrace = [ + { + file: 'file:///main.js', + line: 1, + column: 100, + }, + { + file: 'file:///main.js', + line: 1, + column: 200, + }, + ] as any; + + const workerEventDuplicate = cloneDeep(workerEvent); + + // Create a release with a single map file used by both frames + const singleMapRelease = { + ...createReleaseMock({ + projectId: workerEvent.projectId, + release: workerEvent.payload.release, + }), + } as any; + const firstFileId = singleMapRelease.files[0]._id; + + singleMapRelease.files = [ + { + mapFileName: 'main.js.map', + originFileName: 'main.js', + _id: firstFileId, + }, + ]; + + await db.collection('releases').insertOne(singleMapRelease); + + /** + * Cast prototype to any because getReleaseRecord is ts private + */ + const getReleaseRecordSpy = jest.spyOn(JavascriptEventWorker.prototype as any, 'getReleaseRecord'); + + // Act + await worker.handle(workerEvent); + await worker.handle(workerEventDuplicate); + + // Assert: Since beautifyBacktrace is now memoized, the entire method should only be called once + expect(getReleaseRecordSpy).toHaveBeenCalledTimes(1); + + await worker.finish(); + }); + + it('should not memoize beautifyBacktrace within several calls with different arguments', async () => { + // Arrange + const worker = new JavascriptEventWorker(); + + await worker.start(); + + // Create event with two frames mapping to the same origin file + const workerEvent = { + ...createEventMock({ withBacktrace: true }), + } as JavaScriptEventWorkerTask; + + workerEvent.payload.backtrace = [ + { + file: 'file:///main.js', + line: 1, + column: 100, + }, + ] as any; + + /** + * Worker event with different backtrace + */ + const anotherWorkerEvent = { + ...createEventMock({ withBacktrace: true }), + } as JavaScriptEventWorkerTask; + + anotherWorkerEvent.payload.backtrace = [ + { + file: 'file:///main.js', + line: 10, + column: 14, + }, + ] as any; + + // Create a release with a single map file used by both frames + const singleMapRelease = { + ...createReleaseMock({ + projectId: workerEvent.projectId, + release: workerEvent.payload.release, + }), + } as any; + const firstFileId = singleMapRelease.files[0]._id; + + singleMapRelease.files = [ + { + mapFileName: 'main.js.map', + originFileName: 'main.js', + _id: firstFileId, + }, + ]; + + await db.collection('releases').insertOne(singleMapRelease); + + /** + * Cast prototype to any because getReleaseRecord is ts private + */ + const getReleaseRecordSpy = jest.spyOn(JavascriptEventWorker.prototype as any, 'getReleaseRecord'); + + // Act + await worker.handle(workerEvent); + await worker.handle(anotherWorkerEvent); + + // Assert: Since beautifyBacktrace is now memoized, the entire method should only be called once + expect(getReleaseRecordSpy).toHaveBeenCalledTimes(2); + + await worker.finish(); }); }); diff --git a/workers/javascript/types/beautify-backtrace-payload.d.ts b/workers/javascript/types/beautify-backtrace-payload.d.ts new file mode 100644 index 00000000..1d88c3b0 --- /dev/null +++ b/workers/javascript/types/beautify-backtrace-payload.d.ts @@ -0,0 +1,8 @@ +import { JavaScriptEventWorkerTask } from './javascript-event-worker-task'; + +/** + * Type that represents the payload of the beautify backtrace method + * It requires id of the project, release and backtrace to beautify + */ +export type BeautifyBacktracePayload = Pick + & Pick; diff --git a/workers/release/src/index.ts b/workers/release/src/index.ts index 3359dda7..93c2618f 100644 --- a/workers/release/src/index.ts +++ b/workers/release/src/index.ts @@ -9,8 +9,15 @@ import { Worker } from '../../../lib/worker'; import { DatabaseReadWriteError, NonCriticalError } from '../../../lib/workerErrors'; import * as pkg from '../package.json'; import { ReleaseWorkerTask, ReleaseWorkerAddReleasePayload, CommitDataUnparsed } from '../types'; -import { Collection, MongoClient } from 'mongodb'; +import { Collection, MongoClient, MongoError } from 'mongodb'; import { SourceMapDataExtended, SourceMapFileChunk, CommitData, SourcemapCollectedData, ReleaseDBScheme } from '@hawk.so/types'; + +/** + * Error code of MongoDB key duplication error + */ +/* eslint-disable @typescript-eslint/no-magic-numbers */ +const DB_DUPLICATE_KEY_ERROR = '11000'; + /** * Worker to save releases */ @@ -142,105 +149,104 @@ export default class ReleaseWorker extends Worker { * @param payload - source map data */ private async saveSourceMap(projectId: string, payload: ReleaseWorkerAddReleasePayload): Promise { + const files: SourceMapDataExtended[] = this.extendReleaseInfo(payload.files); + /** - * Start transaction to avoid race condition + * Use same transaction for read and related write operations */ - const session = await this.client.startSession(); - - try { - const files: SourceMapDataExtended[] = this.extendReleaseInfo(payload.files); + const existedRelease = await this.releasesCollection.findOne({ + projectId: projectId, + release: payload.release, + }); + /** + * Iterate all maps of the new release and save only new + */ + const savedFiles = await Promise.all(files.map(async (map: SourceMapDataExtended) => { /** - * Use same transaction for read and related write operations + * Skip already saved maps */ - await session.withTransaction(async () => { - const existedRelease = await this.releasesCollection.findOne({ - projectId: projectId, - release: payload.release, - }, { session }); - - /** - * Iterate all maps of the new release and save only new - */ - const savedFiles = await Promise.all(files.map(async (map: SourceMapDataExtended) => { - /** - * Skip already saved maps - */ - - const alreadySaved = existedRelease && existedRelease.files && existedRelease.files.find((savedFile) => { - return savedFile.mapFileName === map.mapFileName; - }); + const alreadySaved = existedRelease && existedRelease.files && !!existedRelease.files.find((savedFile) => { + return savedFile.mapFileName === map.mapFileName; + }); - if (alreadySaved) { - return; - } + if (alreadySaved) { + return; + } - try { - const fileInfo = await this.saveFile(map); - - /** - * Save id of saved file instead - */ - return { - ...map, - _id: fileInfo._id, - }; - } catch (error) { - this.logger.error(`Map ${map.mapFileName} was not saved: ${error}`); - } - })); + try { + const fileInfo = await this.saveFile(map); /** - * Filter undefined files and then prepare files that would be saved to releases table - * we do not need their content since it would be stored in gridFS + * Save id of saved file instead */ - const savedFilesWithoutContent: Omit[] = savedFiles.filter(file => { - return file !== undefined; - }).map(({ content, ...rest }) => { - return rest; - }); + return { + ...map, + _id: fileInfo._id, + }; + } catch (error) { + this.logger.error(`Map ${map.mapFileName} was not saved: ${error}`); + } + })); - /** - * Nothing to save: maps was previously saved - */ - if (savedFilesWithoutContent.length === 0) { - return; - } + /** + * Filter undefined files and then prepare files that would be saved to releases table + * we do not need their content since it would be stored in gridFS + */ + const savedFilesWithoutContent: Omit[] = savedFiles.filter(file => { + return file !== undefined; + }).map(({ content, ...rest }) => { + return rest; + }); - /** - * - insert new record with saved maps - * or - * - update previous record with adding new saved maps - */ - if (!existedRelease) { - this.logger.info('inserted new release'); + /** + * Nothing to save: maps was previously saved + */ + if (savedFilesWithoutContent.length === 0) { + return; + } + + try { + /** + * - insert new record with saved maps + * or + * - update previous record with adding new saved maps + */ + if (!existedRelease) { + this.logger.info('trying insert new release'); + + try { await this.releasesCollection.insertOne({ projectId: projectId, release: payload.release, files: savedFilesWithoutContent, - } as ReleaseDBScheme, { session }); + } as ReleaseDBScheme); + this.logger.info('inserted new release'); + } catch (err) { + if ((err as MongoError).code.toString() === DB_DUPLICATE_KEY_ERROR) { + this.logger.warn(`Duplicate key on insert, retrying update after small delay`); + /* eslint-disable @typescript-eslint/no-magic-numbers */ + await new Promise(resolve => setTimeout(resolve, 200)); + } else { + throw err; + } } + } - await this.releasesCollection.findOneAndUpdate({ - projectId: projectId, - release: payload.release, - }, { - $push: { - files: { - $each: savedFilesWithoutContent, - }, + await this.releasesCollection.findOneAndUpdate({ + projectId: projectId, + release: payload.release, + }, { + $push: { + files: { + $each: savedFilesWithoutContent, }, - }, { session }); + }, }); } catch (error) { this.logger.error(`Can't extract release info:\n${JSON.stringify(error)}`); throw new NonCriticalError('Can\'t parse source-map file'); - } finally { - /** - * End transaction - */ - await session.endSession(); } } @@ -272,7 +278,12 @@ export default class ReleaseWorker extends Worker { return [ { mapFileName: file.name, - originFileName: mapContent.file, + /** + * Some bundlers could skip file in the source map content since it duplicates in map name + * Like map name bundle.js.map is a source map for a bundle.js + * @see https://sourcemaps.info/spec.html - format + */ + originFileName: mapContent.file ?? file.name.replace(/\.map$/, ''), content: mapBodyString, } ]; }); diff --git a/yarn.lock b/yarn.lock index 2180bae4..935577aa 100644 --- a/yarn.lock +++ b/yarn.lock @@ -428,10 +428,10 @@ dependencies: "@types/mongodb" "^3.5.34" -"@hawk.so/types@^0.1.32": - version "0.1.32" - resolved "https://registry.yarnpkg.com/@hawk.so/types/-/types-0.1.32.tgz#5eb662e91da922e1cbab7af0cf03bfa567ee98df" - integrity sha512-7gdx/fq31iLxmc0hZKs+wPYqtRT4rLvTi9p4am2My9SQ4t/l8if5QEfxh6G4WABKOmhiZLchivFA9Oj+Nn5EcQ== +"@hawk.so/types@^0.1.35": + version "0.1.35" + resolved "https://registry.yarnpkg.com/@hawk.so/types/-/types-0.1.35.tgz#6afd416dced1cc3282d721ca5621bf452b27aea1" + integrity sha512-uMTAeu6DlRlk+oputJBjTlrm1GzOkIwlMfGhpdOp3sRWe/YPGD6nMYlb9MZoVN6Yee7RIpYD7It+DPeUPAyIFw== dependencies: "@types/mongodb" "^3.5.34" @@ -4629,11 +4629,6 @@ jest@^29.2.2: import-local "^3.0.2" jest-cli "^29.7.0" -js-levenshtein@^1.1.6: - version "1.1.6" - resolved "https://registry.yarnpkg.com/js-levenshtein/-/js-levenshtein-1.1.6.tgz#c6cee58eb3550372df8deb85fad5ce66ce01d59d" - integrity sha512-X2BB11YZtrRqY4EnQcLX5Rh373zbK4alC1FW7D7MBhL2gtcC17cTnr6DmfHZeS0s2rTHjUTMMHfG7gO8SSdw+g== - js-tokens@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499"