/**
 * Unit tests of the memoize decorator.
 * Every test declares its own inline class so decorated methods never share a cache between tests.
 */
describe('memoize decorator — per-test inline classes', () => {
  afterEach(() => {
    jest.useRealTimers();
    jest.restoreAllMocks();
    jest.clearAllMocks();
  });

  it('should memoize return value with concat strategy across several calls', async () => {
    class Sample {
      public calls = 0;

      @memoize({ strategy: 'concat', ttl: 60_000, max: 50 })
      public async run(a: number, b: string) {
        this.calls += 1;

        return `${a}-${b}`;
      }
    }

    const sample = new Sample();

    /**
     * First call should execute the method and memoize its result
     */
    expect(await sample.run(1, 'x')).toBe('1-x');

    /**
     * Next calls with the same arguments should be answered from the cache
     */
    expect(await sample.run(1, 'x')).toBe('1-x');
    expect(await sample.run(1, 'x')).toBe('1-x');

    expect(sample.calls).toBe(1);
  });

  it('should memoize return value with set of arguments with concat strategy across several calls', async () => {
    class Sample {
      public calls = 0;

      @memoize({ strategy: 'concat' })
      public async run(a: unknown, b: unknown) {
        this.calls += 1;

        return `${String(a)}|${String(b)}`;
      }
    }

    const sample = new Sample();

    /**
     * Fill the memoization cache with values
     */
    await sample.run(1, 'a');
    await sample.run(2, 'a');
    await sample.run(1, 'b');
    await sample.run(true, false);
    await sample.run(undefined, null);

    expect(sample.calls).toBe(5);

    /**
     * Those calls should not call the original method, they should return from memoize
     */
    await sample.run(1, 'a');
    await sample.run(2, 'a');
    await sample.run(1, 'b');
    await sample.run(true, false);
    await sample.run(undefined, null);

    expect(sample.calls).toBe(5);
  });

  it('should memoize return value for stringified objects across several calls', async () => {
    class Sample {
      public calls = 0;

      @memoize({ strategy: 'concat' })
      public async run(x: unknown, y: unknown) {
        this.calls += 1;

        return 'ok';
      }
    }

    const sample = new Sample();
    const o1 = { a: 1 };
    const o2 = { b: 2 };

    await sample.run(o1, o2);
    await sample.run(o1, o2);

    expect(sample.calls).toBe(1);
  });

  it('should memoize return value for method with non-default arguments (NaN, Infinity, -0, Symbol, Date, RegExp) still cache same-args', async () => {
    class Sample {
      public calls = 0;

      @memoize({ strategy: 'concat' })
      public async run(...args: unknown[]) {
        this.calls += 1;

        return args.map(String).join(',');
      }
    }

    const sample = new Sample();

    const sym = Symbol('t');
    const d = new Date('2020-01-01T00:00:00Z');
    const re = /a/i;

    const first = await sample.run(NaN, Infinity, -0, sym, d, re);
    const second = await sample.run(NaN, Infinity, -0, sym, d, re);

    expect(second).toBe(first);
    expect(sample.calls).toBe(1);
  });

  it('should call crypto hash with blake2b512 algo and base64url digest, should memoize return value with hash strategy', async () => {
    const hashSpy = jest.spyOn(Crypto, 'hash');

    class Sample {
      public calls = 0;

      @memoize({ strategy: 'hash' })
      public async run(...args: unknown[]) {
        this.calls += 1;

        return 'ok';
      }
    }

    const sample = new Sample();

    await sample.run({ a: 1 }, undefined, 0);
    await sample.run({ a: 1 }, undefined, 0);

    expect(hashSpy).toHaveBeenCalledWith([{ a: 1 }, undefined, 0], 'blake2b512', 'base64url');
    expect(sample.calls).toBe(1);
  });

  it('should not memoize return value with hash strategy and different arguments', async () => {
    class Sample {
      public calls = 0;

      @memoize({ strategy: 'hash' })
      public async run(...args: unknown[]) {
        this.calls += 1;

        return 'ok';
      }
    }

    const sample = new Sample();

    await sample.run({ v: 1 });
    await sample.run({ v: 2 });
    await sample.run({ v: 3 });

    expect(sample.calls).toBe(3);
  });

  it('should memoize return value with hash strategy across several calls with same args', async () => {
    class Sample {
      public calls = 0;

      @memoize({ strategy: 'hash' })
      public async run(arg: unknown) {
        this.calls += 1;

        return 'ok';
      }
    }

    const sample = new Sample();

    await sample.run({ a: 1 });
    await sample.run({ a: 1 });

    expect(sample.calls).toBe(1);
  });

  it('should memoize return value exactly for passed ttl millis', async () => {
    /**
     * Re-import the module after enabling fake timers so the cache reads the mocked clock
     */
    jest.resetModules();
    jest.useFakeTimers({ legacyFakeTimers: false });
    jest.setSystemTime(new Date('2025-01-01T00:00:00Z'));

    const { memoize: memoizeWithMockedTimers } = await import('../memoize/index');

    class Sample {
      public calls = 0;

      @memoizeWithMockedTimers({ strategy: 'concat', ttl: 1_000 })
      public async run(x: string) {
        this.calls += 1;

        return x;
      }
    }

    const sample = new Sample();

    await sample.run('k1');
    expect(sample.calls).toBe(1);

    /**
     * Still inside the ttl window — the value should come from the cache
     */
    jest.advanceTimersByTime(500);

    await sample.run('k1');
    expect(sample.calls).toBe(1);

    /**
     * Skip time beyond the ttl (1001 ms in total since the value was stored)
     */
    jest.advanceTimersByTime(501);

    await sample.run('k1');
    expect(sample.calls).toBe(2);
  });

  it('error calls should never be memoized', async () => {
    class Sample {
      public calls = 0;

      @memoize()
      public async run(x: number) {
        this.calls += 1;

        if (x === 1) {
          throw new Error('boom');
        }

        return x * 2;
      }
    }

    const sample = new Sample();

    /**
     * Both calls throw, so the original method has to be executed twice
     */
    await expect(sample.run(1)).rejects.toThrow('boom');
    await expect(sample.run(1)).rejects.toThrow('boom');
    expect(sample.calls).toBe(2);
  });
});
/**
 * Pick the strategy of cache key form
 * It could be concatenated list of arguments like 'projectId:eventId'
 * Or it could be hashed json object — blake2b512 algorithm
 */
export type MemoizeKeyStrategy = 'concat' | 'hash';

/**
 * Options of the memoize decorator
 */
export interface MemoizeOptions {
  /**
   * Max number of values stored in LRU cache at the same time (50 when omitted)
   */
  max?: number;

  /**
   * TTL in milliseconds (30 minutes when omitted), passed to the cache as `maxAge`
   * NOTE(review): how the cache treats `0` depends on the installed lru-cache version — confirm before relying on it
   */
  ttl?: number;

  /**
   * Strategy for key generation ('concat' when omitted)
   */
  strategy?: MemoizeKeyStrategy;
}

/**
 * Separator placed between serialized arguments of a 'concat' cache key
 */
const ARG_SEPARATOR = '__ARG_JOIN__';

/**
 * Serializes a single argument for the 'concat' key.
 *
 * Plain JSON.stringify is not enough for top-level values: it returns undefined for
 * undefined / symbols / functions, turns NaN and ±Infinity into "null" and throws on BigInt.
 * Such values get their own unquoted tokens, which can never clash with JSON output of strings.
 * Values nested inside objects are still serialized by JSON.stringify as is.
 *
 * @param arg - argument of the memoized method
 * @returns string fragment of the cache key
 */
function serializeArgument(arg: unknown): string {
  switch (typeof arg) {
    case 'undefined':
      return 'undefined';
    case 'number':
      return Number.isFinite(arg) ? JSON.stringify(arg) : String(arg);
    case 'bigint':
      return `${String(arg)}n`;
    case 'symbol':
      /**
       * Symbols with equal descriptions still share a key — a string key cannot carry identity
       */
      return String(arg);
    case 'function':
      return `function:${arg.name}`;
    default:
      /**
       * JSON.stringify may still return undefined (e.g. toJSON() returning undefined)
       */
      return JSON.stringify(arg) ?? 'undefined';
  }
}

/**
 * Async-only, per-method LRU-backed memoization decorator.
 * Cache persists for the lifetime of the class instance (e.g. worker):
 * it is stored on the instance under the `memoizeCache:<method name>` property.
 *
 * Rejected calls are never cached. Calls resolved with `undefined` are not served from the cache either,
 * because `undefined` is what the cache lookup returns for a missing key.
 *
 * With the 'hash' strategy the key is Crypto.hash of the arguments array, so it keeps JSON.stringify semantics.
 *
 * @param options - cache size, ttl and key strategy
 * @returns method decorator that replaces the method with its memoized version
 * @throws {Error} at decoration time when the decorated member is not a function
 */
export function memoize(options: MemoizeOptions = {}): MethodDecorator {
  /* eslint-disable @typescript-eslint/no-magic-numbers */
  const {
    max = 50,
    ttl = 1000 * 60 * 30,
    strategy = 'concat',
  } = options;
  /* eslint-enable */

  return function (
    _target,
    propertyKey,
    descriptor: PropertyDescriptor
  ): PropertyDescriptor {
    const originalMethod = descriptor.value;

    if (typeof originalMethod !== 'function') {
      throw new Error('@Memoize can only decorate methods');
    }

    /**
     * Name of the instance property that holds the cache of this decorated method
     */
    const cacheKey = `memoizeCache:${String(propertyKey)}`;

    descriptor.value = async function (this: Record<string, unknown>, ...args: unknown[]): Promise<unknown> {
      /**
       * Create a new cache if it does not exist yet (for certain method of certain instance)
       */
      const cache = (this[cacheKey] ??= new LRUCache<string, unknown>({
        max,
        maxAge: ttl,
      })) as LRUCache<string, unknown>;

      const key = strategy === 'hash'
        ? Crypto.hash(args, 'blake2b512', 'base64url')
        : args.map(serializeArgument).join(ARG_SEPARATOR);

      /**
       * Check if we have a cached result
       */
      const cachedResult = cache.get(key);

      if (cachedResult !== undefined) {
        return cachedResult;
      }

      /**
       * The value is stored only after a successful call, so a rejection leaves the cache untouched.
       * Nothing is deleted on failure: the key could have been filled by a concurrent successful call.
       */
      const result = await originalMethod.apply(this, args);

      cache.set(key, result);

      return result;
    };

    return descriptor;
  };
}
/**
 * Parses the raw value of the MEMOIZATION_TTL env variable.
 * Missing, empty, non-numeric, negative or infinite values fall back to 0,
 * so a malformed variable never reaches the cache options as NaN or a negative number.
 *
 * @param raw - raw env value
 * @returns ttl in milliseconds, always a finite non-negative number
 */
function parseMemoizationTtl(raw: string | undefined): number {
  const parsed = Number(raw ?? 0);

  return Number.isFinite(parsed) && parsed >= 0 ? parsed : 0;
}

/**
 * TTL (in milliseconds) of memoized worker results, taken from the MEMOIZATION_TTL env variable
 * NOTE(review): the default is 0 — confirm how the LRU cache used by memoize treats a zero ttl
 *
 * eslint does not count decorators as a variable usage
 */
/* eslint-disable-next-line no-unused-vars */
const MEMOIZATION_TTL = parseMemoizationTtl(process.env.MEMOIZATION_TTL);
* Consume rbacktrace frame and catch errors (send them to hawk) */ - const result = await this.cache.get( - `consumeBacktraceFrame:${event.payload.release.toString()}:${Crypto.hash(frame)}:${index}`, - () => { - return this.consumeBacktraceFrame(frame, releaseRecord) - .catch((error) => { - this.logger.error('Error while consuming ' + error.stack); - - /** - * Send error to Hawk - */ - HawkCatcher.send(error, { - payload: event.payload as unknown as Record, - }); - - return event.payload.backtrace[index]; - }); - } - ); - - return result; + return await this.consumeBacktraceFrame(frame, releaseRecord) + .catch((error) => { + this.logger.error('Error while consuming ' + error.stack); + + /** + * Send error to Hawk + */ + HawkCatcher.send(error, { + payload: backtrace as unknown as Record, + }); + + return backtrace[index]; + }); })); } @@ -189,7 +187,7 @@ export default class JavascriptEventWorker extends EventWorker { /** * Load source map content from Grid fs */ - const mapContent = await this.loadSourceMapFile(mapForFrame); + const mapContent = await this.loadSourceMapFile(mapForFrame._id); if (!mapContent) { this.logger.info(`consumeBacktraceFrame: Can't load map content for ${JSON.stringify(mapForFrame)}`); @@ -197,9 +195,6 @@ export default class JavascriptEventWorker extends EventWorker { return stackFrame; } - /** - * @todo cache source map consumer for file-keys - */ const consumer = this.consumeSourceMap(mapContent); /** @@ -234,10 +229,10 @@ export default class JavascriptEventWorker extends EventWorker { */ lines = this.readSourceLines(consumer, originalLocation); - // const originalContent = consumer.sourceContentFor(originalLocation.source); + const originalContent = consumer.sourceContentFor(originalLocation.source); - // functionContext = this.getFunctionContext(originalContent, originalLocation.line) ?? originalLocation.name; - } catch(e) { + functionContext = await this.getFunctionContext(originalContent, originalLocation.line) ?? 
originalLocation.name; + } catch (e) { HawkCatcher.send(e); this.logger.error('Can\'t get function context'); this.logger.error(e); @@ -260,7 +255,7 @@ export default class JavascriptEventWorker extends EventWorker { * @param line - number of the line from the stack trace * @returns {string | null} - string of the function context or null if it could not be parsed */ - private _getFunctionContext(sourceCode: string, line: number): string | null { + private getFunctionContext(sourceCode: string, line: number): string | null { let functionName: string | null = null; let className: string | null = null; let isAsync = false; @@ -361,13 +356,13 @@ export default class JavascriptEventWorker extends EventWorker { /** * Downloads source map file from Grid FS * - * @param map - saved file info without content. + * @param mapId - id of the map file in the bucket */ - private loadSourceMapFile(map: SourceMapDataExtended): Promise { + private loadSourceMapFile(mapId: SourceMapDataExtended['_id']): Promise { return new Promise((resolve, reject) => { let buf = Buffer.from(''); - const readstream = this.db.getBucket().openDownloadStream(map._id) + const readstream = this.db.getBucket().openDownloadStream(mapId) .on('data', (chunk) => { buf = Buffer.concat([buf, chunk]); }) diff --git a/workers/javascript/tests/index.test.ts b/workers/javascript/tests/index.test.ts index 7fff5c15..531826e8 100644 --- a/workers/javascript/tests/index.test.ts +++ b/workers/javascript/tests/index.test.ts @@ -4,8 +4,7 @@ import { JavaScriptEventWorkerTask } from '../types/javascript-event-worker-task import { Db, MongoClient, ObjectId } from 'mongodb'; import * as WorkerNames from '../../../lib/workerNames'; import { ReleaseDBScheme } from '@hawk.so/types'; - -const itIf = it.skip; +import cloneDeep from 'lodash.clonedeep'; describe('JavaScript event worker', () => { let connection: MongoClient; @@ -155,10 +154,14 @@ describe('JavaScript event worker', () => { useNewUrlParser: true, 
useUnifiedTopology: true, }); - db = connection.db('hawk'); + db = connection.db(); // Use default database from connection URI, same as worker + }); + + afterEach(() => { + jest.restoreAllMocks(); }); - itIf('should process an event without errors and add a task with correct event information to grouper', async () => { + it('should process an event without errors and add a task with correct event information to grouper', async () => { /** * Arrange */ @@ -190,7 +193,7 @@ describe('JavaScript event worker', () => { await worker.finish(); }); - itIf('should parse user agent correctly', async () => { + it('should parse user agent correctly', async () => { /** * Arrange */ @@ -229,7 +232,7 @@ describe('JavaScript event worker', () => { await worker.finish(); }); - itIf('should parse source maps correctly', async () => { + it('should parse source maps correctly', async () => { /** * Arrange */ @@ -278,7 +281,7 @@ describe('JavaScript event worker', () => { await worker.finish(); }); - itIf('should use cache while processing source maps', async () => { + it('should use cache while processing source maps', async () => { /** * Arrange */ @@ -312,7 +315,131 @@ describe('JavaScript event worker', () => { await worker.finish(); }); - afterAll(async () => { - await connection.close(); + it('should memoize beautifyBacktrace within several handle calls', async () => { + // Arrange + const worker = new JavascriptEventWorker(); + + await worker.start(); + + // Create event with two frames mapping to the same origin file + const workerEvent = { + ...createEventMock({ withBacktrace: true }), + } as JavaScriptEventWorkerTask; + + workerEvent.payload.backtrace = [ + { + file: 'file:///main.js', + line: 1, + column: 100, + }, + { + file: 'file:///main.js', + line: 1, + column: 200, + }, + ] as any; + + const workerEventDuplicate = cloneDeep(workerEvent); + + // Create a release with a single map file used by both frames + const singleMapRelease = { + ...createReleaseMock({ + 
projectId: workerEvent.projectId, + release: workerEvent.payload.release, + }), + } as any; + const firstFileId = singleMapRelease.files[0]._id; + + singleMapRelease.files = [ + { + mapFileName: 'main.js.map', + originFileName: 'main.js', + _id: firstFileId, + }, + ]; + + await db.collection('releases').insertOne(singleMapRelease); + + /** + * Cast prototype to any because getReleaseRecord is ts private + */ + const getReleaseRecordSpy = jest.spyOn(JavascriptEventWorker.prototype as any, 'getReleaseRecord'); + + // Act + await worker.handle(workerEvent); + await worker.handle(workerEventDuplicate); + + // Assert: Since beautifyBacktrace is now memoized, the entire method should only be called once + expect(getReleaseRecordSpy).toHaveBeenCalledTimes(1); + + await worker.finish(); + }); + + it('should not memoize beautifyBacktrace within several calls with different arguments', async () => { + // Arrange + const worker = new JavascriptEventWorker(); + + await worker.start(); + + // Create event with two frames mapping to the same origin file + const workerEvent = { + ...createEventMock({ withBacktrace: true }), + } as JavaScriptEventWorkerTask; + + workerEvent.payload.backtrace = [ + { + file: 'file:///main.js', + line: 1, + column: 100, + }, + ] as any; + + /** + * Worker event with different backtrace + */ + const anotherWorkerEvent = { + ...createEventMock({ withBacktrace: true }), + } as JavaScriptEventWorkerTask; + + anotherWorkerEvent.payload.backtrace = [ + { + file: 'file:///main.js', + line: 10, + column: 14, + }, + ] as any; + + // Create a release with a single map file used by both frames + const singleMapRelease = { + ...createReleaseMock({ + projectId: workerEvent.projectId, + release: workerEvent.payload.release, + }), + } as any; + const firstFileId = singleMapRelease.files[0]._id; + + singleMapRelease.files = [ + { + mapFileName: 'main.js.map', + originFileName: 'main.js', + _id: firstFileId, + }, + ]; + + await 
db.collection('releases').insertOne(singleMapRelease); + + /** + * Cast prototype to any because getReleaseRecord is ts private + */ + const getReleaseRecordSpy = jest.spyOn(JavascriptEventWorker.prototype as any, 'getReleaseRecord'); + + // Act + await worker.handle(workerEvent); + await worker.handle(anotherWorkerEvent); + + // Assert: Since beautifyBacktrace is now memoized, the entire method should only be called once + expect(getReleaseRecordSpy).toHaveBeenCalledTimes(2); + + await worker.finish(); }); }); diff --git a/workers/javascript/types/beautify-backtrace-payload.d.ts b/workers/javascript/types/beautify-backtrace-payload.d.ts new file mode 100644 index 00000000..1d88c3b0 --- /dev/null +++ b/workers/javascript/types/beautify-backtrace-payload.d.ts @@ -0,0 +1,8 @@ +import { JavaScriptEventWorkerTask } from './javascript-event-worker-task'; + +/** + * Type that represents the payload of the beautify backtrace method + * It requires id of the project, release and backtrace to beautify + */ +export type BeautifyBacktracePayload = Pick + & Pick; diff --git a/yarn.lock b/yarn.lock index 13207d35..935577aa 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4629,11 +4629,6 @@ jest@^29.2.2: import-local "^3.0.2" jest-cli "^29.7.0" -js-levenshtein@^1.1.6: - version "1.1.6" - resolved "https://registry.yarnpkg.com/js-levenshtein/-/js-levenshtein-1.1.6.tgz#c6cee58eb3550372df8deb85fad5ce66ce01d59d" - integrity sha512-X2BB11YZtrRqY4EnQcLX5Rh373zbK4alC1FW7D7MBhL2gtcC17cTnr6DmfHZeS0s2rTHjUTMMHfG7gO8SSdw+g== - js-tokens@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499"