-
Notifications
You must be signed in to change notification settings - Fork 5
Enable safeRestore config and retry op on recovery #548
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,143 @@ | ||
| import { randomUUID as v4 } from 'crypto'; | ||
| import fs from 'fs'; | ||
| import { join } from 'path'; | ||
| import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; | ||
| import { StoreName } from '../../../src/datastore/DataStore'; | ||
| import { LMDBStoreFactory } from '../../../src/datastore/LMDBStoreFactory'; | ||
|
|
||
| describe('LMDB retry after recovery', () => { | ||
| let testDir: string; | ||
| let factory: LMDBStoreFactory; | ||
|
|
||
| beforeEach(() => { | ||
| // Give every test its own UUID-named directory under node_modules/.cache. | ||
| const runId = v4(); | ||
| testDir = join(process.cwd(), 'node_modules', '.cache', 'lmdb-retry-test', runId); | ||
| fs.mkdirSync(testDir, { recursive: true }); | ||
| // The factory under test is constructed with that fresh directory. | ||
| factory = new LMDBStoreFactory(testDir); | ||
| }); | ||
|
|
||
| afterEach(async () => { | ||
| try { | ||
| await factory.close(); | ||
| } finally { | ||
| // Delete the per-test directory created in beforeEach so repeated runs do not pile up under node_modules/.cache. | ||
| // `force` tolerates an already-missing path; `maxRetries` retries transient EBUSY/EPERM-style removal failures. | ||
| fs.rmSync(testDir, { recursive: true, force: true, maxRetries: 3 }); | ||
| } | ||
| }); | ||
|
|
||
| describe('sync operations retry on transient failure', () => { | ||
| it('should return data after transient get() failure triggers recovery and retry', async () => { | ||
| const schemas = factory.get(StoreName.public_schemas); | ||
| await schemas.put('key', 'value'); | ||
| // Reach into the wrapper for its underlying handle and make that handle's get() throw. | ||
| const brokenHandle = (schemas as any).store; | ||
| brokenHandle.get = () => { | ||
| throw new Error('MDB_CORRUPTED: Located page was wrong type'); | ||
| }; | ||
| // Recovery is expected to swap in a new store handle, so the retried read succeeds. | ||
| expect(schemas.get<string>('key')).toBe('value'); | ||
| }); | ||
|
|
||
| it('should throw if recovery fails and retry also fails', () => { | ||
| const store = factory.get(StoreName.public_schemas); | ||
| // Stub handleError with a no-op to simulate a recovery attempt that does nothing. | ||
| const handleErrorStub = vi.spyOn(factory as any, 'handleError').mockImplementation(() => { | ||
| /* no-op: simulates recovery failure */ | ||
| }); | ||
| try { | ||
| // Make get() on the underlying handle fail permanently. | ||
| const realStore = (store as any).store; | ||
| realStore.get = () => { | ||
| throw new Error('MDB_CORRUPTED: permanent'); | ||
| }; | ||
| expect(() => store.get<string>('key')).toThrow('MDB_CORRUPTED: permanent'); | ||
| } finally { | ||
| // Restore even when the assertion above fails, so afterEach closes the factory with the real handler. | ||
| handleErrorStub.mockRestore(); | ||
| } | ||
| }); | ||
|
|
||
| it('should retry keys() after transient failure', async () => { | ||
| const store = factory.get(StoreName.public_schemas); | ||
| await store.put('k1', 'v1'); | ||
| const handle = (store as any).store; | ||
| const workingGetKeys = handle.getKeys.bind(handle); | ||
| // Fail only the very first getKeys() call; later calls delegate to the real implementation. | ||
| let alreadyFailed = false; | ||
| handle.getKeys = (opts: any) => { | ||
| if (!alreadyFailed) { | ||
| alreadyFailed = true; | ||
| throw new Error('MDB_BAD_TXN: Transaction must abort'); | ||
| } | ||
| return workingGetKeys(opts); | ||
| }; | ||
| expect(store.keys(10)).toContain('k1'); | ||
| }); | ||
| }); | ||
|
|
||
| describe('async operations retry on transient failure', () => { | ||
| it('should succeed after transient put() failure', async () => { | ||
| const store = factory.get(StoreName.public_schemas); | ||
| // Make put() on the current underlying handle throw a page-not-found error. | ||
| const handle = (store as any).store; | ||
| handle.put = () => { | ||
| throw new Error('MDB_PAGE_NOTFOUND: Requested page not found'); | ||
| }; | ||
| // The write must still resolve, and the value must be readable afterwards. | ||
| await store.put('key', 'value'); | ||
| const readBack = store.get('key'); | ||
| expect(readBack).toBe('value'); | ||
| }); | ||
|
|
||
| it('should throw on async if retry also fails', async () => { | ||
| const store = factory.get(StoreName.public_schemas); | ||
| // Stub handleError with a no-op so no recovery takes place. | ||
| const handleErrorStub = vi.spyOn(factory as any, 'handleError').mockImplementation(() => { | ||
| /* no-op */ | ||
| }); | ||
| try { | ||
| // Make put() on the underlying handle fail on every call. | ||
| const realStore = (store as any).store; | ||
| realStore.put = () => { | ||
| throw new Error('MDB_PANIC: unrecoverable'); | ||
| }; | ||
| await expect(store.put('key', 'value')).rejects.toThrow('MDB_PANIC: unrecoverable'); | ||
| } finally { | ||
| // Put the real handler back before afterEach closes the factory, pass or fail. | ||
| handleErrorStub.mockRestore(); | ||
| } | ||
| }); | ||
| }); | ||
|
|
||
| describe('recovery is called exactly once per failure', () => { | ||
| it('should call handleError once on transient failure', async () => { | ||
| const store = factory.get(StoreName.public_schemas); | ||
| await store.put('key', 'value'); | ||
| // Spy on handleError so its invocations can be counted. | ||
| const recoverySpy = vi.spyOn(factory as any, 'handleError'); | ||
| const handle = (store as any).store; | ||
| handle.get = () => { | ||
| throw new Error('MDB_CORRUPTED: test'); | ||
| }; | ||
| // A single failing read should result in exactly one handleError call. | ||
| store.get<string>('key'); | ||
| expect(recoverySpy).toHaveBeenCalledTimes(1); | ||
| recoverySpy.mockRestore(); | ||
| }); | ||
|
|
||
| it('should work normally after a successful retry', async () => { | ||
| const store = factory.get(StoreName.public_schemas); | ||
| await store.put('key1', 'value1'); | ||
| // Break get() on the handle the wrapper currently holds. | ||
| const staleHandle = (store as any).store; | ||
| staleHandle.get = () => { | ||
| throw new Error('MDB_BAD_TXN: abort'); | ||
| }; | ||
| // The first read goes through the retry path and must still return the stored value. | ||
| const firstRead = store.get<string>('key1'); | ||
| expect(firstRead).toBe('value1'); | ||
| // Subsequent writes and reads are expected to behave normally. | ||
| await store.put('key2', 'value2'); | ||
| const secondRead = store.get<string>('key2'); | ||
| expect(secondRead).toBe('value2'); | ||
| }); | ||
| }); | ||
| }); |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
how does this work?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
readerCheck clears out stale reader transaction entries from the reader lock table.
LMDB uses a shared lock table to track active read transactions across processes/threads. If a process crashes or exits without properly closing its read transaction, its entry remains in the table as a stale reader. Stale readers are problematic because LMDB can't reclaim database pages that were in use at the time of that reader's snapshot - this causes the database file to grow continuously since old pages can never be freed.
db.readerCheck() scans the reader lock table, detects entries belonging to processes/PIDs that no longer exist, and clears them. It returns the number of stale entries that were removed.