Skip to content

Commit f285289

Browse files
authored
Enable safeRestore config and retry op on recovery (#548)
1 parent 968168e commit f285289

3 files changed

Lines changed: 164 additions & 8 deletions

File tree

src/datastore/LMDBStoreFactory.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,10 @@ export class LMDBStoreFactory implements DataStoreFactory {
235235
if (this.closed) return;
236236

237237
try {
238+
const staleLocks = this.env.readerCheck();
239+
if (staleLocks > 0) {
240+
this.log.info(`Removed ${staleLocks} stale reader locks for LMDB`);
241+
}
238242
const envStat = stats(this.env);
239243
this.telemetry.histogram('version', VersionNumber);
240244
this.telemetry.histogram('env.size.bytes', envStat.totalSize, { unit: 'By' });
@@ -271,6 +275,11 @@ function createEnv(lmdbDir: string) {
271275
mapSize: TotalMaxDbSize,
272276
encoding: Encoding,
273277
encryptionKey: encryptionStrategy(VersionNumber),
278+
// Forces use of the last safely flushed transaction on open, rather than the last committed
279+
// (but possibly unflushed) one. Prevents corruption when the process is killed mid-flush.
280+
// https://github.com/kriszyp/lmdb-js#readme ("safeRestore")
281+
// https://github.com/kriszyp/lmdb-js/blob/master/open.js#L188 (flag 0x800)
282+
...({ safeRestore: true } as Record<string, unknown>),
274283
};
275284

276285
if (isWindows) {

src/datastore/lmdb/LMDBStore.ts

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ export class LMDBStore implements DataStore {
1212
constructor(
1313
public readonly name: StoreName,
1414
private store: Database<unknown, string>,
15-
private readonly onError?: ErrorHandler,
16-
private readonly validateDatabase?: () => void,
15+
private readonly onError: ErrorHandler,
16+
private readonly validateDatabase: () => void,
1717
) {
1818
this.telemetry = TelemetryService.instance.get(`LMDB.${name}`);
1919
}
@@ -27,11 +27,13 @@ export class LMDBStore implements DataStore {
2727
op,
2828
() => {
2929
try {
30-
this.validateDatabase?.();
30+
this.validateDatabase();
3131
return fn();
3232
} catch (e) {
33-
this.onError?.(e);
34-
throw e;
33+
this.onError(e);
34+
this.telemetry.count(`retry.${op}`, 1);
35+
this.validateDatabase();
36+
return fn();
3537
}
3638
},
3739
{ captureErrorAttributes: true },
@@ -43,11 +45,13 @@ export class LMDBStore implements DataStore {
4345
op,
4446
async () => {
4547
try {
46-
this.validateDatabase?.();
48+
this.validateDatabase();
4749
return await fn();
4850
} catch (e) {
49-
this.onError?.(e);
50-
throw e;
51+
this.onError(e);
52+
this.telemetry.count(`retry.${op}`, 1);
53+
this.validateDatabase();
54+
return await fn();
5155
}
5256
},
5357
{ captureErrorAttributes: true },
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import { randomUUID as v4 } from 'crypto';
2+
import fs from 'fs';
3+
import { join } from 'path';
4+
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
5+
import { StoreName } from '../../../src/datastore/DataStore';
6+
import { LMDBStoreFactory } from '../../../src/datastore/LMDBStoreFactory';
7+
8+
describe('LMDB retry after recovery', () => {
9+
let testDir: string;
10+
let factory: LMDBStoreFactory;
11+
12+
beforeEach(() => {
13+
testDir = join(process.cwd(), 'node_modules', '.cache', 'lmdb-retry-test', v4());
14+
fs.mkdirSync(testDir, { recursive: true });
15+
factory = new LMDBStoreFactory(testDir);
16+
});
17+
18+
afterEach(async () => {
19+
await factory.close();
20+
});
21+
22+
describe('sync operations retry on transient failure', () => {
23+
it('should return data after transient get() failure triggers recovery and retry', async () => {
24+
const store = factory.get(StoreName.public_schemas);
25+
await store.put('key', 'value');
26+
27+
const lmdbStore = store as any;
28+
const realStore = lmdbStore.store;
29+
30+
realStore.get = () => {
31+
throw new Error('MDB_CORRUPTED: Located page was wrong type');
32+
};
33+
34+
// Recovery replaces the store handle, retry succeeds on the new handle
35+
const result = store.get<string>('key');
36+
expect(result).toBe('value');
37+
});
38+
39+
it('should throw if recovery fails and retry also fails', () => {
40+
const store = factory.get(StoreName.public_schemas);
41+
const lmdbStore = store as any;
42+
43+
// Mock handleError to not actually recover
44+
const originalHandleError = (factory as any).handleError.bind(factory);
45+
(factory as any).handleError = () => {
46+
/* no-op: simulates recovery failure */
47+
};
48+
49+
const realStore = lmdbStore.store;
50+
realStore.get = () => {
51+
throw new Error('MDB_CORRUPTED: permanent');
52+
};
53+
54+
expect(() => store.get<string>('key')).toThrow('MDB_CORRUPTED: permanent');
55+
(factory as any).handleError = originalHandleError;
56+
});
57+
58+
it('should retry keys() after transient failure', async () => {
59+
const store = factory.get(StoreName.public_schemas);
60+
await store.put('k1', 'v1');
61+
62+
const lmdbStore = store as any;
63+
const realStore = lmdbStore.store;
64+
const originalGetKeys = realStore.getKeys.bind(realStore);
65+
let callCount = 0;
66+
67+
realStore.getKeys = (opts: any) => {
68+
callCount++;
69+
if (callCount === 1) throw new Error('MDB_BAD_TXN: Transaction must abort');
70+
return originalGetKeys(opts);
71+
};
72+
73+
const keys = store.keys(10);
74+
expect(keys).toContain('k1');
75+
});
76+
});
77+
78+
describe('async operations retry on transient failure', () => {
79+
it('should succeed after transient put() failure', async () => {
80+
const store = factory.get(StoreName.public_schemas);
81+
const lmdbStore = store as any;
82+
const realStore = lmdbStore.store;
83+
84+
realStore.put = () => {
85+
throw new Error('MDB_PAGE_NOTFOUND: Requested page not found');
86+
};
87+
88+
await store.put('key', 'value');
89+
expect(store.get('key')).toBe('value');
90+
});
91+
92+
it('should throw on async if retry also fails', async () => {
93+
const store = factory.get(StoreName.public_schemas);
94+
const lmdbStore = store as any;
95+
96+
(factory as any).handleError = () => {
97+
/* no-op */
98+
};
99+
100+
const realStore = lmdbStore.store;
101+
realStore.put = () => {
102+
throw new Error('MDB_PANIC: unrecoverable');
103+
};
104+
105+
await expect(store.put('key', 'value')).rejects.toThrow('MDB_PANIC: unrecoverable');
106+
});
107+
});
108+
109+
describe('recovery is called exactly once per failure', () => {
110+
it('should call handleError once on transient failure', async () => {
111+
const store = factory.get(StoreName.public_schemas);
112+
await store.put('key', 'value');
113+
114+
const handleErrorSpy = vi.spyOn(factory as any, 'handleError');
115+
const lmdbStore = store as any;
116+
const realStore = lmdbStore.store;
117+
118+
realStore.get = () => {
119+
throw new Error('MDB_CORRUPTED: test');
120+
};
121+
122+
store.get<string>('key');
123+
expect(handleErrorSpy).toHaveBeenCalledTimes(1);
124+
handleErrorSpy.mockRestore();
125+
});
126+
127+
it('should work normally after a successful retry', async () => {
128+
const store = factory.get(StoreName.public_schemas);
129+
await store.put('key1', 'value1');
130+
131+
const lmdbStore = store as any;
132+
const realStore = lmdbStore.store;
133+
134+
realStore.get = () => {
135+
throw new Error('MDB_BAD_TXN: abort');
136+
};
137+
138+
expect(store.get<string>('key1')).toBe('value1');
139+
await store.put('key2', 'value2');
140+
expect(store.get<string>('key2')).toBe('value2');
141+
});
142+
});
143+
});

0 commit comments

Comments
 (0)