Skip to content

Commit b5b9c6b

Browse files
committed
Merge PR #72: keep WAL journal mode during bulk write
2 parents f92fd66 + a0e809d commit b5b9c6b

File tree

5 files changed

+192
-13
lines changed

5 files changed

+192
-13
lines changed

Makefile.cbm

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,8 @@ TEST_STORE_SRCS = \
248248
tests/test_store_nodes.c \
249249
tests/test_store_edges.c \
250250
tests/test_store_search.c \
251-
tests/test_store_arch.c
251+
tests/test_store_arch.c \
252+
tests/test_store_bulk.c
252253

253254
TEST_CYPHER_SRCS = \
254255
tests/test_cypher.c

src/store/store.c

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -453,23 +453,21 @@ int cbm_store_rollback(cbm_store_t *s) {
453453
/* ── Bulk write ─────────────────────────────────────────────────── */
454454

455455
int cbm_store_begin_bulk(cbm_store_t *s) {
456-
int rc = exec_sql(s, "PRAGMA journal_mode = MEMORY;");
457-
if (rc != CBM_STORE_OK) {
458-
return rc;
459-
}
460-
rc = exec_sql(s, "PRAGMA synchronous = OFF;");
456+
/* Stay in WAL mode throughout. Switching to MEMORY journal mode would
457+
* make the database unrecoverable if the process crashes mid-write,
458+
* because the in-memory rollback journal is lost on crash.
459+
* WAL mode is crash-safe: uncommitted WAL entries are simply discarded
460+
* on the next open. Performance is preserved via synchronous=OFF and a
461+
* larger cache, which are safe with WAL. */
462+
int rc = exec_sql(s, "PRAGMA synchronous = OFF;");
461463
if (rc != CBM_STORE_OK) {
462464
return rc;
463465
}
464466
return exec_sql(s, "PRAGMA cache_size = -65536;"); /* 64 MB */
465467
}
466468

467469
int cbm_store_end_bulk(cbm_store_t *s) {
468-
int rc = exec_sql(s, "PRAGMA journal_mode = WAL;");
469-
if (rc != CBM_STORE_OK) {
470-
return rc;
471-
}
472-
rc = exec_sql(s, "PRAGMA synchronous = NORMAL;");
470+
int rc = exec_sql(s, "PRAGMA synchronous = NORMAL;");
473471
if (rc != CBM_STORE_OK) {
474472
return rc;
475473
}

src/store/store.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,10 +212,11 @@ int cbm_store_rollback(cbm_store_t *s);
212212

213213
/* ── Bulk write optimization ────────────────────────────────────── */
214214

215-
/* Switch to MEMORY journal for maximum write throughput. */
215+
/* Tune pragmas for bulk write throughput (synchronous=OFF, large cache).
216+
* WAL journal mode is preserved throughout for crash safety. */
216217
int cbm_store_begin_bulk(cbm_store_t *s);
217218

218-
/* Restore WAL journal mode after bulk writes. */
219+
/* Restore normal pragmas (synchronous=NORMAL, default cache) after bulk writes. */
219220
int cbm_store_end_bulk(cbm_store_t *s);
220221

221222
/* Drop user indexes for faster bulk inserts. */

tests/test_main.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ extern void suite_sqlite_writer(void);
3737
extern void suite_go_lsp(void);
3838
extern void suite_c_lsp(void);
3939
extern void suite_store_arch(void);
40+
extern void suite_store_bulk(void);
4041
extern void suite_httplink(void);
4142
extern void suite_traces(void);
4243
extern void suite_configlink(void);
@@ -69,6 +70,7 @@ int main(void) {
6970
RUN_SUITE(store_nodes);
7071
RUN_SUITE(store_edges);
7172
RUN_SUITE(store_search);
73+
RUN_SUITE(store_bulk);
7274

7375
/* Cypher (M6) */
7476
RUN_SUITE(cypher);

tests/test_store_bulk.c

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
/*
2+
* test_store_bulk.c — Crash-safety tests for bulk write mode.
3+
*
4+
* Verifies that cbm_store_begin_bulk / cbm_store_end_bulk never switch away
5+
* from WAL journal mode. Switching to MEMORY journal mode during bulk writes
6+
* makes the database unrecoverable on a crash because the in-memory rollback
7+
* journal is lost. WAL mode is inherently crash-safe: uncommitted WAL entries
8+
* are discarded on the next open.
9+
*
10+
* Tests:
11+
* bulk_pragma_wal_invariant — journal_mode stays "wal" after begin_bulk
12+
* bulk_pragma_end_wal_invariant — journal_mode stays "wal" after end_bulk
13+
* bulk_crash_recovery — DB is readable after simulated crash mid-bulk
14+
*/
15+
#include "test_framework.h"
16+
#include <store/store.h>
17+
#include <sqlite3.h>
18+
#include <stdio.h>
19+
#include <stdlib.h>
20+
#include <string.h>
21+
#ifndef _WIN32
22+
#include <unistd.h>
23+
#include <sys/wait.h>
24+
#endif
25+
26+
/* ── Helpers ──────────────────────────────────────────────────── */
27+
28+
/* Query journal_mode via a separate read-only connection so the result is
29+
* independent of any state held inside the cbm_store_t under test. */
30+
static char *get_journal_mode(const char *db_path) {
31+
sqlite3 *db;
32+
if (sqlite3_open_v2(db_path, &db, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK)
33+
return NULL;
34+
sqlite3_stmt *stmt;
35+
char *mode = NULL;
36+
if (sqlite3_prepare_v2(db, "PRAGMA journal_mode;", -1, &stmt, NULL) == SQLITE_OK) {
37+
if (sqlite3_step(stmt) == SQLITE_ROW)
38+
mode = strdup((const char *)sqlite3_column_text(stmt, 0));
39+
sqlite3_finalize(stmt);
40+
}
41+
sqlite3_close(db);
42+
return mode;
43+
}
44+
45+
static void make_temp_path(char *buf, size_t n) {
46+
snprintf(buf, n, "/tmp/cmm_bulk_test_%d.db", (int)getpid());
47+
}
48+
49+
static void cleanup_db(const char *path) {
50+
remove(path);
51+
char aux[512];
52+
snprintf(aux, sizeof(aux), "%s-wal", path);
53+
remove(aux);
54+
snprintf(aux, sizeof(aux), "%s-shm", path);
55+
remove(aux);
56+
}
57+
58+
/* ── Tests ──────────────────────────────────────────────────────── */
59+
60+
/* begin_bulk must NOT switch journal_mode away from WAL. */
61+
TEST(bulk_pragma_wal_invariant) {
62+
char db_path[256];
63+
make_temp_path(db_path, sizeof(db_path));
64+
cleanup_db(db_path);
65+
66+
cbm_store_t *s = cbm_store_open_path(db_path);
67+
ASSERT_NOT_NULL(s);
68+
69+
char *before = get_journal_mode(db_path);
70+
ASSERT_NOT_NULL(before);
71+
ASSERT_STR_EQ(before, "wal");
72+
free(before);
73+
74+
int rc = cbm_store_begin_bulk(s);
75+
ASSERT_EQ(rc, CBM_STORE_OK);
76+
77+
char *after = get_journal_mode(db_path);
78+
ASSERT_NOT_NULL(after);
79+
ASSERT_STR_EQ(after, "wal"); /* FAILS with bug, PASSES with fix */
80+
free(after);
81+
82+
cbm_store_end_bulk(s);
83+
cbm_store_close(s);
84+
cleanup_db(db_path);
85+
PASS();
86+
}
87+
88+
/* end_bulk must also leave journal_mode as WAL. */
89+
TEST(bulk_pragma_end_wal_invariant) {
90+
char db_path[256];
91+
make_temp_path(db_path, sizeof(db_path));
92+
cleanup_db(db_path);
93+
94+
cbm_store_t *s = cbm_store_open_path(db_path);
95+
ASSERT_NOT_NULL(s);
96+
97+
cbm_store_begin_bulk(s);
98+
cbm_store_end_bulk(s);
99+
100+
char *mode = get_journal_mode(db_path);
101+
ASSERT_NOT_NULL(mode);
102+
ASSERT_STR_EQ(mode, "wal");
103+
free(mode);
104+
105+
cbm_store_close(s);
106+
cleanup_db(db_path);
107+
PASS();
108+
}
109+
110+
/* Simulate a crash mid-bulk-write: fork a child that calls begin_bulk, opens
111+
* an explicit transaction, and then calls _exit() without committing or calling
112+
* end_bulk. The parent verifies the database is still openable and that
113+
* committed baseline data is intact and uncommitted data is absent.
114+
*
115+
* This test uses fork()/waitpid() and is therefore POSIX-only. */
116+
#ifndef _WIN32
117+
TEST(bulk_crash_recovery) {
118+
char db_path[256];
119+
make_temp_path(db_path, sizeof(db_path));
120+
cleanup_db(db_path);
121+
122+
/* Write committed baseline data. */
123+
cbm_store_t *s = cbm_store_open_path(db_path);
124+
ASSERT_NOT_NULL(s);
125+
int rc = cbm_store_upsert_project(s, "baseline", "/tmp/baseline");
126+
ASSERT_EQ(rc, CBM_STORE_OK);
127+
cbm_store_close(s);
128+
129+
/* Child: enter bulk mode, start a transaction, write, then crash. */
130+
pid_t pid = fork();
131+
if (pid == 0) {
132+
cbm_store_t *cs = cbm_store_open_path(db_path);
133+
if (!cs)
134+
_exit(1);
135+
cbm_store_begin_bulk(cs);
136+
cbm_store_begin(cs); /* explicit open transaction */
137+
cbm_store_upsert_project(cs, "crashed", "/tmp/crashed");
138+
/* Crash: no COMMIT, no end_bulk, no close. */
139+
_exit(0);
140+
}
141+
ASSERT_GT(pid, 0);
142+
int status;
143+
waitpid(pid, &status, 0);
144+
/* Confirm child exited normally so the write actually occurred. */
145+
ASSERT(WIFEXITED(status) && WEXITSTATUS(status) == 0);
146+
147+
/* Recovery: database must open cleanly. */
148+
cbm_store_t *recovered = cbm_store_open_path(db_path);
149+
ASSERT_NOT_NULL(recovered); /* NULL would indicate corruption */
150+
151+
/* Baseline commit must survive. */
152+
cbm_project_t p = {0};
153+
rc = cbm_store_get_project(recovered, "baseline", &p);
154+
ASSERT_EQ(rc, CBM_STORE_OK);
155+
ASSERT_STR_EQ(p.name, "baseline");
156+
cbm_project_free_fields(&p);
157+
158+
/* Uncommitted "crashed" write must NOT appear after recovery. */
159+
cbm_project_t p2 = {0};
160+
int rc2 = cbm_store_get_project(recovered, "crashed", &p2);
161+
ASSERT_NEQ(rc2, CBM_STORE_OK); /* row must be absent */
162+
163+
cbm_store_close(recovered);
164+
cleanup_db(db_path);
165+
PASS();
166+
}
167+
#endif /* _WIN32 */
168+
169+
/* ── Suite ──────────────────────────────────────────────────────── */
170+
171+
SUITE(store_bulk) {
172+
RUN_TEST(bulk_pragma_wal_invariant);
173+
RUN_TEST(bulk_pragma_end_wal_invariant);
174+
#ifndef _WIN32
175+
RUN_TEST(bulk_crash_recovery);
176+
#endif
177+
}

0 commit comments

Comments
 (0)