Skip to content

Commit a1d849a

Browse files
committed
Fix all memory leaks, vendor sqlite3, add Docker test infrastructure
Leak fixes (13 sources, 1.4MB → 0 bytes):
- Cypher: free col_names after rb_set_columns, free rejected inline prop nodes, store WITH aliases in qualified_name for auto-cleanup
- Store: add cbm_project_free_fields, fix find_node_ids_by_qns leak
- Graph buffer: free strdup'd hash keys during delete operations
- SQL scanner: free old start_tag before reassignment in deserialize

Platform independence:
- Vendor sqlite3 amalgamation (3.49.1) — eliminates system libsqlite3 dependency, enables full ASan/LeakSanitizer instrumentation
- Add -I for vendored tree-sitter unicode headers — eliminates hidden dependency on system libicu-dev
- Add test-infrastructure/ with Docker Compose for local GCC+ASan testing that mirrors CI exactly
1 parent bc66541 commit a1d849a

15 files changed

Lines changed: 261887 additions & 530 deletions

File tree

Makefile.cbm

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
# internal/cbm/vendored/ts_runtime/include — tree-sitter API
2222
CBM_DIR = internal/cbm
2323
TS_INCLUDE = $(CBM_DIR)/vendored/ts_runtime/include
24+
# Vendored tree-sitter src/ contains unicode/ headers (umachine.h, utf.h, utf8.h).
25+
# This ensures we use our vendored copies instead of requiring system libicu-dev.
26+
TS_SRC = $(CBM_DIR)/vendored/ts_runtime/src
2427

2528
# ── Optional libgit2 (faster git history parsing) ────────────────
2629
# Auto-detected via pkg-config. Falls back to popen("git log ...") if absent.
@@ -71,9 +74,9 @@ CFLAGS_TSAN = $(CFLAGS_COMMON) -g -O1 \
7174
CXXFLAGS_TSAN = $(CXXFLAGS_COMMON) -g -O1 \
7275
-fsanitize=thread -fno-omit-frame-pointer
7376

74-
LDFLAGS = -lm -lstdc++ -lpthread -lsqlite3 -lz $(LIBGIT2_LIBS)
75-
LDFLAGS_TEST = -lm -lstdc++ -lpthread -lsqlite3 -lz -fsanitize=address,undefined $(LIBGIT2_LIBS)
76-
LDFLAGS_TSAN = -lm -lstdc++ -lpthread -lsqlite3 -lz -fsanitize=thread $(LIBGIT2_LIBS)
77+
LDFLAGS = -lm -lstdc++ -lpthread -lz $(LIBGIT2_LIBS)
78+
LDFLAGS_TEST = -lm -lstdc++ -lpthread -lz -fsanitize=address,undefined $(LIBGIT2_LIBS)
79+
LDFLAGS_TSAN = -lm -lstdc++ -lpthread -lz -fsanitize=thread $(LIBGIT2_LIBS)
7780

7881
# ── Source files ─────────────────────────────────────────────────
7982

@@ -200,6 +203,11 @@ MIMALLOC_CFLAGS_TEST = -std=c11 -g -O1 -w \
200203
-Ivendored/mimalloc/src \
201204
-DMI_OVERRIDE=0
202205

206+
# sqlite3 (vendored amalgamation — compiled ourselves for ASan instrumentation)
207+
SQLITE3_SRC = vendored/sqlite3/sqlite3.c
208+
SQLITE3_CFLAGS = -std=c11 -O2 -w -DSQLITE_DQS=0 -DSQLITE_THREADSAFE=1
209+
SQLITE3_CFLAGS_TEST = -std=c11 -g -O1 -w -DSQLITE_DQS=0 -DSQLITE_THREADSAFE=1
210+
203211
# yyjson (vendored)
204212
YYJSON_SRC = vendored/yyjson/yyjson.c
205213

@@ -276,10 +284,10 @@ BUILD_DIR = build/c
276284
# ── Object file compilation (grammars need relaxed warnings) ─────
277285

278286
# Grammar + tree-sitter runtime: compiled without -Werror (upstream code has warnings)
279-
GRAMMAR_CFLAGS = -std=c11 -D_DEFAULT_SOURCE -O2 -w -I$(CBM_DIR) -I$(TS_INCLUDE)
280-
GRAMMAR_CFLAGS_TEST = -std=c11 -D_DEFAULT_SOURCE -g -O1 -w -I$(CBM_DIR) -I$(TS_INCLUDE) \
287+
GRAMMAR_CFLAGS = -std=c11 -D_DEFAULT_SOURCE -O2 -w -I$(CBM_DIR) -I$(TS_INCLUDE) -I$(TS_SRC)
288+
GRAMMAR_CFLAGS_TEST = -std=c11 -D_DEFAULT_SOURCE -g -O1 -w -I$(CBM_DIR) -I$(TS_INCLUDE) -I$(TS_SRC) \
281289
-fsanitize=address,undefined -fno-omit-frame-pointer
282-
GRAMMAR_CFLAGS_TSAN = -std=c11 -D_DEFAULT_SOURCE -g -O1 -w -I$(CBM_DIR) -I$(TS_INCLUDE) \
290+
GRAMMAR_CFLAGS_TSAN = -std=c11 -D_DEFAULT_SOURCE -g -O1 -w -I$(CBM_DIR) -I$(TS_INCLUDE) -I$(TS_SRC) \
283291
-fsanitize=thread -fno-omit-frame-pointer
284292

285293
# Object files for grammars + ts_runtime + lsp_all + preprocessor
@@ -339,7 +347,17 @@ $(BUILD_DIR)/mimalloc.o: $(MIMALLOC_SRC) | $(BUILD_DIR)
339347
$(BUILD_DIR)/prod_mimalloc.o: $(MIMALLOC_SRC) | $(BUILD_DIR)
340348
$(CC) $(MIMALLOC_CFLAGS) -c -o $@ $<
341349

342-
OBJS_VENDORED_TEST = $(MIMALLOC_OBJ_TEST) $(GRAMMAR_OBJS_TEST) $(TS_RUNTIME_OBJ_TEST) $(LSP_OBJ_TEST) $(PP_OBJ_TEST) $(MONGOOSE_OBJ_TEST)
350+
# sqlite3 object files (vendored amalgamation)
351+
SQLITE3_OBJ_TEST = $(BUILD_DIR)/sqlite3.o
352+
SQLITE3_OBJ_PROD = $(BUILD_DIR)/prod_sqlite3.o
353+
354+
$(BUILD_DIR)/sqlite3.o: $(SQLITE3_SRC) | $(BUILD_DIR)
355+
$(CC) $(SQLITE3_CFLAGS_TEST) -fsanitize=address,undefined -fno-omit-frame-pointer -c -o $@ $<
356+
357+
$(BUILD_DIR)/prod_sqlite3.o: $(SQLITE3_SRC) | $(BUILD_DIR)
358+
$(CC) $(SQLITE3_CFLAGS) -c -o $@ $<
359+
360+
OBJS_VENDORED_TEST = $(MIMALLOC_OBJ_TEST) $(SQLITE3_OBJ_TEST) $(GRAMMAR_OBJS_TEST) $(TS_RUNTIME_OBJ_TEST) $(LSP_OBJ_TEST) $(PP_OBJ_TEST) $(MONGOOSE_OBJ_TEST)
343361

344362
$(BUILD_DIR)/test-runner: $(ALL_TEST_SRCS) $(PROD_SRCS) $(EXTRACTION_SRCS) $(AC_LZ4_SRCS) $(SQLITE_WRITER_SRC) $(OBJS_VENDORED_TEST) | $(BUILD_DIR)
345363
$(CC) $(CFLAGS_TEST) -o $@ \
@@ -376,7 +394,7 @@ $(BUILD_DIR)/prod_lsp_all.o: $(CBM_DIR)/lsp_all.c | $(BUILD_DIR)
376394
$(BUILD_DIR)/prod_preprocessor.o: $(CBM_DIR)/preprocessor.cpp | $(BUILD_DIR)
377395
$(CXX) $(CXXFLAGS_PROD) -w -I$(CBM_DIR)/vendored -c -o $@ $<
378396

379-
OBJS_VENDORED_PROD = $(MIMALLOC_OBJ_PROD) $(GRAMMAR_OBJS_PROD) $(TS_RUNTIME_OBJ_PROD) $(LSP_OBJ_PROD) $(PP_OBJ_PROD) $(MONGOOSE_OBJ_PROD)
397+
OBJS_VENDORED_PROD = $(MIMALLOC_OBJ_PROD) $(SQLITE3_OBJ_PROD) $(GRAMMAR_OBJS_PROD) $(TS_RUNTIME_OBJ_PROD) $(LSP_OBJ_PROD) $(PP_OBJ_PROD) $(MONGOOSE_OBJ_PROD)
380398

381399
MAIN_SRC = src/main.c
382400

internal/cbm/vendored/grammars/sql/scanner.c

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cypher/cypher.c

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2052,7 +2052,7 @@ static void scan_pattern_nodes(cbm_store_t *store, const char *project, int max_
20522052
}
20532053
cbm_store_search_free(&sout);
20542054
}
2055-
/* Apply inline property filters */
2055+
/* Apply inline property filters — free rejected nodes' strings */
20562056
if (first->prop_count > 0) {
20572057
int kept = 0;
20582058
for (int i = 0; i < *out_count; i++) {
@@ -2061,6 +2061,8 @@ static void scan_pattern_nodes(cbm_store_t *store, const char *project, int max_
20612061
(*out_nodes)[kept] = (*out_nodes)[i];
20622062
}
20632063
kept++;
2064+
} else {
2065+
node_fields_free(&(*out_nodes)[i]);
20642066
}
20652067
}
20662068
*out_count = kept;
@@ -2646,17 +2648,20 @@ static int execute_single(cbm_store_t *store, cbm_query_t *q, const char *projec
26462648
} else {
26472649
snprintf(vbuf, sizeof(vbuf), "%d", aggs[a].counts[ci]);
26482650
}
2649-
/* Store as a "virtual node" with the value in name */
2650-
cbm_node_t vn = {.name = heap_strdup(vbuf)};
2651+
/* Store as a "virtual node" with the value in name,
2652+
* alias in qualified_name (freed by node_fields_free). */
2653+
cbm_node_t vn = {.name = heap_strdup(vbuf),
2654+
.qualified_name = heap_strdup(alias)};
26512655
if (vb.var_count < 16) {
2652-
vb.var_names[vb.var_count] = heap_strdup(alias);
2656+
vb.var_names[vb.var_count] = vn.qualified_name;
26532657
vb.var_nodes[vb.var_count] = vn;
26542658
vb.var_count++;
26552659
}
26562660
} else {
2657-
cbm_node_t vn = {.name = heap_strdup(aggs[a].group_vals[ci])};
2661+
cbm_node_t vn = {.name = heap_strdup(aggs[a].group_vals[ci]),
2662+
.qualified_name = heap_strdup(alias)};
26582663
if (vb.var_count < 16) {
2659-
vb.var_names[vb.var_count] = heap_strdup(alias);
2664+
vb.var_names[vb.var_count] = vn.qualified_name;
26602665
vb.var_nodes[vb.var_count] = vn;
26612666
vb.var_count++;
26622667
}
@@ -2695,9 +2700,10 @@ static int execute_single(cbm_store_t *store, cbm_query_t *q, const char *projec
26952700
char func_buf[512];
26962701
const char *val =
26972702
project_item(&bindings[bi], &wc->items[ci], func_buf, sizeof(func_buf));
2698-
cbm_node_t vn = {.name = heap_strdup(val)};
2703+
cbm_node_t vn = {.name = heap_strdup(val),
2704+
.qualified_name = heap_strdup(alias)};
26992705
if (vb.var_count < 16) {
2700-
vb.var_names[vb.var_count] = heap_strdup(alias);
2706+
vb.var_names[vb.var_count] = vn.qualified_name;
27012707
vb.var_nodes[vb.var_count] = vn;
27022708
vb.var_count++;
27032709
}
@@ -2875,6 +2881,14 @@ static int execute_single(cbm_store_t *store, cbm_query_t *q, const char *projec
28752881
}
28762882
}
28772883
rb_set_columns(rb, col_names, ret->count);
2884+
/* Free heap_strdup'd column names (rb_set_columns made its own copies).
2885+
* Only func/property branches heap-allocate; alias takes priority. */
2886+
for (int i = 0; i < ret->count && i < 32; i++) {
2887+
cbm_return_item_t *item = &ret->items[i];
2888+
if (!item->alias && (item->func || (!item->kase && item->property))) {
2889+
free((void *)col_names[i]);
2890+
}
2891+
}
28782892

28792893
if (has_agg) {
28802894
/* Generalized aggregation: COUNT, SUM, AVG, MIN, MAX, COLLECT */

src/graph_buffer/graph_buffer.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -422,8 +422,10 @@ int cbm_gbuf_delete_by_label(cbm_gbuf_t *gb, const char *label) {
422422

423423
/* Remove from primary indexes */
424424
cbm_ht_delete(gb->node_by_qn, n->qualified_name);
425-
void *removed_key = cbm_ht_delete(gb->node_by_id, id_buf);
426-
(void)removed_key;
425+
/* Free the strdup'd key stored in node_by_id (get before delete) */
426+
const char *stored_key = cbm_ht_get_key(gb->node_by_id, id_buf);
427+
cbm_ht_delete(gb->node_by_id, id_buf);
428+
free((void *)stored_key);
427429
}
428430

429431
/* Clear the label array */
@@ -442,11 +444,12 @@ int cbm_gbuf_delete_by_label(cbm_gbuf_t *gb, const char *label) {
442444
make_id_key(tgt_id, sizeof(tgt_id), e->target_id);
443445

444446
if (cbm_ht_get(deleted_set, src_id) || cbm_ht_get(deleted_set, tgt_id)) {
445-
/* Remove from edge dedup index */
447+
/* Remove from edge dedup index (free the strdup'd key) */
446448
char key[EDGE_KEY_BUF];
447449
make_edge_key(key, sizeof(key), e->source_id, e->target_id, e->type);
448-
void *k = cbm_ht_delete(gb->edge_by_key, key);
449-
(void)k;
450+
const char *ekey = cbm_ht_get_key(gb->edge_by_key, key);
451+
cbm_ht_delete(gb->edge_by_key, key);
452+
free((void *)ekey);
450453

451454
/* Remove from secondary indexes incrementally */
452455
make_src_type_key(key, sizeof(key), e->source_id, e->type);
@@ -490,6 +493,7 @@ int cbm_gbuf_delete_by_label(cbm_gbuf_t *gb, const char *label) {
490493
}
491494
gb->edges.count = write_idx;
492495

496+
cbm_ht_foreach(deleted_set, free_key_only, NULL);
493497
cbm_ht_free(deleted_set);
494498
return 0;
495499
}
@@ -636,8 +640,9 @@ int cbm_gbuf_delete_edges_by_type(cbm_gbuf_t *gb, const char *type) {
636640
if (strcmp(e->type, type) == 0) {
637641
char key[EDGE_KEY_BUF];
638642
make_edge_key(key, sizeof(key), e->source_id, e->target_id, e->type);
639-
void *k = cbm_ht_delete(gb->edge_by_key, key);
640-
(void)k;
643+
const char *ekey = cbm_ht_get_key(gb->edge_by_key, key);
644+
cbm_ht_delete(gb->edge_by_key, key);
645+
free((void *)ekey);
641646
free_edge_strings(e);
642647
free(e);
643648
} else {

src/store/store.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -731,7 +731,7 @@ int cbm_store_find_node_ids_by_qns(cbm_store_t *s, const char *project, const ch
731731
memset(out_ids, 0, (size_t)qn_count * sizeof(int64_t));
732732

733733
int found = 0;
734-
cbm_node_t node;
734+
cbm_node_t node = {0};
735735
for (int i = 0; i < qn_count; i++) {
736736
if (!qns[i]) {
737737
continue;
@@ -740,9 +740,10 @@ int cbm_store_find_node_ids_by_qns(cbm_store_t *s, const char *project, const ch
740740
if (rc == CBM_STORE_OK) {
741741
out_ids[i] = node.id;
742742
found++;
743+
cbm_node_free_fields(&node);
744+
memset(&node, 0, sizeof(node));
743745
}
744746
}
745-
// NOLINTNEXTLINE(clang-analyzer-unix.Malloc)
746747
return found;
747748
}
748749

@@ -4278,14 +4279,18 @@ void cbm_store_free_edges(cbm_edge_t *edges, int count) {
42784279
free(edges);
42794280
}
42804281

4282+
void cbm_project_free_fields(cbm_project_t *p) {
4283+
free((void *)p->name);
4284+
free((void *)p->indexed_at);
4285+
free((void *)p->root_path);
4286+
}
4287+
42814288
void cbm_store_free_projects(cbm_project_t *projects, int count) {
42824289
if (!projects) {
42834290
return;
42844291
}
42854292
for (int i = 0; i < count; i++) {
4286-
free((void *)projects[i].name);
4287-
free((void *)projects[i].indexed_at);
4288-
free((void *)projects[i].root_path);
4293+
cbm_project_free_fields(&projects[i]);
42894294
}
42904295
free(projects);
42914296
}

src/store/store.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,9 @@ int cbm_louvain(const int64_t *nodes, int node_count, const cbm_louvain_edge_t *
567567
/* Free heap-allocated strings in a stack-allocated node (does NOT free the node itself). */
568568
void cbm_node_free_fields(cbm_node_t *n);
569569

570+
/* Free heap-allocated strings in a caller-owned project struct, e.g. a stack variable or an array element (does NOT free the project itself). */
571+
void cbm_project_free_fields(cbm_project_t *p);
572+
570573
/* Free an array of nodes returned by find_nodes_by_* functions. */
571574
void cbm_store_free_nodes(cbm_node_t *nodes, int count);
572575

test-infrastructure/Dockerfile

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Mirrors the Ubuntu CI environment exactly:
2+
# - Ubuntu 24.04 (same as GitHub Actions ubuntu-latest / ubuntu-24.04-arm)
3+
# - GCC (system default) with ASan + UBSan + LeakSanitizer
4+
# - zlib1g-dev only (sqlite3 is vendored and compiled from source, so libsqlite3-dev is not installed)
5+
#
6+
# Build: docker build -t cbm-test test-infrastructure/
7+
# Run: docker run --rm -v "$(pwd)":/src cbm-test
8+
9+
FROM ubuntu:noble
10+
11+
# Minimal: gcc + zlib only. sqlite3 is vendored (compiled from source with ASan).
12+
RUN apt-get update && apt-get install -y --no-install-recommends \
13+
gcc g++ make \
14+
zlib1g-dev \
15+
pkg-config \
16+
&& rm -rf /var/lib/apt/lists/*
17+
18+
WORKDIR /src
19+
20+
# Default: run test.sh with GCC (mirrors CI exactly)
21+
ENTRYPOINT ["scripts/test.sh"]
22+
CMD ["CC=gcc", "CXX=g++"]
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Lint environment — mirrors CI lint job exactly:
2+
# - clang-format-20 (from LLVM apt repo)
3+
# - cppcheck 2.20.0 (built from source, same as CI)
4+
#
5+
# Build: docker build -t cbm-lint -f test-infrastructure/Dockerfile.lint test-infrastructure/
6+
# Run: docker run --rm -v "$(pwd)":/src cbm-lint
7+
8+
FROM ubuntu:noble
9+
10+
RUN apt-get update && apt-get install -y --no-install-recommends \
11+
gcc g++ make cmake \
12+
libsqlite3-dev zlib1g-dev \
13+
pkg-config wget gnupg git ca-certificates \
14+
&& rm -rf /var/lib/apt/lists/*
15+
16+
# clang-format-20 (same version as CI)
17+
RUN wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc \
18+
&& echo "deb http://apt.llvm.org/noble/ llvm-toolchain-noble-20 main" > /etc/apt/sources.list.d/llvm-20.list \
19+
&& apt-get update && apt-get install -y --no-install-recommends clang-format-20 \
20+
&& rm -rf /var/lib/apt/lists/*
21+
22+
# cppcheck 2.20.0 (same version as CI)
23+
RUN git clone --depth 1 --branch 2.20.0 https://github.com/danmar/cppcheck.git /tmp/cppcheck \
24+
&& cmake -S /tmp/cppcheck -B /tmp/cppcheck/build -DCMAKE_BUILD_TYPE=Release -DHAVE_RULES=OFF -DCMAKE_INSTALL_PREFIX=/usr/local \
25+
&& cmake --build /tmp/cppcheck/build -j$(nproc) \
26+
&& cmake --install /tmp/cppcheck/build \
27+
&& rm -rf /tmp/cppcheck
28+
29+
WORKDIR /src
30+
31+
ENTRYPOINT ["scripts/lint.sh"]
32+
CMD ["CLANG_FORMAT=clang-format-20"]
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Local Ubuntu test environment — mirrors GitHub Actions CI exactly.
2+
#
3+
# Usage:
4+
# docker compose -f test-infrastructure/docker-compose.yml run --rm test
5+
# docker compose -f test-infrastructure/docker-compose.yml run --rm lint
6+
#
7+
# Shortcut (from repo root):
8+
# ./test-infrastructure/run.sh # test
9+
# ./test-infrastructure/run.sh lint # lint only
10+
11+
services:
12+
test:
13+
build:
14+
context: ..
15+
dockerfile: test-infrastructure/Dockerfile
16+
volumes:
17+
- ..:/src
18+
# GCC + ASan + UBSan + LeakSanitizer (exactly what CI runs)
19+
command: ["CC=gcc", "CXX=g++"]
20+
21+
lint:
22+
build:
23+
context: ..
24+
dockerfile: test-infrastructure/Dockerfile.lint
25+
volumes:
26+
- ..:/src

test-infrastructure/run.sh

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/bin/bash
2+
# Quick runner for local Docker-based CI testing.
3+
#
4+
# Usage:
5+
# ./test-infrastructure/run.sh # Run GCC tests (ASan + LeakSanitizer)
6+
# ./test-infrastructure/run.sh lint # Run linters (clang-format + cppcheck)
7+
# ./test-infrastructure/run.sh both # Lint then test
8+
# ./test-infrastructure/run.sh shell # Drop into container shell for debugging
9+
10+
set -euo pipefail
11+
12+
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
13+
COMPOSE="docker compose -f $ROOT/test-infrastructure/docker-compose.yml"
14+
15+
case "${1:-test}" in
16+
test)
17+
echo "=== Running GCC + ASan + LeakSanitizer tests (mirrors Ubuntu CI) ==="
18+
$COMPOSE run --rm test
19+
;;
20+
lint)
21+
echo "=== Running linters (clang-format-20 + cppcheck 2.20.0) ==="
22+
$COMPOSE run --rm lint
23+
;;
24+
both)
25+
echo "=== Running linters ==="
26+
$COMPOSE run --rm lint
27+
echo "=== Running tests ==="
28+
$COMPOSE run --rm test
29+
;;
30+
shell)
31+
echo "=== Dropping into test container shell ==="
32+
$COMPOSE run --rm --entrypoint bash test
33+
;;
34+
*)
35+
echo "Usage: $0 {test|lint|both|shell}"
36+
exit 1
37+
;;
38+
esac

0 commit comments

Comments
 (0)