Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 88 additions & 22 deletions src/cypher/cypher.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,25 +92,30 @@ static void lex_string_literal(const char *input, int len, int *pos, char quote,
int start = *pos;
char buf[CBM_SZ_4K];
int blen = 0;
const int max_blen = CBM_SZ_4K - 1;
while (*pos < len && input[*pos] != quote) {
if (input[*pos] == '\\' && *pos + SKIP_ONE < len) {
(*pos)++;
switch (input[*pos]) {
case 'n':
buf[blen++] = '\n';
break;
case 't':
buf[blen++] = '\t';
break;
case '\\':
buf[blen++] = '\\';
break;
default:
buf[blen++] = input[*pos];
break;
if (blen < max_blen) {
switch (input[*pos]) {
case 'n':
buf[blen++] = '\n';
break;
case 't':
buf[blen++] = '\t';
break;
case '\\':
buf[blen++] = '\\';
break;
default:
buf[blen++] = input[*pos];
break;
}
}
} else {
buf[blen++] = input[*pos];
if (blen < max_blen) {
buf[blen++] = input[*pos];
}
}
(*pos)++;
}
Expand Down Expand Up @@ -469,6 +474,9 @@ static int parse_props(parser_t *p, cbm_prop_filter_t **out, int *count) {
int cap = CYP_INIT_CAP4;
int n = 0;
cbm_prop_filter_t *arr = malloc(cap * sizeof(cbm_prop_filter_t));
if (!arr) {
return CBM_NOT_FOUND;
}
Comment on lines 474 to +479
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This adds an initial malloc NULL-check, but the function still calls safe_realloc and heap_strdup later without checking for NULL. Since safe_realloc returns NULL (and frees the old pointer), an OOM during growth or key/value duplication can still lead to NULL dereferences (e.g., later strcmp on a NULL props[i].value). Consider propagating an error if any allocation/duplication fails and freeing partially-built arr.

Copilot uses AI. Check for mistakes.

while (!check(p, TOK_RBRACE) && !check(p, TOK_EOF)) {
const cbm_token_t *key = expect(p, TOK_IDENT);
Expand All @@ -487,8 +495,18 @@ static int parse_props(parser_t *p, cbm_prop_filter_t **out, int *count) {
}

if (n >= cap) {
cap *= PAIR_LEN;
arr = safe_realloc(arr, cap * sizeof(cbm_prop_filter_t));
int new_cap = cap * PAIR_LEN;
void *tmp = realloc(arr, new_cap * sizeof(cbm_prop_filter_t));
if (!tmp) {
for (int i = 0; i < n; i++) {
free((void *)arr[i].key);
free((void *)arr[i].value);
}
free(arr);
return CBM_NOT_FOUND;
}
Comment on lines 497 to +507
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

safe_realloc frees the original buffer on failure (see platform.h), so if this growth realloc fails you return with arr already freed but any previously heap_strdup'd keys/values are leaked (and you can't clean them up anymore). Use a non-destructive realloc pattern (tmp=realloc; on failure, free arr[i].key/value for i<n, then free(arr)) so OOM handling is leak-free.

Copilot uses AI. Check for mistakes.
arr = tmp;
cap = new_cap;
}
arr[n].key = heap_strdup(key->text);
arr[n].value = heap_strdup(val->text);
Comment on lines +508 to 512
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parse_props assigns arr[n].key/value from heap_strdup without checking for NULL. If heap_strdup fails, later code like check_inline_props() will call strcmp(props[i].value, ...) and can segfault. After heap_strdup, validate the returned pointers; on failure, free any previously allocated entries (and arr itself) and return an error.

Copilot uses AI. Check for mistakes.
Expand Down Expand Up @@ -569,6 +587,9 @@ static int parse_rel_types(parser_t *p, cbm_rel_pattern_t *out) {
int cap = CYP_INIT_CAP4;
int n = 0;
const char **types = malloc(cap * sizeof(const char *));
if (!types) {
return CBM_NOT_FOUND;
}
Comment on lines 587 to +592
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This adds an initial malloc NULL-check, but types can still become NULL if a later safe_realloc fails (it returns NULL and frees the old buffer), and heap_strdup can return NULL as well. To fully prevent OOM crashes, please check realloc/duplication results, free any already-duplicated strings on failure, and return an error.

Copilot uses AI. Check for mistakes.

const cbm_token_t *t = expect(p, TOK_IDENT);
if (!t) {
Expand All @@ -587,8 +608,17 @@ static int parse_rel_types(parser_t *p, cbm_rel_pattern_t *out) {
return CBM_NOT_FOUND;
}
if (n >= cap) {
cap *= PAIR_LEN;
types = safe_realloc(types, cap * sizeof(const char *));
int new_cap = cap * PAIR_LEN;
void *tmp = realloc(types, new_cap * sizeof(const char *));
if (!tmp) {
for (int i = 0; i < n; i++) {
free((void *)types[i]);
}
free(types);
return CBM_NOT_FOUND;
}
Comment on lines 610 to +619
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same safe_realloc issue as in parse_props: on realloc failure the types array is freed, but already-duplicated strings in types[0..n-1] leak. Prefer realloc into a temporary pointer so you can free the existing types[i] strings and the array on failure.

Copilot uses AI. Check for mistakes.
types = (const char **)tmp;
cap = new_cap;
}
types[n++] = heap_strdup(t->text);
}
Comment on lines +620 to 624
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parse_rel_types stores relationship type strings via heap_strdup but never checks for NULL. If an allocation fails, rel->types will contain NULL and later strcmp/SQLite binding paths can dereference it. After heap_strdup, check for NULL and, if needed, free any previously duplicated types and return an error.

Copilot uses AI. Check for mistakes.
Expand Down Expand Up @@ -762,14 +792,32 @@ static cbm_expr_t *parse_in_list(parser_t *p, cbm_condition_t *c) {
int vcap = CYP_INIT_CAP8;
int vn = 0;
const char **vals = malloc(vcap * sizeof(const char *));
if (!vals) {
free((void *)c->variable);
free((void *)c->property);
free((void *)c->op);
return NULL;
Comment on lines 792 to +799
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good to check the initial malloc, but the list-growth path later uses safe_realloc without checking for NULL (it frees the old pointer on failure), and each element is populated via heap_strdup which may return NULL. Either can still produce a NULL dereference later when evaluating IN conditions (e.g., strcmp(actual, c->in_values[i])). Please add checks and clean up partial allocations on failure.

Copilot uses AI. Check for mistakes.
}
while (!check(p, TOK_RBRACKET) && !check(p, TOK_EOF)) {
if (vn > 0) {
match(p, TOK_COMMA);
}
if (check(p, TOK_STRING) || check(p, TOK_NUMBER)) {
if (vn >= vcap) {
vcap *= PAIR_LEN;
vals = safe_realloc(vals, vcap * sizeof(const char *));
int new_vcap = vcap * PAIR_LEN;
void *tmp = realloc((void *)vals, new_vcap * sizeof(const char *));
if (!tmp) {
for (int i = 0; i < vn; i++) {
free((void *)vals[i]);
}
free((void *)vals);
free((void *)c->variable);
free((void *)c->property);
free((void *)c->op);
return NULL;
}
vals = (const char **)tmp;
vcap = new_vcap;
}
vals[vn++] = heap_strdup(advance(p)->text);
} else {
Comment on lines +818 to 823
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parse_in_list appends list items via heap_strdup without checking for NULL. eval_condition later does strcmp(actual, c->in_values[i]) and will crash if any entry is NULL. Check heap_strdup’s return; on failure, free vals (including already-duplicated strings) and c’s allocated fields, then return NULL with an error set.

Copilot uses AI. Check for mistakes.
Expand Down Expand Up @@ -1061,8 +1109,15 @@ static const char *parse_value_literal(parser_t *p) {
static cbm_case_expr_t *parse_case_expr(parser_t *p) {
/* CASE already consumed */
cbm_case_expr_t *kase = calloc(CBM_ALLOC_ONE, sizeof(cbm_case_expr_t));
if (!kase) {
return NULL;
}
int bcap = CYP_INIT_CAP4;
kase->branches = malloc(bcap * sizeof(cbm_case_branch_t));
if (!kase->branches) {
free(kase);
return NULL;
}
Comment on lines 1111 to +1120
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This checks calloc/malloc for the initial CASE expression allocations, but the branch-growth path later uses safe_realloc without checking for NULL (and safe_realloc frees the old buffer on failure). To avoid NULL dereferences/leaks on OOM, please check the realloc result and free kase/branches before returning an error.

Copilot uses AI. Check for mistakes.

while (check(p, TOK_WHEN)) {
advance(p);
Expand All @@ -1073,8 +1128,19 @@ static cbm_case_expr_t *parse_case_expr(parser_t *p) {
}
const char *then_val = parse_value_literal(p);
if (kase->branch_count >= bcap) {
bcap *= PAIR_LEN;
kase->branches = safe_realloc(kase->branches, bcap * sizeof(cbm_case_branch_t));
int new_bcap = bcap * PAIR_LEN;
void *tmp = realloc(kase->branches, new_bcap * sizeof(cbm_case_branch_t));
if (!tmp) {
expr_free(when);
for (int i = 0; i < kase->branch_count; i++) {
expr_free(kase->branches[i].when_expr);
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On realloc failure, the cleanup loop frees only when_expr for existing branches but not branches[i].then_val. That leaks the already-duplicated THEN values (and can be substantial for large queries) right in the OOM path this change is trying to harden. Free then_val for each existing branch in the failure cleanup before freeing branches/kase.

Suggested change
expr_free(kase->branches[i].when_expr);
expr_free(kase->branches[i].when_expr);
free((void *)kase->branches[i].then_val);

Copilot uses AI. Check for mistakes.
}
free(kase->branches);
free(kase);
return NULL;
}
Comment on lines 1130 to +1141
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On branch growth, safe_realloc frees the existing kase->branches array on failure; returning here then leaks all previously parsed when_expr trees and then_val strings already stored in the old branches (since you no longer have the array to iterate). Switch to a non-destructive realloc pattern and, on failure, call free_case_expr(kase) (or equivalent partial cleanup) before returning.

Copilot uses AI. Check for mistakes.
kase->branches = tmp;
bcap = new_bcap;
}
kase->branches[kase->branch_count++] =
(cbm_case_branch_t){.when_expr = when, .then_val = then_val};
Expand Down
22 changes: 19 additions & 3 deletions src/store/store.c
Original file line number Diff line number Diff line change
Expand Up @@ -2552,7 +2552,12 @@ int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t
const char *sql = "SELECT label, COUNT(*) FROM nodes WHERE project = ?1 GROUP BY label "
"ORDER BY COUNT(*) DESC;";
sqlite3_stmt *stmt = NULL;
sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL);
if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) {
if (stmt) {
sqlite3_finalize(stmt);
}
return CBM_NOT_FOUND;
}
Comment on lines +2555 to +2560
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If sqlite3_prepare_v2 fails it can still leave a non-NULL statement handle in some cases (e.g., partial compile). Returning without finalizing risks leaking stmt. Consider capturing rc, and if stmt is non-NULL always sqlite3_finalize(stmt) before returning (and optionally record the SQLite error).

Copilot uses AI. Check for mistakes.
bind_text(stmt, SKIP_ONE, project);

int cap = ST_INIT_CAP_8;
Expand All @@ -2577,7 +2582,13 @@ int cbm_store_get_schema(cbm_store_t *s, const char *project, cbm_schema_info_t
const char *sql = "SELECT type, COUNT(*) FROM edges WHERE project = ?1 GROUP BY type ORDER "
"BY COUNT(*) DESC;";
sqlite3_stmt *stmt = NULL;
sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL);
if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) {
if (stmt) {
sqlite3_finalize(stmt);
}
cbm_store_schema_free(out);
return CBM_NOT_FOUND;
}
Comment on lines +2585 to +2591
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the edge-types sqlite3_prepare_v2 fails, this returns immediately without freeing any node-label data already stored in out from the previous block. Some callers only call cbm_store_schema_free on CBM_STORE_OK (e.g., UI layout3d), so this can leak memory. Consider using a single cleanup path that frees any partially-filled out before returning an error.

Copilot uses AI. Check for mistakes.
Comment on lines +2585 to +2591
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above: on prepare failure, ensure any non-NULL stmt is finalized before returning. Right now this path can leak stmt (and you already free out, which is good).

Copilot uses AI. Check for mistakes.
bind_text(stmt, SKIP_ONE, project);

int cap = ST_INIT_CAP_8;
Expand Down Expand Up @@ -3283,7 +3294,12 @@ static bool pkg_in_list(const char *pkg, char **list, int count) {
static int collect_pkg_names(cbm_store_t *s, const char *sql, const char *project, char **pkgs,
int max_pkgs) {
sqlite3_stmt *stmt = NULL;
sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL);
if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) {
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On prepare failure, consider finalizing stmt if it is non-NULL before returning to avoid leaking a partially prepared statement. Also consider recording the underlying SQLite error so callers have some diagnostics when DB corruption triggers this path.

Suggested change
if (sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL) != SQLITE_OK || !stmt) {
int rc = sqlite3_prepare_v2(s->db, sql, CBM_NOT_FOUND, &stmt, NULL);
if (rc != SQLITE_OK || !stmt) {
if (stmt != NULL) {
sqlite3_finalize(stmt);
}

Copilot uses AI. Check for mistakes.
if (stmt) {
sqlite3_finalize(stmt);
}
return CBM_NOT_FOUND;
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

collect_pkg_names returns CBM_NOT_FOUND on sqlite3_prepare_v2 failure, which is the same numeric value as CBM_STORE_ERR (-1) but semantically inconsistent with other store helpers that return CBM_STORE_ERR and call store_set_error_sqlite(). Consider returning CBM_STORE_ERR (or 0 if you intend “no pkgs”) and setting the store error so downstream callers can diagnose DB corruption.

Suggested change
return CBM_NOT_FOUND;
store_set_error_sqlite(s, "sqlite3_prepare_v2 failed in collect_pkg_names");
return CBM_STORE_ERR;

Copilot uses AI. Check for mistakes.
}
bind_text(stmt, SKIP_ONE, project);
int count = 0;
while (sqlite3_step(stmt) == SQLITE_ROW && count < max_pkgs) {
Expand Down
27 changes: 27 additions & 0 deletions tests/test_cypher.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,32 @@ TEST(cypher_lex_single_quote_string) {
PASS();
}

TEST(cypher_lex_string_overflow) {
/* Build a string literal longer than 4096 bytes to verify we don't
* overflow the stack buffer in lex_string_literal. */
const int big = 5000;
/* query: "AAAA...A" (quotes included) */
char *query = malloc(big + 3); /* quote + big chars + quote + NUL */
ASSERT_NOT_NULL(query);
query[0] = '"';
memset(query + 1, 'A', big);
query[big + 1] = '"';
query[big + 2] = '\0';

cbm_lex_result_t r = {0};
int rc = cbm_lex(query, &r);
ASSERT_EQ(rc, 0);
ASSERT_NULL(r.error);
ASSERT_GTE(r.count, 1);
ASSERT_EQ(r.tokens[0].type, TOK_STRING);
/* The string should be truncated to CBM_SZ_4K - 1 (4095) characters. */
ASSERT_EQ((int)strlen(r.tokens[0].text), 4095);

cbm_lex_free(&r);
free(query);
PASS();
}

TEST(cypher_lex_number) {
cbm_lex_result_t r = {0};
int rc = cbm_lex("42 3.14", &r);
Expand Down Expand Up @@ -2064,6 +2090,7 @@ SUITE(cypher) {
RUN_TEST(cypher_lex_relationship);
RUN_TEST(cypher_lex_string_literal);
RUN_TEST(cypher_lex_single_quote_string);
RUN_TEST(cypher_lex_string_overflow);
RUN_TEST(cypher_lex_number);
RUN_TEST(cypher_lex_operators);
RUN_TEST(cypher_lex_keywords_case_insensitive);
Expand Down
Loading