Skip to content

Commit 178bea2

Browse files
authored
fix(cbm): convert 7 O(N²) import extractors to O(N) TSTreeCursor iteration (#131)
Fixes #130
1 parent c00723e commit 178bea2

File tree

2 files changed

+168
-28
lines changed

2 files changed

+168
-28
lines changed

internal/cbm/extract_imports.c

Lines changed: 63 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -55,9 +55,13 @@ static const char *path_last(CBMArena *a, const char *path) {
5555
static void parse_go_imports(CBMExtractCtx *ctx) {
5656
CBMArena *a = ctx->arena;
5757

58-
uint32_t root_count = ts_node_child_count(ctx->root);
59-
for (uint32_t i = 0; i < root_count; i++) {
60-
TSNode decl = ts_node_child(ctx->root, i);
58+
TSTreeCursor cursor = ts_tree_cursor_new(ctx->root);
59+
if (!ts_tree_cursor_goto_first_child(&cursor)) {
60+
ts_tree_cursor_delete(&cursor);
61+
return;
62+
}
63+
do {
64+
TSNode decl = ts_tree_cursor_current_node(&cursor);
6165
if (strcmp(ts_node_type(decl), "import_declaration") != 0) {
6266
continue;
6367
}
@@ -121,7 +125,8 @@ static void parse_go_imports(CBMExtractCtx *ctx) {
121125
}
122126
}
123127
}
124-
}
128+
} while (ts_tree_cursor_goto_next_sibling(&cursor));
129+
ts_tree_cursor_delete(&cursor);
125130
}
126131

127132
// --- Python imports ---
@@ -131,9 +136,13 @@ static void parse_go_imports(CBMExtractCtx *ctx) {
131136
static void parse_python_imports(CBMExtractCtx *ctx) {
132137
CBMArena *a = ctx->arena;
133138

134-
uint32_t count = ts_node_child_count(ctx->root);
135-
for (uint32_t i = 0; i < count; i++) {
136-
TSNode node = ts_node_child(ctx->root, i);
139+
TSTreeCursor cursor = ts_tree_cursor_new(ctx->root);
140+
if (!ts_tree_cursor_goto_first_child(&cursor)) {
141+
ts_tree_cursor_delete(&cursor);
142+
return;
143+
}
144+
do {
145+
TSNode node = ts_tree_cursor_current_node(&cursor);
137146
const char *kind = ts_node_type(node);
138147

139148
if (strcmp(kind, "import_statement") == 0) {
@@ -227,7 +236,8 @@ static void parse_python_imports(CBMExtractCtx *ctx) {
227236
}
228237
}
229238
}
230-
}
239+
} while (ts_tree_cursor_goto_next_sibling(&cursor));
240+
ts_tree_cursor_delete(&cursor);
231241
}
232242

233243
// --- ES module imports (JS/TS/TSX) ---
@@ -347,9 +357,13 @@ static void parse_es_imports(CBMExtractCtx *ctx) {
347357
static void parse_java_imports(CBMExtractCtx *ctx) {
348358
CBMArena *a = ctx->arena;
349359

350-
uint32_t count = ts_node_child_count(ctx->root);
351-
for (uint32_t i = 0; i < count; i++) {
352-
TSNode node = ts_node_child(ctx->root, i);
360+
TSTreeCursor cursor = ts_tree_cursor_new(ctx->root);
361+
if (!ts_tree_cursor_goto_first_child(&cursor)) {
362+
ts_tree_cursor_delete(&cursor);
363+
return;
364+
}
365+
do {
366+
TSNode node = ts_tree_cursor_current_node(&cursor);
353367
if (strcmp(ts_node_type(node), "import_declaration") != 0) {
354368
continue;
355369
}
@@ -368,7 +382,8 @@ static void parse_java_imports(CBMExtractCtx *ctx) {
368382
break;
369383
}
370384
}
371-
}
385+
} while (ts_tree_cursor_goto_next_sibling(&cursor));
386+
ts_tree_cursor_delete(&cursor);
372387
}
373388

374389
// --- Rust imports ---
@@ -377,9 +392,13 @@ static void parse_java_imports(CBMExtractCtx *ctx) {
377392
static void parse_rust_imports(CBMExtractCtx *ctx) {
378393
CBMArena *a = ctx->arena;
379394

380-
uint32_t count = ts_node_child_count(ctx->root);
381-
for (uint32_t i = 0; i < count; i++) {
382-
TSNode node = ts_node_child(ctx->root, i);
395+
TSTreeCursor cursor = ts_tree_cursor_new(ctx->root);
396+
if (!ts_tree_cursor_goto_first_child(&cursor)) {
397+
ts_tree_cursor_delete(&cursor);
398+
return;
399+
}
400+
do {
401+
TSNode node = ts_tree_cursor_current_node(&cursor);
383402
if (strcmp(ts_node_type(node), "use_declaration") != 0) {
384403
continue;
385404
}
@@ -399,7 +418,8 @@ static void parse_rust_imports(CBMExtractCtx *ctx) {
399418

400419
CBMImport imp = {.local_name = path_last(a, full), .module_path = full};
401420
cbm_imports_push(&ctx->result->imports, a, imp);
402-
}
421+
} while (ts_tree_cursor_goto_next_sibling(&cursor));
422+
ts_tree_cursor_delete(&cursor);
403423
}
404424

405425
// --- C/C++ imports ---
@@ -408,9 +428,13 @@ static void parse_rust_imports(CBMExtractCtx *ctx) {
408428
static void parse_c_imports(CBMExtractCtx *ctx) {
409429
CBMArena *a = ctx->arena;
410430

411-
uint32_t count = ts_node_child_count(ctx->root);
412-
for (uint32_t i = 0; i < count; i++) {
413-
TSNode node = ts_node_child(ctx->root, i);
431+
TSTreeCursor cursor = ts_tree_cursor_new(ctx->root);
432+
if (!ts_tree_cursor_goto_first_child(&cursor)) {
433+
ts_tree_cursor_delete(&cursor);
434+
return;
435+
}
436+
do {
437+
TSNode node = ts_tree_cursor_current_node(&cursor);
414438
const char *kind = ts_node_type(node);
415439
if (strcmp(kind, "preproc_include") != 0 && strcmp(kind, "preproc_import") != 0) {
416440
continue;
@@ -447,7 +471,8 @@ static void parse_c_imports(CBMExtractCtx *ctx) {
447471

448472
CBMImport imp = {.local_name = path_last(a, path), .module_path = path};
449473
cbm_imports_push(&ctx->result->imports, a, imp);
450-
}
474+
} while (ts_tree_cursor_goto_next_sibling(&cursor));
475+
ts_tree_cursor_delete(&cursor);
451476
}
452477

453478
// --- Ruby imports ---
@@ -458,9 +483,13 @@ static void parse_ruby_imports(CBMExtractCtx *ctx) {
458483

459484
// Walk for call nodes with "require" or "require_relative"
460485
// Simple: walk top-level children
461-
uint32_t count = ts_node_child_count(ctx->root);
462-
for (uint32_t i = 0; i < count; i++) {
463-
TSNode node = ts_node_child(ctx->root, i);
486+
TSTreeCursor cursor = ts_tree_cursor_new(ctx->root);
487+
if (!ts_tree_cursor_goto_first_child(&cursor)) {
488+
ts_tree_cursor_delete(&cursor);
489+
return;
490+
}
491+
do {
492+
TSNode node = ts_tree_cursor_current_node(&cursor);
464493
const char *kind = ts_node_type(node);
465494
if (strcmp(kind, "call") != 0 && strcmp(kind, "command_call") != 0) {
466495
continue;
@@ -511,7 +540,8 @@ static void parse_ruby_imports(CBMExtractCtx *ctx) {
511540

512541
CBMImport imp = {.local_name = path_last(a, arg_text), .module_path = arg_text};
513542
cbm_imports_push(&ctx->result->imports, a, imp);
514-
}
543+
} while (ts_tree_cursor_goto_next_sibling(&cursor));
544+
ts_tree_cursor_delete(&cursor);
515545
}
516546

517547
// --- Lua imports ---
@@ -520,9 +550,13 @@ static void parse_ruby_imports(CBMExtractCtx *ctx) {
520550
static void parse_lua_imports(CBMExtractCtx *ctx) {
521551
CBMArena *a = ctx->arena;
522552

523-
uint32_t count = ts_node_child_count(ctx->root);
524-
for (uint32_t i = 0; i < count; i++) {
525-
TSNode node = ts_node_child(ctx->root, i);
553+
TSTreeCursor cursor = ts_tree_cursor_new(ctx->root);
554+
if (!ts_tree_cursor_goto_first_child(&cursor)) {
555+
ts_tree_cursor_delete(&cursor);
556+
return;
557+
}
558+
do {
559+
TSNode node = ts_tree_cursor_current_node(&cursor);
526560
// Lua: local X = require("Y") → assignment_statement or variable_declaration
527561
// containing function_call(require, "Y")
528562
char *text = cbm_node_text(a, node, ctx->source);
@@ -567,7 +601,8 @@ static void parse_lua_imports(CBMExtractCtx *ctx) {
567601
char *mod = cbm_arena_strndup(a, start, (size_t)(end - start));
568602
CBMImport imp = {.local_name = path_last(a, mod), .module_path = mod};
569603
cbm_imports_push(&ctx->result->imports, a, imp);
570-
}
604+
} while (ts_tree_cursor_goto_next_sibling(&cursor));
605+
ts_tree_cursor_delete(&cursor);
571606
}
572607

573608
// --- Generic import parsing for languages with simple import_declaration ---

tests/test_extraction.c

Lines changed: 105 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1671,6 +1671,104 @@ TEST(js_imports) {
16711671
PASS();
16721672
}
16731673

1674+
/* Go import smoke test.
 * Feeds extract() a Go file with one single-line import ("fmt") and one
 * parenthesised import group (one entry carries an alias), then checks that a
 * result is returned, has_error is clear, at least one import was recorded,
 * and has_import() reports "fmt". The grouped entries ("os", "net/http") are
 * not asserted individually. */
TEST(go_imports) {
1675+
CBMFileResult *r =
1676+
extract("package main\n\nimport \"fmt\"\nimport (\n \"os\"\n net \"net/http\"\n)\n",
1677+
CBM_LANG_GO, "t", "main.go");
1678+
ASSERT_NOT_NULL(r);
1679+
ASSERT_FALSE(r->has_error);
1680+
ASSERT_GT(r->imports.count, 0);
1681+
/* Only the single-line import is pinned by name. */
ASSERT(has_import(r, "fmt"));
1682+
cbm_free_result(r);
1683+
PASS();
1684+
}
1685+
1686+
/* Java import smoke test.
 * Feeds extract() a Java file with two ordinary imports, one static import
 * and an empty class, then checks that a result is returned, has_error is
 * clear, at least one import was recorded, and has_import() reports
 * "java.util.List". The other two imports are not asserted individually. */
TEST(java_imports) {
1687+
CBMFileResult *r =
1688+
extract("import java.util.List;\nimport java.util.ArrayList;\nimport static java.lang.Math.PI;\n"
1689+
"public class Foo {}\n",
1690+
CBM_LANG_JAVA, "t", "Foo.java");
1691+
ASSERT_NOT_NULL(r);
1692+
ASSERT_FALSE(r->has_error);
1693+
ASSERT_GT(r->imports.count, 0);
1694+
/* Only the first import is pinned by name. */
ASSERT(has_import(r, "java.util.List"));
1695+
cbm_free_result(r);
1696+
PASS();
1697+
}
1698+
1699+
/* Rust import smoke test.
 * Feeds extract() a Rust file with three `use` declarations (one of them a
 * brace list, `std::io::{self, Write}`) followed by an empty main, then checks
 * that a result is returned, has_error is clear, at least one import was
 * recorded, and has_import() reports "std::collections::HashMap". How the
 * brace-list declaration is recorded is not asserted here. */
TEST(rust_imports) {
1700+
CBMFileResult *r =
1701+
extract("use std::collections::HashMap;\nuse std::io::{self, Write};\nuse serde::Serialize;\n"
1702+
"fn main() {}\n",
1703+
CBM_LANG_RUST, "t", "main.rs");
1704+
ASSERT_NOT_NULL(r);
1705+
ASSERT_FALSE(r->has_error);
1706+
ASSERT_GT(r->imports.count, 0);
1707+
/* Only the first `use` path is pinned by name. */
ASSERT(has_import(r, "std::collections::HashMap"));
1708+
cbm_free_result(r);
1709+
PASS();
1710+
}
1711+
1712+
/* C include smoke test.
 * Feeds extract() a C file with two angle-bracket includes, one quoted
 * include and a trivial main, then checks that a result is returned,
 * has_error is clear, at least one import was recorded, and has_import()
 * reports "stdio.h". The quoted include ("mylib.h") is not asserted. */
TEST(c_imports) {
1713+
CBMFileResult *r =
1714+
extract("#include <stdio.h>\n#include <stdlib.h>\n#include \"mylib.h\"\n\nint main() { return 0; }\n",
1715+
CBM_LANG_C, "t", "main.c");
1716+
ASSERT_NOT_NULL(r);
1717+
ASSERT_FALSE(r->has_error);
1718+
ASSERT_GT(r->imports.count, 0);
1719+
/* Expected name is given without the surrounding <> delimiters. */
ASSERT(has_import(r, "stdio.h"));
1720+
cbm_free_result(r);
1721+
PASS();
1722+
}
1723+
1724+
/* Ruby require smoke test.
 * Feeds extract() a Ruby file with two `require` calls, one
 * `require_relative` call and an empty class, then checks that a result is
 * returned, has_error is clear, at least one import was recorded, and
 * has_import() reports "json". The `require_relative` entry is not asserted. */
TEST(ruby_imports) {
1725+
CBMFileResult *r =
1726+
extract("require 'json'\nrequire 'net/http'\nrequire_relative 'helpers'\n\nclass Foo; end\n",
1727+
CBM_LANG_RUBY, "t", "app.rb");
1728+
ASSERT_NOT_NULL(r);
1729+
ASSERT_FALSE(r->has_error);
1730+
ASSERT_GT(r->imports.count, 0);
1731+
/* Expected name is given without the surrounding quotes. */
ASSERT(has_import(r, "json"));
1732+
cbm_free_result(r);
1733+
PASS();
1734+
}
1735+
1736+
/* Lua require smoke test.
 * Feeds extract() a Lua file with two `local X = require("...")` statements
 * and an empty local function, then checks that a result is returned,
 * has_error is clear, at least one import was recorded, and has_import()
 * reports "dkjson". The second module ("socket.http") is not asserted. */
TEST(lua_imports) {
1737+
CBMFileResult *r =
1738+
extract("local json = require(\"dkjson\")\nlocal http = require(\"socket.http\")\n\nlocal function greet() end\n",
1739+
CBM_LANG_LUA, "t", "main.lua");
1740+
ASSERT_NOT_NULL(r);
1741+
ASSERT_FALSE(r->has_error);
1742+
ASSERT_GT(r->imports.count, 0);
1743+
/* The expected name is the required module string, not the local alias. */
ASSERT(has_import(r, "dkjson"));
1744+
cbm_free_result(r);
1745+
PASS();
1746+
}
1747+
1748+
TEST(import_stress_go) {
1749+
/* Stress test: 5,000 single-line Go imports.
1750+
* Intended to guard the top-level import walk against a return of the
* O(N²) child-index loop.
* NOTE(review): nothing below measures elapsed time; the only checks are a
* clean parse and exactly N recorded imports, so a quadratic regression would
* surface as a slow run rather than a failed assertion. */
1751+
const int N = 5000;
1752+
/* Each line: import "pkg/NNNNN"\n = ~20 chars; total ~100KB */
1753+
/* 24 bytes are budgeted per import line, plus 64 bytes of slack that also
 * covers the leading package clause. */
int buf_size = N * 24 + 64;
1754+
char *src = malloc((size_t)buf_size);
1755+
ASSERT_NOT_NULL(src);
1756+
1757+
/* pos is the write offset; each snprintf is bounded by the space left. */
int pos = 0;
1758+
pos += snprintf(src + pos, (size_t)(buf_size - pos), "package stress\n");
1759+
for (int k = 0; k < N; k++) {
1760+
/* %05d gives every import path a distinct zero-padded suffix. */
pos += snprintf(src + pos, (size_t)(buf_size - pos), "import \"pkg/%05d\"\n", k);
1761+
}
1762+
1763+
CBMFileResult *r = extract(src, CBM_LANG_GO, "t", "stress.go");
1764+
/* The source buffer is released right after extract() returns and is not
 * touched again by this test. */
free(src);
1765+
ASSERT_NOT_NULL(r);
1766+
ASSERT_FALSE(r->has_error);
1767+
/* One import per generated line is expected. */
ASSERT_EQ(r->imports.count, N);
1768+
cbm_free_result(r);
1769+
PASS();
1770+
}
1771+
16741772
/* ═══════════════════════════════════════════════════════════════════
16751773
* config_extraction_test.go ports (25 tests)
16761774
* ═══════════════════════════════════════════════════════════════════ */
@@ -2192,6 +2290,13 @@ SUITE(extraction) {
21922290
RUN_TEST(go_calls);
21932291
RUN_TEST(python_imports);
21942292
RUN_TEST(js_imports);
2293+
RUN_TEST(go_imports);
2294+
RUN_TEST(java_imports);
2295+
RUN_TEST(rust_imports);
2296+
RUN_TEST(c_imports);
2297+
RUN_TEST(ruby_imports);
2298+
RUN_TEST(lua_imports);
2299+
RUN_TEST(import_stress_go);
21952300

21962301
/* config_extraction_test.go ports */
21972302
RUN_TEST(toml_basic_table_and_pair);

0 commit comments

Comments
 (0)