Skip to content

Commit 33a7d1d

Browse files
author
Your Name
committed
feat(pipeline): add Hapi.js route extraction for object-literal patterns
Add cbm_extract_hapi_routes() that handles the Hapi.js route registration pattern: { method: 'GET', path: '/api/...', handler: ... }. Uses a mini-parser that finds method:/path: property pairs within the same object literal by tracking enclosing brace scope. Also extracts handler references. Wired into both the prescan (parallel) path in pass_parallel.c and the disk fallback path in pass_httplinks.c for both per-function and module-level source scanning. Tested: simcapture-cloud went from 0 to 1,665 routes (vs GitNexus 12). CBM now finds every route definition AND API call site, while GitNexus only found external service proxy routes.
1 parent 44f1720 commit 33a7d1d

File tree

4 files changed

+201
-0
lines changed

4 files changed

+201
-0
lines changed

src/pipeline/httplink.c

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,6 +1379,193 @@ int cbm_extract_express_routes(const char *name, const char *qn, const char *sou
13791379
return count;
13801380
}
13811381

1382+
/* ── Route extraction: Hapi.js ─────────────────────────────────── */
1383+
1384+
/* Extract a quoted string value that follows a property colon,
 * e.g. for `method: 'GET'` pass the text after ':' and receive "GET".
 *
 * src   - NUL-terminated text positioned just after the ':'.
 * out   - destination buffer; always NUL-terminated on success.
 * outsz - capacity of `out` in bytes (including the terminator).
 *
 * Leading spaces, tabs and line breaks are skipped. The value must be
 * delimited by a matching pair of ', " or ` characters. A backslash escapes
 * the following character, so an escaped quote does not end the literal; the
 * escape sequence is copied verbatim (it is not decoded). Values longer than
 * outsz - 1 bytes are truncated.
 *
 * Returns the number of chars consumed from `src`, up to and including the
 * closing quote, or 0 on failure (NULL argument, non-positive outsz, no
 * opening quote, or unterminated literal). `out` is untouched on failure. */
static int hapi_extract_string_value(const char *src, char *out, int outsz) {
    /* A non-positive capacity would make the truncated length negative and
     * turn the memcpy below into an enormous copy. */
    if (!src || !out || outsz <= 0) {
        return 0;
    }

    const char *p = src;
    while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') {
        p++;
    }

    const char quote = *p;
    if (quote != '\'' && quote != '"' && quote != '`') {
        return 0;
    }
    p++;

    const char *start = p;
    while (*p && *p != quote) {
        /* Step over the escaped character so that \' or \" stays inside
         * the literal; never step past the terminating NUL. */
        if (*p == '\\' && p[1] != '\0') {
            p++;
        }
        p++;
    }
    if (*p != quote) {
        return 0; /* unterminated literal */
    }

    int len = (int)(p - start);
    if (len >= outsz) {
        len = outsz - 1;
    }
    memcpy(out, start, (size_t)len);
    out[len] = '\0';
    return (int)(p + 1 - src);
}
1402+
1403+
// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
1404+
int cbm_extract_hapi_routes(const char *name, const char *qn, const char *source,
1405+
cbm_route_handler_t *out, int max_out) {
1406+
if (!source || !*source) {
1407+
return 0;
1408+
}
1409+
1410+
int count = 0;
1411+
const char *p = source;
1412+
1413+
/* Scan for object literals containing method: and path: properties.
1414+
* Hapi pattern:
1415+
* { method: 'GET', path: '/api/users', handler: ... }
1416+
* or:
1417+
* { method: 'POST', path: '/api/users', handler: UsersController.create }
1418+
*
1419+
* We look for "method:" followed by a string value, then scan nearby for
1420+
* "path:" followed by a string value (or vice versa). */
1421+
while (*p && count < max_out) {
1422+
/* Find next "method:" or "method :" */
1423+
const char *mkey = strstr(p, "method");
1424+
if (!mkey) break;
1425+
1426+
/* Verify it looks like a property key (preceded by space/newline/comma/brace) */
1427+
if (mkey > source) {
1428+
char before = *(mkey - 1);
1429+
if (before != ' ' && before != '\t' && before != '\n' && before != '\r' &&
1430+
before != ',' && before != '{') {
1431+
p = mkey + 6;
1432+
continue;
1433+
}
1434+
}
1435+
1436+
const char *after_method = mkey + 6;
1437+
/* Skip optional whitespace and colon */
1438+
while (*after_method == ' ' || *after_method == '\t') after_method++;
1439+
if (*after_method != ':') {
1440+
p = after_method;
1441+
continue;
1442+
}
1443+
after_method++; /* skip ':' */
1444+
1445+
char method_val[16] = {0};
1446+
int consumed = hapi_extract_string_value(after_method, method_val, sizeof(method_val));
1447+
if (consumed == 0) {
1448+
p = after_method;
1449+
continue;
1450+
}
1451+
1452+
/* Uppercase the method */
1453+
for (int j = 0; method_val[j]; j++) {
1454+
method_val[j] = (char)toupper((unsigned char)method_val[j]);
1455+
}
1456+
1457+
/* Validate it's a real HTTP method */
1458+
if (strcmp(method_val, "GET") != 0 && strcmp(method_val, "POST") != 0 &&
1459+
strcmp(method_val, "PUT") != 0 && strcmp(method_val, "DELETE") != 0 &&
1460+
strcmp(method_val, "PATCH") != 0 && strcmp(method_val, "OPTIONS") != 0 &&
1461+
strcmp(method_val, "HEAD") != 0 && strcmp(method_val, "*") != 0) {
1462+
p = after_method + consumed;
1463+
continue;
1464+
}
1465+
1466+
/* Search for "path:" within the same object literal — look forward from the
1467+
* method: position. Both method: and path: are in the same {...} block,
1468+
* typically within 300 chars of each other. Also search a small window
1469+
* backward in case path: comes before method: in the object. */
1470+
const char *search_start = (mkey - 300 > source) ? mkey - 300 : source;
1471+
const char *search_end_limit = mkey + 500;
1472+
char path_val[256] = {0};
1473+
bool found_path = false;
1474+
1475+
/* Find the enclosing '{' to scope the search to this object literal */
1476+
const char *obj_start = mkey;
1477+
int brace_depth = 0;
1478+
while (obj_start > source) {
1479+
obj_start--;
1480+
if (*obj_start == '{') {
1481+
if (brace_depth == 0) break;
1482+
brace_depth--;
1483+
} else if (*obj_start == '}') {
1484+
brace_depth++;
1485+
}
1486+
}
1487+
if (*obj_start == '{') {
1488+
search_start = obj_start;
1489+
}
1490+
1491+
const char *pkey = search_start;
1492+
while ((pkey = strstr(pkey, "path")) != NULL && pkey < search_end_limit) {
1493+
/* Verify it looks like a property key */
1494+
if (pkey > source) {
1495+
char pb = *(pkey - 1);
1496+
if (pb != ' ' && pb != '\t' && pb != '\n' && pb != '\r' &&
1497+
pb != ',' && pb != '{') {
1498+
pkey += 4;
1499+
continue;
1500+
}
1501+
}
1502+
const char *after_path = pkey + 4;
1503+
while (*after_path == ' ' || *after_path == '\t') after_path++;
1504+
if (*after_path != ':') {
1505+
pkey += 4;
1506+
continue;
1507+
}
1508+
after_path++;
1509+
int pc = hapi_extract_string_value(after_path, path_val, sizeof(path_val));
1510+
if (pc > 0 && path_val[0] == '/') {
1511+
found_path = true;
1512+
break;
1513+
}
1514+
pkey += 4;
1515+
}
1516+
1517+
if (found_path) {
1518+
/* Optionally extract handler reference — scope to same object */
1519+
char handler_val[256] = {0};
1520+
const char *hkey = strstr(obj_start, "handler");
1521+
while (hkey && hkey < search_end_limit) {
1522+
/* Verify property key */
1523+
if (hkey > source) {
1524+
char hb = *(hkey - 1);
1525+
if (hb != ' ' && hb != '\t' && hb != '\n' && hb != '\r' &&
1526+
hb != ',' && hb != '{') {
1527+
hkey = strstr(hkey + 7, "handler");
1528+
continue;
1529+
}
1530+
}
1531+
const char *after_handler = hkey + 7;
1532+
while (*after_handler == ' ' || *after_handler == '\t') after_handler++;
1533+
if (*after_handler == ':') {
1534+
after_handler++;
1535+
while (*after_handler == ' ' || *after_handler == '\t') after_handler++;
1536+
/* Handler can be identifier.identifier or just identifier */
1537+
const char *hs = after_handler;
1538+
while (*after_handler && *after_handler != ',' && *after_handler != '\n' &&
1539+
*after_handler != '}' && *after_handler != ' ') {
1540+
after_handler++;
1541+
}
1542+
int hlen = (int)(after_handler - hs);
1543+
if (hlen > 0 && hlen < (int)sizeof(handler_val)) {
1544+
memcpy(handler_val, hs, (size_t)hlen);
1545+
handler_val[hlen] = '\0';
1546+
}
1547+
}
1548+
break;
1549+
}
1550+
1551+
cbm_route_handler_t *r = &out[count];
1552+
memset(r, 0, sizeof(*r));
1553+
strncpy(r->method, method_val, sizeof(r->method) - 1);
1554+
strncpy(r->path, path_val, sizeof(r->path) - 1);
1555+
strncpy(r->function_name, name ? name : "", sizeof(r->function_name) - 1);
1556+
strncpy(r->qualified_name, qn ? qn : "", sizeof(r->qualified_name) - 1);
1557+
if (handler_val[0]) {
1558+
strncpy(r->handler_ref, handler_val, sizeof(r->handler_ref) - 1);
1559+
}
1560+
count++;
1561+
}
1562+
1563+
p = after_method + consumed;
1564+
}
1565+
1566+
return count;
1567+
}
1568+
13821569
/* ── Route extraction: Laravel ─────────────────────────────────── */
13831570

13841571
// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)

src/pipeline/httplink.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ int cbm_extract_ktor_routes(const char *name, const char *qn, const char *source
113113
int cbm_extract_express_routes(const char *name, const char *qn, const char *source,
114114
cbm_route_handler_t *out, int max_out);
115115

116+
/* Hapi.js object-literal routes: { method: 'GET', path: '/api/...', handler: ... } */
117+
int cbm_extract_hapi_routes(const char *name, const char *qn, const char *source,
118+
cbm_route_handler_t *out, int max_out);
119+
116120
/* Extract PHP Laravel routes from source.
117121
* Returns count. */
118122
int cbm_extract_laravel_routes(const char *name, const char *qn, const char *source,

src/pipeline/pass_httplinks.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,9 @@ static int discover_node_routes(const cbm_gbuf_node_t *n, const cbm_pipeline_ctx
277277
nr = cbm_extract_express_routes(n->name, n->qualified_name, source, out + total,
278278
max_out - total);
279279
total += nr;
280+
nr = cbm_extract_hapi_routes(n->name, n->qualified_name, source, out + total,
281+
max_out - total);
282+
total += nr;
280283
}
281284
if (has_suffix(fp, ".php")) {
282285
nr = cbm_extract_laravel_routes(n->name, n->qualified_name, source, out + total,
@@ -323,6 +326,8 @@ static int discover_module_routes(const cbm_gbuf_node_t *mod, const cbm_pipeline
323326
if (is_js) {
324327
total += cbm_extract_express_routes(mod->name, mod->qualified_name, source, out + total,
325328
max_out - total);
329+
total += cbm_extract_hapi_routes(mod->name, mod->qualified_name, source, out + total,
330+
max_out - total);
326331
}
327332
free(source);
328333
return total;

src/pipeline/pass_parallel.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,9 @@ static void prescan_routes(const char *source, int source_len, const CBMFileResu
572572
nr = cbm_extract_express_routes(def->name, def->qualified_name, func_src,
573573
routes + total, 16 - total);
574574
total += nr;
575+
nr = cbm_extract_hapi_routes(def->name, def->qualified_name, func_src,
576+
routes + total, 16 - total);
577+
total += nr;
575578
nr = cbm_extract_laravel_routes(def->name, def->qualified_name, func_src,
576579
routes + total, 16 - total);
577580
total += nr;
@@ -608,6 +611,8 @@ static void prescan_routes(const char *source, int source_len, const CBMFileResu
608611
if (is_js) {
609612
total += cbm_extract_express_routes(basename, "", source, mod_routes + total,
610613
16 - total);
614+
total += cbm_extract_hapi_routes(basename, "", source, mod_routes + total,
615+
16 - total);
611616
}
612617
for (int r = 0; r < total; r++) {
613618
prescan_add_route(ps, &mod_routes[r]);

0 commit comments

Comments
 (0)