Skip to content

Commit afba50e

Browse files
committed
Generalized route registration + infra binding bridge
Route registration detection now works for unresolved local variables (app.include_router, app.mount) by matching callee name suffix when QN resolution fails. Scans ALL arguments for URL paths — handles prefix=, path=, route= keyword args across frameworks. New route_reg_suffixes: include_router, mount, add_url_rule, register_blueprint, use, register, add_route, add_api_route, add_api_websocket_route. Infrastructure bindings: YAML/JSON extraction captures topic→URL pairs from subscription configs (push_endpoint, uri). Creates INFRA_MAPS edges linking async topic Routes to endpoint URLs. Improved match_infra_routes: matches prefix Routes from include_router, supports service-only matching for generic "/" handlers, corrects Cloud Run hostname parsing for new project-number format.
1 parent d4a05f2 commit afba50e

File tree

7 files changed

+503
-42
lines changed

7 files changed

+503
-42
lines changed

internal/cbm/cbm.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,11 @@ void cbm_stringref_push(CBMStringRefArray *arr, CBMArena *a, CBMStringRef sr) {
129129
arr->items[arr->count++] = sr;
130130
}
131131

132+
/* Append the binding `ib` to the end of `arr`.
 * GROW_ARRAY is invoked with the array and the arena `a` before the write;
 * it is defined elsewhere and presumably ensures room for one more element
 * (TODO confirm against its definition). */
void cbm_infrabinding_push(CBMInfraBindingArray *arr, CBMArena *a, CBMInfraBinding ib) {
    GROW_ARRAY(arr, a);
    const int slot = arr->count;
    arr->items[slot] = ib;
    arr->count = slot + 1;
}
136+
132137
void cbm_impltrait_push(CBMImplTraitArray *arr, CBMArena *a, CBMImplTrait it) {
133138
GROW_ARRAY(arr, a);
134139
arr->items[arr->count++] = it;

internal/cbm/cbm.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,15 @@ typedef struct {
177177
CBMStringRefKind kind; // URL, CONFIG
178178
} CBMStringRef;
179179

180+
/* One infrastructure binding: a message source (topic, queue, schedule)
 * paired with the endpoint URL it delivers to.
 * Extracted from YAML/HCL/JSON subscription/scheduler configs.
 * Used by pass_route_nodes to connect async Route nodes to handler services. */
typedef struct {
    const char *source_name; // topic, queue, or schedule name
    const char *target_url;  // push_endpoint, uri, or http_target URL
    const char *broker;      // "pubsub", "cloud_tasks", "cloud_scheduler", "sqs", "kafka",
                             // or "async" when the broker cannot be inferred
} CBMInfraBinding;
188+
180189
// Rust: impl Trait for Struct
181190
typedef struct {
182191
const char *trait_name; // trait name (raw text)
@@ -259,6 +268,12 @@ typedef struct {
259268
int cap;
260269
} CBMStringRefArray;
261270

271+
typedef struct {
272+
CBMInfraBinding *items;
273+
int count;
274+
int cap;
275+
} CBMInfraBindingArray;
276+
262277
typedef struct {
263278
CBMImplTrait *items;
264279
int count;
@@ -281,6 +296,7 @@ typedef struct {
281296
CBMImplTraitArray impl_traits; // Rust: impl Trait for Struct pairs
282297
CBMResolvedCallArray resolved_calls; // LSP-resolved calls (high confidence)
283298
CBMStringRefArray string_refs; // URL/config string literals from AST
299+
CBMInfraBindingArray infra_bindings; // topic→URL pairs from IaC configs
284300

285301
const char *module_qn; // module qualified name
286302
const char **exports; // NULL-terminated (NULL if none)
@@ -391,6 +407,7 @@ void cbm_typerefs_push(CBMTypeRefArray *arr, CBMArena *a, CBMTypeRef tr);
391407
void cbm_envaccess_push(CBMEnvAccessArray *arr, CBMArena *a, CBMEnvAccess ea);
392408
void cbm_typeassign_push(CBMTypeAssignArray *arr, CBMArena *a, CBMTypeAssign ta);
393409
void cbm_stringref_push(CBMStringRefArray *arr, CBMArena *a, CBMStringRef sr);
410+
void cbm_infrabinding_push(CBMInfraBindingArray *arr, CBMArena *a, CBMInfraBinding ib);
394411
void cbm_impltrait_push(CBMImplTraitArray *arr, CBMArena *a, CBMImplTrait it);
395412
void cbm_resolvedcall_push(CBMResolvedCallArray *arr, CBMArena *a, CBMResolvedCall rc);
396413

internal/cbm/extract_unified.c

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,171 @@ static void walk_yaml_mapping(CBMExtractCtx *ctx, TSNode node, const char *prefi
336336
}
337337
}
338338

339+
/* ── Infrastructure binding extraction ─────────────────────────────
340+
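* Scan config mappings for topic→URL pairs. The scanner below only matches
* nodes typed block_mapping / block_mapping_pair (YAML block style); other
* node types are walked but never produce bindings.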
341+
* Patterns detected:
342+
* YAML: {topic: X, config: {push_endpoint: URL}} (Pub/Sub subscription)
343+
* YAML: {uri: URL, body: ...} (Cloud Scheduler)
344+
* YAML: {queue: X, uri: URL} (Cloud Tasks)
345+
* HCL: resource "google_pubsub_subscription" { topic=X, push_config{push_endpoint=URL} }
*       (target pattern only — the caller currently runs this scan for YAML/JSON files)
346+
*
347+
* Works by collecting key-value pairs in each mapping, then checking for
348+
* known source+target patterns. Language-agnostic: the key names are the signal. */
349+
350+
/* Report whether `key` is one of the mapping keys that identify the source
 * side of a binding (topic/queue/schedule identifier).
 * The comparison is exact and case-sensitive. Returns 1 on a match, 0 otherwise.
 * `key` must be a non-NULL, NUL-terminated string. */
static int is_source_key(const char *key) {
    static const char *const source_names[] = {
        "topic", "queue", "queue_name", "subscription", "subject", "channel", "stream",
    };
    const size_t total = sizeof source_names / sizeof source_names[0];

    for (size_t idx = 0; idx < total; idx++) {
        if (strcmp(key, source_names[idx]) == 0) {
            return 1;
        }
    }
    return 0;
}
357+
358+
/* Report whether `key` is one of the mapping keys that carry the endpoint
 * URL of a binding.
 * The comparison is exact and case-sensitive. Returns 1 on a match, 0 otherwise.
 * `key` must be a non-NULL, NUL-terminated string. */
static int is_target_key(const char *key) {
    static const char *const target_names[] = {
        "push_endpoint", "uri",        "url",         "endpoint",
        "http_target",   "target_url", "webhook_url", "callback_url",
    };
    const size_t total = sizeof target_names / sizeof target_names[0];

    for (size_t idx = 0; idx < total; idx++) {
        if (strcmp(key, target_names[idx]) == 0) {
            return 1;
        }
    }
    return 0;
}
365+
366+
/* Infer the broker type for a binding from the file path and, failing that,
 * from the name of the key that supplied the source.
 *
 * file_path:  path of the config file being scanned; NULL is treated as "".
 * source_key: mapping key the source value came from; NULL is treated as "".
 *
 * Returns a static string: "pubsub", "cloud_scheduler", "cloud_tasks",
 * "kafka", "sqs", or "async" when nothing matches.
 *
 * All path hints are consulted before any key-name fallback, so a generic
 * key such as "queue" in a file under an sqs/ or kafka/ directory is not
 * mislabelled as "cloud_tasks". */
static const char *infer_broker(const char *file_path, const char *source_key) {
    const char *path = file_path ? file_path : "";
    const char *key = source_key ? source_key : "";

    /* Path hints: the file location is the more specific signal. */
    if (strstr(path, "pubsub") || strstr(path, "pub-sub") || strstr(path, "pub_sub")) {
        return "pubsub";
    }
    /* "schedule" also covers "scheduler". */
    if (strstr(path, "schedule") || strstr(path, "cron")) {
        return "cloud_scheduler";
    }
    if (strstr(path, "task")) {
        return "cloud_tasks";
    }
    if (strstr(path, "kafka")) {
        return "kafka";
    }
    if (strstr(path, "sqs") || strstr(path, "sns")) {
        return "sqs";
    }

    /* Key-name fallbacks: used only when the path gave no hint. */
    if (strcmp(key, "queue") == 0 || strcmp(key, "queue_name") == 0) {
        return "cloud_tasks";
    }
    if (strcmp(key, "stream") == 0) {
        return "kafka";
    }
    return "async";
}
388+
389+
/* Scan a YAML mapping for source+target key pairs.
390+
* Collects all key-value pairs at this level and one level deep (for nested config:). */
391+
static void scan_mapping_for_bindings(CBMExtractCtx *ctx, TSNode mapping) {
392+
const char *sources[8] = {NULL};
393+
const char *source_keys[8] = {NULL};
394+
int n_sources = 0;
395+
const char *targets[8] = {NULL};
396+
int n_targets = 0;
397+
398+
uint32_t nc = ts_node_named_child_count(mapping);
399+
for (uint32_t i = 0; i < nc; i++) {
400+
TSNode pair = ts_node_named_child(mapping, i);
401+
if (strcmp(ts_node_type(pair), "block_mapping_pair") != 0) {
402+
continue;
403+
}
404+
TSNode key = ts_node_child_by_field_name(pair, "key", 3);
405+
TSNode val = ts_node_child_by_field_name(pair, "value", 5);
406+
if (ts_node_is_null(key) || ts_node_is_null(val)) {
407+
continue;
408+
}
409+
char *k = cbm_node_text(ctx->arena, key, ctx->source);
410+
if (!k) {
411+
continue;
412+
}
413+
414+
/* Check if this is a source or target key with a scalar value */
415+
const char *vtype = ts_node_type(val);
416+
if (strcmp(vtype, "block_node") != 0 && strcmp(vtype, "block_mapping") != 0) {
417+
char *v = cbm_node_text(ctx->arena, val, ctx->source);
418+
if (v && v[0]) {
419+
/* Strip quotes */
420+
int vlen = (int)strlen(v);
421+
if (vlen >= 2 && (v[0] == '"' || v[0] == '\'')) {
422+
v = cbm_arena_strndup(ctx->arena, v + 1, (size_t)(vlen - 2));
423+
}
424+
if (is_source_key(k) && n_sources < 8) {
425+
sources[n_sources] = v;
426+
source_keys[n_sources] = k;
427+
n_sources++;
428+
}
429+
if (is_target_key(k) && n_targets < 8 && v && strstr(v, "://")) {
430+
targets[n_targets++] = v;
431+
}
432+
}
433+
} else {
434+
/* Nested mapping (e.g., config: {push_endpoint: URL}) — scan one level */
435+
uint32_t vnc = ts_node_named_child_count(val);
436+
for (uint32_t vi = 0; vi < vnc; vi++) {
437+
TSNode vc = ts_node_named_child(val, vi);
438+
const char *vck = ts_node_type(vc);
439+
if (strcmp(vck, "block_mapping") == 0) {
440+
/* Scan nested mapping for target keys */
441+
uint32_t mnc = ts_node_named_child_count(vc);
442+
for (uint32_t mi = 0; mi < mnc; mi++) {
443+
TSNode mp = ts_node_named_child(vc, mi);
444+
if (strcmp(ts_node_type(mp), "block_mapping_pair") != 0) {
445+
continue;
446+
}
447+
TSNode mk = ts_node_child_by_field_name(mp, "key", 3);
448+
TSNode mv = ts_node_child_by_field_name(mp, "value", 5);
449+
if (ts_node_is_null(mk) || ts_node_is_null(mv)) {
450+
continue;
451+
}
452+
char *mktext = cbm_node_text(ctx->arena, mk, ctx->source);
453+
if (mktext && is_target_key(mktext) && n_targets < 8) {
454+
char *mvtext = cbm_node_text(ctx->arena, mv, ctx->source);
455+
if (mvtext && mvtext[0]) {
456+
int mvlen = (int)strlen(mvtext);
457+
if (mvlen >= 2 && (mvtext[0] == '"' || mvtext[0] == '\'')) {
458+
mvtext = cbm_arena_strndup(ctx->arena, mvtext + 1,
459+
(size_t)(mvlen - 2));
460+
}
461+
if (mvtext && strstr(mvtext, "://")) {
462+
targets[n_targets++] = mvtext;
463+
}
464+
}
465+
}
466+
}
467+
}
468+
}
469+
}
470+
}
471+
472+
/* Emit bindings for each source × target pair */
473+
for (int si = 0; si < n_sources; si++) {
474+
for (int ti = 0; ti < n_targets; ti++) {
475+
if (!sources[si] || !targets[ti]) {
476+
continue;
477+
}
478+
CBMInfraBinding ib = {
479+
.source_name = sources[si],
480+
.target_url = targets[ti],
481+
.broker = infer_broker(ctx->rel_path, source_keys[si]),
482+
};
483+
cbm_infrabinding_push(&ctx->result->infra_bindings, ctx->arena, ib);
484+
}
485+
}
486+
}
487+
488+
/* Walk a YAML block_sequence looking for list items with infra bindings */
489+
static void scan_yaml_for_infra_bindings(CBMExtractCtx *ctx, TSNode node) {
490+
const char *kind = ts_node_type(node);
491+
492+
/* List items are block_sequence → block_sequence_item → block_mapping */
493+
if (strcmp(kind, "block_mapping") == 0) {
494+
scan_mapping_for_bindings(ctx, node);
495+
}
496+
497+
/* Recurse into children */
498+
uint32_t nc = ts_node_named_child_count(node);
499+
for (uint32_t i = 0; i < nc; i++) {
500+
scan_yaml_for_infra_bindings(ctx, ts_node_named_child(node, i));
501+
}
502+
}
503+
339504
/* Handle YAML files: walk top-level block_mapping recursively */
340505
static void handle_yaml_nested(CBMExtractCtx *ctx, TSNode node) {
341506
if (ctx->language != CBM_LANG_YAML) {
@@ -393,6 +558,15 @@ void cbm_extract_unified(CBMExtractCtx *ctx) {
393558
handle_string_refs(ctx, node, &state);
394559
handle_yaml_nested(ctx, node);
395560

561+
/* Scan YAML/JSON for infra bindings (topic→URL pairs) */
562+
if (ctx->language == CBM_LANG_YAML || ctx->language == CBM_LANG_JSON) {
563+
const char *nk = ts_node_type(node);
564+
if (strcmp(nk, "block_sequence") == 0 || strcmp(nk, "block_mapping") == 0 ||
565+
strcmp(nk, "array") == 0 || strcmp(nk, "document") == 0) {
566+
scan_yaml_for_infra_bindings(ctx, node);
567+
}
568+
}
569+
396570
// 4. Push scope markers for boundary nodes
397571
if (spec->function_node_types && cbm_kind_in_set(node, spec->function_node_types)) {
398572
const char *fqn = compute_func_qn(ctx, node, spec, &state);

internal/cbm/service_patterns.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,16 @@ static const method_suffix_t route_reg_suffixes[] = {
393393
{".MapPost", "POST"},
394394
{".MapPut", "PUT"},
395395
{".MapDelete", "DELETE"},
396+
/* Router mounting / prefix registration (any method) */
397+
{".include_router", "ANY"},
398+
{".mount", "ANY"},
399+
{".add_url_rule", "ANY"},
400+
{".register_blueprint", "ANY"},
401+
{".use", "ANY"},
402+
{".register", "ANY"},
403+
{".add_route", "ANY"},
404+
{".add_api_route", "ANY"},
405+
{".add_api_websocket_route", "ANY"},
396406
{NULL, NULL},
397407
};
398408

0 commit comments

Comments
 (0)