@@ -336,6 +336,171 @@ static void walk_yaml_mapping(CBMExtractCtx *ctx, TSNode node, const char *prefi
336336 }
337337}
338338
/* ── Infrastructure binding extraction ─────────────────────────────
 * Scan configuration mappings for topic→URL pairs.
 * Patterns targeted:
 *   YAML: {topic: X, config: {push_endpoint: URL}}   (Pub/Sub subscription)
 *   YAML: {uri: URL, body: ...}                      (Cloud Scheduler)
 *   YAML: {queue: X, uri: URL}                       (Cloud Tasks)
 *   HCL:  resource "google_pubsub_subscription" { topic=X, push_config{push_endpoint=URL} }
 *
 * Works by collecting the key-value pairs of each mapping, then checking for
 * known source+target key names. The key names are the signal, so the matching
 * rules themselves are language-agnostic. Note, however, that the scanner below
 * only inspects nodes of type block_mapping / block_mapping_pair; mappings that
 * parse to other node types (e.g. JSON objects or HCL blocks) do not produce
 * bindings yet. */
349+
/* Return 1 when `key` names the source side of a binding (a topic, queue,
 * subscription, subject, channel or stream identifier), 0 otherwise.
 * The comparison is exact and case-sensitive. `key` must not be NULL. */
static int is_source_key(const char *key) {
    static const char *const source_names[] = {
        "topic",   "queue",   "queue_name", "subscription",
        "subject", "channel", "stream",
    };
    const size_t total = sizeof source_names / sizeof source_names[0];

    for (size_t idx = 0; idx < total; idx++) {
        if (strcmp(key, source_names[idx]) == 0) {
            return 1;
        }
    }
    return 0;
}
357+
/* Return 1 when `key` names the target side of a binding (a key whose value
 * is expected to be an endpoint URL), 0 otherwise.
 * The comparison is exact and case-sensitive. `key` must not be NULL. */
static int is_target_key(const char *key) {
    static const char *const target_names[] = {
        "push_endpoint", "uri",        "url",         "endpoint",
        "http_target",   "target_url", "webhook_url", "callback_url",
    };
    const size_t total = sizeof target_names / sizeof target_names[0];

    for (size_t idx = 0; idx < total; idx++) {
        if (strcmp(key, target_names[idx]) == 0) {
            return 1;
        }
    }
    return 0;
}
365+
/* Infer a broker label for a binding from the file path and the source key.
 *
 * Rules are tried in order and the first match wins:
 *   1. path contains "pubsub", "pub-sub" or "pub_sub"          -> "pubsub"
 *   2. path contains "schedule" or "cron"                      -> "cloud_scheduler"
 *   3. path contains "task", or key is "queue" / "queue_name"  -> "cloud_tasks"
 *   4. path contains "kafka", or key is "stream"               -> "kafka"
 *   5. path contains "sqs" or "sns"                            -> "sqs"
 *   6. otherwise                                               -> "async"
 *
 * Path matching is a case-sensitive substring test. Either argument may be
 * NULL; a NULL argument is treated as an empty string so only the rules that
 * depend on the other argument can fire.
 *
 * Returns a pointer to a string literal; the caller must not free it. */
static const char *infer_broker(const char *file_path, const char *source_key) {
    /* strstr/strcmp on NULL is undefined behaviour, so normalise first. */
    const char *path = file_path ? file_path : "";
    const char *key = source_key ? source_key : "";

    if (strstr(path, "pubsub") || strstr(path, "pub-sub") || strstr(path, "pub_sub")) {
        return "pubsub";
    }
    /* "schedule" also matches "scheduler", so one substring test covers both. */
    if (strstr(path, "schedule") || strstr(path, "cron")) {
        return "cloud_scheduler";
    }
    if (strstr(path, "task") || strcmp(key, "queue") == 0 || strcmp(key, "queue_name") == 0) {
        return "cloud_tasks";
    }
    if (strstr(path, "kafka") || strcmp(key, "stream") == 0) {
        return "kafka";
    }
    /* SNS paths are reported under the "sqs" label as well. */
    if (strstr(path, "sqs") || strstr(path, "sns")) {
        return "sqs";
    }
    return "async";
}
388+
389+ /* Scan a YAML mapping for source+target key pairs.
390+ * Collects all key-value pairs at this level and one level deep (for nested config:). */
391+ static void scan_mapping_for_bindings (CBMExtractCtx * ctx , TSNode mapping ) {
392+ const char * sources [8 ] = {NULL };
393+ const char * source_keys [8 ] = {NULL };
394+ int n_sources = 0 ;
395+ const char * targets [8 ] = {NULL };
396+ int n_targets = 0 ;
397+
398+ uint32_t nc = ts_node_named_child_count (mapping );
399+ for (uint32_t i = 0 ; i < nc ; i ++ ) {
400+ TSNode pair = ts_node_named_child (mapping , i );
401+ if (strcmp (ts_node_type (pair ), "block_mapping_pair" ) != 0 ) {
402+ continue ;
403+ }
404+ TSNode key = ts_node_child_by_field_name (pair , "key" , 3 );
405+ TSNode val = ts_node_child_by_field_name (pair , "value" , 5 );
406+ if (ts_node_is_null (key ) || ts_node_is_null (val )) {
407+ continue ;
408+ }
409+ char * k = cbm_node_text (ctx -> arena , key , ctx -> source );
410+ if (!k ) {
411+ continue ;
412+ }
413+
414+ /* Check if this is a source or target key with a scalar value */
415+ const char * vtype = ts_node_type (val );
416+ if (strcmp (vtype , "block_node" ) != 0 && strcmp (vtype , "block_mapping" ) != 0 ) {
417+ char * v = cbm_node_text (ctx -> arena , val , ctx -> source );
418+ if (v && v [0 ]) {
419+ /* Strip quotes */
420+ int vlen = (int )strlen (v );
421+ if (vlen >= 2 && (v [0 ] == '"' || v [0 ] == '\'' )) {
422+ v = cbm_arena_strndup (ctx -> arena , v + 1 , (size_t )(vlen - 2 ));
423+ }
424+ if (is_source_key (k ) && n_sources < 8 ) {
425+ sources [n_sources ] = v ;
426+ source_keys [n_sources ] = k ;
427+ n_sources ++ ;
428+ }
429+ if (is_target_key (k ) && n_targets < 8 && v && strstr (v , "://" )) {
430+ targets [n_targets ++ ] = v ;
431+ }
432+ }
433+ } else {
434+ /* Nested mapping (e.g., config: {push_endpoint: URL}) — scan one level */
435+ uint32_t vnc = ts_node_named_child_count (val );
436+ for (uint32_t vi = 0 ; vi < vnc ; vi ++ ) {
437+ TSNode vc = ts_node_named_child (val , vi );
438+ const char * vck = ts_node_type (vc );
439+ if (strcmp (vck , "block_mapping" ) == 0 ) {
440+ /* Scan nested mapping for target keys */
441+ uint32_t mnc = ts_node_named_child_count (vc );
442+ for (uint32_t mi = 0 ; mi < mnc ; mi ++ ) {
443+ TSNode mp = ts_node_named_child (vc , mi );
444+ if (strcmp (ts_node_type (mp ), "block_mapping_pair" ) != 0 ) {
445+ continue ;
446+ }
447+ TSNode mk = ts_node_child_by_field_name (mp , "key" , 3 );
448+ TSNode mv = ts_node_child_by_field_name (mp , "value" , 5 );
449+ if (ts_node_is_null (mk ) || ts_node_is_null (mv )) {
450+ continue ;
451+ }
452+ char * mktext = cbm_node_text (ctx -> arena , mk , ctx -> source );
453+ if (mktext && is_target_key (mktext ) && n_targets < 8 ) {
454+ char * mvtext = cbm_node_text (ctx -> arena , mv , ctx -> source );
455+ if (mvtext && mvtext [0 ]) {
456+ int mvlen = (int )strlen (mvtext );
457+ if (mvlen >= 2 && (mvtext [0 ] == '"' || mvtext [0 ] == '\'' )) {
458+ mvtext = cbm_arena_strndup (ctx -> arena , mvtext + 1 ,
459+ (size_t )(mvlen - 2 ));
460+ }
461+ if (mvtext && strstr (mvtext , "://" )) {
462+ targets [n_targets ++ ] = mvtext ;
463+ }
464+ }
465+ }
466+ }
467+ }
468+ }
469+ }
470+ }
471+
472+ /* Emit bindings for each source × target pair */
473+ for (int si = 0 ; si < n_sources ; si ++ ) {
474+ for (int ti = 0 ; ti < n_targets ; ti ++ ) {
475+ if (!sources [si ] || !targets [ti ]) {
476+ continue ;
477+ }
478+ CBMInfraBinding ib = {
479+ .source_name = sources [si ],
480+ .target_url = targets [ti ],
481+ .broker = infer_broker (ctx -> rel_path , source_keys [si ]),
482+ };
483+ cbm_infrabinding_push (& ctx -> result -> infra_bindings , ctx -> arena , ib );
484+ }
485+ }
486+ }
487+
488+ /* Walk a YAML block_sequence looking for list items with infra bindings */
489+ static void scan_yaml_for_infra_bindings (CBMExtractCtx * ctx , TSNode node ) {
490+ const char * kind = ts_node_type (node );
491+
492+ /* List items are block_sequence → block_sequence_item → block_mapping */
493+ if (strcmp (kind , "block_mapping" ) == 0 ) {
494+ scan_mapping_for_bindings (ctx , node );
495+ }
496+
497+ /* Recurse into children */
498+ uint32_t nc = ts_node_named_child_count (node );
499+ for (uint32_t i = 0 ; i < nc ; i ++ ) {
500+ scan_yaml_for_infra_bindings (ctx , ts_node_named_child (node , i ));
501+ }
502+ }
503+
339504/* Handle YAML files: walk top-level block_mapping recursively */
340505static void handle_yaml_nested (CBMExtractCtx * ctx , TSNode node ) {
341506 if (ctx -> language != CBM_LANG_YAML ) {
@@ -393,6 +558,15 @@ void cbm_extract_unified(CBMExtractCtx *ctx) {
393558 handle_string_refs (ctx , node , & state );
394559 handle_yaml_nested (ctx , node );
395560
561+ /* Scan YAML/JSON for infra bindings (topic→URL pairs) */
562+ if (ctx -> language == CBM_LANG_YAML || ctx -> language == CBM_LANG_JSON ) {
563+ const char * nk = ts_node_type (node );
564+ if (strcmp (nk , "block_sequence" ) == 0 || strcmp (nk , "block_mapping" ) == 0 ||
565+ strcmp (nk , "array" ) == 0 || strcmp (nk , "document" ) == 0 ) {
566+ scan_yaml_for_infra_bindings (ctx , node );
567+ }
568+ }
569+
396570 // 4. Push scope markers for boundary nodes
397571 if (spec -> function_node_types && cbm_kind_in_set (node , spec -> function_node_types )) {
398572 const char * fqn = compute_func_qn (ctx , node , spec , & state );
0 commit comments