Skip to content

Commit d4a05f2

Browse files
committed
Enrich DATA_FLOWS with caller args + handler params mapping
DATA_FLOWS edges now carry actual value propagation data: - caller_args: argument expressions from the HTTP/ASYNC call - handler_params: parameter names from the handler function - edge_type: HTTP_CALLS or ASYNC_CALLS (how the data crosses) - via/route: the Route node connecting caller to handler Skip creating DATA_FLOWS when a direct CALLS edge already exists between caller and handler (avoids redundancy).
1 parent 85c6c6f commit d4a05f2

File tree

1 file changed

+140
-33
lines changed

1 file changed

+140
-33
lines changed

src/pipeline/pass_route_nodes.c

Lines changed: 140 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,60 @@ static void match_infra_routes(cbm_gbuf_t *gb) {
245245
/* Phase 3: Create DATA_FLOWS edges by linking callers through Route to handlers.
246246
* For each HTTP_CALLS/ASYNC_CALLS edge (caller → Route), find the HANDLES edge
247247
* (handler → Route) and create DATA_FLOWS (caller → handler) with route context. */
248+
/* Check if a direct CALLS edge already exists between two nodes */
249+
static int has_direct_call(const cbm_gbuf_t *gb, int64_t source, int64_t target) {
250+
const cbm_gbuf_edge_t **edges = NULL;
251+
int count = 0;
252+
cbm_gbuf_find_edges_by_source_type(gb, source, "CALLS", &edges, &count);
253+
for (int i = 0; i < count; i++) {
254+
if (edges[i]->target_id == target) {
255+
return 1;
256+
}
257+
}
258+
return 0;
259+
}
260+
261+
/* Extract param_names from a node's properties_json.
262+
* Returns a comma-separated string in buf, or empty string. */
263+
static void extract_param_names(const cbm_gbuf_node_t *node, char *buf, int bufsize) {
264+
buf[0] = '\0';
265+
if (!node || !node->properties_json) {
266+
return;
267+
}
268+
const char *p = strstr(node->properties_json, "\"param_names\":");
269+
if (!p) {
270+
return;
271+
}
272+
p = strchr(p, '[');
273+
if (!p) {
274+
return;
275+
}
276+
p++; /* skip '[' */
277+
const char *end = strchr(p, ']');
278+
if (!end || end <= p) {
279+
return;
280+
}
281+
int len = (int)(end - p);
282+
if (len >= bufsize) {
283+
len = bufsize - 1;
284+
}
285+
memcpy(buf, p, (size_t)len);
286+
buf[len] = '\0';
287+
}
288+
289+
/* Extract the "args" JSON fragment from an edge's properties.
290+
* Returns pointer into the properties string (not copied). */
291+
static const char *find_args_in_props(const char *props) {
292+
if (!props) {
293+
return NULL;
294+
}
295+
const char *p = strstr(props, "\"args\":[");
296+
if (!p) {
297+
return NULL;
298+
}
299+
return p + 7; /* skip "args":[ , points to first { or ] */
300+
}
301+
248302
static void create_data_flows(cbm_gbuf_t *gb) {
249303
const cbm_gbuf_node_t **routes = NULL;
250304
int route_count = 0;
@@ -253,66 +307,119 @@ static void create_data_flows(cbm_gbuf_t *gb) {
253307
}
254308

255309
int flows = 0;
310+
int skipped = 0;
256311

257-
/* For each Route node, find callers (HTTP_CALLS/ASYNC_CALLS → Route)
258-
* and handlers (HANDLES → Route), then create DATA_FLOWS links. */
259312
for (int ri = 0; ri < route_count; ri++) {
260313
const cbm_gbuf_node_t *route = routes[ri];
261314

262-
/* Find HTTP_CALLS → Route */
315+
/* Collect caller edges (HTTP_CALLS + ASYNC_CALLS → Route) */
263316
const cbm_gbuf_edge_t **http_edges = NULL;
264317
int http_count = 0;
265318
cbm_gbuf_find_edges_by_target_type(gb, route->id, "HTTP_CALLS", &http_edges, &http_count);
266319

267-
/* Find ASYNC_CALLS → Route */
268320
const cbm_gbuf_edge_t **async_edges = NULL;
269321
int async_count = 0;
270322
cbm_gbuf_find_edges_by_target_type(gb, route->id, "ASYNC_CALLS", &async_edges,
271323
&async_count);
272324

273-
/* Find HANDLES → Route */
274-
const cbm_gbuf_edge_t **handles_edges = NULL;
275-
int handles_count = 0;
276-
cbm_gbuf_find_edges_by_target_type(gb, route->id, "HANDLES", &handles_edges,
277-
&handles_count);
278-
279-
/* Collect caller IDs */
280-
int64_t callers[64];
325+
/* Collect caller edge references (need properties for arg mapping) */
326+
struct {
327+
int64_t source_id;
328+
const char *props;
329+
const char *edge_type;
330+
} caller_edges[64];
281331
int n_callers = 0;
282332
for (int ei = 0; ei < http_count && n_callers < 64; ei++) {
283-
callers[n_callers++] = http_edges[ei]->source_id;
333+
caller_edges[n_callers].source_id = http_edges[ei]->source_id;
334+
caller_edges[n_callers].props = http_edges[ei]->properties_json;
335+
caller_edges[n_callers].edge_type = "HTTP_CALLS";
336+
n_callers++;
284337
}
285338
for (int ei = 0; ei < async_count && n_callers < 64; ei++) {
286-
callers[n_callers++] = async_edges[ei]->source_id;
339+
caller_edges[n_callers].source_id = async_edges[ei]->source_id;
340+
caller_edges[n_callers].props = async_edges[ei]->properties_json;
341+
caller_edges[n_callers].edge_type = "ASYNC_CALLS";
342+
n_callers++;
287343
}
288344

289-
/* Collect handler IDs */
290-
int64_t handlers[16];
291-
int n_handlers = 0;
292-
for (int ei = 0; ei < handles_count && n_handlers < 16; ei++) {
293-
handlers[n_handlers++] = handles_edges[ei]->source_id;
294-
}
345+
/* Collect handler nodes (HANDLES → Route) */
346+
const cbm_gbuf_edge_t **handles_edges = NULL;
347+
int handles_count = 0;
348+
cbm_gbuf_find_edges_by_target_type(gb, route->id, "HANDLES", &handles_edges,
349+
&handles_count);
295350

296-
/* Create DATA_FLOWS: each caller → each handler through this Route */
297351
for (int ci = 0; ci < n_callers; ci++) {
298-
for (int hi = 0; hi < n_handlers; hi++) {
299-
if (callers[ci] == handlers[hi]) {
300-
continue; /* skip self-links */
352+
for (int hi = 0; hi < handles_count; hi++) {
353+
int64_t caller_id = caller_edges[ci].source_id;
354+
int64_t handler_id = handles_edges[hi]->source_id;
355+
356+
if (caller_id == handler_id) {
357+
continue;
301358
}
302-
char props[512];
303-
snprintf(props, sizeof(props), "{\"via_route\":\"%s\",\"route_qn\":\"%s\"}",
304-
route->name ? route->name : "",
305-
route->qualified_name ? route->qualified_name : "");
306-
cbm_gbuf_insert_edge(gb, callers[ci], handlers[hi], "DATA_FLOWS", props);
359+
360+
/* Skip if direct CALLS edge already exists */
361+
if (has_direct_call(gb, caller_id, handler_id)) {
362+
skipped++;
363+
continue;
364+
}
365+
366+
/* Build value mapping: caller args → handler params */
367+
const char *args_json = find_args_in_props(caller_edges[ci].props);
368+
369+
const cbm_gbuf_node_t *handler_node = cbm_gbuf_find_by_id(gb, handler_id);
370+
char handler_params[512];
371+
extract_param_names(handler_node, handler_params, sizeof(handler_params));
372+
373+
/* Build DATA_FLOWS properties with actual value mapping */
374+
char props[2048];
375+
int n = snprintf(
376+
props, sizeof(props), "{\"via\":\"%s\",\"route\":\"%s\",\"edge_type\":\"%s\"",
377+
route->name ? route->name : "",
378+
route->qualified_name ? route->qualified_name : "", caller_edges[ci].edge_type);
379+
380+
if (n > 0 && (size_t)n < sizeof(props) - 100) {
381+
size_t pos = (size_t)n;
382+
383+
/* Include handler param_names */
384+
if (handler_params[0]) {
385+
int w = snprintf(props + pos, sizeof(props) - pos,
386+
",\"handler_params\":[%s]", handler_params);
387+
if (w > 0) {
388+
pos += (size_t)w;
389+
}
390+
}
391+
392+
/* Include caller args (copy from source edge) */
393+
if (args_json) {
394+
int w = snprintf(props + pos, sizeof(props) - pos, ",\"caller_args\":[%.*s",
395+
400, args_json);
396+
if (w > 0) {
397+
pos += (size_t)w;
398+
/* Find closing ] in the copied fragment */
399+
char *close = strchr(props + (pos - (size_t)w) + 14, ']');
400+
if (close && close < props + sizeof(props) - 2) {
401+
pos = (size_t)(close - props) + 1;
402+
}
403+
}
404+
}
405+
406+
if (pos < sizeof(props) - 1) {
407+
props[pos] = '}';
408+
props[pos + 1] = '\0';
409+
}
410+
}
411+
cbm_gbuf_insert_edge(gb, caller_id, handler_id, "DATA_FLOWS", props);
307412
flows++;
308413
}
309414
}
310415
}
311416

312-
if (flows > 0) {
313-
char buf[16];
314-
snprintf(buf, sizeof(buf), "%d", flows);
315-
cbm_log_info("pass.data_flows", "created", buf);
417+
if (flows > 0 || skipped > 0) {
418+
char buf1[16];
419+
char buf2[16];
420+
snprintf(buf1, sizeof(buf1), "%d", flows);
421+
snprintf(buf2, sizeof(buf2), "%d", skipped);
422+
cbm_log_info("pass.data_flows", "created", buf1, "skipped_has_call", buf2);
316423
}
317424
}
318425

0 commit comments

Comments
 (0)