Skip to content

Commit 17db27a

Browse files
committed
Schema properties, DATA_FLOWS, YAML nested extraction, trace_path tool
- Serialize return_type, parent_class, base_classes on definition nodes
- DATA_FLOWS edges link callers through Route nodes to handlers
- YAML nested field extraction with dotted key paths (key_path property)
- Rename trace_call_path → trace_path with mode parameter: calls (default), data_flow, cross_service
- Backward compatible: trace_call_path alias still accepted
- Updated CLI help text and all tests
1 parent b5e288a commit 17db27a

File tree

13 files changed

+918
-490
lines changed

13 files changed

+918
-490
lines changed

internal/cbm/cbm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ typedef enum {
173173
/* One classified string reference collected during extraction. */
typedef struct {
174174
const char *value; // the string literal content (YAML values are stored with surrounding quotes removed)
175175
const char *enclosing_func_qn; // QN of enclosing function (the YAML walker stores the module QN here)
176+
const char *key_path; // dotted key path from YAML/JSON nesting, e.g. body.operational_info.post_url (NULL if flat)
176177
CBMStringRefKind kind; // classification of the string: URL, CONFIG
177178
} CBMStringRef;
178179

internal/cbm/extract_unified.c

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,116 @@ static void handle_string_refs(CBMExtractCtx *ctx, TSNode node, const WalkState
248248
cbm_stringref_push(&ctx->result->string_refs, ctx->arena, ref);
249249
}
250250

251+
// --- YAML nested field extraction (D2) ---
252+
253+
/* Recursively walk YAML block_mapping_pair nodes, building dotted key paths.
254+
* Emits string_refs with key_path for leaf values that are URLs or config values.
255+
* Example: body.operational_info.post_url → "https://..." */
256+
static void walk_yaml_mapping(CBMExtractCtx *ctx, TSNode node, const char *prefix) {
257+
uint32_t nc = ts_node_named_child_count(node);
258+
for (uint32_t i = 0; i < nc; i++) {
259+
TSNode child = ts_node_named_child(node, i);
260+
const char *ck = ts_node_type(child);
261+
262+
if (strcmp(ck, "block_mapping_pair") != 0) {
263+
continue;
264+
}
265+
266+
/* Get key */
267+
TSNode key = ts_node_child_by_field_name(child, "key", 3);
268+
if (ts_node_is_null(key)) {
269+
continue;
270+
}
271+
char *key_text = cbm_node_text(ctx->arena, key, ctx->source);
272+
if (!key_text || !key_text[0]) {
273+
continue;
274+
}
275+
276+
/* Build dotted path */
277+
const char *path =
278+
prefix ? cbm_arena_sprintf(ctx->arena, "%s.%s", prefix, key_text) : key_text;
279+
280+
/* Get value node */
281+
TSNode val = ts_node_child_by_field_name(child, "value", 5);
282+
if (ts_node_is_null(val)) {
283+
continue;
284+
}
285+
const char *vk = ts_node_type(val);
286+
287+
/* If value is a nested mapping (block_node → block_mapping), recurse */
288+
if (strcmp(vk, "block_node") == 0 || strcmp(vk, "block_mapping") == 0) {
289+
/* Walk children for nested block_mapping */
290+
uint32_t vnc = ts_node_named_child_count(val);
291+
for (uint32_t vi = 0; vi < vnc; vi++) {
292+
TSNode vc = ts_node_named_child(val, vi);
293+
const char *vctype = ts_node_type(vc);
294+
if (strcmp(vctype, "block_mapping") == 0 ||
295+
strcmp(vctype, "block_mapping_pair") == 0) {
296+
walk_yaml_mapping(ctx, vc, path);
297+
}
298+
}
299+
continue;
300+
}
301+
302+
/* Leaf value: extract and classify */
303+
char *val_text = cbm_node_text(ctx->arena, val, ctx->source);
304+
if (!val_text || !val_text[0]) {
305+
continue;
306+
}
307+
308+
/* Strip quotes */
309+
int vlen = (int)strlen(val_text);
310+
const char *content = val_text;
311+
if (vlen >= 2 && (val_text[0] == '"' || val_text[0] == '\'')) {
312+
content = val_text + 1;
313+
vlen -= 2;
314+
if (vlen <= 0) {
315+
continue;
316+
}
317+
}
318+
319+
int kind_val = cbm_classify_string(content, vlen);
320+
if (kind_val < 0) {
321+
continue;
322+
}
323+
324+
char *stored = cbm_arena_strndup(ctx->arena, content, (size_t)vlen);
325+
if (!stored) {
326+
continue;
327+
}
328+
329+
CBMStringRef ref = {
330+
.value = stored,
331+
.enclosing_func_qn = ctx->module_qn,
332+
.key_path = path,
333+
.kind = (CBMStringRefKind)kind_val,
334+
};
335+
cbm_stringref_push(&ctx->result->string_refs, ctx->arena, ref);
336+
}
337+
}
338+
339+
/* Handle YAML files: walk top-level block_mapping recursively */
340+
static void handle_yaml_nested(CBMExtractCtx *ctx, TSNode node) {
341+
if (ctx->language != CBM_LANG_YAML) {
342+
return;
343+
}
344+
const char *kind = ts_node_type(node);
345+
if (strcmp(kind, "block_mapping") != 0) {
346+
return;
347+
}
348+
/* Only process root-level block_mapping (depth 0 or 1) */
349+
TSNode parent = ts_node_parent(node);
350+
if (ts_node_is_null(parent)) {
351+
walk_yaml_mapping(ctx, node, NULL);
352+
} else {
353+
const char *pk = ts_node_type(parent);
354+
if (strcmp(pk, "stream") == 0 || strcmp(pk, "document") == 0 ||
355+
strcmp(pk, "block_node") == 0) {
356+
walk_yaml_mapping(ctx, node, NULL);
357+
}
358+
}
359+
}
360+
251361
// --- Main unified cursor walk ---
252362

253363
void cbm_extract_unified(CBMExtractCtx *ctx) {
@@ -281,6 +391,7 @@ void cbm_extract_unified(CBMExtractCtx *ctx) {
281391
handle_env_accesses(ctx, node, spec, &state);
282392
handle_type_assigns(ctx, node, spec, &state);
283393
handle_string_refs(ctx, node, &state);
394+
handle_yaml_nested(ctx, node);
284395

285396
// 4. Push scope markers for boundary nodes
286397
if (spec->function_node_types && cbm_kind_in_set(node, spec->function_node_types)) {

src/cli/cli.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ static const char skill_tracing_content[] =
346346
"description: Call chain and dependency expert. ALWAYS invoke this skill when the user "
347347
"asks who calls a function, what a function calls, needs impact analysis, or traces "
348348
"dependencies. Do not grep for function names directly — use codebase-memory-mcp "
349-
"trace_call_path first.\n"
349+
"trace_path first.\n"
350350
"---\n"
351351
"\n"
352352
"# Call Tracing & Impact Analysis\n"
@@ -355,7 +355,7 @@ static const char skill_tracing_content[] =
355355
"\n"
356356
"## Workflow\n"
357357
"1. `search_graph(name_pattern=\".*FuncName.*\")` — find exact function name\n"
358-
"2. `trace_call_path(function_name=\"FuncName\", direction=\"both\")` — trace callers + "
358+
"2. `trace_path(function_name=\"FuncName\", direction=\"both\")` — trace callers + "
359359
"callees\n"
360360
"3. `detect_changes` — find what changed and assess risk_labels\n"
361361
"\n"
@@ -402,7 +402,7 @@ static const char skill_reference_content[] =
402402
"- `search_graph` — find nodes by pattern\n"
403403
"- `search_code` — text search in source\n"
404404
"- `query_graph` — Cypher query language\n"
405-
"- `trace_call_path` — call chain traversal\n"
405+
"- `trace_path` — call chain traversal\n"
406406
"- `get_code_snippet` — read function source\n"
407407
"- `get_graph_schema` — node/edge type catalog\n"
408408
"- `get_architecture` — high-level summary\n"
@@ -429,7 +429,7 @@ static const char codex_instructions_content[] =
429429
"Use the MCP tools to explore and understand the code:\n"
430430
"\n"
431431
"- `search_graph` — find functions, classes, routes by pattern\n"
432-
"- `trace_call_path` — trace who calls a function or what it calls\n"
432+
"- `trace_path` — trace who calls a function or what it calls\n"
433433
"- `get_code_snippet` — read function source code\n"
434434
"- `query_graph` — run Cypher queries for complex patterns\n"
435435
"- `get_architecture` — high-level project summary\n"
@@ -963,7 +963,7 @@ static const char agent_instructions_content[] =
963963
"\n"
964964
"## Priority Order\n"
965965
"1. `search_graph` — find functions, classes, routes, variables by pattern\n"
966-
"2. `trace_call_path` — trace who calls a function or what it calls\n"
966+
"2. `trace_path` — trace who calls a function or what it calls\n"
967967
"3. `get_code_snippet` — read specific function/class source code\n"
968968
"4. `query_graph` — run Cypher queries for complex patterns\n"
969969
"5. `get_architecture` — high-level project summary\n"
@@ -975,7 +975,7 @@ static const char agent_instructions_content[] =
975975
"\n"
976976
"## Examples\n"
977977
"- Find a handler: `search_graph(name_pattern=\".*OrderHandler.*\")`\n"
978-
"- Who calls it: `trace_call_path(function_name=\"OrderHandler\", direction=\"inbound\")`\n"
978+
"- Who calls it: `trace_path(function_name=\"OrderHandler\", direction=\"inbound\")`\n"
979979
"- Read source: `get_code_snippet(qualified_name=\"pkg/orders.OrderHandler\")`\n";
980980

981981
const char *cbm_get_agent_instructions(void) {
@@ -1558,7 +1558,7 @@ static void cbm_install_hook_gate_script(const char *home) {
15581558
"fi\n"
15591559
"touch \"$GATE\"\n"
15601560
"echo 'BLOCKED: For code discovery, use codebase-memory-mcp tools first: "
1561-
"search_graph(name_pattern) to find functions/classes, trace_call_path() for "
1561+
"search_graph(name_pattern) to find functions/classes, trace_path() for "
15621562
"call chains, get_code_snippet(qualified_name) to read source. If the graph "
15631563
"is not indexed yet, call index_repository first. Fall back to Grep/Glob/Read "
15641564
"only for text content search. If you need Grep, retry.' >&2\n"
@@ -1574,8 +1574,8 @@ static void cbm_install_hook_gate_script(const char *home) {
15741574
}
15751575

15761576
#define GEMINI_HOOK_MATCHER "google_search|read_file|grep_search"
1577-
#define GEMINI_HOOK_COMMAND \
1578-
"echo 'Reminder: prefer codebase-memory-mcp search_graph/trace_call_path/" \
1577+
#define GEMINI_HOOK_COMMAND \
1578+
"echo 'Reminder: prefer codebase-memory-mcp search_graph/trace_path/" \
15791579
"get_code_snippet over grep/file search for code discovery.' >&2"
15801580

15811581
int cbm_upsert_gemini_hooks(const char *settings_path) {

src/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ static void print_help(void) {
148148
printf(" --port=N Set UI port (default 9749, persisted)\n");
149149
printf("\nSupported agents (auto-detected):\n");
150150
printf(" Claude Code, Codex CLI, Gemini CLI, Zed, OpenCode, Antigravity, Aider, KiloCode\n");
151-
printf("\nTools: index_repository, search_graph, query_graph, trace_call_path,\n");
151+
printf("\nTools: index_repository, search_graph, query_graph, trace_path,\n");
152152
printf(" get_code_snippet, get_graph_schema, get_architecture, search_code,\n");
153153
printf(" list_projects, delete_project, index_status, detect_changes,\n");
154154
printf(" manage_adr, ingest_traces\n");

src/mcp/mcp.c

Lines changed: 75 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -253,14 +253,19 @@ static const tool_def_t TOOLS[] = {
253253
"\"Optional row limit. Default: unlimited (100k "
254254
"ceiling)\"}},\"required\":[\"query\",\"project\"]}"},
255255

256-
{"trace_call_path",
257-
"Trace function call paths — who calls a function and what it calls. Use INSTEAD OF grep when "
258-
"finding callers, dependencies, or impact analysis.",
256+
{"trace_path",
257+
"Trace paths through the code graph. Modes: calls (callers/callees), data_flow (value "
258+
"propagation with args at each hop), cross_service (through HTTP/async Route nodes). "
259+
"Use INSTEAD OF grep for callers, dependencies, impact analysis, or data flow tracing.",
259260
"{\"type\":\"object\",\"properties\":{\"function_name\":{\"type\":\"string\"},\"project\":{"
260261
"\"type\":\"string\"},\"direction\":{\"type\":\"string\",\"enum\":[\"inbound\",\"outbound\","
261-
"\"both\"],\"default\":\"both\"},\"depth\":{\"type\":\"integer\",\"default\":3},\"edge_"
262-
"types\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"function_"
263-
"name\",\"project\"]}"},
262+
"\"both\"],\"default\":\"both\"},\"depth\":{\"type\":\"integer\",\"default\":3},\"mode\":{"
263+
"\"type\":\"string\",\"enum\":[\"calls\",\"data_flow\",\"cross_service\"],\"default\":"
264+
"\"calls\",\"description\":\"calls: follow CALLS edges. data_flow: follow CALLS+DATA_FLOWS "
265+
"with arg expressions. cross_service: follow HTTP_CALLS+ASYNC_CALLS+DATA_FLOWS through "
266+
"Routes.\"},\"parameter_name\":{\"type\":\"string\",\"description\":\"For data_flow mode: "
267+
"scope trace to a specific parameter name\"},\"edge_types\":{\"type\":\"array\",\"items\":{"
268+
"\"type\":\"string\"}}},\"required\":[\"function_name\",\"project\"]}"},
264269

265270
{"get_code_snippet",
266271
"Read source code for a function/class/symbol. IMPORTANT: First call search_graph to find the "
@@ -1223,11 +1228,15 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) {
12231228
char *project = cbm_mcp_get_string_arg(args, "project");
12241229
cbm_store_t *store = resolve_store(srv, project);
12251230
char *direction = cbm_mcp_get_string_arg(args, "direction");
1231+
char *mode = cbm_mcp_get_string_arg(args, "mode");
1232+
char *param_name = cbm_mcp_get_string_arg(args, "parameter_name");
12261233
int depth = cbm_mcp_get_int_arg(args, "depth", 3);
12271234

12281235
if (!func_name) {
12291236
free(project);
12301237
free(direction);
1238+
free(mode);
1239+
free(param_name);
12311240
return cbm_mcp_text_result("function_name is required", true);
12321241
}
12331242
if (!store) {
@@ -1237,6 +1246,8 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) {
12371246
free(func_name);
12381247
free(project);
12391248
free(direction);
1249+
free(mode);
1250+
free(param_name);
12401251
return _res;
12411252
}
12421253

@@ -1245,6 +1256,8 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) {
12451256
free(func_name);
12461257
free(project);
12471258
free(direction);
1259+
free(mode);
1260+
free(param_name);
12481261
return not_indexed;
12491262
}
12501263

@@ -1261,6 +1274,8 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) {
12611274
free(func_name);
12621275
free(project);
12631276
free(direction);
1277+
free(mode);
1278+
free(param_name);
12641279
cbm_store_free_nodes(nodes, 0);
12651280
return cbm_mcp_text_result("{\"error\":\"function not found\"}", true);
12661281
}
@@ -1271,9 +1286,55 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) {
12711286

12721287
yyjson_mut_obj_add_str(doc, root, "function", func_name);
12731288
yyjson_mut_obj_add_str(doc, root, "direction", direction);
1289+
if (mode) {
1290+
yyjson_mut_obj_add_str(doc, root, "mode", mode);
1291+
}
1292+
1293+
/* Edge types: explicit > mode-based > default */
1294+
static const char *mode_calls[] = {"CALLS"};
1295+
static const char *mode_data_flow[] = {"CALLS", "DATA_FLOWS"};
1296+
static const char *mode_cross_svc[] = {"HTTP_CALLS", "ASYNC_CALLS", "DATA_FLOWS", "CALLS"};
1297+
1298+
const char *edge_types[16];
1299+
int edge_type_count = 0;
1300+
1301+
/* Try parsing explicit edge_types array from args */
1302+
yyjson_doc *et_doc = yyjson_read(args, strlen(args), 0);
1303+
if (et_doc) {
1304+
yyjson_val *et_arr = yyjson_obj_get(yyjson_doc_get_root(et_doc), "edge_types");
1305+
if (et_arr && yyjson_is_arr(et_arr)) {
1306+
size_t idx2;
1307+
size_t max2;
1308+
yyjson_val *val2;
1309+
yyjson_arr_foreach(et_arr, idx2, max2, val2) {
1310+
if (yyjson_is_str(val2) && edge_type_count < 16) {
1311+
edge_types[edge_type_count++] = yyjson_get_str(val2);
1312+
}
1313+
}
1314+
}
1315+
}
12741316

1275-
const char *edge_types[] = {"CALLS"};
1276-
int edge_type_count = 1;
1317+
yyjson_doc *et_doc_keep = et_doc;
1318+
if (edge_type_count == 0) {
1319+
/* Select defaults by mode */
1320+
const char **defaults = mode_calls;
1321+
int n_defaults = 1;
1322+
if (mode && strcmp(mode, "data_flow") == 0) {
1323+
defaults = mode_data_flow;
1324+
n_defaults = 2;
1325+
} else if (mode && strcmp(mode, "cross_service") == 0) {
1326+
defaults = mode_cross_svc;
1327+
n_defaults = 4;
1328+
}
1329+
for (int i = 0; i < n_defaults; i++) {
1330+
edge_types[i] = defaults[i];
1331+
}
1332+
edge_type_count = n_defaults;
1333+
if (et_doc_keep) {
1334+
yyjson_doc_free(et_doc_keep);
1335+
et_doc_keep = NULL;
1336+
}
1337+
}
12771338

12781339
/* Run BFS for each requested direction.
12791340
* IMPORTANT: yyjson_mut_obj_add_str borrows pointers — we must keep
@@ -1338,6 +1399,11 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) {
13381399
free(func_name);
13391400
free(project);
13401401
free(direction);
1402+
free(mode);
1403+
free(param_name);
1404+
if (et_doc_keep) {
1405+
yyjson_doc_free(et_doc_keep);
1406+
}
13411407

13421408
char *result = cbm_mcp_text_result(json, false);
13431409
free(json);
@@ -2698,7 +2764,7 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch
26982764
if (strcmp(tool_name, "delete_project") == 0) {
26992765
return handle_delete_project(srv, args_json);
27002766
}
2701-
if (strcmp(tool_name, "trace_call_path") == 0) {
2767+
if (strcmp(tool_name, "trace_path") == 0 || strcmp(tool_name, "trace_call_path") == 0) {
27022768
return handle_trace_call_path(srv, args_json);
27032769
}
27042770
if (strcmp(tool_name, "get_architecture") == 0) {

src/pipeline/pass_definitions.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,10 @@ static void build_def_props(char *buf, size_t bufsize, const CBMDefinition *def)
182182
size_t pos = (size_t)n;
183183
append_json_string(buf, bufsize, &pos, "docstring", def->docstring);
184184
append_json_string(buf, bufsize, &pos, "signature", def->signature);
185+
append_json_string(buf, bufsize, &pos, "return_type", def->return_type);
186+
append_json_string(buf, bufsize, &pos, "parent_class", def->parent_class);
185187
append_json_str_array(buf, bufsize, &pos, "decorators", def->decorators);
188+
append_json_str_array(buf, bufsize, &pos, "base_classes", def->base_classes);
186189
append_json_str_array(buf, bufsize, &pos, "param_names", def->param_names);
187190
append_json_str_array(buf, bufsize, &pos, "param_types", def->param_types);
188191

src/pipeline/pass_parallel.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,10 @@ static void build_def_props(char *buf, size_t bufsize, const CBMDefinition *def)
193193
size_t pos = (size_t)n;
194194
append_json_string(buf, bufsize, &pos, "docstring", def->docstring);
195195
append_json_string(buf, bufsize, &pos, "signature", def->signature);
196+
append_json_string(buf, bufsize, &pos, "return_type", def->return_type);
197+
append_json_string(buf, bufsize, &pos, "parent_class", def->parent_class);
196198
append_json_str_array(buf, bufsize, &pos, "decorators", def->decorators);
199+
append_json_str_array(buf, bufsize, &pos, "base_classes", def->base_classes);
197200
append_json_str_array(buf, bufsize, &pos, "param_names", def->param_names);
198201
append_json_str_array(buf, bufsize, &pos, "param_types", def->param_types);
199202
if (pos < bufsize - 1) {

0 commit comments

Comments
 (0)