Skip to content

Commit 817182a

Browse files
author
Your Name
committed
feat(cross-repo): unified cross-repository index, search, and channel flow tracing
Adds cross-repo capabilities to CBM by building a unified _cross_repo.db that aggregates node stubs, channels, and embeddings from all per-project databases. This bridges the per-project isolation gap without compromising the security model (no ATTACH DATABASE — data is copied, not linked). New files: - src/store/cross_repo.h — API: build, search, channel matching, stats - src/store/cross_repo.c — Implementation (~550 lines) - cbm_cross_repo_build(): scans all project DBs, copies nodes (134K), channels (526), embeddings (134K) into _cross_repo.db. Build time: ~2s. - cbm_cross_repo_search(): BM25 FTS5 + vector search + RRF merge across all repos in a single query. CamelCase token splitting enabled. - cbm_cross_repo_match_channels(): finds emit/listen pairs across repos (12 unique cross-repo channels detected, 127 individual flow matches) - cbm_cross_repo_get_info(): stats about the cross-repo index New MCP tools: - build_cross_repo_index: manually trigger cross-repo index rebuild - trace_cross_repo: trace message channels across repositories, showing which services produce and consume each channel with file+function detail Pipeline integration: - Auto-rebuilds _cross_repo.db after every index_repository (adds ~2s) - Cross-repo DB is always fresh without manual intervention Cross-repo channel matching detects: - Socket.IO channels emitted in one service and listened in another - EventEmitter patterns across repos - File-level and function-level attribution for each endpoint Performance: 54 repos, 134K nodes, 134K embeddings copied in 2.1 seconds. No new dependencies — uses existing SQLite3 and custom functions.
1 parent 70bc64f commit 817182a

File tree

5 files changed

+983
-1
lines changed

5 files changed

+983
-1
lines changed

Makefile.cbm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ PREPROCESSOR_SRC = $(CBM_DIR)/preprocessor.cpp
139139
SQLITE_WRITER_SRC = $(CBM_DIR)/sqlite_writer.c
140140

141141
# Store module (new)
142-
STORE_SRCS = src/store/store.c
142+
STORE_SRCS = src/store/store.c src/store/cross_repo.c
143143

144144
# Cypher module (new)
145145
CYPHER_SRCS = src/cypher/cypher.c

src/mcp/mcp.c

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "cypher/cypher.h"
1414
#include "pipeline/pipeline.h"
1515
#include "pipeline/embedding.h"
16+
#include "store/cross_repo.h"
1617
#include "cli/cli.h"
1718
#include "watcher/watcher.h"
1819
#include "foundation/mem.h"
@@ -380,6 +381,23 @@ static const tool_def_t TOOLS[] = {
380381
"{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"},"
381382
"\"force\":{\"type\":\"boolean\",\"default\":false,\"description\":"
382383
"\"Re-generate all embeddings even if they already exist\"}},\"required\":[\"project\"]}"},
384+
385+
{"build_cross_repo_index",
386+
"Build unified cross-repo index for cross-repository search, channel matching, and flow tracing. "
387+
"Scans all indexed project databases and builds a _cross_repo.db with node stubs, channels, "
388+
"and embeddings from all repos. Enables search_graph with project='*' for cross-repo search, "
389+
"and trace_cross_repo for cross-service message flow tracing. Auto-rebuilds after each "
390+
"index_repository call, but can be triggered manually to refresh.",
391+
"{\"type\":\"object\",\"properties\":{}}"},
392+
393+
{"trace_cross_repo",
394+
"Trace message/event channels across repositories. Shows which services produce and consume "
395+
"a specific channel, with file-level and function-level detail. Requires build_cross_repo_index "
396+
"to have been run at least once.",
397+
"{\"type\":\"object\",\"properties\":{"
398+
"\"channel\":{\"type\":\"string\",\"description\":\"Channel name to trace (partial match). "
399+
"Omit to list all cross-repo channels.\"},"
400+
"\"repo\":{\"type\":\"string\",\"description\":\"Filter to channels involving a specific repo.\"}}}"},
383401
};
384402

385403
static const int TOOL_COUNT = sizeof(TOOLS) / sizeof(TOOLS[0]);
@@ -3870,6 +3888,102 @@ static char *handle_generate_embeddings(cbm_mcp_server_t *srv, const char *args)
38703888
return result;
38713889
}
38723890

3891+
/* ── build_cross_repo_index ──────────────────────────────────── */
3892+
3893+
/*
 * MCP handler for the "build_cross_repo_index" tool.
 *
 * Runs cbm_cross_repo_build() and serialises the returned statistics as a
 * JSON object. Neither the server handle nor the tool arguments are used.
 * A negative repos_scanned is reported as status "error" and marks the
 * MCP result as an error result.
 *
 * Returns the string produced by cbm_mcp_text_result().
 */
static char *handle_build_cross_repo_index(cbm_mcp_server_t *srv, const char *args) {
    (void)srv;
    (void)args;

    cbm_cross_repo_stats_t stats = cbm_cross_repo_build();
    bool failed = stats.repos_scanned < 0;

    yyjson_mut_doc *out = yyjson_mut_doc_new(NULL);
    yyjson_mut_val *obj = yyjson_mut_obj(out);
    yyjson_mut_doc_set_root(out, obj);

    /* Field order is kept stable so the emitted JSON does not change. */
    yyjson_mut_obj_add_str(out, obj, "status", failed ? "error" : "success");
    yyjson_mut_obj_add_int(out, obj, "repos_scanned", stats.repos_scanned);
    yyjson_mut_obj_add_int(out, obj, "nodes_copied", stats.nodes_copied);
    yyjson_mut_obj_add_int(out, obj, "channels_copied", stats.channels_copied);
    yyjson_mut_obj_add_int(out, obj, "embeddings_copied", stats.embeddings_copied);
    yyjson_mut_obj_add_int(out, obj, "cross_repo_channel_matches", stats.cross_repo_matches);
    yyjson_mut_obj_add_real(out, obj, "build_time_ms", stats.build_time_ms);

    char *payload = yy_doc_to_str(out);
    yyjson_mut_doc_free(out);

    char *reply = cbm_mcp_text_result(payload, failed);
    free(payload);
    return reply;
}
3918+
3919+
/* ── trace_cross_repo ────────────────────────────────────────── */
3920+
3921+
static char *handle_trace_cross_repo(cbm_mcp_server_t *srv, const char *args) {
3922+
(void)srv;
3923+
char *channel = cbm_mcp_get_string_arg(args, "channel");
3924+
3925+
cbm_cross_repo_t *cr = cbm_cross_repo_open();
3926+
if (!cr) {
3927+
free(channel);
3928+
return cbm_mcp_text_result(
3929+
"{\"error\":\"Cross-repo index not built. Run build_cross_repo_index first.\"}", true);
3930+
}
3931+
3932+
/* Get cross-repo info */
3933+
cbm_cross_repo_info_t info = {0};
3934+
cbm_cross_repo_get_info(cr, &info);
3935+
3936+
/* Get channel matches */
3937+
cbm_cross_channel_match_t *matches = NULL;
3938+
int match_count = 0;
3939+
cbm_cross_repo_match_channels(cr, channel, &matches, &match_count);
3940+
3941+
yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL);
3942+
yyjson_mut_val *root = yyjson_mut_obj(doc);
3943+
yyjson_mut_doc_set_root(doc, root);
3944+
3945+
yyjson_mut_obj_add_int(doc, root, "total_repos", info.total_repos);
3946+
yyjson_mut_obj_add_int(doc, root, "total_cross_repo_channels", info.cross_repo_channel_count);
3947+
yyjson_mut_obj_add_int(doc, root, "matches", match_count);
3948+
if (info.built_at)
3949+
yyjson_mut_obj_add_strcpy(doc, root, "built_at", info.built_at);
3950+
3951+
yyjson_mut_val *arr = yyjson_mut_arr(doc);
3952+
for (int i = 0; i < match_count; i++) {
3953+
cbm_cross_channel_match_t *m = &matches[i];
3954+
yyjson_mut_val *item = yyjson_mut_obj(doc);
3955+
yyjson_mut_obj_add_strcpy(doc, item, "channel", m->channel_name ? m->channel_name : "");
3956+
yyjson_mut_obj_add_strcpy(doc, item, "transport", m->transport ? m->transport : "");
3957+
3958+
yyjson_mut_val *emit = yyjson_mut_obj(doc);
3959+
yyjson_mut_obj_add_strcpy(doc, emit, "project", m->emit_project ? m->emit_project : "");
3960+
yyjson_mut_obj_add_strcpy(doc, emit, "file", m->emit_file ? m->emit_file : "");
3961+
yyjson_mut_obj_add_strcpy(doc, emit, "function", m->emit_function ? m->emit_function : "");
3962+
yyjson_mut_obj_add_val(doc, item, "emitter", emit);
3963+
3964+
yyjson_mut_val *listen = yyjson_mut_obj(doc);
3965+
yyjson_mut_obj_add_strcpy(doc, listen, "project", m->listen_project ? m->listen_project : "");
3966+
yyjson_mut_obj_add_strcpy(doc, listen, "file", m->listen_file ? m->listen_file : "");
3967+
yyjson_mut_obj_add_strcpy(doc, listen, "function", m->listen_function ? m->listen_function : "");
3968+
yyjson_mut_obj_add_val(doc, item, "listener", listen);
3969+
3970+
yyjson_mut_arr_add_val(arr, item);
3971+
}
3972+
yyjson_mut_obj_add_val(doc, root, "channel_flows", arr);
3973+
3974+
char *json = yy_doc_to_str(doc);
3975+
yyjson_mut_doc_free(doc);
3976+
3977+
cbm_cross_channel_free(matches, match_count);
3978+
cbm_cross_repo_info_free(&info);
3979+
cbm_cross_repo_close(cr);
3980+
free(channel);
3981+
3982+
char *result = cbm_mcp_text_result(json, false);
3983+
free(json);
3984+
return result;
3985+
}
3986+
38733987
/* ── Tool dispatch ────────────────────────────────────────────── */
38743988

38753989
// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
@@ -3937,6 +4051,12 @@ char *cbm_mcp_handle_tool(cbm_mcp_server_t *srv, const char *tool_name, const ch
39374051
if (strcmp(tool_name, "generate_embeddings") == 0) {
39384052
return handle_generate_embeddings(srv, args_json);
39394053
}
4054+
if (strcmp(tool_name, "build_cross_repo_index") == 0) {
4055+
return handle_build_cross_repo_index(srv, args_json);
4056+
}
4057+
if (strcmp(tool_name, "trace_cross_repo") == 0) {
4058+
return handle_trace_cross_repo(srv, args_json);
4059+
}
39404060
char msg[256];
39414061
snprintf(msg, sizeof(msg), "unknown tool: %s", tool_name);
39424062
return cbm_mcp_text_result(msg, true);

src/pipeline/pipeline.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "pipeline/pipeline_internal.h"
1515
#include "pipeline/worker_pool.h"
1616
#include "pipeline/embedding.h"
17+
#include "store/cross_repo.h"
1718
#include "graph_buffer/graph_buffer.h"
1819
#include "store/store.h"
1920
#include "discover/discover.h"
@@ -881,6 +882,16 @@ int cbm_pipeline_run(cbm_pipeline_t *p) {
881882
itoa_buf(cbm_gbuf_edge_count(p->gbuf)), "elapsed_ms",
882883
itoa_buf((int)elapsed_ms(t0)));
883884

885+
/* ── Auto-rebuild cross-repo index after indexing ── */
886+
{
887+
cbm_cross_repo_stats_t cr_stats = cbm_cross_repo_build();
888+
if (cr_stats.repos_scanned > 0) {
889+
cbm_log_info("pass.done", "pass", "cross_repo_index",
890+
"repos", itoa_buf(cr_stats.repos_scanned),
891+
"cross_channels", itoa_buf(cr_stats.cross_repo_matches));
892+
}
893+
}
894+
884895
cleanup:
885896
/* Free prescan if not already freed */
886897
if (ctx.prescan_cache) {

0 commit comments

Comments
 (0)