@@ -245,14 +245,19 @@ static int create_user_indexes(cbm_store_t *s) {
245245 if (rc != SQLITE_OK ) return rc ;
246246
247247 /* FTS5 full-text search index on node names for BM25 ranking.
248- * content='nodes' makes it an external-content table — synced via triggers.
248+ * content='' makes it a contentless table — it stores only the inverted index,
249+ * not the original text. This is required for camelCase token splitting:
250+ * we index "createSession create Session" but the source table has "createSession".
251+ * With content='nodes', FTS5 would re-verify against the source and fail to match
252+ * the split tokens. Contentless mode trusts the inverted index directly.
253+ * Trade-off: highlight()/snippet() unavailable, but we never use them.
249254 * Each DDL statement must be executed separately for FTS5 compatibility. */
250255 {
251256 char * fts_err = NULL ;
252257 int fts_rc = sqlite3_exec (s -> db ,
253258 "CREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5("
254259 "name, qualified_name, label, file_path,"
255- "content='nodes ', content_rowid='id',"
260+ "content='', content_rowid='id',"
256261 "tokenize='unicode61 remove_diacritics 2'"
257262 ");" ,
258263 NULL , NULL , & fts_err );
@@ -263,22 +268,30 @@ static int create_user_indexes(cbm_store_t *s) {
263268 }
264269 }
265270
266- /* Sync triggers: keep FTS index up to date when nodes change */
271+ /* Sync triggers: keep FTS index up to date when nodes change.
272+ * cbm_camel_split(name) splits camelCase into individual tokens so
273+ * "updateCloudClient" is searchable as "update", "Cloud", "Client".
274+ * Contentless FTS5 (content='') requires delete operations to provide the
275+ * exact same tokenized content that was originally inserted. */
267276 exec_sql (s , "CREATE TRIGGER IF NOT EXISTS nodes_fts_ai AFTER INSERT ON nodes BEGIN"
268277 " INSERT INTO nodes_fts(rowid, name, qualified_name, label, file_path)"
269- " VALUES (new.id, new.name, new.qualified_name, new.label, new.file_path);"
278+ " VALUES (new.id, cbm_camel_split(new.name), new.qualified_name,"
279+ " new.label, new.file_path);"
270280 "END;" );
271281
272282 exec_sql (s , "CREATE TRIGGER IF NOT EXISTS nodes_fts_ad AFTER DELETE ON nodes BEGIN"
273283 " INSERT INTO nodes_fts(nodes_fts, rowid, name, qualified_name, label, file_path)"
274- " VALUES ('delete', old.id, old.name, old.qualified_name, old.label, old.file_path);"
284+ " VALUES ('delete', old.id, cbm_camel_split(old.name), old.qualified_name,"
285+ " old.label, old.file_path);"
275286 "END;" );
276287
277288 exec_sql (s , "CREATE TRIGGER IF NOT EXISTS nodes_fts_au AFTER UPDATE ON nodes BEGIN"
278289 " INSERT INTO nodes_fts(nodes_fts, rowid, name, qualified_name, label, file_path)"
279- " VALUES ('delete', old.id, old.name, old.qualified_name, old.label, old.file_path);"
290+ " VALUES ('delete', old.id, cbm_camel_split(old.name), old.qualified_name,"
291+ " old.label, old.file_path);"
280292 " INSERT INTO nodes_fts(rowid, name, qualified_name, label, file_path)"
281- " VALUES (new.id, new.name, new.qualified_name, new.label, new.file_path);"
293+ " VALUES (new.id, cbm_camel_split(new.name), new.qualified_name,"
294+ " new.label, new.file_path);"
282295 "END;" );
283296
284297 return SQLITE_OK ;
@@ -338,6 +351,52 @@ static void sqlite_regexp(sqlite3_context *ctx, int argc, sqlite3_value **argv)
338351 sqlite3_result_int (ctx , rc == 0 ? 1 : 0 );
339352}
340353
354+ /* CamelCase token splitter for FTS5.
355+ * "updateCloudClient" → "updateCloudClient update Cloud Client"
356+ * "HTMLParser" → "HTMLParser HTML Parser"
357+ * "getURL" → "getURL get URL"
358+ * Preserves original name as first token for exact-match queries,
359+ * then appends space-split words for broad keyword matching. */
360+ static void sqlite_camel_split (sqlite3_context * ctx , int argc , sqlite3_value * * argv ) {
361+ (void )argc ;
362+ const char * input = (const char * )sqlite3_value_text (argv [0 ]);
363+ if (!input || !input [0 ]) {
364+ sqlite3_result_text (ctx , input ? input : "" , -1 , SQLITE_TRANSIENT );
365+ return ;
366+ }
367+
368+ char buf [2048 ];
369+ /* Start with the original name (preserves exact-match capability) */
370+ int len = snprintf (buf , sizeof (buf ), "%s " , input );
371+
372+ /* Walk input, insert space before each camelCase boundary:
373+ * - lowercase→Uppercase: "updateCloud" → "update Cloud"
374+ * - Uppercase→Uppercase+lowercase: "HTMLParser" → "HTML Parser" */
375+ for (int i = 0 ; input [i ] && len < (int )sizeof (buf ) - 2 ; i ++ ) {
376+ if (i > 0 ) {
377+ bool split = false;
378+ /* lowercase followed by Uppercase: updateC → update C */
379+ if (input [i ] >= 'A' && input [i ] <= 'Z' &&
380+ input [i - 1 ] >= 'a' && input [i - 1 ] <= 'z' ) {
381+ split = true;
382+ }
383+ /* Uppercase followed by Uppercase+lowercase: HTMLParser → HTML Parser
384+ * Only split before the LAST uppercase in a run */
385+ if (input [i ] >= 'A' && input [i ] <= 'Z' &&
386+ input [i - 1 ] >= 'A' && input [i - 1 ] <= 'Z' &&
387+ input [i + 1 ] >= 'a' && input [i + 1 ] <= 'z' ) {
388+ split = true;
389+ }
390+ if (split ) {
391+ buf [len ++ ] = ' ' ;
392+ }
393+ }
394+ buf [len ++ ] = input [i ];
395+ }
396+ buf [len ] = '\0' ;
397+ sqlite3_result_text (ctx , buf , len , SQLITE_TRANSIENT );
398+ }
399+
341400/* Case-insensitive REGEXP variant */
342401static void sqlite_iregexp (sqlite3_context * ctx , int argc , sqlite3_value * * argv ) {
343402 (void )argc ;
@@ -411,6 +470,9 @@ static cbm_store_t *store_open_internal(const char *path, bool in_memory) {
411470 /* Case-insensitive variant for search with case_sensitive=false */
412471 sqlite3_create_function (s -> db , "iregexp" , 2 , SQLITE_UTF8 | SQLITE_DETERMINISTIC , NULL ,
413472 sqlite_iregexp , NULL , NULL );
473+ /* CamelCase splitter for FTS5 indexing — used in triggers and backfill */
474+ sqlite3_create_function (s -> db , "cbm_camel_split" , 1 , SQLITE_UTF8 | SQLITE_DETERMINISTIC ,
475+ NULL , sqlite_camel_split , NULL , NULL );
414476
415477 if (configure_pragmas (s , in_memory ) != CBM_STORE_OK || init_schema (s ) != CBM_STORE_OK ||
416478 create_user_indexes (s ) != CBM_STORE_OK ) {
@@ -463,6 +525,9 @@ cbm_store_t *cbm_store_open_path_query(const char *db_path) {
463525 sqlite_regexp , NULL , NULL );
464526 sqlite3_create_function (s -> db , "iregexp" , 2 , SQLITE_UTF8 | SQLITE_DETERMINISTIC , NULL ,
465527 sqlite_iregexp , NULL , NULL );
528+ /* CamelCase splitter for FTS5 — must be registered before triggers fire */
529+ sqlite3_create_function (s -> db , "cbm_camel_split" , 1 , SQLITE_UTF8 | SQLITE_DETERMINISTIC ,
530+ NULL , sqlite_camel_split , NULL , NULL );
466531
467532 if (configure_pragmas (s , false) != CBM_STORE_OK ) {
468533 sqlite3_close (s -> db );
@@ -2060,9 +2125,12 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear
20602125 }
20612126
20622127 char fts_sql [4096 ];
2063- /* Join with FTS5 table, filter by project/label, order by BM25 rank.
2064- * Exclude noise labels (File, Folder, Module, Section, Variable, Project)
2065- * and boost Function/Method/Class via a structural score added to BM25. */
2128+ /* Join with FTS5 table, filter by project/label, order by BM25 relevance plus a label boost.
2129+ * Exclude noise labels (File, Folder, Module, Section, Variable, Project).
2130+ * Label-type boost: prefer Functions/Methods/Routes/Classes over generic nodes.
2131+ * No fan_in/popularity boost — that corrupts relevance for discovery queries
2132+ * (e.g. "update" with fan_in=222 would outrank the actually relevant match).
2133+ * in_deg/out_deg are still returned for display but do NOT affect ranking. */
20662134 int flen = snprintf (fts_sql , sizeof (fts_sql ),
20672135 "SELECT n.id, n.project, n.label, n.name, n.qualified_name, "
20682136 "n.file_path, n.start_line, n.end_line, n.properties, "
@@ -2073,7 +2141,6 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear
20732141 " WHEN n.label IN ('Class','Interface','Type') THEN 5.0 "
20742142 " WHEN n.label = 'Route' THEN 8.0 "
20752143 " ELSE 0.0 END "
2076- " - CASE WHEN (SELECT COUNT(*) FROM edges e WHERE e.target_id = n.id AND e.type = 'CALLS') > 5 THEN 3.0 ELSE 0.0 END"
20772144 ") AS rank "
20782145 "FROM nodes_fts "
20792146 "JOIN nodes n ON n.id = nodes_fts.rowid "
0 commit comments