Skip to content

Commit bd65ec0

Browse files
committed
Added syncing and fixed some issues in vtable search.
1 parent 3c7bf1f commit bd65ec0

8 files changed

Lines changed: 1033 additions & 43 deletions

File tree

API.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ A SQLite extension that provides semantic memory capabilities with hybrid search
1212
- [Configuration Functions](#configuration-functions)
1313
- [Memory Management Functions](#memory-management-functions)
1414
- [Deletion Functions](#deletion-functions)
15+
- [Sync Functions](#sync-functions)
1516
- [Virtual Table Module](#virtual-table-module)
1617
- [C API](#c-api)
1718
- [Configuration Options](#configuration-options)
@@ -399,6 +400,56 @@ SELECT memory_cache_clear('openai', 'text-embedding-3-small');
399400

400401
---
401402

403+
### Sync Functions
404+
405+
These functions require [sqlite-sync](https://github.com/sqliteai/sqlite-sync) to be loaded before use.
406+
407+
#### `memory_enable_sync([context TEXT, ...])`
408+
409+
Enables CRDT-based synchronization for `dbmem_content` via sqlite-sync. Uses the CLS algorithm with block-level LWW on the `value` column for fine-grained conflict resolution.
410+
411+
**Parameters:** Zero or more TEXT context names. If no arguments are given, all memory is synced. If one or more context names are provided, only rows matching those contexts are synced.
412+
413+
**Returns:** INTEGER - 1 on success
414+
415+
**Notes:**
416+
- Requires sqlite-sync to be loaded; returns an error otherwise
417+
- Idempotent: safe to call multiple times — each call is a full reconfiguration
418+
- With no arguments, any previously-set context filter is cleared (sync all)
419+
- With arguments, sets a row-level filter: only the specified contexts are replicated
420+
- Block-level LWW on `value` enables line-level conflict resolution for text content
421+
- All other columns use the default CLS algorithm
422+
423+
**Example:**
424+
```sql
425+
-- Sync all memory
426+
SELECT memory_enable_sync();
427+
428+
-- Sync only specific contexts
429+
SELECT memory_enable_sync('conversation', 'project-docs');
430+
```
431+
432+
---
433+
434+
#### `memory_disable_sync()`
435+
436+
Removes synchronization infrastructure from `dbmem_content`, disabling all replication. The table data is preserved.
437+
438+
**Parameters:** None
439+
440+
**Returns:** INTEGER - 1 on success
441+
442+
**Notes:**
443+
- Requires sqlite-sync to be loaded; returns an error otherwise
444+
- Safe to call even if sync was never enabled
445+
446+
**Example:**
447+
```sql
448+
SELECT memory_disable_sync();
449+
```
450+
451+
---
452+
402453
### `memory_search`
403454

404455
A virtual table for performing hybrid semantic search.

Makefile

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,30 @@ e2e: $(BUILD_DEPS) $(TARGET) $(BUILD_DIR)/e2e $(VECTOR_LIB)
472472
@VECTOR_LIB=$(CURDIR)/$(VECTOR_LIB) $(BUILD_DIR)/e2e
473473
@echo "E2E tests passed!"
474474

475+
SYNC_LIB := $(TEST_DIR)/sync/cloudsync.$(EXT)
476+
477+
$(BUILD_DIR)/test_sync.o: $(TEST_DIR)/sync/test_sync.c | $(BUILD_DIR)
478+
@echo "Compiling test_sync.c..."
479+
@$(CC) $(CFLAGS) $(TEST_DEFINES) $(DEFINES) $(INCLUDES) -c $< -o $@
480+
481+
$(BUILD_DIR)/test_sync: $(BUILD_DIR)/test_sync.o $(TEST_C_OBJECTS) $(TEST_OBJC_OBJECTS) $(TEST_SQLITE_OBJ) $(LLAMA_LIBS) $(CURL_DEPS) | $(BUILD_DIR)
482+
@echo "Linking test_sync..."
483+
@$(LINKER) $(BUILD_DIR)/test_sync.o $(TEST_C_OBJECTS) $(TEST_OBJC_OBJECTS) $(TEST_SQLITE_OBJ) $(LLAMA_LIBS) \
484+
$(TEST_LDFLAGS) $(FRAMEWORKS) $(TEST_LINK_EXTRAS) \
485+
-o $@
486+
487+
.PHONY: sync-test
488+
sync-test: $(BUILD_DEPS) $(TARGET) $(BUILD_DIR)/test_sync $(VECTOR_LIB)
489+
@echo "Running sync integration test..."
490+
@APIKEY=$${APIKEY} \
491+
VECTOR_LIB=$(CURDIR)/$(VECTOR_LIB) \
492+
SYNC_LIB=$(CURDIR)/$(SYNC_LIB) \
493+
SYNC_DB_ID=$${SYNC_DB_ID:-db_hqjmctyiplop4qn34lt7y74nli} \
494+
SYNC_APIKEY_A=$${SYNC_APIKEY_A:-rsn7t70bV4KccIlqBZqU0QGfYaN11v9v6LBAOoGYGv8} \
495+
SYNC_APIKEY_B=$${SYNC_APIKEY_B:-GB02PvlrhGAENj0xKHF5KAwHW4BKhSQPvxApS5g8NVM} \
496+
$(BUILD_DIR)/test_sync
497+
@echo "Sync test passed!"
498+
475499
.PHONY: remote
476500
remote:
477501
@$(MAKE) OMIT_LOCAL_ENGINE=1 extension

README.md

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,11 +164,55 @@ Every sync operation is wrapped in a SQLite SAVEPOINT transaction. If anything f
164164

165165
This makes all sync functions safe to call repeatedly - for example, on a cron schedule or at agent startup - with minimal overhead.
166166

167-
## AI Agents Offline Syncing
167+
## Agent Memory Sync
168168

169-
Thanks to sqlite-sync, agents can share knowledge. Each markdown file added to the database is intelligently parsed and subdivided into chunks, and a [block-based LWW CRDT algorithm](https://github.com/sqliteai/sqlite-sync?tab=readme-ov-file#block-level-lww) keeps everything in sync. All memory, or just a specific memory context, can be kept in sync between agents.
169+
Multiple agents can share and merge knowledge without any coordination. Each agent works independently with its own local SQLite database, syncing through a shared [SQLiteCloud](https://sqlitecloud.io/) managed database when connectivity is available.
170170

171-
Memory syncing will be exposed in version 0.9.0.
171+
Enable sync on a database connection before ingesting content:
172+
173+
```sql
174+
-- Load the sqlite-sync extension
175+
SELECT load_extension('./cloudsync');
176+
177+
-- Enable CRDT sync (optionally scoped to a specific context)
178+
SELECT memory_enable_sync(); -- sync all memory
179+
SELECT memory_enable_sync('project-x'); -- or: sync only the 'project-x' context (each call replaces the previous configuration)
180+
181+
-- Connect to the shared cloud database
182+
SELECT cloudsync_network_init('your-managed-database-id');
183+
SELECT cloudsync_network_set_apikey('your-api-key');
184+
185+
-- Ingest content normally — CRDT tracks every write
186+
SELECT memory_add_text('Agent A findings...', 'research');
187+
188+
-- Push local changes and pull remote ones (call twice for full bidirectional exchange)
189+
SELECT cloudsync_network_sync(500, 3);
190+
SELECT cloudsync_network_sync(500, 3);
191+
192+
-- Generate embeddings for any content received from other agents
193+
SELECT memory_reindex();
194+
```
195+
196+
Each piece of text added to the database is parsed into chunks and tracked by a [block-level LWW CRDT algorithm](https://github.com/sqliteai/sqlite-sync?tab=readme-ov-file#block-level-lww), which merges line-level changes from concurrent agents without conflicts. Only the `dbmem_content` table is synced — embeddings are always generated locally after receiving new content.
197+
198+
### Why This Matters for AI Systems
199+
200+
The combination of local-first memory and CRDT sync enables agent architectures that are not possible with centralized databases:
201+
202+
- **No single point of failure** — each agent has a complete, queryable copy of shared memory
203+
- **Offline-capable** — agents ingest and search without network access; sync catches up when connectivity returns
204+
- **Selective sharing** — `memory_enable_sync('context')` limits sync to a named context, so agents can keep private memory separate from shared memory
205+
- **Scales to many agents** — agents running on different nodes accumulate knowledge in parallel and merge into a single consistent corpus without coordination
206+
207+
### Working Example
208+
209+
[`test/sync/`](test/sync/) contains a full integration test that walks through the entire flow:
210+
211+
- Agent A indexes knowledge about the James Webb Space Telescope
212+
- Agent B indexes knowledge about the Great Barrier Reef
213+
- After sync, **both agents can answer questions about both topics** — knowledge each agent never directly indexed
214+
215+
See [`test/sync/README.md`](test/sync/README.md) for setup instructions, SQLiteCloud account configuration, and how to run the test.
172216

173217
## Use Cases
174218

src/dbmem-search.c

Lines changed: 43 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,8 @@ static int dbmem_semantic_search (sqlite3 *db, vMemorySearchCursor *c, float *em
382382
static const char *sql_with_context =
383383
"SELECT v.distance, e.hash, e.seq FROM dbmem_vault AS e "
384384
"JOIN vector_full_scan('dbmem_vault', 'embedding', ?1, ?2) AS v ON e.rowid = v.rowid "
385-
"WHERE INSTR(',' || ?3 || ',', ',' || e.context || ',') > 0";
385+
"JOIN dbmem_content AS c ON e.hash = c.hash "
386+
"WHERE INSTR(',' || ?3 || ',', ',' || c.context || ',') > 0";
386387
const char *sql = (context) ? sql_with_context : sql_no_context;
387388

388389
sqlite3_stmt *vm = NULL;
@@ -452,32 +453,44 @@ static int vMemorySearchDisconnect (sqlite3_vtab *pVtab) {
452453
return SQLITE_OK;
453454
}
454455

456+
// idxNum bitmask for vMemorySearchCursorFilter to decode argv positions:
457+
#define SEARCH_IDX_QUERY 0x01 // argv[0] = query text
458+
#define SEARCH_IDX_MAXITEMS 0x02 // next argv = max_results
459+
#define SEARCH_IDX_CONTEXT 0x04 // next argv = context name
460+
455461
static int vMemorySearchBestIndex (sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo) {
456462
UNUSED_PARAM(tab);
457463
pIdxInfo->estimatedCost = (double)1;
458464
pIdxInfo->estimatedRows = 100;
459465
pIdxInfo->orderByConsumed = 1;
460-
pIdxInfo->idxNum = 1;
461-
466+
467+
// Assign consecutive argvIndex values (no gaps) and record which columns are
468+
// present in idxNum so xFilter can decode argv without ambiguity.
469+
int idxNum = 0;
470+
int argvIndex = 1;
462471
const struct sqlite3_index_constraint *pConstraint = pIdxInfo->aConstraint;
463-
for(int i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
464-
if( pConstraint->usable == 0 ) continue;
465-
if( pConstraint->op != SQLITE_INDEX_CONSTRAINT_EQ ) continue;
466-
switch( pConstraint->iColumn ){
472+
for (int i = 0; i < pIdxInfo->nConstraint; i++, pConstraint++) {
473+
if (pConstraint->usable == 0) continue;
474+
if (pConstraint->op != SQLITE_INDEX_CONSTRAINT_EQ) continue;
475+
switch (pConstraint->iColumn) {
467476
case SEARCH_COLUMN_QUERY:
468-
pIdxInfo->aConstraintUsage[i].argvIndex = 1;
477+
pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
469478
pIdxInfo->aConstraintUsage[i].omit = 1;
479+
idxNum |= SEARCH_IDX_QUERY;
470480
break;
471481
case SEARCH_COLUMN_MAXITEMS:
472-
pIdxInfo->aConstraintUsage[i].argvIndex = 2;
482+
pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
473483
pIdxInfo->aConstraintUsage[i].omit = 1;
484+
idxNum |= SEARCH_IDX_MAXITEMS;
474485
break;
475486
case SEARCH_COLUMN_CONTEXT:
476-
pIdxInfo->aConstraintUsage[i].argvIndex = 3;
487+
pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
477488
pIdxInfo->aConstraintUsage[i].omit = 1;
489+
idxNum |= SEARCH_IDX_CONTEXT;
478490
break;
479491
}
480492
}
493+
pIdxInfo->idxNum = idxNum;
481494
return SQLITE_OK;
482495
}
483496

@@ -559,31 +572,35 @@ static int vMemorySearchCursorFilter (sqlite3_vtab_cursor *cur, int idxNum, cons
559572
dbmem_context *ctx = searchTab->ctx;
560573
sqlite3 *db = searchTab->db;
561574

562-
// check and retrieve arguments
575+
// Decode arguments using the idxNum bitmask set by xBestIndex.
576+
// argvIndex values are consecutive so argv positions are derived from the bitmask.
563577
int max_results = dbmem_context_max_results(ctx);
564578
bool perform_fts = dbmem_context_perform_fts(ctx);
565579
const char *query = NULL;
566580
const char *context = NULL;
567-
568-
if (argc <= 0) {
581+
582+
if (!(idxNum & SEARCH_IDX_QUERY) || argc <= 0) {
569583
sqlvTab->zErrMsg = sqlite3_mprintf("The memory_search function expects at least one query argument of type TEXT");
570584
return SQLITE_ERROR;
571585
}
572-
573-
if (sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
574-
sqlvTab->zErrMsg = sqlite3_mprintf("The first query argument of memory_search must be of type TEXT");
575-
return SQLITE_ERROR;
576-
}
577-
query = (const char *)sqlite3_value_text(argv[0]);
578-
579-
if (argc > 1) {
580-
// only the next two arguments are handled
581-
for (int i=1; i<argc && i<=2; ++i) {
582-
if (sqlite3_value_type(argv[i]) == SQLITE_INTEGER) max_results = sqlite3_value_int(argv[i]);
583-
else if (sqlite3_value_type(argv[i]) == SQLITE_TEXT) context = (const char *)sqlite3_value_text(argv[i]);
584-
// ignore any other type
586+
587+
int argPos = 0;
588+
if (idxNum & SEARCH_IDX_QUERY) {
589+
if (sqlite3_value_type(argv[argPos]) != SQLITE_TEXT) {
590+
sqlvTab->zErrMsg = sqlite3_mprintf("The first query argument of memory_search must be of type TEXT");
591+
return SQLITE_ERROR;
585592
}
593+
query = (const char *)sqlite3_value_text(argv[argPos++]);
594+
}
595+
if (idxNum & SEARCH_IDX_MAXITEMS) {
596+
if (sqlite3_value_type(argv[argPos]) == SQLITE_INTEGER) max_results = sqlite3_value_int(argv[argPos]);
597+
argPos++;
598+
}
599+
if (idxNum & SEARCH_IDX_CONTEXT) {
600+
if (sqlite3_value_type(argv[argPos]) == SQLITE_TEXT) context = (const char *)sqlite3_value_text(argv[argPos]);
601+
argPos++;
586602
}
603+
(void)argPos; (void)argc; (void)idxStr;
587604

588605
// compute fetch count (oversampling)
589606
int oversample = dbmem_context_search_oversample(ctx);

0 commit comments

Comments
 (0)