Skip to content

Commit 1139641

Browse files
committed
Minor issues fixed, clarified README and raised version to 0.8.5
1 parent ff42d59 commit 1139641

File tree

8 files changed

+252
-104
lines changed

8 files changed

+252
-104
lines changed

API.md

Lines changed: 134 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ A SQLite extension that provides semantic memory capabilities with hybrid search
1313
- [Memory Management Functions](#memory-management-functions)
1414
- [Deletion Functions](#deletion-functions)
1515
- [Virtual Table Module](#virtual-table-module)
16+
- [C API](#c-api)
1617
- [Configuration Options](#configuration-options)
1718
- [Timestamps](#timestamps)
1819
- [Examples](#examples)
@@ -407,39 +408,164 @@ A virtual table for performing hybrid semantic search.
407408
SELECT * FROM memory_search WHERE query = 'search text';
408409
```
409410

410-
**Columns:**
411+
**Hidden filter columns (used in WHERE):**
412+
| Column | Type | Required | Description |
413+
|--------|------|----------|-------------|
414+
| `query` | TEXT | Yes | The search query |
415+
| `max_entries` | INTEGER | No | Override `max_results` setting for this query only |
416+
| `context` | TEXT | No | Restrict results to a specific context label |
417+
418+
**Output columns:**
411419
| Column | Type | Description |
412420
|--------|------|-------------|
413-
| `query` | TEXT (HIDDEN) | Search query (required in WHERE clause) |
414421
| `hash` | INTEGER | Content hash identifier |
422+
| `seq` | INTEGER | Chunk sequence number within the document (0-based) |
423+
| `ranking` | REAL | Combined similarity score (0.0 - 1.0) |
415424
| `path` | TEXT | Source file path or generated UUID for text content |
416-
| `context` | TEXT | Context label (NULL if not set) |
417425
| `snippet` | TEXT | Text snippet from the matching chunk |
418-
| `ranking` | REAL | Combined similarity score (0.0 - 1.0) |
419426

420427
**Notes:**
421428
- Requires sqlite-vector extension loaded first
422429
- Performs hybrid search combining vector similarity and FTS5
423430
- Results are ranked by combined score
424-
- Limited by `max_results` setting (default: 20)
431+
- Limited by the `max_results` setting (default: 20), which can be overridden for a single query with `max_entries`
425432
- Filtered by `min_score` setting (default: 0.7)
426433
- Updates `last_accessed` timestamp if `update_access` is enabled
427434

428435
**Example:**
429436
```sql
430437
-- Basic search
431-
SELECT * FROM memory_search WHERE query = 'database indexing strategies';
438+
SELECT path, snippet, ranking FROM memory_search WHERE query = 'database indexing strategies';
432439

433440
-- Search with ranking filter
434441
SELECT path, snippet, ranking
435442
FROM memory_search
436443
WHERE query = 'how to optimize queries'
437444
AND ranking > 0.8;
438445

439-
-- Search within a specific context
440-
SELECT * FROM memory_search
446+
-- Restrict to a specific context
447+
SELECT path, snippet, ranking
448+
FROM memory_search
441449
WHERE query = 'meeting action items'
442450
AND context = 'meetings';
451+
452+
-- Override result limit for this query only
453+
SELECT path, snippet, ranking
454+
FROM memory_search
455+
WHERE query = 'architecture overview'
456+
AND max_entries = 5;
457+
458+
-- Get the chunk sequence number (useful for reconstructing document order)
459+
SELECT path, seq, snippet, ranking
460+
FROM memory_search
461+
WHERE query = 'installation steps';
462+
```
463+
464+
---
465+
466+
## C API
467+
468+
In addition to the SQL interface, sqlite-memory exposes a C API for registering custom embedding providers directly from application code.
469+
470+
### `sqlite3_memory_register_provider`
471+
472+
```c
473+
int sqlite3_memory_register_provider(
474+
sqlite3 *db,
475+
const char *provider_name,
476+
const dbmem_provider_t *provider
477+
);
478+
```
479+
480+
Registers a custom embedding engine for a specific database connection. Once registered, calling `memory_set_model(provider_name, model)` from SQL will use your engine instead of the built-in local or remote engines.
481+
482+
**Parameters:**
483+
| Parameter | Type | Description |
484+
|-----------|------|-------------|
485+
| `db` | `sqlite3 *` | The database connection to register the provider on |
486+
| `provider_name` | `const char *` | Name used to activate the provider via `memory_set_model()` |
487+
| `provider` | `const dbmem_provider_t *` | Pointer to a struct containing the engine callbacks |
488+
489+
**Returns:** `SQLITE_OK` on success, or a SQLite error code.
490+
491+
**`dbmem_provider_t` struct:**
492+
```c
493+
typedef struct {
494+
// Called when memory_set_model(provider_name, model) is executed.
495+
// api_key is the value set via memory_set_apikey() (may be NULL).
496+
// xdata is the user pointer from this struct.
497+
// Return an opaque engine pointer on success, or NULL on error (fill err_msg).
498+
void *(*init)(const char *model, const char *api_key, void *xdata, char err_msg[1024]);
499+
500+
// Compute the embedding for the given text.
501+
// Return 0 on success, non-zero on error.
502+
int (*compute)(void *engine, const char *text, int text_len, void *xdata, dbmem_embedding_result_t *result);
503+
504+
// Free the engine. Called on context teardown or when the model changes.
505+
// May be NULL if no cleanup is needed.
506+
void (*free)(void *engine, void *xdata);
507+
508+
// Optional user-supplied pointer passed to all three callbacks.
509+
void *xdata;
510+
} dbmem_provider_t;
511+
```
512+
513+
**`dbmem_embedding_result_t` struct:**
514+
```c
515+
typedef struct {
516+
int n_tokens; // Number of tokens processed
517+
int n_tokens_truncated; // Tokens that were truncated (0 if none)
518+
int n_embd; // Embedding dimension
519+
float *embedding; // Embedding vector (engine-owned, valid until next call or free)
520+
} dbmem_embedding_result_t;
521+
```
522+
523+
**Notes:**
524+
- Works regardless of `DBMEM_OMIT_LOCAL_ENGINE` / `DBMEM_OMIT_REMOTE_ENGINE` compile flags
525+
- The `embedding` buffer in `dbmem_embedding_result_t` is engine-owned: it must remain valid until the next `compute` call or until `free` is called, because the code that calls `compute` does not copy it
526+
- Only one custom provider can be registered per connection at a time; registering again replaces the previous one
527+
- The provider struct is copied by value; the caller does not need to keep it alive after registration
528+
529+
**Example:**
530+
```c
531+
#include "sqlite-memory.h"
532+
533+
typedef struct { int dimension; } MyEngine;
534+
535+
static void *my_init(const char *model, const char *api_key, void *xdata, char err_msg[1024]) {
536+
MyEngine *e = malloc(sizeof(MyEngine));
537+
e->dimension = 384;
538+
return e;
539+
}
540+
541+
static int my_compute(void *engine, const char *text, int text_len, void *xdata,
542+
dbmem_embedding_result_t *result) {
543+
MyEngine *e = (MyEngine *)engine;
544+
static float vec[384];
545+
// ... fill vec with your embedding ...
546+
result->n_embd = e->dimension;
547+
result->n_tokens = text_len / 4;
548+
result->n_tokens_truncated = 0;
549+
result->embedding = vec;
550+
return 0;
551+
}
552+
553+
static void my_free(void *engine, void *xdata) {
554+
free(engine);
555+
}
556+
557+
// Register before using the database
558+
dbmem_provider_t provider = {
559+
.init = my_init,
560+
.compute = my_compute,
561+
.free = my_free,
562+
.xdata = NULL,
563+
};
564+
sqlite3_memory_register_provider(db, "my-engine", &provider);
565+
566+
// Then from SQL:
567+
// SELECT memory_set_model('my-engine', 'my-model-name');
568+
// SELECT memory_add_text('some text to embed');
443569
```
444570
445571
---

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ OUTPUT_NAME := memory
6060
ifeq ($(PLATFORM),macos)
6161
EXT := dylib
6262
FRAMEWORKS := -framework Security
63-
LDFLAGS := -dynamiclib $(FRAMEWORKS)
63+
LDFLAGS := -dynamiclib -undefined dynamic_lookup $(FRAMEWORKS)
6464
INCLUDES += -I/opt/homebrew/include -I/usr/local/include
6565
TEST_LDFLAGS := -L/opt/homebrew/lib -L/usr/local/lib -lsqlite3
6666
STRIP_CMD = strip -x -S $(TARGET)
@@ -196,7 +196,7 @@ ifeq ($(OMIT_LOCAL_ENGINE),0)
196196
else
197197
LLAMA_OPTIONS += '-DCMAKE_OSX_ARCHITECTURES=x86_64;arm64'
198198
endif
199-
LDFLAGS := -dynamiclib -framework Metal -framework Foundation -framework Accelerate -framework Security
199+
LDFLAGS := -dynamiclib -undefined dynamic_lookup -framework Metal -framework Foundation -framework Accelerate -framework Security
200200
ifeq ($(ARCH),x86_64)
201201
LDFLAGS += -arch x86_64
202202
else ifeq ($(ARCH),arm64)

README.md

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# SQLite Memory
22

3-
A SQLite extension that gives AI agents persistent, searchable memory. Features hybrid semantic search (vector similarity + FTS5), markdown-aware chunking, and local embedding via llama.cpp. Memory databases can be synced between agents using **offline first technology** each agent works independently and syncs when connected, making it ideal for distributed AI systems, edge deployments, and collaborative agent architectures.
3+
A SQLite extension that gives AI agents persistent, searchable memory, optimized for markdown content. Features hybrid semantic search (vector similarity + FTS5), markdown-aware chunking, and local embedding via llama.cpp.
4+
5+
Agent memory databases can be synchronized between agents using **offline-first technology** via [sqlite-sync](https://github.com/sqliteai/sqlite-sync). Each agent works independently and syncs when connected, making it ideal for distributed AI systems, edge deployments, and collaborative agent architectures.
46

57
## The Future of AI Agent Memory
68

@@ -33,10 +35,10 @@ sqlite-memory bridges these concepts, allowing any SQLite-powered application to
3335

3436
- **Hybrid Search**: Combines vector similarity (cosine distance) with FTS5 full-text search for superior retrieval
3537
- **Smart Chunking**: Markdown-aware parsing preserves semantic boundaries
36-
- **Intelligent Sync**: Content-hash change detection, unchanged files are skipped, modified files are atomically replaced, deleted files are cleaned up
37-
- **Transactional Safety**: Every sync operation runs inside a SAVEPOINT transaction, either fully succeeds or fully rolls back, no partially-indexed content
38+
- **Intelligent Sync**: Content-hash change detection skips unchanged files, atomically replaces modified ones, and cleans up deleted ones
39+
- **Transactional Safety**: Every sync operation runs inside a SAVEPOINT transaction, so it either fully succeeds or fully rolls back, leaving no partially indexed content
3840
- **Efficient Storage**: Binary embeddings with configurable dimensions
39-
- **Embedding Cache**: Automatically caches computed embeddings so re-indexing the same text skips redundant API calls and computation
41+
- **Embedding Cache**: Automatically caches computed embeddings, so re-indexing the same text skips redundant API calls and computation
4042
- **Flexible Embedding**: Use local models (llama.cpp) or [vectors.space](https://vectors.space) remote API
4143

4244
## Architecture
@@ -63,14 +65,16 @@ sqlite-memory bridges these concepts, allowing any SQLite-powered application to
6365

6466
- SQLite
6567
- [sqlite-vector](https://github.com/sqliteai/sqlite-vector) extension
68+
- [sqlite-sync](https://github.com/sqliteai/sqlite-sync) extension (optional, only needed for agent sync)
6669
- **For local embeddings**: A GGUF embedding model (e.g., [nomic-embed-text](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF))
6770
- **For remote embeddings**: A free API key from [vectors.space](https://vectors.space)
6871

6972
### Quick Start
7073

7174
```sql
72-
-- Load extensions
75+
-- Load extensions (sync is optional)
7376
.load ./vector
77+
.load ./sync
7478
.load ./memory
7579

7680
-- Configure embedding model (choose one):
@@ -79,6 +83,7 @@ sqlite-memory bridges these concepts, allowing any SQLite-powered application to
7983
SELECT memory_set_model('local', '/path/to/nomic-embed-text-v1.5.Q8_0.gguf');
8084

8185
-- Option 2: Remote embedding via vectors.space (requires free API key from https://vectors.space)
86+
-- The provider name 'openai' selects the vectors.space OpenAI-compatible endpoint.
8287
-- SELECT memory_set_model('openai', 'text-embedding-3-small');
8388
-- SELECT memory_set_apikey('your-vectorspace-api-key');
8489

@@ -91,7 +96,7 @@ SELECT memory_add_text('Vector databases store data as high-dimensional vectors,
9196
enabling similarity search. They are essential for semantic search, recommendation
9297
systems, and AI applications.', 'concepts');
9398

94-
-- Sync an entire documentation directory
99+
-- Add an entire documentation directory
95100
SELECT memory_add_directory('/path/to/docs', 'project-docs');
96101

97102
-- Search your memory semantically
@@ -149,15 +154,21 @@ memories = recall("what's the project timeline")
149154

150155
All `memory_add_*` functions use content-hash change detection to avoid redundant work:
151156

152-
- **`memory_add_text`** Computes a hash of the content. If the same content was already indexed, it is skipped entirely. No duplicate embeddings are ever created.
153-
- **`memory_add_file`** Reads the file and hashes its content. If the file was previously indexed with different content, the old entry (chunks, embeddings, FTS) is atomically replaced. Unchanged files are skipped.
154-
- **`memory_add_directory`** Performs a full two-phase sync:
157+
- **`memory_add_text`**: Computes a hash of the content. If the same content was already indexed, it is skipped entirely. No duplicate embeddings are ever created.
158+
- **`memory_add_file`**: Reads the file and hashes its content. If the file was previously indexed with different content, the old entry (chunks, embeddings, FTS) is atomically replaced. Unchanged files are skipped.
159+
- **`memory_add_directory`**: Performs a full two-phase sync:
155160
1. **Cleanup**: Removes database entries for files that no longer exist on disk
156-
2. **Scan**: Recursively processes all matching files adding new ones, replacing modified ones, and skipping unchanged ones
161+
2. **Scan**: Recursively processes all matching files - adding new ones, replacing modified ones, and skipping unchanged ones
157162

158163
Every sync operation is wrapped in a SQLite SAVEPOINT transaction. If anything fails mid-sync (embedding error, disk issue, etc.), the entire operation rolls back cleanly. There is no risk of partially-indexed files or orphaned entries.
159164

160-
This makes all sync functions safe to call repeatedly — for example, on a cron schedule or at agent startup — with minimal overhead.
165+
This makes all sync functions safe to call repeatedly - for example, on a cron schedule or at agent startup - with minimal overhead.
166+
167+
## AI Agents Offline Syncing
168+
169+
Thanks to sqlite-sync, agents can share knowledge. Each markdown file added to the database is intelligently parsed and subdivided into chunks, and a [block-based LWW CRDT algorithm](https://github.com/sqliteai/sqlite-sync?tab=readme-ov-file#block-level-lww) keeps everything in sync. All memory, or just a specific memory context, can be kept in sync between agents.
170+
171+
Note: memory syncing is not yet available in the current release; it will be exposed in version 0.9.0.
161172

162173
## Use Cases
163174

@@ -197,8 +208,7 @@ SELECT memory_cache_clear(); -- Clear cached embedding
197208

198209
```sql
199210
-- View all memories
200-
SELECT hash, path, context,
201-
datetime(created_at, 'unixepoch', 'localtime') as created
211+
SELECT hash, path, context, datetime(created_at, 'unixepoch', 'localtime') as created
202212
FROM dbmem_content;
203213

204214
-- Delete by context
@@ -219,7 +229,7 @@ For complete API documentation, including all functions and configuration option
219229

220230
```bash
221231
# Clone with submodules
222-
git clone --recursive https://github.com/user/sqlite-memory.git
232+
git clone --recursive https://github.com/sqliteai/sqlite-memory.git
223233
cd sqlite-memory
224234

225235
# Build (full build with local + remote engines)
@@ -259,17 +269,17 @@ MIT License - see [LICENSE](LICENSE) for details.
259269

260270
## Part of the SQLite AI Ecosystem
261271

262-
This project is part of the **SQLite AI** ecosystem, a collection of extensions that bring modern AI capabilities to the worlds most widely deployed database. The goal is to make SQLite the default data and inference engine for Edge AI applications.
272+
This project is part of the **SQLite AI** ecosystem, a collection of extensions that bring modern AI capabilities to the world's most widely deployed database. The goal is to make SQLite the default data and inference engine for Edge AI applications.
263273

264274
Other projects in the ecosystem include:
265275

266-
- **[SQLite-AI](https://github.com/sqliteai/sqlite-ai)** On-device inference and embedding generation directly inside SQLite.
267-
- **[SQLite-Memory](https://github.com/sqliteai/sqlite-memory)** Markdown-based AI agent memory with semantic search.
268-
- **[SQLite-Vector](https://github.com/sqliteai/sqlite-vector)** Ultra-efficient vector search for embeddings stored as BLOBs in standard SQLite tables.
269-
- **[SQLite-Sync](https://github.com/sqliteai/sqlite-sync)** Local-first CRDT-based synchronization for seamless, conflict-free data sync and real-time collaboration across devices.
270-
- **[SQLite-Agent](https://github.com/sqliteai/sqlite-agent)** Run autonomous AI agents directly from within SQLite databases.
271-
- **[SQLite-MCP](https://github.com/sqliteai/sqlite-mcp)** Connect SQLite databases to MCP servers and invoke their tools.
272-
- **[SQLite-JS](https://github.com/sqliteai/sqlite-js)** Create custom SQLite functions using JavaScript.
273-
- **[Liteparser](https://github.com/sqliteai/liteparser)** A highly efficient and fully compliant SQLite SQL parser.
276+
- **[SQLite-AI](https://github.com/sqliteai/sqlite-ai)** - On-device inference and embedding generation directly inside SQLite.
277+
- **[SQLite-Memory](https://github.com/sqliteai/sqlite-memory)** - Markdown-based AI agent memory with semantic search.
278+
- **[SQLite-Vector](https://github.com/sqliteai/sqlite-vector)** - Ultra-efficient vector search for embeddings stored as BLOBs in standard SQLite tables.
279+
- **[SQLite-Sync](https://github.com/sqliteai/sqlite-sync)** - Local-first CRDT-based synchronization for seamless, conflict-free data sync and real-time collaboration across devices.
280+
- **[SQLite-Agent](https://github.com/sqliteai/sqlite-agent)** - Run autonomous AI agents directly from within SQLite databases.
281+
- **[SQLite-MCP](https://github.com/sqliteai/sqlite-mcp)** - Connect SQLite databases to MCP servers and invoke their tools.
282+
- **[SQLite-JS](https://github.com/sqliteai/sqlite-js)** - Create custom SQLite functions using JavaScript.
283+
- **[Liteparser](https://github.com/sqliteai/liteparser)** - A highly efficient and fully compliant SQLite SQL parser.
274284

275285
Learn more at **[SQLite AI](https://sqlite.ai)**.

src/dbmem-http.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ extern "C" {
1616

1717
// Synchronous HTTP POST using NSURLSession.
1818
// Returns 0 on success, -1 on error.
19-
// On success: *out_data is malloc'd response body (caller frees), *out_size is its length, *out_http_code is the status.
19+
// On success: *out_data is sqlite3_malloc64'd response body (caller must sqlite3_free), *out_size is its length, *out_http_code is the status.
2020
// On error: err_msg is filled with a description.
2121
int dbmem_http_post(const char *url, const char *api_key, const char *body,
2222
void **out_data, size_t *out_size, long *out_http_code,

src/dbmem-http.m

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
#import <Foundation/Foundation.h>
99
#include "dbmem-http.h"
10+
#include "sqlite-memory.h"
1011
#include <string.h>
11-
#include <stdlib.h>
1212

1313
int dbmem_http_post(const char *url, const char *api_key, const char *body,
1414
void **out_data, size_t *out_size, long *out_http_code,
@@ -53,7 +53,7 @@ int dbmem_http_post(const char *url, const char *api_key, const char *body,
5353

5454
*out_http_code = httpResponse.statusCode;
5555
*out_size = responseData.length;
56-
*out_data = malloc(responseData.length + 1);
56+
*out_data = sqlite3_malloc64(responseData.length + 1);
5757
if (!*out_data) {
5858
snprintf(err_msg, err_msg_size, "Failed to allocate response buffer");
5959
return -1;

0 commit comments

Comments
 (0)