Skip to content

Commit 1a2ece6

Browse files
committed
Merge branch 'main' into add-platform-builds
2 parents 421a2f2 + db9707c commit 1a2ece6

File tree

10 files changed

+1073
-100
lines changed

10 files changed

+1073
-100
lines changed

API.md

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ sqlite-memory enables semantic search over text content stored in SQLite. It:
3232

3333
## Sync Behavior
3434

35-
All `memory_sync_*` functions use **content-hash change detection** to avoid redundant embedding computation. Each piece of content is hashed before processing — if the hash already exists in the database, the content is skipped.
35+
All `memory_add_*` functions use **content-hash change detection** to avoid redundant embedding computation. Each piece of content is hashed before processing — if the hash already exists in the database, the content is skipped.
3636

3737
### Change Detection
3838

@@ -114,6 +114,7 @@ Configures the embedding model to use.
114114
- Remote embedding requires a free API key from [vectors.space](https://vectors.space) (set via `memory_set_apikey`)
115115
- Settings are persisted in `dbmem_settings` table
116116
- For local models, the embedding engine is initialized immediately
117+
- **Automatic reindex**: If a model was previously configured and the new provider/model differs, all existing content is automatically re-embedded with the new model. File-based entries are re-read from disk; text-based entries are re-embedded from stored content. Errors on individual entries are silently skipped (best-effort).
117118

118119
**Example:**
119120
```sql
@@ -200,7 +201,7 @@ SELECT memory_get_option('provider');
200201

201202
### Memory Management Functions
202203

203-
#### `memory_sync_text(content TEXT [, context TEXT])`
204+
#### `memory_add_text(content TEXT [, context TEXT])`
204205

205206
Syncs text content to memory. Duplicate content (same hash) is skipped automatically.
206207

@@ -222,15 +223,15 @@ Syncs text content to memory. Duplicate content (same hash) is skipped automatic
222223
**Example:**
223224
```sql
224225
-- Add text without context
225-
SELECT memory_sync_text('SQLite is a C-language library that implements a small, fast, self-contained SQL database engine.');
226+
SELECT memory_add_text('SQLite is a C-language library that implements a small, fast, self-contained SQL database engine.');
226227

227228
-- Add text with context
228-
SELECT memory_sync_text('Important meeting notes from 2024-01-15...', 'meetings');
229+
SELECT memory_add_text('Important meeting notes from 2024-01-15...', 'meetings');
229230
```
230231

231232
---
232233

233-
#### `memory_sync_file(path TEXT [, context TEXT])`
234+
#### `memory_add_file(path TEXT [, context TEXT])`
234235

235236
Syncs a file to memory. Unchanged files are skipped; modified files are atomically replaced.
236237

@@ -250,13 +251,13 @@ Syncs a file to memory. Unchanged files are skipped; modified files are atomical
250251

251252
**Example:**
252253
```sql
253-
SELECT memory_sync_file('/docs/readme.md');
254-
SELECT memory_sync_file('/docs/api.md', 'documentation');
254+
SELECT memory_add_file('/docs/readme.md');
255+
SELECT memory_add_file('/docs/api.md', 'documentation');
255256
```
256257

257258
---
258259

259-
#### `memory_sync_directory(path TEXT [, context TEXT])`
260+
#### `memory_add_directory(path TEXT [, context TEXT])`
260261

261262
Synchronizes a directory with memory. Adds new files, reindexes modified files, and removes entries for deleted files.
262263

@@ -282,13 +283,13 @@ Synchronizes a directory with memory. Adds new files, reindexes modified files,
282283

283284
**Example:**
284285
```sql
285-
SELECT memory_sync_directory('/path/to/docs');
286+
SELECT memory_add_directory('/path/to/docs');
286287
-- Returns: 42 (number of new files processed)
287288

288-
SELECT memory_sync_directory('/project/notes', 'project-notes');
289+
SELECT memory_add_directory('/project/notes', 'project-notes');
289290

290291
-- Safe to call again — unchanged files are skipped
291-
SELECT memory_sync_directory('/path/to/docs');
292+
SELECT memory_add_directory('/path/to/docs');
292293
-- Returns: 0 (nothing changed)
293294
```
294295

@@ -476,7 +477,7 @@ The extension tracks two timestamps for each memory:
476477

477478
### `created_at`
478479

479-
- Set automatically when content is added via `memory_sync_text`, `memory_sync_file`, or `memory_sync_directory`
480+
- Set automatically when content is added via `memory_add_text`, `memory_add_file`, or `memory_add_directory`
480481
- Stored as Unix timestamp (seconds since 1970-01-01 00:00:00 UTC)
481482
- Never updated after initial creation
482483

@@ -517,8 +518,8 @@ SELECT memory_set_option('max_tokens', 512);
517518
SELECT memory_set_option('min_score', 0.75);
518519

519520
-- Add content
520-
SELECT memory_sync_text('SQLite is a C library that provides a lightweight disk-based database.', 'sqlite-docs');
521-
SELECT memory_sync_directory('/docs/sqlite', 'sqlite-docs');
521+
SELECT memory_add_text('SQLite is a C library that provides a lightweight disk-based database.', 'sqlite-docs');
522+
SELECT memory_add_directory('/docs/sqlite', 'sqlite-docs');
522523

523524
-- Search
524525
SELECT path, snippet, ranking
@@ -546,9 +547,9 @@ SELECT memory_clear();
546547

547548
```sql
548549
-- Add memories with different contexts
549-
SELECT memory_sync_text('Meeting notes...', 'meetings');
550-
SELECT memory_sync_text('API documentation...', 'api-docs');
551-
SELECT memory_sync_text('Tutorial content...', 'tutorials');
550+
SELECT memory_add_text('Meeting notes...', 'meetings');
551+
SELECT memory_add_text('API documentation...', 'api-docs');
552+
SELECT memory_add_text('Tutorial content...', 'tutorials');
552553

553554
-- Search within a context
554555
SELECT * FROM memory_search
@@ -618,6 +619,6 @@ Errors can be caught using standard SQLite error handling mechanisms.
618619

619620
```sql
620621
-- Example error handling in application code
621-
SELECT memory_sync_text(123); -- Error: expects TEXT parameter
622+
SELECT memory_add_text(123); -- Error: expects TEXT parameter
622623
SELECT memory_delete('abc'); -- Error: expects INTEGER parameter
623624
```

README.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -83,16 +83,16 @@ SELECT memory_set_model('local', '/path/to/nomic-embed-text-v1.5.Q8_0.gguf');
8383
-- SELECT memory_set_apikey('your-vectorspace-api-key');
8484

8585
-- Add some knowledge
86-
SELECT memory_sync_text('SQLite is a C-language library that implements a small, fast,
86+
SELECT memory_add_text('SQLite is a C-language library that implements a small, fast,
8787
self-contained, high-reliability, full-featured, SQL database engine. SQLite is the
8888
most used database engine in the world.', 'sqlite-docs');
8989

90-
SELECT memory_sync_text('Vector databases store data as high-dimensional vectors,
90+
SELECT memory_add_text('Vector databases store data as high-dimensional vectors,
9191
enabling similarity search. They are essential for semantic search, recommendation
9292
systems, and AI applications.', 'concepts');
9393

9494
-- Sync an entire documentation directory
95-
SELECT memory_sync_directory('/path/to/docs', 'project-docs');
95+
SELECT memory_add_directory('/path/to/docs', 'project-docs');
9696

9797
-- Search your memory semantically
9898
SELECT path, snippet, ranking
@@ -124,7 +124,7 @@ conn.execute("SELECT memory_set_model('local', './models/nomic-embed-text-v1.5.Q
124124

125125
# Store conversation context
126126
def remember(content, context="conversation"):
127-
conn.execute("SELECT memory_sync_text(?, ?)", (content, context))
127+
conn.execute("SELECT memory_add_text(?, ?)", (content, context))
128128
conn.commit()
129129

130130
# Retrieve relevant memories
@@ -147,11 +147,11 @@ memories = recall("what's the project timeline")
147147

148148
## Intelligent Sync
149149

150-
All `memory_sync_*` functions use content-hash change detection to avoid redundant work:
150+
All `memory_add_*` functions use content-hash change detection to avoid redundant work:
151151

152-
- **`memory_sync_text`** — Computes a hash of the content. If the same content was already indexed, it is skipped entirely. No duplicate embeddings are ever created.
153-
- **`memory_sync_file`** — Reads the file and hashes its content. If the file was previously indexed with different content, the old entry (chunks, embeddings, FTS) is atomically replaced. Unchanged files are skipped.
154-
- **`memory_sync_directory`** — Performs a full two-phase sync:
152+
- **`memory_add_text`** — Computes a hash of the content. If the same content was already indexed, it is skipped entirely. No duplicate embeddings are ever created.
153+
- **`memory_add_file`** — Reads the file and hashes its content. If the file was previously indexed with different content, the old entry (chunks, embeddings, FTS) is atomically replaced. Unchanged files are skipped.
154+
- **`memory_add_directory`** — Performs a full two-phase sync:
155155
1. **Cleanup**: Removes database entries for files that no longer exist on disk
156156
2. **Scan**: Recursively processes all matching files — adding new ones, replacing modified ones, and skipping unchanged ones
157157

@@ -240,7 +240,7 @@ make test
240240

241241
- **Local Engine**: Built-in llama.cpp for on-device embeddings (requires GGUF model)
242242
- **Remote Engine**: [vectors.space](https://vectors.space) API for cloud embeddings (requires free API key)
243-
- **File I/O**: `memory_sync_file` and `memory_sync_directory` functions
243+
- **File I/O**: `memory_add_file` and `memory_add_directory` functions
244244

245245
You can also combine options manually:
246246

src/dbmem-embed.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,13 @@ typedef struct {
2222
float *embedding; // Pointer to embedding (points to engine's buffer, do not free)
2323
} embedding_result_t;
2424

25-
dbmem_local_engine_t *dbmem_local_engine_init (const char *model_path, char err_msg[DBMEM_ERRBUF_SIZE]);
25+
dbmem_local_engine_t *dbmem_local_engine_init (void *ctx, const char *model_path, char err_msg[DBMEM_ERRBUF_SIZE]);
2626
int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *text, int text_len, embedding_result_t *result);
2727
bool dbmem_local_engine_warmup (dbmem_local_engine_t *engine);
2828
void dbmem_local_engine_free (dbmem_local_engine_t *engine);
29-
const char *dbmem_local_errmsg (dbmem_local_engine_t *engine);
3029

31-
dbmem_remote_engine_t *dbmem_remote_engine_init (const char *provider, const char *model, char err_msg[DBMEM_ERRBUF_SIZE]);
30+
dbmem_remote_engine_t *dbmem_remote_engine_init (void *ctx, const char *provider, const char *model, char err_msg[DBMEM_ERRBUF_SIZE]);
3231
int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *text, int text_len, embedding_result_t *result);
3332
void dbmem_remote_engine_free (dbmem_remote_engine_t *engine);
34-
const char *dbmem_remote_errmsg (dbmem_remote_engine_t *engine);
3533

3634
#endif

src/dbmem-lembed.c

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,22 @@
66
//
77

88
#include "dbmem-embed.h"
9+
#include "sqlite-memory.h"
910
#include "llama.h"
1011
#include "ggml.h"
1112

1213
#include <math.h>
1314
#include <string.h>
1415

1516
struct dbmem_local_engine_t {
17+
dbmem_context *context;
18+
1619
// Model and context
1720
struct llama_model *model; // Loaded GGUF model weights and architecture
1821
struct llama_context *ctx; // Inference context with KV cache and compute buffers
1922
const struct llama_vocab *vocab; // Tokenizer vocabulary for text-to-token conversion
2023
enum llama_pooling_type pooling; // Pooling strategy (NONE, MEAN, CLS, LAST, RANK)
2124
llama_memory_t mem; // KV cache memory handle for clearing between batches
22-
char err_msg[DBMEM_ERRBUF_SIZE]; // Error message
2325

2426
// Model info
2527
int n_embd; // Embedding dimension (e.g., 768 for nomic-embed)
@@ -98,7 +100,7 @@ void dbmem_logger (enum ggml_log_level level, const char *text, void *user_data)
98100

99101
// MARK: -
100102

101-
dbmem_local_engine_t *dbmem_local_engine_init (const char *model_path, char err_msg[DBMEM_ERRBUF_SIZE]) {
103+
dbmem_local_engine_t *dbmem_local_engine_init (void *ctx, const char *model_path, char err_msg[DBMEM_ERRBUF_SIZE]) {
102104
dbmem_local_engine_t *engine = (dbmem_local_engine_t *)dbmem_zeroalloc(sizeof(dbmem_local_engine_t));
103105
if (!engine) return NULL;
104106

@@ -203,15 +205,14 @@ bool dbmem_local_engine_warmup (dbmem_local_engine_t *engine) {
203205
}
204206

205207
int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *text, int text_len, embedding_result_t *result) {
206-
engine->err_msg[0] = 0;
207208
memset(result, 0, sizeof(embedding_result_t));
208209
if (text_len == -1) text_len = (int)strlen(text);
209210
if (text_len == 0) return 0;
210211

211212
// Tokenize
212213
int n_tokens = llama_tokenize(engine->vocab, text, text_len, engine->tokens, engine->tokens_capacity, true, true);
213214
if (n_tokens < 0) {
214-
snprintf(engine->err_msg, DBMEM_ERRBUF_SIZE, "Tokenization failed (text too long?)");
215+
dbmem_context_set_error(engine->context, "Tokenization failed (text too long?)");
215216
return -1;
216217
}
217218

@@ -241,7 +242,7 @@ int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *tex
241242
// Encode
242243
int ret = llama_encode(engine->ctx, batch);
243244
if (ret != 0) {
244-
snprintf(engine->err_msg, DBMEM_ERRBUF_SIZE, "llama_encode failed");
245+
dbmem_context_set_error(engine->context, "Llama_encode failed");
245246
return -1;
246247
}
247248

@@ -254,7 +255,7 @@ int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *tex
254255
}
255256

256257
if (!emb_ptr) {
257-
snprintf(engine->err_msg, DBMEM_ERRBUF_SIZE, "Failed to get embeddings");
258+
dbmem_context_set_error(engine->context, "Failed to get embeddings");
258259
return -1;
259260
}
260261

@@ -302,6 +303,3 @@ void dbmem_local_engine_free (dbmem_local_engine_t *engine) {
302303
llama_backend_free();
303304
}
304305

305-
const char *dbmem_local_errmsg (dbmem_local_engine_t *engine) {
306-
return engine->err_msg;
307-
}

0 commit comments

Comments
 (0)