Skip to content

Commit db9707c

Browse files
committed
Implemented remote embedding
1 parent 89bddea commit db9707c

File tree

7 files changed

+952
-45
lines changed

7 files changed

+952
-45
lines changed

src/dbmem-embed.h

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,15 +22,13 @@ typedef struct {
2222
float *embedding; // Pointer to embedding (points to engine's buffer, do not free)
2323
} embedding_result_t;
2424

25-
dbmem_local_engine_t *dbmem_local_engine_init (const char *model_path, char err_msg[DBMEM_ERRBUF_SIZE]);
25+
dbmem_local_engine_t *dbmem_local_engine_init (void *ctx, const char *model_path, char err_msg[DBMEM_ERRBUF_SIZE]);
2626
int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *text, int text_len, embedding_result_t *result);
2727
bool dbmem_local_engine_warmup (dbmem_local_engine_t *engine);
2828
void dbmem_local_engine_free (dbmem_local_engine_t *engine);
29-
const char *dbmem_local_errmsg (dbmem_local_engine_t *engine);
3029

31-
dbmem_remote_engine_t *dbmem_remote_engine_init (const char *provider, const char *model, char err_msg[DBMEM_ERRBUF_SIZE]);
30+
dbmem_remote_engine_t *dbmem_remote_engine_init (void *ctx, const char *provider, const char *model, char err_msg[DBMEM_ERRBUF_SIZE]);
3231
int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *text, int text_len, embedding_result_t *result);
3332
void dbmem_remote_engine_free (dbmem_remote_engine_t *engine);
34-
const char *dbmem_remote_errmsg (dbmem_remote_engine_t *engine);
3533

3634
#endif

src/dbmem-lembed.c

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6,20 +6,22 @@
66
//
77

88
#include "dbmem-embed.h"
9+
#include "sqlite-memory.h"
910
#include "llama.h"
1011
#include "ggml.h"
1112

1213
#include <math.h>
1314
#include <string.h>
1415

1516
struct dbmem_local_engine_t {
17+
dbmem_context *context;
18+
1619
// Model and context
1720
struct llama_model *model; // Loaded GGUF model weights and architecture
1821
struct llama_context *ctx; // Inference context with KV cache and compute buffers
1922
const struct llama_vocab *vocab; // Tokenizer vocabulary for text-to-token conversion
2023
enum llama_pooling_type pooling; // Pooling strategy (NONE, MEAN, CLS, LAST, RANK)
2124
llama_memory_t mem; // KV cache memory handle for clearing between batches
22-
char err_msg[DBMEM_ERRBUF_SIZE]; // Error message
2325

2426
// Model info
2527
int n_embd; // Embedding dimension (e.g., 768 for nomic-embed)
@@ -98,7 +100,7 @@ void dbmem_logger (enum ggml_log_level level, const char *text, void *user_data)
98100

99101
// MARK: -
100102

101-
dbmem_local_engine_t *dbmem_local_engine_init (const char *model_path, char err_msg[DBMEM_ERRBUF_SIZE]) {
103+
dbmem_local_engine_t *dbmem_local_engine_init (void *ctx, const char *model_path, char err_msg[DBMEM_ERRBUF_SIZE]) {
102104
dbmem_local_engine_t *engine = (dbmem_local_engine_t *)dbmem_zeroalloc(sizeof(dbmem_local_engine_t));
103105
if (!engine) return NULL;
104106

@@ -203,15 +205,14 @@ bool dbmem_local_engine_warmup (dbmem_local_engine_t *engine) {
203205
}
204206

205207
int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *text, int text_len, embedding_result_t *result) {
206-
engine->err_msg[0] = 0;
207208
memset(result, 0, sizeof(embedding_result_t));
208209
if (text_len == -1) text_len = (int)strlen(text);
209210
if (text_len == 0) return 0;
210211

211212
// Tokenize
212213
int n_tokens = llama_tokenize(engine->vocab, text, text_len, engine->tokens, engine->tokens_capacity, true, true);
213214
if (n_tokens < 0) {
214-
snprintf(engine->err_msg, DBMEM_ERRBUF_SIZE, "Tokenization failed (text too long?)");
215+
dbmem_context_set_error(engine->context, "Tokenization failed (text too long?)");
215216
return -1;
216217
}
217218

@@ -241,7 +242,7 @@ int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *tex
241242
// Encode
242243
int ret = llama_encode(engine->ctx, batch);
243244
if (ret != 0) {
244-
snprintf(engine->err_msg, DBMEM_ERRBUF_SIZE, "llama_encode failed");
245+
dbmem_context_set_error(engine->context, "Llama_encode failed");
245246
return -1;
246247
}
247248

@@ -254,7 +255,7 @@ int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *tex
254255
}
255256

256257
if (!emb_ptr) {
257-
snprintf(engine->err_msg, DBMEM_ERRBUF_SIZE, "Failed to get embeddings");
258+
dbmem_context_set_error(engine->context, "Failed to get embeddings");
258259
return -1;
259260
}
260261

@@ -302,6 +303,3 @@ void dbmem_local_engine_free (dbmem_local_engine_t *engine) {
302303
llama_backend_free();
303304
}
304305

305-
const char *dbmem_local_errmsg (dbmem_local_engine_t *engine) {
306-
return engine->err_msg;
307-
}

0 commit comments

Comments
 (0)