 //
 
 #include "dbmem-embed.h"
+#include "sqlite-memory.h"
 #include "llama.h"
 #include "ggml.h"
 
 #include <math.h>
 #include <string.h>
 
 struct dbmem_local_engine_t {
+    dbmem_context *context;              // Owning dbmem context, used for error reporting
+
     // Model and context
     struct llama_model *model;           // Loaded GGUF model weights and architecture
     struct llama_context *ctx;           // Inference context with KV cache and compute buffers
     const struct llama_vocab *vocab;     // Tokenizer vocabulary for text-to-token conversion
     enum llama_pooling_type pooling;     // Pooling strategy (NONE, MEAN, CLS, LAST, RANK)
     llama_memory_t mem;                  // KV cache memory handle for clearing between batches
-    char err_msg[DBMEM_ERRBUF_SIZE];     // Error message
 
     // Model info
     int n_embd;                          // Embedding dimension (e.g., 768 for nomic-embed)
@@ -98,7 +100,7 @@ void dbmem_logger(enum ggml_log_level level, const char *text, void *user_data)
 
 // MARK: -
 
-dbmem_local_engine_t *dbmem_local_engine_init(const char *model_path, char err_msg[DBMEM_ERRBUF_SIZE]) {
+dbmem_local_engine_t *dbmem_local_engine_init(void *ctx, const char *model_path, char err_msg[DBMEM_ERRBUF_SIZE]) {
     dbmem_local_engine_t *engine = (dbmem_local_engine_t *)dbmem_zeroalloc(sizeof(dbmem_local_engine_t));
     if (!engine) return NULL;
 
@@ -203,15 +205,14 @@ bool dbmem_local_engine_warmup(dbmem_local_engine_t *engine) {
 }
 
 int dbmem_local_compute_embedding(dbmem_local_engine_t *engine, const char *text, int text_len, embedding_result_t *result) {
-    engine->err_msg[0] = 0;
     memset(result, 0, sizeof(embedding_result_t));
     if (text_len == -1) text_len = (int)strlen(text);
     if (text_len == 0) return 0;
 
     // Tokenize
     int n_tokens = llama_tokenize(engine->vocab, text, text_len, engine->tokens, engine->tokens_capacity, true, true);
     if (n_tokens < 0) {
-        snprintf(engine->err_msg, DBMEM_ERRBUF_SIZE, "Tokenization failed (text too long?)");
+        dbmem_context_set_error(engine->context, "Tokenization failed (text too long?)");
         return -1;
     }
 
@@ -241,7 +242,7 @@ int dbmem_local_compute_embedding(dbmem_local_engine_t *engine, const char *tex
     // Encode
     int ret = llama_encode(engine->ctx, batch);
     if (ret != 0) {
-        snprintf(engine->err_msg, DBMEM_ERRBUF_SIZE, "llama_encode failed");
+        dbmem_context_set_error(engine->context, "llama_encode failed");
         return -1;
     }
 
@@ -254,7 +255,7 @@ int dbmem_local_compute_embedding(dbmem_local_engine_t *engine, const char *tex
     }
 
     if (!emb_ptr) {
-        snprintf(engine->err_msg, DBMEM_ERRBUF_SIZE, "Failed to get embeddings");
+        dbmem_context_set_error(engine->context, "Failed to get embeddings");
         return -1;
     }
 
@@ -302,6 +303,3 @@ void dbmem_local_engine_free(dbmem_local_engine_t *engine) {
     llama_backend_free();
 }
 
-const char *dbmem_local_errmsg(dbmem_local_engine_t *engine) {
-    return engine->err_msg;
-}
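
Taken together, the change routes local-engine errors through the shared dbmem context via dbmem_context_set_error instead of a per-engine err_msg buffer, which is why dbmem_local_errmsg is removed. Below is a minimal caller-side sketch of the new flow; only the init/compute/free signatures come from this diff, while the dbmem_context_errmsg() accessor and passing a dbmem_context* as the void *ctx argument are assumptions for illustration.

```c
// Caller-side sketch of the new error flow. Assumptions (not in this diff):
// the dbmem_context_errmsg() accessor, and that the void *ctx argument of
// dbmem_local_engine_init() is the owning dbmem_context*.
#include "dbmem-embed.h"
#include <stdio.h>

static int embed_one(dbmem_context *db_ctx, const char *model_path, const char *text) {
    char err_msg[DBMEM_ERRBUF_SIZE];

    // The engine now keeps a pointer to its owning context, so errors raised
    // inside dbmem_local_compute_embedding() are reported through that context.
    dbmem_local_engine_t *engine = dbmem_local_engine_init(db_ctx, model_path, err_msg);
    if (!engine) {
        fprintf(stderr, "engine init failed: %s\n", err_msg);
        return -1;
    }

    embedding_result_t result;
    if (dbmem_local_compute_embedding(engine, text, -1, &result) < 0) {
        // Hypothetical accessor standing in for the removed dbmem_local_errmsg().
        fprintf(stderr, "embedding failed: %s\n", dbmem_context_errmsg(db_ctx));
        dbmem_local_engine_free(engine);
        return -1;
    }

    dbmem_local_engine_free(engine);
    return 0;
}
```

The err_msg buffer is presumably still passed to dbmem_local_engine_init because load and allocation failures can occur before the engine is attached to a context.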