jsilvanus
diff --git a/‎README.md‎
Lines changed: 132 additions & 104 deletions b/‎README.md‎
Lines changed: 132 additions & 104 deletions
diff --git a/‎src/cli.js‎
Lines changed: 52 additions & 1 deletion b/‎src/cli.js‎
Lines changed: 52 additions & 1 deletion
@@ -28,6 +28,30 @@ Supports **batched** input, **parallel** execution, isolated **child-process** w
 
 ---
 
+## How it works
+
+```
+embed(texts)
+  │
+  ├─ split into batches of batchSize
+  │
+  └─ Promise.all(batches) ──► WorkerPool
+                                 │
+                                 ├─ [process mode] ChildProcessWorker 0
+                                 │   resolveProvider(device, provider)
+                                 │   → pipeline('feature-extraction', model, { device: 'cuda' })
+                                 │   → embed batch A
+                                 │
+                                 └─ [process mode] ChildProcessWorker 1
+                                     resolveProvider(device, provider)
+                                     → pipeline(...) → embed batch B
+```
+
+Workers load the model **once** at startup and reuse it for all batches.  
+Provider activation happens per-worker before the pipeline is created.
+
+--- 
+
 ## Installation
 
 ```bash
@@ -42,9 +66,72 @@ which ships as a transitive dependency. No additional packages are required.
 ```bash
 # Ubuntu / Debian
 sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12
-## Programmatic API
 ```
 
+## Input Sources
+
+### Embed texts (CPU — default)
+
+```js
+import { Embedder } from '@jsilvanus/embedeer';
+
+const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
+  batchSize:   32,          // texts per worker task   (default: 32)
+  concurrency: 2,           // parallel workers        (default: 2)
+  mode:       'process',    // 'process' | 'thread'    (default: 'process')
+  pooling:    'mean',       // 'mean' | 'cls' | 'none' (default: 'mean')
+  normalize:   true,        // L2-normalise vectors    (default: true)
+  token:      'hf_...',     // HF API token (optional; also reads HF_TOKEN env)
+  dtype:      'q8',         // quantization dtype      (optional)
+  cacheDir:   '/my/cache',  // override model cache    (default: ~/.embedeer/models)
+});
+
+const vectors = await embedder.embed(['Hello world', 'Foo bar baz']);
+// → number[][]  (one 384-dim vector per text for all-MiniLM-L6-v2)
+
+await embedder.destroy(); // shut down worker processes
+```
+
+### TypeScript example
+
+The package includes TypeScript declarations so imports are typed automatically.
+
+```ts
+import { Embedder } from '@jsilvanus/embedeer';
+
+async function main() {
+  const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { batchSize: 32, concurrency: 2 });
+  const vectors = await embedder.embed(['Hello world', 'Foo bar baz']);
+  // vectors: number[][]
+  await embedder.destroy();
+}
+
+main().catch(console.error);
+```
+
+### Embed texts with GPU
+
+```js
+import { Embedder } from '@jsilvanus/embedeer';
+
+// Auto-detect GPU (falls back to CPU if no provider is installed)
+const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
+  device: 'auto',
+});
+
+// Require GPU (throws if no provider is available)
+const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
+  device: 'gpu',
+});
+
+// Explicitly select an execution provider
+const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
+  provider: 'cuda',  // 'cuda' | 'dml'
+});
+```
+
 ---
 
 ## Model management
@@ -71,17 +158,29 @@ const { modelName, cacheDir } = await loadModel('Xenova/all-MiniLM-L6-v2', {
 
 - Cache location: default is `~/.embedeer/models`. Override with the CLI `--cache-dir` option or the `cacheDir` argument to `loadModel()`.
 
-- Removing cached models: delete the model directory from the cache. Example:
+### Local models
+
+Embedeer can load models directly from local directories or copy a local model into the embedeer cache for reuse.
+
+- Use a local model path directly (no copying):
 
 ```bash
-# Unix
-rm -rf ~/.embedeer/models/Xenova-all-MiniLM-L6-v2
+npx @jsilvanus/embedeer --use-local /path/to/local-model --data "Hello world"
+```
+
+- Copy a local model into the cache and give it a stable name:
 
-# PowerShell (Windows)
-Remove-Item -Recurse -Force $env:USERPROFILE\.embedeer\models\Xenova-all-MiniLM-L6-v2
+```bash
+npx @jsilvanus/embedeer --load-local /path/to/local-model --name my-local-model
 ```
 
-- Advanced: see `src/model-management.js` for low-level cache helpers.
+- Use the cached model by name or by its path in the cache:
+
+```bash
+npx @jsilvanus/embedeer --model my-local-model
+# or
+npx @jsilvanus/embedeer --model ~/.embedeer/models/my-local-model
+```
 
 ### Model compatibility (ONNX)
 
@@ -107,87 +206,35 @@ const removed = await deleteModel('Xenova/all-MiniLM-L6-v2');
 console.log('removed?', removed);
 ```
 
-## Explainer — deterministic LLM interface
-
-This was **deprecated** and moved to npm package [`@jsilvanus/chattydeer`](https://www.npmjs.com/package/@jsilvanus/chattydeer) in 1.3.0.
-
-## Input Sources
-
-### Embed texts (CPU — default)
+### Programmatic local models
 
 ```js
-import { Embedder } from '@jsilvanus/embedeer';
+import { importLocalModel, Embedder } from '@jsilvanus/embedeer';
 
-const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
-  batchSize:   32,          // texts per worker task   (default: 32)
-  concurrency: 2,           // parallel workers        (default: 2)
-  mode:       'process',    // 'process' | 'thread'    (default: 'process')
-  pooling:    'mean',       // 'mean' | 'cls' | 'none' (default: 'mean')
-  normalize:   true,        // L2-normalise vectors    (default: true)
-  token:      'hf_...',     // HF API token (optional; also reads HF_TOKEN env)
-  dtype:      'q8',         // quantization dtype      (optional)
-  cacheDir:   '/my/cache',  // override model cache    (default: ~/.embedeer/models)
-});
+// Load directly from a local directory (no copy)
+const embedder = await Embedder.create('/path/to/local-model', { cacheDir: '/my/cache' });
+const vecs = await embedder.embed(['hello world']);
+await embedder.destroy();
 
-const vectors = await embedder.embed(['Hello world', 'Foo bar baz']);
-// → number[][]  (one 384-dim vector per text for all-MiniLM-L6-v2)
+// Copy into cache as 'my-local-model'
+const { modelName, path } = await importLocalModel('/path/to/local-model', { name: 'my-local-model' });
+console.log('cached at', path);
 
-await embedder.destroy(); // shut down worker processes
+// Use the cached name like any other model
+const e = await Embedder.create(modelName);
+await e.destroy();
 ```
 
-### TypeScript example
-
-The package includes TypeScript declarations so imports are typed automatically.
-
-```ts
-import { Embedder } from '@jsilvanus/embedeer';
-
-async function main() {
-  const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { batchSize: 32, concurrency: 2 });
-  const vectors = await embedder.embed(['Hello world', 'Foo bar baz']);
-  // vectors: number[][]
-  await embedder.destroy();
-}
-
-main().catch(console.error);
-```
-
-### Programmatic profile generation (optional)
-
-You can generate and save a per-user performance profile which `Embedder.create()` will
-automatically apply. This is useful to pick the best `batchSize` / `concurrency` for your
-machine without manual tuning.
-
-```js
-import { Embedder } from '@jsilvanus/embedeer';
-
-// Quick profile generation (writes ~/.embedeer/perf-profile.json)
-await Embedder.generateAndSaveProfile({ mode: 'quick', device: 'cpu', sampleSize: 100 });
-// Subsequent calls to Embedder.create() will auto-apply the saved profile by default.
-```
-
-### Embed texts with GPU
-
-```js
-import { Embedder } from '@jsilvanus/embedeer';
+Useful programmatic helpers:
 
-// Auto-detect GPU (falls back to CPU if no provider is installed)
-const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
-  device: 'auto',
-});
+- `importLocalModel(src, { name?, cacheDir? })` — copy a local model into the cache and return `{ modelName, path }`.
+- `getCacheDir()` — return the resolved cache directory used by embedeer (useful when you want to manage files yourself).
+- `isModelDownloaded(name)` / `listModels()` / `getCachedModels()` — inspect the cache.
+- `deleteModel(name)` — remove a cached model directory.
 
-// Require GPU (throws if no provider is available)
-const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
-  device: 'gpu',
-});
+These functions are exported from the public package entry (`src/index.js`) so you can import them from `@jsilvanus/embedeer`.
 
-// Explicitly select an execution provider
-const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
-  provider: 'cuda',  // 'cuda' | 'dml'
-});
-```
-
----
+--- 
 
 ## CLI
 
@@ -525,39 +572,20 @@ node bench/grid-search.js --device cpu --sample-size 200 --out bench/grid-result
 node bench/grid-search.js --device gpu --sample-size 100 --out bench/grid-results-gpu.json
 ```
 
-Programmatic profile generation (writes `~/.embedeer/perf-profile.json`):
+### Programmatic profile generation (optional)
+
+You can generate and save a per-user performance profile which `Embedder.create()` will
+automatically apply. This is useful to pick the best `batchSize` / `concurrency` for your
+machine without manual tuning.
 
 ```js
 import { Embedder } from '@jsilvanus/embedeer';
 
+// Quick profile generation (writes ~/.embedeer/perf-profile.json)
 await Embedder.generateAndSaveProfile({ mode: 'quick', device: 'cpu', sampleSize: 100 });
-// Embedder.create() will auto-apply a saved per-user profile by default
-```
-
---- 
-
-## How it works
-
-```
-embed(texts)
-  │
-  ├─ split into batches of batchSize
-  │
-  └─ Promise.all(batches) ──► WorkerPool
-                                 │
-                                 ├─ [process mode] ChildProcessWorker 0
-                                 │   resolveProvider(device, provider)
-                                 │   → pipeline('feature-extraction', model, { device: 'cuda' })
-                                 │   → embed batch A
-                                 │
-                                 └─ [process mode] ChildProcessWorker 1
-                                     resolveProvider(device, provider)
-                                     → pipeline(...) → embed batch B
+// Subsequent calls to Embedder.create() will auto-apply the saved profile by default.
 ```
 
-Workers load the model **once** at startup and reuse it for all batches.  
-Provider activation happens per-worker before the pipeline is created.
-
 ---
 
 ## E2E-testing
 
@@ -37,13 +37,16 @@
  *       --device <mode>          Compute device: auto|cpu|gpu (default: cpu)
  *       --provider <name>        Execution provider override: cpu|cuda|dml
  *       --prefix <str>           Text prepended to every input before embedding
+ *       --load-local <path>      Copy a local model directory into the cache and use it
+ *       --use-local <path>       Use a local model path directly (no copying)
  *   -h, --help                   Show this help
  */
 
 import { getCacheDir, DEFAULT_CACHE_DIR } from './model-cache.js';
-import { readFileSync, writeFileSync, appendFileSync } from 'fs';
+import { readFileSync, writeFileSync, appendFileSync, statSync, existsSync, cpSync, copyFileSync } from 'fs';
 import readline from 'readline';
 import { fileURLToPath } from 'url';
+import { basename, join } from 'path';
 
 // ── Argument parsing ────────────────────────────────────────────────────────
 
@@ -89,6 +92,8 @@ Options:
       --provider <name>        Execution provider override: cpu|cuda|dml
       --prefix <str>           Text prepended to every input before embedding
                                (e.g. "search_query: " for nomic-embed-text)
+      --load-local <path>      Copy a local model directory into the cache and use it
+      --use-local <path>       Use a local model path directly (no copying)
       --timer                  Print elapsed wall-clock time to stderr when done
   -h, --help                   Show this help
 `.trim());
@@ -102,6 +107,7 @@ const KNOWN_FLAGS = new Set([
   '--mode', '--pooling', '-p', '--no-normalize', '--dtype', '--token',
   '--cache-dir', '--device', '--provider', '--delimiter', '-D',
   '--interactive', '-i', '--prefix', '--timer',
+  '--load-local', '--use-local',
 ]);
 const options = {
   model: 'nomic-embed-text',
@@ -123,6 +129,8 @@ const options = {
   device: undefined,
   provider: undefined,
   prefix: undefined,
+  loadLocal: undefined, // --load-local <path> copy to cache and use
+  useLocal: undefined,  // --use-local <path> use path directly without copying
   timer: false,
 };
 
@@ -178,6 +186,10 @@ for (let i = 0; i < args.length; i++) {
     options.prefix = args[++i];
   } else if (arg === '--timer') {
     options.timer = true;
+  } else if (arg === '--load-local') {
+    options.loadLocal = args[++i];
+  } else if (arg === '--use-local') {
+    options.useLocal = args[++i];
   } else {
     positional.push(arg);
   }
@@ -452,6 +464,45 @@ async function runInteractive(cacheDir) {
 async function main() {
   const resolvedCacheDir = getCacheDir(options.cacheDir);
 
+  // Local model support:
+  // --use-local <path> uses the provided path directly (no copying).
+  // --load-local <path> copies the local model (file or directory) into
+  // the resolved cache directory and then uses the copied path as the model.
+  if (options.useLocal) {
+    options.model = options.useLocal;
+    console.error(`Using local model path: ${options.model}`);
+  } else if (options.loadLocal) {
+    const src = options.loadLocal;
+    if (!existsSync(src)) {
+      console.error(`Error: local model path not found: ${src}`);
+      process.exit(1);
+    }
+    const requestedName = options.name;
+    const base = requestedName ?? basename(src);
+    let dest = join(resolvedCacheDir, base);
+    if (existsSync(dest)) {
+      if (requestedName) {
+        console.error(`Error: model name '${requestedName}' already exists in cache: ${dest}`);
+        process.exit(1);
+      }
+      dest = join(resolvedCacheDir, `${base}-${Date.now()}`);
+    }
+    try {
+      const s = statSync(src);
+      if (s.isDirectory()) {
+        cpSync(src, dest, { recursive: true });
+      } else {
+        // src is a file
+        copyFileSync(src, dest);
+      }
+      console.error(`Copied local model into cache: ${dest}`);
+      options.model = dest;
+    } catch (err) {
+      console.error('Error copying local model:', err.message);
+      process.exit(1);
+    }
+  }
+
   // ── Interactive line-reader mode ─────────────────────────────────────────
   if (options.interactive) {
     return runInteractive(resolvedCacheDir);