Skip to content

Commit 78699e1

Browse files
committed
feat(model): add support for importing local models into cache
1 parent d6c93b4 commit 78699e1

5 files changed

Lines changed: 288 additions & 107 deletions

File tree

README.md

Lines changed: 132 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,30 @@ Supports **batched** input, **parallel** execution, isolated **child-process** w
2828

2929
---
3030

31+
## How it works
32+
33+
```
34+
embed(texts)
35+
36+
├─ split into batches of batchSize
37+
38+
└─ Promise.all(batches) ──► WorkerPool
39+
40+
├─ [process mode] ChildProcessWorker 0
41+
│ resolveProvider(device, provider)
42+
│ → pipeline('feature-extraction', model, { device: 'cuda' })
43+
│ → embed batch A
44+
45+
└─ [process mode] ChildProcessWorker 1
46+
resolveProvider(device, provider)
47+
→ pipeline(...) → embed batch B
48+
```
49+
50+
Workers load the model **once** at startup and reuse it for all batches.
51+
Provider activation happens per-worker before the pipeline is created.
52+
53+
---
54+
3155
## Installation
3256

3357
```bash
@@ -42,9 +66,72 @@ which ships as a transitive dependency. No additional packages are required.
4266
```bash
4367
# Ubuntu / Debian
4468
sudo apt install cuda-toolkit-12-6 libcudnn9-cuda-12
45-
## Programmatic API
4669
```
4770

71+
## Input Sources
72+
73+
### Embed texts (CPU — default)
74+
75+
```js
76+
import { Embedder } from '@jsilvanus/embedeer';
77+
78+
const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
79+
batchSize: 32, // texts per worker task (default: 32)
80+
concurrency: 2, // parallel workers (default: 2)
81+
mode: 'process', // 'process' | 'thread' (default: 'process')
82+
pooling: 'mean', // 'mean' | 'cls' | 'none' (default: 'mean')
83+
normalize: true, // L2-normalise vectors (default: true)
84+
token: 'hf_...', // HF API token (optional; also reads HF_TOKEN env)
85+
dtype: 'q8', // quantization dtype (optional)
86+
cacheDir: '/my/cache', // override model cache (default: ~/.embedeer/models)
87+
});
88+
89+
const vectors = await embedder.embed(['Hello world', 'Foo bar baz']);
90+
// → number[][] (one 384-dim vector per text for all-MiniLM-L6-v2)
91+
92+
await embedder.destroy(); // shut down worker processes
93+
```
94+
95+
### TypeScript example
96+
97+
The package includes TypeScript declarations so imports are typed automatically.
98+
99+
```ts
100+
import { Embedder } from '@jsilvanus/embedeer';
101+
102+
async function main() {
103+
const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { batchSize: 32, concurrency: 2 });
104+
const vectors = await embedder.embed(['Hello world', 'Foo bar baz']);
105+
// vectors: number[][]
106+
await embedder.destroy();
107+
}
108+
109+
main().catch(console.error);
110+
```
111+
112+
### Embed texts with GPU
113+
114+
```js
115+
import { Embedder } from '@jsilvanus/embedeer';
116+
117+
// Auto-detect GPU (falls back to CPU if no provider is installed)
118+
const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
119+
device: 'auto',
120+
});
121+
122+
// Require GPU (throws if no provider is available)
123+
const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
124+
device: 'gpu',
125+
});
126+
127+
// Explicitly select an execution provider
128+
const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
129+
provider: 'cuda', // 'cuda' | 'dml'
130+
});
131+
```
132+
133+
---
134+
48135
---
49136

50137
## Model management
@@ -71,17 +158,29 @@ const { modelName, cacheDir } = await loadModel('Xenova/all-MiniLM-L6-v2', {
71158

72159
- Cache location: default is `~/.embedeer/models`. Override with the CLI `--cache-dir` option or the `cacheDir` argument to `loadModel()`.
73160

74-
- Removing cached models: delete the model directory from the cache. Example:
161+
### Local models
162+
163+
Embedeer can load models directly from local directories or copy a local model into the embedeer cache for reuse.
164+
165+
- Use a local model path directly (no copying):
75166

76167
```bash
77-
# Unix
78-
rm -rf ~/.embedeer/models/Xenova-all-MiniLM-L6-v2
168+
npx @jsilvanus/embedeer --use-local /path/to/local-model --data "Hello world"
169+
```
170+
171+
- Copy a local model into the cache and give it a stable name:
79172

80-
# PowerShell (Windows)
81-
Remove-Item -Recurse -Force $env:USERPROFILE\.embedeer\models\Xenova-all-MiniLM-L6-v2
173+
```bash
174+
npx @jsilvanus/embedeer --load-local /path/to/local-model --name my-local-model
82175
```
83176

84-
- Advanced: see `src/model-management.js` for low-level cache helpers.
177+
- Use a cached local model, either by its name or by its full cache path:
178+
179+
```bash
180+
npx @jsilvanus/embedeer --model my-local-model
181+
# or
182+
npx @jsilvanus/embedeer --model ~/.embedeer/models/my-local-model
183+
```
85184

86185
### Model compatibility (ONNX)
87186

@@ -107,87 +206,35 @@ const removed = await deleteModel('Xenova/all-MiniLM-L6-v2');
107206
console.log('removed?', removed);
108207
```
109208

110-
## Explainer — deterministic LLM interface
111-
112-
This was **deprecated** and moved to npm package [`@jsilvanus/chattydeer`](https://www.npmjs.com/package/@jsilvanus/chattydeer) in 1.3.0.
113-
114-
## Input Sources
115-
116-
### Embed texts (CPU — default)
209+
### Programmatic local models
117210

118211
```js
119-
import { Embedder } from '@jsilvanus/embedeer';
212+
import { importLocalModel, Embedder } from '@jsilvanus/embedeer';
120213

121-
const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
122-
batchSize: 32, // texts per worker task (default: 32)
123-
concurrency: 2, // parallel workers (default: 2)
124-
mode: 'process', // 'process' | 'thread' (default: 'process')
125-
pooling: 'mean', // 'mean' | 'cls' | 'none' (default: 'mean')
126-
normalize: true, // L2-normalise vectors (default: true)
127-
token: 'hf_...', // HF API token (optional; also reads HF_TOKEN env)
128-
dtype: 'q8', // quantization dtype (optional)
129-
cacheDir: '/my/cache', // override model cache (default: ~/.embedeer/models)
130-
});
214+
// Load directly from a local directory (no copy)
215+
const embedder = await Embedder.create('/path/to/local-model', { cacheDir: '/my/cache' });
216+
const vecs = await embedder.embed(['hello world']);
217+
await embedder.destroy();
131218

132-
const vectors = await embedder.embed(['Hello world', 'Foo bar baz']);
133-
// → number[][] (one 384-dim vector per text for all-MiniLM-L6-v2)
219+
// Copy into cache as 'my-local-model'
220+
const { modelName, path } = await importLocalModel('/path/to/local-model', { name: 'my-local-model' });
221+
console.log('cached at', path);
134222

135-
await embedder.destroy(); // shut down worker processes
223+
// Use the cached name like any other model
224+
const e = await Embedder.create(modelName);
225+
await e.destroy();
136226
```
137227

138-
### TypeScript example
139-
140-
The package includes TypeScript declarations so imports are typed automatically.
141-
142-
```ts
143-
import { Embedder } from '@jsilvanus/embedeer';
144-
145-
async function main() {
146-
const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', { batchSize: 32, concurrency: 2 });
147-
const vectors = await embedder.embed(['Hello world', 'Foo bar baz']);
148-
// vectors: number[][]
149-
await embedder.destroy();
150-
}
151-
152-
main().catch(console.error);
153-
```
154-
155-
### Programmatic profile generation (optional)
156-
157-
You can generate and save a per-user performance profile which `Embedder.create()` will
158-
automatically apply. This is useful to pick the best `batchSize` / `concurrency` for your
159-
machine without manual tuning.
160-
161-
```js
162-
import { Embedder } from '@jsilvanus/embedeer';
163-
164-
// Quick profile generation (writes ~/.embedeer/perf-profile.json)
165-
await Embedder.generateAndSaveProfile({ mode: 'quick', device: 'cpu', sampleSize: 100 });
166-
// Subsequent calls to Embedder.create() will auto-apply the saved profile by default.
167-
```
168-
169-
### Embed texts with GPU
170-
171-
```js
172-
import { Embedder } from '@jsilvanus/embedeer';
228+
Useful programmatic helpers:
173229

174-
// Auto-detect GPU (falls back to CPU if no provider is installed)
175-
const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
176-
device: 'auto',
177-
});
230+
- `importLocalModel(src, { name?, cacheDir? })` — copy a local model into the cache and return `{ modelName, path }`.
231+
- `getCacheDir()` — return the resolved cache directory used by embedeer (useful when you want to manage files yourself).
232+
- `isModelDownloaded(name)` / `listModels()` / `getCachedModels()` — inspect the cache.
233+
- `deleteModel(name)` — remove a cached model directory.
178234

179-
// Require GPU (throws if no provider is available)
180-
const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
181-
device: 'gpu',
182-
});
235+
These functions are exported from the public package entry (`src/index.js`) so you can import them from `@jsilvanus/embedeer`.
183236

184-
// Explicitly select an execution provider
185-
const embedder = await Embedder.create('Xenova/all-MiniLM-L6-v2', {
186-
provider: 'cuda', // 'cuda' | 'dml'
187-
});
188-
```
189-
190-
---
237+
---
191238

192239
## CLI
193240

@@ -525,39 +572,20 @@ node bench/grid-search.js --device cpu --sample-size 200 --out bench/grid-result
525572
node bench/grid-search.js --device gpu --sample-size 100 --out bench/grid-results-gpu.json
526573
```
527574

528-
Programmatic profile generation (writes `~/.embedeer/perf-profile.json`):
575+
### Programmatic profile generation (optional)
576+
577+
You can generate and save a per-user performance profile which `Embedder.create()` will
578+
automatically apply. This is useful to pick the best `batchSize` / `concurrency` for your
579+
machine without manual tuning.
529580

530581
```js
531582
import { Embedder } from '@jsilvanus/embedeer';
532583

584+
// Quick profile generation (writes ~/.embedeer/perf-profile.json)
533585
await Embedder.generateAndSaveProfile({ mode: 'quick', device: 'cpu', sampleSize: 100 });
534-
// Embedder.create() will auto-apply a saved per-user profile by default
535-
```
536-
537-
---
538-
539-
## How it works
540-
541-
```
542-
embed(texts)
543-
544-
├─ split into batches of batchSize
545-
546-
└─ Promise.all(batches) ──► WorkerPool
547-
548-
├─ [process mode] ChildProcessWorker 0
549-
│ resolveProvider(device, provider)
550-
│ → pipeline('feature-extraction', model, { device: 'cuda' })
551-
│ → embed batch A
552-
553-
└─ [process mode] ChildProcessWorker 1
554-
resolveProvider(device, provider)
555-
→ pipeline(...) → embed batch B
586+
// Subsequent calls to Embedder.create() will auto-apply the saved profile by default.
556587
```
557588

558-
Workers load the model **once** at startup and reuse it for all batches.
559-
Provider activation happens per-worker before the pipeline is created.
560-
561589
---
562590

563591
## E2E-testing

src/cli.js

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,16 @@
3737
* --device <mode> Compute device: auto|cpu|gpu (default: cpu)
3838
* --provider <name> Execution provider override: cpu|cuda|dml
3939
* --prefix <str> Text prepended to every input before embedding
40+
* --load-local <path> Copy a local model directory into the cache and use it
41+
* --use-local <path> Use a local model path directly (no copying)
4042
* -h, --help Show this help
4143
*/
4244

4345
import { getCacheDir, DEFAULT_CACHE_DIR } from './model-cache.js';
44-
import { readFileSync, writeFileSync, appendFileSync } from 'fs';
46+
import { readFileSync, writeFileSync, appendFileSync, statSync, existsSync, cpSync, copyFileSync } from 'fs';
4547
import readline from 'readline';
4648
import { fileURLToPath } from 'url';
49+
import { basename, join } from 'path';
4750

4851
// ── Argument parsing ────────────────────────────────────────────────────────
4952

@@ -89,6 +92,8 @@ Options:
8992
--provider <name> Execution provider override: cpu|cuda|dml
9093
--prefix <str> Text prepended to every input before embedding
9194
(e.g. "search_query: " for nomic-embed-text)
95+
--load-local <path> Copy a local model directory into the cache and use it
96+
--use-local <path> Use a local model path directly (no copying)
9297
--timer Print elapsed wall-clock time to stderr when done
9398
-h, --help Show this help
9499
`.trim());
@@ -102,6 +107,7 @@ const KNOWN_FLAGS = new Set([
102107
'--mode', '--pooling', '-p', '--no-normalize', '--dtype', '--token',
103108
'--cache-dir', '--device', '--provider', '--delimiter', '-D',
104109
'--interactive', '-i', '--prefix', '--timer',
110+
'--load-local', '--use-local',
105111
]);
106112
const options = {
107113
model: 'nomic-embed-text',
@@ -123,6 +129,8 @@ const options = {
123129
device: undefined,
124130
provider: undefined,
125131
prefix: undefined,
132+
loadLocal: undefined, // --load-local <path> copy to cache and use
133+
useLocal: undefined, // --use-local <path> use path directly without copying
126134
timer: false,
127135
};
128136

@@ -178,6 +186,10 @@ for (let i = 0; i < args.length; i++) {
178186
options.prefix = args[++i];
179187
} else if (arg === '--timer') {
180188
options.timer = true;
189+
} else if (arg === '--load-local') {
190+
options.loadLocal = args[++i];
191+
} else if (arg === '--use-local') {
192+
options.useLocal = args[++i];
181193
} else {
182194
positional.push(arg);
183195
}
@@ -452,6 +464,45 @@ async function runInteractive(cacheDir) {
452464
async function main() {
453465
const resolvedCacheDir = getCacheDir(options.cacheDir);
454466

467+
// Local model support:
468+
// --use-local <path> uses the provided path directly (no copying).
469+
// --load-local <path> copies the local model (file or directory) into
470+
// the resolved cache directory and then uses the copied path as the model.
471+
if (options.useLocal) {
472+
options.model = options.useLocal;
473+
console.error(`Using local model path: ${options.model}`);
474+
} else if (options.loadLocal) {
475+
const src = options.loadLocal;
476+
if (!existsSync(src)) {
477+
console.error(`Error: local model path not found: ${src}`);
478+
process.exit(1);
479+
}
480+
const requestedName = options.name;
481+
const base = requestedName ?? basename(src);
482+
let dest = join(resolvedCacheDir, base);
483+
if (existsSync(dest)) {
484+
if (requestedName) {
485+
console.error(`Error: model name '${requestedName}' already exists in cache: ${dest}`);
486+
process.exit(1);
487+
}
488+
dest = join(resolvedCacheDir, `${base}-${Date.now()}`);
489+
}
490+
try {
491+
const s = statSync(src);
492+
if (s.isDirectory()) {
493+
cpSync(src, dest, { recursive: true });
494+
} else {
495+
// src is a file
496+
copyFileSync(src, dest);
497+
}
498+
console.error(`Copied local model into cache: ${dest}`);
499+
options.model = dest;
500+
} catch (err) {
501+
console.error('Error copying local model:', err.message);
502+
process.exit(1);
503+
}
504+
}
505+
455506
// ── Interactive line-reader mode ─────────────────────────────────────────
456507
if (options.interactive) {
457508
return runInteractive(resolvedCacheDir);

0 commit comments

Comments
 (0)