Skip to content

Commit 7daf69d

Browse files
Brooooooklyn and claude authored
feat: mlx launch claude, server-side tools, tier-2 prefix cache (mlx-node#52)
<!-- CURSOR_SUMMARY --> > [!NOTE] > **Medium Risk** > Medium risk because it introduces a new CLI workflow that spawns processes and adds server hooks for lazy model resolution and custom model listing, changing request handling and error paths for `/v1/messages` and `/v1/models`. Also updates download logic to parse manifests/indexes and skip copies, which could affect existing resume behavior across model formats. > > **Overview** > Adds a new `mlx launch claude` CLI command that starts a local Anthropic-compatible server, discovers locally downloaded models, lazy-loads/swallows model swaps via a serialized swap controller (including aliasing unknown Claude model names), and can optionally capture full request/response logs for debugging. > > Improves `mlx download model` defaults and robustness by introducing a shared `~/.mlx-node` config (`MLX_MODELS_DIR`/`config.json`) for model storage, adding stricter “already downloaded” checks (sharded safetensors shard verification, GGUF manifest completeness, glob-matched set completeness), and skipping redundant local `copyFile` operations when the destination file size already matches. > > Extends the server API to support `resolveModel` (lazy registration) and `listModels` overrides, reorders `/v1/messages` to map/validate before loading, brackets loads with the idle-sweeper drain suspension, and ensures resolve failures return Anthropic-shaped 500 errors. Updates presets/export surface to include `LAUNCH_PRESETS` and adds sampling defaults for `gemma4` and `lfm2`. > > <sup>Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit a3dfaeb. Bugbot is set up for automated code reviews on this repo. Configure [here](https://www.cursor.com/dashboard/bugbot).</sup> <!-- /CURSOR_SUMMARY --> --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 144653d commit 7daf69d

25 files changed

Lines changed: 2808 additions & 102 deletions

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ jobs:
103103
path: packages/core/
104104
- name: Run tests
105105
run: |
106-
yarn mlx download model
106+
yarn mlx download model --output .cache/models/qwen3-0.6b
107107
yarn mlx convert --input .cache/models/qwen3-0.6b -d bf16 --output .cache/models/qwen3-0.6b-mlx-bf16
108108
yarn mlx download dataset
109109
${{ matrix.test }}

__test__/cli/config.test.ts

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs';
2+
import { tmpdir } from 'node:os';
3+
import { join, resolve } from 'node:path';
4+
5+
import { afterEach, beforeEach, describe, expect, it, vi } from 'vite-plus/test';
6+
7+
import { resolveModelsDir } from '../../packages/cli/src/config.js';
8+
9+
// Test suite for `resolveModelsDir`. The cases below pin this lookup order:
//   1. the explicit argument,
//   2. the MLX_MODELS_DIR environment variable,
//   3. `modelsDir` from ~/.mlx-node/config.json,
//   4. the ~/.mlx-node/models default.
// Every expected value derived from an input is passed through `resolve()`.
// NOTE(review): no case sets both env and config.json without an explicit
// argument, so "env beats config" is implied by the test titles only.
describe('resolveModelsDir', () => {
10+
let workRoot: string;
11+
let fakeHome: string;
12+
13+
beforeEach(() => {
14+
// Each test gets its own scratch directory with a fake home inside it.
workRoot = mkdtempSync(join(tmpdir(), 'mlx-config-test-'));
15+
fakeHome = join(workRoot, 'home');
16+
mkdirSync(fakeHome, { recursive: true });
17+
// Point HOME at the scratch home; the assertions below expect the
// `.mlx-node` directory to be looked up under it.
vi.stubEnv('HOME', fakeHome);
18+
// Start every test with MLX_MODELS_DIR unset.
// NOTE(review): this mutates process.env directly rather than through
// vi.stubEnv — confirm whether vi.unstubAllEnvs() in afterEach restores a
// value that was set before the run.
delete process.env.MLX_MODELS_DIR;
19+
});
20+
21+
afterEach(() => {
22+
vi.unstubAllEnvs();
23+
rmSync(workRoot, { recursive: true, force: true });
24+
});
25+
26+
it('uses the explicit argument when provided', () => {
27+
const explicit = join(workRoot, 'explicit');
28+
const got = resolveModelsDir(explicit);
29+
expect(got).toBe(resolve(explicit));
30+
});
31+
32+
it('falls through to env when explicit is absent', () => {
33+
const envDir = join(workRoot, 'env-models');
34+
vi.stubEnv('MLX_MODELS_DIR', envDir);
35+
const got = resolveModelsDir();
36+
expect(got).toBe(resolve(envDir));
37+
});
38+
39+
it('reads modelsDir from ~/.mlx-node/config.json when env is unset', () => {
40+
const configDir = join(fakeHome, '.mlx-node');
41+
mkdirSync(configDir, { recursive: true });
42+
const configuredDir = join(workRoot, 'configured');
43+
writeFileSync(join(configDir, 'config.json'), JSON.stringify({ modelsDir: configuredDir }));
44+
const got = resolveModelsDir();
45+
expect(got).toBe(resolve(configuredDir));
46+
});
47+
48+
it('falls back to ~/.mlx-node/models when nothing else is set', () => {
49+
const got = resolveModelsDir();
50+
expect(got).toBe(join(fakeHome, '.mlx-node', 'models'));
51+
});
52+
53+
it('tolerates malformed config.json with a warning and falls back to default', () => {
54+
const configDir = join(fakeHome, '.mlx-node');
55+
mkdirSync(configDir, { recursive: true });
56+
writeFileSync(join(configDir, 'config.json'), '{ not valid json');
57+
// Silence console.warn for the duration of the test while still recording calls.
const warn = vi.spyOn(console, 'warn').mockImplementation(() => undefined);
58+
try {
59+
const got = resolveModelsDir();
60+
expect(got).toBe(join(fakeHome, '.mlx-node', 'models'));
61+
expect(warn).toHaveBeenCalled();
62+
} finally {
63+
// Restore console.warn even when an expectation above throws.
warn.mockRestore();
64+
}
65+
});
66+
67+
it('prefers explicit arg over env and config.json', () => {
68+
// Populate both lower-priority sources so the explicit argument has to beat each.
vi.stubEnv('MLX_MODELS_DIR', join(workRoot, 'should-lose-env'));
69+
const configDir = join(fakeHome, '.mlx-node');
70+
mkdirSync(configDir, { recursive: true });
71+
writeFileSync(join(configDir, 'config.json'), JSON.stringify({ modelsDir: join(workRoot, 'should-lose-config') }));
72+
const explicit = join(workRoot, 'winner');
73+
const got = resolveModelsDir(explicit);
74+
expect(got).toBe(resolve(explicit));
75+
});
76+
});
Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
import { mkdtempSync, readdirSync, rmSync, writeFileSync } from 'node:fs';
2+
import { tmpdir } from 'node:os';
3+
import { join } from 'node:path';
4+
5+
import { afterEach, beforeEach, describe, expect, it } from 'vite-plus/test';
6+
7+
import {
8+
isGgufRepoComplete,
9+
isGlobMatchedSetComplete,
10+
isGlobVariantPresent,
11+
isLocalCopyComplete,
12+
isModelAlreadyDownloaded,
13+
} from '../../packages/cli/src/commands/download-model.js';
14+
15+
// Test suite for `isModelAlreadyDownloaded(dir, files)`. The cases below
// expect `true` only when config.json is present AND the weights are
// complete: a single `model.safetensors`, a Paddle `inference.pdiparams`, or
// every shard named in `model.safetensors.index.json`'s `weight_map`.
describe('isModelAlreadyDownloaded', () => {
16+
let dir: string;
17+
18+
beforeEach(() => {
19+
// Fresh scratch directory per test, so file listings never leak between cases.
dir = mkdtempSync(join(tmpdir(), 'mlx-download-test-'));
20+
});
21+
22+
afterEach(() => {
23+
rmSync(dir, { recursive: true, force: true });
24+
});
25+
26+
/** Writes `contents` to the file `name` inside the per-test scratch directory. */
function write(name: string, contents: string): void {
27+
writeFileSync(join(dir, name), contents);
28+
}
29+
30+
it('returns false when config.json is missing', () => {
31+
write('model.safetensors', 'x');
32+
expect(isModelAlreadyDownloaded(dir, readdirSync(dir))).toBe(false);
33+
});
34+
35+
it('returns true for a single-file safetensors model with config', () => {
36+
write('config.json', '{}');
37+
write('model.safetensors', 'x');
38+
expect(isModelAlreadyDownloaded(dir, readdirSync(dir))).toBe(true);
39+
});
40+
41+
it('returns true for a Paddle model (inference.pdiparams) with config', () => {
42+
write('config.json', '{}');
43+
write('inference.pdiparams', 'x');
44+
expect(isModelAlreadyDownloaded(dir, readdirSync(dir))).toBe(true);
45+
});
46+
47+
it('returns false when a sharded index references shards that are missing on disk', () => {
48+
// Regression: previously the early-return only checked that
49+
// model.safetensors.index.json was present. An interrupted prior
50+
// download that landed the index but not all shards would silently
51+
// be declared "already downloaded".
52+
write('config.json', '{}');
53+
write(
54+
'model.safetensors.index.json',
55+
JSON.stringify({
56+
metadata: { total_size: 12345 },
57+
weight_map: {
58+
'layer.0.weight': 'model-00001-of-00002.safetensors',
59+
'layer.1.weight': 'model-00002-of-00002.safetensors',
60+
},
61+
}),
62+
);
63+
// Only the first shard exists; the second is missing.
64+
write('model-00001-of-00002.safetensors', 'shard-1');
65+
66+
expect(isModelAlreadyDownloaded(dir, readdirSync(dir))).toBe(false);
67+
});
68+
69+
it('returns true for a sharded model when ALL referenced shards exist', () => {
70+
write('config.json', '{}');
71+
write(
72+
'model.safetensors.index.json',
73+
JSON.stringify({
74+
metadata: { total_size: 12345 },
75+
weight_map: {
76+
'layer.0.weight': 'model-00001-of-00002.safetensors',
77+
'layer.1.weight': 'model-00002-of-00002.safetensors',
78+
'layer.2.weight': 'model-00002-of-00002.safetensors', // duplicate target dedups
79+
},
80+
}),
81+
);
82+
write('model-00001-of-00002.safetensors', 'shard-1');
83+
write('model-00002-of-00002.safetensors', 'shard-2');
84+
85+
expect(isModelAlreadyDownloaded(dir, readdirSync(dir))).toBe(true);
86+
});
87+
88+
it('returns false when the index file is malformed JSON', () => {
89+
write('config.json', '{}');
90+
write('model.safetensors.index.json', '{not json');
91+
expect(isModelAlreadyDownloaded(dir, readdirSync(dir))).toBe(false);
92+
});
93+
94+
it('returns false when the index file lacks weight_map', () => {
95+
write('config.json', '{}');
96+
write('model.safetensors.index.json', JSON.stringify({ metadata: { total_size: 0 } }));
97+
expect(isModelAlreadyDownloaded(dir, readdirSync(dir))).toBe(false);
98+
});
99+
100+
it('returns false when weight_map is empty', () => {
101+
write('config.json', '{}');
102+
write('model.safetensors.index.json', JSON.stringify({ weight_map: {} }));
103+
expect(isModelAlreadyDownloaded(dir, readdirSync(dir))).toBe(false);
104+
});
105+
106+
it('still considers single-file safetensors complete even alongside an unverified index', () => {
107+
// If both `model.safetensors` and `model.safetensors.index.json` are
108+
// present, the single file wins — no need to parse the index.
109+
write('config.json', '{}');
110+
write('model.safetensors', 'x');
111+
write('model.safetensors.index.json', JSON.stringify({ weight_map: { x: 'never-existed.safetensors' } }));
112+
expect(isModelAlreadyDownloaded(dir, readdirSync(dir))).toBe(true);
113+
});
114+
});
115+
116+
// Test suite for `isGlobVariantPresent(files, patterns)`. The cases below
// expect `true` when at least one listed file matches at least one glob
// pattern (compared case-insensitively), and `false` when the pattern list is
// empty or when nothing matches.
describe('isGlobVariantPresent', () => {
117+
it('returns false when no patterns are provided', () => {
118+
expect(isGlobVariantPresent(['config.json', 'tokenizer.json', 'model.Q8_0.gguf'], [])).toBe(false);
119+
});
120+
121+
it('returns false when a prior Q8 download leaves only CORE_FILES + a non-matching gguf', () => {
122+
// Regression: previously the early-return counted CORE_FILES toward
123+
// the "matched" set, so any prior gguf download (which lays down
124+
// config.json + tokenizer.json) auto-satisfied the >1 threshold and
125+
// a fresh `--glob "*Q4*"` exited as "already downloaded" without
126+
// ever fetching the Q4 weights. The helper must look ONLY at user-
127+
// glob matches.
128+
const files = ['config.json', 'tokenizer.json', 'tokenizer_config.json', 'model.Q8_0.gguf'];
129+
expect(isGlobVariantPresent(files, ['*Q4*'])).toBe(false);
130+
});
131+
132+
it('returns true when an existing file matches one of the glob patterns', () => {
133+
const files = ['config.json', 'tokenizer.json', 'model.Q4_K_M.gguf'];
134+
expect(isGlobVariantPresent(files, ['*Q4*'])).toBe(true);
135+
});
136+
137+
it('returns true when ANY pattern matches (multi-glob OR semantics)', () => {
138+
const files = ['config.json', 'model.Q8_0.gguf'];
139+
expect(isGlobVariantPresent(files, ['*Q4*', '*Q8*'])).toBe(true);
140+
});
141+
142+
it('returns false when no file matches any pattern (CORE_FILES alone do not count)', () => {
143+
const files = ['config.json', 'tokenizer.json', 'tokenizer_config.json'];
144+
expect(isGlobVariantPresent(files, ['*BF16*'])).toBe(false);
145+
});
146+
147+
it('matches case-insensitively (gguf repos vary in capitalization)', () => {
148+
// Checked in both directions: lowercase file vs uppercase pattern, and the reverse.
expect(isGlobVariantPresent(['model.q4_k_m.gguf'], ['*Q4_K_M*'])).toBe(true);
149+
expect(isGlobVariantPresent(['model.Q4_K_M.gguf'], ['*q4_k_m*'])).toBe(true);
150+
});
151+
});
152+
153+
// Test suite for `isGgufRepoComplete(local, remote)`. The cases below expect
// `true` only when every `.gguf` entry of the remote manifest (compared by
// basename) is present in the local listing, and `false` when the remote
// manifest is empty or contains no `.gguf` files.
describe('isGgufRepoComplete', () => {
154+
it('returns false when only some of the remote GGUF variants are present locally', () => {
155+
// Regression: previously a no-glob re-run after an interrupted
156+
// download (e.g. only Q2_K landed) silently exited as "already
157+
// downloaded" because the early-return only checked
158+
// `files.some(.gguf)`. The fix compares against the remote
159+
// manifest and refuses to short-circuit until every advertised
160+
// GGUF variant is on disk.
161+
const local = ['model.Q2_K.gguf', 'config.json'];
162+
const remote = ['model.Q2_K.gguf', 'model.Q4_K_M.gguf', 'model.Q8_0.gguf'];
163+
expect(isGgufRepoComplete(local, remote)).toBe(false);
164+
});
165+
166+
it('returns true when every remote GGUF variant is present locally', () => {
167+
const local = ['model.Q4_K_M.gguf', 'config.json'];
168+
const remote = ['model.Q4_K_M.gguf'];
169+
expect(isGgufRepoComplete(local, remote)).toBe(true);
170+
});
171+
172+
it('returns false when the remote repo is not a GGUF repo (no .gguf files in manifest)', () => {
173+
// Caller should route through `isModelAlreadyDownloaded` for
174+
// safetensors / Paddle repos. A `false` return here tells the
175+
// caller "do not take the GGUF early-return branch".
176+
const local = ['model.safetensors', 'config.json'];
177+
const remote = ['model.safetensors', 'config.json', 'tokenizer.json'];
178+
expect(isGgufRepoComplete(local, remote)).toBe(false);
179+
});
180+
181+
it('returns false on an empty remote manifest (likely upstream error)', () => {
182+
// An empty manifest is almost certainly a network / auth failure
183+
// rather than a legitimate empty repo. Returning false routes the
184+
// caller through the download loop where the real error will
185+
// surface (404 / auth) instead of being masked as "already
186+
// downloaded".
187+
expect(isGgufRepoComplete(['model.Q4_K_M.gguf'], [])).toBe(false);
188+
expect(isGgufRepoComplete([], [])).toBe(false);
189+
});
190+
191+
it('compares basenames so a sub-directory remote layout still resolves cleanly', () => {
192+
// Some repos publish under a prefix (e.g. `models/foo.gguf`); the
193+
// local `readdir(outputDir)` is always flat, so the helper compares
194+
// basenames on both sides.
195+
const local = ['model.Q4_K_M.gguf'];
196+
const remote = ['models/model.Q4_K_M.gguf'];
197+
expect(isGgufRepoComplete(local, remote)).toBe(true);
198+
});
199+
200+
it('returns false when the local file list is empty', () => {
201+
// Defensive: a fresh outputDir against a non-empty manifest is
202+
// never complete.
203+
expect(isGgufRepoComplete([], ['model.Q4_K_M.gguf'])).toBe(false);
204+
});
205+
});
206+
207+
// Test suite for `isGlobMatchedSetComplete(local, remote, patterns)`. The
// cases below expect `true` only when the remote manifest has at least one
// file matching the glob patterns and every such file (compared by basename)
// is present locally; an empty manifest, an empty pattern list, or an empty
// match set all yield `false`.
describe('isGlobMatchedSetComplete', () => {
208+
it('returns false when only some of the remote glob-matched files are present locally', () => {
209+
// Regression: previously the early-return used `isGlobVariantPresent`,
210+
// which only required AT LEAST ONE local hit. An interrupted prior
211+
// `--glob "*Q4*"` run that fetched one Q4 shard but not the others
212+
// would silently exit as "Matched files already downloaded" while
213+
// leaving the local copy incomplete.
214+
const remote = ['model.Q4_0.gguf', 'model.Q4_K_M.gguf', 'model.Q8_0.gguf', 'config.json'];
215+
const local = ['model.Q4_0.gguf', 'config.json'];
216+
expect(isGlobMatchedSetComplete(local, remote, ['*Q4*'])).toBe(false);
217+
});
218+
219+
it('returns true when every remote glob-matched file is present locally', () => {
220+
const remote = ['model.Q4_0.gguf', 'model.Q4_K_M.gguf', 'model.Q8_0.gguf', 'config.json'];
221+
const local = ['model.Q4_0.gguf', 'model.Q4_K_M.gguf', 'config.json'];
222+
expect(isGlobMatchedSetComplete(local, remote, ['*Q4*'])).toBe(true);
223+
});
224+
225+
it('returns false when the remote manifest has no files matching the glob', () => {
226+
// Empty intersection: nothing was supposed to be downloaded.
227+
// Declaring "complete" here would be wrong — the downstream
228+
// "no files matched the given criteria" path handles this case
229+
// after listing available variants.
230+
const remote = ['model.safetensors', 'config.json'];
231+
const local: string[] = [];
232+
expect(isGlobMatchedSetComplete(local, remote, ['*Q4*'])).toBe(false);
233+
});
234+
235+
it('returns false on an empty remote manifest (likely upstream error)', () => {
236+
expect(isGlobMatchedSetComplete(['model.Q4_K_M.gguf'], [], ['*Q4*'])).toBe(false);
237+
});
238+
239+
it('compares basenames so a sub-directory remote layout still resolves cleanly', () => {
240+
// Some repos publish under a prefix (e.g. `models/model.Q4_K_M.gguf`); the
241+
// local `readdir(outputDir)` is always flat. Mirrors `isGgufRepoComplete`.
242+
const remote = ['models/model.Q4_K_M.gguf'];
243+
const local = ['model.Q4_K_M.gguf'];
244+
expect(isGlobMatchedSetComplete(local, remote, ['*Q4*'])).toBe(true);
245+
});
246+
247+
it('returns false when no glob patterns are provided', () => {
248+
// Defensive: the helper requires at least one pattern to compare against.
249+
expect(isGlobMatchedSetComplete(['model.Q4_K_M.gguf'], ['model.Q4_K_M.gguf'], [])).toBe(false);
250+
});
251+
});
252+
253+
// Test suite for `isLocalCopyComplete(path, expectedSize)`. The cases below
// expect `true` when the destination exists and its byte size equals
// `expectedSize`, `false` when it is missing, smaller, or larger, and an
// existence-only check when `expectedSize` is zero or negative.
describe('isLocalCopyComplete', () => {
254+
let dir: string;
255+
256+
beforeEach(() => {
257+
// Fresh scratch directory per test.
dir = mkdtempSync(join(tmpdir(), 'mlx-download-test-'));
258+
});
259+
260+
afterEach(() => {
261+
rmSync(dir, { recursive: true, force: true });
262+
});
263+
264+
it('returns false when the destination file does not exist', () => {
265+
expect(isLocalCopyComplete(join(dir, 'missing.bin'), 100)).toBe(false);
266+
});
267+
268+
it('returns true when the destination exists and size matches', () => {
269+
// Regression: previously the download loop unconditionally called
270+
// copyFile for every file in `filesToDownload`, re-copying gigabytes
271+
// of already-complete shards from the HF cache to outputDir on every
272+
// resume. The skip is gated on size-equality so a single check catches
273+
// truncated/interrupted prior copies.
274+
const path = join(dir, 'shard.bin');
275+
writeFileSync(path, 'x'.repeat(100));
276+
expect(isLocalCopyComplete(path, 100)).toBe(true);
277+
});
278+
279+
it('returns false when the destination is truncated (interrupted prior copy)', () => {
280+
// A previous `copyFile` killed mid-write would leave a smaller-than-
281+
// expected file. The size mismatch must trigger a re-copy so the resume
282+
// doesn't ship a corrupt shard to disk.
283+
const path = join(dir, 'shard.bin');
284+
writeFileSync(path, 'x'.repeat(50));
285+
expect(isLocalCopyComplete(path, 100)).toBe(false);
286+
});
287+
288+
it('returns false when the destination is larger than expected (corrupt write)', () => {
289+
const path = join(dir, 'shard.bin');
290+
writeFileSync(path, 'x'.repeat(150));
291+
expect(isLocalCopyComplete(path, 100)).toBe(false);
292+
});
293+
294+
it('falls back to existence-only when expectedSize is non-positive', () => {
295+
// The HF manifest occasionally returns size=0 for tiny metadata files
296+
// or when the expand=true field isn't populated. Existence is the
297+
// best signal we can use without re-fetching the LFS pointer.
298+
const path = join(dir, 'meta.json');
299+
writeFileSync(path, '{}');
300+
expect(isLocalCopyComplete(path, 0)).toBe(true);
301+
expect(isLocalCopyComplete(path, -1)).toBe(true);
302+
});
303+
});

0 commit comments

Comments
 (0)