|
| 1 | +import { mkdtempSync, readdirSync, rmSync, writeFileSync } from 'node:fs'; |
| 2 | +import { tmpdir } from 'node:os'; |
| 3 | +import { join } from 'node:path'; |
| 4 | + |
| 5 | +import { afterEach, beforeEach, describe, expect, it } from 'vite-plus/test'; |
| 6 | + |
| 7 | +import { |
| 8 | + isGgufRepoComplete, |
| 9 | + isGlobMatchedSetComplete, |
| 10 | + isGlobVariantPresent, |
| 11 | + isLocalCopyComplete, |
| 12 | + isModelAlreadyDownloaded, |
| 13 | +} from '../../packages/cli/src/commands/download-model.js'; |
| 14 | + |
/**
 * Exercises `isModelAlreadyDownloaded` against real directories on disk.
 * Every case seeds a throwaway folder, hands the helper that folder plus its
 * current listing, and asserts the boolean verdict.
 */
describe('isModelAlreadyDownloaded', () => {
  const INDEX_NAME = 'model.safetensors.index.json';
  const SHARD_ONE = 'model-00001-of-00002.safetensors';
  const SHARD_TWO = 'model-00002-of-00002.safetensors';

  let modelDir: string;

  beforeEach(() => {
    // A brand-new temp directory per test keeps fixtures isolated.
    modelDir = mkdtempSync(join(tmpdir(), 'mlx-download-test-'));
  });

  afterEach(() => {
    rmSync(modelDir, { recursive: true, force: true });
  });

  /** Drops a file called `name` with the given text into the scratch directory. */
  const seed = (name: string, contents: string): void => {
    writeFileSync(join(modelDir, name), contents);
  };

  /** Serialises `payload` and stores it as the sharded-weights index file. */
  const seedIndex = (payload: unknown): void => {
    seed(INDEX_NAME, JSON.stringify(payload));
  };

  /** Runs the helper under test against the directory's current contents. */
  const verdict = () => isModelAlreadyDownloaded(modelDir, readdirSync(modelDir));

  it('returns false when config.json is missing', () => {
    seed('model.safetensors', 'x');

    expect(verdict()).toBe(false);
  });

  it('returns true for a single-file safetensors model with config', () => {
    seed('config.json', '{}');
    seed('model.safetensors', 'x');

    expect(verdict()).toBe(true);
  });

  it('returns true for a Paddle model (inference.pdiparams) with config', () => {
    seed('config.json', '{}');
    seed('inference.pdiparams', 'x');

    expect(verdict()).toBe(true);
  });

  it('returns false when a sharded index references shards that are missing on disk', () => {
    // Regression guard: the early-return used to look only for the presence
    // of model.safetensors.index.json. If an interrupted download had written
    // the index but not every shard, the model was wrongly reported as
    // "already downloaded".
    seed('config.json', '{}');
    seedIndex({
      metadata: { total_size: 12345 },
      weight_map: {
        'layer.0.weight': SHARD_ONE,
        'layer.1.weight': SHARD_TWO,
      },
    });
    // The second shard is deliberately never written.
    seed(SHARD_ONE, 'shard-1');

    expect(verdict()).toBe(false);
  });

  it('returns true for a sharded model when ALL referenced shards exist', () => {
    seed('config.json', '{}');
    seedIndex({
      metadata: { total_size: 12345 },
      weight_map: {
        'layer.0.weight': SHARD_ONE,
        'layer.1.weight': SHARD_TWO,
        // Same shard referenced twice: repeated targets must collapse to one file.
        'layer.2.weight': SHARD_TWO,
      },
    });
    seed(SHARD_ONE, 'shard-1');
    seed(SHARD_TWO, 'shard-2');

    expect(verdict()).toBe(true);
  });

  it('returns false when the index file is malformed JSON', () => {
    seed('config.json', '{}');
    // Written raw (not via seedIndex) so the file content is invalid JSON.
    seed(INDEX_NAME, '{not json');

    expect(verdict()).toBe(false);
  });

  it('returns false when the index file lacks weight_map', () => {
    seed('config.json', '{}');
    seedIndex({ metadata: { total_size: 0 } });

    expect(verdict()).toBe(false);
  });

  it('returns false when weight_map is empty', () => {
    seed('config.json', '{}');
    seedIndex({ weight_map: {} });

    expect(verdict()).toBe(false);
  });

  it('still considers single-file safetensors complete even alongside an unverified index', () => {
    // With both `model.safetensors` and the index file present, the single
    // weights file is enough; the index (which points at a shard that was
    // never created) should not be consulted.
    seed('config.json', '{}');
    seed('model.safetensors', 'x');
    seedIndex({ weight_map: { x: 'never-existed.safetensors' } });

    expect(verdict()).toBe(true);
  });
});
| 115 | + |
/**
 * Pure-function tests for `isGlobVariantPresent(files, patterns)`: the result
 * should be true only when at least one listed file matches at least one of
 * the supplied glob patterns.
 */
describe('isGlobVariantPresent', () => {
  it('returns false when no patterns are provided', () => {
    const onDisk = ['config.json', 'tokenizer.json', 'model.Q8_0.gguf'];

    expect(isGlobVariantPresent(onDisk, [])).toBe(false);
  });

  it('returns false when a prior Q8 download leaves only CORE_FILES + a non-matching gguf', () => {
    // Regression guard: CORE_FILES used to be counted as "matched", so any
    // earlier gguf download (which also writes config.json + tokenizer.json)
    // satisfied the >1 threshold on its own. A later `--glob "*Q4*"` then
    // exited as "already downloaded" without fetching the Q4 weights. Only
    // files matching the user's glob may count.
    const onDisk = ['config.json', 'tokenizer.json', 'tokenizer_config.json', 'model.Q8_0.gguf'];
    const wanted = ['*Q4*'];

    expect(isGlobVariantPresent(onDisk, wanted)).toBe(false);
  });

  it('returns true when an existing file matches one of the glob patterns', () => {
    const onDisk = ['config.json', 'tokenizer.json', 'model.Q4_K_M.gguf'];
    const wanted = ['*Q4*'];

    expect(isGlobVariantPresent(onDisk, wanted)).toBe(true);
  });

  it('returns true when ANY pattern matches (multi-glob OR semantics)', () => {
    const onDisk = ['config.json', 'model.Q8_0.gguf'];
    const wanted = ['*Q4*', '*Q8*'];

    expect(isGlobVariantPresent(onDisk, wanted)).toBe(true);
  });

  it('returns false when no file matches any pattern (CORE_FILES alone do not count)', () => {
    const onDisk = ['config.json', 'tokenizer.json', 'tokenizer_config.json'];
    const wanted = ['*BF16*'];

    expect(isGlobVariantPresent(onDisk, wanted)).toBe(false);
  });

  it('matches case-insensitively (gguf repos vary in capitalization)', () => {
    // Lower-case file vs upper-case pattern, then the reverse.
    const lowerCaseHit = isGlobVariantPresent(['model.q4_k_m.gguf'], ['*Q4_K_M*']);
    expect(lowerCaseHit).toBe(true);

    const upperCaseHit = isGlobVariantPresent(['model.Q4_K_M.gguf'], ['*q4_k_m*']);
    expect(upperCaseHit).toBe(true);
  });
});
| 152 | + |
/**
 * Pure-function tests for `isGgufRepoComplete(localFiles, remoteFiles)`: the
 * local listing counts as complete only when every `.gguf` file named by the
 * remote manifest is present locally.
 */
describe('isGgufRepoComplete', () => {
  it('returns false when only some of the remote GGUF variants are present locally', () => {
    // Regression guard: a re-run without a glob after an interrupted download
    // (for example only Q2_K had landed) used to exit as "already downloaded"
    // because the check was merely `files.some(.gguf)`. The helper now
    // compares against the remote manifest and must not short-circuit until
    // every advertised GGUF variant is on disk.
    const manifest = ['model.Q2_K.gguf', 'model.Q4_K_M.gguf', 'model.Q8_0.gguf'];
    const onDisk = ['model.Q2_K.gguf', 'config.json'];

    expect(isGgufRepoComplete(onDisk, manifest)).toBe(false);
  });

  it('returns true when every remote GGUF variant is present locally', () => {
    const manifest = ['model.Q4_K_M.gguf'];
    const onDisk = ['model.Q4_K_M.gguf', 'config.json'];

    expect(isGgufRepoComplete(onDisk, manifest)).toBe(true);
  });

  it('returns false when the remote repo is not a GGUF repo (no .gguf files in manifest)', () => {
    // `false` here signals "do not take the GGUF early-return branch";
    // safetensors / Paddle repos are expected to be routed through
    // `isModelAlreadyDownloaded` by the caller instead.
    const manifest = ['model.safetensors', 'config.json', 'tokenizer.json'];
    const onDisk = ['model.safetensors', 'config.json'];

    expect(isGgufRepoComplete(onDisk, manifest)).toBe(false);
  });

  it('returns false on an empty remote manifest (likely upstream error)', () => {
    // An empty manifest most likely indicates a network / auth failure, not
    // a genuinely empty repo. Answering false sends the caller into the
    // download loop, where the real error (404 / auth) can surface rather
    // than being hidden behind "already downloaded".
    const withLocalGguf = isGgufRepoComplete(['model.Q4_K_M.gguf'], []);
    expect(withLocalGguf).toBe(false);

    const bothEmpty = isGgufRepoComplete([], []);
    expect(bothEmpty).toBe(false);
  });

  it('compares basenames so a sub-directory remote layout still resolves cleanly', () => {
    // Remote entries may carry a path prefix (e.g. `models/foo.gguf`) while
    // the local `readdir(outputDir)` listing is flat, so both sides are
    // compared by basename.
    const manifest = ['models/model.Q4_K_M.gguf'];
    const onDisk = ['model.Q4_K_M.gguf'];

    expect(isGgufRepoComplete(onDisk, manifest)).toBe(true);
  });

  it('returns false when the local file list is empty', () => {
    // Defensive case: an empty outputDir can never satisfy a non-empty manifest.
    const manifest = ['model.Q4_K_M.gguf'];

    expect(isGgufRepoComplete([], manifest)).toBe(false);
  });
});
| 206 | + |
/**
 * Pure-function tests for
 * `isGlobMatchedSetComplete(localFiles, remoteFiles, patterns)`: complete
 * means every remote file selected by the glob patterns is also present in
 * the local listing.
 */
describe('isGlobMatchedSetComplete', () => {
  // The single pattern shared by every case that supplies one.
  const q4Only = ['*Q4*'];

  it('returns false when only some of the remote glob-matched files are present locally', () => {
    // Regression guard: the early-return used to rely on
    // `isGlobVariantPresent`, which needs just ONE local hit. After an
    // interrupted `--glob "*Q4*"` run that fetched one Q4 file but not the
    // rest, the command exited with "Matched files already downloaded" even
    // though the local copy was incomplete.
    const onDisk = ['model.Q4_0.gguf', 'config.json'];
    const manifest = ['model.Q4_0.gguf', 'model.Q4_K_M.gguf', 'model.Q8_0.gguf', 'config.json'];

    expect(isGlobMatchedSetComplete(onDisk, manifest, q4Only)).toBe(false);
  });

  it('returns true when every remote glob-matched file is present locally', () => {
    const onDisk = ['model.Q4_0.gguf', 'model.Q4_K_M.gguf', 'config.json'];
    const manifest = ['model.Q4_0.gguf', 'model.Q4_K_M.gguf', 'model.Q8_0.gguf', 'config.json'];

    expect(isGlobMatchedSetComplete(onDisk, manifest, q4Only)).toBe(true);
  });

  it('returns false when the remote manifest has no files matching the glob', () => {
    // The glob selects nothing remotely, so nothing was meant to be fetched.
    // Reporting "complete" would be wrong; the downstream "no files matched
    // the given criteria" path deals with this after listing the available
    // variants.
    const onDisk: string[] = [];
    const manifest = ['model.safetensors', 'config.json'];

    expect(isGlobMatchedSetComplete(onDisk, manifest, q4Only)).toBe(false);
  });

  it('returns false on an empty remote manifest (likely upstream error)', () => {
    const onDisk = ['model.Q4_K_M.gguf'];

    expect(isGlobMatchedSetComplete(onDisk, [], q4Only)).toBe(false);
  });

  it('compares basenames so a sub-directory remote layout still resolves cleanly', () => {
    // Remote entries may sit under a prefix (e.g. `models/model.Q4_K_M.gguf`)
    // while the local `readdir(outputDir)` listing is flat. Mirrors the
    // equivalent `isGgufRepoComplete` case.
    const onDisk = ['model.Q4_K_M.gguf'];
    const manifest = ['models/model.Q4_K_M.gguf'];

    expect(isGlobMatchedSetComplete(onDisk, manifest, q4Only)).toBe(true);
  });

  it('returns false when no glob patterns are provided', () => {
    // Defensive case: with no pattern there is nothing to compare against.
    const sameOnBothSides = ['model.Q4_K_M.gguf'];

    expect(isGlobMatchedSetComplete(sameOnBothSides, ['model.Q4_K_M.gguf'], [])).toBe(false);
  });
});
| 252 | + |
/**
 * Filesystem-backed tests for `isLocalCopyComplete(path, expectedSize)`. Each
 * case writes (or deliberately omits) a file in a throwaway directory and
 * checks whether the helper treats it as a finished copy.
 */
describe('isLocalCopyComplete', () => {
  let scratchDir: string;

  beforeEach(() => {
    // A brand-new temp directory per test keeps fixtures isolated.
    scratchDir = mkdtempSync(join(tmpdir(), 'mlx-download-test-'));
  });

  afterEach(() => {
    rmSync(scratchDir, { recursive: true, force: true });
  });

  /** Writes `contents` to `name` in the scratch directory and returns the full path. */
  const fileWith = (name: string, contents: string): string => {
    const target = join(scratchDir, name);
    writeFileSync(target, contents);
    return target;
  };

  it('returns false when the destination file does not exist', () => {
    const absent = join(scratchDir, 'missing.bin');

    expect(isLocalCopyComplete(absent, 100)).toBe(false);
  });

  it('returns true when the destination exists and size matches', () => {
    // Regression guard: the download loop used to call copyFile for every
    // entry in `filesToDownload` unconditionally, re-copying gigabytes of
    // already-complete shards from the HF cache into outputDir on each
    // resume. The skip is gated on size equality, so this single check also
    // catches truncated / interrupted earlier copies.
    const shard = fileWith('shard.bin', 'x'.repeat(100));

    expect(isLocalCopyComplete(shard, 100)).toBe(true);
  });

  it('returns false when the destination is truncated (interrupted prior copy)', () => {
    // A `copyFile` killed mid-write leaves a file smaller than expected; the
    // size mismatch has to force a re-copy so a resume never leaves a corrupt
    // shard on disk.
    const shard = fileWith('shard.bin', 'x'.repeat(50));

    expect(isLocalCopyComplete(shard, 100)).toBe(false);
  });

  it('returns false when the destination is larger than expected (corrupt write)', () => {
    const shard = fileWith('shard.bin', 'x'.repeat(150));

    expect(isLocalCopyComplete(shard, 100)).toBe(false);
  });

  it('falls back to existence-only when expectedSize is non-positive', () => {
    // The HF manifest sometimes reports size=0 for tiny metadata files, or
    // when the expand=true field is not populated. Without re-fetching the
    // LFS pointer, mere existence is the best available signal.
    const meta = fileWith('meta.json', '{}');

    for (const reportedSize of [0, -1]) {
      expect(isLocalCopyComplete(meta, reportedSize)).toBe(true);
    }
  });
});
0 commit comments