|
14 | 14 | #include <stdlib.h> |
15 | 15 | #include <string.h> |
16 | 16 | #include <time.h> |
| 17 | +#include <unistd.h> |
| 18 | +#include <dirent.h> |
17 | 19 |
|
/* Print command-line help for the converter to stderr. */
static void print_usage(const char* prog) {
    /* Constant banner/option text goes through fputs; only the lines that
     * embed the program name need a format string. */
    fputs("TQM Converter — Pre-quantize models for instant loading\n\n", stderr);
    fprintf(stderr, "Usage: %s [model.safetensors] [tokenizer.json] [-o output.tqm]\n\n", prog);
    fputs(" All arguments are optional — auto-detects Qwen3.5-0.8B from HuggingFace cache.\n\n"
          "Options:\n"
          " -o <path> Output file (default: model.tqm)\n"
          " -j <threads> Threads for quantization (default: 4)\n"
          " -h, --help Show this help\n"
          "\nExamples:\n",
          stderr);
    fprintf(stderr, " %s # auto-detect + convert\n", prog);
    fprintf(stderr, " %s -o qwen.tqm # auto-detect, custom output\n", prog);
    fprintf(stderr, " %s model.safetensors tok.json -o out.tqm # explicit paths\n", prog);
}
31 | 33 |
|
32 | 34 | int main(int argc, char** argv) { |
@@ -57,15 +59,71 @@ int main(int argc, char** argv) { |
57 | 59 | } |
58 | 60 | } |
59 | 61 |
|
| 62 | + /* Auto-detect model from HuggingFace cache if not specified */ |
60 | 63 | if (!model_path) { |
61 | | - fprintf(stderr, "Error: model path required\n"); |
62 | | - print_usage(argv[0]); |
| 64 | + const char* home = getenv("HOME"); |
| 65 | + if (home) { |
| 66 | + static char auto_model[1024]; |
| 67 | + static char auto_tok[1024]; |
| 68 | + /* Try common Qwen3.5-0.8B cache locations */ |
| 69 | + const char* base = "/.cache/huggingface/hub/models--Qwen--Qwen3.5-0.8B/snapshots"; |
| 70 | + snprintf(auto_model, sizeof(auto_model), "%s%s", home, base); |
| 71 | + /* Find snapshot directory */ |
| 72 | + DIR* dir = opendir(auto_model); |
| 73 | + if (dir) { |
| 74 | + struct dirent* ent; |
| 75 | + while ((ent = readdir(dir)) != NULL) { |
| 76 | + if (ent->d_name[0] == '.') continue; |
| 77 | + char try_path[2048]; |
| 78 | + /* Try single-file safetensors */ |
| 79 | + snprintf(try_path, sizeof(try_path), "%s/%s/model.safetensors", |
| 80 | + auto_model, ent->d_name); |
| 81 | + if (access(try_path, R_OK) == 0) { |
| 82 | + snprintf(auto_model, sizeof(auto_model), "%s", try_path); |
| 83 | + model_path = auto_model; |
| 84 | + } |
| 85 | + /* Try multi-shard */ |
| 86 | + if (!model_path) { |
| 87 | + snprintf(try_path, sizeof(try_path), |
| 88 | + "%s/%s/model.safetensors-00001-of-00001.safetensors", |
| 89 | + auto_model, ent->d_name); |
| 90 | + /* auto_model was overwritten, reconstruct */ |
| 91 | + snprintf(auto_model, sizeof(auto_model), "%s%s", home, base); |
| 92 | + snprintf(try_path, sizeof(try_path), |
| 93 | + "%s/%s/model.safetensors-00001-of-00001.safetensors", |
| 94 | + auto_model, ent->d_name); |
| 95 | + if (access(try_path, R_OK) == 0) { |
| 96 | + snprintf(auto_model, sizeof(auto_model), "%s", try_path); |
| 97 | + model_path = auto_model; |
| 98 | + } |
| 99 | + } |
| 100 | + /* Auto-detect tokenizer too */ |
| 101 | + if (model_path && !tokenizer_path) { |
| 102 | + char* last_slash = strrchr(auto_model, '/'); |
| 103 | + if (last_slash) { |
| 104 | + size_t dir_len = last_slash - auto_model; |
| 105 | + snprintf(auto_tok, sizeof(auto_tok), "%.*s/tokenizer.json", |
| 106 | + (int)dir_len, auto_model); |
| 107 | + if (access(auto_tok, R_OK) == 0) { |
| 108 | + tokenizer_path = auto_tok; |
| 109 | + } |
| 110 | + } |
| 111 | + } |
| 112 | + if (model_path) break; |
| 113 | + } |
| 114 | + closedir(dir); |
| 115 | + } |
| 116 | + } |
| 117 | + } |
| 118 | + |
| 119 | + if (!model_path) { |
| 120 | + fprintf(stderr, "Error: model not found.\n"); |
| 121 | + fprintf(stderr, " Auto-detect searched ~/.cache/huggingface/hub/models--Qwen--Qwen3.5-0.8B/\n"); |
| 122 | + fprintf(stderr, " Specify manually: %s <model.safetensors> [tokenizer.json] -o output.tqm\n", argv[0]); |
63 | 123 | return 1; |
64 | 124 | } |
65 | 125 | if (!output_path) { |
66 | | - fprintf(stderr, "Error: output path required (-o)\n"); |
67 | | - print_usage(argv[0]); |
68 | | - return 1; |
| 126 | + output_path = "model.tqm"; /* default output name */ |
69 | 127 | } |
70 | 128 |
|
71 | 129 | tq_set_threads(n_threads); |
|
0 commit comments