|
2 | 2 | source ~/.bash_profile |
3 | 3 |
|
4 | 4 | # Configuration |
5 | | -MAX_DIFF_CHARS=600 # stripped +/- lines only — keeps 1.5B prefill fast |
6 | | -TIMEOUT_SECONDS=60 # 60s covers multi-file commits on the 1.5B model |
| 5 | +MAX_DIFF_CHARS=300 # ~75 input tokens — halves prefill vs 600 |
| 6 | +TIMEOUT_SECONDS=30 # 7B INT4 generates 15 tokens well under 10s warm |
7 | 7 | MAX_COMMIT_LENGTH=72 # Max commit message length in characters (common git line-width convention) |
8 | 8 |
|
9 | 9 | # Squish model selection. |
@@ -198,8 +198,16 @@ if [ -n "$SQUISH_BIN" ]; then |
198 | 198 | print_info "squish server: ${GREEN}already running on :$_port${NC}" |
199 | 199 | else |
200 | 200 | print_info "squish server: ${YELLOW}not running — starting it now…${NC}" |
| 201 | + # Draft model for speculative decoding — 1.5B drafts tokens, 7B verifies in |
| 202 | + # one forward pass. ~2-3x generation speedup; same output quality as 7B alone. Only enabled when the draft model directory exists. |
| 203 | + _draft_model="${SQUISH_MODELS_DIR}/Qwen2.5-1.5B-Instruct-bf16-int8-bak" |
| 204 | + _draft_flag="" |
| 205 | + [ -d "$_draft_model" ] && _draft_flag="--draft-model $_draft_model" |
201 | 206 | # Start the server in the background and wait for it |
202 | | - $SQUISH_BIN serve ${SQUISH_MODEL:+--model $SQUISH_MODEL} --port "$_port" > /tmp/squish_serve.log 2>&1 & |
| 207 | + $SQUISH_BIN serve ${SQUISH_MODEL:+--model $SQUISH_MODEL} --port "$_port" \ |
| 208 | + --kv-cache-mode int8 \ |
| 209 | + $_draft_flag \ |
| 210 | + > /tmp/squish_serve.log 2>&1 & |
203 | 211 | _serve_pid=$! |
204 | 212 | # Run snake spinner in background while polling for server readiness |
205 | 213 | snake_spinner "Starting squish server" & |
@@ -235,9 +243,9 @@ if [ -n "$SQUISH_BIN" ]; then |
235 | 243 | ' | head -c "$MAX_DIFF_CHARS") |
236 | 244 |
|
237 | 245 | # Build JSON payload in pure bash — _json_str escapes all special chars |
238 | | - _sys="You are a git commit message writer. Read the diff and write ONE concise commit message describing what actually changed. Reply with ONLY the commit message — no labels, no filenames, no markdown, no period. Must be a complete thought under 72 characters. Imperative mood (e.g. 'Add', 'Fix', 'Update', 'Remove')." |
239 | | - _usr="Files: ${changed_names}\nStat: ${stat_summary}\n\nChanged lines:\n${stripped_diff}\n--- END DIFF ---\n\nCommit message (imperative, < 72 chars):" |
240 | | - PAYLOAD='{"model":"squish","messages":[{"role":"system","content":"'"$(_json_str "$_sys")"'"},{"role":"user","content":"'"$(_json_str "$_usr")"'"}],"max_tokens":50,"temperature":0.2,"stream":false,"stop":["\n","\r"]}' |
| 246 | + _sys="Write a git commit message. Imperative mood, under 72 chars, no punctuation. Reply with ONLY the message." |
| 247 | + _usr="Files: ${changed_names}\nStat: ${stat_summary}\nDiff:\n${stripped_diff}" |
| 248 | + PAYLOAD='{"model":"squish","messages":[{"role":"system","content":"'"$(_json_str "$_sys")"'"},{"role":"user","content":"'"$(_json_str "$_usr")"'"}],"max_tokens":20,"temperature":0.2,"stream":false,"stop":["\n","\r"]}' |
241 | 249 |
|
242 | 250 | # Run squish — curl in background, spinner inline in foreground (no subprocess) |
243 | 251 | print_step "Asking AI for commit message (Squish local LLM)..." |
|
0 commit comments