quantcpp 0.9.2: add Llama-3.2-1B to model registry

unamedkr · claude · unamedkr · commit 6b781eb29291 · 2026-04-09T18:10:41.000+09:00
Model.from_pretrained("Llama-3.2-1B") auto-downloads the Q4_K_M GGUF
(~750 MB) from hugging-quants on Hugging Face. It gives much better
response quality than the 135M starter model, making it suitable for
Reddit demos and first-impression showcases.

README quick start now defaults to Llama-3.2-1B with SmolLM2 as a
smaller alternative. Also adds quantcpp.available_models() helper.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/README.md b/README.md
@@ -35,8 +35,9 @@ pip install quantcpp
 ```python
 from quantcpp import Model
 
-# Downloads a small model automatically (~135 MB, one-time)
-m = Model.from_pretrained("SmolLM2-135M")
+# Downloads a model automatically (one-time, cached)
+m = Model.from_pretrained("Llama-3.2-1B")   # ~750 MB, good quality
+# m = Model.from_pretrained("SmolLM2-135M") # ~135 MB, fastest download
 print(m.ask("What is gravity?"))
 ```
 
diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "quantcpp"
-version = "0.9.1"
+version = "0.9.2"
 description = "Single-header LLM inference engine with KV cache compression (7× compression at fp32 parity)"
 readme = "README.md"
 license = { text = "Apache-2.0" }
diff --git a/bindings/python/quantcpp/__init__.py b/bindings/python/quantcpp/__init__.py
@@ -19,7 +19,7 @@
     from importlib.metadata import version as _pkg_version
     __version__ = _pkg_version("quantcpp")
 except Exception:
-    __version__ = "0.9.1"  # fallback for editable / source-tree imports
+    __version__ = "0.9.2"  # fallback for editable / source-tree imports
 
 import os
 import sys
@@ -53,8 +53,17 @@
         "smollm2-135m-instruct-q8_0.gguf",
         135,
     ),
+    "Llama-3.2-1B": (
+        "hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
+        "llama-3.2-1b-instruct-q4_k_m.gguf",
+        750,
+    ),
 }
 
+def available_models():
+    """Return the sorted model names that ``from_pretrained`` accepts."""
+    return sorted(_MODEL_REGISTRY)
+
 
 def _download_with_progress(url: str, dest: Path, desc: str) -> None:
     """Download a file with a tqdm-free progress bar (stdlib only)."""