
Commit c32c63d

Merge pull request #27 from codewithdark-git/copilot/add-flexible-model-registration
Add architecture registration + fallback loading path for newly released HF model types
2 parents 4828488 + c752dfa commit c32c63d

6 files changed

Lines changed: 558 additions & 5 deletions

File tree

README.md

Lines changed: 8 additions & 0 deletions
@@ -348,6 +348,14 @@ pytest
 - 📚 Documentation
 - 🐛 Bug fixes
 
+**Quick template for new architecture support:**
+```python
+from quantllm import register_architecture, turbo
+
+register_architecture("new-arch", base_model_type="llama")
+model = turbo("org/new-arch-7b", base_model_fallback=True, trust_remote_code=True)
+```
+
 ---
 
 ## 📜 License
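
The template above registers a mapping from an unrecognized `model_type` to a known base family. As an illustrative sketch only (assumed behavior, not quantllm source), the registry it implies can be modeled as a case-insensitive dictionary with passthrough for unregistered types; `resolve_model_type` is a hypothetical helper name:

```python
# Sketch of the mapping register_architecture implies (assumption, not
# quantllm's actual implementation): new model_type -> compatible base family.
_ARCH_REGISTRY: dict[str, str] = {}

def register_architecture(model_type: str, base_model_type: str) -> None:
    """Map a new/unknown model_type onto a compatible base family."""
    _ARCH_REGISTRY[model_type.lower()] = base_model_type.lower()

def resolve_model_type(config_model_type: str) -> str:
    """Return the family to load with; unregistered types pass through."""
    key = config_model_type.lower()
    return _ARCH_REGISTRY.get(key, key)

register_architecture("new-arch", base_model_type="llama")
print(resolve_model_type("new-arch"))  # llama
print(resolve_model_type("mistral"))   # mistral (unregistered passthrough)
```

Lower-casing both sides keeps lookups consistent with the lowercase `model_type` strings HF configs typically carry.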

docs/guide/loading-models.md

Lines changed: 53 additions & 0 deletions
@@ -74,6 +74,59 @@ model = turbo(
 )
 ```
 
+### New Architecture Fallbacks (for very recent model releases)
+
+If `transformers` does not recognize a just-released architecture yet, register a fallback family:
+
+```python
+from quantllm import turbo, register_architecture
+
+# Map new architecture/model_type to a compatible base family
+register_architecture("newmodel", base_model_type="llama")
+
+model = turbo(
+    "new-model-org/NewModel-7B",
+    model_type_override="llama",  # optional explicit override
+    base_model_fallback=True,     # enabled by default; can be disabled
+    trust_remote_code=True,
+)
+```
+
+> ⚠️ **Security note:** `trust_remote_code=True` executes model-provided code.
+> Only enable it for trusted publishers, especially when loading unregistered or very new architectures.
+
+You can also load from config only (no checkpoint weights) while waiting for upstream support:
+
+```python
+model = turbo(
+    "new-model-org/NewModel-7B",
+    from_config_only=True,
+    trust_remote_code=True,
+)
+```
+
+#### Fast contribution template for new architectures
+
+1. Add a registration in your code or PR:
+   - `register_architecture("new-arch", base_model_type="llama")`
+2. Validate loading with:
+   - `turbo("org/model", base_model_fallback=True, trust_remote_code=True)`
+3. Add/extend a focused test in `tests/test_architecture_fallback.py`.
+
+#### Real-world style "released yesterday" example
+
+```python
+from quantllm import turbo, register_architecture
+
+# Example: transformers doesn't recognize Qwen3 yet
+register_architecture("qwen3", base_model_type="qwen2")
+
+model = turbo(
+    "Qwen/Qwen3-8B",
+    trust_remote_code=True,
+)
+```
+
 ### Memory Options
 
 ```python
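
The guide's options suggest a resolution order: an explicit `model_type_override` wins, then a registered fallback (while `base_model_fallback` stays enabled), then whatever `model_type` the checkpoint's config declares. This is an assumed precedence inferred from the documented parameters, not quantllm source; `pick_model_type` and the `FALLBACKS` entries are hypothetical:

```python
# Assumed precedence sketch: override > registered fallback > config value.
from typing import Optional

FALLBACKS = {"qwen3": "qwen2", "newmodel": "llama"}  # hypothetical registrations

def pick_model_type(config_model_type: str,
                    model_type_override: Optional[str] = None,
                    base_model_fallback: bool = True) -> str:
    if model_type_override is not None:
        return model_type_override          # explicit override always wins
    if base_model_fallback and config_model_type in FALLBACKS:
        return FALLBACKS[config_model_type]  # fall back to the base family
    return config_model_type                 # pass through unchanged

print(pick_model_type("qwen3"))                               # qwen2
print(pick_model_type("qwen3", model_type_override="llama"))  # llama
print(pick_model_type("qwen3", base_model_fallback=False))    # qwen3
```

Keeping the override as the highest-priority path mirrors the docs' note that `model_type_override` is an "optional explicit override" on top of the default-on fallback.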

quantllm/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -35,6 +35,7 @@
 from .core import (
     turbo,
     TurboModel,
+    register_architecture,
     SmartConfig,
     HardwareProfiler,
     ModelAnalyzer,
@@ -117,6 +118,7 @@ def show_banner(force: bool = False):
     # Main API
     "turbo",
     "TurboModel",
+    "register_architecture",
     "SmartConfig",
     "HardwareProfiler",
     "ModelAnalyzer",

quantllm/core/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -8,7 +8,7 @@
 from .hardware import HardwareProfiler
 from .smart_config import SmartConfig
 from .model_analyzer import ModelAnalyzer
-from .turbo_model import TurboModel, turbo
+from .turbo_model import TurboModel, turbo, register_architecture
 from .compilation import (
     compile_model,
     compile_for_inference,
@@ -51,6 +51,7 @@
     "ModelAnalyzer",
     "TurboModel",
     "turbo",
+    "register_architecture",
     # Compilation
     "compile_model",
     "compile_for_inference",
