Commit 86d797f

codelion authored and claude committed
Replace gated test model with Qwen2.5-Coder-0.5B-Instruct, bump to 0.3.15
google/gemma-3-270m-it became gated, breaking integration-tests and conversation-logging-tests in CI. Swap in Qwen/Qwen2.5-Coder-0.5B-Instruct, which is public, instruction-tuned, has a chat_template, has no thinking mode, and works with the existing transformers pin.

Verified locally that test_json_plugin.py (9/9), test_n_parameter.py, test_reasoning_integration.py (8/8), and test_conversation_logging_server.py (10/10) now pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
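The chat_template requirement matters because optillm's local inference path formats messages with the tokenizer's chat template (presumably via transformers' apply_chat_template). Qwen2.5 models follow the ChatML convention; as a rough, simplified sketch of what that formatting produces (not the model's actual Jinja template, which also handles system prompts and tools):

```python
def render_chatml(messages):
    """Simplified ChatML rendering, roughly what Qwen2.5's chat template emits."""
    out = []
    for m in messages:
        # Each turn is wrapped in <|im_start|>role ... <|im_end|> markers
        out.append(f"<|im_start|>{m['role']}\n{m['content']}<|im_end|>\n")
    # Open an assistant turn so the model generates the reply (generation prompt)
    out.append("<|im_start|>assistant\n")
    return "".join(out)

prompt = render_chatml([{"role": "user", "content": "Return JSON"}])
```

A model without a chat_template (or with a gated repo, as gemma-3-270m-it became) fails at exactly this step, which is what broke the CI jobs.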
1 parent 5c07f46 commit 86d797f

6 files changed: 8 additions & 8 deletions


.github/workflows/test.yml

Lines changed: 2 additions & 2 deletions

@@ -84,7 +84,7 @@ jobs:
       - name: Start optillm server
         run: |
           echo "Starting optillm server for integration tests..."
-          OPTILLM_API_KEY=optillm python optillm.py --model google/gemma-3-270m-it --port 8000 &
+          OPTILLM_API_KEY=optillm python optillm.py --model Qwen/Qwen2.5-Coder-0.5B-Instruct --port 8000 &
           echo $! > server.pid

           # Wait for server to be ready
@@ -179,7 +179,7 @@ jobs:
           echo "Starting optillm server with conversation logging..."
           mkdir -p /tmp/optillm_conversations
           OPTILLM_API_KEY=optillm python optillm.py \
-            --model google/gemma-3-270m-it \
+            --model Qwen/Qwen2.5-Coder-0.5B-Instruct \
             --port 8000 \
             --log-conversations \
             --conversation-log-dir /tmp/optillm_conversations &

optillm/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 # Version information
-__version__ = "0.3.14"
+__version__ = "0.3.15"

 # Import from server module
 from .server import (

optillm/plugins/json_plugin.py

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@ def get_device(self):
         else:
             return torch.device("cpu")

-    def __init__(self, model_name: str = "google/gemma-3-270m-it"):
+    def __init__(self, model_name: str = "Qwen/Qwen2.5-Coder-0.5B-Instruct"):
         """Initialize the JSON generator with a specific model."""
         self.device = self.get_device()
         logger.info(f"Using device: {self.device}")

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "optillm"
-version = "0.3.14"
+version = "0.3.15"
 description = "An optimizing inference proxy for LLMs."
 readme = "README.md"
 license = "Apache-2.0"
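The version string lives in two places (pyproject.toml and optillm/__init__.py) and both must be bumped together, or packaging metadata and the runtime `__version__` drift apart. A hypothetical consistency check, not part of the repo — the file contents are inlined here for illustration, where a real script would read them from disk:

```python
import re

# Inlined stand-ins for the two real files (hypothetical sample content)
PYPROJECT_TOML = 'name = "optillm"\nversion = "0.3.15"\n'
INIT_PY = '# Version information\n__version__ = "0.3.15"\n'

def first_group(pattern: str, text: str) -> str:
    """Return the first capture group of pattern in text, or raise if absent."""
    match = re.search(pattern, text, re.MULTILINE)
    if match is None:
        raise ValueError(f"pattern not found: {pattern}")
    return match.group(1)

packaging_version = first_group(r'^version = "([^"]+)"$', PYPROJECT_TOML)
runtime_version = first_group(r'^__version__ = "([^"]+)"$', INIT_PY)
assert packaging_version == runtime_version, "version bump missed one file"
```

Such a check could run in CI so a release like this one cannot ship with mismatched versions.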

tests/test_conversation_logging_server.py

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """
 Server-based integration tests for conversation logging with real model
-Tests conversation logging with actual OptILLM server and google/gemma-3-270m-it model
+Tests conversation logging with actual OptILLM server and Qwen/Qwen2.5-Coder-0.5B-Instruct model
 """

 import unittest

tests/test_utils.py

Lines changed: 2 additions & 2 deletions

@@ -12,8 +12,8 @@
 from openai import OpenAI

 # Standard test model for all tests - small and fast
-TEST_MODEL = "google/gemma-3-270m-it"
-TEST_MODEL_MLX = "mlx-community/gemma-3-270m-it-bf16"
+TEST_MODEL = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
+TEST_MODEL_MLX = "mlx-community/Qwen2.5-Coder-0.5B-Instruct-bf16"

 def setup_test_env():
     """Set up test environment with local inference"""
