Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
38f8ba5
add resale advisor example
maxkahan Apr 3, 2026
36a3160
Upgrade transformers library version
Nash0x7E2 Apr 6, 2026
e4d7035
Change example to voice only for local testing
Nash0x7E2 Apr 6, 2026
ec6719e
Add resolve_device for mapping DeviceType
Nash0x7E2 Apr 6, 2026
047d7d6
Extractr common logic to _local_inference
Nash0x7E2 Apr 7, 2026
17df094
Add API for MLX models
Nash0x7E2 Apr 7, 2026
6dafa40
Convert example to use Gemma MLX Quant
Nash0x7E2 Apr 7, 2026
cad81d7
Add huggingface to root toml
Nash0x7E2 Apr 7, 2026
61861f1
Fixes for formatting and mypy
Nash0x7E2 Apr 7, 2026
61674a6
Fast-path tool-call extraction when output has no JSON braces
Nash0x7E2 Apr 21, 2026
8e31104
Share _extract_last_user_text helper between local VLMs
Nash0x7E2 Apr 21, 2026
09f4f56
Pass PIL images directly to mlx-vlm generate, drop temp-file PNGs
Nash0x7E2 Apr 21, 2026
57430fe
Fix mlx_lm.generate string return; warn on hung generation threads
Nash0x7E2 Apr 21, 2026
c098cfd
Gate MLX dev dep on Apple Silicon so Linux CI skips install
Nash0x7E2 Apr 21, 2026
95e0785
Add requires_mlx pytest skip markers for Apple-only tests
Nash0x7E2 Apr 21, 2026
9ad044c
Gate huggingface [mlx] and [mlx-vlm] extras on Apple Silicon
Nash0x7E2 Apr 21, 2026
abf231b
Harden MLX import-failure detection for missing shared libs
Nash0x7E2 Apr 21, 2026
12c321b
Format __init__.py per ruff
Nash0x7E2 Apr 21, 2026
6471a4d
fix(huggingface): preserve local inference followup behavior
Nash0x7E2 Apr 22, 2026
9649575
fix(huggingface): align mlx and transformers local settings
Nash0x7E2 Apr 22, 2026
b0726e6
chore(examples): tighten resale example dependencies
Nash0x7E2 Apr 22, 2026
2af000d
fix(huggingface): lazy-load mlx plugins
Nash0x7E2 Apr 22, 2026
4cfc1f5
fix(roboflow): handle filtered detections safely
Nash0x7E2 Apr 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
"""

import asyncio
import importlib.util
import logging
import os
import platform
import sys
from typing import Iterator

import av
Expand All @@ -26,6 +29,24 @@
load_dotenv()


# Both markers need an Apple Silicon Mac; each additionally verifies the
# relevant MLX package is importable. The platform check is evaluated first
# so find_spec() is never consulted on non-Apple hosts.
_NOT_APPLE_SILICON = sys.platform != "darwin" or platform.machine() != "arm64"

requires_mlx = pytest.mark.skipif(
    _NOT_APPLE_SILICON or importlib.util.find_spec("mlx_lm") is None,
    reason="MLX tests require Apple Silicon with mlx-lm installed",
)
"""Skip marker for tests that require MLX (Apple Silicon only)."""


requires_mlx_vlm = pytest.mark.skipif(
    _NOT_APPLE_SILICON or importlib.util.find_spec("mlx_vlm") is None,
    reason="MLX-VLM tests require Apple Silicon with mlx-vlm installed",
)
"""Skip marker for tests that require MLX-VLM (Apple Silicon only)."""


def skip_blockbuster(func_or_class):
"""Decorator to skip blockbuster checks for a test function or class.

Expand Down
18 changes: 18 additions & 0 deletions examples/12_resale_advisor_example/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Resale-advisor example: a voice agent that inspects items on camera using a
# local MLX vision-language model. See resale_advisor_example.py alongside.
[project]
name = "resale-advisor-example"
version = "0.0.0"
requires-python = ">=3.10"

dependencies = [
"python-dotenv>=1.0",
# The [mlx-vlm] extra pulls the MLX vision stack (Apple Silicon only).
"vision-agents-plugins-huggingface[mlx-vlm]",
"vision-agents-plugins-getstream",
"vision-agents-plugins-deepgram",
"vision-agents",
]

# Resolve the framework packages from this monorepo checkout rather than an
# index, so the example always runs against the local source tree.
[tool.uv.sources]
"vision-agents-plugins-huggingface" = {path = "../../plugins/huggingface", editable=true}
"vision-agents-plugins-getstream" = {path = "../../plugins/getstream", editable=true}
"vision-agents-plugins-deepgram" = {path = "../../plugins/deepgram", editable=true}
"vision-agents" = {path = "../../agents-core", editable=true}
77 changes: 77 additions & 0 deletions examples/12_resale_advisor_example/resale_advisor_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
Resale Advisor with Gemma 4 - Local VLM Agent (MLX)

A real-time resale advisor powered by Gemma 4 E4B running on Apple Silicon via
MLX. Demonstrates how to build a multimodal AI agent that can see an item on
camera, discuss its condition, and provide resale-oriented guidance with voice:

- Gemma 4 E4B (8-bit quantized) via mlx-vlm for vision-language inference
- Deepgram for speech-to-text and text-to-speech
- GetStream for real-time communication

The user speaks naturally and the agent responds with voice, describing the
item, asking clarifying questions when needed, and giving a rough resale view.

Requirements:
- STREAM_API_KEY and STREAM_API_SECRET environment variables
- DEEPGRAM_API_KEY environment variable
- Apple Silicon Mac with 16GB+ unified memory

First run will download the MLX model (~8GB).
"""
Comment thread
coderabbitai[bot] marked this conversation as resolved.

import asyncio
import logging

from dotenv import load_dotenv
from vision_agents.core import Agent, Runner, User
from vision_agents.core.agents import AgentLauncher
from vision_agents.plugins import deepgram, getstream, huggingface

logger = logging.getLogger(__name__)

load_dotenv()

SYSTEM_PROMPT = (
"You are a resale advisor running on a local Gemma 4 model. "
"You can see the user's camera feed. Identify the item, comment on visible "
"condition, ask for age or brand details when needed, and give a cautious "
"resale estimate or range when the user asks. Speak naturally, with no "
"lists or formatting. Never use emojis or special characters. Keep "
"responses under 60 words and be explicit when you are uncertain."
)


async def create_agent(**kwargs) -> Agent:
    """Build the resale-advisor agent.

    Wires a local Gemma VLM (served through mlx-vlm) to the GetStream edge
    transport and Deepgram speech-to-text / text-to-speech. Extra keyword
    arguments from the launcher are accepted and ignored.
    """
    local_vlm = huggingface.MlxVLM(
        model="mlx-community/gemma-4-e4b-it-8bit",
        max_new_tokens=150,
    )
    return Agent(
        edge=getstream.Edge(),
        agent_user=User(name="Resale Advisor", id="agent"),
        instructions=SYSTEM_PROMPT,
        llm=local_vlm,
        tts=deepgram.TTS(),
        stt=deepgram.STT(),
    )


async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None:
    """Create the call, join it, greet the user, and run until finished."""
    call = await agent.create_call(call_type, call_id)

    logger.info("Starting Resale Advisor...")

    greeting_prompt = (
        "Greet the user briefly. Tell them you can inspect items on camera "
        "and help with resale guidance."
    )
    async with agent.join(call):
        # NOTE(review): the 2s pause presumably lets the media pipeline come
        # up before the greeting is generated — confirm against framework docs.
        await asyncio.sleep(2)
        await agent.llm.simple_response(text=greeting_prompt)
        await agent.finish()


if __name__ == "__main__":
    # Entry point: hand both factories to the framework's CLI runner.
    launcher = AgentLauncher(create_agent=create_agent, join_call=join_call)
    Runner(launcher).cli()
14 changes: 12 additions & 2 deletions plugins/huggingface/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ requires-python = ">=3.10"
license = "MIT"
dependencies = [
"vision-agents",
"huggingface_hub<1.0",
"huggingface_hub>=0.20.0,<2",
]

[project.optional-dependencies]
transformers = [
"transformers>=4.45.0,<5",
"transformers>=5.3.0,<6",
"torch>=2.0.0,<3",
"accelerate>=0.25.0,<2",
"supervision>=0.21.0,<1",
Expand All @@ -27,6 +27,16 @@ transformers-quantized = [
"vision-agents-plugins-huggingface[transformers]",
"bitsandbytes>=0.41.0",
]
# The MLX extras are Apple-Silicon-only: the environment markers turn them
# into no-ops elsewhere, so Linux installs/CI skip the wheels entirely.
mlx = [
"mlx>=0.22.0 ; sys_platform == 'darwin' and platform_machine == 'arm64'",
"mlx-lm>=0.22.0 ; sys_platform == 'darwin' and platform_machine == 'arm64'",
]
mlx-vlm = [
"mlx>=0.22.0 ; sys_platform == 'darwin' and platform_machine == 'arm64'",
"mlx-vlm>=0.4.0 ; sys_platform == 'darwin' and platform_machine == 'arm64'",
# av/aiortc are cross-platform, so they carry no environment marker.
"av",
"aiortc",
]

[project.urls]
Documentation = "https://visionagents.ai/"
Expand Down
6 changes: 3 additions & 3 deletions plugins/huggingface/tests/test_transformers_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ async def test_processor_fallback(self, vlm):
}

messages = [{"role": "user", "content": "describe this"}]
result = vlm._build_processor_inputs(messages, [])
result = vlm._build_processor_inputs(processor, messages, [], None)
assert "input_ids" in result

call_kwargs = processor.call_args.kwargs
Expand All @@ -174,7 +174,7 @@ async def test_build_processor_inputs_passes_tools(self, vlm):
}
]
messages = [{"role": "user", "content": "hi"}]
vlm._build_processor_inputs(messages, [], tools)
vlm._build_processor_inputs(vlm._resources.processor, messages, [], tools)

call_kwargs = vlm._resources.processor.apply_chat_template.call_args.kwargs
assert call_kwargs["tools"] is tools
Expand Down Expand Up @@ -205,7 +205,7 @@ def _side_effect(*args, **kwargs):
}
]
result = vlm._build_processor_inputs(
[{"role": "user", "content": "hi"}], [], tools
vlm._resources.processor, [{"role": "user", "content": "hi"}], [], tools
)
assert "input_ids" in result
assert call_count == 2
Expand Down
43 changes: 41 additions & 2 deletions plugins/huggingface/vision_agents/plugins/huggingface/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from importlib import import_module

import warnings

from .events import DetectionCompletedEvent
from .huggingface_llm import HuggingFaceLLM as LLM
from .huggingface_vlm import HuggingFaceVLM as VLM
Expand All @@ -11,8 +15,6 @@

__all__ += ["TransformersDetectionProcessor", "TransformersLLM", "TransformersVLM"]
except ImportError as e:
import warnings

optional = {"torch", "transformers", "av", "aiortc", "jinja2", "supervision", "cv2"}
if e.name in optional:
warnings.warn(
Expand All @@ -22,3 +24,40 @@
)
else:
raise


def _is_mlx_import_error(exc: ImportError) -> bool:
if exc.name in {"mlx", "mlx_lm", "mlx_vlm", "mlx.core"}:
return True
return exc.name is None and "mlx" in str(exc).lower()


__all__ += ["MlxLLM", "MlxVLM"]


def __getattr__(name: str):
    """Lazily resolve the MLX-backed classes on first attribute access.

    MLX wheels exist only for Apple Silicon, so ``MlxLLM`` and ``MlxVLM``
    are imported on demand rather than at package import time. If the
    underlying import fails for an MLX-related reason (or, for the VLM,
    because av/aiortc are missing), a hint is warned before the original
    ImportError propagates.
    """
    # name -> (relative module, extra module names that explain a failure,
    #          warning text shown when the import cannot be satisfied)
    lazy_exports = {
        "MlxLLM": (
            ".mlx_llm",
            frozenset(),
            "MLX is not available on this platform. "
            "Install the [mlx] extra on Apple Silicon to enable MLX plugins.",
        ),
        "MlxVLM": (
            ".mlx_vlm",
            frozenset({"av", "aiortc"}),
            "MLX-VLM is not available on this platform. "
            "Install the [mlx-vlm] extra on Apple Silicon to enable MLX VLM plugins.",
        ),
    }

    entry = lazy_exports.get(name)
    if entry is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    module_path, extra_names, message = entry
    try:
        return getattr(import_module(module_path, __name__), name)
    except ImportError as e:
        if _is_mlx_import_error(e) or e.name in extra_names:
            warnings.warn(message, stacklevel=2)
        raise
Loading
Loading