fix: block MetalRT on M1/M2 with clear error (requires Apple M3+ for Metal 3.1)

shubhammalhotra28 · shubhammalhotra28 · commit fdc253811842 · 2026-03-10T08:34:51.000-07:00
MetalRT shaders use bfloat16 and -std=metal3.1, which requires Apple GPU
Family 9 (M3 or later). M1/M2 chips cannot load the metallib at all.

Added gpu_supported() check to every MetalRT entry point:
- metalrt_loader load()/install(): rejects early with clear message
- rcli setup: auto-selects llama.cpp on M1/M2, shows MetalRT as unavailable and skips the engine prompt
- rcli engine metalrt: prints "requires M3+" error
- rcli metalrt install: same check
- TUI engine switcher: shows "Requires Apple M3 or later" label
- rcli_init: graceful fallback to llama.cpp with warning

Made-with: Cursor
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -5,7 +5,7 @@ if(POLICY CMP0177)
 endif()
 set(CMAKE_POLICY_VERSION_MINIMUM 3.5 CACHE STRING "" FORCE)
 
-project(rcli VERSION 0.2.4 LANGUAGES C CXX)
+project(rcli VERSION 0.2.5 LANGUAGES C CXX)
 
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
diff --git a/src/api/rcli_api.cpp b/src/api/rcli_api.cpp
@@ -334,6 +334,11 @@ int rcli_init(RCLIHandle handle, const char* models_dir, int gpu_layers) {
     // --- MetalRT (optional, based on user engine preference) ---
     {
         std::string engine_pref = rcli::read_engine_preference();
+        if (engine_pref == "metalrt" && !rastack::MetalRTLoader::gpu_supported()) {
+            LOG_WARN("RCLI", "MetalRT requires Apple M3+ (Metal 3.1). Falling back to llama.cpp.");
+            fprintf(stderr, "  MetalRT requires Apple M3 or later. Falling back to llama.cpp.\n");
+            engine_pref = "llamacpp";
+        }
         if (engine_pref == "metalrt") {
             auto& mrt_loader = rastack::MetalRTLoader::instance();
             if (mrt_loader.is_available()) {
diff --git a/src/cli/main.cpp b/src/cli/main.cpp
@@ -590,6 +590,13 @@ static int cmd_rag(const Args& args) {
 
 static int cmd_metalrt(const Args& args) {
     if (args.arg1 == "install") {
+        if (!rastack::MetalRTLoader::gpu_supported()) {
+            fprintf(stderr, "\n  %s%sMetalRT requires Apple M3 or later.%s\n"
+                    "  Your Mac uses an M1/M2 chip which doesn't support Metal 3.1 shaders.\n"
+                    "  Please use llama.cpp instead: %srcli engine llamacpp%s\n\n",
+                    color::bold, color::red, color::reset, color::bold, color::reset);
+            return 1;
+        }
         auto& loader = rastack::MetalRTLoader::instance();
         if (loader.is_available()) {
             std::string ver = rastack::MetalRTLoader::installed_version();
@@ -910,6 +917,13 @@ static int cmd_engine(const Args& args) {
     std::string target = args.arg1;
 
     if (target == "metalrt") {
+        if (!rastack::MetalRTLoader::gpu_supported()) {
+            fprintf(stderr, "\n  %s%sMetalRT requires Apple M3 or later.%s\n"
+                    "  Your Mac uses an M1/M2 chip which doesn't support Metal 3.1 shaders.\n"
+                    "  Please use llama.cpp instead: %srcli engine llamacpp%s\n\n",
+                    color::bold, color::red, color::reset, color::bold, color::reset);
+            return 1;
+        }
         if (!rastack::MetalRTLoader::instance().is_available()) {
             fprintf(stderr, "\n  MetalRT not found. Installing automatically...\n\n");
             if (!rastack::MetalRTLoader::install()) {
diff --git a/src/cli/setup_cmds.h b/src/cli/setup_cmds.h
@@ -55,29 +55,44 @@ inline int cmd_setup(const Args& args) {
     }
 
     // --- Engine choice ---
+    bool metalrt_gpu_ok = rastack::MetalRTLoader::gpu_supported();
+
     fprintf(stderr, "  Choose your inference engine:\n\n");
     fprintf(stderr, "  %s1%s  %sOpen Source%s (llama.cpp + sherpa-onnx)              ~1 GB\n",
             color::bold, color::reset, color::green, color::reset);
     fprintf(stderr, "     Community-maintained, all models supported.\n");
     fprintf(stderr, "     Downloads: LFM2 1.2B + Whisper + Piper TTS\n\n");
-    fprintf(stderr, "  %s2%s  %sMetalRT%s (Apple Silicon GPU acceleration)           ~0.9 GB\n",
-            color::bold, color::reset, color::cyan, color::reset);
-    fprintf(stderr, "     GPU-accelerated engine: ~550 tok/s (LFM2.5 1.2B)\n");
-    fprintf(stderr, "     Downloads: LFM2.5 1.2B + Whisper Tiny + Kokoro TTS\n");
-    fprintf(stderr, "     More models available on-demand via %srcli models%s\n\n",
-            color::bold, color::reset);
-    fprintf(stderr, "  %s3%s  %sBoth%s (recommended)                                 ~1.9 GB\n",
-            color::bold, color::reset, color::orange, color::reset);
-    fprintf(stderr, "     Install both engines. Use MetalRT when available,\n");
-    fprintf(stderr, "     fall back to llama.cpp for unsupported models.\n\n");
-    fprintf(stderr, "  Enter choice [1-3]: ");
+    if (metalrt_gpu_ok) {
+        fprintf(stderr, "  %s2%s  %sMetalRT%s (Apple Silicon GPU acceleration)           ~0.9 GB\n",
+                color::bold, color::reset, color::cyan, color::reset);
+        fprintf(stderr, "     GPU-accelerated engine: ~550 tok/s (LFM2.5 1.2B)\n");
+        fprintf(stderr, "     Downloads: LFM2.5 1.2B + Whisper Tiny + Kokoro TTS\n");
+        fprintf(stderr, "     More models available on-demand via %srcli models%s\n\n",
+                color::bold, color::reset);
+        fprintf(stderr, "  %s3%s  %sBoth%s (recommended)                                 ~1.9 GB\n",
+                color::bold, color::reset, color::orange, color::reset);
+        fprintf(stderr, "     Install both engines. Use MetalRT when available,\n");
+        fprintf(stderr, "     fall back to llama.cpp for unsupported models.\n\n");
+        fprintf(stderr, "  Enter choice [1-3]: ");
+    } else {
+        fprintf(stderr, "  %s2%s  %sMetalRT%s  %s(requires Apple M3 or later)%s\n\n",
+                color::bold, color::reset, color::cyan, color::reset,
+                color::dim, color::reset);
+        fprintf(stderr, "  Your Mac doesn't support MetalRT (Metal 3.1 required).\n");
+        fprintf(stderr, "  Installing llama.cpp automatically.\n\n");
+    }
     fflush(stderr);
 
-    char engine_buf[16] = {};
-    if (read(STDIN_FILENO, engine_buf, sizeof(engine_buf) - 1) <= 0) engine_buf[0] = '3';
-    if (engine_buf[0] == '\n') engine_buf[0] = '3';
-    int engine_choice = engine_buf[0] - '0';
-    if (engine_choice < 1 || engine_choice > 3) engine_choice = 3;
+    int engine_choice;
+    if (!metalrt_gpu_ok) {
+        engine_choice = 1;
+    } else {
+        char engine_buf[16] = {};
+        if (read(STDIN_FILENO, engine_buf, sizeof(engine_buf) - 1) <= 0) engine_buf[0] = '3';
+        if (engine_buf[0] == '\n') engine_buf[0] = '3';
+        engine_choice = engine_buf[0] - '0';
+        if (engine_choice < 1 || engine_choice > 3) engine_choice = 3;
+    }
 
     bool install_llamacpp = (engine_choice == 1 || engine_choice == 3);
     bool install_metalrt  = (engine_choice == 2 || engine_choice == 3);
diff --git a/src/cli/tui_app.h b/src/cli/tui_app.h
@@ -1363,8 +1363,9 @@ class TuiApp {
         std::string current = rcli::read_engine_preference();
         if (current.empty()) current = "auto";
 
+        bool metalrt_gpu_ok = rastack::MetalRTLoader::gpu_supported();
         bool metalrt_available = false;
-        {
+        if (metalrt_gpu_ok) {
             std::string dylib_path = rastack::MetalRTLoader::engines_dir() + "/libmetalrt.dylib";
             struct stat st;
             metalrt_available = (stat(dylib_path.c_str(), &st) == 0);
@@ -1375,10 +1376,13 @@ class TuiApp {
             "CPU inference \u00B7 GGUF models \u00B7 Universal compatibility",
             current == "llamacpp"});
 
+        std::string mrt_desc = metalrt_gpu_ok
+            ? (std::string("GPU-accelerated \u00B7 MLX 4-bit \u00B7 Apple Silicon optimized") +
+               (metalrt_available ? "" : "  [not installed]"))
+            : "Requires Apple M3 or later";
         engine_entries_.push_back({"metalrt",
             "MetalRT",
-            std::string("GPU-accelerated \u00B7 MLX 4-bit \u00B7 Apple Silicon optimized") +
-                (metalrt_available ? "" : "  [not installed]"),
+            mrt_desc,
             current == "metalrt"});
 
         if (current == "auto") {
@@ -1401,6 +1405,11 @@ class TuiApp {
         }
 
         if (sel.id == "metalrt") {
+            if (!rastack::MetalRTLoader::gpu_supported()) {
+                engine_message_ = "MetalRT requires Apple M3 or later. Use llama.cpp instead.";
+                engine_msg_color_ = ftxui::Color::Red;
+                return;
+            }
             std::string dylib_path = rastack::MetalRTLoader::engines_dir() + "/libmetalrt.dylib";
             struct stat st;
             if (stat(dylib_path.c_str(), &st) != 0) {
diff --git a/src/engines/metalrt_loader.cpp b/src/engines/metalrt_loader.cpp
@@ -5,6 +5,21 @@
 #include <cstdio>
 #include <cstdlib>
 #include <mach-o/dyld.h>
+#include <sys/sysctl.h>
+
+static bool gpu_supports_metal31() {
+    char chip[64] = {};
+    size_t len = sizeof(chip);
+    if (sysctlbyname("machdep.cpu.brand_string", chip, &len, nullptr, 0) != 0)
+        return false;
+    // Returns true only for Apple M3 and newer: Metal 3.1 shaders (bfloat16)
+    // cannot load on M1/M2. The generation is parsed from "Apple M<N> ..." so
+    // chips after M4 (M5, ...) are accepted without listing each one.
+    const std::string s(chip);
+    const size_t pos = s.find("Apple M");
+    if (pos == std::string::npos) return false;  // not an Apple M-series string
+    return std::atoi(s.c_str() + pos + 7) >= 3;  // 7 == strlen("Apple M")
+}
 
 // =============================================================================
 // LOCAL-FIRST CONFIGURATION
@@ -83,6 +98,10 @@ bool MetalRTLoader::is_local_mode() {
     return METALRT_LOCAL_BUILD || (env && env[0] != '\0') || !resolve_local_repo().empty();
 }
 
+bool MetalRTLoader::gpu_supported() {  // exposes the file-local gpu_supports_metal31() check to callers
+    return gpu_supports_metal31();
+}
+
 bool MetalRTLoader::is_available() const {
     struct stat st;
     return stat(dylib_path().c_str(), &st) == 0;
@@ -97,6 +116,17 @@ bool MetalRTLoader::load() {
         return false;
     }
 
+    if (!gpu_supports_metal31()) {
+        LOG_ERROR("MetalRT", "MetalRT requires Apple M3 or later (Metal 3.1). "
+                  "Your chip does not support bfloat16 GPU shaders. "
+                  "Use llama.cpp engine instead: rcli engine llamacpp");
+        fprintf(stderr, "\n  %s%sMetalRT requires Apple M3 or later.%s\n"
+                "  Your Mac uses an M1/M2 chip which doesn't support Metal 3.1 shaders.\n"
+                "  Please use llama.cpp instead: %srcli engine llamacpp%s\n\n",
+                "\033[1m", "\033[31m", "\033[0m", "\033[1m", "\033[0m");
+        return false;
+    }
+
     handle_ = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
     if (!handle_) {
         LOG_ERROR("MetalRT", "dlopen failed: %s", dlerror());
@@ -361,6 +391,14 @@ static bool install_from_remote(const std::string& edir, const std::string& vers
 }
 
 bool MetalRTLoader::install(const std::string& version) {
+    if (!gpu_supports_metal31()) {
+        fprintf(stderr, "\n  %s%sMetalRT requires Apple M3 or later.%s\n"
+                "  Your Mac uses an M1/M2 chip which doesn't support Metal 3.1 shaders.\n"
+                "  Please use llama.cpp instead: %srcli engine llamacpp%s\n\n",
+                "\033[1m", "\033[31m", "\033[0m", "\033[1m", "\033[0m");
+        return false;
+    }
+
     std::string edir = engines_dir();
     std::string mkdir_cmd = "mkdir -p '" + edir + "'";
     if (system(mkdir_cmd.c_str()) != 0) return false;
diff --git a/src/engines/metalrt_loader.h b/src/engines/metalrt_loader.h
@@ -137,6 +137,7 @@ class MetalRTLoader {
     static std::string engines_dir();
     static std::string local_repo_path();
     static bool is_local_mode();
+    static bool gpu_supported();
 
     static constexpr uint32_t REQUIRED_ABI_VERSION = 2;