Skip to content

Commit f91b146

Browse files
Fix broken "auto" engine mode: resolve the "auto" preference to a concrete backend (MetalRT or llama.cpp) at init and remove LlmBackend::AUTO
1 parent eb454d0 commit f91b146

8 files changed

Lines changed: 26 additions & 74 deletions

File tree

src/api/rcli_api.cpp

Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,9 @@ int rcli_init(RCLIHandle handle, const char* models_dir, int gpu_layers) {
358358
std::string engine_pref = engine->config_engine_override.empty()
359359
? rcli::read_engine_preference()
360360
: engine->config_engine_override;
361+
if (engine_pref == "auto" || engine_pref.empty()) {
362+
engine_pref = rastack::MetalRTLoader::gpu_supported() ? "metalrt" : "llamacpp";
363+
}
361364
if (engine_pref == "metalrt" && !rastack::MetalRTLoader::gpu_supported()) {
362365
LOG_WARN("RCLI", "MetalRT requires Apple M3+ (Metal 3.1). Falling back to llama.cpp.");
363366
fprintf(stderr, " MetalRT requires Apple M3 or later. Falling back to llama.cpp.\n");
@@ -577,43 +580,6 @@ int rcli_init(RCLIHandle handle, const char* models_dir, int gpu_layers) {
577580
}
578581
config.llm_backend = rastack::LlmBackend::LLAMACPP;
579582
}
580-
} else if (engine_pref == "auto" || engine_pref.empty()) {
581-
config.llm_backend = rastack::LlmBackend::AUTO;
582-
583-
// For AUTO mode, populate MetalRT paths so the orchestrator can
584-
// use MetalRT when it decides to (without paths it would hard-fail).
585-
if (rastack::MetalRTLoader::gpu_supported()) {
586-
auto& mrt_loader = rastack::MetalRTLoader::instance();
587-
if (mrt_loader.is_available()) {
588-
auto models = rcli::all_models();
589-
for (auto& m : models) {
590-
if (m.metalrt_supported && rcli::is_metalrt_model_installed(m)) {
591-
config.metalrt.model_dir = rcli::metalrt_models_dir() + "/" + m.metalrt_dir_name;
592-
break;
593-
}
594-
}
595-
if (!config.metalrt.model_dir.empty()) {
596-
auto comps = rcli::metalrt_component_models();
597-
std::string stt_pref = rcli::read_selected_metalrt_stt_id();
598-
for (auto& cm : comps) {
599-
if (!rcli::is_metalrt_component_installed(cm)) continue;
600-
std::string comp_dir = rcli::metalrt_models_dir() + "/" + cm.dir_name;
601-
if (cm.component == "stt" && config.metalrt_stt.model_dir.empty()) {
602-
if (!stt_pref.empty() && cm.id != stt_pref) continue;
603-
config.metalrt_stt.model_dir = comp_dir;
604-
engine->stt_model_name = cm.name;
605-
} else if (cm.component == "tts" && config.metalrt_tts.model_dir.empty()) {
606-
config.metalrt_tts.model_dir = comp_dir;
607-
auto* pinfo = rastack::find_personality(engine->personality_key);
608-
config.metalrt_tts.voice = (pinfo && pinfo->voice[0] != '\0')
609-
? pinfo->voice : "af_heart";
610-
engine->tts_model_name = cm.name;
611-
config.audio.playback_rate = 24000;
612-
}
613-
}
614-
}
615-
}
616-
}
617583
}
618584
}
619585

src/cli/main.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -858,22 +858,14 @@ static int cmd_engine(const Args& args) {
858858
return 0;
859859
}
860860

861-
if (target == "auto") {
862-
rcli::write_engine_preference("auto");
863-
fprintf(stderr, "\n %s%sEngine set to Auto.%s MetalRT when available, llama.cpp fallback.\n\n",
864-
color::bold, color::green, color::reset);
865-
return 0;
866-
}
867-
868861
std::string current = rcli::read_engine_preference();
869-
if (current.empty()) current = "auto";
862+
if (current.empty() || current == "auto") current = "llamacpp";
870863
fprintf(stderr,
871864
"\n%s%s rcli engine%s — Switch LLM inference backend\n\n"
872865
" Current: %s%s%s\n\n"
873866
" Commands:\n"
874867
" rcli engine metalrt Use MetalRT (Apple Silicon GPU)\n"
875-
" rcli engine llamacpp Use llama.cpp (open source)\n"
876-
" rcli engine auto Auto-select best available\n\n",
868+
" rcli engine llamacpp Use llama.cpp (open source)\n\n",
877869
color::bold, color::orange, color::reset,
878870
color::bold, current.c_str(), color::reset);
879871
return 0;

src/cli/model_pickers.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -590,8 +590,7 @@ inline int cmd_info() {
590590

591591
std::string engine_pref = rcli::read_engine_preference();
592592
bool mrt_available = rastack::MetalRTLoader::instance().is_available();
593-
bool use_metalrt = (engine_pref == "metalrt" && mrt_available)
594-
|| ((engine_pref == "auto" || engine_pref.empty()) && mrt_available);
593+
bool use_metalrt = (engine_pref == "metalrt" && mrt_available);
595594
std::string engine_info = use_metalrt
596595
? "MetalRT (Metal GPU — LLM, STT, TTS on-device)"
597596
: "llama.cpp + sherpa-onnx (ONNX Runtime)";

src/cli/setup_cmds.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,8 @@ inline int cmd_setup(const Args& args) {
9797
bool install_llamacpp = (engine_choice == 1 || engine_choice == 3);
9898
bool install_metalrt = (engine_choice == 2 || engine_choice == 3);
9999

100-
std::string engine_pref = "auto";
100+
std::string engine_pref = "metalrt";
101101
if (engine_choice == 1) engine_pref = "llamacpp";
102-
if (engine_choice == 2) engine_pref = "metalrt";
103102
rcli::write_engine_preference(engine_pref);
104103

105104
fprintf(stderr, "\n");
@@ -518,8 +517,8 @@ inline int cmd_upgrade_llm(const Args& args) {
518517
const auto* current_best = rcli::find_best_installed(models_dir, models);
519518

520519
fprintf(stderr, "\n%s%s RCLI — Upgrade Language Model%s\n\n", color::bold, color::orange, color::reset);
521-
if (engine_pref == "auto" || engine_pref.empty()) {
522-
fprintf(stderr, " Engine: auto (llama.cpp). Tip: switch to MetalRT with %srcli setup%s for GPU speed.\n\n",
520+
if (engine_pref.empty()) {
521+
fprintf(stderr, " Tip: switch to MetalRT with %srcli setup%s for GPU speed.\n\n",
523522
color::bold, color::reset);
524523
}
525524
fprintf(stderr, " Choose a model to download for smarter voice commands.\n");

src/cli/tui_app.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -790,7 +790,6 @@ class TuiApp {
790790

791791
double ttfa = last_ttfa_ms_.load(std::memory_order_relaxed);
792792

793-
// Engine indicator — use actual runtime engine, not config preference
794793
std::string engine_label_str = "llama.cpp";
795794
auto engine_color = theme_.text_normal;
796795
if (engine_) {
@@ -800,10 +799,6 @@ class TuiApp {
800799
std::string engine_pref = rcli::read_engine_preference();
801800
if (engine_pref == "metalrt")
802801
engine_label_str = "MetalRT";
803-
else if (engine_pref == "auto" || engine_pref.empty()) {
804-
bool mrt_avail = rastack::MetalRTLoader::instance().is_available();
805-
engine_label_str = mrt_avail ? "Auto (MetalRT)" : "Auto (llama.cpp)";
806-
}
807802
}
808803
bool is_metalrt_engine = engine_label_str.find("MetalRT") != std::string::npos;
809804
if (is_metalrt_engine)
@@ -1384,8 +1379,13 @@ class TuiApp {
13841379
auto stt_all = rcli::all_stt_models();
13851380
auto tts_all = rcli::all_tts_models();
13861381

1387-
std::string engine_pref = rcli::read_engine_preference();
1388-
bool is_metalrt = (engine_pref == "metalrt");
1382+
bool is_metalrt = false;
1383+
if (engine_) {
1384+
const char* active = rcli_get_active_engine(engine_);
1385+
is_metalrt = active && std::string(active) == "MetalRT";
1386+
} else {
1387+
is_metalrt = (rcli::read_engine_preference() == "metalrt");
1388+
}
13891389
std::string selected_model = rcli::read_selected_model_id();
13901390

13911391
if (is_metalrt) {

src/models/model_registry.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
namespace rcli {
2323

24-
enum class LlmBackendType { LLAMACPP, METALRT, BOTH };
24+
enum class LlmBackendType { LLAMACPP, METALRT };
2525

2626
struct LlmModelDef {
2727
std::string id; // Unique slug: "qwen3.5-4b"
@@ -252,14 +252,13 @@ inline std::vector<LlmModelDef> all_models() {
252252

253253
inline std::vector<LlmModelDef> models_for_engine(LlmBackendType engine) {
254254
auto all = all_models();
255-
if (engine == LlmBackendType::LLAMACPP) return all;
256255
if (engine == LlmBackendType::METALRT) {
257256
std::vector<LlmModelDef> result;
258257
for (auto& m : all)
259258
if (m.metalrt_supported) result.push_back(m);
260259
return result;
261260
}
262-
return all; // BOTH: everything
261+
return all;
263262
}
264263

265264
inline std::string engine_label(const LlmModelDef& m) {

src/pipeline/orchestrator.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ bool Orchestrator::init(const PipelineConfig& config) {
100100
pool_->total_size() / (1024.0*1024.0),
101101
pool_->utilization_pct());
102102

103-
// llama.cpp STT/LLM/TTS — required for llamacpp/auto, optional for metalrt-only
103+
// llama.cpp STT/LLM/TTS — required for llamacpp, optional for metalrt-only
104104
bool need_llamacpp = (config.llm_backend != LlmBackend::METALRT);
105105

106106
if (!stt_.init(config.stt)) {
@@ -142,8 +142,7 @@ bool Orchestrator::init(const PipelineConfig& config) {
142142

143143
// --- MetalRT backend (optional) ---
144144
bool metalrt_skip_due_to_crash = false;
145-
if (config.llm_backend == LlmBackend::METALRT ||
146-
config.llm_backend == LlmBackend::AUTO) {
145+
if (config.llm_backend == LlmBackend::METALRT) {
147146
// Check if MetalRT crashed on a previous launch
148147
if (metalrt_previously_crashed()) {
149148
LOG_WARN("Pipeline", "MetalRT crashed on a previous launch — skipping. "
@@ -154,7 +153,7 @@ bool Orchestrator::init(const PipelineConfig& config) {
154153
metalrt_crash_breadcrumb_path().c_str());
155154
metalrt_breadcrumb_remove();
156155
metalrt_skip_due_to_crash = true;
157-
if (config.llm_backend == LlmBackend::METALRT && !llm_.is_initialized()) {
156+
if (!llm_.is_initialized()) {
158157
LOG_ERROR("Pipeline", "MetalRT skipped and llama.cpp LLM not available");
159158
return false;
160159
}
@@ -179,11 +178,9 @@ bool Orchestrator::init(const PipelineConfig& config) {
179178
std::string mrt_prefix = metalrt_.profile().build_system_prefix(mrt_system);
180179
metalrt_.cache_system_prompt(mrt_prefix);
181180
metalrt_.set_system_prompt(mrt_system);
182-
if (config.llm_backend == LlmBackend::METALRT) {
183-
active_backend_ = LlmBackend::METALRT;
184-
LOG_INFO("Pipeline", "Active LLM backend: MetalRT");
185-
}
186-
} else if (config.llm_backend == LlmBackend::METALRT) {
181+
active_backend_ = LlmBackend::METALRT;
182+
LOG_INFO("Pipeline", "Active LLM backend: MetalRT");
183+
} else {
187184
metalrt_breadcrumb_remove();
188185
sigaction(SIGSEGV, &s_old_sigsegv, nullptr);
189186
sigaction(SIGBUS, &s_old_sigbus, nullptr);

src/pipeline/orchestrator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
namespace rastack {
2525

26-
enum class LlmBackend { AUTO, LLAMACPP, METALRT };
26+
enum class LlmBackend { LLAMACPP, METALRT };
2727

2828
struct PipelineConfig {
2929
SttConfig stt;
@@ -36,7 +36,7 @@ struct PipelineConfig {
3636
VadConfig vad;
3737
AudioConfig audio;
3838

39-
LlmBackend llm_backend = LlmBackend::AUTO;
39+
LlmBackend llm_backend = LlmBackend::LLAMACPP;
4040

4141
size_t memory_pool_size = 64 * 1024 * 1024; // 64MB
4242
size_t audio_ring_capacity = 16384 * 10; // ~10 sec at 16kHz

0 commit comments

Comments (0)