Skip to content

Commit f91b146

Browse files
Fix broken "auto" engine mode: resolve the "auto" preference to a concrete backend (MetalRT or llama.cpp) at init and remove LlmBackend::AUTO
1 parent eb454d0 commit f91b146

8 files changed

Lines changed: 26 additions & 74 deletions

File tree

src/api/rcli_api.cpp

Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,9 @@ int rcli_init(RCLIHandle handle, const char* models_dir, int gpu_layers) {
358358
std::string engine_pref = engine->config_engine_override.empty()
359359
? rcli::read_engine_preference()
360360
: engine->config_engine_override;
361+
if (engine_pref == "auto" || engine_pref.empty()) {
362+
engine_pref = rastack::MetalRTLoader::gpu_supported() ? "metalrt" : "llamacpp";
363+
}
361364
if (engine_pref == "metalrt" && !rastack::MetalRTLoader::gpu_supported()) {
362365
LOG_WARN("RCLI", "MetalRT requires Apple M3+ (Metal 3.1). Falling back to llama.cpp.");
363366
fprintf(stderr, " MetalRT requires Apple M3 or later. Falling back to llama.cpp.\n");
@@ -577,43 +580,6 @@ int rcli_init(RCLIHandle handle, const char* models_dir, int gpu_layers) {
577580
}
578581
config.llm_backend = rastack::LlmBackend::LLAMACPP;
579582
}
580-
} else if (engine_pref == "auto" || engine_pref.empty()) {
581-
config.llm_backend = rastack::LlmBackend::AUTO;
582-
583-
// For AUTO mode, populate MetalRT paths so the orchestrator can
584-
// use MetalRT when it decides to (without paths it would hard-fail).
585-
if (rastack::MetalRTLoader::gpu_supported()) {
586-
auto& mrt_loader = rastack::MetalRTLoader::instance();
587-
if (mrt_loader.is_available()) {
588-
auto models = rcli::all_models();
589-
for (auto& m : models) {
590-
if (m.metalrt_supported && rcli::is_metalrt_model_installed(m)) {
591-
config.metalrt.model_dir = rcli::metalrt_models_dir() + "/" + m.metalrt_dir_name;
592-
break;
593-
}
594-
}
595-
if (!config.metalrt.model_dir.empty()) {
596-
auto comps = rcli::metalrt_component_models();
597-
std::string stt_pref = rcli::read_selected_metalrt_stt_id();
598-
for (auto& cm : comps) {
599-
if (!rcli::is_metalrt_component_installed(cm)) continue;
600-
std::string comp_dir = rcli::metalrt_models_dir() + "/" + cm.dir_name;
601-
if (cm.component == "stt" && config.metalrt_stt.model_dir.empty()) {
602-
if (!stt_pref.empty() && cm.id != stt_pref) continue;
603-
config.metalrt_stt.model_dir = comp_dir;
604-
engine->stt_model_name = cm.name;
605-
} else if (cm.component == "tts" && config.metalrt_tts.model_dir.empty()) {
606-
config.metalrt_tts.model_dir = comp_dir;
607-
auto* pinfo = rastack::find_personality(engine->personality_key);
608-
config.metalrt_tts.voice = (pinfo && pinfo->voice[0] != '\0')
609-
? pinfo->voice : "af_heart";
610-
engine->tts_model_name = cm.name;
611-
config.audio.playback_rate = 24000;
612-
}
613-
}
614-
}
615-
}
616-
}
617583
}
618584
}
619585

src/cli/main.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -858,22 +858,14 @@ static int cmd_engine(const Args& args) {
858858
return 0;
859859
}
860860

861-
if (target == "auto") {
862-
rcli::write_engine_preference("auto");
863-
fprintf(stderr, "\n %s%sEngine set to Auto.%s MetalRT when available, llama.cpp fallback.\n\n",
864-
color::bold, color::green, color::reset);
865-
return 0;
866-
}
867-
868861
std::string current = rcli::read_engine_preference();
869-
if (current.empty()) current = "auto";
862+
if (current.empty() || current == "auto") current = "llamacpp";
870863
fprintf(stderr,
871864
"\n%s%s rcli engine%s — Switch LLM inference backend\n\n"
872865
" Current: %s%s%s\n\n"
873866
" Commands:\n"
874867
" rcli engine metalrt Use MetalRT (Apple Silicon GPU)\n"
875-
" rcli engine llamacpp Use llama.cpp (open source)\n"
876-
" rcli engine auto Auto-select best available\n\n",
868+
" rcli engine llamacpp Use llama.cpp (open source)\n\n",
877869
color::bold, color::orange, color::reset,
878870
color::bold, current.c_str(), color::reset);
879871
return 0;

src/cli/model_pickers.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -590,8 +590,7 @@ inline int cmd_info() {
590590

591591
std::string engine_pref = rcli::read_engine_preference();
592592
bool mrt_available = rastack::MetalRTLoader::instance().is_available();
593-
bool use_metalrt = (engine_pref == "metalrt" && mrt_available)
594-
|| ((engine_pref == "auto" || engine_pref.empty()) && mrt_available);
593+
bool use_metalrt = (engine_pref == "metalrt" && mrt_available);
595594
std::string engine_info = use_metalrt
596595
? "MetalRT (Metal GPU — LLM, STT, TTS on-device)"
597596
: "llama.cpp + sherpa-onnx (ONNX Runtime)";

src/cli/setup_cmds.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,8 @@ inline int cmd_setup(const Args& args) {
9797
bool install_llamacpp = (engine_choice == 1 || engine_choice == 3);
9898
bool install_metalrt = (engine_choice == 2 || engine_choice == 3);
9999

100-
std::string engine_pref = "auto";
100+
std::string engine_pref = "metalrt";
101101
if (engine_choice == 1) engine_pref = "llamacpp";
102-
if (engine_choice == 2) engine_pref = "metalrt";
103102
rcli::write_engine_preference(engine_pref);
104103

105104
fprintf(stderr, "\n");
@@ -518,8 +517,8 @@ inline int cmd_upgrade_llm(const Args& args) {
518517
const auto* current_best = rcli::find_best_installed(models_dir, models);
519518

520519
fprintf(stderr, "\n%s%s RCLI — Upgrade Language Model%s\n\n", color::bold, color::orange, color::reset);
521-
if (engine_pref == "auto" || engine_pref.empty()) {
522-
fprintf(stderr, " Engine: auto (llama.cpp). Tip: switch to MetalRT with %srcli setup%s for GPU speed.\n\n",
520+
if (engine_pref.empty()) {
521+
fprintf(stderr, " Tip: switch to MetalRT with %srcli setup%s for GPU speed.\n\n",
523522
color::bold, color::reset);
524523
}
525524
fprintf(stderr, " Choose a model to download for smarter voice commands.\n");

src/cli/tui_app.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -790,7 +790,6 @@ class TuiApp {
790790

791791
double ttfa = last_ttfa_ms_.load(std::memory_order_relaxed);
792792

793-
// Engine indicator — use actual runtime engine, not config preference
794793
std::string engine_label_str = "llama.cpp";
795794
auto engine_color = theme_.text_normal;
796795
if (engine_) {
@@ -800,10 +799,6 @@ class TuiApp {
800799
std::string engine_pref = rcli::read_engine_preference();
801800
if (engine_pref == "metalrt")
802801
engine_label_str = "MetalRT";
803-
else if (engine_pref == "auto" || engine_pref.empty()) {
804-
bool mrt_avail = rastack::MetalRTLoader::instance().is_available();
805-
engine_label_str = mrt_avail ? "Auto (MetalRT)" : "Auto (llama.cpp)";
806-
}
807802
}
808803
bool is_metalrt_engine = engine_label_str.find("MetalRT") != std::string::npos;
809804
if (is_metalrt_engine)
@@ -1384,8 +1379,13 @@ class TuiApp {
13841379
auto stt_all = rcli::all_stt_models();
13851380
auto tts_all = rcli::all_tts_models();
13861381

1387-
std::string engine_pref = rcli::read_engine_preference();
1388-
bool is_metalrt = (engine_pref == "metalrt");
1382+
bool is_metalrt = false;
1383+
if (engine_) {
1384+
const char* active = rcli_get_active_engine(engine_);
1385+
is_metalrt = active && std::string(active) == "MetalRT";
1386+
} else {
1387+
is_metalrt = (rcli::read_engine_preference() == "metalrt");
1388+
}
13891389
std::string selected_model = rcli::read_selected_model_id();
13901390

13911391
if (is_metalrt) {

src/models/model_registry.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
namespace rcli {
2323

24-
enum class LlmBackendType { LLAMACPP, METALRT, BOTH };
24+
enum class LlmBackendType { LLAMACPP, METALRT };
2525

2626
struct LlmModelDef {
2727
std::string id; // Unique slug: "qwen3.5-4b"
@@ -252,14 +252,13 @@ inline std::vector<LlmModelDef> all_models() {
252252

253253
inline std::vector<LlmModelDef> models_for_engine(LlmBackendType engine) {
254254
auto all = all_models();
255-
if (engine == LlmBackendType::LLAMACPP) return all;
256255
if (engine == LlmBackendType::METALRT) {
257256
std::vector<LlmModelDef> result;
258257
for (auto& m : all)
259258
if (m.metalrt_supported) result.push_back(m);
260259
return result;
261260
}
262-
return all; // BOTH: everything
261+
return all;
263262
}
264263

265264
inline std::string engine_label(const LlmModelDef& m) {

src/pipeline/orchestrator.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ bool Orchestrator::init(const PipelineConfig& config) {
100100
pool_->total_size() / (1024.0*1024.0),
101101
pool_->utilization_pct());
102102

103-
// llama.cpp STT/LLM/TTS — required for llamacpp/auto, optional for metalrt-only
103+
// llama.cpp STT/LLM/TTS — required for llamacpp, optional for metalrt-only
104104
bool need_llamacpp = (config.llm_backend != LlmBackend::METALRT);
105105

106106
if (!stt_.init(config.stt)) {
@@ -142,8 +142,7 @@ bool Orchestrator::init(const PipelineConfig& config) {
142142

143143
// --- MetalRT backend (optional) ---
144144
bool metalrt_skip_due_to_crash = false;
145-
if (config.llm_backend == LlmBackend::METALRT ||
146-
config.llm_backend == LlmBackend::AUTO) {
145+
if (config.llm_backend == LlmBackend::METALRT) {
147146
// Check if MetalRT crashed on a previous launch
148147
if (metalrt_previously_crashed()) {
149148
LOG_WARN("Pipeline", "MetalRT crashed on a previous launch — skipping. "
@@ -154,7 +153,7 @@ bool Orchestrator::init(const PipelineConfig& config) {
154153
metalrt_crash_breadcrumb_path().c_str());
155154
metalrt_breadcrumb_remove();
156155
metalrt_skip_due_to_crash = true;
157-
if (config.llm_backend == LlmBackend::METALRT && !llm_.is_initialized()) {
156+
if (!llm_.is_initialized()) {
158157
LOG_ERROR("Pipeline", "MetalRT skipped and llama.cpp LLM not available");
159158
return false;
160159
}
@@ -179,11 +178,9 @@ bool Orchestrator::init(const PipelineConfig& config) {
179178
std::string mrt_prefix = metalrt_.profile().build_system_prefix(mrt_system);
180179
metalrt_.cache_system_prompt(mrt_prefix);
181180
metalrt_.set_system_prompt(mrt_system);
182-
if (config.llm_backend == LlmBackend::METALRT) {
183-
active_backend_ = LlmBackend::METALRT;
184-
LOG_INFO("Pipeline", "Active LLM backend: MetalRT");
185-
}
186-
} else if (config.llm_backend == LlmBackend::METALRT) {
181+
active_backend_ = LlmBackend::METALRT;
182+
LOG_INFO("Pipeline", "Active LLM backend: MetalRT");
183+
} else {
187184
metalrt_breadcrumb_remove();
188185
sigaction(SIGSEGV, &s_old_sigsegv, nullptr);
189186
sigaction(SIGBUS, &s_old_sigbus, nullptr);

src/pipeline/orchestrator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
namespace rastack {
2525

26-
enum class LlmBackend { AUTO, LLAMACPP, METALRT };
26+
enum class LlmBackend { LLAMACPP, METALRT };
2727

2828
struct PipelineConfig {
2929
SttConfig stt;
@@ -36,7 +36,7 @@ struct PipelineConfig {
3636
VadConfig vad;
3737
AudioConfig audio;
3838

39-
LlmBackend llm_backend = LlmBackend::AUTO;
39+
LlmBackend llm_backend = LlmBackend::LLAMACPP;
4040

4141
size_t memory_pool_size = 64 * 1024 * 1024; // 64MB
4242
size_t audio_ring_capacity = 16384 * 10; // ~10 sec at 16kHz

0 commit comments

Comments (0)