Feat: Auto-Discovery & Multi-Provider Support for Local AI

quantDIY · quantDIY · commit cd3392358b30 · 2026-01-26T20:04:34.000-05:00
- Updated 'NativeAIBridge' to auto-detect running local models (Ollama:11434, llama.cpp:8080, LM Studio:1234).
- Added 'QUANUX_AI_PROVIDER' config to support non-OpenAI payloads in the future.
- Created 'server/skills/native_ai/SKILL.md' detailing Embedded AI setup.
- Ensures native integration for privacy-first, embedded AI inference.
diff --git a/execution-node/cpp/include/native_ai_bridge.h b/execution-node/cpp/include/native_ai_bridge.h
@@ -5,10 +5,12 @@
 
 class NativeAIBridge : public quanux::common::AIGateway {
 public:
-  // endpoint: e.g. "http://localhost:8080"
+  // endpoint: e.g. "http://localhost:8080"; "AUTODETECT" or "" auto-discovers
   // model: e.g. "llama-3-8b"
+  // provider: "openai" (default), "ollama", "anthropic", "gemini"
   NativeAIBridge(const std::string &endpoint, const std::string &api_key,
-                 const std::string &model);
+                 const std::string &model,
+                 const std::string &provider = "openai");
   ~NativeAIBridge() override;
 
   std::string query(const std::string &prompt) override;
diff --git a/execution-node/cpp/src/engine.cpp b/execution-node/cpp/src/engine.cpp
@@ -7,12 +7,15 @@
 Engine::Engine()
     : // ring_buffer_ default constructor is used (fixed size)
       // Retrieve AI Config from Env or Default
+      // Default Endpoint "" triggers AUTO-DISCOVERY in NativeAIBridge
       ai_bridge_(
           std::getenv("QUANUX_AI_ENDPOINT") ? std::getenv("QUANUX_AI_ENDPOINT")
-                                            : "http://localhost:8080",
+                                            : "",
           std::getenv("QUANUX_AI_KEY") ? std::getenv("QUANUX_AI_KEY") : "",
           std::getenv("QUANUX_AI_MODEL") ? std::getenv("QUANUX_AI_MODEL")
-                                         : "llama3"),
+                                         : "llama3",
+          std::getenv("QUANUX_AI_PROVIDER") ? std::getenv("QUANUX_AI_PROVIDER")
+                                            : "openai"),
       nats_bridge_("nats://localhost:4222"), // Default NATS URL
       market_data_engine_(&ring_buffer_, &nats_bridge_),
       order_gateway_(&nats_bridge_) {}
diff --git a/execution-node/cpp/src/native_ai_bridge.cpp b/execution-node/cpp/src/native_ai_bridge.cpp
@@ -9,48 +9,107 @@ struct NativeAIBridge::Impl {
   std::string base_url;
   std::string api_key;
   std::string model;
+  std::string provider;
   std::unique_ptr<httplib::Client> client;
 
-  Impl(const std::string &url, const std::string &key, const std::string &mdl)
-      : base_url(url), api_key(key), model(mdl) {
-    // Parse host and port from URL roughly or let httplib handle it
-    // httplib::Client expects "http://host:port"
+  Impl(const std::string &url, const std::string &key, const std::string &mdl,
+       const std::string &prov)
+      : base_url(url), api_key(key), model(mdl), provider(prov) {
+
+    if (base_url == "AUTODETECT" || base_url.empty()) {
+      discover_endpoint();
+    }
+
+    std::cout << "[AIBridge] Connecting to " << base_url << " (" << provider
+              << ")" << std::endl;
     client = std::make_unique<httplib::Client>(base_url.c_str());
     client->set_connection_timeout(5, 0); // 5s connection timeout
-    client->set_read_timeout(10,
-                             0); // 10s read timeout (fast inference required)
+    client->set_read_timeout(30, 0);      // Extended for generic models
+  }
+
+  void discover_endpoint() {
+    // Priority 1: Ollama (11434)
+    if (check_port("http://localhost:11434")) {
+      base_url = "http://localhost:11434";
+      if (provider == "openai")
+        provider =
+            "ollama"; // label only: query() treats "ollama" same as "openai"
+      return;
+    }
+    // Priority 2: Llama.cpp (8080)
+    if (check_port("http://localhost:8080")) {
+      base_url = "http://localhost:8080";
+      return;
+    }
+    // Priority 3: LM Studio (1234)
+    if (check_port("http://localhost:1234")) {
+      base_url = "http://localhost:1234";
+      return;
+    }
+    // Default fallback
+    base_url = "http://localhost:8080";
+    std::cerr << "[AIBridge] Warning: Auto-discovery failed, defaulting to "
+              << base_url << std::endl;
+  }
+
+  bool check_port(const std::string &url) {
+    try {
+      auto cli = httplib::Client(url.c_str());
+      cli.set_connection_timeout(1, 0);
+      auto res = cli.Get("/v1/models"); // Standard probe
+      return (res && res->status == 200);
+    } catch (...) {
+      return false;
+    }
   }
 };
 
 NativeAIBridge::NativeAIBridge(const std::string &endpoint,
                                const std::string &api_key,
-                               const std::string &model)
-    : impl_(std::make_shared<Impl>(endpoint, api_key, model)) {}
+                               const std::string &model,
+                               const std::string &provider)
+    : impl_(std::make_shared<Impl>(endpoint, api_key, model, provider)) {}
 
 NativeAIBridge::~NativeAIBridge() {}
 
 std::string NativeAIBridge::query(const std::string &prompt) {
-  json payload = {
-      {"model", impl_->model},
-      {"messages", {{{"role", "user"}, {"content", prompt}}}},
-      {"max_tokens", 100}, // Limit response size for speed
-      {"temperature", 0.0} // Deterministic
-  };
+  // Common Payload Builder
+  json payload;
+  std::string endpoint = "/v1/chat/completions";
+
+  if (impl_->provider == "openai" || impl_->provider == "ollama") {
+    payload = {{"model", impl_->model},
+               {"messages", {{{"role", "user"}, {"content", prompt}}}},
+               {"max_tokens", 256},
+               {"temperature", 0.7}};
+  } else if (impl_->provider == "anthropic" || impl_->provider == "gemini") {
+    // Placeholder for future formats (often proxied via OpenAI format locally
+    // anyway). If native protocols are required, override the endpoint here,
+    // e.g. endpoint = "/v1/messages"; for Anthropic.
+    payload = {{"model", impl_->model},
+               {"messages", {{{"role", "user"}, {"content", prompt}}}}};
+  }
 
   httplib::Headers headers = {{"Content-Type", "application/json"},
                               {"Authorization", "Bearer " + impl_->api_key}};
 
-  auto res = impl_->client->Post("/v1/chat/completions", headers,
-                                 payload.dump(), "application/json");
+  auto res = impl_->client->Post(endpoint.c_str(), headers, payload.dump(),
+                                 "application/json");
 
   if (res && res->status == 200) {
     try {
       auto response_json = json::parse(res->body);
-      if (response_json.contains("choices") &&
-          !response_json["choices"].empty()) {
-        return response_json["choices"][0]["message"]["content"]
-            .get<std::string>();
+
+      // Provider-specific parsing
+      if (impl_->provider == "openai" || impl_->provider == "ollama") {
+        if (response_json.contains("choices") &&
+            !response_json["choices"].empty()) {
+          return response_json["choices"][0]["message"]["content"]
+              .get<std::string>();
+        }
       }
+      // Fallback
+      return res->body;
     } catch (const std::exception &e) {
       std::cerr << "[AIBridge] JSON Parse Error: " << e.what() << std::endl;
       return "Error: Parse Failure";
diff --git a/server/skills/native_ai/SKILL.md b/server/skills/native_ai/SKILL.md
@@ -0,0 +1,72 @@
+---
+name: native_ai_embedded
+description: "Guide for connecting Local/Embedded AI Models (Ollama, Llama.cpp) to QuanuX C++ Engine."
+---
+
+# Native AI Connector (Embedded/Local)
+
+The **Native AI Connector** allows QuanuX Execution Nodes to communicate directly with **Embedded AI Models** running on the local machine or private network. This bypasses slow/expensive Cloud APIs and enables high-frequency, privacy-preserving inference.
+
+## 1. Supported Local Runners
+QuanuX supports any runner that provides an OpenAI-compatible API (standard in 2024+).
+
+### A. Ollama (Recommended)
+1.  **Install**: [ollama.com](https://ollama.com)
+2.  **Pull Model**: `ollama pull llama3`
+3.  **Run**: `ollama serve` (Runs on port 11434 by default)
+4.  **QuanuX Config**: 
+    - No config needed! QuanuX will **Auto-Discover** Ollama on port 11434.
+
+### B. Llama.cpp Server
+1.  **Build**: `make server` in llama.cpp repo.
+2.  **Run**: `./server -m my-model.gguf --port 8080 --host 0.0.0.0`
+3.  **QuanuX Config**:
+    - Auto-discovered on port 8080 (probed after Ollama's 11434, so stop Ollama or set the endpoint explicitly if both are running).
+    - Or force via env: `export QUANUX_AI_ENDPOINT="http://localhost:8080"`
+
+### C. LM Studio / Text-Gen-WebUI
+1.  Start the Local Server feature.
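+2.  Ensure "OpenAI Compatibility" is checked. LM Studio on port 1234 is auto-discovered (after ports 11434 and 8080); for any other port, set `QUANUX_AI_ENDPOINT`.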
+
+## 2. Configuration (`.env`)
+
+| Variable | Description | Default |
+| :--- | :--- | :--- |
+| `QUANUX_AI_ENDPOINT` | URL of the AI Server. Leave unset/empty, or set to `AUTODETECT`, for **Auto-Discovery**. | `""` (auto-discover) |
+| `QUANUX_AI_MODEL` | Model name to request (e.g., `llama3`, `mistral`). | `llama3` |
+| `QUANUX_AI_PROVIDER` | Protocol dialect: `openai`, `ollama`, `anthropic`, `gemini` (the last two are placeholders that still send OpenAI-style messages). | `openai` |
+| `QUANUX_AI_KEY` | Optional API key (if server requires it). | `""` |
+
+## 3. Usage in C++ Strategies
+
+Strategies access the AI via the `OrderService` pointer passed during `on_init`.
+
+```cpp
+#include "quanux/common/StrategyInterface.h"
+#include <cstring>
+#include <iostream>
+
+// In your strategy logic
+void on_market_data(StrategyContext *ctx, const MarketUpdate *update) {
+    if (update->price > 5000) {
+        char response[1024];
+        if (ctx->service->query_ai(ctx->service->engine_ctx, 
+            "Market is over 5000. Buy or Sell?", 
+            response, 1024)) {
+            
+            std::cout << "AI Advice: " << response << std::endl;
+        }
+    }
+}
+```
+
+## 4. Advanced: Non-OpenAI Models
+We built the bridge to be **Model Agnostic**.
+If you are using a specialized local server (e.g. for Gemini Nano or unreleased models) that uses a different JSON format:
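+1.  Set `QUANUX_AI_PROVIDER` to `anthropic` or `gemini` (a `custom` dialect is future work; other values are not yet handled).
+2.  These dialects are currently placeholders: the Bridge still posts OpenAI-style `messages` to `/v1/chat/completions` and returns the raw response body unparsed.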
+
+## 5. Port Forwarding (Remote Nodes)
+If your AI inference machine is separate from your Trading Node:
+1.  **SSH Tunnel**: `ssh -L 8080:localhost:8080 user@ai-server`
+2.  **QuanuX**: Connects to `localhost:8080` as if it were local.