Commit a7db1cc

Document and test platform gateway OpenAI compatibility

Parent: 78be2b4

3 files changed: 121 additions & 0 deletions

File tree:

- README.md
- crates/bitloops-inference/tests/cli_commands.rs
- crates/bitloops-inference/tests/protocol_loop.rs

README.md

Lines changed: 19 additions & 0 deletions
````diff
@@ -51,6 +51,25 @@ max_output_tokens = 200
 
 String fields support `${ENV_VAR}` interpolation. Missing environment variables fail validation immediately. Non-text-generation profiles in the same daemon config are ignored by `bitloops-inference`.
 
+The public Bitloops platform gateway works through the same `openai_chat_completions` driver. Point `base_url` at the gateway’s chat-completions endpoint and set `api_key` to the shared bearer token:
+
+```toml
+[inference.runtimes.bitloops_inference]
+request_timeout_secs = 300
+
+[inference.profiles.platform_summary]
+task = "text_generation"
+driver = "openai_chat_completions"
+runtime = "bitloops_inference"
+model = "ministral-3-3b-instruct"
+base_url = "https://platform.example.com/v1/chat/completions"
+api_key = "${BITLOOPS_PLATFORM_GATEWAY_TOKEN}"
+temperature = "0.1"
+max_output_tokens = 200
+```
+
+`bitloops-inference` treats the gateway as another OpenAI-compatible backend. No extra CLI flags, provider kind, or driver name are required.
+
 ## Supported drivers
 
 - `openai_chat_completions`
````
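Because the gateway speaks the same protocol, the request the `openai_chat_completions` driver sends it is the standard chat-completions shape. Below is a minimal sketch of that body, assuming `serde_json`; the field names follow the public OpenAI API and are not taken from this repository's code:

```rust
// Sketch of the chat-completions body an OpenAI-compatible driver POSTs to
// `base_url`. Field names follow the public OpenAI API; the exact body this
// crate builds may differ.
use serde_json::json;

fn main() {
    let body = json!({
        "model": "ministral-3-3b-instruct",
        "messages": [
            { "role": "system", "content": "You summarise semantic diffs." },
            { "role": "user", "content": "Summarise this change." }
        ],
        "temperature": 0.1,
        "max_tokens": 200
    });
    // The configured api_key presumably travels as
    // `Authorization: Bearer ${BITLOOPS_PLATFORM_GATEWAY_TOKEN}`.
    println!("{body}");
}
```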

crates/bitloops-inference/tests/cli_commands.rs

Lines changed: 53 additions & 0 deletions
```diff
@@ -101,3 +101,56 @@ fn describe_profile_returns_protocol_shaped_json() {
         other => panic!("expected describe response, got {other:?}"),
     }
 }
+
+#[test]
+fn describe_profile_preserves_gateway_endpoint_for_openai_driver() {
+    let config = write_config(
+        r#"
+[inference.runtimes.bitloops_inference]
+request_timeout_secs = 300
+
+[inference.profiles.platform_summary]
+task = "text_generation"
+driver = "openai_chat_completions"
+runtime = "bitloops_inference"
+model = "ministral-3-3b-instruct"
+base_url = "https://platform.example.com/v1/chat/completions"
+api_key = "${BITLOOPS_PLATFORM_GATEWAY_TOKEN}"
+temperature = "0.1"
+max_output_tokens = 200
+"#,
+    );
+
+    unsafe {
+        std::env::set_var("BITLOOPS_PLATFORM_GATEWAY_TOKEN", "secret");
+    }
+
+    let output = Command::cargo_bin("bitloops-inference")
+        .expect("binary should exist")
+        .arg("describe-profile")
+        .arg("--config")
+        .arg(config.path())
+        .arg("--profile")
+        .arg("platform_summary")
+        .output()
+        .expect("command should run");
+
+    assert!(output.status.success());
+    let response = ResponseEnvelope::from_json_line(
+        String::from_utf8(output.stdout)
+            .expect("stdout should be utf-8")
+            .trim_end(),
+    )
+    .expect("response should parse");
+
+    match response.payload {
+        ResponsePayload::Describe(describe) => {
+            assert_eq!(describe.profile_name, "platform_summary");
+            assert_eq!(
+                describe.provider.endpoint,
+                "https://platform.example.com/v1/chat/completions"
+            );
+        }
+        other => panic!("expected describe response, got {other:?}"),
+    }
+}
```
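The test relies on a `write_config` helper defined elsewhere in the suite. A plausible sketch, assuming it wraps `tempfile::NamedTempFile`; the real helper may differ:

```rust
// Hypothetical sketch of the `write_config` helper the test calls; the real
// definition lives elsewhere in the test suite. Assumes the `tempfile` crate.
use std::io::Write;
use tempfile::NamedTempFile;

fn write_config(contents: &str) -> NamedTempFile {
    let mut file = NamedTempFile::new().expect("temp config file should be created");
    file.write_all(contents.as_bytes())
        .expect("config contents should be written");
    file // the caller reads the location via `config.path()`
}
```

The `unsafe` block around `std::env::set_var` reflects the Rust 2024 edition, where mutating the process environment is an unsafe operation because it can race with other threads reading the environment.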

crates/bitloops-inference/tests/protocol_loop.rs

Lines changed: 49 additions & 0 deletions
```diff
@@ -208,6 +208,55 @@ fn http_errors_are_normalised() {
     mock.assert();
 }
 
+#[test]
+fn gateway_error_envelopes_are_normalised_as_http_errors() {
+    let mut server = Server::new();
+    let mock = server
+        .mock("POST", "/v1/chat/completions")
+        .with_status(502)
+        .with_header("content-type", "application/json")
+        .with_body(
+            r#"{
+                "request_id": "gateway-1",
+                "error": {
+                    "type": "no_responders",
+                    "message": "no NATS responders were listening on the target subject"
+                }
+            }"#,
+        )
+        .create();
+
+    let config = write_config(&openai_config(
+        &format!("{}/v1/chat/completions", server.url()),
+        60,
+    ));
+
+    let mut runtime = RuntimeHarness::spawn(config.path(), "openai_fast");
+    runtime.send(&RequestEnvelope {
+        request_id: "infer-gateway-http-error".to_owned(),
+        payload: RequestPayload::Infer(InferRequest {
+            system_prompt: "You summarise semantic diffs.".to_owned(),
+            user_prompt: "Summarise this change.".to_owned(),
+            response_mode: ResponseMode::Text,
+            temperature: Some(0.1),
+            max_output_tokens: Some(200),
+            metadata: None,
+        }),
+    });
+    let response = runtime.read();
+    assert_error(response, "provider_http_error");
+
+    runtime.send(&RequestEnvelope {
+        request_id: "shutdown-1".to_owned(),
+        payload: RequestPayload::Shutdown(ShutdownRequest {}),
+    });
+    let shutdown = runtime.read();
+    assert!(matches!(shutdown.payload, ResponsePayload::Shutdown(_)));
+
+    runtime.finish();
+    mock.assert();
+}
+
 #[test]
 fn malformed_json_object_is_reported() {
     let mut server = Server::new();
```
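`assert_error` is another suite-local helper the diff does not show. A hedged sketch of what it presumably asserts, assuming the error payload exposes a `code` field; the actual variant and field names may differ:

```rust
// Hypothetical sketch of the `assert_error` helper used above; the actual
// helper and the error payload's field names live elsewhere in this suite.
fn assert_error(response: ResponseEnvelope, expected_code: &str) {
    match response.payload {
        ResponsePayload::Error(error) => {
            // The gateway's 502 plus JSON envelope should normalise to this code.
            assert_eq!(error.code, expected_code);
        }
        other => panic!("expected error response, got {other:?}"),
    }
}
```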
