Commit a7db1cc

Document and test platform gateway OpenAI compatibility

Parent: 78be2b4

3 files changed: 121 additions & 0 deletions

File tree:

- README.md
- crates/bitloops-inference/tests/cli_commands.rs
- crates/bitloops-inference/tests/protocol_loop.rs

README.md

Lines changed: 19 additions & 0 deletions
````diff
@@ -51,6 +51,25 @@ max_output_tokens = 200
 
 String fields support `${ENV_VAR}` interpolation. Missing environment variables fail validation immediately. Non-text-generation profiles in the same daemon config are ignored by `bitloops-inference`.
 
+The public Bitloops platform gateway works through the same `openai_chat_completions` driver. Point `base_url` at the gateway’s chat-completions endpoint and set `api_key` to the shared bearer token:
+
+```toml
+[inference.runtimes.bitloops_inference]
+request_timeout_secs = 300
+
+[inference.profiles.platform_summary]
+task = "text_generation"
+driver = "openai_chat_completions"
+runtime = "bitloops_inference"
+model = "ministral-3-3b-instruct"
+base_url = "https://platform.example.com/v1/chat/completions"
+api_key = "${BITLOOPS_PLATFORM_GATEWAY_TOKEN}"
+temperature = "0.1"
+max_output_tokens = 200
+```
+
+`bitloops-inference` treats the gateway as another OpenAI-compatible backend. No extra CLI flags, provider kind, or driver name are required.
+
 ## Supported drivers
 
 - `openai_chat_completions`
````
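Because the gateway speaks the same protocol, the request the `openai_chat_completions` driver sends it is the standard chat-completions shape. Below is a minimal sketch of that body, assuming `serde_json`; the field names follow the public OpenAI API and are not taken from this repository's code:

```rust
// Sketch of the chat-completions body an OpenAI-compatible driver POSTs to
// `base_url`. Field names follow the public OpenAI API; the exact body this
// crate builds may differ.
use serde_json::json;

fn main() {
    let body = json!({
        "model": "ministral-3-3b-instruct",
        "messages": [
            { "role": "system", "content": "You summarise semantic diffs." },
            { "role": "user", "content": "Summarise this change." }
        ],
        "temperature": 0.1,
        "max_tokens": 200
    });
    // The configured api_key presumably travels as
    // `Authorization: Bearer ${BITLOOPS_PLATFORM_GATEWAY_TOKEN}`.
    println!("{body}");
}
```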

crates/bitloops-inference/tests/cli_commands.rs

Lines changed: 53 additions & 0 deletions
```diff
@@ -101,3 +101,56 @@ fn describe_profile_returns_protocol_shaped_json() {
         other => panic!("expected describe response, got {other:?}"),
     }
 }
+
+#[test]
+fn describe_profile_preserves_gateway_endpoint_for_openai_driver() {
+    let config = write_config(
+        r#"
+[inference.runtimes.bitloops_inference]
+request_timeout_secs = 300
+
+[inference.profiles.platform_summary]
+task = "text_generation"
+driver = "openai_chat_completions"
+runtime = "bitloops_inference"
+model = "ministral-3-3b-instruct"
+base_url = "https://platform.example.com/v1/chat/completions"
+api_key = "${BITLOOPS_PLATFORM_GATEWAY_TOKEN}"
+temperature = "0.1"
+max_output_tokens = 200
+"#,
+    );
+
+    unsafe {
+        std::env::set_var("BITLOOPS_PLATFORM_GATEWAY_TOKEN", "secret");
+    }
+
+    let output = Command::cargo_bin("bitloops-inference")
+        .expect("binary should exist")
+        .arg("describe-profile")
+        .arg("--config")
+        .arg(config.path())
+        .arg("--profile")
+        .arg("platform_summary")
+        .output()
+        .expect("command should run");
+
+    assert!(output.status.success());
+    let response = ResponseEnvelope::from_json_line(
+        String::from_utf8(output.stdout)
+            .expect("stdout should be utf-8")
+            .trim_end(),
+    )
+    .expect("response should parse");
+
+    match response.payload {
+        ResponsePayload::Describe(describe) => {
+            assert_eq!(describe.profile_name, "platform_summary");
+            assert_eq!(
+                describe.provider.endpoint,
+                "https://platform.example.com/v1/chat/completions"
+            );
+        }
+        other => panic!("expected describe response, got {other:?}"),
+    }
+}
```
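The test relies on a `write_config` helper defined elsewhere in the suite. A plausible sketch, assuming it wraps `tempfile::NamedTempFile`; the real helper may differ:

```rust
// Hypothetical sketch of the `write_config` helper the test calls; the real
// definition lives elsewhere in the test suite. Assumes the `tempfile` crate.
use std::io::Write;
use tempfile::NamedTempFile;

fn write_config(contents: &str) -> NamedTempFile {
    let mut file = NamedTempFile::new().expect("temp config file should be created");
    file.write_all(contents.as_bytes())
        .expect("config contents should be written");
    file // the caller reads the location via `config.path()`
}
```

The `unsafe` block around `std::env::set_var` reflects the Rust 2024 edition, where mutating the process environment is an unsafe operation because it can race with other threads reading the environment.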

crates/bitloops-inference/tests/protocol_loop.rs

Lines changed: 49 additions & 0 deletions
```diff
@@ -208,6 +208,55 @@ fn http_errors_are_normalised() {
     mock.assert();
 }
 
+#[test]
+fn gateway_error_envelopes_are_normalised_as_http_errors() {
+    let mut server = Server::new();
+    let mock = server
+        .mock("POST", "/v1/chat/completions")
+        .with_status(502)
+        .with_header("content-type", "application/json")
+        .with_body(
+            r#"{
+                "request_id": "gateway-1",
+                "error": {
+                    "type": "no_responders",
+                    "message": "no NATS responders were listening on the target subject"
+                }
+            }"#,
+        )
+        .create();
+
+    let config = write_config(&openai_config(
+        &format!("{}/v1/chat/completions", server.url()),
+        60,
+    ));
+
+    let mut runtime = RuntimeHarness::spawn(config.path(), "openai_fast");
+    runtime.send(&RequestEnvelope {
+        request_id: "infer-gateway-http-error".to_owned(),
+        payload: RequestPayload::Infer(InferRequest {
+            system_prompt: "You summarise semantic diffs.".to_owned(),
+            user_prompt: "Summarise this change.".to_owned(),
+            response_mode: ResponseMode::Text,
+            temperature: Some(0.1),
+            max_output_tokens: Some(200),
+            metadata: None,
+        }),
+    });
+    let response = runtime.read();
+    assert_error(response, "provider_http_error");
+
+    runtime.send(&RequestEnvelope {
+        request_id: "shutdown-1".to_owned(),
+        payload: RequestPayload::Shutdown(ShutdownRequest {}),
+    });
+    let shutdown = runtime.read();
+    assert!(matches!(shutdown.payload, ResponsePayload::Shutdown(_)));
+
+    runtime.finish();
+    mock.assert();
+}
+
 #[test]
 fn malformed_json_object_is_reported() {
     let mut server = Server::new();
```
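`assert_error` is another suite-local helper the diff does not show. A hedged sketch of what it presumably asserts, assuming the error payload exposes a `code` field; the actual variant and field names may differ:

```rust
// Hypothetical sketch of the `assert_error` helper used above; the actual
// helper and the error payload's field names live elsewhere in this suite.
fn assert_error(response: ResponseEnvelope, expected_code: &str) {
    match response.payload {
        ResponsePayload::Error(error) => {
            // The gateway's 502 plus JSON envelope should normalise to this code.
            assert_eq!(error.code, expected_code);
        }
        other => panic!("expected error response, got {other:?}"),
    }
}
```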
