Skip to content

Commit d83789d

Browse files
committed
Bump version to 0.1.5 and enhance OpenAI chat completions provider with cache refresh handling
1 parent bc38820 commit d83789d

3 files changed

Lines changed: 151 additions & 17 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ members = [
66
resolver = "2"
77

88
[workspace.package]
9-
version = "0.1.4"
9+
version = "0.1.5"
1010
edition = "2024"
1111
license = "Apache-2.0"
1212

crates/bitloops-inference/src/provider/openai_chat_completions.rs

Lines changed: 148 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use bitloops_inference_protocol::{ProviderMetadata, ResponseMode, TokenUsage};
2-
use serde_json::{Value, json};
2+
use serde_json::{Map, Value, json};
33

44
use crate::config::ProfileConfig;
55
use crate::provider::{
@@ -11,6 +11,10 @@ pub struct OpenAiChatCompletionsProvider {
1111
profile: ProfileConfig,
1212
}
1313

14+
/// Provider name that marks a profile as targeting the Bitloops platform; it is compared against
/// `profile.provider_name` when deciding whether a failed request may be retried with a cache refresh.
const BITLOOPS_PLATFORM_PROVIDER_NAME: &str = "bitloops";
15+
/// Name of the HTTP request header that is sent with the value `"true"` when a cache refresh is requested.
const REFRESH_CACHE_HEADER: &str = "X-Bitloops-Refresh-Cache";
16+
/// Key looked up in the request metadata map to detect an explicit cache-refresh request from the caller.
const REFRESH_CACHE_METADATA_FIELD: &str = "bitloops_refresh_cache";
17+
1418
impl OpenAiChatCompletionsProvider {
1519
pub fn new(profile: ProfileConfig) -> Self {
1620
Self { profile }
@@ -41,6 +45,47 @@ impl OpenAiChatCompletionsProvider {
4145
payload
4246
}
4347

48+
fn build_headers(&self, refresh_cache: bool) -> Vec<(&'static str, String)> {
49+
let mut headers = Vec::new();
50+
if let Some(api_key) = &self.profile.api_key {
51+
headers.push(("Authorization", format!("Bearer {api_key}")));
52+
}
53+
if refresh_cache {
54+
headers.push((REFRESH_CACHE_HEADER, "true".to_owned()));
55+
}
56+
headers
57+
}
58+
59+
fn infer_once(
60+
&self,
61+
request: &InferenceRequest,
62+
refresh_cache: bool,
63+
) -> Result<InferenceResponse, ProviderError> {
64+
let payload = self.build_payload(request);
65+
let headers = self.build_headers(refresh_cache);
66+
67+
let body = post_json(
68+
&self.profile.base_url,
69+
self.profile.timeout_secs,
70+
&headers,
71+
&payload,
72+
)?;
73+
74+
self.parse_response(body, request.response_mode)
75+
}
76+
77+
fn should_retry_with_cache_refresh(
78+
&self,
79+
request: &InferenceRequest,
80+
error: &ProviderError,
81+
already_refreshed: bool,
82+
) -> bool {
83+
!already_refreshed
84+
&& self.profile.provider_name == BITLOOPS_PLATFORM_PROVIDER_NAME
85+
&& request.response_mode == ResponseMode::JsonObject
86+
&& error.code == "invalid_provider_response"
87+
}
88+
4489
fn parse_response(
4590
&self,
4691
body: Value,
@@ -99,23 +144,27 @@ impl InferenceProvider for OpenAiChatCompletionsProvider {
99144
}
100145

101146
fn infer(&self, request: &InferenceRequest) -> Result<InferenceResponse, ProviderError> {
102-
let payload = self.build_payload(request);
103-
let mut headers = Vec::new();
104-
if let Some(api_key) = &self.profile.api_key {
105-
headers.push(("Authorization", format!("Bearer {api_key}")));
147+
let refresh_cache = metadata_requests_refresh_cache(request.metadata.as_ref());
148+
match self.infer_once(request, refresh_cache) {
149+
Ok(response) => Ok(response),
150+
Err(error) if self.should_retry_with_cache_refresh(request, &error, refresh_cache) => {
151+
self.infer_once(request, true)
152+
}
153+
Err(error) => Err(error),
106154
}
107-
108-
let body = post_json(
109-
&self.profile.base_url,
110-
self.profile.timeout_secs,
111-
&headers,
112-
&payload,
113-
)?;
114-
115-
self.parse_response(body, request.response_mode)
116155
}
117156
}
118157

158+
fn metadata_requests_refresh_cache(metadata: Option<&Map<String, Value>>) -> bool {
159+
metadata
160+
.and_then(|metadata| metadata.get(REFRESH_CACHE_METADATA_FIELD))
161+
.is_some_and(|value| match value {
162+
Value::Bool(value) => *value,
163+
Value::String(value) => value.eq_ignore_ascii_case("true"),
164+
_ => false,
165+
})
166+
}
167+
119168
fn extract_message_content(choice: &Value) -> Option<String> {
120169
let content = choice.get("message")?.get("content")?;
121170
if let Some(text) = content.as_str() {
@@ -170,6 +219,14 @@ mod tests {
170219
}
171220
}
172221

222+
fn bitloops_profile() -> ProfileConfig {
223+
ProfileConfig {
224+
provider_name: "bitloops".to_owned(),
225+
base_url: "https://platform.example.com/v1/chat/completions".to_owned(),
226+
..profile()
227+
}
228+
}
229+
173230
fn request(response_mode: ResponseMode) -> InferenceRequest {
174231
InferenceRequest {
175232
system_prompt: "You summarise diffs.".to_owned(),
@@ -181,6 +238,16 @@ mod tests {
181238
}
182239
}
183240

241+
fn request_with_metadata(
242+
response_mode: ResponseMode,
243+
metadata: Map<String, Value>,
244+
) -> InferenceRequest {
245+
InferenceRequest {
246+
metadata: Some(metadata),
247+
..request(response_mode)
248+
}
249+
}
250+
184251
#[test]
185252
fn builds_json_mode_payload() {
186253
let provider = OpenAiChatCompletionsProvider::new(profile());
@@ -193,6 +260,73 @@ mod tests {
193260
assert_eq!(payload["response_format"]["type"], "json_object");
194261
}
195262

263+
// Text-mode requests must not carry a `response_format` entry in the payload.
#[test]
fn builds_text_mode_payload_without_response_format() {
    let text_request = request(ResponseMode::Text);
    let payload = OpenAiChatCompletionsProvider::new(profile()).build_payload(&text_request);

    assert_eq!(payload["model"], "gpt-4.1-mini");
    assert!(payload.get("response_format").is_none());
}
271+
272+
// A `true` refresh flag in the request metadata must surface as the refresh-cache header.
#[test]
fn metadata_refresh_flag_adds_refresh_cache_header() {
    let mut metadata = Map::new();
    metadata.insert(REFRESH_CACHE_METADATA_FIELD.to_owned(), json!(true));
    let flagged_request = request_with_metadata(ResponseMode::Text, metadata);
    let provider = OpenAiChatCompletionsProvider::new(profile());

    let refresh_cache = metadata_requests_refresh_cache(flagged_request.metadata.as_ref());
    let headers = provider.build_headers(refresh_cache);

    let has_refresh_header = headers
        .iter()
        .any(|(name, value)| *name == REFRESH_CACHE_HEADER && value == "true");
    assert!(has_refresh_header);
}
289+
290+
// Without a metadata flag, the default profile must not emit the refresh-cache header.
#[test]
fn openai_profile_does_not_refresh_cache_by_default() {
    let provider = OpenAiChatCompletionsProvider::new(profile());
    let plain_request = request(ResponseMode::JsonObject);

    let refresh_cache = metadata_requests_refresh_cache(plain_request.metadata.as_ref());
    let headers = provider.build_headers(refresh_cache);

    let has_refresh_header = headers
        .iter()
        .any(|(name, _)| *name == REFRESH_CACHE_HEADER);
    assert!(!has_refresh_header);
}
304+
305+
// On the Bitloops platform, an invalid JSON-mode response is retried, but only if the failed
// attempt had not already requested a refresh.
#[test]
fn bitloops_json_parse_failure_retries_once_with_refresh_cache() {
    let provider = OpenAiChatCompletionsProvider::new(bitloops_profile());
    let json_request = request(ResponseMode::JsonObject);
    let parse_failure = ProviderError::invalid_provider_response(
        "provider response did not contain JSON",
        None,
    );

    let retries_first_time =
        provider.should_retry_with_cache_refresh(&json_request, &parse_failure, false);
    let retries_after_refresh =
        provider.should_retry_with_cache_refresh(&json_request, &parse_failure, true);

    assert!(retries_first_time);
    assert!(!retries_after_refresh);
}
317+
318+
// Profiles that do not target the Bitloops platform never trigger the refresh retry.
#[test]
fn openai_json_parse_failure_does_not_retry_with_refresh_cache_by_default() {
    let provider = OpenAiChatCompletionsProvider::new(profile());
    let json_request = request(ResponseMode::JsonObject);
    let parse_failure = ProviderError::invalid_provider_response(
        "provider response did not contain JSON",
        None,
    );

    let retries = provider.should_retry_with_cache_refresh(&json_request, &parse_failure, false);

    assert!(!retries);
}
329+
196330
#[test]
197331
fn parses_successful_response() {
198332
let provider = OpenAiChatCompletionsProvider::new(profile());

0 commit comments

Comments
 (0)