64bit
diff --git a/‎.github/workflows/pr-checks.yml‎
Lines changed: 6 additions & 0 deletions b/‎.github/workflows/pr-checks.yml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎.github/workflows/wasm-pr-checks.yml‎
Lines changed: 7 additions & 1 deletion b/‎.github/workflows/wasm-pr-checks.yml‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎async-openai-macros/Cargo.toml‎
Lines changed: 4 additions & 1 deletion b/‎async-openai-macros/Cargo.toml‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎async-openai-macros/src/lib.rs‎
Lines changed: 13 additions & 1 deletion b/‎async-openai-macros/src/lib.rs‎
Lines changed: 13 additions & 1 deletion
diff --git a/‎async-openai/Cargo.toml‎
Lines changed: 6 additions & 3 deletions b/‎async-openai/Cargo.toml‎
Lines changed: 6 additions & 3 deletions
diff --git a/‎async-openai/MIDDLEWARE.md‎
Lines changed: 112 additions & 0 deletions b/‎async-openai/MIDDLEWARE.md‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎async-openai/README.md‎
Lines changed: 6 additions & 1 deletion b/‎async-openai/README.md‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎async-openai/src/assistants/runs.rs‎
Lines changed: 4 additions & 6 deletions b/‎async-openai/src/assistants/runs.rs‎
Lines changed: 4 additions & 6 deletions
diff --git a/‎async-openai/src/assistants/threads.rs‎
Lines changed: 2 additions & 3 deletions b/‎async-openai/src/assistants/threads.rs‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎async-openai/src/audio/speech.rs‎
Lines changed: 2 additions & 3 deletions b/‎async-openai/src/audio/speech.rs‎
Lines changed: 2 additions & 3 deletions
@@ -112,3 +112,9 @@ jobs:
 
       - name: Run clippy with feature ${{ matrix.feature }}
         run: cargo clippy --no-default-features --features ${{ matrix.feature }} -- -D warnings
+
+      - name: Build with feature ${{ matrix.feature }} and middleware
+        if: ${{ !contains(matrix.feature, 'types') }}
+        env:
+          RUSTFLAGS: "-D warnings"
+        run: cargo build --no-default-features --features ${{ matrix.feature }},middleware --verbose
@@ -14,7 +14,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        target: [wasm32-unknown-unknown, wasm32-wasip1]
+        target: [wasm32-unknown-unknown] #, wasm32-wasip1 (doesn't work with changes for middlewares)
         feature:
           [
             byot,
@@ -98,3 +98,9 @@ jobs:
         env:
           RUSTFLAGS: "-D warnings"
         run: cargo build --no-default-features --features ${{ matrix.feature }} --verbose --target ${{ matrix.target }}
+
+      - name: Build with feature ${{ matrix.feature }}, middleware, and target ${{ matrix.target }}
+        if: ${{ !contains(matrix.feature, 'types') }}
+        env:
+          RUSTFLAGS: "-D warnings"
+        run: cargo build --no-default-features --features ${{ matrix.feature }},middleware --verbose --target ${{ matrix.target }}
@@ -1,6 +1,6 @@
 [package]
 name = "async-openai-macros"
-version = "0.1.1"
+version = "0.2.0"
 authors = ["Himanshu Neema"]
 keywords = ["openai", "macros", "ai"]
 description = "Macros for async-openai"
@@ -14,6 +14,9 @@ readme = "README.md"
 [lib]
 proc-macro = true
 
+[features]
+middleware = []
+
 [dependencies]
 syn = { version = "2.0", features = ["full"] }
 quote = "1.0"
 
@@ -2,7 +2,7 @@ use proc_macro::TokenStream;
 use quote::{quote, ToTokens};
 use syn::{
     parse::{Parse, ParseStream},
-    parse_macro_input,
+    parse_macro_input, parse_quote,
     punctuated::Punctuated,
     token::Comma,
     FnArg, GenericParam, Generics, ItemFn, Pat, PatType, TypeParam, WhereClause,
@@ -57,6 +57,7 @@ pub fn byot(args: TokenStream, item: TokenStream) -> TokenStream {
     let input = parse_macro_input!(item as ItemFn);
     let mut new_generics = Generics::default();
     let mut param_count = 0;
+    let middleware_enabled = cfg!(feature = "middleware");
 
     // Process function arguments
     let mut new_params = Vec::new();
@@ -82,6 +83,17 @@ pub fn byot(args: TokenStream, item: TokenStream) -> TokenStream {
                         {
                             type_param.bounds.extend(vec![bound.clone()]);
                         }
+                        let needs_middleware_replay_bounds =
+                            bounds_args.bounds.iter().any(|(name, bound)| {
+                                name == &generic_name
+                                    && bound.to_token_stream().to_string().contains("Clone")
+                                    && !bound.to_token_stream().to_string().contains("Display")
+                            });
+                        if middleware_enabled && needs_middleware_replay_bounds {
+                            type_param
+                                .bounds
+                                .push(parse_quote!(crate::middleware::MiddlewareInput));
+                        }
 
                         new_params.push(GenericParam::Type(type_param));
                         param_count += 1;
 
@@ -49,6 +49,7 @@ chat-completion = ["chat-completion-types", "_api"]
 assistant = ["assistant-types", "_api", ]
 administration = ["administration-types", "_api"]
 completions = ["completion-types", "_api"]
+middleware = ["dep:tower", "_api", "async-openai-macros?/middleware"]
 
 # Type feature flags - these enable only the types
 response-types = ["dep:derive_builder"]
@@ -128,12 +129,12 @@ full = [
     "completions",
     "types",
     "byot",
+    "middleware",
 ]
 
 # Internal feature to enable API dependencies
 _api = [
     "dep:async-openai-macros",
-    "dep:backoff",
     "dep:base64",
     "dep:bytes",
     "dep:futures",
@@ -144,6 +145,7 @@ _api = [
     "dep:tokio-stream",
     "dep:tokio-util",
     "dep:tracing",
+    "dep:tower",
     "dep:secrecy",
     "dep:eventsource-stream",
     "dep:serde_urlencoded",
@@ -160,7 +162,7 @@ bytes = { version = "1.11", optional = true }
 
 # API dependencies - only needed when API features are enabled
 # We use a feature gate to enable these when any API feature is enabled
-async-openai-macros = { path = "../async-openai-macros", version = "0.1.1", optional = true }
+async-openai-macros = { path = "../async-openai-macros", version = "0.2.0", optional = true }
 base64 = { version = "0.22", optional = true }
 rand = { version = "0.9", optional = true }
 reqwest = { version = "0.13", features = [
@@ -174,14 +176,14 @@ tracing = { version = "0.1", optional = true }
 secrecy = { version = "0.10", features = ["serde"], optional = true }
 serde_urlencoded = { version = "0.7", optional = true }
 url = { version = "2.5", optional = true }
+tower = { version = "0.5", features = ["limit", "retry", "timeout", "util"], optional = true }
 ## For Webhook signature verification
 hmac = { version = "0.12", optional = true, default-features = false}
 sha2 = { version = "0.10", optional = true, default-features = false }
 hex = { version = "0.4", optional = true, default-features = false }
 
 ## API Non-WASM dependencies (streaming and retry is not implemented for WASM yet)
 [target.'cfg(not(target_family = "wasm"))'.dependencies]
-backoff = { version = "0.4.0", features = ["tokio"], optional = true }
 futures = { version = "0.3", optional = true }
 tokio = { version = "1", features = ["fs", "macros"], optional = true }
 tokio-stream = { version = "0.1", optional = true }
@@ -195,6 +197,7 @@ tokio-tungstenite = { version = "0.28", optional = true, default-features = fals
 getrandom = { version = "0.3", features = ["wasm_js"] }
 
 [dev-dependencies]
+http = "1"
 tokio-test = "0.4"
 serde_json = "1"
 
 
@@ -0,0 +1,112 @@
+# Tower-based middleware
+
+Enable the `middleware` feature to customize the HTTP execution path with Tower services and layers.
+
+The middleware boundary is intentionally below the API groups and above the concrete HTTP transport. An example middleware stack:
+
+```text
+async-openai API groups
+  responses(), chat(), files(), ...
+            |
+            v
+     HttpRequestFactory
+            |
+            v
++----- concurrency_limit ------+
+| +------- timeout ----------+ |
+| | +-- OpenAIRetryLayer --+ | |
+| | |                      | | |
+| | |  ReqwestService or   | | |
+| | |  custom service      | | |
+| | |                      | | |
+| | +-- OpenAIRetryLayer --+ | |
+| +------- timeout ----------+ |
++----- concurrency_limit ------+
+            |
+            v
+     reqwest::Response
+```
+
+The request value passed through tower is `HttpRequestFactory`, not `reqwest::Request`. This is deliberate: `reqwest::Request` is not generally cloneable once it contains a streaming body, but retry middleware needs a way to replay a request. The factory is cheap to clone and rebuilds a fresh `reqwest::Request` for each attempt.
+
+## Use the Default `ReqwestService`
+
+`ReqwestService` is a tower service backed by `reqwest::Client`. It is used by default to make outbound HTTP requests.
+
+```rust
+use async_openai::{Client, config::OpenAIConfig};
+use async_openai::middleware::{retry::OpenAIRetryLayer, ReqwestService};
+use std::time::Duration;
+
+let service = tower::ServiceBuilder::new()
+    .concurrency_limit(8)
+    .timeout(Duration::from_secs(30))
+    .layer(OpenAIRetryLayer::default())
+    .service(ReqwestService::new(reqwest::Client::new()));
+
+let client = Client::with_config(OpenAIConfig::default())
+    .with_http_service(service);
+```
+
+## Use a Custom Service
+
+You can replace `ReqwestService` entirely. This is useful for logging, metrics, tests, mocks, alternate transports, or policy layers that want to inspect the generated request before sending it.
+
+```rust
+use async_openai::{Client, config::OpenAIConfig, error::OpenAIError};
+use async_openai::middleware::HttpRequestFactory;
+use tower::service_fn;
+
+let service = service_fn(|factory: HttpRequestFactory| async move {
+    let request = factory.build().await?;
+
+    // here you can inspect, modify, or log the request, route it somewhere else,
+    // or return a synthetic response for testing.
+
+    println!("sending {} {}", request.method(), request.url());
+
+    reqwest::Client::new()
+        .execute(request)
+        .await
+        .map_err(OpenAIError::Reqwest)
+});
+
+let client = Client::with_config(OpenAIConfig::default())
+    .with_http_service(service);
+```
+
+## Retry layer
+
+`middleware::retry::OpenAIRetryLayer` is a Tower layer and `middleware::retry::SimpleRetryPolicy` is a Tower retry policy.
+
+Both retry with exponential backoff on `429`, `5xx`, and connection errors, and both respect the `Retry-After` header.
+
+The difference is that, upon seeing a `429`, `OpenAIRetryLayer` consumes the response body to check whether it is a rate limit (retryable error) or insufficient quota (permanent error). The default async-openai client uses this layer internally for the library's default retry behavior.
+
+The retry boundary is `HttpRequestFactory`. Retrying clones the factory and rebuilds a fresh `reqwest::Request` for each attempt instead of cloning a built request. That matters because `reqwest::Request` is not Clone.
+
+`middleware::retry::SimpleRetryPolicy` uses `middleware::retry::should_retry` to determine if a request should be retried.
+
+Custom tower retry policies can call `middleware::retry::should_retry` to reuse the same retry classification while changing delay behavior.
+
+On native targets, retries wait using `tokio::time::sleep`. On WASM, retries are immediate.
+
+## Native and WASM bounds
+
+The conceptual middleware boundary stays the same; only the platform thread-safety bounds differ.
+
+On native targets, middleware services installed with `Client::with_http_service` must be `Send + Sync + 'static` and return `Send + 'static` futures.
+
+On WASM targets, middleware services and futures must be `'static`.
+
+## Bring Your Own Types Interaction
+
+With the `byot` feature, generated `*_byot` methods keep minimal trait bounds. When the `middleware` feature is enabled, additional `MiddlewareInput` bounds are added (they differ between native and WASM targets) so the input can be stored long enough to rebuild a fresh request for retries.
+
+## Error Handling
+
+`OpenAIError::Boxed` is available only when the `middleware` feature is enabled.
+
+Custom middleware services installed with `Client::with_http_service` may use any error type that implements `Into<OpenAIError>`. This lets middleware preserve structured errors when it has a dedicated `OpenAIError` conversion.
+
+Tower's `BoxError` converts into `OpenAIError::Boxed`, which is useful for generic tower layers whose concrete error type is erased. Callers can still downcast the boxed error when they know the original error type.
@@ -44,7 +44,8 @@ Features that makes `async-openai` unique:
 - Ergonomic builder pattern for all request objects.
 - Granular feature flags to enable any types or apis: good for faster compilation and crate reuse.
 - Microsoft Azure OpenAI Service (only for APIs matching OpenAI spec).
-- WASM (doesn't include streaming and retry support yet)
+- WASM (doesn't support streaming yet)
+- Middleware support via the [tower](https://crates.io/crates/tower) ecosystem
 
 ## Usage
 
@@ -236,6 +237,10 @@ fn chat_completion(client: &Client<Box<dyn Config>>) {
 }
 ```
 
+## Middleware
+
+Middleware is supported via the Tower ecosystem and can be enabled with the `middleware` feature. See [middleware](https://github.com/64bit/async-openai/blob/main/async-openai/MIDDLEWARE.md) for more details.
+
 ## Contributing
 
 🎉 Thank you for taking the time to contribute and improve the project. I'd be happy to have you!
 
@@ -76,15 +76,14 @@ impl<'c, C: Config> Runs<'c, C> {
             request.stream = Some(true);
         }
 
-        Ok(self
-            .client
+        self.client
             .post_stream_mapped_raw_events(
                 &format!("/threads/{}/runs", self.thread_id),
                 request,
                 &self.request_options,
                 TryFrom::try_from,
             )
-            .await)
+            .await
     }
 
     /// Retrieves a run.
@@ -170,8 +169,7 @@ impl<'c, C: Config> Runs<'c, C> {
             request.stream = Some(true);
         }
 
-        Ok(self
-            .client
+        self.client
             .post_stream_mapped_raw_events(
                 &format!(
                     "/threads/{}/runs/{run_id}/submit_tool_outputs",
@@ -181,7 +179,7 @@ impl<'c, C: Config> Runs<'c, C> {
                 &self.request_options,
                 TryFrom::try_from,
             )
-            .await)
+            .await
     }
 
     /// Cancels a run that is `in_progress`
 
@@ -76,15 +76,14 @@ impl<'c, C: Config> Threads<'c, C> {
 
             request.stream = Some(true);
         }
-        Ok(self
-            .client
+        self.client
             .post_stream_mapped_raw_events(
                 "/threads/runs",
                 request,
                 &self.request_options,
                 TryFrom::try_from,
             )
-            .await)
+            .await
     }
 
     /// Create a thread.
 
@@ -60,9 +60,8 @@ impl<'c, C: Config> Speech<'c, C> {
 
             request.stream_format = Some(StreamFormat::SSE);
         }
-        Ok(self
-            .client
+        self.client
             .post_stream("/audio/speech", request, &self.request_options)
-            .await)
+            .await
     }
 }
Original file line number	Diff line number	Diff line change
`@@ -76,15 +76,14 @@ impl<'c, C: Config> Runs<'c, C> {`
`76`	`76`	`request.stream = Some(true);`
`77`	`77`	`}`
`78`	`78`
`79`		`- Ok(self`
`80`		`- .client`
	`79`	`+ self.client`
`81`	`80`	`.post_stream_mapped_raw_events(`
`82`	`81`	`&format!("/threads/{}/runs", self.thread_id),`
`83`	`82`	`request,`
`84`	`83`	`&self.request_options,`
`85`	`84`	`TryFrom::try_from,`
`86`	`85`	`)`
`87`		`- .await)`
	`86`	`+ .await`
`88`	`87`	`}`
`89`	`88`
`90`	`89`	`/// Retrieves a run.`
`@@ -170,8 +169,7 @@ impl<'c, C: Config> Runs<'c, C> {`
`170`	`169`	`request.stream = Some(true);`
`171`	`170`	`}`
`172`	`171`
`173`		`- Ok(self`
`174`		`- .client`
	`172`	`+ self.client`
`175`	`173`	`.post_stream_mapped_raw_events(`
`176`	`174`	`&format!(`
`177`	`175`	`"/threads/{}/runs/{run_id}/submit_tool_outputs",`
`@@ -181,7 +179,7 @@ impl<'c, C: Config> Runs<'c, C> {`
`181`	`179`	`&self.request_options,`
`182`	`180`	`TryFrom::try_from,`
`183`	`181`	`)`
`184`		`- .await)`
	`182`	`+ .await`
`185`	`183`	`}`
`186`	`184`
`187`	`185`	/// Cancels a run that is `in_progress`
Original file line number	Diff line number	Diff line change
`@@ -60,9 +60,8 @@ impl<'c, C: Config> Speech<'c, C> {`
`60`	`60`
`61`	`61`	`request.stream_format = Some(StreamFormat::SSE);`
`62`	`62`	`}`
`63`		`- Ok(self`
`64`		`- .client`
	`63`	`+ self.client`
`65`	`64`	`.post_stream("/audio/speech", request, &self.request_options)`
`66`		`- .await)`
	`65`	`+ .await`
`67`	`66`	`}`
`68`	`67`	`}`