Date: Thu, 21 May 2026 14:32:25 +0200
Subject: [PATCH 29/43] feat: add AI LLM Gateway tests for Claude, OpenAI, and
Google Gemini tutorials
- Introduced `AbstractAiTutorialTest` base class and provider-specific extensions for easier test creation.
- Added integration tests for basic gateway setups and API key sharing for Claude, OpenAI, and Google Gemini.
- Simulated upstream mock APIs to enable testing token limits, key handling, and input/output transformations.
---
.../ai/provider/google/GoogleLLMRequest.java | 5 +
.../interceptor/mcp/ExchangeToolSupport.java | 2 +-
.../ai/llmgateway/AbstractAiTutorialTest.java | 151 ++++++++++++
.../BasicClaudeLLMGatewayTutorialTest.java | 114 +++++++++
.../claude/SharingApiKeysTutorialTest.java | 223 ++++++++++++++++++
.../google/AbstractGoogleTutorialTest.java | 58 +++++
.../BasicGoogleLLMGatewayTutorialTest.java | 109 +++++++++
.../SharingApiKeysGoogleTutorialTest.java | 219 +++++++++++++++++
.../openai/AbstractOpenAiTutorialTest.java | 61 +++++
.../BasicOpenAiLLMGatewayTutorialTest.java | 105 +++++++++
.../SharingApiKeysOpenAiTutorialTest.java | 208 ++++++++++++++++
11 files changed, 1254 insertions(+), 1 deletion(-)
create mode 100644 distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java
create mode 100644 distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java
create mode 100644 distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java
create mode 100644 distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java
create mode 100644 distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java
create mode 100644 distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java
create mode 100644 distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java
create mode 100644 distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/BasicOpenAiLLMGatewayTutorialTest.java
create mode 100644 distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleLLMRequest.java
index da0b174465..07da55b089 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleLLMRequest.java
@@ -34,7 +34,12 @@ public String getModel() {
var modelPart = uri.substring(modelsIndex + "/models/".length());
+ // Support both ':' and URL-encoded '%3A' / '%3a' as separator before the action suffix
+ // (e.g. ':generateContent' or '%3AgenerateContent').
int colonIndex = modelPart.indexOf(':');
+ if (colonIndex < 0) {
+ colonIndex = modelPart.toLowerCase().indexOf("%3a");
+ }
if (colonIndex >= 0) {
return modelPart.substring(0, colonIndex);
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeToolSupport.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeToolSupport.java
index 6bfb14c5c8..ed9f1608d8 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeToolSupport.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeToolSupport.java
@@ -17,7 +17,6 @@
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.predic8.membrane.core.exchange.AbstractExchange;
-import com.predic8.membrane.core.interceptor.mcp.MCPUtil.*;
import com.predic8.membrane.core.mcp.MCPToolsCall;
import com.predic8.membrane.core.mcp.MCPToolsCallResponse;
import org.jetbrains.annotations.Nullable;
@@ -27,6 +26,7 @@
import java.util.*;
import static com.predic8.membrane.core.interceptor.mcp.ExchangeUtils.matchesExchangeFilter;
+import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.*;
import static com.predic8.membrane.core.interceptor.mcp.McpSchemaBuilder.integer;
import static com.predic8.membrane.core.interceptor.mcp.McpSchemaBuilder.string;
import static java.lang.Integer.MAX_VALUE;
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java
new file mode 100644
index 0000000000..fb5a37a07c
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java
@@ -0,0 +1,151 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway;
+
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.AbstractInterceptor;
+import com.predic8.membrane.core.interceptor.Outcome;
+import com.predic8.membrane.core.interceptor.flow.ReturnInterceptor;
+import com.predic8.membrane.core.interceptor.templating.StaticInterceptor;
+import com.predic8.membrane.core.proxies.ServiceProxy;
+import com.predic8.membrane.core.proxies.ServiceProxyKey;
+import com.predic8.membrane.core.router.DefaultRouter;
+import com.predic8.membrane.examples.util.DistributionExtractingTestcase;
+import com.predic8.membrane.examples.util.Process2;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+
+import java.util.function.Consumer;
+
+/**
+ * Base class for AI tutorial tests. Starts a local Membrane mock of the upstream LLM API
+ * so tests run without a real API key and without network access to the LLM provider.
+ *
+ * The tutorial YAML's {@code target.url} is rewritten to point at the mock server
+ * before Membrane starts. Subclasses override {@link #getTutorialDir()} and
+ * {@link #getTutorialYaml()} to select the tutorial under test.
+ *
+ * <p>JUnit 5 lifecycle ordering guarantees that {@code DistributionExtractingTestcase.init()}
+ * (superclass {@code @BeforeEach}) runs first and sets {@code baseDir}, allowing
+ * {@link #startGateway()} to use {@code replaceInFile2()} safely.
+ */
+public abstract class AbstractAiTutorialTest extends DistributionExtractingTestcase {
+
+ protected static final int MOCK_LLM_PORT = 3100;
+
+ protected Process2 process;
+ protected volatile String lastRequestBody;
+ protected volatile String lastRequestApiKey;
+
+ private DefaultRouter mockRouter;
+
+ protected abstract String getTutorialDir();
+ protected abstract String getTutorialYaml();
+
+ @Override
+ protected String getExampleDirName() {
+ return "../tutorials/%s".formatted(getTutorialDir());
+ }
+
+ @Override
+ protected String getParameters() {
+ return "-c %s".formatted(getTutorialYaml());
+ }
+
+ /**
+ * Runs after {@code DistributionExtractingTestcase.init()} sets {@code baseDir}.
+ * Starts the mock, patches the YAML, then starts Membrane.
+ */
+ @BeforeEach
+ void startGateway() throws Exception {
+ startMockLlmApi();
+ replaceInFile2(getTutorialYaml(), getUpstreamApiUrl(), mockApiUrl());
+ process = startServiceProxyScript();
+ }
+
+ /**
+ * Stops the Membrane process and the mock upstream after each test.
+ * The mock router is stopped in a {@code finally} block so that the mock port
+ * ({@code MOCK_LLM_PORT}) is released even if killing the script throws; otherwise
+ * the next test's mock would be unable to bind the same port.
+ */
+ @AfterEach
+ void stopGateway() {
+ try {
+ if (process != null) {
+ process.killScript();
+ }
+ } finally {
+ if (mockRouter != null) {
+ mockRouter.stop();
+ }
+ }
+ }
+
+ /**
+ * The upstream API URL used in the tutorial YAML (to be replaced by the mock URL).
+ */
+ protected String getUpstreamApiUrl() {
+ return "https://api.anthropic.com";
+ }
+
+ protected String mockApiUrl() {
+ return "http://localhost:" + MOCK_LLM_PORT;
+ }
+
+ /**
+ * The HTTP header name from which the upstream API key is read when capturing
+ * requests in the mock. Defaults to {@code "x-api-key"} (Claude). Override to
+ * {@code "authorization"} for OpenAI or {@code "x-goog-api-key"} for Google.
+ */
+ protected String getApiKeyHeader() {
+ return "x-api-key";
+ }
+
+ private void startMockLlmApi() throws Exception {
+ var si = new StaticInterceptor();
+ si.setSrc(mockResponse());
+ si.setContentType("application/json");
+
+ var sp = new ServiceProxy(new ServiceProxyKey(MOCK_LLM_PORT), null, 0);
+ sp.getFlow().add(new BodyCaptureInterceptor(
+ body -> lastRequestBody = body,
+ apiKey -> lastRequestApiKey = apiKey,
+ getApiKeyHeader()));
+ sp.getFlow().add(si);
+ sp.getFlow().add(new ReturnInterceptor());
+
+ mockRouter = new DefaultRouter();
+ mockRouter.add(sp);
+ mockRouter.start();
+ }
+
+ /**
+ * Mock-side interceptor that records what the gateway forwarded upstream: the decoded
+ * request body and the first value of the configured API-key header.
+ * The sinks are typed {@code Consumer<String>} so that the {@code String}-assigning
+ * lambdas passed in by {@code startMockLlmApi()} type-check; a raw {@code Consumer}
+ * would hand them an {@code Object}.
+ */
+ private static class BodyCaptureInterceptor extends AbstractInterceptor {
+
+ private final Consumer<String> bodySink;
+ private final Consumer<String> apiKeySink;
+ private final String apiKeyHeader;
+
+ BodyCaptureInterceptor(Consumer<String> bodySink, Consumer<String> apiKeySink, String apiKeyHeader) {
+ this.bodySink = bodySink;
+ this.apiKeySink = apiKeySink;
+ this.apiKeyHeader = apiKeyHeader;
+ }
+
+ @Override
+ public Outcome handleRequest(Exchange exc) {
+ bodySink.accept(exc.getRequest().getBodyAsStringDecoded());
+ // NOTE(review): presumably null when the header is absent — confirm getFirstValue's contract
+ apiKeySink.accept(exc.getRequest().getHeader().getFirstValue(apiKeyHeader));
+ return Outcome.CONTINUE;
+ }
+ }
+
+ protected String mockResponse() {
+ return """
+ {"id":"msg_mock","type":"message","role":"assistant",\
+ "content":[{"type":"text","text":"I am a mock."}],\
+ "model":"claude-sonnet-4-0","stop_reason":"end_turn",\
+ "usage":{"input_tokens":10,"output_tokens":5}}""";
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java
new file mode 100644
index 0000000000..3cde3fa976
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java
@@ -0,0 +1,114 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.claude;
+
+import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static io.restassured.path.json.JsonPath.from;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Integration test for {@code distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml}.
+ *
+ * The tutorial configures a Claude LLM gateway with:
+ *
+ * - {@code maxInputTokens: 100} — requests whose estimated input exceeds 100 tokens are rejected
+ * - {@code maxOutputTokens: 200} — {@code max_tokens} in the forwarded request is capped to 200
+ *
+ *
+ * The upstream Anthropic API is replaced by a local mock server so no real API key is needed.
+ */
+public class BasicClaudeLLMGatewayTutorialTest extends AbstractAiTutorialTest {
+
+ @Override
+ protected String getTutorialDir() {
+ return "ai/llm-gateway/claude";
+ }
+
+ @Override
+ protected String getTutorialYaml() {
+ return "10-Basic-LLM-Gateway.yaml";
+ }
+
+ /**
+ * A request within the token limits is forwarded to the upstream and its response is returned.
+ */
+ @Test
+ void simpleRequestIsForwarded() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", "test-key")
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(200)
+ .body("type", equalTo("message"))
+ .body("content[0].type", equalTo("text"));
+ // @formatter:on
+ }
+
+ /**
+ * A request whose message content exceeds {@code maxInputTokens} (100) is rejected by the
+ * gateway before reaching the upstream. The response uses the Claude error format.
+ */
+ @Test
+ void inputTokenLimitExceededIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", "test-key")
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("max-input.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(400)
+ .body("type", equalTo("error"))
+ .body("error.type", equalTo("invalid_request_error"))
+ .body("error.message", containsString("tokens"));
+ // @formatter:on
+ }
+
+ /**
+ * When the request asks for more output tokens than {@code maxOutputTokens} (200) allows,
+ * the gateway rewrites {@code max_tokens} to 200 before forwarding to the upstream.
+ * The mock captures the forwarded body so we can verify the value was actually capped.
+ */
+ @Test
+ void outputTokensAreCappedBeforeForwarding() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", "test-key")
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(200);
+ // @formatter:on
+
+ assertThat(from(lastRequestBody).getInt("max_tokens"), equalTo(200));
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java
new file mode 100644
index 0000000000..33174d6ac8
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java
@@ -0,0 +1,223 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.claude;
+
+import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Integration tests for
+ * {@code distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml}.
+ *
+ * <p>The tutorial demonstrates sharing a single upstream API key between multiple users,
+ * each identified by their own gateway key and subject to individual token budgets:
+ *
+ * - alice — key {@code abc123}, budget 250 tokens
+ * - bob — key {@code qwertz}, budget 10 000 tokens
+ *
+ * Additional gateway limits: {@code maxInputTokens=100}, {@code maxOutputTokens=200},
+ * allowed models: {@code claude-sonnet-4-0}, {@code claude-opus-4-0}, {@code claude-haiku-3-5}.
+ */
+public class SharingApiKeysTutorialTest extends AbstractAiTutorialTest {
+
+ private static final String ALICE = "abc123";
+ private static final String BOB = "qwertz";
+
+ @Override
+ protected String getTutorialDir() {
+ return "ai/llm-gateway/claude";
+ }
+
+ @Override
+ protected String getTutorialYaml() {
+ return "20-Sharing-API-Keys.yaml";
+ }
+
+ @Test
+ void aliceCanSendRequest() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", ALICE)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(200)
+ .body("type", equalTo("message"));
+ // @formatter:on
+ }
+
+ @Test
+ void bobCanSendRequest() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", BOB)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(200)
+ .body("type", equalTo("message"));
+ // @formatter:on
+ }
+
+ @Test
+ void unknownApiKeyIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", "invalid-key")
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(401)
+ .body("type", equalTo("error"))
+ .body("error.type", equalTo("authentication_error"));
+ // @formatter:on
+ }
+
+ /**
+ * The gateway is configured with its own upstream {@code apiKey}. When a user request
+ * arrives carrying the user-facing key (e.g. alice's {@code abc123}), the gateway must
+ * replace it with the configured upstream key before forwarding to the LLM provider.
+ */
+ @Test
+ void userApiKeyIsReplacedWithGatewayApiKey() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", ALICE)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(200);
+ // @formatter:on
+
+ assertThat(lastRequestApiKey, not(equalTo(ALICE)));
+ assertThat(lastRequestApiKey, equalTo("<>")); // NOTE(review): "<>" looks like a placeholder whose inner text was lost — confirm against the apiKey configured in 20-Sharing-API-Keys.yaml
+ }
+
+ @Test
+ void wrongModelIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", ALICE)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("wrong-model.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(400)
+ .body("type", equalTo("error"))
+ .body("error.type", equalTo("invalid_request_error"))
+ .body("error.message", containsString("gpt-5"))
+ .body("error.message", containsString("not allowed"));
+ // @formatter:on
+ }
+
+ @Test
+ void inputTokenLimitExceededIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", ALICE)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("max-input.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(400)
+ .body("type", equalTo("error"))
+ .body("error.type", equalTo("invalid_request_error"))
+ .body("error.message", containsString("prompt is too long"))
+ .body("error.message", containsString("100 maximum"));
+ // @formatter:on
+ }
+
+ /**
+ * Alice has a budget of 250 tokens. Each request with {@code max-output.json} projects
+ * 7 (input estimate) + 200 (capped max_tokens) = 207 tokens. The mock returns 15 tokens
+ * of actual usage per call, so the running total grows by 15 after each response.
+ *
+ * Budget accounting per request:
+ *
+ * 1st: 250 - 0 - 207 = 43 → forwarded; used becomes 15
+ * 2nd: 250 - 15 - 207 = 28 → forwarded; used becomes 30
+ * 3rd: 250 - 30 - 207 = 13 → forwarded; used becomes 45
+ * 4th: 250 - 45 - 207 = -2 → rejected with 429
+ *
+ *
+ * Bob's separate budget of 10 000 tokens is unaffected, so he can still send requests
+ * after alice is blocked.
+ */
+ @Test
+ void alicesTokenBudgetIsExhaustedWhileBobIsUnaffected() throws IOException {
+ for (int i = 0; i < 3; i++) {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", ALICE)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(200);
+ // @formatter:on
+ }
+
+ // Alice's budget is now exhausted
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", ALICE)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(429)
+ .body("type", equalTo("error"))
+ .body("error.type", equalTo("rate_limit_error"));
+
+ // Bob's budget is independent — he can still send requests
+ given()
+ .contentType("application/json")
+ .header("x-api-key", BOB)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(200)
+ .body("type", equalTo("message"));
+ // @formatter:on
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java
new file mode 100644
index 0000000000..4e39f7ae6c
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java
@@ -0,0 +1,58 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.google;
+
+import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest;
+
+/**
+ * Base class for Google Gemini LLM-Gateway tutorial tests.
+ *
+ * Overrides the upstream URL and the API-key header so the mock captures
+ * the {@code x-goog-api-key} header that Google uses. The mock response is
+ * formatted as a Gemini {@code generateContent} reply and reports 100 total
+ * tokens (50 prompt + 50 candidates) per call.
+ */
+public abstract class AbstractGoogleTutorialTest extends AbstractAiTutorialTest {
+
+ /** URL prefix used in both Google tutorial YAML files. */
+ @Override
+ protected String getUpstreamApiUrl() {
+ return "https://generativelanguage.googleapis.com";
+ }
+
+ @Override
+ protected String getTutorialDir() {
+ return "ai/llm-gateway/google";
+ }
+
+ /** Google authenticates via the {@code x-goog-api-key} header. */
+ @Override
+ protected String getApiKeyHeader() {
+ return "x-goog-api-key";
+ }
+
+ /**
+ * Minimal Gemini {@code generateContent} reply with 50 prompt + 50 candidates = 100 total
+ * tokens. The higher per-request cost keeps the token-budget exhaustion test to three
+ * successful requests before alice's 500-token allowance runs out.
+ */
+ @Override
+ protected String mockResponse() {
+ return """
+ {"candidates":[{"content":{"parts":[{"text":"I am a mock."}],"role":"model"},\
+ "finishReason":"STOP"}],\
+ "usageMetadata":{"promptTokenCount":50,"candidatesTokenCount":50,"totalTokenCount":100}}""";
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java
new file mode 100644
index 0000000000..16f52d470b
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java
@@ -0,0 +1,109 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.google;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static io.restassured.path.json.JsonPath.from;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Integration test for
+ * {@code distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml}.
+ *
+ * <p>The tutorial configures a Google Gemini LLM gateway with:
+ *
+ * - {@code maxInputTokens: 100} — requests whose estimated input exceeds 100 tokens are rejected
+ * - {@code maxOutputTokens: 200} — {@code generationConfig.maxOutputTokens} in the forwarded
+ * request is capped to 200
+ *
+ *
+ * The upstream Google Gemini API is replaced by a local mock server so no real API key is needed.
+ */
+public class BasicGoogleLLMGatewayTutorialTest extends AbstractGoogleTutorialTest {
+
+ private static final String GEMINI_ENDPOINT =
+ LOCALHOST_2000 + "/v1beta/models/gemini-2.5-flash:generateContent";
+
+ @Override
+ protected String getTutorialYaml() {
+ return "10-Basic-LLM-Gateway.yaml";
+ }
+
+ /**
+ * A request within the token limits is forwarded to the upstream and its response is returned.
+ */
+ @Test
+ void simpleRequestIsForwarded() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", "test-key")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(GEMINI_ENDPOINT)
+ .then()
+ .statusCode(200)
+ .body("candidates[0].content.parts[0].text", equalTo("I am a mock."));
+ // @formatter:on
+ }
+
+ /**
+ * A request whose message content exceeds {@code maxInputTokens} (100) is rejected by the
+ * gateway before reaching the upstream. The response uses the Google error format.
+ */
+ @Test
+ void inputTokenLimitExceededIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", "test-key")
+ .body(readFileFromBaseDir("max-input.json"))
+ .when()
+ .post(GEMINI_ENDPOINT)
+ .then()
+ .statusCode(400)
+ .body("error.status", equalTo("INVALID_ARGUMENT"))
+ .body("error.message", containsString("exceeds the maximum allowed"))
+ .body("error.message", containsString("100"));
+ // @formatter:on
+ }
+
+ /**
+ * When the request asks for more output tokens than {@code maxOutputTokens} (200) allows,
+ * the gateway rewrites {@code generationConfig.maxOutputTokens} to 200 before forwarding.
+ * The mock captures the forwarded body so we can verify the value was actually capped.
+ */
+ @Test
+ void outputTokensAreCappedBeforeForwarding() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", "test-key")
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(GEMINI_ENDPOINT)
+ .then()
+ .statusCode(200);
+ // @formatter:on
+
+ assertThat(from(lastRequestBody).getInt("generationConfig.maxOutputTokens"), equalTo(200));
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java
new file mode 100644
index 0000000000..567da88b9e
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java
@@ -0,0 +1,219 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.google;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Integration tests for
+ * {@code distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml}.
+ *
+ * <p>The tutorial demonstrates sharing a single upstream API key between multiple users,
+ * each identified by their own gateway key and subject to individual token budgets:
+ *
+ * - alice — key {@code abc123}, budget 500 tokens
+ * - bob — key {@code qwertz}, budget 10 000 tokens
+ *
+ * Additional gateway limits: {@code maxInputTokens=100}, {@code maxOutputTokens=200},
+ * allowed models: {@code gemini-2.5-pro}, {@code gemini-2.5-flash}, {@code gemini-2.5-flash-lite},
+ * {@code gemini-2.0-flash}, {@code gemini-2.0-flash-lite}.
+ *
+ * For Google Gemini the model is part of the URL path
+ * ({@code /v1beta/models/<model>:generateContent}), not the request body.
+ */
+public class SharingApiKeysGoogleTutorialTest extends AbstractGoogleTutorialTest {
+
+ private static final String ALICE = "abc123";
+ private static final String BOB = "qwertz";
+
+ private static final String GEMINI_FLASH_ENDPOINT =
+ LOCALHOST_2000 + "/v1beta/models/gemini-2.5-flash:generateContent";
+
+ @Override
+ protected String getTutorialYaml() {
+ return "20-Sharing-API-Keys.yaml";
+ }
+
+ @Test
+ void aliceCanSendRequest() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", ALICE)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(GEMINI_FLASH_ENDPOINT)
+ .then()
+ .statusCode(200)
+ .body("candidates[0].content.parts[0].text", equalTo("I am a mock."));
+ // @formatter:on
+ }
+
+ @Test
+ void bobCanSendRequest() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", BOB)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(GEMINI_FLASH_ENDPOINT)
+ .then()
+ .statusCode(200)
+ .body("candidates[0].content.parts[0].text", equalTo("I am a mock."));
+ // @formatter:on
+ }
+
+ @Test
+ void unknownApiKeyIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", "invalid-key")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(GEMINI_FLASH_ENDPOINT)
+ .then()
+ .statusCode(401)
+ .body("error.status", equalTo("UNAUTHENTICATED"))
+ .body("error.message", containsString("Invalid API key"));
+ // @formatter:on
+ }
+
+ /**
+ * The gateway is configured with its own upstream {@code apiKey}. When a user request
+ * arrives carrying the user-facing key (e.g. alice's {@code abc123}), the gateway must
+ * replace it with the configured upstream key before forwarding to the LLM provider.
+ * For Google Gemini, the key is carried in the {@code x-goog-api-key} header.
+ */
+ @Test
+ void userApiKeyIsReplacedWithGatewayApiKey() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", ALICE)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(GEMINI_FLASH_ENDPOINT)
+ .then()
+ .log().ifValidationFails()
+ .statusCode(200);
+ // @formatter:on
+
+ assertThat(lastRequestApiKey, not(equalTo(ALICE)));
+ assertThat(lastRequestApiKey, equalTo("<>"));
+ }
+
+ /**
+ * For Google Gemini the model is extracted from the URL path. Sending a request to
+ * {@code /v1beta/models/gpt-5:generateContent} uses model {@code gpt-5}, which is not
+ * in the allowed list, so the gateway rejects it with 400.
+ */
+ @Test
+ void wrongModelIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", ALICE)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1beta/models/gpt-5:generateContent")
+ .then()
+ .statusCode(400)
+ .body("error.status", equalTo("INVALID_ARGUMENT"))
+ .body("error.message", containsString("gpt-5"))
+ .body("error.message", containsString("not allowed"));
+ // @formatter:on
+ }
+
+ @Test
+ void inputTokenLimitExceededIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", ALICE)
+ .body(readFileFromBaseDir("max-input.json"))
+ .when()
+ .post(GEMINI_FLASH_ENDPOINT)
+ .then()
+ .statusCode(400)
+ .body("error.status", equalTo("INVALID_ARGUMENT"))
+ .body("error.message", containsString("exceeds the maximum allowed"))
+ .body("error.message", containsString("100"));
+ // @formatter:on
+ }
+
+ /**
+ * Alice has a budget of 500 tokens. Each request with {@code max-output.json} projects
+ * 9 (input estimate) + 200 (capped maxOutputTokens) = 209 tokens. The mock returns
+ * 100 tokens of actual usage per call, so the running total grows by 100 after each response.
+ *
+ * Budget accounting per request:
+ *
+ * 1st: 500 - 0 - 209 = 291 → forwarded; used becomes 100
+ * 2nd: 500 - 100 - 209 = 191 → forwarded; used becomes 200
+ * 3rd: 500 - 200 - 209 = 91 → forwarded; used becomes 300
+ * 4th: 500 - 300 - 209 = -9 → rejected with 429
+ *
+ *
+ * Bob's separate budget of 10 000 tokens is unaffected, so he can still send requests
+ * after alice is blocked.
+ */
+ @Test
+ void alicesTokenBudgetIsExhaustedWhileBobIsUnaffected() throws IOException {
+ for (int i = 0; i < 3; i++) {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", ALICE)
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(GEMINI_FLASH_ENDPOINT)
+ .then()
+ .statusCode(200);
+ // @formatter:on
+ }
+
+ // Alice's budget is now exhausted
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", ALICE)
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(GEMINI_FLASH_ENDPOINT)
+ .then()
+ .statusCode(429)
+ .body("error.status", equalTo("RESOURCE_EXHAUSTED"));
+
+ // Bob's budget is independent — he can still send requests
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", BOB)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(GEMINI_FLASH_ENDPOINT)
+ .then()
+ .statusCode(200)
+ .body("candidates[0].content.parts[0].text", equalTo("I am a mock."));
+ // @formatter:on
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java
new file mode 100644
index 0000000000..54136f4c2f
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java
@@ -0,0 +1,61 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.openai;
+
+import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest;
+
+/**
+ * Base class for OpenAI LLM-Gateway tutorial tests.
+ *
+ * Overrides the upstream URL and the API-key header so the mock captures
+ * the {@code Authorization} header that OpenAI uses instead of {@code x-api-key}.
+ * The mock response is formatted as an OpenAI Responses-API reply and reports
+ * 100 total tokens (50 input + 50 output) per call.
+ */
+public abstract class AbstractOpenAiTutorialTest extends AbstractAiTutorialTest {
+
+ @Override
+ protected String getTutorialDir() {
+ return "ai/llm-gateway/openai";
+ }
+
+ @Override
+ protected String getUpstreamApiUrl() {
+ return "https://api.openai.com";
+ }
+
+ /**
+ * OpenAI authenticates via {@code Authorization: Bearer <api-key>}.
+ * The full header value (including the "Bearer " prefix) is captured.
+ */
+ @Override
+ protected String getApiKeyHeader() {
+ return "authorization";
+ }
+
+ /**
+ * Minimal OpenAI Responses-API reply with 50 input + 50 output = 100 total tokens.
+ * The higher per-request cost (vs. the default Claude mock) keeps the token-budget
+ * exhaustion test to three successful requests before alice's 500-token allowance runs out.
+ */
+ @Override
+ protected String mockResponse() {
+ return """
+ {"id":"resp_mock","object":"response","model":"gpt-5-nano",\
+ "output":[{"type":"message","role":"assistant",\
+ "content":[{"type":"output_text","text":"I am a mock."}]}],\
+ "usage":{"input_tokens":50,"output_tokens":50,"total_tokens":100}}""";
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/BasicOpenAiLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/BasicOpenAiLLMGatewayTutorialTest.java
new file mode 100644
index 0000000000..6dd96ee098
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/BasicOpenAiLLMGatewayTutorialTest.java
@@ -0,0 +1,105 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.openai;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static io.restassured.path.json.JsonPath.from;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Integration test for
+ * {@code distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml}.
+ *
+ * The tutorial configures an OpenAI LLM gateway with:
+ *
+ * - {@code maxInputTokens: 100} — requests whose estimated input exceeds 100 tokens are rejected
+ * - {@code maxOutputTokens: 200} — {@code max_output_tokens} in the forwarded request is capped to 200
+ *
+ *
+ * The upstream OpenAI API is replaced by a local mock server so no real API key is needed.
+ */
+public class BasicOpenAiLLMGatewayTutorialTest extends AbstractOpenAiTutorialTest {
+
+ @Override
+ protected String getTutorialYaml() {
+ return "10-Basic-LLM-Gateway.yaml";
+ }
+
+ /**
+ * A request within the token limits is forwarded to the upstream and its response is returned.
+ */
+ @Test
+ void simpleRequestIsForwarded() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer test-key")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200)
+ .body("object", equalTo("response"));
+ // @formatter:on
+ }
+
+ /**
+ * A request whose message content exceeds {@code maxInputTokens} (100) is rejected by the
+ * gateway before reaching the upstream. The response uses the OpenAI error format.
+ */
+ @Test
+ void inputTokenLimitExceededIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer test-key")
+ .body(readFileFromBaseDir("max-input.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(400)
+ .body("error.type", equalTo("invalid_request_error"))
+ .body("error.code", equalTo("context_length_exceeded"))
+ .body("error.message", containsString("100"));
+ // @formatter:on
+ }
+
+ /**
+ * When the request asks for more output tokens than {@code maxOutputTokens} (200) allows,
+ * the gateway rewrites {@code max_output_tokens} to 200 before forwarding to the upstream.
+ * The mock captures the forwarded body so we can verify the value was actually capped.
+ */
+ @Test
+ void outputTokensAreCappedBeforeForwarding() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer test-key")
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200);
+ // @formatter:on
+
+ assertThat(from(lastRequestBody).getInt("max_output_tokens"), equalTo(200));
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
new file mode 100644
index 0000000000..7bd410fc24
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
@@ -0,0 +1,208 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.openai;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Integration tests for
+ * {@code distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml}.
+ *
+ * <p>The tutorial demonstrates sharing a single upstream API key between multiple users,
+ * each identified by their own gateway key and subject to individual token budgets:
+ *
+ * - alice — key {@code abc123}, budget 500 tokens
+ * - bob — key {@code qwertz}, budget 10 000 tokens
+ *
+ * Additional gateway limits: {@code maxInputTokens=100}, {@code maxOutputTokens=200},
+ * allowed models: {@code gpt-5.4}, {@code gpt-5-nano}, {@code gpt-5-mini}.
+ */
+public class SharingApiKeysOpenAiTutorialTest extends AbstractOpenAiTutorialTest {
+
+ private static final String ALICE = "abc123";
+ private static final String BOB = "qwertz";
+
+ @Override
+ protected String getTutorialYaml() {
+ return "20-Sharing-API-Keys.yaml";
+ }
+
+ @Test
+ void aliceCanSendRequest() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + ALICE)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200)
+ .body("object", equalTo("response"));
+ // @formatter:on
+ }
+
+ @Test
+ void bobCanSendRequest() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + BOB)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200)
+ .body("object", equalTo("response"));
+ // @formatter:on
+ }
+
+ @Test
+ void unknownApiKeyIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer invalid-key")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(401)
+ .body("error.code", equalTo("invalid_authentication"));
+ // @formatter:on
+ }
+
+ /**
+ * The gateway is configured with its own upstream {@code apiKey}. When a user request
+ * arrives carrying the user-facing key (e.g. alice's {@code abc123}), the gateway must
+ * replace it with the configured upstream key before forwarding to the LLM provider.
+ * For OpenAI, the key is carried in the {@code Authorization: Bearer <api-key>} header.
+ */
+ @Test
+ void userApiKeyIsReplacedWithGatewayApiKey() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + ALICE)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200);
+ // @formatter:on
+
+ assertThat(lastRequestApiKey, not(equalTo("Bearer " + ALICE)));
+ assertThat(lastRequestApiKey, equalTo("Bearer <>"));
+ }
+
+ @Test
+ void wrongModelIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + ALICE)
+ .body(readFileFromBaseDir("wrong-model.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(400)
+ .body("error.type", equalTo("invalid_request_error"))
+ .body("error.code", equalTo("model_not_allowed"))
+ .body("error.message", containsString("gpt-4"))
+ .body("error.message", containsString("not allowed"));
+ // @formatter:on
+ }
+
+ @Test
+ void inputTokenLimitExceededIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + ALICE)
+ .body(readFileFromBaseDir("max-input.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(400)
+ .body("error.type", equalTo("invalid_request_error"))
+ .body("error.code", equalTo("context_length_exceeded"))
+ .body("error.message", containsString("maximum context length"))
+ .body("error.message", containsString("100"));
+ // @formatter:on
+ }
+
+ /**
+ * Alice has a budget of 500 tokens. Each request with {@code max-output.json} projects
+ * 9 (input estimate) + 200 (capped max_output_tokens) = 209 tokens. The mock returns
+ * 100 tokens of actual usage per call, so the running total grows by 100 after each response.
+ *
+ * Budget accounting per request:
+ *
+ * 1st: 500 - 0 - 209 = 291 → forwarded; used becomes 100
+ * 2nd: 500 - 100 - 209 = 191 → forwarded; used becomes 200
+ * 3rd: 500 - 200 - 209 = 91 → forwarded; used becomes 300
+ * 4th: 500 - 300 - 209 = -9 → rejected with 429
+ *
+ *
+ * Bob's separate budget of 10 000 tokens is unaffected, so he can still send requests
+ * after alice is blocked.
+ */
+ @Test
+ void alicesTokenBudgetIsExhaustedWhileBobIsUnaffected() throws IOException {
+ for (int i = 0; i < 3; i++) {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + ALICE)
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200);
+ // @formatter:on
+ }
+
+ // Alice's budget is now exhausted
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + ALICE)
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(429)
+ .body("error.type", equalTo("rate_limit_error"))
+ .body("error.code", equalTo("token_limit_exceeded"));
+
+ // Bob's budget is independent — he can still send requests
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + BOB)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200)
+ .body("object", equalTo("response"));
+ // @formatter:on
+ }
+}
From 9d38d849ef3afb0d3aa6b5ba9d21454dd2713c22 Mon Sep 17 00:00:00 2001
From: thomas
Date: Thu, 21 May 2026 15:54:03 +0200
Subject: [PATCH 30/43] feat: improve token handling, configuration validation,
and examples for LLM Gateway
- Ensure thread-safe access to users in `SimpleAiApiStore` with `List.copyOf`.
- Introduce `visibleRemaining` to handle non-negative token values in `GoogleErrorCreator`.
- Add configuration validation in `LLMGatewayInterceptor` to enforce API key substitution.
- Enhance token limit handling to adjust output tokens dynamically in `LLMGatewayInterceptor`.
- Update Google and Claude tutorials with clearer instructions for API key usage and token limits.
---
.../interceptor/ai/LLMGatewayInterceptor.java | 28 +++++++++++++++----
.../provider/google/GoogleErrorCreator.java | 4 ++-
.../ai/store/SimpleAiApiStore.java | 2 +-
.../claude/20-Sharing-API-Keys.yaml | 2 +-
.../google/10-Basic-LLM-Gateway.yaml | 2 +-
.../google/20-Sharing-API-Keys.yaml | 2 +-
6 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java
index 41c536c39b..8a63100c08 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java
@@ -11,6 +11,7 @@
import com.predic8.membrane.core.interceptor.ai.provider.LLMRequest;
import com.predic8.membrane.core.interceptor.ai.store.AiApiStore;
import com.predic8.membrane.core.interceptor.ai.store.AiApiUser;
+import com.predic8.membrane.core.util.ConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -55,6 +56,11 @@ public void init() {
errorCreator = provider.getErrorCreator();
if (store != null)
store.init(router);
+
+ // Check if the replacement markers are still there
+ if (apiKey.contains("<<") && apiKey.contains(">>")) {
+ throw new ConfigurationException("The configuration contains the replacement marker %s. Substitute it with the API key of the model.".formatted(apiKey));
+ }
}
@Override
@@ -69,7 +75,8 @@ public Outcome handleRequest(Exchange exc) {
}
if (!exc.getRequest().isPOSTRequest()) {
- aiReq.setApiKey(apiKey);
+ if (apiKey != null)
+ aiReq.setApiKey(apiKey);
return CONTINUE;
}
@@ -90,7 +97,7 @@ public Outcome handleRequest(Exchange exc) {
// Check store limits
if (store != null) {
- var effectiveMaxTokens = Math.min(aiReq.getRequestedMaxOutputTokens(), maxOutputTokens);
+ var effectiveMaxTokens = computeEffectiveMaxOutputTokens(aiReq.getRequestedMaxOutputTokens(), maxOutputTokens);
var remaining = store.checkLimit(user, inputTokens, effectiveMaxTokens);
log.debug("User {} has {} remaining tokens left", user, remaining);
if (remaining <= 0) {
@@ -109,9 +116,14 @@ public Outcome handleRequest(Exchange exc) {
var requestedMaxOutputTokens = aiReq.getRequestedMaxOutputTokens();
- if (maxOutputTokens != 0 && (requestedMaxOutputTokens == -1 || requestedMaxOutputTokens > maxOutputTokens)) {
- log.info("Requested max. output tokens {} exceed the limit. Setting limit to {}.", requestedMaxOutputTokens, maxOutputTokens);
- aiReq.setMaxOutputTokens(maxOutputTokens);
+ if (maxOutputTokens > 0) {
+ if (requestedMaxOutputTokens == -1) {
+ log.info("No max. output requested. Setting limit to {}.", maxOutputTokens);
+ aiReq.setMaxOutputTokens(maxOutputTokens);
+ } else if (requestedMaxOutputTokens > maxOutputTokens) {
+ log.info("Requested max. output tokens {} exceed the limit. Setting limit to {}.", requestedMaxOutputTokens, maxOutputTokens);
+ aiReq.setMaxOutputTokens(maxOutputTokens);
+ }
}
if (maxInputTokens != 0) {
@@ -136,6 +148,12 @@ public Outcome handleRequest(Exchange exc) {
return CONTINUE;
}
+ long computeEffectiveMaxOutputTokens(long requestedMaxOutputTokens, long maxOutputTokens) {
+ if (requestedMaxOutputTokens <= 0)
+ return maxOutputTokens;
+ return Math.min(requestedMaxOutputTokens, maxOutputTokens);
+ }
+
@Override
public Outcome handleResponse(Exchange exc) {
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleErrorCreator.java
index 281a314594..92a115194a 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleErrorCreator.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleErrorCreator.java
@@ -21,6 +21,8 @@ public Response tokenLimitExceeded(long tokenRequired,
long tokenRemaining,
long tokenResetInSeconds) {
+ var visibleRemaining = Math.max(0, tokenRemaining);
+
return statusCode(429).json(
envelope(
429,
@@ -29,7 +31,7 @@ public Response tokenLimitExceeded(long tokenRequired,
Request requires %d tokens but only %d remain.
Retry after %d seconds.
"""
- .formatted(tokenRequired, tokenRemaining, tokenResetInSeconds)
+ .formatted(tokenRequired, visibleRemaining, tokenResetInSeconds)
.trim(),
"RESOURCE_EXHAUSTED"
)
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/SimpleAiApiStore.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/SimpleAiApiStore.java
index 0793a2c75d..f9b8218608 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/SimpleAiApiStore.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/SimpleAiApiStore.java
@@ -87,7 +87,7 @@ public void setUsers(List users) {
public List getUsers() {
synchronized (lock) {
- return users;
+ return List.copyOf(users);
}
}
diff --git a/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml
index 6cb3f0c0b3..44bc28a8ee 100644
--- a/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml
+++ b/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml
@@ -24,7 +24,7 @@
#
# 5. Requested Max. Output Tokens Exceeded
# curl -v -H "Content-Type: application/json" -H "x-api-key: abc123" -H "anthropic-version: 2023-06-01" -d @max-output.json http://localhost:2000/v1/messages
-# Check Membrane log: totalTokens should not exceed 200 even it was requested in max-output.json
+# Check Membrane log: totalTokens should not exceed 200 even though it was requested in max-output.json
api:
port: 2000
diff --git a/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml b/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml
index ce7ab99ef6..a86eec6a27 100644
--- a/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml
+++ b/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml
@@ -2,7 +2,7 @@
#
# Tutorial: Basic LLM Gateway (Google Gemini)
#
-# Replace <> with your OpenAI API key.
+# Replace <> with your Google API key.
#
# 1. Hello World
# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: <>" -d @simple.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent
diff --git a/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml
index 89db5cd5b7..0b7c8569e7 100644
--- a/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml
+++ b/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml
@@ -24,7 +24,7 @@
#
# 5. Requested Max. Output Tokens Exceeded
# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: abc123" -d @max-output.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent
-# Check Membrane log: totalTokens should not exceed 200 even it was requested in max-output.json
+# Check Membrane log: totalTokens should not exceed 200 even though it was requested in max-output.json
api:
port: 2000
From df3d145a5066234218e2bf0b6fa486954444d7f1 Mon Sep 17 00:00:00 2001
From: thomas
Date: Fri, 22 May 2026 08:01:09 +0200
Subject: [PATCH 31/43] feat: add streaming integration tests for OpenAI in LLM
Gateway
- Introduced `StreamingOpenAiLLMGatewayTutorialTest` with SSE mocking and validation.
- Added JSON fixtures (`stream.json`, `max-output-stream.json`) for testing streaming requests.
- Enhanced base test framework to support `text/event-stream` responses.
- Updated `LLMGatewayInterceptor` to handle streaming scenarios with capped tokens.
---
.../interceptor/ai/LLMGatewayInterceptor.java | 3 +-
.../ai/llmgateway/AbstractAiTutorialTest.java | 12 +-
...StreamingOpenAiLLMGatewayTutorialTest.java | 135 ++++++++++++++++++
.../llm-gateway/openai/max-output-stream.json | 6 +
.../ai/llm-gateway/openai/stream.json | 5 +
5 files changed, 159 insertions(+), 2 deletions(-)
create mode 100644 distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java
create mode 100644 distribution/tutorials/ai/llm-gateway/openai/max-output-stream.json
create mode 100644 distribution/tutorials/ai/llm-gateway/openai/stream.json
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java
index 8a63100c08..89cc3d2ede 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java
@@ -53,12 +53,13 @@ public class LLMGatewayInterceptor extends AbstractInterceptor {
@Override
public void init() {
+ super.init();
errorCreator = provider.getErrorCreator();
if (store != null)
store.init(router);
// Check if the replacement markers are still there
- if (apiKey.contains("<<") && apiKey.contains(">>")) {
+ if (apiKey != null && apiKey.contains("<<") && apiKey.contains(">>")) {
throw new ConfigurationException("The configuration contains the replacement marker %s. Substitute it with the API key of the model.".formatted(apiKey));
}
}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java
index fb5a37a07c..ded96258e5 100644
--- a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java
@@ -29,6 +29,8 @@
import java.util.function.Consumer;
+import static com.predic8.membrane.core.http.MimeType.APPLICATION_JSON;
+
/**
* Base class for AI tutorial tests. Starts a local Membrane mock of the upstream LLM API
* so tests run without a real API key and without network access to the LLM provider.
@@ -103,10 +105,18 @@ protected String getApiKeyHeader() {
return "x-api-key";
}
+ /**
+ * Content-Type the mock LLM server sends back. Defaults to {@code "application/json"}
+ * for regular responses. Override to {@code "text/event-stream"} in streaming test classes.
+ */
+ protected String mockContentType() {
+ return APPLICATION_JSON;
+ }
+
private void startMockLlmApi() throws Exception {
var si = new StaticInterceptor();
si.setSrc(mockResponse());
- si.setContentType("application/json");
+ si.setContentType(mockContentType());
var sp = new ServiceProxy(new ServiceProxyKey(MOCK_LLM_PORT), null, 0);
sp.getFlow().add(new BodyCaptureInterceptor(
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java
new file mode 100644
index 0000000000..679cfca6a7
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java
@@ -0,0 +1,135 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.openai;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.time.Duration;
+
+import static io.restassured.path.json.JsonPath.from;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Integration tests for the streaming (SSE) path of
+ * {@code distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml}.
+ *
+ * <p>The mock upstream returns {@code Content-Type: text/event-stream} with three
+ * SSE events so the gateway's SSE processing path is exercised end-to-end without
+ * a real OpenAI connection:
+ *
+ * <ul>
+ *   <li>{@code response.created} — initial acknowledgement</li>
+ *   <li>{@code response.output_text.delta} — incremental text chunk</li>
+ *   <li>{@code response.completed} — terminal event carrying usage statistics</li>
+ * </ul>
+ *
+ * <p>Because RestAssured does not handle server-sent events well, these tests use the
+ * Java {@link java.net.http.HttpClient} directly — the same approach used in
+ * {@code ServerSentEventsTutorialTest}.
+ */
+public class StreamingOpenAiLLMGatewayTutorialTest extends AbstractOpenAiTutorialTest {
+
+    private static final String RESPONSES_ENDPOINT = LOCALHOST_2000 + "/v1/responses";
+
+    @Override
+    protected String getTutorialYaml() {
+        return "10-Basic-LLM-Gateway.yaml";
+    }
+
+    /** Tell the mock server to respond as a finite SSE stream. */
+    @Override
+    protected String mockContentType() {
+        return "text/event-stream";
+    }
+
+    /**
+     * A minimal but complete SSE body: the initial {@code response.created} event, one
+     * delta event, and the terminal {@code response.completed} event that carries the
+     * usage node the gateway reads for token accounting.
+     */
+    @Override
+    protected String mockResponse() {
+        return """
+                event: response.created
+                data: {"type":"response.created","response":{"id":"resp_mock","object":"response","status":"in_progress","model":"gpt-5-nano"}}
+
+                event: response.output_text.delta
+                data: {"type":"response.output_text.delta","item_id":"msg_mock","output_index":0,"content_index":0,"delta":"I am a mock."}
+
+                event: response.completed
+                data: {"type":"response.completed","response":{"id":"resp_mock","object":"response","status":"completed","model":"gpt-5-nano","output":[{"type":"message","id":"msg_mock","status":"completed","role":"assistant","content":[{"type":"output_text","text":"I am a mock."}]}],"usage":{"input_tokens":50,"output_tokens":50,"total_tokens":100}}}
+
+                """;
+    }
+
+    /**
+     * The gateway must forward a streaming request and pass the {@code text/event-stream}
+     * response through to the client intact. The response body must contain the SSE events
+     * emitted by the upstream, including the delta text.
+     */
+    @Test
+    void streamingResponseIsForwarded() throws IOException, InterruptedException {
+        var response = sendStreamingRequest("stream.json");
+
+        assertEquals(200, response.statusCode());
+        assertTrue(response.headers().firstValue("content-type").orElse("").contains("text/event-stream"),
+                "Expected Content-Type text/event-stream");
+        assertTrue(response.body().contains("response.output_text.delta"),
+                "SSE body must contain the delta event name");
+        assertTrue(response.body().contains("I am a mock."),
+                "SSE body must contain the delta text");
+        assertTrue(response.body().contains("response.completed"),
+                "SSE body must contain the terminal event");
+    }
+
+    /**
+     * When the request carries {@code "max_output_tokens": 500} and the gateway is
+     * configured with {@code maxOutputTokens: 200}, the gateway must rewrite the field
+     * to 200 before forwarding — even for streaming requests.
+     *
+     * <p>The mock captures the forwarded request body so we can assert the capped value.
+     */
+    @Test
+    void streamingOutputTokensAreCappedBeforeForwarding() throws IOException, InterruptedException {
+        var response = sendStreamingRequest("max-output-stream.json");
+
+        assertEquals(200, response.statusCode());
+        assertThat(from(lastRequestBody).getInt("max_output_tokens"), equalTo(200));
+    }
+
+    // -------------------------------------------------------------------------
+    /** POSTs the fixture file to the Responses endpoint and returns the buffered response. */
+    private HttpResponse<String> sendStreamingRequest(String fixture) throws IOException, InterruptedException {
+        var request = HttpRequest.newBuilder()
+                .uri(URI.create(RESPONSES_ENDPOINT))
+                .timeout(Duration.ofSeconds(10))
+                .header("Content-Type", "application/json")
+                .header("Authorization", "Bearer test-key")
+                .POST(HttpRequest.BodyPublishers.ofString(readFileFromBaseDir(fixture)))
+                .build();
+
+        try (var client = HttpClient.newHttpClient()) {
+            return client.send(request, HttpResponse.BodyHandlers.ofString());
+        }
+    }
+}
diff --git a/distribution/tutorials/ai/llm-gateway/openai/max-output-stream.json b/distribution/tutorials/ai/llm-gateway/openai/max-output-stream.json
new file mode 100644
index 0000000000..0a747d70e4
--- /dev/null
+++ b/distribution/tutorials/ai/llm-gateway/openai/max-output-stream.json
@@ -0,0 +1,6 @@
+{
+ "model": "gpt-5-nano",
+ "input": "Explain in detail who you are?",
+ "max_output_tokens": 500,
+ "stream": true
+}
diff --git a/distribution/tutorials/ai/llm-gateway/openai/stream.json b/distribution/tutorials/ai/llm-gateway/openai/stream.json
new file mode 100644
index 0000000000..1c75ce00aa
--- /dev/null
+++ b/distribution/tutorials/ai/llm-gateway/openai/stream.json
@@ -0,0 +1,5 @@
+{
+ "model": "gpt-5-nano",
+ "input": "Who are you?",
+ "stream": true
+}
From d088dab34f900872ac4d08e3e5bbef233ac2bd92 Mon Sep 17 00:00:00 2001
From: thomas
Date: Fri, 22 May 2026 09:01:56 +0200
Subject: [PATCH 32/43] feat: standardize API key handling and logging in LLM
Gateway tests
- Replaced raw API key placeholders with `TEST_API_KEY` constant in tutorial tests to ensure consistency.
- Added `TEST_API_KEY` to `AbstractAiTutorialTest` for upstream key substitution verification.
- Updated `log4j2.xml` to limit logging to `com.predic8.membrane.core.interceptor.ai`.
- Introduced PostgreSQL dependency in `pom.xml` for future enhancements.
---
.../tutorials/ai/llmgateway/AbstractAiTutorialTest.java | 8 ++++++++
.../ai/llmgateway/claude/SharingApiKeysTutorialTest.java | 2 +-
.../google/SharingApiKeysGoogleTutorialTest.java | 2 +-
.../openai/SharingApiKeysOpenAiTutorialTest.java | 2 +-
4 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java
index ded96258e5..77c674ee1e 100644
--- a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java
@@ -47,6 +47,13 @@ public abstract class AbstractAiTutorialTest extends DistributionExtractingTestc
protected static final int MOCK_LLM_PORT = 3100;
+ /**
+ * Value substituted for the {@code <>} placeholder in tutorial
+ * YAMLs before Membrane starts. Tests that verify upstream key-substitution assert against
+ * this constant instead of the raw placeholder text.
+ */
+ protected static final String TEST_API_KEY = "test-upstream-key";
+
protected Process2 process;
protected volatile String lastRequestBody;
protected volatile String lastRequestApiKey;
@@ -74,6 +81,7 @@ protected String getParameters() {
void startGateway() throws Exception {
startMockLlmApi();
replaceInFile2(getTutorialYaml(), getUpstreamApiUrl(), mockApiUrl());
+ replaceInFile2(getTutorialYaml(), "<>", TEST_API_KEY);
process = startServiceProxyScript();
}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java
index 33174d6ac8..3514870774 100644
--- a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java
@@ -120,7 +120,7 @@ void userApiKeyIsReplacedWithGatewayApiKey() throws IOException {
// @formatter:on
assertThat(lastRequestApiKey, not(equalTo(ALICE)));
- assertThat(lastRequestApiKey, equalTo("<>"));
+ assertThat(lastRequestApiKey, equalTo(TEST_API_KEY));
}
@Test
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java
index 567da88b9e..79b1a71e3e 100644
--- a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java
@@ -119,7 +119,7 @@ void userApiKeyIsReplacedWithGatewayApiKey() throws IOException {
// @formatter:on
assertThat(lastRequestApiKey, not(equalTo(ALICE)));
- assertThat(lastRequestApiKey, equalTo("<>"));
+ assertThat(lastRequestApiKey, equalTo(TEST_API_KEY));
}
/**
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
index 7bd410fc24..88a6d380ad 100644
--- a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
@@ -110,7 +110,7 @@ void userApiKeyIsReplacedWithGatewayApiKey() throws IOException {
// @formatter:on
assertThat(lastRequestApiKey, not(equalTo("Bearer " + ALICE)));
- assertThat(lastRequestApiKey, equalTo("Bearer <>"));
+ assertThat(lastRequestApiKey, equalTo("Bearer " + TEST_API_KEY));
}
@Test
From ab2fac928c0ae7713a036bf257594090328b5f66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6rdes?=
Date: Fri, 22 May 2026 12:49:27 +0200
Subject: [PATCH 33/43] Update tutorial to use Anthropic-specific API key and
headers
---
.../ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml b/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml
index 8931dfd28a..5b30514e2f 100644
--- a/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml
+++ b/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml
@@ -2,18 +2,18 @@
#
# Tutorial: Basic LLM Gateway (Antropic Claude)
#
-# Replace <> with your OpenAI API key.
+# Replace <> with your Claude API key.
#
# 1. Hello World
# curl -v -H "Content-Type: application/json" -H "x-api-key: <>" -H "anthropic-version: 2023-06-01" -d @simple.json http://localhost:2000/v1/messages
# Check the response and the Membrane logs.
#
# 2. Exceed the input token limit
-# curl -H "Content-Type: application/json" -H "Authorization: Bearer <>" -d @max-input.json http://localhost:2000/v1/messages
+# curl -v -H "Content-Type: application/json" -H "x-api-key: <>" -H "anthropic-version: 2023-06-01" -d @max-input.json http://localhost:2000/v1/messages
# Returns an error because the request exceeds maxInputTokens.
#
# 3. Exceed the output token limit
-# curl -H "Content-Type: application/json" -H "Authorization: Bearer <>" -d @max-output.json http://localhost:2000/v1/messages
+# curl -v -H "Content-Type: application/json" -H "x-api-key: <>" -H "anthropic-version: 2023-06-01" -d @max-output.json http://localhost:2000/v1/messages
# Check the Membrane log for limiting max tokens to 200
api:
From 37f7515eaaa97c1e9b9c133703948dd5168e70ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6rdes?=
Date: Fri, 22 May 2026 14:30:02 +0200
Subject: [PATCH 34/43] Fix handling of invalid max output token requests in
LLMGatewayInterceptor
---
.../membrane/core/interceptor/ai/LLMGatewayInterceptor.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java
index 89cc3d2ede..4542c16548 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java
@@ -118,7 +118,7 @@ public Outcome handleRequest(Exchange exc) {
var requestedMaxOutputTokens = aiReq.getRequestedMaxOutputTokens();
if (maxOutputTokens > 0) {
- if (requestedMaxOutputTokens == -1) {
+ if (requestedMaxOutputTokens <= 0) {
log.info("No max. output requested. Setting limit to {}.", maxOutputTokens);
aiReq.setMaxOutputTokens(maxOutputTokens);
} else if (requestedMaxOutputTokens > maxOutputTokens) {
From 44850fe510f33de3ccc67bc98eb78c38752c0803 Mon Sep 17 00:00:00 2001
From: thomas
Date: Tue, 26 May 2026 11:59:47 +0200
Subject: [PATCH 35/43] refactor: migrate OpenAI provider to Chat Completions
framework and add policies support
- Unified OpenAI and Chat Completions error handling under `ChatCompletionsErrorCreator`.
- Deprecated older OpenAI-specific classes in favor of `ChatCompletions` equivalents.
- Introduced detailed usage policies handling in `LLMGatewayInterceptor`.
- Updated YAML tutorials to reflect the new `policies` configuration model.
---
.../ai/ChatCompletionDoneEvent.java | 15 ----
.../ai/provider/AbstractLLMMessage.java | 25 -------
.../ai/provider/LLMErrorCreator.java | 24 ------
.../interceptor/ai/provider/LLMProvider.java | 13 ----
.../interceptor/ai/provider/LLMRequest.java | 29 --------
.../interceptor/ai/provider/LLMResponse.java | 18 -----
.../ai/provider/claude/ClaudeProvider.java | 33 ---------
.../ai/provider/claude/ContentBlockStart.java | 23 ------
.../ai/provider/claude/ToolUse.java | 22 ------
.../ai/provider/google/GoogleProvider.java | 33 ---------
.../ai/provider/openai/OpenAIProvider.java | 44 -----------
.../OpenAiChatCompletionsLLMResponse.java | 55 --------------
.../core/interceptor/ai/store/AiApiStore.java | 29 --------
.../core/interceptor/ai/store/Usage.java | 3 -
.../{ai => llmgateway}/AbstractLLMEvent.java | 16 +++-
.../llmgateway/ChatCompletionDoneEvent.java | 29 ++++++++
.../ChatCompletionEvent.java | 16 +++-
.../LLMGatewayInterceptor.java | 74 ++++++++++---------
.../{ai => llmgateway}/ResponsesApiEvent.java | 16 +++-
.../provider/AbstractLLMErrorCreator.java | 16 +++-
.../provider/AbstractLLMMessage.java | 39 ++++++++++
.../provider/AbstractLLMRequest.java | 16 +++-
.../provider/AbstractLLMResponse.java | 16 +++-
.../llmgateway/provider/LLMErrorCreator.java | 38 ++++++++++
.../llmgateway/provider/LLMProvider.java | 27 +++++++
.../llmgateway/provider/LLMRequest.java | 43 +++++++++++
.../llmgateway/provider/LLMResponse.java | 32 ++++++++
.../ChatCompletionsErrorCreator.java} | 20 ++++-
.../ChatCompletionsProvider.java | 62 ++++++++++++++++
.../ChatCompletionsRequest.java} | 23 +++++-
.../ChatCompletionsResponse.java | 69 +++++++++++++++++
.../provider/claude/ClaudeErrorCreator.java | 20 ++++-
.../provider/claude/ClaudeErrorResponse.java | 16 +++-
.../provider/claude/ClaudeLLMRequest.java | 18 ++++-
.../provider/claude/ClaudeLLMResponse.java | 22 +++++-
.../provider/claude/ClaudeProvider.java | 47 ++++++++++++
.../provider/claude/ContentBlockDelta.java | 16 +++-
.../provider/claude/ContentBlockStart.java | 37 ++++++++++
.../provider/claude/MessageDelta.java | 18 ++++-
.../llmgateway/provider/claude/ToolUse.java | 36 +++++++++
.../provider/google/GoogleErrorCreator.java | 18 ++++-
.../provider/google/GoogleLLMRequest.java | 18 ++++-
.../provider/google/GoogleLLMResponse.java | 22 +++++-
.../provider/google/GoogleProvider.java | 47 ++++++++++++
.../openai/AbstractOpenAiLLMRequest.java | 18 ++++-
.../openai/OpenAIChatCompletionsRequest.java | 29 ++++++++
.../provider/openai/OpenAIProvider.java | 59 +++++++++++++++
.../openai/OpenAiLLMResponsesRequest.java | 16 +++-
.../openai/OpenAiLLMResponsesResponse.java | 24 ++++--
.../llmgateway/store/AiApiStore.java | 43 +++++++++++
.../{ai => llmgateway}/store/AiApiUser.java | 16 +++-
.../store/JDBCAiApiUsageStore.java | 18 ++++-
.../store/SimpleAiApiStore.java | 16 +++-
.../interceptor/llmgateway/store/Usage.java | 17 +++++
.../membrane/core/util/http/SSEParser.java | 14 ++++
.../core/util/http/SSEParserTest.java | 14 ++++
.../claude/20-Sharing-API-Keys.yaml | 9 ++-
.../google/20-Sharing-API-Keys.yaml | 13 ++--
.../openai/20-Sharing-API-Keys.yaml | 9 ++-
59 files changed, 1109 insertions(+), 459 deletions(-)
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/ChatCompletionDoneEvent.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMMessage.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMErrorCreator.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMProvider.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMRequest.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMResponse.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeProvider.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ContentBlockStart.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ToolUse.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleProvider.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAIProvider.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiChatCompletionsLLMResponse.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/AiApiStore.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/Usage.java
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/AbstractLLMEvent.java (68%)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionDoneEvent.java
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/ChatCompletionEvent.java (71%)
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/LLMGatewayInterceptor.java (83%)
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/ResponsesApiEvent.java (68%)
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/AbstractLLMErrorCreator.java (55%)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMMessage.java
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/AbstractLLMRequest.java (76%)
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/AbstractLLMResponse.java (80%)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMErrorCreator.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMResponse.java
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai/provider/openai/OpenAiErrorCreator.java => llmgateway/provider/chatcompletions/ChatCompletionsErrorCreator.java} (71%)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai/provider/openai/OpenAiLLMChatCompletionsRequest.java => llmgateway/provider/chatcompletions/ChatCompletionsRequest.java} (53%)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsResponse.java
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/claude/ClaudeErrorCreator.java (76%)
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/claude/ClaudeErrorResponse.java (78%)
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/claude/ClaudeLLMRequest.java (79%)
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/claude/ClaudeLLMResponse.java (72%)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/claude/ContentBlockDelta.java (56%)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockStart.java
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/claude/MessageDelta.java (74%)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ToolUse.java
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/google/GoogleErrorCreator.java (80%)
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/google/GoogleLLMRequest.java (82%)
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/google/GoogleLLMResponse.java (56%)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleProvider.java
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/openai/AbstractOpenAiLLMRequest.java (75%)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIChatCompletionsRequest.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/openai/OpenAiLLMResponsesRequest.java (59%)
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/provider/openai/OpenAiLLMResponsesResponse.java (65%)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiStore.java
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/store/AiApiUser.java (77%)
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/store/JDBCAiApiUsageStore.java (77%)
rename core/src/main/java/com/predic8/membrane/core/interceptor/{ai => llmgateway}/store/SimpleAiApiStore.java (82%)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/Usage.java
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/ChatCompletionDoneEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/ChatCompletionDoneEvent.java
deleted file mode 100644
index 520118262a..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/ChatCompletionDoneEvent.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai;
-
-import com.fasterxml.jackson.databind.node.NullNode;
-
-public class ChatCompletionDoneEvent extends AbstractLLMEvent {
-
- public ChatCompletionDoneEvent() {
- super(NullNode.getInstance());
- }
-
- @Override
- public String getType() {
- return "chat.completion.done";
- }
-}
\ No newline at end of file
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMMessage.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMMessage.java
deleted file mode 100644
index 488dabe3ce..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMMessage.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.provider;
-
-import com.predic8.membrane.core.exchange.Exchange;
-
-public class AbstractLLMMessage {
-
- protected final Exchange exchange;
-
- public enum API { COMPLETIONS, NORMAL }
-
- protected API api;
-
- protected AbstractLLMMessage(Exchange exchange) {
- this.exchange = exchange;
- api = getAPI(exchange);
- }
-
- protected API getAPI(Exchange exchange) {
- if (exchange.getRequest().getUri().contains("/chat/completions")) {
- return API.COMPLETIONS;
- } else {
- return API.NORMAL;
- }
- }
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMErrorCreator.java
deleted file mode 100644
index ee06f9b7c3..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMErrorCreator.java
+++ /dev/null
@@ -1,24 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.provider;
-
-import com.predic8.membrane.core.http.Response;
-
-import java.util.Collection;
-
-public interface LLMErrorCreator {
-
- Response invalidRequestError(String message);
-
- Response tokenLimitExceeded(long tokenRequired, long tokenRemaining, long tokenResetInSeconds);
-
- Response modelNotAllowed(String model, Collection allowedModels);
-
- Response authenticationFailed();
-
- /**
- *
- * @param maxTokens as configured
- * @param estimatedTokens estimated number of input tokens
- * @return Response error response
- */
- Response inputTokensExceeded(long maxTokens, long estimatedTokens);
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMProvider.java
deleted file mode 100644
index 5b52994751..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMProvider.java
+++ /dev/null
@@ -1,13 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.provider;
-
-import com.predic8.membrane.core.exchange.Exchange;
-
-import java.util.function.Consumer;
-
-public interface LLMProvider {
-
- LLMRequest getLLMRequest(Exchange request);
- LLMResponse getLLMResponse(Exchange request, Consumer postProcessor);
- LLMErrorCreator getErrorCreator();
-
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMRequest.java
deleted file mode 100644
index a6f377686b..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMRequest.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.provider;
-
-import com.fasterxml.jackson.databind.node.ObjectNode;
-
-import java.util.List;
-
-public interface LLMRequest {
-
- String getModel();
-
- String getApiKey();
-
- void setApiKey(String apiKey);
-
- /**
- * The max number of tokens that the model is allowed to generate as specified by the client.
- * @return The max number of tokens that the model is allowed to generate. -1 if no limit is set.
- */
- long getRequestedMaxOutputTokens();
-
- void setMaxOutputTokens(int maxOutputTokens);
-
- long estimateInputTokens();
-
- ObjectNode getJson();
-
- List getTools();
-
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMResponse.java
deleted file mode 100644
index fd4979ca7e..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/LLMResponse.java
+++ /dev/null
@@ -1,18 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.provider;
-
-import com.predic8.membrane.core.interceptor.ai.store.Usage;
-import com.predic8.membrane.core.util.http.SSEParser.SSEEvent;
-
-import java.util.Set;
-
-public interface LLMResponse {
-
- boolean isError();
-
- Usage getUsage();
-
- Set getTerminalEvents();
-
- void process(SSEEvent event);
-
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeProvider.java
deleted file mode 100644
index 99ba4820e7..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeProvider.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.provider.claude;
-
-import com.predic8.membrane.annot.MCElement;
-import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMErrorCreator;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMProvider;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMRequest;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMResponse;
-
-import java.util.function.Consumer;
-
-/**
- * @description (Experimental) Anthroic Claude provider configuration
- * Use to configure a LLM gateway to use the anthropic API
- */
-@MCElement( name="claude")
-public class ClaudeProvider implements LLMProvider {
-
- @Override
- public LLMRequest getLLMRequest(Exchange exchange) {
- return new ClaudeLLMRequest(exchange);
- }
-
- @Override
- public LLMResponse getLLMResponse(Exchange request, Consumer postProcessor) {
- return new ClaudeLLMResponse(request, postProcessor);
- }
-
- @Override
- public LLMErrorCreator getErrorCreator() {
- return new ClaudeErrorCreator();
- }
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ContentBlockStart.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ContentBlockStart.java
deleted file mode 100644
index b98f1d5827..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ContentBlockStart.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.provider.claude;
-
-import com.fasterxml.jackson.databind.node.ObjectNode;
-
-public class ContentBlockStart {
-
- private ToolUse toolUse;
-
- public static ContentBlockStart from(ObjectNode on) {
- var cbs = new ContentBlockStart();
- var cb = (ObjectNode) on.path("content_block");
-
- if ("tool_use".equals(cb.path("type").asText())) {
- cbs.toolUse = ToolUse.from(cb);
- }
-
- return cbs;
- }
-
- public ToolUse getToolUse() {
- return toolUse;
- }
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ToolUse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ToolUse.java
deleted file mode 100644
index 59ef545e68..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ToolUse.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.provider.claude;
-
-import com.fasterxml.jackson.databind.node.ObjectNode;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class ToolUse {
-
- private static final Logger log = LoggerFactory.getLogger(ToolUse.class);
-
- private String name;
-
- public static ToolUse from(ObjectNode on) {
- var tu = new ToolUse();
- tu.name = on.path("name").asText();
- return tu;
- }
-
- public String getName() {
- return name;
- }
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleProvider.java
deleted file mode 100644
index 4ee8860f89..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleProvider.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.provider.google;
-
-import com.predic8.membrane.annot.MCElement;
-import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMErrorCreator;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMProvider;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMRequest;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMResponse;
-
-import java.util.function.Consumer;
-
-/**
- * @description (Experimental)Google AI provider configuration
- * Use to configure a LLM gateway to use the Google LLM API
- */
-@MCElement( name="google",id = "google-ai-provider")
-public class GoogleProvider implements LLMProvider {
-
- @Override
- public LLMRequest getLLMRequest(Exchange exchange) {
- return new GoogleLLMRequest(exchange);
- }
-
- @Override
- public LLMResponse getLLMResponse(Exchange request, Consumer postProcessor) {
- return new GoogleLLMResponse(request, postProcessor);
- }
-
- @Override
- public LLMErrorCreator getErrorCreator() {
- return new GoogleErrorCreator();
- }
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAIProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAIProvider.java
deleted file mode 100644
index 8a1aa29436..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAIProvider.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.provider.openai;
-
-import com.predic8.membrane.annot.MCElement;
-import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMErrorCreator;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMProvider;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMRequest;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMResponse;
-
-import java.util.function.Consumer;
-
-/**
- * @description OpenAI provider configuration
- * Use to configure a LLM gateway to use the OpenAI API
- */
-@MCElement( name="openai")
-public class OpenAIProvider implements LLMProvider {
-
- @Override
- public LLMRequest getLLMRequest(Exchange exchange) {
- if (isResponsesApi(exchange)) {
- return new OpenAiLLMResponsesRequest(exchange);
- }
-
- return new OpenAiLLMChatCompletionsRequest(exchange);
- }
-
- @Override
- public LLMResponse getLLMResponse(Exchange exchange, Consumer postProcessor) {
- if (isResponsesApi(exchange)) {
- return new OpenAiLLMResponsesResponse(exchange,postProcessor);
- }
- return new OpenAiChatCompletionsLLMResponse(exchange, postProcessor);
- }
-
- @Override
- public LLMErrorCreator getErrorCreator() {
- return new OpenAiErrorCreator();
- }
-
- static boolean isResponsesApi(Exchange exchange) {
- return exchange.getRequest().getUri().startsWith("/v1/responses");
- }
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiChatCompletionsLLMResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiChatCompletionsLLMResponse.java
deleted file mode 100644
index bb0a206a8c..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiChatCompletionsLLMResponse.java
+++ /dev/null
@@ -1,55 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.provider.openai;
-
-import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.ai.AbstractLLMEvent;
-import com.predic8.membrane.core.interceptor.ai.provider.AbstractLLMResponse;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMResponse;
-import com.predic8.membrane.core.interceptor.ai.store.Usage;
-import com.predic8.membrane.core.util.http.SSEParser;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Set;
-import java.util.function.Consumer;
-
-public class OpenAiChatCompletionsLLMResponse extends AbstractLLMResponse {
-
- private static final Logger log = LoggerFactory.getLogger(OpenAiChatCompletionsLLMResponse.class);
-
- public OpenAiChatCompletionsLLMResponse(Exchange exchange, Consumer postProcessor) {
- super(exchange, postProcessor);
- }
-
- @Override
- public Usage getUsage() {
-
- var usage = json.path("usage");
-
- var inputTokens = usage.path("prompt_tokens").asInt(0);
- var outputTokens = usage.path("completion_tokens").asInt(0);
- var totalTokens = usage.path("total_tokens").asInt(inputTokens + outputTokens);
-
- return new Usage(
- inputTokens,
- outputTokens,
- totalTokens
- );
- }
-
- @Override
- public Set getTerminalEvents() {
- return Set.of("[DONE]");
- }
-
- @Override
- protected void processTerminalEvent(SSEParser.SSEEvent terminal) {
- postProcessor.accept(OpenAiChatCompletionsLLMResponse.this);
- }
-
- @Override
- public void process(SSEParser.SSEEvent e) {
- log.debug("Data: {}", e.data());
- var event = AbstractLLMEvent.create(e);
- log.debug("Event: {}", event);
- }
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/AiApiStore.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/AiApiStore.java
deleted file mode 100644
index 73674eeef1..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/AiApiStore.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.store;
-
-import com.predic8.membrane.core.router.Router;
-
-import java.util.Optional;
-
-/**
- * @TODO
- * - Store .status, .error, .model, .stop_reason
- */
-public interface AiApiStore {
-
- default void init(Router router) {
- }
-
- void store(AiApiUser user, Usage usage);
-
- Optional getUser(String token);
-
- /**
- * Checks if the user has enough tokens to make the request.
- * @param user The user to check
- * @return Estimated number of tokens that the user has left after this request
- */
- long checkLimit(AiApiUser user, long inputTokens, long outputTokens);
-
- long getRemainingResetTime();
-}
-
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/Usage.java b/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/Usage.java
deleted file mode 100644
index 9288bba508..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/Usage.java
+++ /dev/null
@@ -1,3 +0,0 @@
-package com.predic8.membrane.core.interceptor.ai.store;
-
-public record Usage(int inputTokens, int outputTokens, int totalTokens) {}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/AbstractLLMEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java
similarity index 68%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/AbstractLLMEvent.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java
index d39b3ddafe..ed9fe0929c 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/AbstractLLMEvent.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionDoneEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionDoneEvent.java
new file mode 100644
index 0000000000..cc234b8113
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionDoneEvent.java
@@ -0,0 +1,29 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway;
+
+import com.fasterxml.jackson.databind.node.NullNode;
+
+/** Event of the fixed type {@code chat.completion.done}; it is backed by a JSON null node instead of a payload. */
+public class ChatCompletionDoneEvent extends AbstractLLMEvent {
+    public ChatCompletionDoneEvent() {
+        super(NullNode.instance); // Jackson's shared null node, the same object getInstance() returns
+    }
+
+    @Override
+    public String getType() {
+        return "chat.completion.done";
+    }
+}
\ No newline at end of file
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/ChatCompletionEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionEvent.java
similarity index 71%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/ChatCompletionEvent.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionEvent.java
index 86531144d9..1fde1e736f 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/ChatCompletionEvent.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionEvent.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway;
import com.fasterxml.jackson.databind.JsonNode;
import org.slf4j.Logger;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java
similarity index 83%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java
index 89cc3d2ede..3725415684 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/LLMGatewayInterceptor.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway;
import com.predic8.membrane.annot.MCAttribute;
import com.predic8.membrane.annot.MCChildElement;
@@ -6,17 +20,15 @@
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.AbstractInterceptor;
import com.predic8.membrane.core.interceptor.Outcome;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMErrorCreator;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMProvider;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMRequest;
-import com.predic8.membrane.core.interceptor.ai.store.AiApiStore;
-import com.predic8.membrane.core.interceptor.ai.store.AiApiUser;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.store.AiApiStore;
+import com.predic8.membrane.core.interceptor.llmgateway.store.AiApiUser;
import com.predic8.membrane.core.util.ConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.util.List;
-
import static com.predic8.membrane.core.interceptor.Outcome.CONTINUE;
import static com.predic8.membrane.core.interceptor.Outcome.RETURN;
import static com.predic8.membrane.core.util.json.JsonUtil.setJsonBody;
@@ -47,7 +59,8 @@ public class LLMGatewayInterceptor extends AbstractInterceptor {
private String apiKey;
private int maxOutputTokens;
private int maxInputTokens;
- private List models;
+
+ private Policies policies = new Policies();
private AiApiStore store;
@@ -135,10 +148,10 @@ public Outcome handleRequest(Exchange exc) {
}
}
- if (models != null) {
+ if (policies.getModels() != null) {
var model = aiReq.getModel();
- if (!models.contains(model)) {
- exc.setResponse(errorCreator.modelNotAllowed(model, models));
+ if (!policies.getModels().contains(model)) {
+ exc.setResponse(errorCreator.modelNotAllowed(model, policies.getModels()));
return RETURN;
}
}
@@ -157,14 +170,9 @@ long computeEffectiveMaxOutputTokens(long requestedMaxOutputTokens, long maxOutp
@Override
public Outcome handleResponse(Exchange exc) {
-
provider.getLLMResponse(exc, res -> {
var user = exc.getProperty(MEMBRANE_AI_USER, AiApiUser.class);
- if (log.isDebugEnabled() && user != null) {
- log.debug("Token usage of user {}: {}", user, res.getUsage());
- } else {
- log.info("Token usage: {}", res.getUsage());
- }
+ log.debug("Token usage of user {}: {}", user, res.getUsage());
if (store != null) {
store.store(user, res.getUsage());
}
@@ -196,7 +204,7 @@ public AiApiStore getAiStore() {
* A store is needed for user authentication at the gateway.
* The gateway will use the store to enforce token limits and log usage statistics.
*/
- @MCChildElement(allowForeign = true, order = 10)
+ @MCChildElement(allowForeign = true, order = 30)
public void setAiStore(AiApiStore store) {
this.store = store;
}
@@ -235,19 +243,6 @@ public void setMaxInputTokens(int maxInputTokens) {
this.maxInputTokens = maxInputTokens;
}
- public List getModels() {
- return models;
- }
-
- /**
- * @param models List of models that can be used by the gateway.
- * @desciption Restricts the models that can be used by the gateway.
- * @default null (no restriction)
- */
- @MCAttribute
- public void setModels(List models) {
- this.models = models;
- }
public LLMProvider getProvider() {
return provider;
@@ -258,8 +253,21 @@ public LLMProvider getProvider() {
* @description The LLM provider to use. Currently, OpenAI, Anthropic and Gemini are supported.
* The provider determines the API used to talk to the LLM. The provider can be different as long as the API is supported.
*/
- @MCChildElement(allowForeign = true)
+ @MCChildElement(order = 10)
public void setProvider(LLMProvider provider) {
this.provider = provider;
}
+
+ public Policies getPolicies() {
+ return policies;
+ }
+
+ /**
+ * @description Policies that restrict the usage of the gateway, e.g. the models that clients are allowed to request.
+ * @param policies Usage policy for the LLM Gateway.
+ */
+ @MCChildElement(order = 20)
+ public void setPolicies(Policies policies) {
+ this.policies = policies;
+ }
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/ResponsesApiEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ResponsesApiEvent.java
similarity index 68%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/ResponsesApiEvent.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ResponsesApiEvent.java
index af2a351dc6..4b726bec62 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/ResponsesApiEvent.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ResponsesApiEvent.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMErrorCreator.java
similarity index 55%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMErrorCreator.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMErrorCreator.java
index 6e6d739711..6ecf4d7ef5 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMErrorCreator.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMErrorCreator.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai.provider;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
import com.fasterxml.jackson.databind.ObjectMapper;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMMessage.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMMessage.java
new file mode 100644
index 0000000000..391324f38e
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMMessage.java
@@ -0,0 +1,39 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.predic8.membrane.core.exchange.Exchange;
+
+/** Keeps the exchange of an LLM call together with the API flavor derived from its request URI. */
+public class AbstractLLMMessage {
+
+    /** API flavor of a call: chat completions, or anything else. */
+    public enum API { COMPLETIONS, NORMAL }
+
+    protected final Exchange exchange;
+
+    protected API api;
+
+    protected AbstractLLMMessage(Exchange exchange) {
+        this.exchange = exchange;
+        this.api = getAPI(exchange);
+    }
+
+    /** @return {@code COMPLETIONS} if the request URI contains {@code /chat/completions}, else {@code NORMAL} */
+    protected API getAPI(Exchange exchange) {
+        var requestUri = exchange.getRequest().getUri();
+        return requestUri.contains("/chat/completions") ? API.COMPLETIONS : API.NORMAL;
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java
similarity index 76%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMRequest.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java
index 95ecc7a77e..f5955d6acb 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai.provider;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMResponse.java
similarity index 80%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMResponse.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMResponse.java
index 4967d34af4..4732d0a0a5 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/AbstractLLMResponse.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMResponse.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai.provider;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMErrorCreator.java
new file mode 100644
index 0000000000..732a1332fe
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMErrorCreator.java
@@ -0,0 +1,38 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.predic8.membrane.core.http.Response;
+
+import java.util.Collection;
+
+/** Creates the error responses of the LLM gateway; each provider supplies its own implementation. */
+public interface LLMErrorCreator {
+    Response invalidRequestError(String message);
+
+    Response tokenLimitExceeded(long tokenRequired, long tokenRemaining, long tokenResetInSeconds);
+
+    /** @param allowedModels names of the models that may be used instead of the rejected {@code model} */
+    Response modelNotAllowed(String model, Collection<String> allowedModels);
+
+    Response authenticationFailed();
+
+    /**
+     * @param maxTokens as configured
+     * @param estimatedTokens estimated number of input tokens
+     * @return error response for a request whose estimated input tokens exceed the configured maximum
+     */
+    Response inputTokensExceeded(long maxTokens, long estimatedTokens);
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java
new file mode 100644
index 0000000000..1fb2fc4eae
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java
@@ -0,0 +1,27 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.predic8.membrane.core.exchange.Exchange;
+
+import java.util.function.Consumer;
+
+/** Supplies the provider-specific request wrapper, response wrapper and error creator used by the LLM gateway. */
+public interface LLMProvider {
+    LLMRequest getLLMRequest(Exchange request);
+    /** @param postProcessor callback that receives the response, e.g. to record its token usage; typed so it can call {@link LLMResponse} methods */
+    LLMResponse getLLMResponse(Exchange request, Consumer<LLMResponse> postProcessor);
+    LLMErrorCreator getErrorCreator();
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
new file mode 100644
index 0000000000..371115e911
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
@@ -0,0 +1,43 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+import java.util.List;
+
+public interface LLMRequest {
+ // Accessors for an LLM API request: model, API key, output-token limit, input-token estimate, JSON body and tools.
+ String getModel();
+
+ String getApiKey();
+
+ void setApiKey(String apiKey);
+
+ /**
+ * The max number of tokens that the model is allowed to generate as specified by the client.
+ * @return The max number of tokens that the model is allowed to generate. -1 if no limit is set.
+ */
+ long getRequestedMaxOutputTokens();
+
+ void setMaxOutputTokens(int maxOutputTokens);
+
+ long estimateInputTokens();
+
+ ObjectNode getJson();
+ // NOTE(review): raw List - the element type is not visible here; parameterize it once confirmed against the implementations.
+ List getTools();
+
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMResponse.java
new file mode 100644
index 0000000000..3d3ed9bd78
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMResponse.java
@@ -0,0 +1,32 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.predic8.membrane.core.interceptor.llmgateway.store.Usage;
+import com.predic8.membrane.core.util.http.SSEParser.SSEEvent;
+
+import java.util.Set;
+
+/** Response of an LLM API as seen by the gateway: error state, token usage and SSE event handling. */
+public interface LLMResponse {
+    boolean isError();
+    /** @return token usage of this response */
+    Usage getUsage();
+
+    /** @return names of the events that end a stream, e.g. {@code "[DONE]"}; presumably matched against SSE data - confirm in the SSE handling */
+    Set<String> getTerminalEvents();
+
+    void process(SSEEvent event);
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsErrorCreator.java
similarity index 71%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiErrorCreator.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsErrorCreator.java
index 7f51494ad5..643786b0a4 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiErrorCreator.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsErrorCreator.java
@@ -1,14 +1,28 @@
-package com.predic8.membrane.core.interceptor.ai.provider.openai;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions;
import com.predic8.membrane.core.http.Response;
-import com.predic8.membrane.core.interceptor.ai.provider.AbstractLLMErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMErrorCreator;
import java.util.Collection;
import static com.predic8.membrane.core.http.Header.WWW_AUTHENTICATE;
import static com.predic8.membrane.core.http.Response.*;
-public class OpenAiErrorCreator extends AbstractLLMErrorCreator {
+public class ChatCompletionsErrorCreator extends AbstractLLMErrorCreator {
@Override
public Response invalidRequestError(String message) {
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java
new file mode 100644
index 0000000000..1ac5be3699
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java
@@ -0,0 +1,62 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions;
+
+import com.predic8.membrane.annot.MCElement;
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+
+import java.util.function.Consumer;
+
+/**
+ * @description
+ * OpenAI Chat Completions API compatible provider.
+ * Can be used for the following providers:
+ *
+ * - Azure OpenAI
+ * - Google Gemini (OpenAI compatible endpoint)
+ * - TogetherAI
+ * - Fireworks AI
+ * - DeepSeek AI
+ * - OpenRouter
+ * - Mistral AI
+ * - DeepInfra
+ * - SiliconFlow
+ * - NVIDIA NIM
+ * - LM Studio
+ * - vLLM
+ * - Ollama
+ *
+ */
+@MCElement(name = "chatCompletions")
+public class ChatCompletionsProvider implements LLMProvider {
+ @Override
+ public LLMRequest getLLMRequest(Exchange request) {
+ return new ChatCompletionsRequest(request);
+ }
+ /** Creates a ChatCompletionsResponse for the exchange; the post-processor is passed through unchanged. */
+ @Override
+ public LLMResponse getLLMResponse(Exchange request, Consumer postProcessor) {
+ return new ChatCompletionsResponse(request, postProcessor);
+ }
+ /** Returns this package's error creator instead of null, so callers always get a usable instance. */
+ @Override
+ public LLMErrorCreator getErrorCreator() {
+ return new ChatCompletionsErrorCreator();
+ }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiLLMChatCompletionsRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
similarity index 53%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiLLMChatCompletionsRequest.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
index 5c57339682..4ecbf9065a 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiLLMChatCompletionsRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
@@ -1,14 +1,29 @@
-package com.predic8.membrane.core.interceptor.ai.provider.openai;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions;
import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.AbstractOpenAiLLMRequest;
import java.util.List;
import static java.util.Collections.emptyList;
-public class OpenAiLLMChatCompletionsRequest extends AbstractOpenAiLLMRequest {
+public class ChatCompletionsRequest extends AbstractOpenAiLLMRequest {
- public OpenAiLLMChatCompletionsRequest(Exchange exchange) {
+ public ChatCompletionsRequest(Exchange exchange) {
super(exchange);
if (json == null) {
@@ -24,7 +39,7 @@ public OpenAiLLMChatCompletionsRequest(Exchange exchange) {
@Override
public void setMaxOutputTokens(int maxOutputTokens) {
- json.put("max_completion_tokens", maxOutputTokens);
+ json.put("max_tokens", maxOutputTokens);
}
public List getTools() {
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsResponse.java
new file mode 100644
index 0000000000..2b1acc0047
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsResponse.java
@@ -0,0 +1,69 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions;
+
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.AbstractLLMEvent;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.store.Usage;
+import com.predic8.membrane.core.util.http.SSEParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Set;
+import java.util.function.Consumer;
+
+public class ChatCompletionsResponse extends AbstractLLMResponse {
+
+ private static final Logger log = LoggerFactory.getLogger(ChatCompletionsResponse.class);
+ /** Passes the exchange and the post-processor straight through to the base class. */
+ public ChatCompletionsResponse(Exchange exchange, Consumer postProcessor) {
+ super(exchange, postProcessor);
+ }
+ /** Reads token counts from the "usage" object; absent counts are 0, a missing total is input + output. */
+ @Override
+ public Usage getUsage() {
+ // NOTE(review): assumes the inherited json field is a parsed, non-null JsonNode - confirm in AbstractLLMResponse.
+ var usage = json.path("usage");
+
+ var inputTokens = usage.path("prompt_tokens").asInt(0);
+ var outputTokens = usage.path("completion_tokens").asInt(0);
+ var totalTokens = usage.path("total_tokens").asInt(inputTokens + outputTokens);
+
+ return new Usage(
+ inputTokens,
+ outputTokens,
+ totalTokens
+ );
+ }
+ /** The only terminal marker is the literal "[DONE]". */
+ @Override
+ public Set<String> getTerminalEvents() {
+ return Set.of("[DONE]");
+ }
+ /** Hands this response to the post-processor; presumably invoked by the base class on a terminal event. */
+ @Override
+ protected void processTerminalEvent(SSEParser.SSEEvent terminal) {
+ postProcessor.accept(this);
+ }
+ /** Logs the raw event data and the event created from it at debug level; the event is not used further. */
+ @Override
+ public void process(SSEParser.SSEEvent e) {
+ log.debug("Data: {}", e.data());
+ var event = AbstractLLMEvent.create(e);
+ log.debug("Event: {}", event);
+ }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorCreator.java
similarity index 76%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeErrorCreator.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorCreator.java
index 506b6d2083..1fbcf2f1a1 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeErrorCreator.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorCreator.java
@@ -1,8 +1,22 @@
-package com.predic8.membrane.core.interceptor.ai.provider.claude;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.claude;
import com.predic8.membrane.core.http.Response;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMErrorCreator;
-import com.predic8.membrane.core.interceptor.ai.provider.claude.ClaudeErrorResponse.ClaudeError;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.claude.ClaudeErrorResponse.ClaudeError;
import java.util.Collection;
import java.util.UUID;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeErrorResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorResponse.java
similarity index 78%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeErrorResponse.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorResponse.java
index 2bdce96c2e..0ff004834e 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeErrorResponse.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorResponse.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai.provider.claude;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.claude;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.core.JsonProcessingException;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java
similarity index 79%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeLLMRequest.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java
index 2a1151e855..fa5279afe4 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java
@@ -1,8 +1,22 @@
-package com.predic8.membrane.core.interceptor.ai.provider.claude;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.claude;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.ai.provider.AbstractLLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeLLMResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMResponse.java
similarity index 72%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeLLMResponse.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMResponse.java
index f487d43b0d..8d534643ea 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ClaudeLLMResponse.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMResponse.java
@@ -1,10 +1,24 @@
-package com.predic8.membrane.core.interceptor.ai.provider.claude;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.claude;
import com.fasterxml.jackson.databind.JsonNode;
import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.ai.provider.AbstractLLMResponse;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMResponse;
-import com.predic8.membrane.core.interceptor.ai.store.Usage;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.store.Usage;
import com.predic8.membrane.core.util.http.SSEParser.SSEEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java
new file mode 100644
index 0000000000..a296575058
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java
@@ -0,0 +1,47 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.claude;
+
+import com.predic8.membrane.annot.MCElement;
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+
+import java.util.function.Consumer;
+
+/**
+ * @description (Experimental) Anthropic Claude provider configuration
+ * Use to configure an LLM gateway to use the Anthropic API
+ */
+@MCElement(name = "claude")
+public class ClaudeProvider implements LLMProvider {
+ /** Creates a ClaudeLLMRequest for the given exchange. */
+ @Override
+ public LLMRequest getLLMRequest(Exchange exchange) {
+ return new ClaudeLLMRequest(exchange);
+ }
+ /** Creates a ClaudeLLMResponse for the given exchange; the post-processor is passed through unchanged. */
+ @Override
+ public LLMResponse getLLMResponse(Exchange exchange, Consumer postProcessor) {
+ return new ClaudeLLMResponse(exchange, postProcessor);
+ }
+ /** Supplies a new ClaudeErrorCreator on every call. */
+ @Override
+ public LLMErrorCreator getErrorCreator() {
+ return new ClaudeErrorCreator();
+ }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ContentBlockDelta.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockDelta.java
similarity index 56%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ContentBlockDelta.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockDelta.java
index f8d32c1c97..5e5a0648bb 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/ContentBlockDelta.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockDelta.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai.provider.claude;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.claude;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockStart.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockStart.java
new file mode 100644
index 0000000000..bdf2be207b
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockStart.java
@@ -0,0 +1,37 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.claude;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+public class ContentBlockStart {
+ /** Tool invocation carried by the block; stays null unless the block's type is "tool_use". */
+ private ToolUse toolUse;
+ /** Builds an instance from the given JSON object by inspecting its "content_block" field. */
+ public static ContentBlockStart from(ObjectNode on) {
+ var cbs = new ContentBlockStart();
+ var cb = on.path("content_block");
+ // path() yields a MissingNode for an absent field, so test the node type instead of casting blindly.
+ if (cb instanceof ObjectNode && "tool_use".equals(cb.path("type").asText())) {
+ cbs.toolUse = ToolUse.from((ObjectNode) cb);
+ }
+
+ return cbs;
+ }
+ /** Returns the parsed tool use, or null when the content block was missing or not of type "tool_use". */
+ public ToolUse getToolUse() {
+ return toolUse;
+ }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/MessageDelta.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/MessageDelta.java
similarity index 74%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/MessageDelta.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/MessageDelta.java
index a99b04e2bb..4aa68fa737 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/claude/MessageDelta.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/MessageDelta.java
@@ -1,8 +1,22 @@
-package com.predic8.membrane.core.interceptor.ai.provider.claude;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.claude;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
-import com.predic8.membrane.core.interceptor.ai.store.Usage;
+import com.predic8.membrane.core.interceptor.llmgateway.store.Usage;
public class MessageDelta {
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ToolUse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ToolUse.java
new file mode 100644
index 0000000000..5694468d9e
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ToolUse.java
@@ -0,0 +1,36 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.claude;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ToolUse {
+
+ private static final Logger log = LoggerFactory.getLogger(ToolUse.class);
+ /** Text of the "name" field of the JSON object this instance was built from. */
+ private String name;
+ /** Creates a ToolUse that carries the "name" field of the given JSON object. */
+ public static ToolUse from(ObjectNode on) {
+ var parsed = new ToolUse();
+ parsed.name = on.path("name").asText();
+ return parsed;
+ }
+ /** Returns the tool name captured by {@link #from(ObjectNode)}. */
+ public String getName() {
+ return name;
+ }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleErrorCreator.java
similarity index 80%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleErrorCreator.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleErrorCreator.java
index 92a115194a..1b86f0f39b 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleErrorCreator.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleErrorCreator.java
@@ -1,7 +1,21 @@
-package com.predic8.membrane.core.interceptor.ai.provider.google;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.google;
import com.predic8.membrane.core.http.Response;
-import com.predic8.membrane.core.interceptor.ai.provider.AbstractLLMErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMErrorCreator;
import java.util.Collection;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java
similarity index 82%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleLLMRequest.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java
index 07da55b089..bd60b10617 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java
@@ -1,9 +1,23 @@
-package com.predic8.membrane.core.interceptor.ai.provider.google;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.google;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.ai.provider.AbstractLLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest;
public class GoogleLLMRequest extends AbstractLLMRequest {
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleLLMResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMResponse.java
similarity index 56%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleLLMResponse.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMResponse.java
index db04ae85df..abf1c0a592 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/google/GoogleLLMResponse.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMResponse.java
@@ -1,9 +1,23 @@
-package com.predic8.membrane.core.interceptor.ai.provider.google;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.google;
import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.ai.provider.AbstractLLMResponse;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMResponse;
-import com.predic8.membrane.core.interceptor.ai.store.Usage;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.store.Usage;
import com.predic8.membrane.core.util.http.SSEParser;
import java.util.Set;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleProvider.java
new file mode 100644
index 0000000000..b1b36ea1df
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleProvider.java
@@ -0,0 +1,47 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.google;
+
+import com.predic8.membrane.annot.MCElement;
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+
+import java.util.function.Consumer;
+
+/**
+ * @description (Experimental) Google AI provider configuration.
+ * Use it to configure an LLM gateway to use the Google LLM API.
+ */
+@MCElement( name="google",id = "google-ai-provider")
+public class GoogleProvider implements LLMProvider {
+
+ @Override
+ public LLMRequest getLLMRequest(Exchange exchange) { // builds a new GoogleLLMRequest for this exchange
+ return new GoogleLLMRequest(exchange);
+ }
+
+ @Override
+ public LLMResponse getLLMResponse(Exchange request, Consumer postProcessor) { // NOTE: 'request' is the whole Exchange, not only its request message
+ return new GoogleLLMResponse(request, postProcessor);
+ }
+
+ @Override
+ public LLMErrorCreator getErrorCreator() { // a fresh GoogleErrorCreator is created on every call
+ return new GoogleErrorCreator();
+ }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/AbstractOpenAiLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java
similarity index 75%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/AbstractOpenAiLLMRequest.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java
index 7b8a76e4d1..b49e7440fc 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/AbstractOpenAiLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java
@@ -1,8 +1,22 @@
-package com.predic8.membrane.core.interceptor.ai.provider.openai;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;
import com.fasterxml.jackson.databind.JsonNode;
import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.ai.provider.AbstractLLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest;
public abstract class AbstractOpenAiLLMRequest extends AbstractLLMRequest {
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIChatCompletionsRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIChatCompletionsRequest.java
new file mode 100644
index 0000000000..8c6e474398
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIChatCompletionsRequest.java
@@ -0,0 +1,29 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;
+
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsRequest;
+
+public class OpenAIChatCompletionsRequest extends ChatCompletionsRequest { // OpenAI variant: overrides only how the output-token limit is written
+ public OpenAIChatCompletionsRequest(Exchange exchange) {
+ super(exchange);
+ }
+
+ @Override
+ public void setMaxOutputTokens(int maxOutputTokens) {
+ json.put("max_completion_tokens", maxOutputTokens); // 'json' is inherited; presumably the parsed request body - confirm in ChatCompletionsRequest
+ }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java
new file mode 100644
index 0000000000..e55d40bd47
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java
@@ -0,0 +1,59 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;
+
+import com.predic8.membrane.annot.MCElement;
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsResponse;
+
+import java.util.function.Consumer;
+
+/**
+ * @description OpenAI provider configuration.
+ * Use it to configure an LLM gateway to use the OpenAI API.
+ */
+@MCElement( name="openai")
+public class OpenAIProvider implements LLMProvider {
+
+ @Override
+ public LLMRequest getLLMRequest(Exchange exchange) { // picks the request wrapper by request URI, see isResponsesApi
+ if (isResponsesApi(exchange)) {
+ return new OpenAiLLMResponsesRequest(exchange);
+ }
+ return new OpenAIChatCompletionsRequest(exchange); // every other path is handled as chat completions
+ }
+
+ @Override
+ public LLMResponse getLLMResponse(Exchange exchange, Consumer postProcessor) { // same URI-based dispatch as getLLMRequest
+ if (isResponsesApi(exchange)) {
+ return new OpenAiLLMResponsesResponse(exchange,postProcessor);
+ }
+ return new ChatCompletionsResponse(exchange, postProcessor);
+ }
+
+ @Override
+ public LLMErrorCreator getErrorCreator() { // no URI dispatch here: the same error creator serves both APIs
+ return new ChatCompletionsErrorCreator();
+ }
+
+ static boolean isResponsesApi(Exchange exchange) { // true when the request URI starts with "/v1/responses"
+ return exchange.getRequest().getUri().startsWith("/v1/responses");
+ }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiLLMResponsesRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java
similarity index 59%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiLLMResponsesRequest.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java
index 2568755848..3caa187c88 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiLLMResponsesRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai.provider.openai;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;
import com.predic8.membrane.core.exchange.Exchange;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiLLMResponsesResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesResponse.java
similarity index 65%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiLLMResponsesResponse.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesResponse.java
index 67e836ebd6..15263fbd55 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/provider/openai/OpenAiLLMResponsesResponse.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesResponse.java
@@ -1,11 +1,25 @@
-package com.predic8.membrane.core.interceptor.ai.provider.openai;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.ai.AbstractLLMEvent;
-import com.predic8.membrane.core.interceptor.ai.provider.AbstractLLMResponse;
-import com.predic8.membrane.core.interceptor.ai.provider.LLMResponse;
-import com.predic8.membrane.core.interceptor.ai.store.Usage;
+import com.predic8.membrane.core.interceptor.llmgateway.AbstractLLMEvent;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.store.Usage;
import com.predic8.membrane.core.util.http.SSEParser;
import com.predic8.membrane.core.util.json.JsonUtil;
import org.slf4j.Logger;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiStore.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiStore.java
new file mode 100644
index 0000000000..c764e17ac9
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiStore.java
@@ -0,0 +1,43 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.store;
+
+import com.predic8.membrane.core.router.Router;
+
+import java.util.Optional;
+
+/**
+ * Store used by the LLM gateway to look up users by token, record usage and check token limits.
+ * TODO: Store .status, .error, .model, .stop_reason
+ */
+public interface AiApiStore {
+
+ default void init(Router router) { // optional hook; does nothing unless an implementation overrides it
+ }
+
+ void store(AiApiUser user, Usage usage); // records the usage for the given user
+
+ Optional getUser(String token); // empty presumably means no user is known for the token - confirm with implementations
+
+ /**
+ * Checks if the user has enough tokens to make the request.
+ * @param user The user to check (inputTokens/outputTokens: token counts of the request to be checked)
+ * @return Estimated number of tokens that the user has left after this request
+ */
+ long checkLimit(AiApiUser user, long inputTokens, long outputTokens); // the gateway passes its input-token count and the effective max. output tokens
+
+ long getRemainingResetTime(); // NOTE(review): time unit is not visible here - confirm with implementations
+}
+
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/AiApiUser.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiUser.java
similarity index 77%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/AiApiUser.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiUser.java
index cd3ab76b4b..da8b792680 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/AiApiUser.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiUser.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai.store;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.store;
import com.predic8.membrane.annot.MCAttribute;
import com.predic8.membrane.annot.MCElement;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/JDBCAiApiUsageStore.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/JDBCAiApiUsageStore.java
similarity index 77%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/JDBCAiApiUsageStore.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/JDBCAiApiUsageStore.java
index 16457a97db..7541c08a2c 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/JDBCAiApiUsageStore.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/JDBCAiApiUsageStore.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai.store;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.store;
import com.predic8.membrane.annot.MCElement;
import com.predic8.membrane.core.router.Router;
@@ -41,7 +55,7 @@ public void init(Router router) {
}
@Override
- public void store(AiApiUser user, com.predic8.membrane.core.interceptor.ai.store.Usage usage) {
+ public void store(AiApiUser user, com.predic8.membrane.core.interceptor.llmgateway.store.Usage usage) {
try (var connection = getConnection(); var ps = connection.prepareStatement(INSERT_SQL)) {
ps.setString(1, user.getName());
ps.setInt(2, usage.inputTokens());
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/SimpleAiApiStore.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/SimpleAiApiStore.java
similarity index 82%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/SimpleAiApiStore.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/SimpleAiApiStore.java
index f9b8218608..106892c39f 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/ai/store/SimpleAiApiStore.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/SimpleAiApiStore.java
@@ -1,4 +1,18 @@
-package com.predic8.membrane.core.interceptor.ai.store;
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.store;
import com.predic8.membrane.annot.MCAttribute;
import com.predic8.membrane.annot.MCChildElement;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/Usage.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/Usage.java
new file mode 100644
index 0000000000..3bcc626858
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/Usage.java
@@ -0,0 +1,17 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.store;
+
+public record Usage(int inputTokens, int outputTokens, int totalTokens) {} // immutable token counters handed to AiApiStore.store
diff --git a/core/src/main/java/com/predic8/membrane/core/util/http/SSEParser.java b/core/src/main/java/com/predic8/membrane/core/util/http/SSEParser.java
index acddbc7428..405312ba4e 100644
--- a/core/src/main/java/com/predic8/membrane/core/util/http/SSEParser.java
+++ b/core/src/main/java/com/predic8/membrane/core/util/http/SSEParser.java
@@ -1,3 +1,17 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
package com.predic8.membrane.core.util.http;
import com.fasterxml.jackson.core.JsonProcessingException;
diff --git a/core/src/test/java/com/predic8/membrane/core/util/http/SSEParserTest.java b/core/src/test/java/com/predic8/membrane/core/util/http/SSEParserTest.java
index c08ecd3a09..7738321f85 100644
--- a/core/src/test/java/com/predic8/membrane/core/util/http/SSEParserTest.java
+++ b/core/src/test/java/com/predic8/membrane/core/util/http/SSEParserTest.java
@@ -1,3 +1,17 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
package com.predic8.membrane.core.util.http;
import com.predic8.membrane.core.http.Chunk;
diff --git a/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml
index 44bc28a8ee..e3550da714 100644
--- a/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml
+++ b/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml
@@ -35,10 +35,11 @@ api:
# Limits per request
maxInputTokens: 100
maxOutputTokens: 200
- models:
- - claude-sonnet-4-0
- - claude-opus-4-0
- - claude-haiku-3-5
+ policies:
+ models:
+ - claude-sonnet-4-0
+ - claude-opus-4-0
+ - claude-haiku-3-5
simpleStore:
# User-facing API keys for the LLM Gateway
users:
diff --git a/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml
index 0b7c8569e7..2b6e344edd 100644
--- a/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml
+++ b/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml
@@ -35,12 +35,13 @@ api:
# Limits per request
maxInputTokens: 100
maxOutputTokens: 200
- models:
- - gemini-2.5-pro
- - gemini-2.5-flash
- - gemini-2.5-flash-lite
- - gemini-2.0-flash
- - gemini-2.0-flash-lite
+ policies:
+ models:
+ - gemini-2.5-pro
+ - gemini-2.5-flash
+ - gemini-2.5-flash-lite
+ - gemini-2.0-flash
+ - gemini-2.0-flash-lite
simpleStore:
# User-facing API keys for the LLM Gateway
users:
diff --git a/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml
index b0231ca3d8..19f8295c69 100644
--- a/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml
+++ b/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml
@@ -34,10 +34,11 @@ api:
# Limits per request
maxInputTokens: 100
maxOutputTokens: 200
- models:
- - gpt-5.4
- - gpt-5-nano
- - gpt-5-mini
+ policies:
+ models:
+ - gpt-5.4
+ - gpt-5-nano
+ - gpt-5-mini
openai: {}
simpleStore:
# User-facing API keys for the LLM Gateway
From b644a65df7a7782b8f099821f2ca72f6a22ca0c0 Mon Sep 17 00:00:00 2001
From: thomas
Date: Tue, 26 May 2026 15:51:50 +0200
Subject: [PATCH 36/43] feat: introduce `Policies` class and update LLM Gateway
to support policy-based token and model restrictions
- Added `Policies` class for defining restrictions on models, input tokens, and output tokens in the LLM Gateway.
- Replaced `maxInputTokens` and `maxOutputTokens` fields in `LLMGatewayInterceptor` with `Policies`.
- Updated YAML tutorials (OpenAI, Claude, Google) to use the new `policies` configuration.
---
.../llmgateway/LLMGatewayInterceptor.java | 54 +++----------
.../core/interceptor/llmgateway/Policies.java | 76 +++++++++++++++++++
.../claude/10-Basic-LLM-Gateway.yaml | 5 +-
.../claude/20-Sharing-API-Keys.yaml | 14 ++--
.../google/10-Basic-LLM-Gateway.yaml | 5 +-
.../google/20-Sharing-API-Keys.yaml | 6 +-
.../openai/10-Basic-LLM-Gateway.yaml | 5 +-
.../openai/20-Sharing-API-Keys.yaml | 6 +-
8 files changed, 109 insertions(+), 62 deletions(-)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java
index 89197b079b..2824842f67 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java
@@ -57,8 +57,6 @@ public class LLMGatewayInterceptor extends AbstractInterceptor {
private LLMErrorCreator errorCreator;
private String apiKey;
- private int maxOutputTokens;
- private int maxInputTokens;
private Policies policies = new Policies();
@@ -111,7 +109,7 @@ public Outcome handleRequest(Exchange exc) {
// Check store limits
if (store != null) {
- var effectiveMaxTokens = computeEffectiveMaxOutputTokens(aiReq.getRequestedMaxOutputTokens(), maxOutputTokens);
+ var effectiveMaxTokens = computeEffectiveMaxOutputTokens(aiReq.getRequestedMaxOutputTokens(), policies.getMaxOutputTokens());
var remaining = store.checkLimit(user, inputTokens, effectiveMaxTokens);
log.debug("User {} has {} remaining tokens left", user, remaining);
if (remaining <= 0) {
@@ -130,20 +128,20 @@ public Outcome handleRequest(Exchange exc) {
var requestedMaxOutputTokens = aiReq.getRequestedMaxOutputTokens();
- if (maxOutputTokens > 0) {
+ if (policies.getMaxOutputTokens() > 0) {
if (requestedMaxOutputTokens <= 0) {
- log.info("No max. output requested. Setting limit to {}.", maxOutputTokens);
- aiReq.setMaxOutputTokens(maxOutputTokens);
- } else if (requestedMaxOutputTokens > maxOutputTokens) {
- log.info("Requested max. output tokens {} exceed the limit. Setting limit to {}.", requestedMaxOutputTokens, maxOutputTokens);
- aiReq.setMaxOutputTokens(maxOutputTokens);
+ log.info("No max. output requested. Setting limit to {}.", policies.getMaxOutputTokens());
+ aiReq.setMaxOutputTokens(policies.getMaxOutputTokens());
+ } else if (requestedMaxOutputTokens > policies.getMaxOutputTokens()) {
+ log.info("Requested max. output tokens {} exceed the limit. Setting limit to {}.", requestedMaxOutputTokens, policies.getMaxOutputTokens());
+ aiReq.setMaxOutputTokens(policies.getMaxOutputTokens());
}
}
- if (maxInputTokens != 0) {
- if (inputTokens > maxInputTokens) {
- log.info("Input tokens {} exceed the limit of {}.", inputTokens, maxInputTokens);
- exc.setResponse(errorCreator.inputTokensExceeded(maxInputTokens, inputTokens));
+ if (policies.getMaxInputTokens() != 0) {
+ if (inputTokens > policies.getMaxInputTokens()) {
+ log.info("Input tokens {} exceed the limit of {}.", inputTokens, policies.getMaxInputTokens());
+ exc.setResponse(errorCreator.inputTokensExceeded(policies.getMaxInputTokens(), inputTokens));
return RETURN;
}
}
@@ -214,36 +212,6 @@ public String getDisplayName() {
return "LLM Gateway";
}
- public int getMaxOutputTokens() {
- return maxOutputTokens;
- }
-
- /**
- * @param maxOutputTokens Maximum number of tokens the LLM should use to generate a response.
- * @description Maximum number of tokens the LLM should use to generate a response. This is just a hint that the gateway
- * sends to the LLM provider. The provider may use a different limit.
- * @default 0 (unlimited)
- */
- @MCAttribute
- public void setMaxOutputTokens(int maxOutputTokens) {
- this.maxOutputTokens = maxOutputTokens;
- }
-
- public int getMaxInputTokens() {
- return maxInputTokens;
- }
-
- /**
- * @param maxInputTokens Maximum number of tokens that a request can use.
- * @description Restricts token usage for the input. The size of the input is estimated by gateway based on the request size.
- * Actual token usage may be deviate from this value.
- */
- @MCAttribute
- public void setMaxInputTokens(int maxInputTokens) {
- this.maxInputTokens = maxInputTokens;
- }
-
-
public LLMProvider getProvider() {
return provider;
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java
new file mode 100644
index 0000000000..cfbf960e10
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java
@@ -0,0 +1,76 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway;
+
+import com.predic8.membrane.annot.MCAttribute;
+import com.predic8.membrane.annot.MCElement;
+
+import java.util.List;
+
+/**
+ * LLM Gateway policies for token usage and model restrictions.
+ */
+@MCElement(name = "policies", topLevel = false, id="llm-gateway-policies")
+public class Policies {
+
+ private List models; // null = no model restriction (see @default on setModels)
+ private int maxOutputTokens; // 0 = unlimited; the gateway only applies values > 0
+ private int maxInputTokens; // 0 = input check disabled in the gateway
+
+ public List getModels() {
+ return models;
+ }
+
+ /**
+ * @param models List of models that can be used by the gateway.
+ * @description Restricts the models that can be used by the gateway.
+ * @default null (no restriction)
+ */
+ @MCAttribute
+ public void setModels(List models) {
+ this.models = models;
+ }
+
+
+ public int getMaxOutputTokens() {
+ return maxOutputTokens;
+ }
+
+ /**
+ * @param maxOutputTokens Maximum number of tokens the LLM should use to generate a response.
+ * @description Maximum number of tokens the LLM should use to generate a response. This is just a hint that the gateway
+ * sends to the LLM provider. The provider may use a different limit.
+ * @default 0 (unlimited)
+ */
+ @MCAttribute
+ public void setMaxOutputTokens(int maxOutputTokens) {
+ this.maxOutputTokens = maxOutputTokens;
+ }
+
+ public int getMaxInputTokens() {
+ return maxInputTokens;
+ }
+
+ /**
+ * @param maxInputTokens Maximum number of tokens that a request can use.
+ * @description Restricts token usage for the input. The size of the input is estimated by the gateway based on the request size.
+ * Actual token usage may deviate from this value. A value of 0 disables the check.
+ */
+ @MCAttribute
+ public void setMaxInputTokens(int maxInputTokens) {
+ this.maxInputTokens = maxInputTokens;
+ }
+
+}
diff --git a/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml b/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml
index 5b30514e2f..ddaaaedcf1 100644
--- a/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml
+++ b/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml
@@ -21,7 +21,8 @@ api:
flow:
- llmGateway:
claude: {}
- maxInputTokens: 100
- maxOutputTokens: 200
+ policies:
+ maxInputTokens: 100
+ maxOutputTokens: 200
target:
url: https://api.anthropic.com
\ No newline at end of file
diff --git a/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml
index e3550da714..3a6a54f2f4 100644
--- a/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml
+++ b/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml
@@ -32,14 +32,14 @@ api:
- llmGateway:
claude: {}
apiKey: <>
- # Limits per request
- maxInputTokens: 100
- maxOutputTokens: 200
policies:
- models:
- - claude-sonnet-4-0
- - claude-opus-4-0
- - claude-haiku-3-5
+ # Limits per request
+ maxInputTokens: 100
+ maxOutputTokens: 200
+ models:
+ - claude-sonnet-4-0
+ - claude-opus-4-0
+ - claude-haiku-3-5
simpleStore:
# User-facing API keys for the LLM Gateway
users:
diff --git a/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml b/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml
index a86eec6a27..2cbf4c236d 100644
--- a/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml
+++ b/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml
@@ -21,7 +21,8 @@ api:
flow:
- llmGateway:
google: {}
- maxInputTokens: 100
- maxOutputTokens: 200
+ policies:
+ maxInputTokens: 100
+ maxOutputTokens: 200
target:
url: https://generativelanguage.googleapis.com
\ No newline at end of file
diff --git a/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml
index 2b6e344edd..4a9ef00ba4 100644
--- a/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml
+++ b/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml
@@ -32,10 +32,10 @@ api:
- llmGateway:
google: {}
apiKey: <>
- # Limits per request
- maxInputTokens: 100
- maxOutputTokens: 200
policies:
+ # Limits per request
+ maxInputTokens: 100
+ maxOutputTokens: 200
models:
- gemini-2.5-pro
- gemini-2.5-flash
diff --git a/distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml b/distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml
index 07ce7c4aff..0074494b40 100644
--- a/distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml
+++ b/distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml
@@ -20,7 +20,8 @@ api:
flow:
- llmGateway:
openai: {}
- maxInputTokens: 100
- maxOutputTokens: 200
+ policies:
+ maxInputTokens: 100
+ maxOutputTokens: 200
target:
url: https://api.openai.com
diff --git a/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml
index 19f8295c69..8aa3e72f4d 100644
--- a/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml
+++ b/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml
@@ -31,10 +31,10 @@ api:
flow:
- llmGateway:
apiKey: <>
- # Limits per request
- maxInputTokens: 100
- maxOutputTokens: 200
policies:
+ # Limits per request
+ maxInputTokens: 100
+ maxOutputTokens: 200
models:
- gpt-5.4
- gpt-5-nano
From 2449491d5c69ef0a5bb131254105a91eedb84307 Mon Sep 17 00:00:00 2001
From: thomas
Date: Wed, 27 May 2026 16:04:38 +0200
Subject: [PATCH 37/43] feat: refactor policies and introduce system prompt
support
- Replaced `Policies` class implementation with `DefaultPolicies` and `NullPolicies` for enhanced flexibility.
- Added `SystemPrompt` class to support dynamic system prompt management in LLM Gateway.
- Updated `LLMGatewayInterceptor` to delegate policy enforcement and system prompt handling to respective components.
- Extended providers (OpenAI, Claude, Google Gemini) with standardized system prompt methods (`getSystemPrompt`, `setSystemPrompt`, `removeSystemPrompt`).
- Enhanced test coverage with `AbstractLLMRequestTest` for API key handling and bearer token case insensitivity.
---
.../llmgateway/DefaultPolicies.java | 126 ++++++++++++++++++
.../llmgateway/LLMGatewayInterceptor.java | 46 +++----
.../interceptor/llmgateway/NullPolicies.java | 31 +++++
.../core/interceptor/llmgateway/Policies.java | 77 ++---------
.../interceptor/llmgateway/SystemPrompt.java | 70 ++++++++++
.../provider/AbstractLLMRequest.java | 5 +-
.../llmgateway/provider/LLMRequest.java | 8 ++
.../ChatCompletionsRequest.java | 60 +++++++++
.../provider/claude/ClaudeLLMRequest.java | 38 ++++++
.../provider/google/GoogleLLMRequest.java | 45 +++++++
.../openai/OpenAiLLMResponsesRequest.java | 31 +++++
.../provider/AbstractLLMRequestTest.java | 55 ++++++++
12 files changed, 495 insertions(+), 97 deletions(-)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/NullPolicies.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java
create mode 100644 core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequestTest.java
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
new file mode 100644
index 0000000000..184b591136
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
@@ -0,0 +1,126 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway;
+
+import com.predic8.membrane.annot.MCAttribute;
+import com.predic8.membrane.annot.MCElement;
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.Outcome;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+import static com.predic8.membrane.core.interceptor.Outcome.CONTINUE;
+import static com.predic8.membrane.core.interceptor.Outcome.RETURN;
+
+/**
+ * @description LLM Gateway policies for token usage and model restrictions.
+ */
+@MCElement(name = "policies", id="llm-gateway-policies")
+public class DefaultPolicies implements Policies {
+
+ private static final Logger log = LoggerFactory.getLogger(DefaultPolicies.class);
+
+ private LLMErrorCreator errorCreator;
+
+ private List models;
+ private int maxOutputTokens;
+ private int maxInputTokens;
+
+ public void init(LLMErrorCreator errorCreator) {
+ this.errorCreator = errorCreator;
+ }
+
+ public Outcome handleRequest(LLMRequest aiReq, Exchange exc) {
+
+ var requestedMaxOutputTokens = aiReq.getRequestedMaxOutputTokens();
+ var inputTokens = aiReq.estimateInputTokens();
+
+ if (maxOutputTokens > 0) {
+ if (requestedMaxOutputTokens <= 0) {
+ log.info("No max. output requested. Setting limit to {}.", maxOutputTokens);
+ aiReq.setMaxOutputTokens(maxOutputTokens);
+ } else if (requestedMaxOutputTokens > maxOutputTokens) {
+ log.info("Requested max. output tokens {} exceed the limit. Setting limit to {}.", requestedMaxOutputTokens, maxOutputTokens);
+ aiReq.setMaxOutputTokens(maxOutputTokens);
+ }
+ }
+
+ if (maxInputTokens != 0) {
+ if (inputTokens > maxInputTokens) {
+ log.info("Input tokens {} exceed the limit of {}.", inputTokens, maxInputTokens);
+ exc.setResponse(errorCreator.inputTokensExceeded(maxInputTokens, inputTokens));
+ return RETURN;
+ }
+ }
+
+ if (models != null) {
+ var model = aiReq.getModel();
+ if (!models.contains(model)) {
+ exc.setResponse(errorCreator.modelNotAllowed(model, models));
+ return RETURN;
+ }
+ }
+
+ return CONTINUE;
+ }
+
+ public List getModels() {
+ return models;
+ }
+
+ /**
+ * @param models List of models that can be used by the gateway.
+ * @description Restricts the models that can be used by the gateway.
+ * @default null (no restriction)
+ */
+ @MCAttribute
+ public void setModels(List models) {
+ this.models = models;
+ }
+
+
+ public int getMaxOutputTokens() {
+ return maxOutputTokens;
+ }
+
+ /**
+ * @param maxOutputTokens Maximum number of tokens the LLM should use to generate a response.
+ * @description Maximum number of tokens the LLM should use to generate a response. This is just a hint that the gateway
+ * sends to the LLM provider. The provider may use a different limit.
+ * @default 0 (unlimited)
+ */
+ @MCAttribute
+ public void setMaxOutputTokens(int maxOutputTokens) {
+ this.maxOutputTokens = maxOutputTokens;
+ }
+
+ public int getMaxInputTokens() {
+ return maxInputTokens;
+ }
+
+ /**
+ * @param maxInputTokens Maximum number of tokens that a request can use.
+ * @description Restricts token usage for the input. The size of the input is estimated by the gateway based on the request size.
+ * Actual token usage may deviate from this value.
+ */
+ @MCAttribute
+ public void setMaxInputTokens(int maxInputTokens) {
+ this.maxInputTokens = maxInputTokens;
+ }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java
index 2824842f67..8bca1b36c4 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java
@@ -58,7 +58,9 @@ public class LLMGatewayInterceptor extends AbstractInterceptor {
private String apiKey;
- private Policies policies = new Policies();
+ private Policies policies = new NullPolicies();
+
+ private SystemPrompt systemPrompt;
private AiApiStore store;
@@ -66,6 +68,7 @@ public class LLMGatewayInterceptor extends AbstractInterceptor {
public void init() {
super.init();
errorCreator = provider.getErrorCreator();
+ policies.init(errorCreator);
if (store != null)
store.init(router);
@@ -126,36 +129,18 @@ public Outcome handleRequest(Exchange exc) {
log.debug("Requested model: {}", aiReq.getModel());
- var requestedMaxOutputTokens = aiReq.getRequestedMaxOutputTokens();
-
- if (policies.getMaxOutputTokens() > 0) {
- if (requestedMaxOutputTokens <= 0) {
- log.info("No max. output requested. Setting limit to {}.", policies.getMaxOutputTokens());
- aiReq.setMaxOutputTokens(policies.getMaxOutputTokens());
- } else if (requestedMaxOutputTokens > policies.getMaxOutputTokens()) {
- log.info("Requested max. output tokens {} exceed the limit. Setting limit to {}.", requestedMaxOutputTokens, policies.getMaxOutputTokens());
- aiReq.setMaxOutputTokens(policies.getMaxOutputTokens());
- }
+ var outcome = policies.handleRequest(aiReq,exc);
+ if (outcome != CONTINUE) {
+ return outcome;
}
- if (policies.getMaxInputTokens() != 0) {
- if (inputTokens > policies.getMaxInputTokens()) {
- log.info("Input tokens {} exceed the limit of {}.", inputTokens, policies.getMaxInputTokens());
- exc.setResponse(errorCreator.inputTokensExceeded(policies.getMaxInputTokens(), inputTokens));
- return RETURN;
+ if (systemPrompt != null) {
+ outcome = systemPrompt.handleRequest(aiReq,exc);
+ if (outcome != CONTINUE) {
+ return outcome;
}
}
- if (policies.getModels() != null) {
- var model = aiReq.getModel();
- if (!policies.getModels().contains(model)) {
- exc.setResponse(errorCreator.modelNotAllowed(model, policies.getModels()));
- return RETURN;
- }
- }
-
- log.debug("Agent provides the tools: {}", aiReq.getTools());
-
setJsonBody(exc.getRequest(), aiReq.getJson());
return CONTINUE;
}
@@ -238,4 +223,13 @@ public Policies getPolicies() {
public void setPolicies(Policies policies) {
this.policies = policies;
}
+
+ public SystemPrompt getSystemPrompt() {
+ return systemPrompt;
+ }
+
+ @MCChildElement
+ public void setSystemPrompt(SystemPrompt systemPrompt) {
+ this.systemPrompt = systemPrompt;
+ }
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/NullPolicies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/NullPolicies.java
new file mode 100644
index 0000000000..8acd6df555
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/NullPolicies.java
@@ -0,0 +1,31 @@
+package com.predic8.membrane.core.interceptor.llmgateway;
+
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.Outcome;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+
+import static com.predic8.membrane.core.interceptor.Outcome.CONTINUE;
+
+public class NullPolicies implements Policies {
+
+ @Override
+ public Outcome handleRequest(LLMRequest aiReq, Exchange exc) {
+ return CONTINUE;
+ }
+
+ @Override
+ public void init(LLMErrorCreator errorCreator) {
+
+ }
+
+ @Override
+ public int getMaxOutputTokens() {
+ return 0;
+ }
+
+ @Override
+ public void setMaxOutputTokens(int maxOutputTokens) {
+ }
+}
+
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java
index cfbf960e10..62419f0ed2 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java
@@ -1,76 +1,17 @@
-/* Copyright 2026 predic8 GmbH, www.predic8.com
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
-
package com.predic8.membrane.core.interceptor.llmgateway;
-import com.predic8.membrane.annot.MCAttribute;
-import com.predic8.membrane.annot.MCElement;
-
-import java.util.List;
-
-/**
- * LLM Gateway policies for token usage and model restrictions.
- */
-@MCElement(name = "policies", topLevel = false, id="llm-gateway-policies")
-public class Policies {
-
- private List models;
- private int maxOutputTokens;
- private int maxInputTokens;
-
- public List getModels() {
- return models;
- }
-
- /**
- * @param models List of models that can be used by the gateway.
- * @desciption Restricts the models that can be used by the gateway.
- * @default null (no restriction)
- */
- @MCAttribute
- public void setModels(List models) {
- this.models = models;
- }
-
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.Outcome;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
- public int getMaxOutputTokens() {
- return maxOutputTokens;
- }
+public interface Policies {
- /**
- * @param maxOutputTokens Maximum number of tokens the LLM should use to generate a response.
- * @description Maximum number of tokens the LLM should use to generate a response. This is just a hint that the gateway
- * sends to the LLM provider. The provider may use a different limit.
- * @default 0 (unlimited)
- */
- @MCAttribute
- public void setMaxOutputTokens(int maxOutputTokens) {
- this.maxOutputTokens = maxOutputTokens;
- }
+ Outcome handleRequest(LLMRequest aiReq, Exchange exc);
- public int getMaxInputTokens() {
- return maxInputTokens;
- }
+ void init(LLMErrorCreator errorCreator);
- /**
- * @param maxInputTokens Maximum number of tokens that a request can use.
- * @description Restricts token usage for the input. The size of the input is estimated by gateway based on the request size.
- * Actual token usage may be deviate from this value.
- */
- @MCAttribute
- public void setMaxInputTokens(int maxInputTokens) {
- this.maxInputTokens = maxInputTokens;
- }
+ int getMaxOutputTokens();
+ void setMaxOutputTokens(int maxOutputTokens);
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java
new file mode 100644
index 0000000000..c301d46c34
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java
@@ -0,0 +1,70 @@
+package com.predic8.membrane.core.interceptor.llmgateway;
+
+import com.predic8.membrane.annot.MCAttribute;
+import com.predic8.membrane.annot.MCElement;
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.Outcome;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static com.predic8.membrane.core.interceptor.Outcome.CONTINUE;
+
+/**
+ * @description Overwrites, prepends to, appends to or removes the system prompt of an LLM request. With the older Chat Completions API the prompt is carried as a system message like:
+ * {"role": "system", "content": "You are a helpful assistant."}
+ */
+@MCElement(name = "systemPrompt")
+public class SystemPrompt {
+
+ private static final Logger log = LoggerFactory.getLogger(SystemPrompt.class);
+ public static final String INSTRUCTIONS = "instructions";
+
+ enum Action {
+ REJECT, REMOVE, OVERWRITE, APPEND, PREPEND
+ }
+
+ private Action action;
+ private String content = "";
+
+ public Outcome handleRequest(LLMRequest aiReq, Exchange exc) {
+ var instructions = aiReq.getSystemPrompt() == null ? "" : aiReq.getSystemPrompt();
+ switch (action) {
+ case OVERWRITE -> {
+ log.debug("Overwriting instructions: {}", content);
+ aiReq.setSystemPrompt(content);
+ }
+ case PREPEND -> {
+ log.debug("Prepending instructions: {}", content);
+ aiReq.setSystemPrompt( content + "\n" + instructions);
+ }
+ case APPEND -> {
+ log.debug("Appending instructions: {}", content);
+ aiReq.setSystemPrompt(instructions + "\n" + content);
+ }
+ case REMOVE -> {
+ log.info("Removing instructions: {}", instructions);
+ aiReq.removeSystemPrompt();
+ }
+ }
+ return CONTINUE;
+ }
+
+ public Action getAction() {
+ return action;
+ }
+
+ @MCAttribute
+ public void setAction(Action action) {
+ this.action = action;
+ }
+
+ public String getContent() {
+ return content;
+ }
+
+ @MCAttribute
+ public void setContent(String content) {
+ this.content = content;
+ }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java
index f5955d6acb..54df557594 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java
@@ -70,12 +70,11 @@ public String getApiKey() {
return null;
}
- int index = ah.indexOf(BEARER_PREFIX);
- if (index < 0) {
+ if (!ah.regionMatches(true, 0, BEARER_PREFIX, 0, BEARER_PREFIX.length())) {
return null;
}
- var token = ah.substring(index + BEARER_PREFIX.length()).trim();
+ var token = ah.substring(BEARER_PREFIX.length()).trim();
return token.isEmpty() ? null : token;
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
index 371115e911..64dee19dad 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
@@ -40,4 +40,12 @@ public interface LLMRequest {
List getTools();
+ String getSystemPrompt();
+
+ boolean isChatCompletion();
+
+ void setSystemPrompt(String systemPrompt);
+
+ void removeSystemPrompt();
}
+
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
index 4ecbf9065a..951fb99edc 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
@@ -14,6 +14,7 @@
package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions;
+import com.fasterxml.jackson.databind.node.ObjectNode;
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.AbstractOpenAiLLMRequest;
@@ -53,6 +54,65 @@ public List getTools() {
.toList();
}
+ /**
+ * Returns the content of the first {@code "role": "system"} message,
+ * or an empty string if none is present.
+ */
+ @Override
+ public String getSystemPrompt() {
+ for (var message : json.path("messages")) {
+ if ("system".equals(message.path("role").asText())) {
+ return message.path("content").asText("");
+ }
+ }
+ return "";
+ }
+
+ /**
+ * Sets the system prompt in the {@code "messages"} array.
+ * If a system message already exists its {@code "content"} is updated in place;
+ * otherwise a new {@code {"role":"system","content":"..."}} entry is prepended.
+ *
+ * Chat Completions API wire format:
+ * <pre>{@code
+ * { "messages": [{"role": "system", "content": "You are a helpful assistant."}, ...] }
+ * }</pre>
+ */
+ @Override
+ public void setSystemPrompt(String systemPrompt) {
+ var messages = json.withArray("messages");
+ for (var message : messages) {
+ if ("system".equals(message.path("role").asText())) {
+ ((ObjectNode) message).put("content", systemPrompt);
+ return;
+ }
+ }
+ // No system message found — prepend one
+ var systemMessage = json.objectNode();
+ systemMessage.put("role", "system");
+ systemMessage.put("content", systemPrompt);
+ messages.insert(0, systemMessage);
+ }
+
+ /**
+ * Removes all {@code "role": "system"} messages from the {@code "messages"} array.
+ * Has no effect if no system message is present.
+ */
+ @Override
+ public void removeSystemPrompt() {
+ var messages = json.withArray("messages");
+ for (int i = messages.size() - 1; i >= 0; i--) {
+ if ("system".equals(messages.get(i).path("role").asText())) {
+ messages.remove(i);
+ }
+ }
+ }
+
+ @Override
+ public boolean isChatCompletion() {
+ return true;
+ }
+
@Override
public long getRequestedMaxOutputTokens() {
return json.path("max_completion_tokens").asLong(0);
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java
index fa5279afe4..e99d06a979 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java
@@ -20,6 +20,9 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+/**
+ * LLM request for the Claude API. The system prompt is carried in the top-level {@code "system"} field.
+ */
public class ClaudeLLMRequest extends AbstractLLMRequest {
private static final Logger log = LoggerFactory.getLogger(ClaudeLLMRequest.class);
@@ -80,6 +83,20 @@ public long estimateInputTokens() {
return tokens;
}
+ /**
+ * Returns the system prompt from the top-level {@code "system"} field,
+ * or an empty string if no system prompt is set.
+ */
+ @Override
+ public String getSystemPrompt() {
+ return json.path("system").asText("");
+ }
+
+ @Override
+ public boolean isChatCompletion() {
+ return false;
+ }
+
private boolean isThinking() {
var thinking = json.path("thinking");
return thinking.isObject() && "enabled".equals(thinking.path("type").asText());
@@ -105,4 +122,25 @@ public void setApiKey(String apiKey) {
exchange.getRequest().getHeader().removeFields(X_API_KEY);
exchange.getRequest().getHeader().add(X_API_KEY, apiKey);
}
+
+ /**
+ * Sets the top-level {@code "system"} field to {@code systemPrompt}.
+ * Replaces any existing system prompt.
+ *
+ * Claude API wire format:
+ * <pre>{@code { "system": "You are a helpful assistant.", "messages": [...] }}</pre>
+ */
+ @Override
+ public void setSystemPrompt(String systemPrompt) {
+ json.put("system", systemPrompt);
+ }
+
+ /**
+ * Removes the top-level {@code "system"} field entirely.
+ * Has no effect if no system prompt is present.
+ */
+ @Override
+ public void removeSystemPrompt() {
+ json.remove("system");
+ }
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java
index bd60b10617..ad7d4328a3 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java
@@ -97,6 +97,51 @@ public long estimateInputTokens() {
return Math.max(1, Math.round(chars / 4.0 * 1.15));
}
+ /**
+ * Returns the text of the first part inside {@code systemInstruction},
+ * or an empty string if no system prompt is set.
+ *
+ * Gemini API wire format:
+ * <pre>{@code
+ * { "systemInstruction": { "parts": [{ "text": "You are a helpful assistant." }] } }
+ * }</pre>
+ */
+ @Override
+ public String getSystemPrompt() {
+ for (var part : json.path("systemInstruction").path("parts")) {
+ if (part.path("text").isTextual()) {
+ return part.path("text").asText("");
+ }
+ }
+ return "";
+ }
+
+ /**
+ * Sets {@code systemInstruction} to a single text part carrying {@code systemPrompt}.
+ * Replaces any existing system instruction.
+ */
+ @Override
+ public void setSystemPrompt(String systemPrompt) {
+ json.putObject("systemInstruction")
+ .putArray("parts")
+ .addObject()
+ .put("text", systemPrompt);
+ }
+
+ /**
+ * Removes the {@code systemInstruction} field entirely.
+ * Has no effect if no system instruction is present.
+ */
+ @Override
+ public void removeSystemPrompt() {
+ json.remove("systemInstruction");
+ }
+
+ @Override
+ public boolean isChatCompletion() {
+ return exchange.getRequest().getUri().contains("/chat/completions");
+ }
+
private long countText(JsonNode node) {
if (node == null || node.isMissingNode() || node.isNull()) {
return 0;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java
index 3caa187c88..ef7867f1d0 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java
@@ -37,6 +37,37 @@ public List getTools() {
.toList();
}
+ @Override
+ public String getSystemPrompt() {
+ return json.path("instructions").asText();
+ }
+
+ @Override
+ public boolean isChatCompletion() {
+ return false;
+ }
+
+ /**
+ * Sets the {@code "instructions"} field, which is the system prompt in the
+ * OpenAI Responses API. Replaces any existing value.
+ *
+ * OpenAI Responses API wire format:
+ * <pre>{@code { "instructions": "You are a helpful assistant.", "input": "..." }}</pre>
+ */
+ @Override
+ public void setSystemPrompt(String systemPrompt) {
+ json.put("instructions", systemPrompt);
+ }
+
+ /**
+ * Removes the {@code "instructions"} field entirely.
+ * Has no effect if no system prompt is present.
+ */
+ @Override
+ public void removeSystemPrompt() {
+ json.remove("instructions");
+ }
+
@Override
public long getRequestedMaxOutputTokens() {
if (json.has("max_output_tokens"))
diff --git a/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequestTest.java b/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequestTest.java
new file mode 100644
index 0000000000..747b24e540
--- /dev/null
+++ b/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequestTest.java
@@ -0,0 +1,55 @@
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.predic8.membrane.core.exchange.Exchange;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.net.URISyntaxException;
+
+import static com.predic8.membrane.core.http.Request.post;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+class AbstractLLMRequestTest {
+
+ @ParameterizedTest
+ @ValueSource(strings = {
+ "Bearer test-api-key",
+ "bearer test-api-key",
+ "BEARER test-api-key",
+ "bEaReR test-api-key"
+ })
+ void getApiKeyAcceptsBearerCaseInsensitive(String authorization) throws URISyntaxException {
+ var request = new TestLLMRequest(post("http://localhost/chat/completions")
+ .header("Authorization", authorization)
+ .json("{}")
+ .buildExchange());
+
+ assertEquals("test-api-key", request.getApiKey());
+ }
+
+ private static class TestLLMRequest extends AbstractLLMRequest {
+
+ TestLLMRequest(Exchange exchange) {
+ super(exchange);
+ }
+
+ @Override
+ public long getRequestedMaxOutputTokens() {
+ return -1;
+ }
+
+ @Override
+ public void setMaxOutputTokens(int maxOutputTokens) {
+ }
+
+ @Override
+ public long estimateInputTokens() {
+ return 0;
+ }
+
+ @Override
+ public String getSystemPrompt() {
+ return null;
+ }
+ }
+}
\ No newline at end of file
From 60a66e6446ac6330c7ab4cca51e38c5bd2522f50 Mon Sep 17 00:00:00 2001
From: thomas
Date: Wed, 27 May 2026 16:06:35 +0200
Subject: [PATCH 38/43] feat: extend `SystemPrompt` with new actions and update
tests
- Added `setSystemPrompt`, `removeSystemPrompt`, and `isChatCompletion` methods for enhanced prompt management.
- Refactored `SystemPrompt.Action` to remove unused `REJECT` action.
- Updated `AbstractLLMRequestTest` to validate new `SystemPrompt` behaviors.
---
.../core/interceptor/llmgateway/SystemPrompt.java | 5 ++---
.../provider/AbstractLLMRequestTest.java | 15 +++++++++++++++
2 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java
index c301d46c34..a63c1535c0 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java
@@ -18,10 +18,9 @@
public class SystemPrompt {
private static final Logger log = LoggerFactory.getLogger(SystemPrompt.class);
- public static final String INSTRUCTIONS = "instructions";
- enum Action {
- REJECT, REMOVE, OVERWRITE, APPEND, PREPEND
+ public enum Action {
+ REMOVE, OVERWRITE, APPEND, PREPEND
}
private Action action;
diff --git a/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequestTest.java b/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequestTest.java
index 747b24e540..af841ffed8 100644
--- a/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequestTest.java
+++ b/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequestTest.java
@@ -51,5 +51,20 @@ public long estimateInputTokens() {
public String getSystemPrompt() {
return null;
}
+
+ @Override
+ public boolean isChatCompletion() {
+ return false;
+ }
+
+ @Override
+ public void setSystemPrompt(String systemPrompt) {
+
+ }
+
+ @Override
+ public void removeSystemPrompt() {
+
+ }
}
}
\ No newline at end of file
From 9c8f24cced6904a47bdd1aceb6ed3329a2f45ce9 Mon Sep 17 00:00:00 2001
From: thomas
Date: Thu, 28 May 2026 08:53:58 +0200
Subject: [PATCH 39/43] feat: improve policy validation and consolidate system
prompt handling across providers
- Added validation for token limits in `DefaultPolicies` and `AiApiUser` classes.
- Refactored system prompt methods (`setSystemPrompts`, `getRequestedMaxOutputTokens`) for consistency across LLM providers.
- Standardized concatenation logic for multi-prompt handling in providers (Claude, OpenAI, Google, Chat Completions).
- Enhanced error handling with `ConfigurationException` in token-related attributes.
---
.../llmgateway/DefaultPolicies.java | 13 +++++--
.../interceptor/llmgateway/SystemPrompt.java | 10 +++---
.../llmgateway/provider/LLMRequest.java | 2 +-
.../ChatCompletionsProvider.java | 2 +-
.../ChatCompletionsRequest.java | 36 ++++++++++---------
.../provider/claude/ClaudeLLMRequest.java | 11 +++---
.../provider/google/GoogleLLMRequest.java | 16 ++++++---
.../openai/AbstractOpenAiLLMRequest.java | 3 +-
.../openai/OpenAiLLMResponsesRequest.java | 11 +++---
.../llmgateway/store/AiApiUser.java | 10 ++++--
10 files changed, 69 insertions(+), 45 deletions(-)
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
index 184b591136..caa8ae52aa 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
@@ -20,6 +20,7 @@
import com.predic8.membrane.core.interceptor.Outcome;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import com.predic8.membrane.core.util.ConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -31,10 +32,10 @@
/**
* @description LLM Gateway policies for token usage and model restrictions.
*/
-@MCElement(name = "policies", id="llm-gateway-policies")
+@MCElement(name = "policies", id = "llm-gateway-policies")
public class DefaultPolicies implements Policies {
- private static final Logger log = LoggerFactory.getLogger(LLMGatewayInterceptor.class);
+ private static final Logger log = LoggerFactory.getLogger(DefaultPolicies.class);
private LLMErrorCreator errorCreator;
@@ -45,7 +46,7 @@ public class DefaultPolicies implements Policies {
public void init(LLMErrorCreator errorCreator) {
this.errorCreator = errorCreator;
}
-
+
public Outcome handleRequest(LLMRequest aiReq, Exchange exc) {
var requestedMaxOutputTokens = aiReq.getRequestedMaxOutputTokens();
@@ -107,6 +108,9 @@ public int getMaxOutputTokens() {
*/
@MCAttribute
public void setMaxOutputTokens(int maxOutputTokens) {
+ if (maxOutputTokens < 0) {
+ throw new ConfigurationException("maxOutputTokens must be >= 0"); // invalid config value: same exception type as setMaxInputTokens
+ }
this.maxOutputTokens = maxOutputTokens;
}
@@ -121,6 +125,9 @@ public int getMaxInputTokens() {
*/
@MCAttribute
public void setMaxInputTokens(int maxInputTokens) {
+ if (maxInputTokens < 0) {
+ throw new ConfigurationException("maxInputTokens must be >= 0");
+ }
this.maxInputTokens = maxInputTokens;
}
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java
index a63c1535c0..e969cec75f 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java
@@ -8,6 +8,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.List;
+
import static com.predic8.membrane.core.interceptor.Outcome.CONTINUE;
/**
@@ -23,7 +25,7 @@ public enum Action {
REMOVE, OVERWRITE, APPEND, PREPEND
}
- private Action action;
+ private Action action = Action.OVERWRITE;
private String content = "";
public Outcome handleRequest(LLMRequest aiReq, Exchange exc) {
@@ -31,15 +33,15 @@ public Outcome handleRequest(LLMRequest aiReq, Exchange exc) {
switch (action) {
case OVERWRITE -> {
log.debug("Overwriting instructions: {}", content);
- aiReq.setSystemPrompt(content);
+ aiReq.setSystemPrompts(List.of(content));
}
case PREPEND -> {
log.debug("Prepending instructions: {}", content);
- aiReq.setSystemPrompt( content + "\n" + instructions);
+ aiReq.setSystemPrompts(List.of(content, instructions));
}
case APPEND -> {
log.debug("Appending instructions: {}", content);
- aiReq.setSystemPrompt(instructions + "\n" + content);
+ aiReq.setSystemPrompts(List.of(instructions, content));
}
case REMOVE -> {
log.info("Removing instructions: {}", instructions);
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
index 64dee19dad..31a65919c9 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
@@ -44,7 +44,7 @@ public interface LLMRequest {
boolean isChatCompletion();
- void setSystemPrompt(String systemPrompt);
+ void setSystemPrompts(List prompts);
void removeSystemPrompt();
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java
index 1ac5be3699..e2089bbcc0 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java
@@ -57,6 +57,6 @@ public LLMResponse getLLMResponse(Exchange request, Consumer postPr
@Override
public LLMErrorCreator getErrorCreator() {
- return null;
+ return new ChatCompletionsErrorCreator();
}
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
index 951fb99edc..e9f11b2db9 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
@@ -14,7 +14,6 @@
package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions;
-import com.fasterxml.jackson.databind.node.ObjectNode;
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.AbstractOpenAiLLMRequest;
@@ -69,29 +68,29 @@ public String getSystemPrompt() {
}
/**
- * Sets the system prompt in the {@code "messages"} array.
- * If a system message already exists its {@code "content"} is updated in place;
- * otherwise a new {@code {"role":"system","content":"..."}} entry is prepended.
+ * Replaces all system messages with one separate {@code {"role":"system","content":"..."}} message
+ * per prompt, prepended to the messages array in list order.
*
* Chat Completions API wire format:
* <pre>{@code
- * { "messages": [{"role": "system", "content": "You are a helpful assistant."}, ...] }
+ * { "messages": [
+ * {"role": "system", "content": "prompt 1"},
+ * {"role": "system", "content": "prompt 2"},
+ * ...user messages...
+ * ]}
* }</pre>
*/
@Override
- public void setSystemPrompt(String systemPrompt) {
+ public void setSystemPrompts(List prompts) {
+ removeSystemPrompt();
var messages = json.withArray("messages");
- for (var message : messages) {
- if ("system".equals(message.path("role").asText())) {
- ((ObjectNode) message).put("content", systemPrompt);
- return;
- }
+ // Insert in reverse so that prompts[0] ends up at index 0
+ for (int i = prompts.size() - 1; i >= 0; i--) {
+ var systemMessage = json.objectNode();
+ systemMessage.put("role", "system");
+ systemMessage.put("content", prompts.get(i));
+ messages.insert(0, systemMessage);
}
- // No system message found — prepend one
- var systemMessage = json.objectNode();
- systemMessage.put("role", "system");
- systemMessage.put("content", systemPrompt);
- messages.insert(0, systemMessage);
}
/**
@@ -115,7 +114,10 @@ public boolean isChatCompletion() {
@Override
public long getRequestedMaxOutputTokens() {
- return json.path("max_completion_tokens").asLong(0);
+ // Prefer max_completion_tokens (modern OpenAI/o1+), fall back to max_tokens (legacy / all other providers)
+ long v = json.path("max_completion_tokens").asLong(0);
+ if (v > 0) return v;
+ return json.path("max_tokens").asLong(0);
}
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java
index e99d06a979..40eb7261ec 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java
@@ -20,6 +20,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.List;
+
/**
* system field for system prompt
*/
@@ -124,15 +126,14 @@ public void setApiKey(String apiKey) {
}
/**
- * Sets the top-level {@code "system"} field to {@code systemPrompt}.
- * Replaces any existing system prompt.
+ * Concatenates all prompts (newline-separated) into the top-level {@code "system"} field.
*
* Claude API wire format:
- * <pre>{@code { "system": "You are a helpful assistant.", "messages": [...] }}</pre>
+ * {@code { "system": "prompt 1\nprompt 2", "messages": [...] }}
*/
@Override
- public void setSystemPrompt(String systemPrompt) {
- json.put("system", systemPrompt);
+ public void setSystemPrompts(List prompts) {
+ json.put("system", String.join("\n", prompts));
}
/**
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java
index ad7d4328a3..adb62a63f4 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java
@@ -19,6 +19,8 @@
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest;
+import java.util.List;
+
public class GoogleLLMRequest extends AbstractLLMRequest {
/**
@@ -117,15 +119,18 @@ public String getSystemPrompt() {
}
/**
- * Sets {@code systemInstruction} to a single text part carrying {@code systemPrompt}.
- * Replaces any existing system instruction.
+ * Concatenates all prompts (newline-separated) into a single text part under
+ * {@code systemInstruction}. Replaces any existing system instruction.
+ *
+ * Gemini API wire format:
+ * <pre>{@code { "systemInstruction": { "parts": [{ "text": "prompt 1\nprompt 2" }] } }}</pre>
*/
@Override
- public void setSystemPrompt(String systemPrompt) {
+ public void setSystemPrompts(List prompts) {
json.putObject("systemInstruction")
.putArray("parts")
.addObject()
- .put("text", systemPrompt);
+ .put("text", String.join("\n", prompts));
}
/**
@@ -139,7 +144,8 @@ public void removeSystemPrompt() {
@Override
public boolean isChatCompletion() {
- return exchange.getRequest().getUri().contains("/chat/completions");
+ // Gemini uses its own generateContent API, not Chat Completions
+ return false;
}
private long countText(JsonNode node) {
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java
index b49e7440fc..0686831bba 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java
@@ -31,8 +31,9 @@ public long estimateInputTokens() {
chars += estimateChatCompletitions();
- // system instructions
+ // system instructions: "system" (chat completions) or "instructions" (responses API)
chars += countText(json.path("system"));
+ chars += countText(json.path("instructions"));
// tools/functions contribute significantly
chars += countJsonSize(json.path("tools"));
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java
index ef7867f1d0..7825772d92 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java
@@ -39,7 +39,7 @@ public List getTools() {
@Override
public String getSystemPrompt() {
- return json.path("instructions").asText();
+ return json.path("instructions").asText("");
}
@Override
@@ -48,15 +48,14 @@ public boolean isChatCompletion() {
}
/**
- * Sets the {@code "instructions"} field, which is the system prompt in the
- * OpenAI Responses API. Replaces any existing value.
+ * Concatenates all prompts (newline-separated) into the {@code "instructions"} field.
*
* OpenAI Responses API wire format:
- * <pre>{@code { "instructions": "You are a helpful assistant.", "input": "..." }}</pre>
+ * {@code { "instructions": "prompt 1\nprompt 2", "input": "..." }}
*/
@Override
- public void setSystemPrompt(String systemPrompt) {
- json.put("instructions", systemPrompt);
+ public void setSystemPrompts(List prompts) {
+ json.put("instructions", String.join("\n", prompts));
}
/**
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiUser.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiUser.java
index da8b792680..d2a5c9b018 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiUser.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiUser.java
@@ -16,12 +16,13 @@
import com.predic8.membrane.annot.MCAttribute;
import com.predic8.membrane.annot.MCElement;
+import com.predic8.membrane.core.util.ConfigurationException;
import java.util.concurrent.atomic.AtomicLong;
import static java.lang.Long.MAX_VALUE;
-@MCElement(name = "users", component = false, id="ai-api-users")
+@MCElement(name = "users", component = false, id = "ai-api-users")
public class AiApiUser {
private String name;
@@ -33,6 +34,7 @@ public class AiApiUser {
/**
* Updates the store with the number of tokens used in this call
+ *
* @param usage The number of tokens used
*/
public void addTokensUsedInPeriod(Usage usage) {
@@ -45,6 +47,7 @@ public void resetTokensUsedInPeriod() {
/**
* Checks if the user has enough tokens to make the request.
+ *
* @param tokensNeededForRequest The number of tokens that the user needs to make the request
* @return The estimated number of tokens that the user has left after this request
*/
@@ -59,8 +62,8 @@ public String getName() {
}
/**
- * @description Name of the API user, group or cost center.
* @param name of the user
+ * @description Name of the API user, group or cost center.
*/
@MCAttribute()
public void setName(String name) {
@@ -93,6 +96,9 @@ public long getTokens() {
*/
@MCAttribute
public void setTokens(long tokens) {
+ if (tokens < 0) {
+ throw new ConfigurationException("tokens must be >= 0");
+ }
this.tokens = tokens;
}
From 9f615abf7d198d52036f601df24be74fd16459fa Mon Sep 17 00:00:00 2001
From: thomas
Date: Fri, 29 May 2026 15:05:31 +0200
Subject: [PATCH 40/43] feat: add AbstractLLMProvider, extend request handling,
and improve multipart utility
- Introduced `AbstractLLMProvider` to streamline and centralize LLM request type handling.
- Added `AbstractLLMRequest` and extended support for specialized requests (e.g., `AudioRequest`, `ImagesRequest`, `FilesRequest`, `OrganizationRequest`).
- Implemented `MultipartUtil` to simplify handling of multipart HTTP messages.
- Updated providers (OpenAI, Claude, Chat Completions) to align with new abstractions and support for `IOException`.
- Enhanced test coverage with `MultipartUtilTest` and additional provider-specific unit tests.
---
.../predic8/membrane/core/http/Header.java | 63 +++--
.../predic8/membrane/core/http/Message.java | 7 +
.../llmgateway/DefaultPolicies.java | 48 ++--
.../llmgateway/LLMGatewayInterceptor.java | 66 ++---
.../interceptor/llmgateway/NullPolicies.java | 4 +-
.../core/interceptor/llmgateway/Policies.java | 4 +-
.../interceptor/llmgateway/SystemPrompt.java | 14 +-
.../provider/AbstractLLMProvider.java | 35 +++
.../provider/AbstractLLMRequest.java | 58 +----
.../llmgateway/provider/JSONRequest.java | 8 +
.../llmgateway/provider/LLMProvider.java | 3 +-
.../llmgateway/provider/LLMRequest.java | 27 --
.../provider/ModelInputRequest.java | 30 +++
.../ChatCompletionsProvider.java | 3 +-
.../ChatCompletionsRequest.java | 8 +-
.../provider/claude/ClaudeLLMRequest.java | 13 +-
.../provider/claude/ClaudeProvider.java | 3 +-
.../provider/google/GoogleLLMRequest.java | 14 +-
.../provider/google/GoogleProvider.java | 3 +-
.../openai/AbstractOpenAiLLMRequest.java | 8 +-
.../provider/openai/AudioRequest.java | 12 +
.../provider/openai/FilesRequest.java | 12 +
.../provider/openai/ImagesRequest.java | 11 +
.../openai/OpenAIChatCompletionsRequest.java | 4 +-
.../provider/openai/OpenAIProvider.java | 18 +-
.../openai/OpenAiLLMResponsesRequest.java | 8 +-
.../provider/openai/OrganizationRequest.java | 11 +
.../llmgateway/store/SimpleAiApiStore.java | 2 +-
.../core/interceptor/log/LogInterceptor.java | 4 +
.../core/multipart/MultipartUtil.java | 98 +++++++
.../predic8/membrane/core/multipart/Part.java | 243 ++++++++----------
.../core/multipart/XOPReconstitutor.java | 102 +++++---
.../membrane/core/http/HeaderTest.java | 61 ++++-
...ava => AbstractModelInputRequestTest.java} | 17 +-
.../core/multipart/MultipartUtilTest.java | 229 +++++++++++++++++
.../BasicClaudeLLMGatewayTutorialTest.java | 2 +
.../SharingApiKeysOpenAiTutorialTest.java | 1 +
37 files changed, 858 insertions(+), 396 deletions(-)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMProvider.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/JSONRequest.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/ModelInputRequest.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AudioRequest.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/FilesRequest.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/ImagesRequest.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OrganizationRequest.java
create mode 100644 core/src/main/java/com/predic8/membrane/core/multipart/MultipartUtil.java
rename core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/{AbstractLLMRequestTest.java => AbstractModelInputRequestTest.java} (79%)
create mode 100644 core/src/test/java/com/predic8/membrane/core/multipart/MultipartUtilTest.java
diff --git a/core/src/main/java/com/predic8/membrane/core/http/Header.java b/core/src/main/java/com/predic8/membrane/core/http/Header.java
index 68844f5a18..cc435d43c4 100644
--- a/core/src/main/java/com/predic8/membrane/core/http/Header.java
+++ b/core/src/main/java/com/predic8/membrane/core/http/Header.java
@@ -15,29 +15,41 @@
package com.predic8.membrane.core.http;
import com.predic8.membrane.annot.Constants;
-import com.predic8.membrane.core.http.cookie.*;
-import com.predic8.membrane.core.util.*;
-import jakarta.mail.internet.*;
-import org.jetbrains.annotations.*;
-import org.slf4j.*;
-
-import java.io.*;
-import java.security.*;
-import java.util.*;
+import com.predic8.membrane.core.http.cookie.Cookies;
+import com.predic8.membrane.core.http.cookie.MimeHeaders;
+import com.predic8.membrane.core.http.cookie.ServerCookie;
+import com.predic8.membrane.core.util.EndOfStreamException;
+import com.predic8.membrane.core.util.HttpUtil;
+import jakarta.mail.internet.ContentType;
+import jakarta.mail.internet.ParseException;
+import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.security.InvalidParameterException;
import java.util.ArrayList;
-import java.util.function.*;
-import java.util.regex.*;
-import java.util.stream.*;
-
-import static com.predic8.membrane.core.http.MimeType.*;
-import static com.predic8.membrane.core.util.HttpUtil.*;
-import static java.nio.charset.StandardCharsets.*;
-import static java.util.Arrays.*;
-import static java.util.Collections.*;
+import java.util.List;
+import java.util.Set;
+import java.util.function.Predicate;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+import static com.predic8.membrane.core.http.MimeType.isBinary;
+import static com.predic8.membrane.core.util.HttpUtil.readLine;
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.util.Arrays.stream;
+import static java.util.Collections.unmodifiableList;
import static java.util.Locale.ROOT;
-import static java.util.regex.Pattern.*;
-import static java.util.stream.Collectors.*;
-import static org.apache.commons.codec.binary.Base64.*;
+import static java.util.regex.Pattern.CASE_INSENSITIVE;
+import static java.util.regex.Pattern.compile;
+import static java.util.stream.Collectors.joining;
+import static java.util.stream.Collectors.toSet;
+import static org.apache.commons.codec.binary.Base64.encodeBase64;
/**
* The headers of an HTTP message.
@@ -331,6 +343,15 @@ public String getContentType() {
return getFirstValue(CONTENT_TYPE);
}
+ /**
+ * Returns {@code true} if the {@code Content-Type} header starts with {@code multipart/}
+ * (e.g. {@code multipart/form-data}, {@code multipart/related}, {@code multipart/mixed}).
+ */
+ public boolean isMultipart() {
+ String ct = getContentType();
+ return ct != null && ct.regionMatches(true, 0, "multipart/", 0, 10);
+ }
+
public String getUserAgent() {
return getFirstValue(USER_AGENT);
}
diff --git a/core/src/main/java/com/predic8/membrane/core/http/Message.java b/core/src/main/java/com/predic8/membrane/core/http/Message.java
index f38024aae0..b96755086f 100644
--- a/core/src/main/java/com/predic8/membrane/core/http/Message.java
+++ b/core/src/main/java/com/predic8/membrane/core/http/Message.java
@@ -338,6 +338,13 @@ public boolean isImage() {
return MimeType.isImage(getHeader().getContentType());
}
+ /**
+ * @return true if the message has a media type of image/*, audio/*, video/*, octet-stream, or application/octet-stream
+ */
+ public boolean isBinary() {
+ return MimeType.isBinary(getHeader().getContentType());
+ }
+
public boolean isXML() {
return MimeType.isXML(getHeader().getContentType());
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
index caa8ae52aa..7c6f9612c4 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
@@ -19,7 +19,8 @@
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.Outcome;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.OrganizationRequest;
import com.predic8.membrane.core.util.ConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -47,18 +48,44 @@ public void init(LLMErrorCreator errorCreator) {
this.errorCreator = errorCreator;
}
- public Outcome handleRequest(LLMRequest aiReq, Exchange exc) {
+ public Outcome handleRequest(ModelInputRequest mir, Exchange exc) {
- var requestedMaxOutputTokens = aiReq.getRequestedMaxOutputTokens();
- var inputTokens = aiReq.estimateInputTokens();
+ if (mir instanceof OrganizationRequest) {
+ return CONTINUE;
+ }
+
+ var outcome = checkTokenLimits(mir, exc);
+ if (outcome != CONTINUE) {
+ return outcome;
+ }
+ outcome = checkModel(mir, exc);
+ if (outcome != CONTINUE) {
+ return outcome;
+ }
+ return CONTINUE;
+ }
+
+ public Outcome checkModel(ModelInputRequest mir, Exchange exc) {
+ var model = mir.getModel();
+ if (models != null && !models.contains(model)) {
+ exc.setResponse(errorCreator.modelNotAllowed(model, models));
+ return RETURN;
+ }
+ return CONTINUE;
+ }
+
+ public Outcome checkTokenLimits(ModelInputRequest mir, Exchange exc) {
+
+ var requestedMaxOutputTokens = mir.getRequestedMaxOutputTokens();
+ var inputTokens = mir.estimateInputTokens();
if (maxOutputTokens > 0) {
if (requestedMaxOutputTokens <= 0) {
log.info("No max. output requested. Setting limit to {}.", maxOutputTokens);
- aiReq.setMaxOutputTokens(maxOutputTokens);
+ mir.setMaxOutputTokens(maxOutputTokens);
} else if (requestedMaxOutputTokens > maxOutputTokens) {
log.info("Requested max. output tokens {} exceed the limit. Setting limit to {}.", requestedMaxOutputTokens, maxOutputTokens);
- aiReq.setMaxOutputTokens(maxOutputTokens);
+ mir.setMaxOutputTokens(maxOutputTokens);
}
}
@@ -69,15 +96,6 @@ public Outcome handleRequest(LLMRequest aiReq, Exchange exc) {
return RETURN;
}
}
-
- if (models != null) {
- var model = aiReq.getModel();
- if (!models.contains(model)) {
- exc.setResponse(errorCreator.modelNotAllowed(model, models));
- return RETURN;
- }
- }
-
return CONTINUE;
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java
index 8bca1b36c4..fca36d7066 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java
@@ -23,6 +23,7 @@
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest;
import com.predic8.membrane.core.interceptor.llmgateway.store.AiApiStore;
import com.predic8.membrane.core.interceptor.llmgateway.store.AiApiUser;
import com.predic8.membrane.core.util.ConfigurationException;
@@ -31,7 +32,6 @@
import static com.predic8.membrane.core.interceptor.Outcome.CONTINUE;
import static com.predic8.membrane.core.interceptor.Outcome.RETURN;
-import static com.predic8.membrane.core.util.json.JsonUtil.setJsonBody;
/*
* @description
@@ -81,23 +81,17 @@ public void init() {
@Override
public Outcome handleRequest(Exchange exc) {
- LLMRequest aiReq;
+ LLMRequest llmReq;
try {
- aiReq = provider.getLLMRequest(exc);
+ llmReq = provider.getLLMRequest(exc);
} catch (Exception e) {
exc.setResponse(errorCreator.invalidRequestError("Error parsing request: " + e.getMessage()));
return RETURN;
}
- if (!exc.getRequest().isPOSTRequest()) {
- if (apiKey != null)
- aiReq.setApiKey(apiKey);
- return CONTINUE;
- }
-
AiApiUser user = null;
if (store != null) {
- var opt = store.getUser(aiReq.getApiKey());
+ var opt = store.getUser(llmReq.getApiKey());
if (opt.isEmpty()) {
exc.setResponse(errorCreator.authenticationFailed());
return RETURN;
@@ -107,41 +101,53 @@ public Outcome handleRequest(Exchange exc) {
exc.setProperty(MEMBRANE_AI_USER, user);
}
- long inputTokens = aiReq.estimateInputTokens();
- log.debug("Estimated input tokens: {}", inputTokens);
-
- // Check store limits
- if (store != null) {
- var effectiveMaxTokens = computeEffectiveMaxOutputTokens(aiReq.getRequestedMaxOutputTokens(), policies.getMaxOutputTokens());
- var remaining = store.checkLimit(user, inputTokens, effectiveMaxTokens);
- log.debug("User {} has {} remaining tokens left", user, remaining);
- if (remaining <= 0) {
- log.info("Token limit exceeded. Remaining: {} input: {} maxOutput: {}", remaining, inputTokens, effectiveMaxTokens);
- exc.setResponse(errorCreator.tokenLimitExceeded(inputTokens + effectiveMaxTokens, remaining, store.getRemainingResetTime()));
- return RETURN;
- }
- }
-
// If APIKey is specified, use that for the LLM. Overwrites keys from the client
if (apiKey != null) {
- aiReq.setApiKey(apiKey);
+ llmReq.setApiKey(apiKey);
+ }
+
+ if (!exc.getRequest().isPOSTRequest()) {
+ return CONTINUE;
}
- log.debug("Requested model: {}", aiReq.getModel());
+ if (!(llmReq instanceof ModelInputRequest mir)) {
+ return CONTINUE;
+ }
- var outcome = policies.handleRequest(aiReq,exc);
+ var outcome = policies.handleRequest(mir, exc);
if (outcome != CONTINUE) {
return outcome;
}
if (systemPrompt != null) {
- outcome = systemPrompt.handleRequest(aiReq,exc);
+ outcome = systemPrompt.handleRequest(mir, exc);
if (outcome != CONTINUE) {
return outcome;
}
}
- setJsonBody(exc.getRequest(), aiReq.getJson());
+ // Check store limits
+ if (checkStoreLimits(exc, mir, user) != CONTINUE) {
+ return RETURN;
+ }
+
+ exc.getRequest().setBodyContent(mir.getBody().getContent());
+ return CONTINUE;
+ }
+
+ private Outcome checkStoreLimits(Exchange exc, ModelInputRequest mir, AiApiUser user) {
+ long inputTokens = mir.estimateInputTokens();
+ log.debug("Estimated input tokens: {}", inputTokens);
+ if (store != null) {
+ var effectiveMaxTokens = computeEffectiveMaxOutputTokens(mir.getRequestedMaxOutputTokens(), policies.getMaxOutputTokens());
+ var remaining = store.checkLimit(user, inputTokens, effectiveMaxTokens);
+ log.debug("User {} has {} remaining tokens left", user, remaining);
+ if (remaining <= 0) {
+ log.info("Token limit exceeded. Remaining: {} input: {} maxOutput: {}", remaining, inputTokens, effectiveMaxTokens);
+ exc.setResponse(errorCreator.tokenLimitExceeded(inputTokens + effectiveMaxTokens, remaining, store.getRemainingResetTime()));
+ return RETURN;
+ }
+ }
return CONTINUE;
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/NullPolicies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/NullPolicies.java
index 8acd6df555..a1ba392b3b 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/NullPolicies.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/NullPolicies.java
@@ -3,14 +3,14 @@
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.Outcome;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest;
import static com.predic8.membrane.core.interceptor.Outcome.CONTINUE;
public class NullPolicies implements Policies {
@Override
- public Outcome handleRequest(LLMRequest aiReq, Exchange exc) {
+ public Outcome handleRequest(ModelInputRequest mir, Exchange exc) {
return CONTINUE;
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java
index 62419f0ed2..fc742e30ce 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java
@@ -3,11 +3,11 @@
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.Outcome;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest;
public interface Policies {
- Outcome handleRequest(LLMRequest aiReq, Exchange exc);
+ Outcome handleRequest(ModelInputRequest mir, Exchange exc);
void init(LLMErrorCreator errorCreator);
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java
index e969cec75f..e2382e0135 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java
@@ -4,7 +4,7 @@
import com.predic8.membrane.annot.MCElement;
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.Outcome;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -28,24 +28,24 @@ public enum Action {
private Action action = Action.OVERWRITE;
private String content = "";
- public Outcome handleRequest(LLMRequest aiReq, Exchange exc) {
- var instructions = aiReq.getSystemPrompt() == null ? "" : aiReq.getSystemPrompt();
+ public Outcome handleRequest(ModelInputRequest mir, Exchange exc) {
+ var instructions = mir.getSystemPrompt() == null ? "" : mir.getSystemPrompt();
switch (action) {
case OVERWRITE -> {
log.debug("Overwriting instructions: {}", content);
- aiReq.setSystemPrompts(List.of(content));
+ mir.setSystemPrompts(List.of(content));
}
case PREPEND -> {
log.debug("Prepending instructions: {}", content);
- aiReq.setSystemPrompts(List.of(content, instructions));
+ mir.setSystemPrompts(List.of(content, instructions));
}
case APPEND -> {
log.debug("Appending instructions: {}", content);
- aiReq.setSystemPrompts(List.of(instructions, content));
+ mir.setSystemPrompts(List.of(instructions, content));
}
case REMOVE -> {
log.info("Removing instructions: {}", instructions);
- aiReq.removeSystemPrompt();
+ mir.removeSystemPrompt();
}
}
return CONTINUE;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMProvider.java
new file mode 100644
index 0000000000..6273c2c5db
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMProvider.java
@@ -0,0 +1,49 @@
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.http.ReadingBodyException;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.AudioRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.FilesRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.ImagesRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.OrganizationRequest;
+
+import java.io.IOException;
+
+/**
+ * Base class for {@link LLMProvider}s that pick the request wrapper from the request URI.
+ * Subclasses still have to supply the remaining {@link LLMProvider} methods.
+ */
+public abstract class AbstractLLMProvider implements LLMProvider {
+
+    /**
+     * Creates the request wrapper that matches the path of the incoming request.
+     * A URI that matches none of the known endpoints is rejected with a {@link ReadingBodyException}.
+     *
+     * @param exchange exchange whose request URI selects the wrapper type
+     * @return the wrapper for the matched {@code /v1/...} endpoint
+     * @throws IOException declared because several wrapper constructors declare it
+     */
+    @Override
+    public LLMRequest getLLMRequest(Exchange exchange) throws IOException {
+        var uri = exchange.getRequest().getUri();
+        // Every endpoint is matched as a path prefix so that a "/v1/..." fragment appearing
+        // later in the URI (for example inside a query string) cannot select a wrapper.
+        if (uri.startsWith("/v1/chat/completions")) {
+            return new ChatCompletionsRequest(exchange);
+        }
+        if (uri.startsWith("/v1/files")) {
+            return new FilesRequest(exchange);
+        }
+        if (uri.startsWith("/v1/images")) {
+            return new ImagesRequest(exchange);
+        }
+        if (uri.startsWith("/v1/audio")) {
+            return new AudioRequest(exchange);
+        }
+        if (uri.startsWith("/v1/organization")) {
+            return new OrganizationRequest(exchange);
+        }
+        throw new ReadingBodyException("Unknown request: " + uri);
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java
index 54df557594..4c7c8fded3 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java
@@ -1,60 +1,15 @@
-/* Copyright 2026 predic8 GmbH, www.predic8.com
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
-
package com.predic8.membrane.core.interceptor.llmgateway.provider;
-import com.fasterxml.jackson.databind.node.ArrayNode;
-import com.fasterxml.jackson.databind.node.ObjectNode;
import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.util.json.JsonUtil;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Collections;
-import java.util.List;
import static com.predic8.membrane.core.http.Header.AUTHORIZATION;
-public abstract class AbstractLLMRequest extends AbstractLLMMessage implements LLMRequest {
-
- private static final Logger log = LoggerFactory.getLogger(AbstractLLMRequest.class);
+public class AbstractLLMRequest extends AbstractLLMMessage implements LLMRequest {
public static final String BEARER_PREFIX = "Bearer";
- protected ObjectNode json;
-
- public AbstractLLMRequest(Exchange exchange) {
+ protected AbstractLLMRequest(Exchange exchange) {
super(exchange);
-
- if (exchange.getRequest().isJSON()) {
- json = JsonUtil.getJsonObject(exchange.getRequest()).orElseThrow(() -> new RuntimeException("Cannot parse input as JSON message."));
- } else {
- log.info("Request is not JSON:");
- throw new RuntimeException("Request is not JSON.");
- }
- }
-
- public List getTools() {
- return Collections.emptyList();
- }
-
- protected ArrayNode getToolsNode() {
- if (json == null)
- return null;
- if (json.path("tools").isArray())
- return (ArrayNode) json.path("tools");
- return null;
}
@Override
@@ -79,13 +34,4 @@ public String getApiKey() {
return token.isEmpty() ? null : token;
}
- @Override
- public ObjectNode getJson() {
- return json;
- }
-
- @Override
- public String getModel() {
- return json.path("model").asText();
- }
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/JSONRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/JSONRequest.java
new file mode 100644
index 0000000000..2859b6fc38
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/JSONRequest.java
@@ -0,0 +1,17 @@
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+/**
+ * A message that exposes its content as a Jackson JSON object.
+ *
+ * NOTE(review): this type is declared in {@code JSONRequest.java}; consider aligning the
+ * file name with the type name.
+ */
+interface JSONMessage {
+
+    /**
+     * @return the JSON object held by this message
+     */
+    ObjectNode getJson();
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java
index 1fb2fc4eae..457597d70e 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java
@@ -16,11 +16,12 @@
import com.predic8.membrane.core.exchange.Exchange;
+import java.io.IOException;
import java.util.function.Consumer;
public interface LLMProvider {
- LLMRequest getLLMRequest(Exchange request);
+ LLMRequest getLLMRequest(Exchange request) throws IOException;
LLMResponse getLLMResponse(Exchange request, Consumer postProcessor);
LLMErrorCreator getErrorCreator();
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
index 31a65919c9..f80230a755 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
@@ -14,38 +14,11 @@
package com.predic8.membrane.core.interceptor.llmgateway.provider;
-import com.fasterxml.jackson.databind.node.ObjectNode;
-
-import java.util.List;
-
public interface LLMRequest {
- String getModel();
-
String getApiKey();
void setApiKey(String apiKey);
- /**
- * The max number of tokens that the model is allowed to generate as specified by the client.
- * @return The max number of tokens that the model is allowed to generate. -1 if no limit is set.
- */
- long getRequestedMaxOutputTokens();
-
- void setMaxOutputTokens(int maxOutputTokens);
-
- long estimateInputTokens();
-
- ObjectNode getJson();
-
- List getTools();
-
- String getSystemPrompt();
-
- boolean isChatCompletion();
-
- void setSystemPrompts(List prompts);
-
- void removeSystemPrompt();
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/ModelInputRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/ModelInputRequest.java
new file mode 100644
index 0000000000..4a779a140c
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/ModelInputRequest.java
@@ -0,0 +1,58 @@
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.predic8.membrane.core.http.AbstractBody;
+
+import java.util.List;
+
+/**
+ * Contract for requests that carry model input, i.e. the requests the gateway applies
+ * policies, system-prompt handling and token limits to.
+ */
+public interface ModelInputRequest extends JSONMessage {
+
+    /**
+     * @return the model named by the request
+     */
+    String getModel();
+
+    /**
+     * The max number of tokens that the model is allowed to generate as specified by the client.
+     *
+     * @return The max number of tokens that the model is allowed to generate. -1 if no limit is set.
+     */
+    long getRequestedMaxOutputTokens();
+
+    /**
+     * @param maxOutputTokens limit to set for the number of tokens the model may generate
+     */
+    void setMaxOutputTokens(int maxOutputTokens);
+
+    /**
+     * @return an estimate of the number of input tokens of this request
+     */
+    long estimateInputTokens();
+
+    // NOTE(review): the element type of the tool list is not declared here; add a type
+    // argument once it is confirmed against the implementations.
+    List getTools();
+
+    /**
+     * @return the system prompt of the request; callers should be prepared for {@code null}
+     */
+    String getSystemPrompt();
+
+    /**
+     * @param prompts system prompts to set on the request
+     */
+    void setSystemPrompts(List<String> prompts);
+
+    /**
+     * Removes the system prompt from the request.
+     */
+    void removeSystemPrompt();
+
+    /**
+     * @return the body of the request
+     */
+    AbstractBody getBody();
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java
index e2089bbcc0..8f1a7a491a 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java
@@ -21,6 +21,7 @@
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+import java.io.IOException;
import java.util.function.Consumer;
/**
@@ -46,7 +47,7 @@
@MCElement(name = "chatCompletions")
public class ChatCompletionsProvider implements LLMProvider {
@Override
- public LLMRequest getLLMRequest(Exchange request) {
+ public LLMRequest getLLMRequest(Exchange request) throws IOException {
return new ChatCompletionsRequest(request);
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
index e9f11b2db9..b50a536131 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
@@ -17,13 +17,14 @@
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.AbstractOpenAiLLMRequest;
+import java.io.IOException;
import java.util.List;
import static java.util.Collections.emptyList;
public class ChatCompletionsRequest extends AbstractOpenAiLLMRequest {
- public ChatCompletionsRequest(Exchange exchange) {
+ public ChatCompletionsRequest(Exchange exchange) throws IOException {
super(exchange);
if (json == null) {
@@ -107,11 +108,6 @@ public void removeSystemPrompt() {
}
}
- @Override
- public boolean isChatCompletion() {
- return true;
- }
-
@Override
public long getRequestedMaxOutputTokens() {
// Prefer max_completion_tokens (modern OpenAI/o1+), fall back to max_tokens (legacy / all other providers)
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java
index 40eb7261ec..1a0e66c3c3 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java
@@ -16,22 +16,24 @@
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractModelInputRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
import java.util.List;
/**
* system field for system prompt
*/
-public class ClaudeLLMRequest extends AbstractLLMRequest {
+public class ClaudeLLMRequest extends AbstractModelInputRequest implements ModelInputRequest {
private static final Logger log = LoggerFactory.getLogger(ClaudeLLMRequest.class);
public static final String X_API_KEY = "x-api-key";
- public ClaudeLLMRequest(Exchange exchange) {
+ public ClaudeLLMRequest(Exchange exchange) throws IOException {
super(exchange);
exchange.getRequest().getHeader().setValue( "Accept-Encoding","identity");
@@ -94,11 +96,6 @@ public String getSystemPrompt() {
return json.path("system").asText("");
}
- @Override
- public boolean isChatCompletion() {
- return false;
- }
-
private boolean isThinking() {
var thinking = json.path("thinking");
return thinking.isObject() && "enabled".equals(thinking.path("type").asText());
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java
index a296575058..decc7048b2 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java
@@ -21,6 +21,7 @@
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+import java.io.IOException;
import java.util.function.Consumer;
/**
@@ -31,7 +32,7 @@
public class ClaudeProvider implements LLMProvider {
@Override
- public LLMRequest getLLMRequest(Exchange exchange) {
+ public LLMRequest getLLMRequest(Exchange exchange) throws IOException {
return new ClaudeLLMRequest(exchange);
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java
index adb62a63f4..90f1b1ab36 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java
@@ -17,18 +17,20 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractModelInputRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest;
+import java.io.IOException;
import java.util.List;
-public class GoogleLLMRequest extends AbstractLLMRequest {
+public class GoogleLLMRequest extends AbstractModelInputRequest implements ModelInputRequest {
/**
* x-goog-api-key is correct it is not google
*/
public static final String X_GOOG_API_KEY = "x-goog-api-key";
- public GoogleLLMRequest(Exchange exchange) {
+ public GoogleLLMRequest(Exchange exchange) throws IOException {
super(exchange);
}
@@ -142,12 +144,6 @@ public void removeSystemPrompt() {
json.remove("systemInstruction");
}
- @Override
- public boolean isChatCompletion() {
- // Gemini uses its own generateContent API, not Chat Completions
- return false;
- }
-
private long countText(JsonNode node) {
if (node == null || node.isMissingNode() || node.isNull()) {
return 0;
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleProvider.java
index b1b36ea1df..0654b9b52f 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleProvider.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleProvider.java
@@ -21,6 +21,7 @@
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+import java.io.IOException;
import java.util.function.Consumer;
/**
@@ -31,7 +32,7 @@
public class GoogleProvider implements LLMProvider {
@Override
- public LLMRequest getLLMRequest(Exchange exchange) {
+ public LLMRequest getLLMRequest(Exchange exchange) throws IOException {
return new GoogleLLMRequest(exchange);
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java
index 0686831bba..9e75ef5ec5 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java
@@ -16,11 +16,13 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractModelInputRequest;
-public abstract class AbstractOpenAiLLMRequest extends AbstractLLMRequest {
+import java.io.IOException;
- public AbstractOpenAiLLMRequest(Exchange exchange) {
+public abstract class AbstractOpenAiLLMRequest extends AbstractModelInputRequest {
+
+ public AbstractOpenAiLLMRequest(Exchange exchange) throws IOException {
super(exchange);
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AudioRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AudioRequest.java
new file mode 100644
index 0000000000..df028457a3
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AudioRequest.java
@@ -0,0 +1,20 @@
+package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;
+
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractModelInputRequest;
+
+import java.io.IOException;
+
+/**
+ * Request wrapper for requests whose URI matches the {@code /v1/audio} path.
+ */
+public class AudioRequest extends AbstractModelInputRequest {
+
+    /**
+     * @param exchange exchange carrying the audio request
+     * @throws IOException if the superclass constructor fails
+     */
+    public AudioRequest(Exchange exchange) throws IOException {
+        super(exchange);
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/FilesRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/FilesRequest.java
new file mode 100644
index 0000000000..cc56d7b492
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/FilesRequest.java
@@ -0,0 +1,17 @@
+package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;
+
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest;
+
+/**
+ * Request wrapper for requests whose URI matches the {@code /v1/files} path.
+ * It extends {@link AbstractLLMRequest} directly and adds no behavior of its own.
+ */
+public class FilesRequest extends AbstractLLMRequest {
+
+    /**
+     * @param exchange exchange carrying the files request
+     */
+    public FilesRequest(Exchange exchange) {
+        super(exchange);
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/ImagesRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/ImagesRequest.java
new file mode 100644
index 0000000000..74ac706d04
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/ImagesRequest.java
@@ -0,0 +1,17 @@
+package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;
+
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest;
+
+/**
+ * Request wrapper for requests whose URI matches the {@code /v1/images} path.
+ * It extends {@link AbstractLLMRequest} directly and adds no behavior of its own.
+ */
+public class ImagesRequest extends AbstractLLMRequest {
+
+    /**
+     * @param exchange exchange carrying the images request
+     */
+    public ImagesRequest(Exchange exchange) {
+        super(exchange);
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIChatCompletionsRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIChatCompletionsRequest.java
index 8c6e474398..b26e2794e2 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIChatCompletionsRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIChatCompletionsRequest.java
@@ -17,8 +17,10 @@
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsRequest;
+import java.io.IOException;
+
public class OpenAIChatCompletionsRequest extends ChatCompletionsRequest {
- public OpenAIChatCompletionsRequest(Exchange exchange) {
+ public OpenAIChatCompletionsRequest(Exchange exchange) throws IOException {
super(exchange);
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java
index e55d40bd47..9798483ed5 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java
@@ -16,13 +16,14 @@
import com.predic8.membrane.annot.MCElement;
import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMProvider;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsErrorCreator;
import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsResponse;
+import java.io.IOException;
import java.util.function.Consumer;
/**
@@ -30,19 +31,21 @@
* Use to configure a LLM gateway to use the OpenAI API
*/
@MCElement( name="openai")
-public class OpenAIProvider implements LLMProvider {
+public class OpenAIProvider extends AbstractLLMProvider {
@Override
- public LLMRequest getLLMRequest(Exchange exchange) {
- if (isResponsesApi(exchange)) {
+ public LLMRequest getLLMRequest(Exchange exchange) throws IOException {
+ var uri = exchange.getRequest().getUri();
+ if (uri.startsWith("/v1/responses")) {
return new OpenAiLLMResponsesRequest(exchange);
}
- return new OpenAIChatCompletionsRequest(exchange);
+ return super.getLLMRequest(exchange);
}
@Override
public LLMResponse getLLMResponse(Exchange exchange, Consumer postProcessor) {
- if (isResponsesApi(exchange)) {
+ var uri = exchange.getRequest().getUri();
+ if (uri.startsWith("/v1/responses")) {
return new OpenAiLLMResponsesResponse(exchange,postProcessor);
}
return new ChatCompletionsResponse(exchange, postProcessor);
@@ -53,7 +56,4 @@ public LLMErrorCreator getErrorCreator() {
return new ChatCompletionsErrorCreator();
}
- static boolean isResponsesApi(Exchange exchange) {
- return exchange.getRequest().getUri().startsWith("/v1/responses");
- }
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java
index 7825772d92..945ede46e4 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java
@@ -16,13 +16,14 @@
import com.predic8.membrane.core.exchange.Exchange;
+import java.io.IOException;
import java.util.List;
import static java.util.Collections.emptyList;
public class OpenAiLLMResponsesRequest extends AbstractOpenAiLLMRequest {
- public OpenAiLLMResponsesRequest(Exchange exchange) {
+ public OpenAiLLMResponsesRequest(Exchange exchange) throws IOException {
super(exchange);
}
@@ -42,11 +43,6 @@ public String getSystemPrompt() {
return json.path("instructions").asText("");
}
- @Override
- public boolean isChatCompletion() {
- return false;
- }
-
/**
* Concatenates all prompts (newline-separated) into the {@code "instructions"} field.
*
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OrganizationRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OrganizationRequest.java
new file mode 100644
index 0000000000..8d4b5bbb25
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OrganizationRequest.java
@@ -0,0 +1,19 @@
+package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;
+
+import com.predic8.membrane.core.exchange.Exchange;
+
+import java.io.IOException;
+
+/**
+ * Request wrapper for requests whose URI matches the {@code /v1/organization} path.
+ */
+public class OrganizationRequest extends AbstractOpenAiLLMRequest {
+
+    /**
+     * @param exchange exchange carrying the organization request
+     * @throws IOException if the superclass constructor fails
+     */
+    public OrganizationRequest(Exchange exchange) throws IOException {
+        super(exchange);
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/SimpleAiApiStore.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/SimpleAiApiStore.java
index 106892c39f..9f7e91b210 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/SimpleAiApiStore.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/SimpleAiApiStore.java
@@ -52,7 +52,7 @@ public class SimpleAiApiStore implements AiApiStore {
@Override
public void store(AiApiUser user, Usage usage) {
if (logUsage)
- log.info("user: {} {}",user.getName(),usage.toString());
+ log.info("user: {} {}", user.getName(), usage.toString());
user.addTokensUsedInPeriod(usage);
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/log/LogInterceptor.java b/core/src/main/java/com/predic8/membrane/core/interceptor/log/LogInterceptor.java
index 34fe64dbad..b2f2f10a5c 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/log/LogInterceptor.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/log/LogInterceptor.java
@@ -146,6 +146,10 @@ private String dumpHeaderFields(Message msg) {
}
private static String dumpBody(Message msg) {
+ if (msg.isBinary()) {
+ return "[Binary]";
+ }
+
try {
return "Body:\n%s\n".formatted(msg.getBodyAsStringDecoded());
} catch (Exception e) {
diff --git a/core/src/main/java/com/predic8/membrane/core/multipart/MultipartUtil.java b/core/src/main/java/com/predic8/membrane/core/multipart/MultipartUtil.java
new file mode 100644
index 0000000000..d19a808e3a
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/multipart/MultipartUtil.java
@@ -0,0 +1,98 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.multipart;
+
+import com.predic8.membrane.core.http.Header;
+import com.predic8.membrane.core.http.Message;
+import com.predic8.membrane.core.util.MessageUtil;
+import jakarta.mail.internet.ParseException;
+import org.apache.commons.fileupload.MultipartStream;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+/**
+ * Utility for splitting multipart HTTP messages into their individual {@link Part}s.
+ *
+ * Example:
+ * <pre>{@code
+ * List<Part> parts = MultipartUtil.split(exchange.getRequest());
+ * for (Part part : parts) {
+ *     String name = part.getName();        // form field name
+ *     String type = part.getContentType(); // e.g. "image/png"
+ *     byte[] body = part.getBody();
+ * }
+ * }</pre>
+ */
+public class MultipartUtil {
+
+    /**
+     * Splits a multipart message into its individual parts.
+     * The MIME boundary is read from the message's {@code Content-Type} header.
+     *
+     * @param message a request or response whose Content-Type is {@code multipart/*}
+     * @return parts in wire order; never null, may be empty
+     * @throws IOException if the Content-Type header or its boundary parameter is missing, or on I/O errors
+     * @throws ParseException if the Content-Type header cannot be parsed
+     */
+    public static List<Part> split(Message message) throws IOException, ParseException {
+        var contentType = message.getHeader().getContentTypeObject();
+        if (contentType == null) {
+            throw new IOException("No Content-Type header");
+        }
+        String boundary = contentType.getParameter("boundary");
+        if (boundary == null) {
+            throw new IOException("No boundary parameter in Content-Type: " + contentType);
+        }
+        return split(message, boundary);
+    }
+
+    /**
+     * Splits a multipart message into its individual parts using an explicit boundary.
+     *
+     * @param message a request or response with a multipart body
+     * @param boundary the MIME boundary string (without leading {@code --})
+     * @return parts in wire order; never null, may be empty
+     * @throws IOException on I/O or unsupported Content-Transfer-Encoding
+     */
+    @SuppressWarnings("deprecation")
+    public static List<Part> split(Message message, String boundary) throws IOException {
+        List<Part> result = new ArrayList<>();
+
+        MultipartStream ms = new MultipartStream(MessageUtil.getContentAsStream(message), boundary.getBytes(UTF_8));
+        boolean hasNext = ms.skipPreamble();
+        while (hasNext) {
+            Header partHeader = new Header(ms.readHeaders());
+            ByteArrayOutputStream baos = new ByteArrayOutputStream();
+            ms.readBodyData(baos);
+
+            // Bodies are stored as-is; base64/quoted-printable parts would need decoding, which is not implemented
+            String cte = partHeader.getFirstValue("Content-Transfer-Encoding");
+            if (cte != null && !cte.equalsIgnoreCase("binary")
+                    && !cte.equalsIgnoreCase("8bit")
+                    && !cte.equalsIgnoreCase("7bit")) {
+                throw new IOException("Content-Transfer-Encoding '" + cte + "' is not supported.");
+            }
+
+            result.add(new Part(partHeader, baos.toByteArray()));
+            hasNext = ms.readBoundary();
+        }
+        return result;
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/multipart/Part.java b/core/src/main/java/com/predic8/membrane/core/multipart/Part.java
index 6d3e58c027..5415954eec 100644
--- a/core/src/main/java/com/predic8/membrane/core/multipart/Part.java
+++ b/core/src/main/java/com/predic8/membrane/core/multipart/Part.java
@@ -14,141 +14,120 @@
package com.predic8.membrane.core.multipart;
-import com.predic8.membrane.core.http.*;
+import com.predic8.membrane.core.http.Header;
-import javax.xml.namespace.*;
-import javax.xml.stream.*;
-import javax.xml.stream.events.*;
-import java.io.*;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
-import static java.nio.charset.StandardCharsets.*;
-import static org.apache.commons.codec.binary.Base64.*;
+import static java.nio.charset.StandardCharsets.UTF_8;
+/**
+ * A single part of a multipart HTTP message, consisting of a header block and a body.
+ *
+ * @see MultipartUtil#split(com.predic8.membrane.core.http.Message)
+ */
public class Part {
- private final Header header;
- private final byte[] data;
-
- public Part(Header header, byte[] data) {
- this.header = header;
- this.data = data;
- }
-
- public String getContentID() {
- return header.getFirstValue("Content-ID");
- }
-
- public Header getHeader() {
- return header;
- }
-
- public InputStream getInputStream() {
- return new ByteArrayInputStream(data);
- }
-
- public XMLEvent asXMLEvent() {
- return new Characters() {
-
- @Override
- public void writeAsEncodedUnicode(Writer writer) {
- throw new RuntimeException("not implemented");
- }
-
- @Override
- public boolean isStartElement() {
- return false;
- }
-
- @Override
- public boolean isStartDocument() {
- return false;
- }
-
- @Override
- public boolean isProcessingInstruction() {
- return false;
- }
-
- @Override
- public boolean isNamespace() {
- return false;
- }
-
- @Override
- public boolean isEntityReference() {
- return false;
- }
-
- @Override
- public boolean isEndElement() {
- return false;
- }
-
- @Override
- public boolean isEndDocument() {
- return false;
- }
-
- @Override
- public boolean isCharacters() {
- return true;
- }
-
- @Override
- public boolean isAttribute() {
- return false;
- }
-
- @Override
- public QName getSchemaType() {
- return null;
- }
-
- @Override
- public Location getLocation() {
- return null;
- }
-
- @Override
- public int getEventType() {
- return CHARACTERS;
- }
-
- @Override
- public StartElement asStartElement() {
- return null;
- }
-
- @Override
- public EndElement asEndElement() {
- return null;
- }
-
- @Override
- public Characters asCharacters() {
- return this;
- }
-
- @Override
- public String getData() {
- return new String(encodeBase64(data), UTF_8);
- }
-
- @Override
- public boolean isWhiteSpace() {
- return false;
- }
-
- @Override
- public boolean isCData() {
- return false;
- }
-
- @Override
- public boolean isIgnorableWhiteSpace() {
- return false;
- }
- };
- }
-
+ private static final Pattern NAME_PATTERN =
+ Pattern.compile("(?i)\\bname=\"([^\"]+)\"");
+ private static final Pattern FILENAME_PATTERN =
+ Pattern.compile("(?i)\\bfilename=\"([^\"]+)\"");
+
+ private final Header header;
+ private final byte[] body;
+
+ public Part(Header header, byte[] body) {
+ this.header = header;
+ this.body = body;
+ }
+
+ // -------------------------------------------------------------------------
+ // Header accessors
+ // -------------------------------------------------------------------------
+
+ /**
+ * Returns the part's own header block (may contain Content-Type, Content-ID, etc.).
+ */
+ public Header getHeader() {
+ return header;
+ }
+
+ /**
+ * Returns the {@code Content-ID} header value, or {@code null} if absent.
+ * Used in MIME multipart/related messages (e.g. SOAP XOP).
+ */
+ public String getContentID() {
+ return header.getFirstValue("Content-ID");
+ }
+
+ /**
+ * Returns the {@code Content-Type} of this part (e.g. {@code "image/png"}),
+ * or {@code null} if no Content-Type header is present.
+ */
+ public String getContentType() {
+ return header.getContentType();
+ }
+
+ /**
+ * Returns the {@code name} parameter from the {@code Content-Disposition} header.
+ * This is the form field name in {@code multipart/form-data} submissions.
+ * Returns {@code null} if not present.
+ */
+ public String getName() {
+ return extractDispositionParam(NAME_PATTERN);
+ }
+
+ /**
+ * Returns the {@code filename} parameter from the {@code Content-Disposition} header,
+ * or {@code null} if not present.
+ */
+ public String getFilename() {
+ return extractDispositionParam(FILENAME_PATTERN);
+ }
+
+ // -------------------------------------------------------------------------
+ // Body accessors
+ // -------------------------------------------------------------------------
+
+ /**
+ * Returns the raw body bytes of this part.
+ */
+ public byte[] getBody() {
+ return body;
+ }
+
+ /**
+ * Returns the body decoded as a UTF-8 string.
+ */
+ public String getBodyAsString() {
+ return getBodyAsString(UTF_8);
+ }
+
+ /**
+ * Returns the body decoded using the given charset.
+ */
+ public String getBodyAsString(Charset charset) {
+ return new String(body, charset);
+ }
+
+ /**
+ * Returns a fresh {@link InputStream} over the body bytes.
+ */
+ public InputStream getInputStream() {
+ return new ByteArrayInputStream(body);
+ }
+
+ // -------------------------------------------------------------------------
+ // Internal helpers
+ // -------------------------------------------------------------------------
+
+ private String extractDispositionParam(Pattern pattern) {
+ String disposition = header.getFirstValue("Content-Disposition");
+ if (disposition == null) return null;
+ Matcher m = pattern.matcher(disposition);
+ return m.find() ? m.group(1) : null;
+ }
}
diff --git a/core/src/main/java/com/predic8/membrane/core/multipart/XOPReconstitutor.java b/core/src/main/java/com/predic8/membrane/core/multipart/XOPReconstitutor.java
index e9aa70e6e2..1a5e14ee8e 100644
--- a/core/src/main/java/com/predic8/membrane/core/multipart/XOPReconstitutor.java
+++ b/core/src/main/java/com/predic8/membrane/core/multipart/XOPReconstitutor.java
@@ -14,20 +14,31 @@
package com.predic8.membrane.core.multipart;
-import com.predic8.membrane.core.http.*;
-import com.predic8.membrane.core.util.*;
-import jakarta.mail.internet.*;
-import org.apache.commons.fileupload.*;
-import org.slf4j.*;
-
-import javax.annotation.concurrent.*;
-import javax.xml.namespace.*;
+import com.predic8.membrane.core.http.BodyCollectingMessageObserver;
+import com.predic8.membrane.core.http.Header;
+import com.predic8.membrane.core.http.Message;
+import com.predic8.membrane.core.util.EndOfStreamException;
+import com.predic8.membrane.core.util.MessageUtil;
+import jakarta.mail.internet.ContentType;
+import jakarta.mail.internet.ParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.annotation.concurrent.ThreadSafe;
+import javax.xml.namespace.QName;
import javax.xml.stream.*;
-import javax.xml.stream.events.*;
-import java.io.*;
-import java.util.*;
-
-import static java.nio.charset.StandardCharsets.*;
+import javax.xml.stream.events.Characters;
+import javax.xml.stream.events.EndElement;
+import javax.xml.stream.events.StartElement;
+import javax.xml.stream.events.XMLEvent;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Writer;
+import java.util.HashMap;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.commons.codec.binary.Base64.encodeBase64;
/**
* Reassemble a multipart XOP message (see
@@ -90,7 +101,7 @@ public Message getReconstitutedMessage(Message message) throws ParseException, I
if (boundary == null)
return null;
- HashMap<String, Part> parts = split(message, boundary);
+ HashMap<String, Part> parts = splitById(message, boundary);
Part startPart = parts.get(start);
if (startPart == null)
return null;
@@ -132,36 +143,16 @@ public boolean shouldNotContainBody() {
return m;
}
- @SuppressWarnings("deprecation")
- private HashMap split(Message message, String boundary)
- throws IOException, EndOfStreamException {
- HashMap parts = new HashMap<>();
-
- MultipartStream multipartStream = new MultipartStream(MessageUtil.getContentAsStream(message), boundary.getBytes(UTF_8));
- boolean nextPart = multipartStream.skipPreamble();
- while(nextPart) {
- Header header = new Header(multipartStream.readHeaders());
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- multipartStream.readBodyData(baos);
-
- // see http://www.iana.org/assignments/transfer-encodings/transfer-encodings.xml
- String cte = header.getFirstValue("Content-Transfer-Encoding");
- if (cte != null &&
- !cte.equals("binary") &&
- !cte.equals("8bit") &&
- !cte.equals("7bit"))
- throw new RuntimeException("Content-Transfer-Encoding '" + cte + "' not implemented.");
-
-
- Part part = new Part(header, baos.toByteArray());
+ /** Splits the multipart message and indexes parts by Content-ID for XOP lookup; parts without a Content-ID are skipped. */
+ private HashMap<String, Part> splitById(Message message, String boundary) throws IOException {
+ HashMap<String, Part> byId = new HashMap<>();
+ for (Part part : MultipartUtil.split(message, boundary)) {
String id = part.getContentID();
if (id != null) {
- parts.put(id, part);
+ byId.put(id, part);
}
-
- nextPart = multipartStream.readBoundary();
}
- return parts;
+ return byId;
}
private byte[] fillInXOPParts(InputStream inputStream,
@@ -189,7 +180,7 @@ private byte[] fillInXOPParts(InputStream inputStream,
if (p == null)
throw new RuntimeException("Did not find multipart with id " + href);
- writer.add(p.asXMLEvent());
+ writer.add(base64CharactersEvent(p.getBody()));
xopIncludeOpen = true;
continue;
}
@@ -212,4 +203,33 @@ private byte[] fillInXOPParts(InputStream inputStream,
return baos.toByteArray();
}
+ /** Wraps raw bytes as a base64-encoded XML Characters event for XOP inlining. */
+ private static Characters base64CharactersEvent(byte[] data) {
+ String encoded = new String(encodeBase64(data), UTF_8);
+ return new Characters() {
+ @Override public String getData() { return encoded; }
+ @Override public boolean isCharacters() { return true; }
+ @Override public boolean isWhiteSpace() { return false; }
+ @Override public boolean isCData() { return false; }
+ @Override public boolean isIgnorableWhiteSpace() { return false; }
+ @Override public int getEventType() { return CHARACTERS; }
+ @Override public Characters asCharacters() { return this; }
+ @Override public boolean isStartElement() { return false; }
+ @Override public boolean isEndElement() { return false; }
+ @Override public boolean isStartDocument() { return false; }
+ @Override public boolean isEndDocument() { return false; }
+ @Override public boolean isAttribute() { return false; }
+ @Override public boolean isNamespace() { return false; }
+ @Override public boolean isEntityReference() { return false; }
+ @Override public boolean isProcessingInstruction() { return false; }
+ @Override public QName getSchemaType() { return null; }
+ @Override public Location getLocation() { return null; }
+ @Override public StartElement asStartElement() { return null; }
+ @Override public EndElement asEndElement() { return null; }
+ @Override public void writeAsEncodedUnicode(Writer writer) {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+
}
diff --git a/core/src/test/java/com/predic8/membrane/core/http/HeaderTest.java b/core/src/test/java/com/predic8/membrane/core/http/HeaderTest.java
index 2597281778..add23dfc42 100644
--- a/core/src/test/java/com/predic8/membrane/core/http/HeaderTest.java
+++ b/core/src/test/java/com/predic8/membrane/core/http/HeaderTest.java
@@ -15,15 +15,21 @@
package com.predic8.membrane.core.http;
import jakarta.activation.MimeType;
-import org.junit.jupiter.api.*;
-import org.junit.jupiter.params.*;
-import org.junit.jupiter.params.provider.*;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.ValueSource;
-import java.util.*;
+import java.util.HashSet;
+import java.util.List;
import static com.predic8.membrane.core.http.Header.*;
-import static com.predic8.membrane.core.http.MimeType.*;
-import static java.nio.charset.StandardCharsets.*;
+import static com.predic8.membrane.core.http.MimeType.TEXT_XML;
+import static com.predic8.membrane.core.http.MimeType.isBinary;
+import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.*;
class HeaderTest {
@@ -262,4 +268,47 @@ void unique() {
assertEquals("1, 2", h.getValuesAsString("X-Foo"));
assertEquals("3, 4", h.getValuesAsString("X-BAR"));
}
+
+ @Nested
+ class IsMultipart {
+ @Test
+ void formDataIsMultipart() {
+ var h = new Header();
+ h.add("Content-Type", "multipart/form-data; boundary=abc");
+ assertTrue(h.isMultipart());
+ }
+
+ @Test
+ void relatedIsMultipart() {
+ var h = new Header();
+ h.add("Content-Type", "multipart/related; boundary=abc");
+ assertTrue(h.isMultipart());
+ }
+
+ @Test
+ void mixedIsMultipart() {
+ var h = new Header();
+ h.add("Content-Type", "multipart/mixed; boundary=abc");
+ assertTrue(h.isMultipart());
+ }
+
+ @Test
+ void isCaseInsensitive() {
+ var h = new Header();
+ h.add("Content-Type", "Multipart/Form-Data; boundary=abc");
+ assertTrue(h.isMultipart());
+ }
+
+ @Test
+ void jsonIsNotMultipart() {
+ var h = new Header();
+ h.add("Content-Type", "application/json");
+ assertFalse(h.isMultipart());
+ }
+
+ @Test
+ void missingContentTypeIsNotMultipart() {
+ assertFalse(new Header().isMultipart());
+ }
+ }
}
diff --git a/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequestTest.java b/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequestTest.java
similarity index 79%
rename from core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequestTest.java
rename to core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequestTest.java
index af841ffed8..560d5edf58 100644
--- a/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequestTest.java
+++ b/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequestTest.java
@@ -4,12 +4,14 @@
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
+import java.io.IOException;
import java.net.URISyntaxException;
+import java.util.List;
import static com.predic8.membrane.core.http.Request.post;
import static org.junit.jupiter.api.Assertions.assertEquals;
-class AbstractLLMRequestTest {
+class AbstractModelInputRequestTest {
@ParameterizedTest
@ValueSource(strings = {
@@ -18,7 +20,7 @@ class AbstractLLMRequestTest {
"BEARER test-api-key",
"bEaReR test-api-key"
})
- void getApiKeyAcceptsBearerCaseInsensitive(String authorization) throws URISyntaxException {
+ void getApiKeyAcceptsBearerCaseInsensitive(String authorization) throws URISyntaxException, IOException {
var request = new TestLLMRequest(post("http://localhost/chat/completions")
.header("Authorization", authorization)
.json("{}")
@@ -27,9 +29,9 @@ void getApiKeyAcceptsBearerCaseInsensitive(String authorization) throws URISynta
assertEquals("test-api-key", request.getApiKey());
}
- private static class TestLLMRequest extends AbstractLLMRequest {
+ private static class TestLLMRequest extends AbstractModelInputRequest implements ModelInputRequest {
- TestLLMRequest(Exchange exchange) {
+ TestLLMRequest(Exchange exchange) throws IOException {
super(exchange);
}
@@ -53,12 +55,7 @@ public String getSystemPrompt() {
}
@Override
- public boolean isChatCompletion() {
- return false;
- }
-
- @Override
- public void setSystemPrompt(String systemPrompt) {
+ public void setSystemPrompts(List prompts) {
}
diff --git a/core/src/test/java/com/predic8/membrane/core/multipart/MultipartUtilTest.java b/core/src/test/java/com/predic8/membrane/core/multipart/MultipartUtilTest.java
new file mode 100644
index 0000000000..950896246b
--- /dev/null
+++ b/core/src/test/java/com/predic8/membrane/core/multipart/MultipartUtilTest.java
@@ -0,0 +1,229 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.multipart;
+
+import com.predic8.membrane.core.http.Response;
+import jakarta.mail.internet.ParseException;
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.jupiter.api.Assertions.*;
+
+class MultipartUtilTest {
+
+ private static final String BOUNDARY = "test-boundary-123";
+ private static final String CRLF = "\r\n";
+
+ // -------------------------------------------------------------------------
+ // Helpers
+ // -------------------------------------------------------------------------
+
+ /** Builds a Response with the given multipart body and boundary. */
+ private static Response response(String body) {
+ return response(body, BOUNDARY);
+ }
+
+ private static Response response(String body, String boundary) {
+ byte[] bytes = body.getBytes(UTF_8);
+ return Response.ok()
+ .header("Content-Type", "multipart/form-data; boundary=\"" + boundary + "\"")
+ .header("Content-Length", String.valueOf(bytes.length))
+ .body(bytes)
+ .build();
+ }
+
+ /**
+ * Builds a minimal multipart body.
+ * Each {@code part} string should contain headers + blank line + body,
+ * e.g. {@code "Content-Disposition: form-data; name=\"x\"\r\n\r\nvalue"}.
+ */
+ private static String multipartBody(String... parts) {
+ var sb = new StringBuilder();
+ for (String part : parts) {
+ sb.append("--").append(BOUNDARY).append(CRLF);
+ sb.append(part).append(CRLF);
+ }
+ sb.append("--").append(BOUNDARY).append("--").append(CRLF);
+ return sb.toString();
+ }
+
+ private static String formField(String name, String value) {
+ return "Content-Disposition: form-data; name=\"" + name + "\"" + CRLF + CRLF + value;
+ }
+
+ // -------------------------------------------------------------------------
+ // split(Message) — auto-reads boundary from Content-Type
+ // -------------------------------------------------------------------------
+
+ @Test
+ void twoFormFieldsAreReturnedInOrder() throws IOException, ParseException {
+ var parts = MultipartUtil.split(response(multipartBody(
+ formField("username", "alice"),
+ formField("message", "Hello World")
+ )));
+
+ assertEquals(2, parts.size());
+ assertEquals("username", parts.get(0).getName());
+ assertEquals("alice", parts.get(0).getBodyAsString());
+ assertEquals("message", parts.get(1).getName());
+ assertEquals("Hello World", parts.get(1).getBodyAsString());
+ }
+
+ @Test
+ void fileUploadPartExposesFilenameAndContentType() throws IOException, ParseException {
+ String part = "Content-Disposition: form-data; name=\"upload\"; filename=\"photo.jpg\"" + CRLF
+ + "Content-Type: image/jpeg" + CRLF
+ + CRLF
+ + "JFIF";
+
+ var parts = MultipartUtil.split(response(multipartBody(part)));
+
+ assertEquals(1, parts.size());
+ assertEquals("upload", parts.get(0).getName());
+ assertEquals("photo.jpg", parts.get(0).getFilename());
+ assertEquals("image/jpeg", parts.get(0).getContentType());
+ assertArrayEquals("JFIF".getBytes(UTF_8), parts.get(0).getBody());
+ }
+
+    @Test
+    void partWithContentIdIsAccessible() throws IOException, ParseException {
+        String part = "Content-Type: application/octet-stream" + CRLF
+                + "Content-ID: <part1@example.com>" + CRLF
+                + CRLF
+                + "binary";
+
+        var parts = MultipartUtil.split(response(multipartBody(part)));
+
+        assertEquals("<part1@example.com>", parts.get(0).getContentID());
+    }
+
+ @Test
+ void binaryBodyIsPreservedExactly() throws IOException, ParseException {
+ byte[] payload = {0, 1, 2, (byte) 0xFF, (byte) 0xFE};
+ String header = "Content-Type: application/octet-stream" + CRLF + CRLF;
+ byte[] partBytes = (header).getBytes(UTF_8);
+ byte[] fullPart = new byte[partBytes.length + payload.length];
+ System.arraycopy(partBytes, 0, fullPart, 0, partBytes.length);
+ System.arraycopy(payload, 0, fullPart, partBytes.length, payload.length);
+
+ // Build body manually to embed raw bytes
+ byte[] prefix = ("--" + BOUNDARY + CRLF).getBytes(UTF_8);
+ byte[] suffix = (CRLF + "--" + BOUNDARY + "--" + CRLF).getBytes(UTF_8);
+ byte[] body = new byte[prefix.length + fullPart.length + suffix.length];
+ System.arraycopy(prefix, 0, body, 0, prefix.length);
+ System.arraycopy(fullPart, 0, body, prefix.length, fullPart.length);
+ System.arraycopy(suffix, 0, body, prefix.length + fullPart.length, suffix.length);
+
+ byte[] msgBytes = body;
+ var msg = Response.ok()
+ .header("Content-Type", "multipart/form-data; boundary=\"" + BOUNDARY + "\"")
+ .header("Content-Length", String.valueOf(msgBytes.length))
+ .body(msgBytes)
+ .build();
+
+ var parts = MultipartUtil.split(msg);
+ assertArrayEquals(payload, parts.get(0).getBody());
+ }
+
+ // -------------------------------------------------------------------------
+ // split(Message, boundary) — explicit boundary overload
+ // -------------------------------------------------------------------------
+
+ @Test
+ void explicitBoundaryOverloadProducesSameResult() throws IOException {
+ var body = multipartBody(formField("x", "42"));
+ byte[] bytes = body.getBytes(UTF_8);
+ var msg = Response.ok()
+ .header("Content-Type", "multipart/form-data; boundary=\"other\"") // intentionally wrong
+ .header("Content-Length", String.valueOf(bytes.length))
+ .body(bytes)
+ .build();
+
+ // Pass the correct boundary explicitly — Content-Type boundary is ignored
+ var parts = MultipartUtil.split(msg, BOUNDARY);
+
+ assertEquals(1, parts.size());
+ assertEquals("x", parts.get(0).getName());
+ assertEquals("42", parts.get(0).getBodyAsString());
+ }
+
+ // -------------------------------------------------------------------------
+ // Real-world resource: XOP multipart from ReassembleTest
+ // -------------------------------------------------------------------------
+
+ @SuppressWarnings("DataFlowIssue")
+ @Test
+ void xopResourceSplitsIntoTwoParts() throws IOException {
+ byte[] body = IOUtils.toByteArray(getClass().getResourceAsStream("/multipart/embedded-byte-array.txt"));
+ var response = Response.ok()
+ .header("Content-Type", "multipart/related; "
+ + "type=\"application/xop+xml\"; "
+ + "boundary=\"uuid:168683dc-43b3-4e71-8e66-efb633ef406b\"; "
+ + "start=\"\"; "
+ + "start-info=\"text/xml\"")
+ .header("Content-Length", String.valueOf(body.length))
+ .body(body)
+ .build();
+
+ var parts = MultipartUtil.split(response, "uuid:168683dc-43b3-4e71-8e66-efb633ef406b");
+
+ assertEquals(2, parts.size());
+ assertEquals("", parts.get(0).getContentID());
+ assertEquals("", parts.get(1).getContentID());
+ assertEquals("application/xop+xml; charset=UTF-8; type=\"text/xml\";", parts.get(0).getContentType());
+ assertEquals("application/octet-stream", parts.get(1).getContentType());
+ }
+
+ // -------------------------------------------------------------------------
+ // Error cases
+ // -------------------------------------------------------------------------
+
+ @Test
+ void missingContentTypeThrows() {
+ byte[] bytes = "body".getBytes(UTF_8);
+ var msg = Response.ok()
+ .header("Content-Length", String.valueOf(bytes.length))
+ .body(bytes)
+ .build();
+
+ assertThrows(IOException.class, () -> MultipartUtil.split(msg));
+ }
+
+ @Test
+ void missingBoundaryParameterThrows() {
+ byte[] bytes = "body".getBytes(UTF_8);
+ var msg = Response.ok()
+ .header("Content-Type", "multipart/form-data") // no boundary=
+ .header("Content-Length", String.valueOf(bytes.length))
+ .body(bytes)
+ .build();
+
+ assertThrows(IOException.class, () -> MultipartUtil.split(msg));
+ }
+
+ @Test
+ void unsupportedContentTransferEncodingThrows() {
+ String part = "Content-Disposition: form-data; name=\"x\"" + CRLF
+ + "Content-Transfer-Encoding: quoted-printable" + CRLF
+ + CRLF
+ + "value";
+
+ assertThrows(IOException.class,
+ () -> MultipartUtil.split(response(multipartBody(part))));
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java
index 3cde3fa976..32614a7431 100644
--- a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java
@@ -62,6 +62,7 @@ void simpleRequestIsForwarded() throws IOException {
.when()
.post(LOCALHOST_2000 + "/v1/messages")
.then()
+ .log().ifValidationFails()
.statusCode(200)
.body("type", equalTo("message"))
.body("content[0].type", equalTo("text"));
@@ -106,6 +107,7 @@ void outputTokensAreCappedBeforeForwarding() throws IOException {
.when()
.post(LOCALHOST_2000 + "/v1/messages")
.then()
+ .log().ifValidationFails()
.statusCode(200);
// @formatter:on
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
index 88a6d380ad..e1821bc28c 100644
--- a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
@@ -123,6 +123,7 @@ void wrongModelIsRejected() throws IOException {
.when()
.post(LOCALHOST_2000 + "/v1/responses")
.then()
+ .log().ifValidationFails()
.statusCode(400)
.body("error.type", equalTo("invalid_request_error"))
.body("error.code", equalTo("model_not_allowed"))
From a7caaa4a1043eaa451f7009835da28c6d573be48 Mon Sep 17 00:00:00 2001
From: thomas
Date: Fri, 29 May 2026 15:09:49 +0200
Subject: [PATCH 41/43] feat: add `AbstractModelInputRequest` and enhance
OpenAI provider request handling
- Introduced `AbstractModelInputRequest` to support model input parsing for JSON and multipart requests.
- Added `ChatCompletionsRequest` support in `OpenAIProvider` to handle `/v1/chat/completions` URI.
- Simplified `DefaultPolicies` by consolidating `checkModel` call.
- Minor cleanup in `MultipartUtilTest` and improved method signature in `XOPReconstitutor`.
---
.../llmgateway/DefaultPolicies.java | 6 +-
.../provider/AbstractModelInputRequest.java | 146 ++++++++++++++++++
.../provider/openai/OpenAIProvider.java | 3 +
.../core/multipart/XOPReconstitutor.java | 2 +-
.../core/multipart/MultipartUtilTest.java | 1 -
5 files changed, 151 insertions(+), 7 deletions(-)
create mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
index 7c6f9612c4..1219fb66b3 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
@@ -58,11 +58,7 @@ public Outcome handleRequest(ModelInputRequest mir, Exchange exc) {
if (outcome != CONTINUE) {
return outcome;
}
- outcome = checkModel(mir, exc);
- if (outcome != CONTINUE) {
- return outcome;
- }
- return CONTINUE;
+ return checkModel(mir, exc);
}
public Outcome checkModel(ModelInputRequest mir, Exchange exc) {
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java
new file mode 100644
index 0000000000..2fb780f214
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java
@@ -0,0 +1,146 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.http.AbstractBody;
+import com.predic8.membrane.core.http.Body;
+import com.predic8.membrane.core.multipart.MultipartUtil;
+import com.predic8.membrane.core.util.json.JsonUtil;
+import jakarta.mail.internet.ParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+public class AbstractModelInputRequest extends AbstractLLMRequest implements ModelInputRequest {
+
+    private static final Logger log = LoggerFactory.getLogger(AbstractModelInputRequest.class);
+
+    private static final ObjectMapper om = new ObjectMapper();
+
+    /** Parsed JSON request body; the constructor sets it only for requests recognized as JSON. */
+    protected ObjectNode json;
+    /** Value of the multipart part or JSON field named {@code model}; {@code null} if absent. */
+    private String model;
+    /** Original request body that {@link #getBody()} returns as is; {@code null} for JSON requests. */
+    private AbstractBody body;
+
+    /**
+     * Reads the model name from a multipart part or a top-level JSON field named {@code model};
+     * a request that is neither multipart nor JSON has no model and keeps its body untouched.
+     * @param exchange exchange whose request is inspected
+     * @throws RuntimeException if the multipart body cannot be split or the JSON body cannot be parsed
+     */
+    public AbstractModelInputRequest(Exchange exchange) throws IOException {
+        super(exchange);
+
+        if (exchange.getRequest().getHeader().isMultipart()) {
+            try {
+                for (var part : MultipartUtil.split(exchange.getRequest())) {
+                    log.debug("Part: name={} type={} size={}", part.getName(), part.getContentType(), part.getBody().length);
+                    if ("model".equals(part.getName())) {
+                        model = part.getBodyAsString();
+                        log.debug("Model: {}", model);
+                    }
+                }
+                body = exchange.getRequest().getBody();
+            } catch (IOException | ParseException e) {
+                throw new RuntimeException(e);
+            }
+            return;
+        }
+
+        if (exchange.getRequest().isJSON()) {
+            json = JsonUtil.getJsonObject(exchange.getRequest()).orElseThrow(() -> new RuntimeException("Cannot parse input as JSON message."));
+            if (json.has("model")) {
+                model = json.path("model").asText();
+            }
+        } else { // keep the body; serializing a null json would produce the text "null"
+            body = exchange.getRequest().getBody();
+        }
+    }
+
+    public List getTools() {
+        return Collections.emptyList();
+    }
+
+    @Override
+    public String getSystemPrompt() {
+        return "";
+    }
+
+    @Override
+    public void setSystemPrompts(List prompts) {
+        log.warn("Not supported.");
+    }
+
+    @Override
+    public void removeSystemPrompt() {
+        log.warn("Not supported.");
+    }
+
+    /** Returns the top-level {@code tools} array of the JSON body, or {@code null} if there is none. */
+    protected ArrayNode getToolsNode() {
+        return json != null && json.path("tools").isArray() ? (ArrayNode) json.path("tools") : null;
+    }
+
+    @Override
+    public ObjectNode getJson() {
+        return json;
+    }
+
+    @Override
+    public String getModel() {
+        return model;
+    }
+
+    @Override
+    public long getRequestedMaxOutputTokens() {
+        return 0;
+    }
+
+    @Override
+    public void setMaxOutputTokens(int maxOutputTokens) {
+        log.warn("Not supported.");
+    }
+
+    @Override
+    public long estimateInputTokens() {
+        return 0;
+    }
+
+    /**
+     * Returns the retained original body if there is one, otherwise the pretty-printed {@link #json}.
+     */
+    @Override
+    public AbstractBody getBody() {
+        if (body != null)
+            return body;
+        try {
+            return new Body(om.writerWithDefaultPrettyPrinter().writeValueAsString(json).getBytes(UTF_8));
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException("Could not serialize JSON: " + e.getMessage(), e);
+        }
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java
index 9798483ed5..d24e5154c1 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java
@@ -36,6 +36,9 @@ public class OpenAIProvider extends AbstractLLMProvider {
@Override
public LLMRequest getLLMRequest(Exchange exchange) throws IOException {
var uri = exchange.getRequest().getUri();
+ if (uri.startsWith("/v1/chat/completions")) {
+ return new OpenAIChatCompletionsRequest(exchange);
+ }
if (uri.startsWith("/v1/responses")) {
return new OpenAiLLMResponsesRequest(exchange);
}
diff --git a/core/src/main/java/com/predic8/membrane/core/multipart/XOPReconstitutor.java b/core/src/main/java/com/predic8/membrane/core/multipart/XOPReconstitutor.java
index 1a5e14ee8e..c2253e333a 100644
--- a/core/src/main/java/com/predic8/membrane/core/multipart/XOPReconstitutor.java
+++ b/core/src/main/java/com/predic8/membrane/core/multipart/XOPReconstitutor.java
@@ -59,7 +59,7 @@ public XOPReconstitutor() {
xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
}
- public InputStream reconstituteIfNecessary(Message message) throws IOException {
+ public InputStream reconstituteIfNecessary(Message message) {
try {
Message reconstitutedMessage = getReconstitutedMessage(message);
if (reconstitutedMessage != null)
diff --git a/core/src/test/java/com/predic8/membrane/core/multipart/MultipartUtilTest.java b/core/src/test/java/com/predic8/membrane/core/multipart/MultipartUtilTest.java
index 950896246b..86c8fbf1e3 100644
--- a/core/src/test/java/com/predic8/membrane/core/multipart/MultipartUtilTest.java
+++ b/core/src/test/java/com/predic8/membrane/core/multipart/MultipartUtilTest.java
@@ -166,7 +166,6 @@ void explicitBoundaryOverloadProducesSameResult() throws IOException {
// Real-world resource: XOP multipart from ReassembleTest
// -------------------------------------------------------------------------
- @SuppressWarnings("DataFlowIssue")
@Test
void xopResourceSplitsIntoTwoParts() throws IOException {
byte[] body = IOUtils.toByteArray(getClass().getResourceAsStream("/multipart/embedded-byte-array.txt"));
From 96bc067efb2bc3c622574ea26c31df36450a37df Mon Sep 17 00:00:00 2001
From: thomas
Date: Fri, 5 Jun 2026 08:07:42 +0200
Subject: [PATCH 42/43] refactor: remove specialized request classes and
simplify request handling with `BaseLLMRequest`
---
.../provider/AbstractLLMProvider.java | 19 +------------------
.../provider/AbstractModelInputRequest.java | 2 +-
...actLLMRequest.java => BaseLLMRequest.java} | 4 ++--
.../provider/openai/AudioRequest.java | 12 ------------
.../provider/openai/FilesRequest.java | 12 ------------
.../provider/openai/ImagesRequest.java | 11 -----------
6 files changed, 4 insertions(+), 56 deletions(-)
rename core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/{AbstractLLMRequest.java => BaseLLMRequest.java} (87%)
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AudioRequest.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/FilesRequest.java
delete mode 100644 core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/ImagesRequest.java
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMProvider.java
index 6273c2c5db..9b833dea56 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMProvider.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMProvider.java
@@ -1,12 +1,7 @@
package com.predic8.membrane.core.interceptor.llmgateway.provider;
import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.http.ReadingBodyException;
import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsRequest;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.AudioRequest;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.FilesRequest;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.ImagesRequest;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.OrganizationRequest;
import java.io.IOException;
@@ -18,18 +13,6 @@ public LLMRequest getLLMRequest(Exchange exchange) throws IOException {
if (uri.startsWith("/v1/chat/completions")) {
return new ChatCompletionsRequest(exchange);
}
- if (uri.startsWith("/v1/files")) {
- return new FilesRequest(exchange);
- }
- if (uri.contains("/v1/images")) {
- return new ImagesRequest(exchange);
- }
- if (uri.contains("/v1/audio")) {
- return new AudioRequest(exchange);
- }
- if (uri.contains("/v1/organization")) {
- return new OrganizationRequest(exchange);
- }
- throw new ReadingBodyException("Unknown request: " + uri);
+ return new BaseLLMRequest(exchange);
}
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java
index 2fb780f214..8551dcbc4a 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java
@@ -33,7 +33,7 @@
import static java.nio.charset.StandardCharsets.UTF_8;
-public class AbstractModelInputRequest extends AbstractLLMRequest implements ModelInputRequest {
+public class AbstractModelInputRequest extends BaseLLMRequest implements ModelInputRequest {
private static final Logger log = LoggerFactory.getLogger(AbstractModelInputRequest.class);
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/BaseLLMRequest.java
similarity index 87%
rename from core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java
rename to core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/BaseLLMRequest.java
index 4c7c8fded3..40b317de09 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/BaseLLMRequest.java
@@ -4,11 +4,11 @@
import static com.predic8.membrane.core.http.Header.AUTHORIZATION;
-public class AbstractLLMRequest extends AbstractLLMMessage implements LLMRequest {
+public class BaseLLMRequest extends AbstractLLMMessage implements LLMRequest {
public static final String BEARER_PREFIX = "Bearer";
- protected AbstractLLMRequest(Exchange exchange) {
+ protected BaseLLMRequest(Exchange exchange) {
super(exchange);
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AudioRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AudioRequest.java
deleted file mode 100644
index df028457a3..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AudioRequest.java
+++ /dev/null
@@ -1,12 +0,0 @@
-package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;
-
-import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractModelInputRequest;
-
-import java.io.IOException;
-
-public class AudioRequest extends AbstractModelInputRequest {
- public AudioRequest(Exchange exchange) throws IOException {
- super(exchange);
- }
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/FilesRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/FilesRequest.java
deleted file mode 100644
index cc56d7b492..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/FilesRequest.java
+++ /dev/null
@@ -1,12 +0,0 @@
-package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;
-
-import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest;
-
-public class FilesRequest extends AbstractLLMRequest {
-
- public FilesRequest(Exchange exchange) {
- super(exchange);
- }
-
-}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/ImagesRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/ImagesRequest.java
deleted file mode 100644
index 74ac706d04..0000000000
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/ImagesRequest.java
+++ /dev/null
@@ -1,11 +0,0 @@
-package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;
-
-import com.predic8.membrane.core.exchange.Exchange;
-import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest;
-
-public class ImagesRequest extends AbstractLLMRequest {
-
- public ImagesRequest(Exchange exchange) {
- super(exchange);
- }
-}
From 398782a1551342450e99a37da7b08e039a244115 Mon Sep 17 00:00:00 2001
From: thomas
Date: Tue, 9 Jun 2026 16:50:54 +0200
Subject: [PATCH 43/43] fix: correct typos, improve exception handling, and
update default token logic
- Fixed typos in YAML tutorial and Javadoc comments (e.g., "Antropic" to "Anthropic").
- Replaced `IllegalArgumentException` with `ConfigurationException` for token validation in `DefaultPolicies`.
- Updated `getRequestedMaxOutputTokens` to return `-1` instead of `0` for default behavior.
- Added exception for unknown event format in `AbstractLLMEvent`.
---
.../core/interceptor/llmgateway/AbstractLLMEvent.java | 1 +
.../membrane/core/interceptor/llmgateway/DefaultPolicies.java | 4 ++--
.../llmgateway/provider/AbstractModelInputRequest.java | 2 +-
.../llmgateway/provider/claude/ClaudeProvider.java | 2 +-
.../tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml | 2 +-
5 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java
index ed9fe0929c..c873d946cf 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java
@@ -48,6 +48,7 @@ public static AbstractLLMEvent create(SSEParser.SSEEvent sse) {
var opt = JsonUtil.getJsonObject(sse.data());
if (opt.isEmpty()) {
log.info("Unknown event format: {}", sse.data());
+ throw new RuntimeException("Unknown event format: " + sse.data());
}
var json = opt.get();
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
index 1219fb66b3..bedbe1627c 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java
@@ -101,7 +101,7 @@ public List getModels() {
/**
* @param models List of models that can be used by the gateway.
- * @desciption Restricts the models that can be used by the gateway.
+ * @description Restricts the models that can be used by the gateway.
* @default null (no restriction)
*/
@MCAttribute
@@ -123,7 +123,7 @@ public int getMaxOutputTokens() {
@MCAttribute
public void setMaxOutputTokens(int maxOutputTokens) {
if (maxOutputTokens < 0) {
- throw new IllegalArgumentException("maxOutputTokens must be >= 0");
+ throw new ConfigurationException("maxOutputTokens must be >= 0");
}
this.maxOutputTokens = maxOutputTokens;
}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java
index 8551dcbc4a..0ba3b4c560 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java
@@ -117,7 +117,7 @@ public String getModel() {
@Override
public long getRequestedMaxOutputTokens() {
- return 0;
+ return -1;
}
@Override
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java
index decc7048b2..e07822b119 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java
@@ -25,7 +25,7 @@
import java.util.function.Consumer;
/**
- * @description (Experimental) Anthroic Claude provider configuration
+ * @description (Experimental) Anthropic Claude provider configuration
* Use to configure a LLM gateway to use the anthropic API
*/
@MCElement( name="claude")
diff --git a/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml b/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml
index ddaaaedcf1..ff12b0f9c8 100644
--- a/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml
+++ b/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml
@@ -1,6 +1,6 @@
# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json
#
-# Tutorial: Basic LLM Gateway (Antropic Claude)
+# Tutorial: Basic LLM Gateway (Anthropic Claude)
#
# Replace <> with your Claude API key.
#