diff --git a/annot/src/main/java/com/predic8/membrane/annot/yaml/parsing/binding/ObjectBinder.java b/annot/src/main/java/com/predic8/membrane/annot/yaml/parsing/binding/ObjectBinder.java
index a018d1b509..8ca91e4ebb 100644
--- a/annot/src/main/java/com/predic8/membrane/annot/yaml/parsing/binding/ObjectBinder.java
+++ b/annot/src/main/java/com/predic8/membrane/annot/yaml/parsing/binding/ObjectBinder.java
@@ -33,11 +33,7 @@
import java.util.List;
import java.util.Objects;
-import static com.predic8.membrane.annot.yaml.McYamlIntrospector.findRequiredSetters;
-import static com.predic8.membrane.annot.yaml.McYamlIntrospector.findSingleSetterOrNullForAnnotation;
-import static com.predic8.membrane.annot.yaml.McYamlIntrospector.getSingleChildSetter;
-import static com.predic8.membrane.annot.yaml.McYamlIntrospector.isCollapsed;
-import static com.predic8.membrane.annot.yaml.McYamlIntrospector.isNoEnvelope;
+import static com.predic8.membrane.annot.yaml.McYamlIntrospector.*;
import static com.predic8.membrane.annot.yaml.NodeValidationUtils.ensureMappingStart;
public final class ObjectBinder {
@@ -49,7 +45,8 @@ public final class ObjectBinder {
public static
+ * API Gateway for Large Language Models (LLMs).
+ *
+ *
+ *
Chat Completions API wire format: + *
{@code
+ * { "messages": [
+ * {"role": "system", "content": "prompt 1"},
+ * {"role": "system", "content": "prompt 2"},
+ * ...user messages...
+ * ]}
+ * }
+ */
+ @Override
+ public void setSystemPrompts(ListClaude API wire format: + *
{@code { "system": "prompt 1\nprompt 2", "messages": [...] }}
+ */
+ @Override
+ public void setSystemPrompts(ListGemini API wire format: + *
{@code
+ * { "systemInstruction": { "parts": [{ "text": "You are a helpful assistant." }] } }
+ * }
+ */
+ @Override
+ public String getSystemPrompt() {
+ for (var part : json.path("systemInstruction").path("parts")) {
+ if (part.path("text").isTextual()) {
+ return part.path("text").asText("");
+ }
+ }
+ return "";
+ }
+
+ /**
+ * Concatenates all prompts (newline-separated) into a single text part under
+ * {@code systemInstruction}. Replaces any existing system instruction.
+ *
+ * Gemini API wire format: + *
{@code { "systemInstruction": { "parts": [{ "text": "prompt 1\nprompt 2" }] } }}
+ */
+ @Override
+ public void setSystemPrompts(ListOpenAI Responses API wire format: + *
{@code { "instructions": "prompt 1\nprompt 2", "input": "..." }}
+ */
+ @Override
+ public void setSystemPrompts(ListExample: + *
{@code
+ * List parts = MultipartUtil.split(exchange.getRequest());
+ * for (Part part : parts) {
+ * String name = part.getName(); // form field name
+ * String type = part.getContentType(); // e.g. "image/png"
+ * byte[] body = part.getBody();
+ * }
+ * }
+ */
+public class MultipartUtil {
+
+ /**
+ * Splits a multipart message into its individual parts.
+ * The MIME boundary is read from the message's {@code Content-Type} header.
+ *
+ * @param message a request or response whose Content-Type is multipart/*
+ * @return parts in wire order; never null, may be empty
+ * @throws IOException on I/O or parse errors
+ * @throws ParseException if the Content-Type header cannot be parsed
+ */
+ public static ListThe tutorial YAML's {@code target.url} is rewritten to point at the mock server + * before Membrane starts. Subclasses override {@link #getTutorialDir()} and + * {@link #getTutorialYaml()} to select the tutorial under test. + * + *
JUnit 5 lifecycle ordering guarantees that {@code DistributionExtractingTestcase.init()}
+ * (superclass {@code @BeforeEach}) runs first and sets {@code baseDir}, allowing
+ * {@link #startGateway()} to use {@code replaceInFile2()} safely.
+ */
+public abstract class AbstractAiTutorialTest extends DistributionExtractingTestcase {
+
+ protected static final int MOCK_LLM_PORT = 3100;
+
+ /**
+ * Value substituted for the {@code < The tutorial configures a Claude LLM gateway with:
+ * The upstream Anthropic API is replaced by a local mock server so no real API key is needed.
+ */
+public class BasicClaudeLLMGatewayTutorialTest extends AbstractAiTutorialTest {
+
+    /** Selects the Claude tutorial directory as the tutorial under test. */
+    @Override
+    protected String getTutorialDir() {
+        return "ai/llm-gateway/claude";
+    }
+
+    /** Selects the basic LLM gateway configuration file of the tutorial. */
+    @Override
+    protected String getTutorialYaml() {
+        return "10-Basic-LLM-Gateway.yaml";
+    }
+
+    /**
+     * A request within the token limits is forwarded to the upstream and its response is returned.
+     */
+    @Test
+    void simpleRequestIsForwarded() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", "test-key")
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .log().ifValidationFails()
+            .statusCode(200)
+            .body("type", equalTo("message"))
+            .body("content[0].type", equalTo("text"));
+        // @formatter:on
+    }
+
+    /**
+     * A request whose message content exceeds {@code maxInputTokens} (100) is rejected by the
+     * gateway before reaching the upstream. The response uses the Claude error format.
+     */
+    @Test
+    void inputTokenLimitExceededIsRejected() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", "test-key")
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("max-input.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .statusCode(400)
+            .body("type", equalTo("error"))
+            .body("error.type", equalTo("invalid_request_error"))
+            .body("error.message", containsString("tokens"));
+        // @formatter:on
+    }
+
+    /**
+     * When the request asks for more output tokens than {@code maxOutputTokens} (200) allows,
+     * the gateway rewrites {@code max_tokens} to 200 before forwarding to the upstream.
+     * The mock captures the forwarded body so we can verify the value was actually capped.
+     */
+    @Test
+    void outputTokensAreCappedBeforeForwarding() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", "test-key")
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("max-output.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            // Log only on failure, like simpleRequestIsForwarded; unconditional
+            // logging floods the build output on every green run.
+            .log().ifValidationFails()
+            .statusCode(200);
+        // @formatter:on
+
+        assertThat(from(lastRequestBody).getInt("max_tokens"), equalTo(200));
+    }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java
new file mode 100644
index 0000000000..3514870774
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java
@@ -0,0 +1,223 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.claude;
+
+import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Integration tests for
+ * {@code distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml}.
+ *
+ * The tutorial demonstrates sharing a single upstream API key between multiple users,
+ * each identified by their own gateway key and subject to individual token budgets:
+ * Budget accounting per request:
+ * Overrides the upstream URL and the API-key header so the mock captures
+ * the {@code x-goog-api-key} header that Google uses. The mock response is
+ * formatted as a Gemini {@code generateContent} reply and reports 100 total
+ * tokens (50 prompt + 50 candidates) per call.
+ */
+public abstract class AbstractGoogleTutorialTest extends AbstractAiTutorialTest {
+
+    /** URL prefix used in both Google tutorial YAML files. */
+    private static final String UPSTREAM_API_URL = "https://generativelanguage.googleapis.com";
+
+    /** Directory holding the Google tutorial files. */
+    private static final String TUTORIAL_DIR = "ai/llm-gateway/google";
+
+    /** Header through which Google receives the API key. */
+    private static final String API_KEY_HEADER = "x-goog-api-key";
+
+    /**
+     * Minimal Gemini {@code generateContent} reply reporting 50 prompt + 50 candidates
+     * = 100 total tokens.
+     */
+    private static final String MOCK_RESPONSE = """
+            {"candidates":[{"content":{"parts":[{"text":"I am a mock."}],"role":"model"},\
+            "finishReason":"STOP"}],\
+            "usageMetadata":{"promptTokenCount":50,"candidatesTokenCount":50,"totalTokenCount":100}}""";
+
+    @Override
+    protected String getTutorialDir() {
+        return TUTORIAL_DIR;
+    }
+
+    @Override
+    protected String getUpstreamApiUrl() {
+        return UPSTREAM_API_URL;
+    }
+
+    /** Google authenticates via the {@code x-goog-api-key} header. */
+    @Override
+    protected String getApiKeyHeader() {
+        return API_KEY_HEADER;
+    }
+
+    /**
+     * Supplies the canned Gemini reply. At 100 tokens per call, the token-budget exhaustion
+     * test needs only three successful requests before alice's 500-token allowance runs out.
+     */
+    @Override
+    protected String mockResponse() {
+        return MOCK_RESPONSE;
+    }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java
new file mode 100644
index 0000000000..16f52d470b
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java
@@ -0,0 +1,109 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.google;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static io.restassured.path.json.JsonPath.from;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Integration test for
+ * {@code distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml}.
+ *
+ * The tutorial configures a Google Gemini LLM gateway with:
+ * The upstream Google Gemini API is replaced by a local mock server so no real API key is needed.
+ */
+public class BasicGoogleLLMGatewayTutorialTest extends AbstractGoogleTutorialTest {
+
+ private static final String GEMINI_ENDPOINT =
+ LOCALHOST_2000 + "/v1beta/models/gemini-2.5-flash:generateContent";
+
+ @Override
+ protected String getTutorialYaml() {
+ return "10-Basic-LLM-Gateway.yaml";
+ }
+
+ /**
+ * A request within the token limits is forwarded to the upstream and its response is returned.
+ */
+ @Test
+ void simpleRequestIsForwarded() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", "test-key")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(GEMINI_ENDPOINT)
+ .then()
+ .statusCode(200)
+ .body("candidates[0].content.parts[0].text", equalTo("I am a mock."));
+ // @formatter:on
+ }
+
+ /**
+ * A request whose message content exceeds {@code maxInputTokens} (100) is rejected by the
+ * gateway before reaching the upstream. The response uses the Google error format.
+ */
+ @Test
+ void inputTokenLimitExceededIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", "test-key")
+ .body(readFileFromBaseDir("max-input.json"))
+ .when()
+ .post(GEMINI_ENDPOINT)
+ .then()
+ .statusCode(400)
+ .body("error.status", equalTo("INVALID_ARGUMENT"))
+ .body("error.message", containsString("exceeds the maximum allowed"))
+ .body("error.message", containsString("100"));
+ // @formatter:on
+ }
+
+ /**
+ * When the request asks for more output tokens than {@code maxOutputTokens} (200) allows,
+ * the gateway rewrites {@code generationConfig.maxOutputTokens} to 200 before forwarding.
+ * The mock captures the forwarded body so we can verify the value was actually capped.
+ */
+ @Test
+ void outputTokensAreCappedBeforeForwarding() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", "test-key")
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(GEMINI_ENDPOINT)
+ .then()
+ .statusCode(200);
+ // @formatter:on
+
+ assertThat(from(lastRequestBody).getInt("generationConfig.maxOutputTokens"), equalTo(200));
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java
new file mode 100644
index 0000000000..79b1a71e3e
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java
@@ -0,0 +1,219 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.google;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Integration tests for
+ * {@code distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml}.
+ *
+ * The tutorial demonstrates sharing a single upstream API key between multiple users,
+ * each identified by their own gateway key and subject to individual token budgets:
+ * For Google Gemini the model is part of the URL path
+ * ({@code /v1beta/models/ Budget accounting per request:
+ * Overrides the upstream URL and the API-key header so the mock captures
+ * the {@code Authorization} header that OpenAI uses instead of {@code x-api-key}.
+ * The mock response is formatted as an OpenAI Responses-API reply and reports
+ * 100 total tokens (50 input + 50 output) per call.
+ */
+public abstract class AbstractOpenAiTutorialTest extends AbstractAiTutorialTest {
+
+ /** Selects {@code ai/llm-gateway/openai} as the directory of the tutorial under test. */
+ @Override
+ protected String getTutorialDir() {
+ return "ai/llm-gateway/openai";
+ }
+
+ /**
+ * Returns the OpenAI upstream base URL.
+ * NOTE(review): presumably the URL prefix the base class swaps for the mock server
+ * address in the tutorial YAML - confirm against AbstractAiTutorialTest.
+ */
+ @Override
+ protected String getUpstreamApiUrl() {
+ return "https://api.openai.com";
+ }
+
+ /**
+ * OpenAI authenticates via {@code Authorization: Bearer The tutorial configures an OpenAI LLM gateway with:
+ * The upstream OpenAI API is replaced by a local mock server so no real API key is needed.
+ */
+public class BasicOpenAiLLMGatewayTutorialTest extends AbstractOpenAiTutorialTest {
+
+ @Override
+ protected String getTutorialYaml() {
+ return "10-Basic-LLM-Gateway.yaml";
+ }
+
+ /**
+ * A request within the token limits is forwarded to the upstream and its response is returned.
+ */
+ @Test
+ void simpleRequestIsForwarded() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer test-key")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200)
+ .body("object", equalTo("response"));
+ // @formatter:on
+ }
+
+ /**
+ * A request whose message content exceeds {@code maxInputTokens} (100) is rejected by the
+ * gateway before reaching the upstream. The response uses the OpenAI error format.
+ */
+ @Test
+ void inputTokenLimitExceededIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer test-key")
+ .body(readFileFromBaseDir("max-input.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(400)
+ .body("error.type", equalTo("invalid_request_error"))
+ .body("error.code", equalTo("context_length_exceeded"))
+ .body("error.message", containsString("100"));
+ // @formatter:on
+ }
+
+ /**
+ * When the request asks for more output tokens than {@code maxOutputTokens} (200) allows,
+ * the gateway rewrites {@code max_output_tokens} to 200 before forwarding to the upstream.
+ * The mock captures the forwarded body so we can verify the value was actually capped.
+ */
+ @Test
+ void outputTokensAreCappedBeforeForwarding() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer test-key")
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200);
+ // @formatter:on
+
+ assertThat(from(lastRequestBody).getInt("max_output_tokens"), equalTo(200));
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
new file mode 100644
index 0000000000..e1821bc28c
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
@@ -0,0 +1,209 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.openai;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Integration tests for
+ * {@code distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml}.
+ *
+ * The tutorial demonstrates sharing a single upstream API key between multiple users,
+ * each identified by their own gateway key and subject to individual token budgets:
+ * Budget accounting per request:
+ * The mock upstream returns {@code Content-Type: text/event-stream} with three
+ * SSE events so the gateway's SSE processing path is exercised end-to-end without
+ * a real OpenAI connection:
+ *
+ * Because RestAssured does not handle server-sent events well, these tests use the
+ * Java {@link java.net.http.HttpClient} directly — the same approach used in
+ * {@code ServerSentEventsTutorialTest}.
+ */
+public class StreamingOpenAiLLMGatewayTutorialTest extends AbstractOpenAiTutorialTest {
+
+ private static final String RESPONSES_ENDPOINT = LOCALHOST_2000 + "/v1/responses";
+
+ /** Selects {@code 10-Basic-LLM-Gateway.yaml} as the tutorial configuration under test. */
+ @Override
+ protected String getTutorialYaml() {
+ return "10-Basic-LLM-Gateway.yaml";
+ }
+
+ /** Tell the mock server to respond as a finite SSE stream. */
+ @Override
+ protected String mockContentType() {
+ return "text/event-stream";
+ }
+
+ /**
+ * A minimal but complete SSE body made of three events, each followed by a blank line:
+ * {@code response.created} (status {@code in_progress}), one
+ * {@code response.output_text.delta} carrying the text {@code "I am a mock."}, and the
+ * terminal {@code response.completed} event whose {@code usage} node reports
+ * 50 input + 50 output = 100 total tokens.
+ */
+ @Override
+ protected String mockResponse() {
+ return """
+ event: response.created
+ data: {"type":"response.created","response":{"id":"resp_mock","object":"response","status":"in_progress","model":"gpt-5-nano"}}
+
+ event: response.output_text.delta
+ data: {"type":"response.output_text.delta","item_id":"msg_mock","output_index":0,"content_index":0,"delta":"I am a mock."}
+
+ event: response.completed
+ data: {"type":"response.completed","response":{"id":"resp_mock","object":"response","status":"completed","model":"gpt-5-nano","output":[{"type":"message","id":"msg_mock","status":"completed","role":"assistant","content":[{"type":"output_text","text":"I am a mock."}]}],"usage":{"input_tokens":50,"output_tokens":50,"total_tokens":100}}}
+
+ """;
+ }
+
+ /**
+  * Sends {@code stream.json} through the gateway and checks that the upstream's
+  * {@code text/event-stream} reply reaches the client: status 200, the SSE content type,
+  * and a body containing the delta event, its text and the terminal event.
+  */
+ @Test
+ void streamingResponseIsForwarded() throws IOException, InterruptedException {
+     var reply = sendStreamingRequest("stream.json");
+     assertEquals(200, reply.statusCode());
+
+     var contentType = reply.headers().firstValue("content-type").orElse("");
+     assertTrue(contentType.contains("text/event-stream"),
+             "Expected Content-Type text/event-stream");
+
+     var sseBody = reply.body();
+     assertTrue(sseBody.contains("response.output_text.delta"),
+             "SSE body must contain the delta event name");
+     assertTrue(sseBody.contains("I am a mock."),
+             "SSE body must contain the delta text");
+     assertTrue(sseBody.contains("response.completed"),
+             "SSE body must contain the terminal event");
+ }
+
+ /**
+  * A streaming request carrying {@code "max_output_tokens": 500} must be forwarded with
+  * the field rewritten to the configured {@code maxOutputTokens} of 200. The request body
+  * captured by the mock is inspected to confirm the capped value.
+  */
+ @Test
+ void streamingOutputTokensAreCappedBeforeForwarding() throws IOException, InterruptedException {
+     int status = sendStreamingRequest("max-output-stream.json").statusCode();
+     assertEquals(200, status);
+
+     int forwardedLimit = from(lastRequestBody).getInt("max_output_tokens");
+     assertThat(forwardedLimit, equalTo(200));
+ }
+
+ // -------------------------------------------------------------------------
+
+ private HttpResponse
+ *
+ *
+ *
+ *
+ * Additional gateway limits: {@code maxInputTokens=100}, {@code maxOutputTokens=200},
+ * allowed models: {@code claude-sonnet-4-0}, {@code claude-opus-4-0}, {@code claude-haiku-3-5}.
+ */
+public class SharingApiKeysTutorialTest extends AbstractAiTutorialTest {
+
+ private static final String ALICE = "abc123";
+ private static final String BOB = "qwertz";
+
+ @Override
+ protected String getTutorialDir() {
+ return "ai/llm-gateway/claude";
+ }
+
+ @Override
+ protected String getTutorialYaml() {
+ return "20-Sharing-API-Keys.yaml";
+ }
+
+ @Test
+ void aliceCanSendRequest() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", ALICE)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(200)
+ .body("type", equalTo("message"));
+ // @formatter:on
+ }
+
+ @Test
+ void bobCanSendRequest() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", BOB)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(200)
+ .body("type", equalTo("message"));
+ // @formatter:on
+ }
+
+ @Test
+ void unknownApiKeyIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", "invalid-key")
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(401)
+ .body("type", equalTo("error"))
+ .body("error.type", equalTo("authentication_error"));
+ // @formatter:on
+ }
+
+ /**
+ * The gateway is configured with its own upstream {@code apiKey}. When a user request
+ * arrives carrying the user-facing key (e.g. alice's {@code abc123}), the gateway must
+ * replace it with the configured upstream key before forwarding to the LLM provider.
+ */
+ @Test
+ void userApiKeyIsReplacedWithGatewayApiKey() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", ALICE)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(200);
+ // @formatter:on
+
+ assertThat(lastRequestApiKey, not(equalTo(ALICE)));
+ assertThat(lastRequestApiKey, equalTo(TEST_API_KEY));
+ }
+
+ @Test
+ void wrongModelIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", ALICE)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("wrong-model.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(400)
+ .body("type", equalTo("error"))
+ .body("error.type", equalTo("invalid_request_error"))
+ .body("error.message", containsString("gpt-5"))
+ .body("error.message", containsString("not allowed"));
+ // @formatter:on
+ }
+
+ @Test
+ void inputTokenLimitExceededIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", ALICE)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("max-input.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(400)
+ .body("type", equalTo("error"))
+ .body("error.type", equalTo("invalid_request_error"))
+ .body("error.message", containsString("prompt is too long"))
+ .body("error.message", containsString("100 maximum"));
+ // @formatter:on
+ }
+
+ /**
+ * Alice has a budget of 250 tokens. Each request with {@code max-output.json} projects
+ * 7 (input estimate) + 200 (capped max_tokens) = 207 tokens. The mock returns 15 tokens
+ * of actual usage per call, so the running total grows by 15 after each response.
+ *
+ *
+ * 1st: 250 - 0 - 207 = 43 → forwarded; used becomes 15
+ * 2nd: 250 - 15 - 207 = 28 → forwarded; used becomes 30
+ * 3rd: 250 - 30 - 207 = 13 → forwarded; used becomes 45
+ * 4th: 250 - 45 - 207 = -2 → rejected with 429
+ *
+ *
+ * Bob's separate budget of 10 000 tokens is unaffected, so he can still send requests
+ * after alice is blocked.
+ */
+ @Test
+ void alicesTokenBudgetIsExhaustedWhileBobIsUnaffected() throws IOException {
+ for (int i = 0; i < 3; i++) {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", ALICE)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(200);
+ // @formatter:on
+ }
+
+ // Alice's budget is now exhausted
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-api-key", ALICE)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(429)
+ .body("type", equalTo("error"))
+ .body("error.type", equalTo("rate_limit_error"));
+
+ // Bob's budget is independent — he can still send requests
+ given()
+ .contentType("application/json")
+ .header("x-api-key", BOB)
+ .header("anthropic-version", "2023-06-01")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/messages")
+ .then()
+ .statusCode(200)
+ .body("type", equalTo("message"));
+ // @formatter:on
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java
new file mode 100644
index 0000000000..4e39f7ae6c
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java
@@ -0,0 +1,58 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.google;
+
+import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest;
+
+/**
+ * Base class for Google Gemini LLM-Gateway tutorial tests.
+ *
+ *
+ *
+ *
+ *
+ *
+ * Additional gateway limits: {@code maxInputTokens=100}, {@code maxOutputTokens=200},
+ * allowed models: {@code gemini-2.5-pro}, {@code gemini-2.5-flash}, {@code gemini-2.5-flash-lite},
+ * {@code gemini-2.0-flash}, {@code gemini-2.0-flash-lite}.
+ *
+ *
+ * 1st: 500 - 0 - 209 = 291 → forwarded; used becomes 100
+ * 2nd: 500 - 100 - 209 = 191 → forwarded; used becomes 200
+ * 3rd: 500 - 200 - 209 = 91 → forwarded; used becomes 300
+ * 4th: 500 - 300 - 209 = -9 → rejected with 429
+ *
+ *
+ * Bob's separate budget of 10 000 tokens is unaffected, so he can still send requests
+ * after alice is blocked.
+ */
+ @Test
+ void alicesTokenBudgetIsExhaustedWhileBobIsUnaffected() throws IOException {
+ for (int i = 0; i < 3; i++) {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", ALICE)
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(GEMINI_FLASH_ENDPOINT)
+ .then()
+ .statusCode(200);
+ // @formatter:on
+ }
+
+ // Alice's budget is now exhausted
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", ALICE)
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(GEMINI_FLASH_ENDPOINT)
+ .then()
+ .statusCode(429)
+ .body("error.status", equalTo("RESOURCE_EXHAUSTED"));
+
+ // Bob's budget is independent — he can still send requests
+ given()
+ .contentType("application/json")
+ .header("x-goog-api-key", BOB)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(GEMINI_FLASH_ENDPOINT)
+ .then()
+ .statusCode(200)
+ .body("candidates[0].content.parts[0].text", equalTo("I am a mock."));
+ // @formatter:on
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java
new file mode 100644
index 0000000000..54136f4c2f
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java
@@ -0,0 +1,61 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.openai;
+
+import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest;
+
+/**
+ * Base class for OpenAI LLM-Gateway tutorial tests.
+ *
+ *
+ *
+ *
+ *
+ *
+ * Additional gateway limits: {@code maxInputTokens=100}, {@code maxOutputTokens=200},
+ * allowed models: {@code gpt-5.4}, {@code gpt-5-nano}, {@code gpt-5-mini}.
+ */
+public class SharingApiKeysOpenAiTutorialTest extends AbstractOpenAiTutorialTest {
+
+ private static final String ALICE = "abc123"; // API key the tests send as Alice's bearer token
+ private static final String BOB = "qwertz"; // API key the tests send as Bob's bearer token
+
+ @Override
+ protected String getTutorialYaml() {
+ return "20-Sharing-API-Keys.yaml"; // presumably the tutorial config the base class starts the gateway with — confirm in AbstractOpenAiTutorialTest
+ }
+
+ @Test
+ void aliceCanSendRequest() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + ALICE)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200)
+ .body("object", equalTo("response"));
+ // @formatter:on
+ }
+
+ @Test
+ void bobCanSendRequest() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + BOB)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200)
+ .body("object", equalTo("response"));
+ // @formatter:on
+ }
+
+ @Test
+ void unknownApiKeyIsRejected() throws IOException {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer invalid-key")
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(401)
+ .body("error.code", equalTo("invalid_authentication"));
+ // @formatter:on
+ }
+
+ /**
+ * The gateway is configured with its own upstream {@code apiKey}. When a user request
+ * arrives carrying the user-facing key (e.g. Alice's {@code abc123}), the gateway must
+ * replace it with the configured upstream key before forwarding to the LLM provider.
+ * For OpenAI, the key is carried in the {@code Authorization: Bearer <key>} header.
+ *
+ * 1st: 500 - 0 - 209 = 291 → forwarded; used becomes 100
+ * 2nd: 500 - 100 - 209 = 191 → forwarded; used becomes 200
+ * 3rd: 500 - 200 - 209 = 91 → forwarded; used becomes 300
+ * 4th: 500 - 300 - 209 = -9 → rejected with 429
+ *
+ *
+ * Bob's separate budget of 10 000 tokens is unaffected, so he can still send requests
+ * after Alice is blocked.
+ */
+ @Test
+ void alicesTokenBudgetIsExhaustedWhileBobIsUnaffected() throws IOException {
+ for (int i = 0; i < 3; i++) {
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + ALICE)
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200);
+ // @formatter:on
+ }
+
+ // Alice's budget is now exhausted
+ // @formatter:off
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + ALICE)
+ .body(readFileFromBaseDir("max-output.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(429)
+ .body("error.type", equalTo("rate_limit_error"))
+ .body("error.code", equalTo("token_limit_exceeded"));
+
+ // Bob's budget is independent — he can still send requests
+ given()
+ .contentType("application/json")
+ .header("Authorization", "Bearer " + BOB)
+ .body(readFileFromBaseDir("simple.json"))
+ .when()
+ .post(LOCALHOST_2000 + "/v1/responses")
+ .then()
+ .statusCode(200)
+ .body("object", equalTo("response"));
+ // @formatter:on
+ }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java
new file mode 100644
index 0000000000..679cfca6a7
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java
@@ -0,0 +1,135 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.openai;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.time.Duration;
+
+import static io.restassured.path.json.JsonPath.from;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Integration tests for the streaming (SSE) path of
+ * {@code distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml}.
+ *
+ *
+ *
+ *
+ *