diff --git a/annot/src/main/java/com/predic8/membrane/annot/yaml/parsing/binding/ObjectBinder.java b/annot/src/main/java/com/predic8/membrane/annot/yaml/parsing/binding/ObjectBinder.java index a018d1b509..8ca91e4ebb 100644 --- a/annot/src/main/java/com/predic8/membrane/annot/yaml/parsing/binding/ObjectBinder.java +++ b/annot/src/main/java/com/predic8/membrane/annot/yaml/parsing/binding/ObjectBinder.java @@ -33,11 +33,7 @@ import java.util.List; import java.util.Objects; -import static com.predic8.membrane.annot.yaml.McYamlIntrospector.findRequiredSetters; -import static com.predic8.membrane.annot.yaml.McYamlIntrospector.findSingleSetterOrNullForAnnotation; -import static com.predic8.membrane.annot.yaml.McYamlIntrospector.getSingleChildSetter; -import static com.predic8.membrane.annot.yaml.McYamlIntrospector.isCollapsed; -import static com.predic8.membrane.annot.yaml.McYamlIntrospector.isNoEnvelope; +import static com.predic8.membrane.annot.yaml.McYamlIntrospector.*; import static com.predic8.membrane.annot.yaml.NodeValidationUtils.ensureMappingStart; public final class ObjectBinder { @@ -49,7 +45,8 @@ public final class ObjectBinder { public static T bind(ParsingContext pc, Class clazz, JsonNode node) throws ConfigurationParsingException { try { - T configObj = clazz.getConstructor().newInstance(); + T configObj = instantiate(clazz); + BeanDefinition currentBeanDefinition = BeanDefinitionContext.current(); if (currentBeanDefinition != null && pc.getRegistry() != null) { pc.getRegistry().rememberBeanDefinition(configObj, currentBeanDefinition); @@ -102,6 +99,14 @@ public static T bind(ParsingContext pc, Class clazz, JsonNode node) th } } + private static @NotNull T instantiate(Class clazz) throws InvocationTargetException, InstantiationException, IllegalAccessException { + try { + return clazz.getConstructor().newInstance(); + } catch (NoSuchMethodException e) { + throw new ConfigurationParsingException("Class %s does not have a public no-arg 
constructor.".formatted(clazz.getName())); + } + } + private static @NotNull T handleCollapsed(ParsingContext ctx, Class clazz, JsonNode node, T configObj) { if (node.isNull()) throw new ConfigurationParsingException("Collapsed element must not be null."); @@ -117,7 +122,6 @@ private static T handleNoEnvelopeList(ParsingContext pc, Class clazz, return configObj; } - @SuppressWarnings("ConstantValue") private static void applyCollapsedScalar(Class clazz, JsonNode node, T target) { Method attributeSetter = findSingleSetterOrNullForAnnotation(clazz, MCAttribute.class); Method textSetter = findSingleSetterOrNullForAnnotation(clazz, MCTextContent.class); diff --git a/core/src/main/java/com/predic8/membrane/core/http/Header.java b/core/src/main/java/com/predic8/membrane/core/http/Header.java index 68844f5a18..cc435d43c4 100644 --- a/core/src/main/java/com/predic8/membrane/core/http/Header.java +++ b/core/src/main/java/com/predic8/membrane/core/http/Header.java @@ -15,29 +15,41 @@ package com.predic8.membrane.core.http; import com.predic8.membrane.annot.Constants; -import com.predic8.membrane.core.http.cookie.*; -import com.predic8.membrane.core.util.*; -import jakarta.mail.internet.*; -import org.jetbrains.annotations.*; -import org.slf4j.*; - -import java.io.*; -import java.security.*; -import java.util.*; +import com.predic8.membrane.core.http.cookie.Cookies; +import com.predic8.membrane.core.http.cookie.MimeHeaders; +import com.predic8.membrane.core.http.cookie.ServerCookie; +import com.predic8.membrane.core.util.EndOfStreamException; +import com.predic8.membrane.core.util.HttpUtil; +import jakarta.mail.internet.ContentType; +import jakarta.mail.internet.ParseException; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.security.InvalidParameterException; import java.util.ArrayList; -import java.util.function.*; 
-import java.util.regex.*; -import java.util.stream.*; - -import static com.predic8.membrane.core.http.MimeType.*; -import static com.predic8.membrane.core.util.HttpUtil.*; -import static java.nio.charset.StandardCharsets.*; -import static java.util.Arrays.*; -import static java.util.Collections.*; +import java.util.List; +import java.util.Set; +import java.util.function.Predicate; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Stream; + +import static com.predic8.membrane.core.http.MimeType.isBinary; +import static com.predic8.membrane.core.util.HttpUtil.readLine; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.Arrays.stream; +import static java.util.Collections.unmodifiableList; import static java.util.Locale.ROOT; -import static java.util.regex.Pattern.*; -import static java.util.stream.Collectors.*; -import static org.apache.commons.codec.binary.Base64.*; +import static java.util.regex.Pattern.CASE_INSENSITIVE; +import static java.util.regex.Pattern.compile; +import static java.util.stream.Collectors.joining; +import static java.util.stream.Collectors.toSet; +import static org.apache.commons.codec.binary.Base64.encodeBase64; /** * The headers of an HTTP message. @@ -331,6 +343,15 @@ public String getContentType() { return getFirstValue(CONTENT_TYPE); } + /** + * Returns {@code true} if the {@code Content-Type} header starts with {@code multipart/} + * (e.g. {@code multipart/form-data}, {@code multipart/related}, {@code multipart/mixed}). 
+ */ + public boolean isMultipart() { + String ct = getContentType(); + return ct != null && ct.regionMatches(true, 0, "multipart/", 0, 10); + } + public String getUserAgent() { return getFirstValue(USER_AGENT); } diff --git a/core/src/main/java/com/predic8/membrane/core/http/Message.java b/core/src/main/java/com/predic8/membrane/core/http/Message.java index f38024aae0..b96755086f 100644 --- a/core/src/main/java/com/predic8/membrane/core/http/Message.java +++ b/core/src/main/java/com/predic8/membrane/core/http/Message.java @@ -338,6 +338,13 @@ public boolean isImage() { return MimeType.isImage(getHeader().getContentType()); } + /** + * @return true if the message has a media type of image/*, audio/*, video/*, octet-stream, or application/octet-stream + */ + public boolean isBinary() { + return MimeType.isBinary(getHeader().getContentType()); + } + public boolean isXML() { return MimeType.isXML(getHeader().getContentType()); } diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java new file mode 100644 index 0000000000..ed9fe0929c --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java @@ -0,0 +1,69 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.predic8.membrane.core.util.http.SSEParser; +import com.predic8.membrane.core.util.json.JsonUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class AbstractLLMEvent { + + private static final Logger log = LoggerFactory.getLogger(AbstractLLMEvent.class); + + protected static final ObjectMapper om = new ObjectMapper(); + + protected final JsonNode json; + + protected AbstractLLMEvent(JsonNode json) { + this.json = json; + } + + public abstract String getType(); + + public JsonNode getJson() { + return json; + } + + public static AbstractLLMEvent create(SSEParser.SSEEvent sse) { + + if ("[DONE]".equals(sse.data())) { + return new ChatCompletionDoneEvent(); + } + + var opt = JsonUtil.getJsonObject(sse.data()); + if (opt.isEmpty()) { + log.info("Unknown event format: {}", sse.data()); + return null; // data is not a JSON object: report "unknown" like the fallthrough below instead of failing in opt.get() + } + var json = opt.get(); + + // Responses API + if (json.has("type")) { + return new ResponsesApiEvent(json); + } + + // Chat Completions API + if ("chat.completion.chunk".equals(json.path("object").asText())) { + return new ChatCompletionEvent(json); + } + + log.debug("Unknown event format: {}", json); + + return null; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionDoneEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionDoneEvent.java new file mode 100644 index 0000000000..cc234b8113 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionDoneEvent.java @@ -0,0 +1,29 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.fasterxml.jackson.databind.node.NullNode; + +public class ChatCompletionDoneEvent extends AbstractLLMEvent { + + public ChatCompletionDoneEvent() { + super(NullNode.getInstance()); + } + + @Override + public String getType() { + return "chat.completion.done"; + } +} \ No newline at end of file diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionEvent.java new file mode 100644 index 0000000000..1fde1e736f --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionEvent.java @@ -0,0 +1,77 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.fasterxml.jackson.databind.JsonNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ChatCompletionEvent extends AbstractLLMEvent { + + private static final Logger log = LoggerFactory.getLogger(ChatCompletionEvent.class); + + public ChatCompletionEvent(JsonNode json) { + super(json); + + parseChoices(json); + + } + + + private static void parseChoices(JsonNode json) { + for (JsonNode choice : json.path("choices")) { + + JsonNode delta = choice.path("delta"); + + if (delta.has("content")) { + log.debug("Content delta: {}", + delta.path("content").asText()); + } + + if (delta.has("tool_calls")) { + + for (JsonNode tc : delta.path("tool_calls")) { + + JsonNode fn = tc.path("function"); + + if (fn.has("name")) { + log.debug("Tool call name delta: {}", + fn.path("name").asText()); + } + + if (fn.has("arguments")) { + log.debug("Tool call arguments delta: {}", + fn.path("arguments").asText()); + } + } + } + + String finishReason = choice.path("finish_reason").asText(null); + + if (finishReason != null && !"null".equals(finishReason)) { + log.debug("Finish reason: {}", finishReason); + } + } + } + + @Override + public String getType() { + return "chat.completion.chunk"; + } + + public JsonNode getChoices() { + return json.path("choices"); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java new file mode 100644 index 0000000000..1219fb66b3 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/DefaultPolicies.java @@ -0,0 +1,147 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.predic8.membrane.annot.MCAttribute; +import com.predic8.membrane.annot.MCElement; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.Outcome; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest; +import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.OrganizationRequest; +import com.predic8.membrane.core.util.ConfigurationException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +import static com.predic8.membrane.core.interceptor.Outcome.CONTINUE; +import static com.predic8.membrane.core.interceptor.Outcome.RETURN; + +/** + * @description LLM Gateway policies for token usage and model restrictions. 
+ */ +@MCElement(name = "policies", id = "llm-gateway-policies") +public class DefaultPolicies implements Policies { + + private static final Logger log = LoggerFactory.getLogger(DefaultPolicies.class); + + private LLMErrorCreator errorCreator; + + private List models; + private int maxOutputTokens; + private int maxInputTokens; + + public void init(LLMErrorCreator errorCreator) { + this.errorCreator = errorCreator; + } + + public Outcome handleRequest(ModelInputRequest mir, Exchange exc) { + + if (mir instanceof OrganizationRequest) { + return CONTINUE; + } + + var outcome = checkTokenLimits(mir, exc); + if (outcome != CONTINUE) { + return outcome; + } + return checkModel(mir, exc); + } + + public Outcome checkModel(ModelInputRequest mir, Exchange exc) { + var model = mir.getModel(); + if (models != null && !models.contains(model)) { + exc.setResponse(errorCreator.modelNotAllowed(model, models)); + return RETURN; + } + return CONTINUE; + } + + public Outcome checkTokenLimits(ModelInputRequest mir, Exchange exc) { + + var requestedMaxOutputTokens = mir.getRequestedMaxOutputTokens(); + var inputTokens = mir.estimateInputTokens(); + + if (maxOutputTokens > 0) { + if (requestedMaxOutputTokens <= 0) { + log.info("No max. output requested. Setting limit to {}.", maxOutputTokens); + mir.setMaxOutputTokens(maxOutputTokens); + } else if (requestedMaxOutputTokens > maxOutputTokens) { + log.info("Requested max. output tokens {} exceed the limit. Setting limit to {}.", requestedMaxOutputTokens, maxOutputTokens); + mir.setMaxOutputTokens(maxOutputTokens); + } + } + + if (maxInputTokens != 0) { + if (inputTokens > maxInputTokens) { + log.info("Input tokens {} exceed the limit of {}.", inputTokens, maxInputTokens); + exc.setResponse(errorCreator.inputTokensExceeded(maxInputTokens, inputTokens)); + return RETURN; + } + } + return CONTINUE; + } + + public List getModels() { + return models; + } + + /** + * @param models List of models that can be used by the gateway. 
+ * @description Restricts the models that can be used by the gateway. + * @default null (no restriction) + */ + @MCAttribute + public void setModels(List models) { + this.models = models; + } + + + public int getMaxOutputTokens() { + return maxOutputTokens; + } + + /** + * @param maxOutputTokens Maximum number of tokens the LLM should use to generate a response. + * @description Maximum number of tokens the LLM should use to generate a response. This is just a hint that the gateway + * sends to the LLM provider. The provider may use a different limit. + * @default 0 (unlimited) + */ + @MCAttribute + public void setMaxOutputTokens(int maxOutputTokens) { + if (maxOutputTokens < 0) { + throw new IllegalArgumentException("maxOutputTokens must be >= 0"); + } + this.maxOutputTokens = maxOutputTokens; + } + + public int getMaxInputTokens() { + return maxInputTokens; + } + + /** + * @param maxInputTokens Maximum number of tokens that a request can use. + * @description Restricts token usage for the input. The size of the input is estimated by the gateway based on the request size. + * Actual token usage may deviate from this value. + */ + @MCAttribute + public void setMaxInputTokens(int maxInputTokens) { + if (maxInputTokens < 0) { + throw new ConfigurationException("maxInputTokens must be >= 0"); + } + this.maxInputTokens = maxInputTokens; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java new file mode 100644 index 0000000000..fca36d7066 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java @@ -0,0 +1,241 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.predic8.membrane.annot.MCAttribute; +import com.predic8.membrane.annot.MCChildElement; +import com.predic8.membrane.annot.MCElement; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.AbstractInterceptor; +import com.predic8.membrane.core.interceptor.Outcome; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest; +import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest; +import com.predic8.membrane.core.interceptor.llmgateway.store.AiApiStore; +import com.predic8.membrane.core.interceptor.llmgateway.store.AiApiUser; +import com.predic8.membrane.core.util.ConfigurationException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static com.predic8.membrane.core.interceptor.Outcome.CONTINUE; +import static com.predic8.membrane.core.interceptor.Outcome.RETURN; + +/** + * @description

+ * API Gateway for Large Language Models (LLMs). + *

+ * Features: + *
    + *
  • Sharing an API key between multiple users
  • + *
  • Enforcing token limits
  • + *
  • Logging LLM usage
  • + *
+ *

+ * @topic 10. AI + */ +@MCElement(name = "llmGateway") +public class LLMGatewayInterceptor extends AbstractInterceptor { + + private static final Logger log = LoggerFactory.getLogger(LLMGatewayInterceptor.class); + + public static final String MEMBRANE_AI_USER = "membrane.ai.user"; + + private LLMProvider provider; + private LLMErrorCreator errorCreator; + + private String apiKey; + + private Policies policies = new NullPolicies(); + + private SystemPrompt systemPrompt; + + private AiApiStore store; + + @Override + public void init() { + super.init(); + errorCreator = provider.getErrorCreator(); + policies.init(errorCreator); + if (store != null) + store.init(router); + + // Check if the replacement markers are still there + if (apiKey != null && apiKey.contains("<<") && apiKey.contains(">>")) { + throw new ConfigurationException("The configuration contains the replacement marker %s. Substitute it with the API key of the model.".formatted(apiKey)); + } + } + + @Override + public Outcome handleRequest(Exchange exc) { + + LLMRequest llmReq; + try { + llmReq = provider.getLLMRequest(exc); + } catch (Exception e) { + exc.setResponse(errorCreator.invalidRequestError("Error parsing request: " + e.getMessage())); + return RETURN; + } + + AiApiUser user = null; + if (store != null) { + var opt = store.getUser(llmReq.getApiKey()); + if (opt.isEmpty()) { + exc.setResponse(errorCreator.authenticationFailed()); + return RETURN; + } + user = opt.get(); + log.debug("User: {}", user); + exc.setProperty(MEMBRANE_AI_USER, user); + } + + // If APIKey is specified, use that for the LLM. 
Overwrites keys from the client + if (apiKey != null) { + llmReq.setApiKey(apiKey); + } + + if (!exc.getRequest().isPOSTRequest()) { + return CONTINUE; + } + + if (!(llmReq instanceof ModelInputRequest mir)) { + return CONTINUE; + } + + var outcome = policies.handleRequest(mir, exc); + if (outcome != CONTINUE) { + return outcome; + } + + if (systemPrompt != null) { + outcome = systemPrompt.handleRequest(mir, exc); + if (outcome != CONTINUE) { + return outcome; + } + } + + // Check store limits + if (checkStoreLimits(exc, mir, user) != CONTINUE) { + return RETURN; + } + + exc.getRequest().setBodyContent(mir.getBody().getContent()); + return CONTINUE; + } + + private Outcome checkStoreLimits(Exchange exc, ModelInputRequest mir, AiApiUser user) { + long inputTokens = mir.estimateInputTokens(); + log.debug("Estimated input tokens: {}", inputTokens); + if (store != null) { + var effectiveMaxTokens = computeEffectiveMaxOutputTokens(mir.getRequestedMaxOutputTokens(), policies.getMaxOutputTokens()); + var remaining = store.checkLimit(user, inputTokens, effectiveMaxTokens); + log.debug("User {} has {} remaining tokens left", user, remaining); + if (remaining <= 0) { + log.info("Token limit exceeded. 
Remaining: {} input: {} maxOutput: {}", remaining, inputTokens, effectiveMaxTokens); + exc.setResponse(errorCreator.tokenLimitExceeded(inputTokens + effectiveMaxTokens, remaining, store.getRemainingResetTime())); + return RETURN; + } + } + return CONTINUE; + } + + long computeEffectiveMaxOutputTokens(long requestedMaxOutputTokens, long maxOutputTokens) { + if (requestedMaxOutputTokens <= 0 || maxOutputTokens <= 0) // a non-positive value means "not set" (policy default 0 = unlimited), so use whichever side is set + return Math.max(requestedMaxOutputTokens, maxOutputTokens); + return Math.min(requestedMaxOutputTokens, maxOutputTokens); + } + + @Override + public Outcome handleResponse(Exchange exc) { + provider.getLLMResponse(exc, res -> { + var user = exc.getProperty(MEMBRANE_AI_USER, AiApiUser.class); + log.debug("Token usage of user {}: {}", user, res.getUsage()); + if (store != null) { + store.store(user, res.getUsage()); + } + }); + + return CONTINUE; + } + + public String getApiKey() { + return apiKey; + } + + /** + * @param apiKey LLM provider API key + * @description API key for the LLM provider. Specify here the API key from OpenAI or Anthropic. + */ + @MCAttribute + public void setApiKey(String apiKey) { + this.apiKey = apiKey; + } + + public AiApiStore getAiStore() { + return store; + } + + /** + * @param store Store for API keys and usage statistics + * @description The LLM Gateway can operate stateless and statefully. For stateful operation, specify an AiApiStore. + * A store is needed for user authentication at the gateway. + * The gateway will use the store to enforce token limits and log usage statistics. + */ + @MCChildElement(allowForeign = true, order = 30) + public void setAiStore(AiApiStore store) { + this.store = store; + } + + @Override + public String getDisplayName() { + return "LLM Gateway"; + } + + public LLMProvider getProvider() { + return provider; + } + + /** + * @param provider The LLM provider to use. + * @description The LLM provider to use. Currently, OpenAI, Anthropic and Gemini are supported. + * The provider determines the API used to talk to the LLM. 
The provider can be different as long as the API is supported. + */ + @MCChildElement(order = 10) + public void setProvider(LLMProvider provider) { + this.provider = provider; + } + + public Policies getPolicies() { + return policies; + } + + /** + * + * @param policies Usage policy for the LLM Gateway. + */ + @MCChildElement(order = 20) + public void setPolicies(Policies policies) { + this.policies = policies; + } + + public SystemPrompt getSystemPrompt() { + return systemPrompt; + } + + @MCChildElement + public void setSystemPrompt(SystemPrompt systemPrompt) { + this.systemPrompt = systemPrompt; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/NullPolicies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/NullPolicies.java new file mode 100644 index 0000000000..a1ba392b3b --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/NullPolicies.java @@ -0,0 +1,31 @@ +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.Outcome; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest; + +import static com.predic8.membrane.core.interceptor.Outcome.CONTINUE; + +public class NullPolicies implements Policies { + + @Override + public Outcome handleRequest(ModelInputRequest mir, Exchange exc) { + return CONTINUE; + } + + @Override + public void init(LLMErrorCreator errorCreator) { + + } + + @Override + public int getMaxOutputTokens() { + return 0; + } + + @Override + public void setMaxOutputTokens(int maxOutputTokens) { + } +} + diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java new file mode 100644 index 0000000000..fc742e30ce --- /dev/null 
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java @@ -0,0 +1,17 @@ +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.Outcome; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest; + +public interface Policies { + + Outcome handleRequest(ModelInputRequest mir, Exchange exc); + + void init(LLMErrorCreator errorCreator); + + int getMaxOutputTokens(); + void setMaxOutputTokens(int maxOutputTokens); + +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ResponsesApiEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ResponsesApiEvent.java new file mode 100644 index 0000000000..4b726bec62 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ResponsesApiEvent.java @@ -0,0 +1,66 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ResponsesApiEvent extends AbstractLLMEvent { + + private static final Logger log = LoggerFactory.getLogger(ResponsesApiEvent.class); + + private final String type; + + public ResponsesApiEvent(JsonNode json) { + super(json); + + this.type = json.path("type").asText(); + + log.debug("Responses API event: {}", type); + + if ("response.output_item.done".equals(type)) { + + var item = json.path("item"); + + if (item.isObject()) { + var on = (ObjectNode) item; + + if ("function_call".equals(on.path("type").asText())) { + if (log.isDebugEnabled()) { + log.debug("Function call: {} with params {}", + on.path("name").asText(), + on.path("arguments").asText()); + } else { + log.info("Function call: {}", on.path("name")); + } + } + } + } + } + + @Override + public String getType() { + return type; + } + + @Override + public String toString() { + return "ResponsesApiEvent{" + + "type='" + type + '\'' + + '}'; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java new file mode 100644 index 0000000000..e2382e0135 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/SystemPrompt.java @@ -0,0 +1,71 @@ +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.predic8.membrane.annot.MCAttribute; +import com.predic8.membrane.annot.MCElement; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.Outcome; +import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +import static 
com.predic8.membrane.core.interceptor.Outcome.CONTINUE; + +/** + * @description When used with older chat completions API the instruction is converted to a system message like: + * "system": "You are a helpful assistant." + */ +@MCElement(name = "systemPrompt") +public class SystemPrompt { + + private static final Logger log = LoggerFactory.getLogger(SystemPrompt.class); + + public enum Action { + REMOVE, OVERWRITE, APPEND, PREPEND + } + + private Action action = Action.OVERWRITE; + private String content = ""; + + public Outcome handleRequest(ModelInputRequest mir, Exchange exc) { + var instructions = mir.getSystemPrompt() == null ? "" : mir.getSystemPrompt(); + switch (action) { + case OVERWRITE -> { + log.debug("Overwriting instructions: {}", content); + mir.setSystemPrompts(List.of(content)); + } + case PREPEND -> { + log.debug("Prepending instructions: {}", content); + mir.setSystemPrompts(List.of(content, instructions)); + } + case APPEND -> { + log.debug("Appending instructions: {}", content); + mir.setSystemPrompts(List.of(instructions, content)); + } + case REMOVE -> { + log.info("Removing instructions: {}", instructions); + mir.removeSystemPrompt(); + } + } + return CONTINUE; + } + + public Action getAction() { + return action; + } + + @MCAttribute + public void setAction(Action action) { + this.action = action; + } + + public String getContent() { + return content; + } + + @MCAttribute + public void setContent(String content) { + this.content = content; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMErrorCreator.java new file mode 100644 index 0000000000..6ecf4d7ef5 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMErrorCreator.java @@ -0,0 +1,47 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache 
License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.fasterxml.jackson.databind.ObjectMapper; + +public abstract class AbstractLLMErrorCreator implements LLMErrorCreator { + + private static final ObjectMapper om = new ObjectMapper(); + + public static String createJson(Object o) { + try { + return om.writeValueAsString(o); + } catch (Exception e) { + return """ + { "error": "Could not create JSON" } + """; + } + } + + public String envelope(String message, String type, String param, String code) { + return createJson(new ErrorEnvelope(new ErrorBody(message,type,param,code))); + } + + private record ErrorEnvelope(ErrorBody error) { + } + + private record ErrorBody( + String message, + String type, + String param, + String code + ) { + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMMessage.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMMessage.java new file mode 100644 index 0000000000..391324f38e --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMMessage.java @@ -0,0 +1,39 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.predic8.membrane.core.exchange.Exchange; + +public class AbstractLLMMessage { + + protected final Exchange exchange; + + public enum API { COMPLETIONS, NORMAL } + + protected API api; + + protected AbstractLLMMessage(Exchange exchange) { + this.exchange = exchange; + api = getAPI(exchange); + } + + protected API getAPI(Exchange exchange) { + if (exchange.getRequest().getUri().contains("/chat/completions")) { + return API.COMPLETIONS; + } else { + return API.NORMAL; + } + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMProvider.java new file mode 100644 index 0000000000..9b833dea56 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMProvider.java @@ -0,0 +1,18 @@ +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsRequest; + +import java.io.IOException; + +public abstract class AbstractLLMProvider implements LLMProvider { + + @Override + public LLMRequest getLLMRequest(Exchange exchange) throws IOException { + var uri = exchange.getRequest().getUri(); + if (uri.startsWith("/v1/chat/completions")) { + return new ChatCompletionsRequest(exchange); + } + return new BaseLLMRequest(exchange); + } +} diff 
--git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMResponse.java new file mode 100644 index 0000000000..4732d0a0a5 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMResponse.java @@ -0,0 +1,98 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.http.AbstractMessageObserver; +import com.predic8.membrane.core.http.Chunk; +import com.predic8.membrane.core.util.http.SSEParser; +import com.predic8.membrane.core.util.json.JsonUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.function.Consumer; + +public abstract class AbstractLLMResponse extends AbstractLLMMessage implements LLMResponse { + + private static final Logger log = LoggerFactory.getLogger(AbstractLLMResponse.class); + + protected ObjectNode json; + protected Consumer postProcessor; + + public AbstractLLMResponse(Exchange exchange, Consumer postProcessor) { + super(exchange); + this.postProcessor = postProcessor; + var 
msg = exchange.getResponse(); + + if (msg.isStream()) { + + log.debug("Streaming response."); + + var parser = new SSEParser(getTerminalEvents()); + + msg.getBody().addObserver(new AbstractMessageObserver() { + @Override + public void bodyChunk(Chunk chunk) { + processChunk(chunk, parser); + } + }); + } else { + json = JsonUtil.getJsonObject(exchange.getResponse()) + .orElse(JsonNodeFactory.instance.objectNode().put("error", "No JSON object response from model.")); + postProcessor.accept(this); + } + } + + protected void processChunk(Chunk chunk, SSEParser parser) { + // Wait for terminal chunk + if (!parser.parse(chunk)) { + return; + } + + // Now all chunks are parsed + + var events = parser.getEvents(); + var terminal = parser.getTerminalEvent(); + + log.debug("Events: {}", events.size()); + events.forEach(this::process); + + terminal.ifPresent(event -> { + processTerminalEvent(event); + postProcessor.accept(AbstractLLMResponse.this); + }); + } + + protected void processTerminalEvent(SSEParser.SSEEvent terminal) {} + + @Override + public boolean isError() { + return json.get("error") != null && !json.get("error").isNull(); + } + + protected static int getOutputTokens(JsonNode usage) { + return usage.path("output_tokens").asInt( + usage.path("completion_tokens").asInt(0) + ); + } + + protected static int getInputTokens(JsonNode usage) { + return usage.path("input_tokens").asInt( + usage.path("prompt_tokens").asInt(0)); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java new file mode 100644 index 0000000000..8551dcbc4a --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequest.java @@ -0,0 +1,146 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + 
you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.http.AbstractBody; +import com.predic8.membrane.core.http.Body; +import com.predic8.membrane.core.multipart.MultipartUtil; +import com.predic8.membrane.core.util.json.JsonUtil; +import jakarta.mail.internet.ParseException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; + +import static java.nio.charset.StandardCharsets.UTF_8; + +public class AbstractModelInputRequest extends BaseLLMRequest implements ModelInputRequest { + + private static final Logger log = LoggerFactory.getLogger(AbstractModelInputRequest.class); + + private static final ObjectMapper om = new ObjectMapper(); + + protected ObjectNode json; + + private String model; + + private AbstractBody body; + + public AbstractModelInputRequest(Exchange exchange) throws IOException { + super(exchange); + + if (exchange.getRequest().getHeader().isMultipart()) { + try { + for (var part : MultipartUtil.split(exchange.getRequest())) { + log.info("Part: name={} type={} size={}", part.getName(), part.getContentType(), part.getBody().length); + if ("model".equals(part.getName())) 
{ + log.info("Model: {}", part.getBodyAsString()); + model = part.getBodyAsString(); + } + } + body = exchange.getRequest().getBody(); + } catch (IOException e) { + throw new RuntimeException(e); + } catch (ParseException e) { + throw new RuntimeException(e); + } + return; + } + + if (exchange.getRequest().isJSON()) { + json = JsonUtil.getJsonObject(exchange.getRequest()).orElseThrow(() -> new RuntimeException("Cannot parse input as JSON message.")); + } + + if (json != null) { + if (json.has("model")) { + model = json.path("model").asText(); + } + } + } + + public List getTools() { + return Collections.emptyList(); + } + + @Override + public String getSystemPrompt() { + return ""; + } + + @Override + public void setSystemPrompts(List prompts) { + log.warn("Not supported."); + } + + @Override + public void removeSystemPrompt() { + log.warn("Not supported."); + } + + protected ArrayNode getToolsNode() { + if (json == null) + return null; + if (json.path("tools").isArray()) + return (ArrayNode) json.path("tools"); + return null; + } + + + @Override + public ObjectNode getJson() { + return json; + } + + @Override + public String getModel() { + return model; + } + + @Override + public long getRequestedMaxOutputTokens() { + return 0; + } + + @Override + public void setMaxOutputTokens(int maxOutputTokens) { + log.warn("Not supported."); + } + + @Override + public long estimateInputTokens() { + return 0; + } + + @Override + public AbstractBody getBody() { + if (body != null) + return body; + try { + return new Body(om + .writerWithDefaultPrettyPrinter() + .writeValueAsString(json).getBytes(UTF_8)); + } catch (JsonProcessingException e) { + log.info("Could not serialize JSON: {}", e.getMessage()); + throw new RuntimeException("Could not serialize JSON: " + e.getMessage()); + } + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/BaseLLMRequest.java 
b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/BaseLLMRequest.java new file mode 100644 index 0000000000..40b317de09 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/BaseLLMRequest.java @@ -0,0 +1,37 @@ +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.predic8.membrane.core.exchange.Exchange; + +import static com.predic8.membrane.core.http.Header.AUTHORIZATION; + +public class BaseLLMRequest extends AbstractLLMMessage implements LLMRequest { + + public static final String BEARER_PREFIX = "Bearer"; + + protected BaseLLMRequest(Exchange exchange) { + super(exchange); + } + + @Override + public void setApiKey(String apiKey) { + exchange.getRequest().getHeader().removeFields(AUTHORIZATION); + exchange.getRequest().getHeader().add(AUTHORIZATION, "Bearer " + apiKey); + } + + @Override + public String getApiKey() { + var ah = exchange.getRequest().getHeader().getAuthorization(); + if (ah == null) { + return null; + } + + if (!ah.regionMatches(true, 0, BEARER_PREFIX, 0, BEARER_PREFIX.length())) { + return null; + } + + var token = ah.substring(BEARER_PREFIX.length()).trim(); + + return token.isEmpty() ? 
null : token; + } + +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/JSONRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/JSONRequest.java new file mode 100644 index 0000000000..2859b6fc38 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/JSONRequest.java @@ -0,0 +1,8 @@ +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.fasterxml.jackson.databind.node.ObjectNode; + +interface JSONMessage { + + ObjectNode getJson(); +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMErrorCreator.java new file mode 100644 index 0000000000..732a1332fe --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMErrorCreator.java @@ -0,0 +1,38 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.predic8.membrane.core.http.Response;
+
+import java.util.Collection;
+
+/**
+ * Creates the error responses the gateway returns to the client, in the wire format of one provider.
+ */
+public interface LLMErrorCreator {
+
+    /**
+     * @param message human-readable description of what is wrong with the request
+     * @return error response for an invalid request
+     */
+    Response invalidRequestError(String message);
+
+    /**
+     * @param tokenRequired number of tokens the request needs
+     * @param tokenRemaining number of tokens still available
+     * @param tokenResetInSeconds seconds the client should wait before retrying
+     * @return error response for an exceeded token rate limit
+     */
+    Response tokenLimitExceeded(long tokenRequired, long tokenRemaining, long tokenResetInSeconds);
+
+    /**
+     * @param model model named in the request
+     * @param allowedModels models the client may use
+     * @return error response for a model that is not allowed
+     */
+    Response modelNotAllowed(String model, Collection allowedModels);
+
+    /**
+     * @return error response for missing or invalid credentials
+     */
+    Response authenticationFailed();
+
+    /**
+     *
+     * @param maxTokens as configured
+     * @param estimatedTokens estimated number of input tokens
+     * @return Response error response
+     */
+    Response inputTokensExceeded(long maxTokens, long estimatedTokens);
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java
new file mode 100644
index 0000000000..457597d70e
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java
@@ -0,0 +1,28 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.predic8.membrane.core.exchange.Exchange;
+
+import java.io.IOException;
+import java.util.function.Consumer;
+
+/**
+ * Factory for the provider-specific request, response and error objects of the LLM gateway.
+ */
+public interface LLMProvider {
+
+    /** Wraps the request of the given exchange. */
+    LLMRequest getLLMRequest(Exchange request) throws IOException;
+    /** Wraps the response of the given exchange; {@code postProcessor} is handed on to the created response. */
+    LLMResponse getLLMResponse(Exchange request, Consumer postProcessor);
+    /** Returns the creator for error responses in this provider's format. */
+    LLMErrorCreator getErrorCreator();
+
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
new file mode 100644
index 0000000000..f80230a755
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java
@@ -0,0 +1,24 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+/**
+ * Request to a model provider, reduced to the API key it carries.
+ */
+public interface LLMRequest {
+
+    /** Returns the API key sent with the request; BaseLLMRequest returns {@code null} if none is found. */
+    String getApiKey();
+
+    /** Sets the API key that is sent with the request. */
+    void setApiKey(String apiKey);
+
+}
+
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMResponse.java
new file mode 100644
index 0000000000..3d3ed9bd78
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMResponse.java
@@ -0,0 +1,32 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.predic8.membrane.core.interceptor.llmgateway.store.Usage;
+import com.predic8.membrane.core.util.http.SSEParser.SSEEvent;
+
+import java.util.Set;
+
+/**
+ * Response of a model provider, either a single JSON document or a stream of server-sent events.
+ */
+public interface LLMResponse {
+
+    /** Tells whether the provider answered with an error. */
+    boolean isError();
+
+    /** Returns the token usage reported by the provider. */
+    Usage getUsage();
+
+    /** Returns the event markers that end a streamed response; AbstractLLMResponse passes them to the SSE parser. */
+    Set getTerminalEvents();
+
+    /** Handles one parsed event of a streamed response. */
+    void process(SSEEvent event);
+
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/ModelInputRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/ModelInputRequest.java
new file mode 100644
index 0000000000..4a779a140c
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/ModelInputRequest.java
@@ -0,0 +1,30 @@
+package com.predic8.membrane.core.interceptor.llmgateway.provider;
+
+import com.predic8.membrane.core.http.AbstractBody;
+
+import java.util.List;
+
+/**
+ * Request that carries input for a model: model name, token limits, tools and system prompt.
+ */
+public interface ModelInputRequest extends JSONMessage {
+
+    /** Returns the model named in the request. */
+    String getModel();
+
+    /**
+     * The max number of tokens that the model is allowed to generate as specified by the client.
+     * NOTE(review): AbstractModelInputRequest and ChatCompletionsRequest return 0, not -1, when no limit is given - confirm which value callers expect.
+     * @return The max number of tokens that the model is allowed to generate. -1 if no limit is set.
+     */
+    long getRequestedMaxOutputTokens();
+
+    /** Sets the maximum number of tokens the model may generate. */
+    void setMaxOutputTokens(int maxOutputTokens);
+
+    /** Returns an estimate of the number of input tokens of this request. */
+    long estimateInputTokens();
+
+    /** Returns the tools declared in the request; ChatCompletionsRequest returns the function names. */
+    List getTools();
+
+    /** Returns the system prompt of the request. */
+    String getSystemPrompt();
+
+    /** Replaces the system prompt of the request with the given prompts. */
+    void setSystemPrompts(List prompts);
+
+    /** Removes the system prompt from the request. */
+    void removeSystemPrompt();
+
+    /** Returns the body to send to the provider. */
+    AbstractBody getBody();
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsErrorCreator.java
new file mode 100644
index 0000000000..643786b0a4
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsErrorCreator.java
@@ -0,0 +1,67 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions; + +import com.predic8.membrane.core.http.Response; +import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMErrorCreator; + +import java.util.Collection; + +import static com.predic8.membrane.core.http.Header.WWW_AUTHENTICATE; +import static com.predic8.membrane.core.http.Response.*; + +public class ChatCompletionsErrorCreator extends AbstractLLMErrorCreator { + + @Override + public Response invalidRequestError(String message) { + return badRequest().json(envelope(message, "invalid_request_error", null, "bad_request")).build(); + } + + public Response tokenLimitExceeded(long tokenRequired, long tokenRemaining, long tokenResetInSeconds) { + return statusCode(429).json(envelope( + "Token rate limit exceeded. Request requires %d tokens but only %d remain. Please wait %d seconds before retrying.".formatted(tokenRequired, tokenRemaining, tokenResetInSeconds), + "rate_limit_error", + null, + "token_limit_exceeded")).build(); + } + + public Response modelNotAllowed(String model, Collection allowedModels) { + return badRequest().json(envelope( + "Model '%s' is not allowed. Allowed models: %s." + .formatted(model, String.join(", ", allowedModels)), + "invalid_request_error", + null, + "model_not_allowed")).build(); + } + + public Response authenticationFailed() { + return unauthorized().header(WWW_AUTHENTICATE, "Bearer").json(envelope( + "Invalid authentication credentials", + "invalid_request_error", + null, + "invalid_authentication")).build(); + } + + public Response inputTokensExceeded(long maxTokens, long estimatedTokens) { + return badRequest().json(envelope( + """ + This model's maximum context length is %d tokens. + Your request contains approximately %d tokens. 
+ """.formatted(maxTokens, estimatedTokens).trim(), + "invalid_request_error", + "input", + "context_length_exceeded")).build(); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java new file mode 100644 index 0000000000..8f1a7a491a --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java @@ -0,0 +1,63 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions; + +import com.predic8.membrane.annot.MCElement; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse; + +import java.io.IOException; +import java.util.function.Consumer; + +/** + * @description + * OpenAI Chat Completions API compatible provider. + * Can be used for the following providers: + *
    + *
  • Azure OpenAI
  • + *
  • Google Gemini (OpenAI compatible endpoint)
  • + *
  • TogetherAI
  • + *
  • Fireworks AI
  • + *
  • DeepSeek AI
  • + *
  • OpenRouter
  • + *
  • Mistral AI
  • + *
  • DeepInfra
  • + *
  • SiliconFlow
  • + *
  • NVIDIA NIM
  • + *
  • LM Studio
  • + *
  • vLLM
  • + *
  • Ollama
  • + *
+ */
+@MCElement(name = "chatCompletions")
+public class ChatCompletionsProvider implements LLMProvider {
+    // Every exchange is wrapped as a Chat Completions request; the URI is not inspected here
+    @Override
+    public LLMRequest getLLMRequest(Exchange request) throws IOException {
+        return new ChatCompletionsRequest(request);
+    }
+
+    // The post processor is handed on to the created response
+    @Override
+    public LLMResponse getLLMResponse(Exchange request, Consumer postProcessor) {
+        return new ChatCompletionsResponse(request, postProcessor);
+    }
+
+    // Returns a new error creator on every call
+    @Override
+    public LLMErrorCreator getErrorCreator() {
+        return new ChatCompletionsErrorCreator();
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
new file mode 100644
index 0000000000..b50a536131
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
@@ -0,0 +1,119 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions; + +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.AbstractOpenAiLLMRequest; + +import java.io.IOException; +import java.util.List; + +import static java.util.Collections.emptyList; + +public class ChatCompletionsRequest extends AbstractOpenAiLLMRequest { + + public ChatCompletionsRequest(Exchange exchange) throws IOException { + super(exchange); + + if (json == null) { + return; + } + + // Make sure that when streaming is enabled, the usage is included in the response. + if (json.path("stream").asBoolean(false)) { + var streamOptions = json.withObject("/stream_options"); + streamOptions.put("include_usage", true); + } + } + + @Override + public void setMaxOutputTokens(int maxOutputTokens) { + json.put("max_tokens", maxOutputTokens); + } + + public List getTools() { + var tools = getToolsNode(); + if (tools == null) + return emptyList(); + return tools.valueStream() + .filter(n -> "function".equals(n.path("type").asText(""))) + .map(n -> n.path("function").path("name").asText("")) + .filter(name -> !name.isEmpty()) + .toList(); + } + + /** + * Returns the content of the first {@code "role": "system"} message, + * or an empty string if none is present. + */ + @Override + public String getSystemPrompt() { + for (var message : json.path("messages")) { + if ("system".equals(message.path("role").asText())) { + return message.path("content").asText(""); + } + } + return ""; + } + + /** + * Replaces all system messages with one separate {@code {"role":"system","content":"..."}} message + * per prompt, prepended to the messages array in list order. + * + *

Chat Completions API wire format: + *

{@code
+     * { "messages": [
+     *     {"role": "system", "content": "prompt 1"},
+     *     {"role": "system", "content": "prompt 2"},
+     *     ...user messages...
+     * ]}
+     * }
+ */ + @Override + public void setSystemPrompts(List prompts) { + removeSystemPrompt(); + var messages = json.withArray("messages"); + // Insert in reverse so that prompts[0] ends up at index 0 + for (int i = prompts.size() - 1; i >= 0; i--) { + var systemMessage = json.objectNode(); + systemMessage.put("role", "system"); + systemMessage.put("content", prompts.get(i)); + messages.insert(0, systemMessage); + } + } + + /** + * Removes all {@code "role": "system"} messages from the {@code "messages"} array. + * Has no effect if no system message is present. + */ + @Override + public void removeSystemPrompt() { + var messages = json.withArray("messages"); + for (int i = messages.size() - 1; i >= 0; i--) { + if ("system".equals(messages.get(i).path("role").asText())) { + messages.remove(i); + } + } + } + + @Override + public long getRequestedMaxOutputTokens() { + // Prefer max_completion_tokens (modern OpenAI/o1+), fall back to max_tokens (legacy / all other providers) + long v = json.path("max_completion_tokens").asLong(0); + if (v > 0) return v; + return json.path("max_tokens").asLong(0); + } + +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsResponse.java new file mode 100644 index 0000000000..2b1acc0047 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsResponse.java @@ -0,0 +1,69 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions; + +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.AbstractLLMEvent; +import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMResponse; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse; +import com.predic8.membrane.core.interceptor.llmgateway.store.Usage; +import com.predic8.membrane.core.util.http.SSEParser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Set; +import java.util.function.Consumer; + +public class ChatCompletionsResponse extends AbstractLLMResponse { + + private static final Logger log = LoggerFactory.getLogger(ChatCompletionsResponse.class); + + public ChatCompletionsResponse(Exchange exchange, Consumer postProcessor) { + super(exchange, postProcessor); + } + + @Override + public Usage getUsage() { + + var usage = json.path("usage"); + + var inputTokens = usage.path("prompt_tokens").asInt(0); + var outputTokens = usage.path("completion_tokens").asInt(0); + var totalTokens = usage.path("total_tokens").asInt(inputTokens + outputTokens); + + return new Usage( + inputTokens, + outputTokens, + totalTokens + ); + } + + @Override + public Set getTerminalEvents() { + return Set.of("[DONE]"); + } + + @Override + protected void processTerminalEvent(SSEParser.SSEEvent terminal) { + postProcessor.accept(ChatCompletionsResponse.this); + } + + @Override + public void process(SSEParser.SSEEvent e) { + log.debug("Data: {}", e.data()); + 
var event = AbstractLLMEvent.create(e); + log.debug("Event: {}", event); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorCreator.java new file mode 100644 index 0000000000..1fbcf2f1a1 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorCreator.java @@ -0,0 +1,99 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.predic8.membrane.core.http.Response; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.claude.ClaudeErrorResponse.ClaudeError; + +import java.util.Collection; +import java.util.UUID; + +import static com.predic8.membrane.core.http.Header.WWW_AUTHENTICATE; +import static com.predic8.membrane.core.http.Response.*; + +public class ClaudeErrorCreator implements LLMErrorCreator { + + private static final String INVALID_REQUEST_ERROR = "invalid_request_error"; + private static final String AUTHENTICATION_ERROR = "authentication_error"; + private static final String RATE_LIMIT_ERROR = "rate_limit_error"; + + @Override + public Response invalidRequestError(String message) { + return badRequest() + .json(error(INVALID_REQUEST_ERROR, message)) + .build(); + } + + @Override + public Response tokenLimitExceeded(long tokenRequired, long tokenRemaining, long tokenResetInSeconds) { + long visibleRemaining = Math.max(0, tokenRemaining); + + return statusCode(429) + .json(error( + RATE_LIMIT_ERROR, + """ + Token rate limit exceeded. + Request requires %d tokens but only %d remain. + Retry after %d seconds. + """.formatted(tokenRequired, visibleRemaining, tokenResetInSeconds).trim() + )) + .build(); + } + + @Override + public Response modelNotAllowed(String model, Collection allowedModels) { + return badRequest() + .json(error( + INVALID_REQUEST_ERROR, + "Model '%s' is not allowed. Allowed models: %s." 
+ .formatted(model, String.join(", ", allowedModels)) + )) + .build(); + } + + @Override + public Response authenticationFailed() { + return unauthorized() + .header(WWW_AUTHENTICATE, "Bearer") + .json(error(AUTHENTICATION_ERROR, "Invalid bearer token")) + .build(); + } + + @Override + public Response inputTokensExceeded(long maxTokens, long estimatedTokens) { + return badRequest() + .json(error( + INVALID_REQUEST_ERROR, + """ + prompt is too long: + %d tokens > %d maximum + """.formatted(estimatedTokens, maxTokens).trim() + )) + .build(); + } + + private String error(String type, String message) { + return ClaudeErrorResponse.builder() + .type("error") + .error( + ClaudeError.builder() + .type(type) + .message(message) + ) + .requestId("membrane_" + UUID.randomUUID()) + .toJson(); + } +} \ No newline at end of file diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorResponse.java new file mode 100644 index 0000000000..0ff004834e --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorResponse.java @@ -0,0 +1,114 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class ClaudeErrorResponse { + + private static final ObjectMapper om = new ObjectMapper(); + + private String type = "error"; + private ClaudeError error; + private String request_id; + + public static ClaudeErrorResponse builder() { + return new ClaudeErrorResponse(); + } + + public String getType() { + return type; + } + + public ClaudeErrorResponse type(String type) { + this.type = type; + return this; + } + + public ClaudeError getError() { + return error; + } + + public ClaudeErrorResponse error(ClaudeError error) { + this.error = error; + return this; + } + + public String getRequest_id() { + return request_id; + } + + public ClaudeErrorResponse requestId(String requestId) { + this.request_id = requestId; + return this; + } + + public String toJson() { + try { + return om.writeValueAsString(this); + } catch (JsonProcessingException e) { + throw new RuntimeException("Failed to serialize ClaudeErrorResponse", e); + } + } + + @JsonIgnoreProperties(ignoreUnknown = true) + public static class ClaudeError { + + private String type; + private String message; + + public static ClaudeError builder() { + return new ClaudeError(); + } + + public String getType() { + return type; + } + + public ClaudeError type(String type) { + this.type = type; + return this; + } + + public String getMessage() { + return message; + } + + public ClaudeError message(String message) { + this.message = message; + return this; + } + + @Override + public String toString() { + return "ClaudeError{" + + "type='" + type + '\'' + + ", message='" + message + '\'' + + '}'; + } + } + + @Override + public String toString() { + return "ClaudeErrorResponse{" + + "type='" + type + '\'' + + ", 
error=" + error + + ", request_id='" + request_id + '\'' + + '}'; + } +} \ No newline at end of file diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java new file mode 100644 index 0000000000..1a0e66c3c3 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java @@ -0,0 +1,144 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractModelInputRequest; +import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; + +/** + * system field for system prompt + */ +public class ClaudeLLMRequest extends AbstractModelInputRequest implements ModelInputRequest { + + private static final Logger log = LoggerFactory.getLogger(ClaudeLLMRequest.class); + + public static final String X_API_KEY = "x-api-key"; + + public ClaudeLLMRequest(Exchange exchange) throws IOException { + super(exchange); + + exchange.getRequest().getHeader().setValue( "Accept-Encoding","identity"); + } + + public void setMaxOutputTokens(int maxOutputTokens) { + + // Thinking needs a certain number of tokens + if (maxOutputTokens < 2048 && isThinking()) { + log.info("maxOutputTokens is {}. Too low for thinking. 
Disabling thinking.", maxOutputTokens); + disableThinking(); + } + + json.put("max_tokens", maxOutputTokens); + + if (isThinking()) { + var thinking = (ObjectNode) json.path("thinking"); + if (!thinking.path("budget_tokens").isNull()) { + var budgetTokens = thinking.path("budget_tokens").asInt(); + if (budgetTokens >= maxOutputTokens) { + // budget_tokens must be smaller than max_tokens + // value might vary between models + thinking.put("budget_tokens", Math.min(maxOutputTokens / 2, 1024)); + } + } + + } + } + + @Override + public long estimateInputTokens() { + // System prompt + long tokens = json.path("system").asText().length() / 4; + + // Messages + for (var message : json.path("messages")) { + var content = message.path("content"); + if (content.isTextual()) { + tokens += content.asText().length() / 4; + } else if (content.isArray()) { + for (var block : content) { + var type = block.path("type").asText(); + if (type.equals("text")) { + tokens += block.path("text").asText().length() / 4; + } else if (type.equals("image")) { + tokens += 1000; + } + } + } + } + return tokens; + } + + /** + * Returns the system prompt from the top-level {@code "system"} field, + * or an empty string if no system prompt is set. 
+ */ + @Override + public String getSystemPrompt() { + return json.path("system").asText(""); + } + + private boolean isThinking() { + var thinking = json.path("thinking"); + return thinking.isObject() && "enabled".equals(thinking.path("type").asText()); + } + + private void disableThinking() { + var thinking = json.putObject("thinking"); + thinking.put("type", "disabled"); + } + + @Override + public long getRequestedMaxOutputTokens() { + return json.path("max_tokens").asLong(0); + } + + @Override + public String getApiKey() { + return exchange.getRequest().getHeader().getFirstValue(X_API_KEY); + } + + @Override + public void setApiKey(String apiKey) { + exchange.getRequest().getHeader().removeFields(X_API_KEY); + exchange.getRequest().getHeader().add(X_API_KEY, apiKey); + } + + /** + * Concatenates all prompts (newline-separated) into the top-level {@code "system"} field. + * + *

Claude API wire format: + *

{@code { "system": "prompt 1\nprompt 2", "messages": [...] }}
+ */ + @Override + public void setSystemPrompts(List prompts) { + json.put("system", String.join("\n", prompts)); + } + + /** + * Removes the top-level {@code "system"} field entirely. + * Has no effect if no system prompt is present. + */ + @Override + public void removeSystemPrompt() { + json.remove("system"); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMResponse.java new file mode 100644 index 0000000000..8d534643ea --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMResponse.java @@ -0,0 +1,101 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.databind.JsonNode; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMResponse; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse; +import com.predic8.membrane.core.interceptor.llmgateway.store.Usage; +import com.predic8.membrane.core.util.http.SSEParser.SSEEvent; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Set; +import java.util.function.Consumer; + +public class ClaudeLLMResponse extends AbstractLLMResponse { + + private static final Logger log = LoggerFactory.getLogger(ClaudeLLMResponse.class); + + private Usage usage; + + private final StringBuffer inputJson = new StringBuffer(); + + private String tool; + + public ClaudeLLMResponse(Exchange exchange, Consumer postProcessor) { + super(exchange,postProcessor); + } + + @Override + public Set getTerminalEvents() { + return Set.of("message_stop"); + } + + @Override + public void process(SSEEvent event) { + log.debug("Event: {}", event); + + if ("content_block_start".equals(event.name())) { + var cbs = ContentBlockStart.from(event.json()); + if (cbs.getToolUse() != null) { + tool = cbs.getToolUse().getName(); + } + } + if ("message_delta".equals(event.name())) { + var md = MessageDelta.from(event.json()); + log.debug("Message delta: {}", md); + if (md.getUsage() != null) { + usage = md.getUsage(); + if (tool != null) + log.debug("Tool {} with {}", tool, inputJson.toString()); + } + } + if ("content_block_delta".equals(event.name())) { + var cbd = ContentBlockDelta.from(event.json()); + if (cbd.isInputJsonDelta()) { + inputJson.append(cbd.getPartialJson()); + } + } + } + + Usage extractUsage() { + + var usage = json.path("usage"); + + var inputTokens = getInputTokens(usage); + var outputTokens = getOutputTokens(usage); + var totalTokens = inputTokens + outputTokens; + 
return new Usage(inputTokens, outputTokens, totalTokens); + + } + + protected static int getOutputTokens(JsonNode usage) { + return usage.path("output_tokens").asInt(0); + } + + protected static int getInputTokens(JsonNode usage) { + return usage.path("input_tokens").asInt(0); + } + + @Override + public Usage getUsage() { + if (usage != null) + return usage; + return usage = extractUsage(); + } + +} \ No newline at end of file diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java new file mode 100644 index 0000000000..decc7048b2 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java @@ -0,0 +1,48 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.predic8.membrane.annot.MCElement; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse; + +import java.io.IOException; +import java.util.function.Consumer; + +/** + * @description (Experimental) Anthroic Claude provider configuration + * Use to configure a LLM gateway to use the anthropic API + */ +@MCElement( name="claude") +public class ClaudeProvider implements LLMProvider { + + @Override + public LLMRequest getLLMRequest(Exchange exchange) throws IOException { + return new ClaudeLLMRequest(exchange); + } + + @Override + public LLMResponse getLLMResponse(Exchange request, Consumer postProcessor) { + return new ClaudeLLMResponse(request, postProcessor); + } + + @Override + public LLMErrorCreator getErrorCreator() { + return new ClaudeErrorCreator(); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockDelta.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockDelta.java new file mode 100644 index 0000000000..5e5a0648bb --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockDelta.java @@ -0,0 +1,53 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; + +public class ContentBlockDelta { + + private int index; + private String deltaType; + private String partialJson; + + public static ContentBlockDelta from(ObjectNode on) { + var cbd = new ContentBlockDelta(); + + cbd.index = on.path("index").asInt(); + + JsonNode delta = on.path("delta"); + cbd.deltaType = delta.path("type").asText(null); + cbd.partialJson = delta.path("partial_json").asText(""); + + return cbd; + } + + public boolean isInputJsonDelta() { + return "input_json_delta".equals(deltaType); + } + + public int getIndex() { + return index; + } + + public String getDeltaType() { + return deltaType; + } + + public String getPartialJson() { + return partialJson; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockStart.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockStart.java new file mode 100644 index 0000000000..bdf2be207b --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockStart.java @@ -0,0 +1,37 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.databind.node.ObjectNode; + +public class ContentBlockStart { + + private ToolUse toolUse; + + public static ContentBlockStart from(ObjectNode on) { + var cbs = new ContentBlockStart(); + var cb = (ObjectNode) on.path("content_block"); + + if ("tool_use".equals(cb.path("type").asText())) { + cbs.toolUse = ToolUse.from(cb); + } + + return cbs; + } + + public ToolUse getToolUse() { + return toolUse; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/MessageDelta.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/MessageDelta.java new file mode 100644 index 0000000000..4aa68fa737 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/MessageDelta.java @@ -0,0 +1,87 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.interceptor.llmgateway.store.Usage; + +public class MessageDelta { + + private String stopReason; + private int inputTokens; + private int outputTokens; + private int cacheCreationInputTokens; + private int cacheReadInputTokens; + + private Usage usage; + + public static MessageDelta from(ObjectNode on) { + var md = new MessageDelta(); + + JsonNode delta = on.path("delta"); + md.stopReason = delta.path("stop_reason").asText(null); + + JsonNode u = on.path("usage"); + if (u.isObject()) { + md.inputTokens = u.path("input_tokens").asInt(0); + md.outputTokens = u.path("output_tokens").asInt(0); + md.cacheCreationInputTokens = u.path("cache_creation_input_tokens").asInt(0); + md.cacheReadInputTokens = u.path("cache_read_input_tokens").asInt(0); + + // Cache tokens (cache_creation_input_tokens and cache_read_input_tokens) are billable according to Claude's pricing model + int effectiveInputTokens = md.inputTokens + md.cacheCreationInputTokens + md.cacheReadInputTokens; + md.usage = new Usage(effectiveInputTokens,md.outputTokens, effectiveInputTokens + md.outputTokens); + + } + + return md; + } + + public String getStopReason() { + return stopReason; + } + + public int getInputTokens() { + return inputTokens; + } + + public int getOutputTokens() { + return outputTokens; + } + + public int getCacheCreationInputTokens() { + return cacheCreationInputTokens; + } + + public int getCacheReadInputTokens() { + return cacheReadInputTokens; + } + + public Usage getUsage() { + return usage; + } + + @Override + public String toString() { + return "MessageDelta{" + + "stopReason='" + stopReason + '\'' + + ", inputTokens=" + inputTokens + + ", outputTokens=" + outputTokens + + ", cacheCreationInputTokens=" + cacheCreationInputTokens + + ", cacheReadInputTokens=" + 
cacheReadInputTokens + + '}'; + } +} \ No newline at end of file diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ToolUse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ToolUse.java new file mode 100644 index 0000000000..5694468d9e --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ToolUse.java @@ -0,0 +1,36 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ToolUse { + + private static final Logger log = LoggerFactory.getLogger(ToolUse.class); + + private String name; + + public static ToolUse from(ObjectNode on) { + var tu = new ToolUse(); + tu.name = on.path("name").asText(); + return tu; + } + + public String getName() { + return name; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleErrorCreator.java new file mode 100644 index 0000000000..1b86f0f39b --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleErrorCreator.java @@ -0,0 +1,114 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.google; + +import com.predic8.membrane.core.http.Response; +import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMErrorCreator; + +import java.util.Collection; + +import static com.predic8.membrane.core.http.Header.WWW_AUTHENTICATE; +import static com.predic8.membrane.core.http.Response.*; + +public class GoogleErrorCreator extends AbstractLLMErrorCreator { + + @Override + public Response invalidRequestError(String message) { + return badRequest().json( + envelope(400, message, "INVALID_ARGUMENT") + ).build(); + } + + public Response tokenLimitExceeded(long tokenRequired, + long tokenRemaining, + long tokenResetInSeconds) { + + var visibleRemaining = Math.max(0, tokenRemaining); + + return statusCode(429).json( + envelope( + 429, + """ + Token rate limit exceeded. + Request requires %d tokens but only %d remain. + Retry after %d seconds. + """ + .formatted(tokenRequired, visibleRemaining, tokenResetInSeconds) + .trim(), + "RESOURCE_EXHAUSTED" + ) + ).build(); + } + + public Response modelNotAllowed(String model, + Collection allowedModels) { + + return badRequest().json( + envelope( + 400, + "Model '%s' is not allowed. Allowed models: %s." + .formatted(model, String.join(", ", allowedModels)), + "INVALID_ARGUMENT" + ) + ).build(); + } + + public Response authenticationFailed() { + return unauthorized() + .header(WWW_AUTHENTICATE, "Bearer") + .json( + envelope( + 401, + "Invalid API key.", + "UNAUTHENTICATED" + ) + ).build(); + } + + public Response inputTokensExceeded(long maxTokens, + long estimatedTokens) { + + return badRequest().json( + envelope( + 400, + """ + The input token count (%d) exceeds the maximum allowed (%d). 
+ """ + .formatted(estimatedTokens, maxTokens) + .trim(), + "INVALID_ARGUMENT" + ) + ).build(); + } + + private String envelope(int code, + String message, + String status) { + + return createJson(new ErrorEnvelope( + new ErrorBody(code, message, status) + )); + } + + private record ErrorEnvelope(ErrorBody error) { + } + + private record ErrorBody( + int code, + String message, + String status + ) { + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java new file mode 100644 index 0000000000..90f1b1ab36 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java @@ -0,0 +1,195 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
package com.predic8.membrane.core.interceptor.llmgateway.provider.google;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractModelInputRequest;
import com.predic8.membrane.core.interceptor.llmgateway.provider.ModelInputRequest;

import java.io.IOException;
import java.util.List;

/**
 * Request view for the Google Gemini API: reads the model from the request URI,
 * the API key from the {@code x-goog-api-key} header and everything else from
 * the JSON body.
 */
public class GoogleLLMRequest extends AbstractModelInputRequest implements ModelInputRequest {

    /**
     * Name of the header that carries the API key. The spelling
     * {@code x-goog-api-key} ("goog", not "google") is intentional.
     */
    public static final String X_GOOG_API_KEY = "x-goog-api-key";

    private static final String MODELS_SEGMENT = "/models/";

    /** URL-encoded ':'; matched case-insensitively ({@code %3A} / {@code %3a}). */
    private static final String ENCODED_COLON = "%3a";

    public GoogleLLMRequest(Exchange exchange) throws IOException {
        super(exchange);
    }

    /**
     * Extracts the model name from the request URI.
     *
     * <p>Example: {@code /v1beta/models/gemini-2.5-pro:generateContent} yields
     * {@code gemini-2.5-pro}. The action suffix may be separated by {@code ':'} or by
     * its URL-encoded form {@code %3A}; a query string or fragment is never part of
     * the model name.</p>
     *
     * @return the model name, or {@code null} if the URI is {@code null} or has no
     *         {@code /models/} segment
     */
    @Override
    public String getModel() {
        var uri = exchange.getRequest().getUri();
        if (uri == null) {
            return null;
        }

        int modelsIndex = uri.indexOf(MODELS_SEGMENT);
        if (modelsIndex < 0) {
            return null;
        }

        var modelPart = uri.substring(modelsIndex + MODELS_SEGMENT.length());
        return modelPart.substring(0, endOfModelName(modelPart));
    }

    /**
     * Returns the index of the first character that terminates the model name:
     * {@code ':'}, an encoded colon, {@code '?'} or {@code '#'}. Scanning left to right
     * guarantees that the earliest separator wins, and {@code regionMatches} avoids
     * lower-casing the whole string (which is locale dependent and may shift indices).
     */
    private static int endOfModelName(String modelPart) {
        for (int i = 0; i < modelPart.length(); i++) {
            char c = modelPart.charAt(i);
            if (c == ':' || c == '?' || c == '#') {
                return i;
            }
            if (c == '%' && modelPart.regionMatches(true, i, ENCODED_COLON, 0, ENCODED_COLON.length())) {
                return i;
            }
        }
        return modelPart.length();
    }

    @Override
    public String getApiKey() {
        return exchange.getRequest().getHeader().getFirstValue(X_GOOG_API_KEY);
    }

    /** Replaces any existing API key header with the given key. */
    @Override
    public void setApiKey(String apiKey) {
        exchange.getRequest().getHeader().removeFields(X_GOOG_API_KEY);
        exchange.getRequest().getHeader().add(X_GOOG_API_KEY, apiKey);
    }

    /**
     * @return {@code generationConfig.maxOutputTokens}, or 0 if it is not set
     */
    @Override
    public long getRequestedMaxOutputTokens() {
        return json.path("generationConfig")
                .path("maxOutputTokens")
                .asLong(0);
    }

    /**
     * Rough estimate of the input tokens: counts the characters of the system
     * instruction and of all content parts, assumes four characters per token and
     * adds a 15 % margin.
     *
     * @return 0 if there is no JSON body, otherwise an estimate of at least 1
     */
    @Override
    public long estimateInputTokens() {
        if (json == null || json.isNull()) {
            return 0;
        }

        long chars = countText(json.path("systemInstruction"));

        var contents = json.path("contents");
        if (contents.isArray()) {
            for (JsonNode content : contents) {
                chars += countText(content.path("parts"));
            }
        }

        // Safety margin for JSON structure, roles, metadata, etc.
        return Math.max(1, Math.round(chars / 4.0 * 1.15));
    }

    /**
     * Returns the text of the first textual part inside {@code systemInstruction},
     * or an empty string if no system prompt is set.
     *
     * <p>Gemini API wire format:</p>
     * <pre>{@code
     * { "systemInstruction": { "parts": [{ "text": "You are a helpful assistant." }] } }
     * }</pre>
     */
    @Override
    public String getSystemPrompt() {
        for (var part : json.path("systemInstruction").path("parts")) {
            if (part.path("text").isTextual()) {
                return part.path("text").asText("");
            }
        }
        return "";
    }

    /**
     * Concatenates all prompts (newline-separated) into a single text part under
     * {@code systemInstruction}. Replaces any existing system instruction.
     *
     * <p>Gemini API wire format:</p>
     * <pre>{@code { "systemInstruction": { "parts": [{ "text": "prompt 1\nprompt 2" }] } }}</pre>
     */
    @Override
    public void setSystemPrompts(List<String> prompts) {
        json.putObject("systemInstruction")
                .putArray("parts")
                .addObject()
                .put("text", String.join("\n", prompts));
    }

    /**
     * Removes the {@code systemInstruction} field entirely.
     * Has no effect if no system instruction is present.
     */
    @Override
    public void removeSystemPrompt() {
        json.remove("systemInstruction");
    }

    /**
     * Recursively sums the length of all {@code text} values reachable through
     * arrays and {@code parts} fields. Anything else contributes 0.
     */
    private long countText(JsonNode node) {
        if (node == null || node.isMissingNode() || node.isNull()) {
            return 0;
        }

        if (node.isTextual()) {
            return node.asText().length();
        }

        if (node.isObject()) {
            long chars = 0;

            JsonNode text = node.get("text");
            if (text != null && text.isTextual()) {
                chars += text.asText().length();
            }

            JsonNode parts = node.get("parts");
            if (parts != null) {
                chars += countText(parts);
            }

            return chars;
        }

        if (node.isArray()) {
            long chars = 0;
            for (JsonNode child : node) {
                chars += countText(child);
            }
            return chars;
        }

        return 0;
    }

    @Override
    public void setMaxOutputTokens(int maxOutputTokens) {
        getGenerationConfig().put("maxOutputTokens", maxOutputTokens);
    }

    /** Returns the existing {@code generationConfig} object or creates (and attaches) a new one. */
    private ObjectNode getGenerationConfig() {
        var gc = json.get("generationConfig");
        if (gc instanceof ObjectNode objectNode) {
            return objectNode;
        }
        return json.putObject("generationConfig");
    }
}
package com.predic8.membrane.core.interceptor.llmgateway.provider.google;

import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMResponse;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
import com.predic8.membrane.core.interceptor.llmgateway.store.Usage;
import com.predic8.membrane.core.util.http.SSEParser;

import java.util.Set;
import java.util.function.Consumer;

/**
 * Response view for the Google Gemini API that extracts the token usage from
 * the {@code usageMetadata} object of the response body.
 */
public class GoogleLLMResponse extends AbstractLLMResponse {

    public GoogleLLMResponse(Exchange exchange, Consumer<LLMResponse> postProcessor) {
        super(exchange, postProcessor);
    }

    /**
     * Reads the usage from {@code usageMetadata}. Output tokens are the sum of
     * {@code thoughtsTokenCount} and {@code candidatesTokenCount}; if
     * {@code totalTokenCount} is missing, input plus output tokens is used.
     * Missing counters default to 0.
     */
    @Override
    public Usage getUsage() {
        var usage = json.path("usageMetadata");

        int inputTokens = usage.path("promptTokenCount").asInt(0);
        int thoughtsTokens = usage.path("thoughtsTokenCount").asInt(0);
        int candidatesTokenCount = usage.path("candidatesTokenCount").asInt(0);
        int outputTokens = thoughtsTokens + candidatesTokenCount;
        int totalTokens = usage.path("totalTokenCount").asInt(inputTokens + outputTokens);

        return new Usage(inputTokens, outputTokens, totalTokens);
    }

    /**
     * Names of the SSE events that end a stream. Same names as used by
     * {@code OpenAiLLMResponsesResponse} ("response.incomplete", previously
     * misspelled as "response.incompleted").
     */
    // NOTE(review): these are OpenAI Responses API event names; verify which
    // terminal events (if any) a Gemini SSE stream actually emits.
    @Override
    public Set<String> getTerminalEvents() {
        return Set.of("response.completed", "response.incomplete");
    }

    /** Intermediate SSE events are currently ignored. */
    @Override
    public void process(SSEParser.SSEEvent event) {
        // intentionally empty
    }
}
package com.predic8.membrane.core.interceptor.llmgateway.provider.google;

import com.predic8.membrane.annot.MCElement;
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;

import java.io.IOException;
import java.util.function.Consumer;

/**
 * @description (Experimental) Google AI provider configuration.
 * Use to configure a LLM gateway to use the Google LLM API.
 */
@MCElement(name = "google", id = "google-ai-provider")
public class GoogleProvider implements LLMProvider {

    /** Wraps the exchange's request in a {@link GoogleLLMRequest}. */
    @Override
    public LLMRequest getLLMRequest(Exchange exchange) throws IOException {
        return new GoogleLLMRequest(exchange);
    }

    /** Creates the {@link GoogleLLMResponse} for the given exchange and post processor. */
    @Override
    public LLMResponse getLLMResponse(Exchange request, Consumer<LLMResponse> postProcessor) {
        return new GoogleLLMResponse(request, postProcessor);
    }

    /** Creates a new {@link GoogleErrorCreator}. */
    @Override
    public LLMErrorCreator getErrorCreator() {
        return new GoogleErrorCreator();
    }
}
a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java new file mode 100644 index 0000000000..9e75ef5ec5 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java @@ -0,0 +1,104 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;

import com.fasterxml.jackson.databind.JsonNode;
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractModelInputRequest;

import java.io.IOException;

/**
 * Base class for OpenAI style requests that provides a character based
 * estimate of the number of input tokens.
 */
public abstract class AbstractOpenAiLLMRequest extends AbstractModelInputRequest {

    public AbstractOpenAiLLMRequest(Exchange exchange) throws IOException {
        super(exchange);
    }

    /**
     * Estimates the input tokens from the character count of {@code input},
     * {@code messages}, {@code system} and {@code instructions} plus the serialized
     * size of {@code tools} and {@code functions}. Assumes four characters per token
     * and adds a 15 % margin for JSON structure and tokenizer variance.
     *
     * @return the estimate, at least 1
     */
    @Override
    public long estimateInputTokens() {
        long textChars = countText(json.path("input"))
                + estimateChatCompletions()
                // system instructions: "system" (chat completions) or "instructions" (responses API)
                + countText(json.path("system"))
                + countText(json.path("instructions"));

        // tool and function definitions contribute significantly
        long definitionChars = countJsonSize(json.path("tools")) + countJsonSize(json.path("functions"));

        return Math.max(1, Math.round((textChars + definitionChars) / 4.0 * 1.15));
    }

    /** Characters of all {@code messages[].content} plus the length of each role (Chat Completions API). */
    private long estimateChatCompletions() {
        var messages = json.path("messages");
        if (!messages.isArray()) {
            return 0;
        }

        long total = 0;
        for (var message : messages) {
            // roles also consume tokens
            total += countText(message.path("content")) + message.path("role").asText("").length();
        }
        return total;
    }

    /**
     * Recursively sums text lengths: textual nodes count directly, arrays sum their
     * children, objects count their {@code text} value (content blocks such as
     * {@code { "type": "text", "text": "..." }}) and descend into {@code content}.
     */
    private long countText(JsonNode node) {
        if (node == null || node.isMissingNode() || node.isNull()) {
            return 0;
        }
        if (node.isTextual()) {
            return node.asText().length();
        }
        if (node.isObject()) {
            var text = node.get("text");
            long own = (text != null && text.isTextual()) ? text.asText().length() : 0;
            return own + countText(node.get("content"));
        }
        if (node.isArray()) {
            long total = 0;
            for (JsonNode child : node) {
                total += countText(child);
            }
            return total;
        }
        return 0;
    }

    /** Length of the node's JSON serialization, or 0 if the node is absent or null. */
    private long countJsonSize(JsonNode node) {
        boolean absent = node == null || node.isMissingNode() || node.isNull();
        return absent ? 0 : node.toString().length();
    }
}
package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;

import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsRequest;

import java.io.IOException;

/**
 * Chat Completions request for OpenAI. Differs from the generic
 * {@link ChatCompletionsRequest} only in the field used for the output token
 * limit, which is written as {@code max_completion_tokens}.
 */
public class OpenAIChatCompletionsRequest extends ChatCompletionsRequest {

    public OpenAIChatCompletionsRequest(Exchange exchange) throws IOException {
        super(exchange);
    }

    /** Writes the limit into the {@code max_completion_tokens} field of the request body. */
    @Override
    public void setMaxOutputTokens(int maxOutputTokens) {
        json.put("max_completion_tokens", maxOutputTokens);
    }
}
package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;

import com.predic8.membrane.annot.MCElement;
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMProvider;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsErrorCreator;
import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsResponse;

import java.io.IOException;
import java.util.function.Consumer;

/**
 * @description OpenAI provider configuration
 * Use to configure a LLM gateway to use the OpenAI API
 */
@MCElement(name = "openai")
public class OpenAIProvider extends AbstractLLMProvider {

    private static final String CHAT_COMPLETIONS_PATH = "/v1/chat/completions";
    private static final String RESPONSES_PATH = "/v1/responses";

    /**
     * Selects the request implementation by URI prefix: Chat Completions, Responses
     * API, or the default of the superclass for everything else (including a
     * request without URI).
     */
    @Override
    public LLMRequest getLLMRequest(Exchange exchange) throws IOException {
        var uri = exchange.getRequest().getUri();
        // A missing URI must not fail with a NullPointerException; use the default instead.
        if (uri != null) {
            if (uri.startsWith(CHAT_COMPLETIONS_PATH)) {
                return new OpenAIChatCompletionsRequest(exchange);
            }
            if (uri.startsWith(RESPONSES_PATH)) {
                return new OpenAiLLMResponsesRequest(exchange);
            }
        }
        return super.getLLMRequest(exchange);
    }

    /**
     * Returns a Responses API response for URIs starting with {@code /v1/responses},
     * otherwise (also for a request without URI) a Chat Completions response.
     */
    @Override
    public LLMResponse getLLMResponse(Exchange exchange, Consumer<LLMResponse> postProcessor) {
        var uri = exchange.getRequest().getUri();
        if (uri != null && uri.startsWith(RESPONSES_PATH)) {
            return new OpenAiLLMResponsesResponse(exchange, postProcessor);
        }
        return new ChatCompletionsResponse(exchange, postProcessor);
    }

    @Override
    public LLMErrorCreator getErrorCreator() {
        return new ChatCompletionsErrorCreator();
    }

}
package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;

import com.predic8.membrane.core.exchange.Exchange;

import java.io.IOException;
import java.util.List;

import static java.util.Collections.emptyList;

/**
 * Request view for the OpenAI Responses API ({@code instructions},
 * {@code max_output_tokens}, function tools).
 */
public class OpenAiLLMResponsesRequest extends AbstractOpenAiLLMRequest {

    public OpenAiLLMResponsesRequest(Exchange exchange) throws IOException {
        super(exchange);
    }

    /**
     * Lists the names of all tools of type {@code function} that have a non-empty name.
     *
     * @return the tool names, or an empty list if the request has no tools node
     */
    public List<String> getTools() {
        var toolsNode = getToolsNode();
        if (toolsNode == null) {
            return emptyList();
        }
        return toolsNode.valueStream()
                .filter(tool -> "function".equals(tool.path("type").asText("")))
                .map(tool -> tool.path("name").asText(""))
                .filter(toolName -> !toolName.isEmpty())
                .toList();
    }

    /** Returns the {@code instructions} field, or an empty string if it is absent. */
    @Override
    public String getSystemPrompt() {
        return json.path("instructions").asText("");
    }

    /**
     * Joins all prompts with newlines and stores them in the {@code "instructions"} field.
     *
     * <p>OpenAI Responses API wire format:</p>
     * <pre>{@code { "instructions": "prompt 1\nprompt 2", "input": "..." }}</pre>
     */
    @Override
    public void setSystemPrompts(List<String> prompts) {
        json.put("instructions", String.join("\n", prompts));
    }

    /**
     * Drops the {@code "instructions"} field.
     * A request without system prompt is left unchanged.
     */
    @Override
    public void removeSystemPrompt() {
        json.remove("instructions");
    }

    /**
     * @return the value of {@code max_output_tokens}, or -1 if the field is not present
     */
    @Override
    public long getRequestedMaxOutputTokens() {
        var limit = json.get("max_output_tokens");
        return limit == null ? -1 : limit.asLong();
    }

    @Override
    public void setMaxOutputTokens(int maxOutputTokens) {
        json.put("max_output_tokens", maxOutputTokens);
    }
}
package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;

import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.interceptor.llmgateway.AbstractLLMEvent;
import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMResponse;
import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
import com.predic8.membrane.core.interceptor.llmgateway.store.Usage;
import com.predic8.membrane.core.util.http.SSEParser;
import com.predic8.membrane.core.util.json.JsonUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Set;
import java.util.function.Consumer;

/**
 * Response view for the OpenAI Responses API, for plain JSON responses as well
 * as for SSE streams that end with a terminal event.
 */
public class OpenAiLLMResponsesResponse extends AbstractLLMResponse {

    private static final Logger log = LoggerFactory.getLogger(OpenAiLLMResponsesResponse.class);

    public OpenAiLLMResponsesResponse(Exchange exchange, Consumer<LLMResponse> postProcessor) {
        super(exchange, postProcessor);
    }

    /**
     * Reads the usage from the top-level {@code usage} object or, if that is missing
     * or null (streamed {@code response.completed} events), from {@code response.usage}.
     * A missing {@code total_tokens} falls back to input plus output tokens.
     */
    @Override
    public Usage getUsage() {
        var topLevel = json.path("usage");
        var usage = (topLevel.isMissingNode() || topLevel.isNull())
                ? json.path("response").path("usage")
                : topLevel;

        var in = getInputTokens(usage);
        var out = getOutputTokens(usage);
        return new Usage(in, out, usage.path("total_tokens").asInt(in + out));
    }

    @Override
    public Set<String> getTerminalEvents() {
        return Set.of("response.completed", "response.incomplete");
    }

    /**
     * Takes the data of the terminal event as the response JSON. If the data is not
     * a JSON object, an object with an {@code error} message is used instead.
     */
    @Override
    protected void processTerminalEvent(SSEParser.SSEEvent terminal) {
        var fallback = JsonNodeFactory.instance.objectNode()
                .put("error", "No JSON object response from model.");
        json = JsonUtil.getJsonObject(terminal.data()).orElse(fallback);
    }

    /** Logs name, data and the parsed form of every event at debug level. */
    @Override
    public void process(SSEParser.SSEEvent e) {
        log.debug("Event: {}", e.name());
        log.debug("Data: {}", e.data());
        var parsed = AbstractLLMEvent.create(e);
        log.debug("Event: {}", parsed);
    }
}

// ---- OrganizationRequest.java ----

package com.predic8.membrane.core.interceptor.llmgateway.provider.openai;

import com.predic8.membrane.core.exchange.Exchange;

import java.io.IOException;

/**
 * OpenAI request type that adds nothing to {@link AbstractOpenAiLLMRequest}
 * besides a public constructor.
 */
public class OrganizationRequest extends AbstractOpenAiLLMRequest {

    public OrganizationRequest(Exchange exchange) throws IOException {
        super(exchange);
    }
}
package com.predic8.membrane.core.interceptor.llmgateway.store;

import com.predic8.membrane.core.router.Router;

import java.util.Optional;

/**
 * Store for the users of the LLM gateway and their token usage.
 *
 * <p>TODO: store .status, .error, .model, .stop_reason</p>
 */
public interface AiApiStore {

    /**
     * Initializes the store. The default implementation does nothing.
     *
     * @param router the router the store belongs to
     */
    default void init(Router router) {
    }

    /**
     * Records the usage of a single call for the given user.
     *
     * @param user  the user that made the call
     * @param usage the tokens used by the call
     */
    void store(AiApiUser user, Usage usage);

    /**
     * Looks up the user that belongs to a token.
     *
     * @param token the token presented by the client
     * @return the matching user, or an empty Optional if there is none
     */
    Optional<AiApiUser> getUser(String token);

    /**
     * Checks if the user has enough tokens to make the request.
     *
     * @param user         The user to check
     * @param inputTokens  input tokens needed for the request
     * @param outputTokens output tokens needed for the request
     * @return Estimated number of tokens that the user has left after this request
     */
    long checkLimit(AiApiUser user, long inputTokens, long outputTokens);

    // NOTE(review): the unit is not defined here; SimpleAiApiStore returns seconds.
    /**
     * @return the time remaining until the usage limit is reset
     */
    long getRemainingResetTime();
}
package com.predic8.membrane.core.interceptor.llmgateway.store;

import com.predic8.membrane.annot.MCAttribute;
import com.predic8.membrane.annot.MCElement;
import com.predic8.membrane.core.util.ConfigurationException;

import java.util.concurrent.atomic.AtomicLong;

import static java.lang.Long.MAX_VALUE;

/**
 * A configured user of the LLM gateway with an API key, an optional token
 * budget and a counter for the tokens used in the current period.
 */
@MCElement(name = "users", component = false, id = "ai-api-users")
public class AiApiUser {

    private String name;
    private String apiKey;

    /** Token budget per period; 0 means "no limit". */
    private long tokens = 0;

    private final AtomicLong tokensUsedInPeriod = new AtomicLong();

    /**
     * Adds the total tokens of a call to the usage counter of the current period.
     *
     * @param usage The number of tokens used
     */
    public void addTokensUsedInPeriod(Usage usage) {
        tokensUsedInPeriod.addAndGet(usage.totalTokens());
    }

    /** Sets the usage counter of the current period back to zero. */
    public void resetTokensUsedInPeriod() {
        tokensUsedInPeriod.set(0);
    }

    /**
     * Computes how many tokens would remain after a request.
     *
     * @param tokensNeededForRequest The number of tokens that the user needs to make the request
     * @return {@code Long.MAX_VALUE} if no limit is configured, otherwise the budget minus
     *         the tokens already used minus the tokens needed (may be negative)
     */
    public long checkLimit(long tokensNeededForRequest) {
        boolean unlimited = tokens == 0;
        return unlimited
                ? MAX_VALUE
                : this.tokens - tokensUsedInPeriod.get() - tokensNeededForRequest;
    }

    public String getName() {
        return name;
    }

    /**
     * @param name of the user
     * @description Name of the API user, group or cost center.
     */
    @MCAttribute()
    public void setName(String name) {
        this.name = name;
    }

    public String getApiKey() {
        return apiKey;
    }

    /**
     * @description API key to authenticate the user at the llm gateway
     * @default (not set)
     * @param apikey to authenticate the user
     */
    @MCAttribute()
    public void setApiKey(String apikey) {
        this.apiKey = apikey;
    }

    public long getTokens() {
        return tokens;
    }

    /**
     * @description Number of tokens that the user has available within the current period.
     * @default 0 (no limit)
     * @param tokens available tokens
     */
    @MCAttribute
    public void setTokens(long tokens) {
        if (tokens < 0) {
            throw new ConfigurationException("tokens must be >= 0");
        }
        this.tokens = tokens;
    }

    @Override
    public String toString() {
        return "user(name: %s)".formatted(name);
    }
}

// ---- JDBCAiApiUsageStore.java ----

package com.predic8.membrane.core.interceptor.llmgateway.store;

import com.predic8.membrane.annot.MCElement;
import com.predic8.membrane.core.router.Router;
import com.predic8.membrane.core.util.jdbc.AbstractJdbcSupport;

import java.sql.SQLException;
import java.util.Optional;

/**
 * @description Stores AI API usage in a database (experimental).
 */
@MCElement(name = "jdbcAiApiUsageStore")
public class JDBCAiApiUsageStore extends AbstractJdbcSupport implements AiApiStore {

    // @TODO GENERATED ALWAYS AS IDENTITY is PostgreSQL specific
    private static final String CREATE_TABLE_SQL = """
            CREATE TABLE IF NOT EXISTS ai_api_usage (
                id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
                username VARCHAR(255) NOT NULL,
                input_tokens INT NOT NULL,
                output_tokens INT NOT NULL,
                total_tokens INT NOT NULL,
                created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
            )
            """;

    private static final String INSERT_SQL = """
            INSERT INTO ai_api_usage (
                username,
                input_tokens,
                output_tokens,
                total_tokens
            ) VALUES (?, ?, ?, ?)
            """;

    /** Initializes the JDBC support and creates the usage table if it does not exist. */
    @Override
    public void init(Router router) {
        super.init(router);
        createTablesIfNotExist();
    }

    /**
     * Inserts one usage row for the user.
     *
     * @throws RuntimeException if the insert fails
     */
    @Override
    public void store(AiApiUser user, Usage usage) {
        try (var con = getConnection(); var insert = con.prepareStatement(INSERT_SQL)) {
            insert.setString(1, user.getName());
            insert.setInt(2, usage.inputTokens());
            insert.setInt(3, usage.outputTokens());
            insert.setInt(4, usage.totalTokens());
            insert.executeUpdate();
        } catch (SQLException e) {
            throw new RuntimeException("Could not store AI API usage.", e);
        }
    }

    /** Not implemented: always returns an empty Optional. */
    @Override
    public Optional<AiApiUser> getUser(String token) {
        return Optional.empty();
    }

    /** Not implemented: always returns 0. */
    @Override
    public long checkLimit(AiApiUser user, long inputTokens, long outputTokens) {
        return 0;
    }

    /** Not implemented: always returns 0. */
    @Override
    public long getRemainingResetTime() {
        return 0;
    }

    private void createTablesIfNotExist() {
        try (var con = getConnection(); var create = con.prepareStatement(CREATE_TABLE_SQL)) {
            create.executeUpdate();
        } catch (SQLException e) {
            throw new RuntimeException("Could not create AI API usage table.", e);
        }
    }
}
package com.predic8.membrane.core.interceptor.llmgateway.store;

import com.predic8.membrane.annot.MCAttribute;
import com.predic8.membrane.annot.MCChildElement;
import com.predic8.membrane.annot.MCElement;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.concurrent.GuardedBy;
import java.time.Instant;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

import static java.time.Instant.now;

/**
 * @description Simple store for the LLM Gateway that stores limits in memory. Users and keys can
 * be configured in the configuration file.
 */
@MCElement(name = "simpleStore", component = false, id = "simple-ai-api-store")
public class SimpleAiApiStore implements AiApiStore {

    private static final Logger log = LoggerFactory.getLogger(SimpleAiApiStore.class);

    @GuardedBy("lock")
    private List<AiApiUser> users = Collections.emptyList();

    private boolean logUsage = true;

    private final Object lock = new Object();

    /** End of the current limit period; {@code null} until the first limit check. */
    @GuardedBy("lock")
    private Instant nextReset;

    /** Length of a limit period in seconds. */
    private long limitResetPeriod = 60;

    /** Optionally logs the usage and adds it to the user's counter for the current period. */
    @Override
    public void store(AiApiUser user, Usage usage) {
        if (logUsage) {
            log.info("user: {} {}", user.getName(), usage);
        }
        user.addTokensUsedInPeriod(usage);
    }

    /**
     * Finds the first configured user whose API key equals the token.
     *
     * @param token the token presented by the client; {@code null} never matches
     * @return the matching user or an empty Optional
     */
    @Override
    public Optional<AiApiUser> getUser(String token) {
        if (token == null) {
            return Optional.empty();
        }
        synchronized (lock) {
            // Call equals on the token: a user configured without apiKey must not cause an NPE.
            return users.stream().filter(u -> token.equals(u.getApiKey())).findFirst();
        }
    }

    /**
     * Starts a new limit period if the current one has elapsed (resetting the usage
     * of all users) and then checks the limit of the given user.
     *
     * @return 0 for a {@code null} user, otherwise the result of
     *         {@link AiApiUser#checkLimit(long)} for the sum of input and output tokens
     */
    @Override
    public long checkLimit(AiApiUser user, long inputTokens, long outputTokens) {
        if (user == null) {
            return 0; // anonymous user gets no tokens
        }

        synchronized (lock) {
            var now = now();
            if (nextReset == null || now.isAfter(nextReset)) {
                nextReset = now.plusSeconds(limitResetPeriod);
                log.info("Resetting AI API token usage limit.");
                users.forEach(AiApiUser::resetTokensUsedInPeriod);
            }
        }

        return user.checkLimit(inputTokens + outputTokens);
    }

    /**
     * @return seconds until the current limit period ends; 0 if no period has been
     *         started yet or the period has already elapsed (never negative)
     */
    @Override
    public long getRemainingResetTime() {
        synchronized (lock) {
            if (nextReset == null) {
                return 0;
            }
            // The reset only happens on the next checkLimit call, so the difference can be negative.
            return Math.max(0, (nextReset.toEpochMilli() - now().toEpochMilli()) / 1000);
        }
    }

    /**
     * List of users that can be used for authentication.
     * @param users User list
     */
    @MCChildElement(allowForeign = true, order = 10)
    public void setUsers(List<AiApiUser> users) {
        synchronized (lock) {
            this.users = users;
        }
    }

    /** @return an unmodifiable copy of the configured users */
    public List<AiApiUser> getUsers() {
        synchronized (lock) {
            return List.copyOf(users);
        }
    }

    public long getLimitResetPeriod() {
        return limitResetPeriod;
    }

    /**
     * @description The period in seconds after which the token limit is reset.
     * @param limitResetPeriod in seconds, e.g. 3600 for 1 hour
     */
    @MCAttribute
    public void setLimitResetPeriod(long limitResetPeriod) {
        this.limitResetPeriod = limitResetPeriod;
    }

    public boolean isLogUsage() {
        return logUsage;
    }

    public void setLogUsage(boolean logUsage) {
        this.logUsage = logUsage;
    }
}
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.store; + +public record Usage(int inputTokens, int outputTokens, int totalTokens) {} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/log/LogInterceptor.java b/core/src/main/java/com/predic8/membrane/core/interceptor/log/LogInterceptor.java index 34fe64dbad..b2f2f10a5c 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/log/LogInterceptor.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/log/LogInterceptor.java @@ -146,6 +146,10 @@ private String dumpHeaderFields(Message msg) { } private static String dumpBody(Message msg) { + if (msg.isBinary()) { + return "[Binary]"; + } + try { return "Body:\n%s\n".formatted(msg.getBodyAsStringDecoded()); } catch (Exception e) { diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeToolSupport.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeToolSupport.java index 319636fe29..ed9f1608d8 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeToolSupport.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeToolSupport.java @@ -1,29 +1,32 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.interceptor.mcp; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.predic8.membrane.core.exchange.AbstractExchange; -import com.predic8.membrane.core.interceptor.mcp.MCPUtil.InvalidToolArgumentsException; import com.predic8.membrane.core.mcp.MCPToolsCall; import com.predic8.membrane.core.mcp.MCPToolsCallResponse; import org.jetbrains.annotations.Nullable; import java.io.IOException; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; +import java.util.*; import static com.predic8.membrane.core.interceptor.mcp.ExchangeUtils.matchesExchangeFilter; -import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.getOptionalBooleanArgument; -import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.getOptionalIntArgument; -import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.getOptionalSizeArgument; -import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.getOptionalStringArgument; -import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.getRequiredLongArgument; -import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.rejectUnexpectedArguments; +import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.*; import static com.predic8.membrane.core.interceptor.mcp.McpSchemaBuilder.integer; import static com.predic8.membrane.core.interceptor.mcp.McpSchemaBuilder.string; import static java.lang.Integer.MAX_VALUE; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeUtils.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeUtils.java index 274e150d3d..5e3c02c846 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeUtils.java +++ 
b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeUtils.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.exchange.AbstractExchange; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MCPUtil.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MCPUtil.java index 148965c53e..b3c5b866df 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MCPUtil.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MCPUtil.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.exchange.AbstractExchange; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpPayloadSanitizer.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpPayloadSanitizer.java index addf83e551..dd9b291c9a 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpPayloadSanitizer.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpPayloadSanitizer.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.http.Header; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSchemaBuilder.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSchemaBuilder.java index 14539ca1cc..d03b7d1b05 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSchemaBuilder.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSchemaBuilder.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import java.util.Collections; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionContext.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionContext.java index 615e22d543..0bff5dbcab 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionContext.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionContext.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.mcp.MCPInitialize.ClientInfo; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionManager.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionManager.java index e67394c480..d607e51b7f 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionManager.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionManager.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.mcp.MCPInitialize; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolDefinition.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolDefinition.java index 49d14cbb4f..f07b51b8fd 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolDefinition.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolDefinition.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.mcp.MCPToolsListResponse; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolHandler.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolHandler.java index b8bcf4acf3..58910c945c 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolHandler.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolHandler.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.exchange.Exchange; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolRegistry.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolRegistry.java index 1dde3ef18a..0480bc715e 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolRegistry.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolRegistry.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import java.util.Collection; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServer.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServer.java index a22da0473f..4fc1bab70e 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServer.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServer.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import com.fasterxml.jackson.core.JsonProcessingException; @@ -9,14 +23,7 @@ import com.predic8.membrane.core.interceptor.mcp.MCPUtil.InvalidToolArgumentsException; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; import com.predic8.membrane.core.jsonrpc.JSONRPCResponse; -import com.predic8.membrane.core.mcp.MCPInitialize; -import com.predic8.membrane.core.mcp.MCPInitializeResponse; -import com.predic8.membrane.core.mcp.MCPInitialized; -import com.predic8.membrane.core.mcp.MCPPing; -import com.predic8.membrane.core.mcp.MCPToolsCall; -import com.predic8.membrane.core.mcp.MCPToolsCallResponse; -import com.predic8.membrane.core.mcp.MCPToolsList; -import com.predic8.membrane.core.mcp.MCPToolsListResponse; +import com.predic8.membrane.core.mcp.*; import org.jetbrains.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,13 +43,7 @@ import static com.predic8.membrane.core.interceptor.mcp.McpSessionContext.McpSessionState.INITIALIZED; import static com.predic8.membrane.core.interceptor.mcp.McpSessionContext.McpSessionState.READY; import static com.predic8.membrane.core.jsonrpc.JSONRPCRequest.parse; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.ERR_INTERNAL_ERROR; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.ERR_INVALID_PARAMS; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.ERR_INVALID_REQUEST; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.ERR_METHOD_NOT_FOUND; -import static 
com.predic8.membrane.core.jsonrpc.JSONRPCResponse.ERR_PARSE_ERROR; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.error; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.success; +import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.*; /** * @description MCP Server for Membrane. It allows querying Membrane's internal state and operation from an LLM diff --git a/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequest.java b/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequest.java index 65f3b3b86b..f9b802c163 100644 --- a/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequest.java +++ b/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequest.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.jsonrpc; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -15,7 +29,6 @@ import java.io.IOException; import java.io.InputStream; -import java.io.OutputStream; import java.util.List; import java.util.Map; import java.util.Objects; diff --git a/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponse.java b/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponse.java index ebe1a2513e..02d95507fb 100644 --- a/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponse.java +++ b/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponse.java @@ -1,10 +1,24 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.jsonrpc; -import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonPropertyOrder; +import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializerProvider; diff --git a/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCUtil.java b/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCUtil.java index 158fa9e087..922c391ef1 100644 --- a/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCUtil.java +++ b/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCUtil.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.jsonrpc; import com.fasterxml.jackson.databind.JsonNode; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialize.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialize.java index a8d94ca230..9aa79a1ca1 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialize.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialize.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitializeResponse.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitializeResponse.java index c3227bae7a..a49610a9ef 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitializeResponse.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitializeResponse.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.fasterxml.jackson.annotation.JsonInclude; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialized.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialized.java index 27d63ee716..f39698807a 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialized.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialized.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPNotification.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPNotification.java index eeefa8e421..3e106d3aa3 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPNotification.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPNotification.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPPing.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPPing.java index b7aa3c8ce1..3746a156ba 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPPing.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPPing.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPRequest.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPRequest.java index 1d47fcf638..059766bc63 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPRequest.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPRequest.java @@ -1,9 +1,21 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; -import java.util.Objects; - import static java.util.Objects.requireNonNull; /** diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPResponse.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPResponse.java index f9a27bd1d6..faaba2786f 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPResponse.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPResponse.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCResponse; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCall.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCall.java index 1724b5022e..51055b2ed8 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCall.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCall.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCallResponse.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCallResponse.java index 026b032375..e221eced60 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCallResponse.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCallResponse.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.fasterxml.jackson.annotation.*; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsList.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsList.java index c0beba054c..dbada1138f 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsList.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsList.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsListResponse.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsListResponse.java index ad78539949..48d72fbef9 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsListResponse.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsListResponse.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.fasterxml.jackson.annotation.JsonInclude; diff --git a/core/src/main/java/com/predic8/membrane/core/multipart/MultipartUtil.java b/core/src/main/java/com/predic8/membrane/core/multipart/MultipartUtil.java new file mode 100644 index 0000000000..d19a808e3a --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/multipart/MultipartUtil.java @@ -0,0 +1,98 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.multipart; + +import com.predic8.membrane.core.http.Header; +import com.predic8.membrane.core.http.Message; +import com.predic8.membrane.core.util.MessageUtil; +import jakarta.mail.internet.ParseException; +import org.apache.commons.fileupload.MultipartStream; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * Utility for splitting multipart HTTP messages into their individual {@link Part}s. + * + *

Example: + *

{@code
+ * List parts = MultipartUtil.split(exchange.getRequest());
+ * for (Part part : parts) {
+ *     String name = part.getName();          // form field name
+ *     String type = part.getContentType();   // e.g. "image/png"
+ *     byte[] body = part.getBody();
+ * }
+ * }
+ */ +public class MultipartUtil { + + /** + * Splits a multipart message into its individual parts. + * The MIME boundary is read from the message's {@code Content-Type} header. + * + * @param message a request or response whose Content-Type is multipart/* + * @return parts in wire order; never null, may be empty + * @throws IOException on I/O or parse errors + * @throws ParseException if the Content-Type header cannot be parsed + */ + public static List split(Message message) throws IOException, ParseException { + var contentType = message.getHeader().getContentTypeObject(); + if (contentType == null) { + throw new IOException("No Content-Type header"); + } + String boundary = contentType.getParameter("boundary"); + if (boundary == null) { + throw new IOException("No boundary parameter in Content-Type: " + contentType); + } + return split(message, boundary); + } + + /** + * Splits a multipart message into its individual parts using an explicit boundary. + * + * @param message a request or response with a multipart body + * @param boundary the MIME boundary string (without leading {@code --}) + * @return parts in wire order; never null, may be empty + * @throws IOException on I/O or unsupported Content-Transfer-Encoding + */ + @SuppressWarnings("deprecation") + public static List split(Message message, String boundary) throws IOException { + List result = new ArrayList<>(); + + MultipartStream ms = new MultipartStream(MessageUtil.getContentAsStream(message), boundary.getBytes(UTF_8)); + boolean hasNext = ms.skipPreamble(); + while (hasNext) { + Header partHeader = new Header(ms.readHeaders()); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ms.readBodyData(baos); + + // Only binary-safe encodings are supported; base64/QP would corrupt binary parts + String cte = partHeader.getFirstValue("Content-Transfer-Encoding"); + if (cte != null && !cte.equalsIgnoreCase("binary") + && !cte.equalsIgnoreCase("8bit") + && !cte.equalsIgnoreCase("7bit")) { + throw 
new IOException("Content-Transfer-Encoding '" + cte + "' is not supported."); + } + + result.add(new Part(partHeader, baos.toByteArray())); + hasNext = ms.readBoundary(); + } + return result; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/multipart/Part.java b/core/src/main/java/com/predic8/membrane/core/multipart/Part.java index 6d3e58c027..5415954eec 100644 --- a/core/src/main/java/com/predic8/membrane/core/multipart/Part.java +++ b/core/src/main/java/com/predic8/membrane/core/multipart/Part.java @@ -14,141 +14,120 @@ package com.predic8.membrane.core.multipart; -import com.predic8.membrane.core.http.*; +import com.predic8.membrane.core.http.Header; -import javax.xml.namespace.*; -import javax.xml.stream.*; -import javax.xml.stream.events.*; -import java.io.*; +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.util.regex.Matcher; +import java.util.regex.Pattern; -import static java.nio.charset.StandardCharsets.*; -import static org.apache.commons.codec.binary.Base64.*; +import static java.nio.charset.StandardCharsets.UTF_8; +/** + * A single part of a multipart HTTP message, consisting of a header block and a body. 
+ * + * @see MultipartUtil#split(com.predic8.membrane.core.http.Message) + */ public class Part { - private final Header header; - private final byte[] data; - - public Part(Header header, byte[] data) { - this.header = header; - this.data = data; - } - - public String getContentID() { - return header.getFirstValue("Content-ID"); - } - - public Header getHeader() { - return header; - } - - public InputStream getInputStream() { - return new ByteArrayInputStream(data); - } - - public XMLEvent asXMLEvent() { - return new Characters() { - - @Override - public void writeAsEncodedUnicode(Writer writer) { - throw new RuntimeException("not implemented"); - } - - @Override - public boolean isStartElement() { - return false; - } - - @Override - public boolean isStartDocument() { - return false; - } - - @Override - public boolean isProcessingInstruction() { - return false; - } - - @Override - public boolean isNamespace() { - return false; - } - - @Override - public boolean isEntityReference() { - return false; - } - - @Override - public boolean isEndElement() { - return false; - } - - @Override - public boolean isEndDocument() { - return false; - } - - @Override - public boolean isCharacters() { - return true; - } - - @Override - public boolean isAttribute() { - return false; - } - - @Override - public QName getSchemaType() { - return null; - } - - @Override - public Location getLocation() { - return null; - } - - @Override - public int getEventType() { - return CHARACTERS; - } - - @Override - public StartElement asStartElement() { - return null; - } - - @Override - public EndElement asEndElement() { - return null; - } - - @Override - public Characters asCharacters() { - return this; - } - - @Override - public String getData() { - return new String(encodeBase64(data), UTF_8); - } - - @Override - public boolean isWhiteSpace() { - return false; - } - - @Override - public boolean isCData() { - return false; - } - - @Override - public boolean isIgnorableWhiteSpace() { - return 
false; - } - }; - } - + private static final Pattern NAME_PATTERN = + Pattern.compile("(?i)\\bname=\"([^\"]+)\""); + private static final Pattern FILENAME_PATTERN = + Pattern.compile("(?i)\\bfilename=\"([^\"]+)\""); + + private final Header header; + private final byte[] body; + + public Part(Header header, byte[] body) { + this.header = header; + this.body = body; + } + + // ------------------------------------------------------------------------- + // Header accessors + // ------------------------------------------------------------------------- + + /** + * Returns the part's own header block (may contain Content-Type, Content-ID, etc.). + */ + public Header getHeader() { + return header; + } + + /** + * Returns the {@code Content-ID} header value, or {@code null} if absent. + * Used in MIME multipart/related messages (e.g. SOAP XOP). + */ + public String getContentID() { + return header.getFirstValue("Content-ID"); + } + + /** + * Returns the {@code Content-Type} of this part (e.g. {@code "image/png"}), + * or {@code null} if no Content-Type header is present. + */ + public String getContentType() { + return header.getContentType(); + } + + /** + * Returns the {@code name} parameter from the {@code Content-Disposition} header. + * This is the form field name in {@code multipart/form-data} submissions. + * Returns {@code null} if not present. + */ + public String getName() { + return extractDispositionParam(NAME_PATTERN); + } + + /** + * Returns the {@code filename} parameter from the {@code Content-Disposition} header, + * or {@code null} if not present. + */ + public String getFilename() { + return extractDispositionParam(FILENAME_PATTERN); + } + + // ------------------------------------------------------------------------- + // Body accessors + // ------------------------------------------------------------------------- + + /** + * Returns the raw body bytes of this part. 
+ */ + public byte[] getBody() { + return body; + } + + /** + * Returns the body decoded as a UTF-8 string. + */ + public String getBodyAsString() { + return getBodyAsString(UTF_8); + } + + /** + * Returns the body decoded using the given charset. + */ + public String getBodyAsString(Charset charset) { + return new String(body, charset); + } + + /** + * Returns a fresh {@link InputStream} over the body bytes. + */ + public InputStream getInputStream() { + return new ByteArrayInputStream(body); + } + + // ------------------------------------------------------------------------- + // Internal helpers + // ------------------------------------------------------------------------- + + private String extractDispositionParam(Pattern pattern) { + String disposition = header.getFirstValue("Content-Disposition"); + if (disposition == null) return null; + Matcher m = pattern.matcher(disposition); + return m.find() ? m.group(1) : null; + } } diff --git a/core/src/main/java/com/predic8/membrane/core/multipart/XOPReconstitutor.java b/core/src/main/java/com/predic8/membrane/core/multipart/XOPReconstitutor.java index e9aa70e6e2..c2253e333a 100644 --- a/core/src/main/java/com/predic8/membrane/core/multipart/XOPReconstitutor.java +++ b/core/src/main/java/com/predic8/membrane/core/multipart/XOPReconstitutor.java @@ -14,20 +14,31 @@ package com.predic8.membrane.core.multipart; -import com.predic8.membrane.core.http.*; -import com.predic8.membrane.core.util.*; -import jakarta.mail.internet.*; -import org.apache.commons.fileupload.*; -import org.slf4j.*; - -import javax.annotation.concurrent.*; -import javax.xml.namespace.*; +import com.predic8.membrane.core.http.BodyCollectingMessageObserver; +import com.predic8.membrane.core.http.Header; +import com.predic8.membrane.core.http.Message; +import com.predic8.membrane.core.util.EndOfStreamException; +import com.predic8.membrane.core.util.MessageUtil; +import jakarta.mail.internet.ContentType; +import jakarta.mail.internet.ParseException; 
+import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.concurrent.ThreadSafe; +import javax.xml.namespace.QName; import javax.xml.stream.*; -import javax.xml.stream.events.*; -import java.io.*; -import java.util.*; - -import static java.nio.charset.StandardCharsets.*; +import javax.xml.stream.events.Characters; +import javax.xml.stream.events.EndElement; +import javax.xml.stream.events.StartElement; +import javax.xml.stream.events.XMLEvent; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.Writer; +import java.util.HashMap; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.commons.codec.binary.Base64.encodeBase64; /** * Reassemble a multipart XOP message (see @@ -48,7 +59,7 @@ public XOPReconstitutor() { xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); } - public InputStream reconstituteIfNecessary(Message message) throws IOException { + public InputStream reconstituteIfNecessary(Message message) { try { Message reconstitutedMessage = getReconstitutedMessage(message); if (reconstitutedMessage != null) @@ -90,7 +101,7 @@ public Message getReconstitutedMessage(Message message) throws ParseException, I if (boundary == null) return null; - HashMap parts = split(message, boundary); + HashMap parts = splitById(message, boundary); Part startPart = parts.get(start); if (startPart == null) return null; @@ -132,36 +143,16 @@ public boolean shouldNotContainBody() { return m; } - @SuppressWarnings("deprecation") - private HashMap split(Message message, String boundary) - throws IOException, EndOfStreamException { - HashMap parts = new HashMap<>(); - - MultipartStream multipartStream = new MultipartStream(MessageUtil.getContentAsStream(message), boundary.getBytes(UTF_8)); - boolean nextPart = multipartStream.skipPreamble(); - while(nextPart) { - Header header = new Header(multipartStream.readHeaders()); - 
ByteArrayOutputStream baos = new ByteArrayOutputStream(); - multipartStream.readBodyData(baos); - - // see http://www.iana.org/assignments/transfer-encodings/transfer-encodings.xml - String cte = header.getFirstValue("Content-Transfer-Encoding"); - if (cte != null && - !cte.equals("binary") && - !cte.equals("8bit") && - !cte.equals("7bit")) - throw new RuntimeException("Content-Transfer-Encoding '" + cte + "' not implemented."); - - - Part part = new Part(header, baos.toByteArray()); + /** Splits the multipart message and indexes parts by Content-ID for XOP lookup. */ + private HashMap splitById(Message message, String boundary) throws IOException { + HashMap byId = new HashMap<>(); + for (Part part : MultipartUtil.split(message, boundary)) { String id = part.getContentID(); if (id != null) { - parts.put(id, part); + byId.put(id, part); } - - nextPart = multipartStream.readBoundary(); } - return parts; + return byId; } private byte[] fillInXOPParts(InputStream inputStream, @@ -189,7 +180,7 @@ private byte[] fillInXOPParts(InputStream inputStream, if (p == null) throw new RuntimeException("Did not find multipart with id " + href); - writer.add(p.asXMLEvent()); + writer.add(base64CharactersEvent(p.getBody())); xopIncludeOpen = true; continue; } @@ -212,4 +203,33 @@ private byte[] fillInXOPParts(InputStream inputStream, return baos.toByteArray(); } + /** Wraps raw bytes as a base64-encoded XML Characters event for XOP inlining. 
*/ + private static Characters base64CharactersEvent(byte[] data) { + String encoded = new String(encodeBase64(data), UTF_8); + return new Characters() { + @Override public String getData() { return encoded; } + @Override public boolean isCharacters() { return true; } + @Override public boolean isWhiteSpace() { return false; } + @Override public boolean isCData() { return false; } + @Override public boolean isIgnorableWhiteSpace() { return false; } + @Override public int getEventType() { return CHARACTERS; } + @Override public Characters asCharacters() { return this; } + @Override public boolean isStartElement() { return false; } + @Override public boolean isEndElement() { return false; } + @Override public boolean isStartDocument() { return false; } + @Override public boolean isEndDocument() { return false; } + @Override public boolean isAttribute() { return false; } + @Override public boolean isNamespace() { return false; } + @Override public boolean isEntityReference() { return false; } + @Override public boolean isProcessingInstruction() { return false; } + @Override public QName getSchemaType() { return null; } + @Override public Location getLocation() { return null; } + @Override public StartElement asStartElement() { return null; } + @Override public EndElement asEndElement() { return null; } + @Override public void writeAsEncodedUnicode(Writer writer) { + throw new UnsupportedOperationException(); + } + }; + } + } diff --git a/core/src/main/java/com/predic8/membrane/core/security/AbstractSecurityScheme.java b/core/src/main/java/com/predic8/membrane/core/security/AbstractSecurityScheme.java index b936ec3385..b3950b727a 100644 --- a/core/src/main/java/com/predic8/membrane/core/security/AbstractSecurityScheme.java +++ b/core/src/main/java/com/predic8/membrane/core/security/AbstractSecurityScheme.java @@ -13,7 +13,7 @@ limitations under the License. 
*/ package com.predic8.membrane.core.security; -import com.predic8.membrane.core.exchange.*; +import com.predic8.membrane.core.exchange.Exchange; import java.util.*; @@ -58,4 +58,5 @@ public boolean hasScope(String scope) { public Set getScopes() { return scopes; } + } diff --git a/core/src/main/java/com/predic8/membrane/core/util/http/SSEParser.java b/core/src/main/java/com/predic8/membrane/core/util/http/SSEParser.java new file mode 100644 index 0000000000..405312ba4e --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/util/http/SSEParser.java @@ -0,0 +1,176 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.util.http; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.http.Chunk; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +public final class SSEParser { + + private static final Logger log = LoggerFactory.getLogger(SSEParser.class); + + private final Set terminalEventNames; + private final StringBuilder buffer = new StringBuilder(); + + private final List events = new ArrayList<>(); + + private String eventName; + private final StringBuilder data = new StringBuilder(); + + private boolean terminalFound; + + public SSEParser(Set terminalEventNames) { + this.terminalEventNames = terminalEventNames; + } + + public boolean parse(Chunk chunk) { + if (terminalFound) { + return true; + } + + log.debug("Parsing SSE chunk: {}", chunk); + + buffer.append(chunk.toString()); + + int lineEnd; + while ((lineEnd = findLineEnd(buffer)) >= 0) { + String line = readLine(buffer, lineEnd); + + if (line.isEmpty()) { + var event = buildEvent(); + resetEvent(); + + if (event != null) { + events.add(event); + + if ((event.name() != null && terminalEventNames.contains(event.name())) || "[DONE]".equals(event.data())) { + terminalFound = true; + return true; + } + } + + continue; + } + + parseLine(line); + } + + return false; + } + + public List getEvents() { + return List.copyOf(events); + } + + public Optional getTerminalEvent() { + if (!terminalFound || events.isEmpty()) { + return Optional.empty(); + } + + return Optional.of(events.getLast()); + } + + private SSEEvent buildEvent() { + if (eventName == null && data.isEmpty()) { + return null; + } + + return new SSEEvent(eventName, data.isEmpty() ? 
null : data.toString()); + } + + private void resetEvent() { + eventName = null; + data.setLength(0); + } + + private void parseLine(String line) { + if (line.startsWith(":")) { + return; + } + + int colon = line.indexOf(':'); + + String field = colon >= 0 ? line.substring(0, colon) : line; + String value = colon >= 0 ? line.substring(colon + 1) : ""; + + if (value.startsWith(" ")) { + value = value.substring(1); + } + + switch (field) { + case "event" -> eventName = value; + + case "data" -> { + if (!data.isEmpty()) { + data.append('\n'); + } + data.append(value); + } + + default -> { + // ignore id, retry, unknown fields + } + } + } + + private static int findLineEnd(StringBuilder buffer) { + for (int i = 0; i < buffer.length(); i++) { + char c = buffer.charAt(i); + if (c == '\n' || c == '\r') { + return i; + } + } + return -1; + } + + private static String readLine(StringBuilder buffer, int lineEnd) { + String line = buffer.substring(0, lineEnd); + + int removeUntil = lineEnd + 1; + + if (lineEnd + 1 < buffer.length() + && buffer.charAt(lineEnd) == '\r' + && buffer.charAt(lineEnd + 1) == '\n') { + removeUntil++; + } + + buffer.delete(0, removeUntil); + return line; + } + + public record SSEEvent(String name, String data) { + + private static final ObjectMapper om = new ObjectMapper(); + + public ObjectNode json() { + try { + return (ObjectNode) om.readTree(data); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } + + } + +} \ No newline at end of file diff --git a/core/src/main/java/com/predic8/membrane/core/util/jdbc/AbstractJdbcSupport.java b/core/src/main/java/com/predic8/membrane/core/util/jdbc/AbstractJdbcSupport.java index 6103666e1b..df865bbda5 100644 --- a/core/src/main/java/com/predic8/membrane/core/util/jdbc/AbstractJdbcSupport.java +++ b/core/src/main/java/com/predic8/membrane/core/util/jdbc/AbstractJdbcSupport.java @@ -14,12 +14,16 @@ package com.predic8.membrane.core.util.jdbc; -import com.predic8.membrane.annot.*; 
-import com.predic8.membrane.core.router.*; -import com.predic8.membrane.core.util.*; +import com.predic8.membrane.annot.MCAttribute; +import com.predic8.membrane.core.router.Router; +import com.predic8.membrane.core.util.ConfigurationException; -import javax.sql.*; -import java.util.*; +import javax.sql.DataSource; +import java.sql.Connection; +import java.sql.SQLException; +import java.util.Map; + +import static com.predic8.membrane.core.util.ExceptionUtil.getRootCause; public abstract class AbstractJdbcSupport { @@ -53,6 +57,19 @@ public void init(Router router) { getDatasourceIfNull(); } + // @TODO make subclasses use this method + public Connection getConnection() { + try { + return datasource.getConnection(); + } catch (SQLException e) { + var root = getRootCause(e); + if (root instanceof ClassNotFoundException) { + throw new ConfigurationException("JDBC driver not found. Please add the JDBC driver to the classpath: " + root.getMessage()); + } + throw new RuntimeException(e); + } + } + private void getDatasourceIfNull() { if (datasource != null) return; diff --git a/core/src/main/java/com/predic8/membrane/core/util/json/JsonUtil.java b/core/src/main/java/com/predic8/membrane/core/util/json/JsonUtil.java index 5fb73f8092..644c5ca414 100644 --- a/core/src/main/java/com/predic8/membrane/core/util/json/JsonUtil.java +++ b/core/src/main/java/com/predic8/membrane/core/util/json/JsonUtil.java @@ -14,13 +14,30 @@ package com.predic8.membrane.core.util.json; -import com.fasterxml.jackson.databind.*; -import com.fasterxml.jackson.databind.node.*; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.http.Message; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.math.*; +import 
java.io.InputStream; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.util.Optional; + +import static com.predic8.membrane.core.http.MimeType.APPLICATION_JSON; +import static java.util.Optional.empty; public class JsonUtil { + private static final Logger log = LoggerFactory.getLogger(JsonUtil.class); + + + private static final ObjectMapper om = new ObjectMapper(); + private static final JsonNodeFactory FACTORY = JsonNodeFactory.instance; /** @@ -75,4 +92,60 @@ public static JsonNode scalarAsJson(String value) { return FACTORY.textNode(value); } + + /** + * Get JSON object from message body. + * The caller must deal with the possibility that the body is not a JSON object or + * there are parsing errors. + * @param jsonString String with a JSON body + * @return JSON object or empty if the body is not a JSON object or there are parsing errors + */ + public static Optional getJsonObject(String jsonString) { + try { + var node = om.readTree(jsonString); + if (node instanceof ObjectNode on) { + return Optional.of(on); + } + log.debug("Expected JSON Object but got: {}",node.getNodeType()); + } catch (Exception e) { + log.debug("Error reading JSON: {}", e.getMessage()); + } + return empty(); + } + + /** + * Get JSON object from message body. + * The caller must deal with the possibility that the body is not a JSON object or + * there are parsing errors. 
+ * @param msg With a JSON body + * @return JSON object or empty if the body is not a JSON object or there are parsing errors + */ + public static Optional getJsonObject(Message msg) { + return getJsonObjectFromSteam(msg.getBodyAsStreamDecoded()); + } + + private static Optional getJsonObjectFromSteam(InputStream obj) { + try { + var node = om.readTree(obj); + if (node instanceof ObjectNode on) { + return Optional.of(on); + } + log.debug("Expected JSON Object but got: {}",node.getNodeType()); + } catch (Exception e) { + log.debug("Error reading JSON: {}", e.getMessage()); + } + return empty(); + } + + + public static void setJsonBody(Message msg, ObjectNode json) { + try { + if (!msg.isJSON()) { + msg.getHeader().setContentType(APPLICATION_JSON); + } + msg.setBodyContent(om.writeValueAsBytes(json)); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } } diff --git a/core/src/test/java/com/predic8/membrane/core/http/HeaderTest.java b/core/src/test/java/com/predic8/membrane/core/http/HeaderTest.java index 2597281778..add23dfc42 100644 --- a/core/src/test/java/com/predic8/membrane/core/http/HeaderTest.java +++ b/core/src/test/java/com/predic8/membrane/core/http/HeaderTest.java @@ -15,15 +15,21 @@ package com.predic8.membrane.core.http; import jakarta.activation.MimeType; -import org.junit.jupiter.api.*; -import org.junit.jupiter.params.*; -import org.junit.jupiter.params.provider.*; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; -import java.util.*; +import java.util.HashSet; +import java.util.List; import static com.predic8.membrane.core.http.Header.*; -import static com.predic8.membrane.core.http.MimeType.*; -import static java.nio.charset.StandardCharsets.*; +import 
static com.predic8.membrane.core.http.MimeType.TEXT_XML; +import static com.predic8.membrane.core.http.MimeType.isBinary; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.jupiter.api.Assertions.*; class HeaderTest { @@ -262,4 +268,47 @@ void unique() { assertEquals("1, 2", h.getValuesAsString("X-Foo")); assertEquals("3, 4", h.getValuesAsString("X-BAR")); } + + @Nested + class IsMultipart { + @Test + void formDataIsMultipart() { + var h = new Header(); + h.add("Content-Type", "multipart/form-data; boundary=abc"); + assertTrue(h.isMultipart()); + } + + @Test + void relatedIsMultipart() { + var h = new Header(); + h.add("Content-Type", "multipart/related; boundary=abc"); + assertTrue(h.isMultipart()); + } + + @Test + void mixedIsMultipart() { + var h = new Header(); + h.add("Content-Type", "multipart/mixed; boundary=abc"); + assertTrue(h.isMultipart()); + } + + @Test + void isCaseInsensitive() { + var h = new Header(); + h.add("Content-Type", "Multipart/Form-Data; boundary=abc"); + assertTrue(h.isMultipart()); + } + + @Test + void jsonIsNotMultipart() { + var h = new Header(); + h.add("Content-Type", "application/json"); + assertFalse(h.isMultipart()); + } + + @Test + void missingContentTypeIsNotMultipart() { + assertFalse(new Header().isMultipart()); + } + } } diff --git a/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequestTest.java b/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequestTest.java new file mode 100644 index 0000000000..560d5edf58 --- /dev/null +++ b/core/src/test/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractModelInputRequestTest.java @@ -0,0 +1,67 @@ +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.predic8.membrane.core.exchange.Exchange; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import 
java.io.IOException; +import java.net.URISyntaxException; +import java.util.List; + +import static com.predic8.membrane.core.http.Request.post; +import static org.junit.jupiter.api.Assertions.assertEquals; + +class AbstractModelInputRequestTest { + + @ParameterizedTest + @ValueSource(strings = { + "Bearer test-api-key", + "bearer test-api-key", + "BEARER test-api-key", + "bEaReR test-api-key" + }) + void getApiKeyAcceptsBearerCaseInsensitive(String authorization) throws URISyntaxException, IOException { + var request = new TestLLMRequest(post("http://localhost/chat/completions") + .header("Authorization", authorization) + .json("{}") + .buildExchange()); + + assertEquals("test-api-key", request.getApiKey()); + } + + private static class TestLLMRequest extends AbstractModelInputRequest implements ModelInputRequest { + + TestLLMRequest(Exchange exchange) throws IOException { + super(exchange); + } + + @Override + public long getRequestedMaxOutputTokens() { + return -1; + } + + @Override + public void setMaxOutputTokens(int maxOutputTokens) { + } + + @Override + public long estimateInputTokens() { + return 0; + } + + @Override + public String getSystemPrompt() { + return null; + } + + @Override + public void setSystemPrompts(List prompts) { + + } + + @Override + public void removeSystemPrompt() { + + } + } +} \ No newline at end of file diff --git a/core/src/test/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServerTest.java b/core/src/test/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServerTest.java index 33fa0184f4..b52986e458 100644 --- a/core/src/test/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServerTest.java +++ b/core/src/test/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServerTest.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import com.fasterxml.jackson.databind.JsonNode; diff --git a/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequestTest.java b/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequestTest.java index 935ae66c12..f5efd94d6e 100644 --- a/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequestTest.java +++ b/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequestTest.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.jsonrpc; import org.junit.jupiter.api.Test; diff --git a/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponseTest.java b/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponseTest.java index 97eed97b58..7c0a85e98c 100644 --- a/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponseTest.java +++ b/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponseTest.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.jsonrpc; import org.junit.jupiter.api.Test; diff --git a/core/src/test/java/com/predic8/membrane/core/mcp/MCPInitializeTest.java b/core/src/test/java/com/predic8/membrane/core/mcp/MCPInitializeTest.java index 02d6dadce4..0abbb75f66 100644 --- a/core/src/test/java/com/predic8/membrane/core/mcp/MCPInitializeTest.java +++ b/core/src/test/java/com/predic8/membrane/core/mcp/MCPInitializeTest.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/test/java/com/predic8/membrane/core/multipart/MultipartUtilTest.java b/core/src/test/java/com/predic8/membrane/core/multipart/MultipartUtilTest.java new file mode 100644 index 0000000000..86c8fbf1e3 --- /dev/null +++ b/core/src/test/java/com/predic8/membrane/core/multipart/MultipartUtilTest.java @@ -0,0 +1,228 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.multipart; + +import com.predic8.membrane.core.http.Response; +import jakarta.mail.internet.ParseException; +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.jupiter.api.Assertions.*; + +class MultipartUtilTest { + + private static final String BOUNDARY = "test-boundary-123"; + private static final String CRLF = "\r\n"; + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + /** Builds a Response with the given multipart body and boundary. */ + private static Response response(String body) { + return response(body, BOUNDARY); + } + + private static Response response(String body, String boundary) { + byte[] bytes = body.getBytes(UTF_8); + return Response.ok() + .header("Content-Type", "multipart/form-data; boundary=\"" + boundary + "\"") + .header("Content-Length", String.valueOf(bytes.length)) + .body(bytes) + .build(); + } + + /** + * Builds a minimal multipart body. + * Each {@code part} string should contain headers + blank line + body, + * e.g. {@code "Content-Disposition: form-data; name=\"x\"\r\n\r\nvalue"}. + */ + private static String multipartBody(String... 
parts) { + var sb = new StringBuilder(); + for (String part : parts) { + sb.append("--").append(BOUNDARY).append(CRLF); + sb.append(part).append(CRLF); + } + sb.append("--").append(BOUNDARY).append("--").append(CRLF); + return sb.toString(); + } + + private static String formField(String name, String value) { + return "Content-Disposition: form-data; name=\"" + name + "\"" + CRLF + CRLF + value; + } + + // ------------------------------------------------------------------------- + // split(Message) — auto-reads boundary from Content-Type + // ------------------------------------------------------------------------- + + @Test + void twoFormFieldsAreReturnedInOrder() throws IOException, ParseException { + var parts = MultipartUtil.split(response(multipartBody( + formField("username", "alice"), + formField("message", "Hello World") + ))); + + assertEquals(2, parts.size()); + assertEquals("username", parts.get(0).getName()); + assertEquals("alice", parts.get(0).getBodyAsString()); + assertEquals("message", parts.get(1).getName()); + assertEquals("Hello World", parts.get(1).getBodyAsString()); + } + + @Test + void fileUploadPartExposesFilenameAndContentType() throws IOException, ParseException { + String part = "Content-Disposition: form-data; name=\"upload\"; filename=\"photo.jpg\"" + CRLF + + "Content-Type: image/jpeg" + CRLF + + CRLF + + "JFIF"; + + var parts = MultipartUtil.split(response(multipartBody(part))); + + assertEquals(1, parts.size()); + assertEquals("upload", parts.get(0).getName()); + assertEquals("photo.jpg", parts.get(0).getFilename()); + assertEquals("image/jpeg", parts.get(0).getContentType()); + assertArrayEquals("JFIF".getBytes(UTF_8), parts.get(0).getBody()); + } + + @Test + void partWithContentIdIsAccessible() throws IOException, ParseException { + String part = "Content-Type: application/octet-stream" + CRLF + + "Content-ID: " + CRLF + + CRLF + + "binary"; + + var parts = MultipartUtil.split(response(multipartBody(part))); + + assertEquals("", 
parts.get(0).getContentID()); + } + + @Test + void binaryBodyIsPreservedExactly() throws IOException, ParseException { + byte[] payload = {0, 1, 2, (byte) 0xFF, (byte) 0xFE}; + String header = "Content-Type: application/octet-stream" + CRLF + CRLF; + byte[] partBytes = (header).getBytes(UTF_8); + byte[] fullPart = new byte[partBytes.length + payload.length]; + System.arraycopy(partBytes, 0, fullPart, 0, partBytes.length); + System.arraycopy(payload, 0, fullPart, partBytes.length, payload.length); + + // Build body manually to embed raw bytes + byte[] prefix = ("--" + BOUNDARY + CRLF).getBytes(UTF_8); + byte[] suffix = (CRLF + "--" + BOUNDARY + "--" + CRLF).getBytes(UTF_8); + byte[] body = new byte[prefix.length + fullPart.length + suffix.length]; + System.arraycopy(prefix, 0, body, 0, prefix.length); + System.arraycopy(fullPart, 0, body, prefix.length, fullPart.length); + System.arraycopy(suffix, 0, body, prefix.length + fullPart.length, suffix.length); + + byte[] msgBytes = body; + var msg = Response.ok() + .header("Content-Type", "multipart/form-data; boundary=\"" + BOUNDARY + "\"") + .header("Content-Length", String.valueOf(msgBytes.length)) + .body(msgBytes) + .build(); + + var parts = MultipartUtil.split(msg); + assertArrayEquals(payload, parts.get(0).getBody()); + } + + // ------------------------------------------------------------------------- + // split(Message, boundary) — explicit boundary overload + // ------------------------------------------------------------------------- + + @Test + void explicitBoundaryOverloadProducesSameResult() throws IOException { + var body = multipartBody(formField("x", "42")); + byte[] bytes = body.getBytes(UTF_8); + var msg = Response.ok() + .header("Content-Type", "multipart/form-data; boundary=\"other\"") // intentionally wrong + .header("Content-Length", String.valueOf(bytes.length)) + .body(bytes) + .build(); + + // Pass the correct boundary explicitly — Content-Type boundary is ignored + var parts = 
MultipartUtil.split(msg, BOUNDARY); + + assertEquals(1, parts.size()); + assertEquals("x", parts.get(0).getName()); + assertEquals("42", parts.get(0).getBodyAsString()); + } + + // ------------------------------------------------------------------------- + // Real-world resource: XOP multipart from ReassembleTest + // ------------------------------------------------------------------------- + + @Test + void xopResourceSplitsIntoTwoParts() throws IOException { + byte[] body = IOUtils.toByteArray(getClass().getResourceAsStream("/multipart/embedded-byte-array.txt")); + var response = Response.ok() + .header("Content-Type", "multipart/related; " + + "type=\"application/xop+xml\"; " + + "boundary=\"uuid:168683dc-43b3-4e71-8e66-efb633ef406b\"; " + + "start=\"\"; " + + "start-info=\"text/xml\"") + .header("Content-Length", String.valueOf(body.length)) + .body(body) + .build(); + + var parts = MultipartUtil.split(response, "uuid:168683dc-43b3-4e71-8e66-efb633ef406b"); + + assertEquals(2, parts.size()); + assertEquals("", parts.get(0).getContentID()); + assertEquals("", parts.get(1).getContentID()); + assertEquals("application/xop+xml; charset=UTF-8; type=\"text/xml\";", parts.get(0).getContentType()); + assertEquals("application/octet-stream", parts.get(1).getContentType()); + } + + // ------------------------------------------------------------------------- + // Error cases + // ------------------------------------------------------------------------- + + @Test + void missingContentTypeThrows() { + byte[] bytes = "body".getBytes(UTF_8); + var msg = Response.ok() + .header("Content-Length", String.valueOf(bytes.length)) + .body(bytes) + .build(); + + assertThrows(IOException.class, () -> MultipartUtil.split(msg)); + } + + @Test + void missingBoundaryParameterThrows() { + byte[] bytes = "body".getBytes(UTF_8); + var msg = Response.ok() + .header("Content-Type", "multipart/form-data") // no boundary= + .header("Content-Length", String.valueOf(bytes.length)) + .body(bytes) + 
.build(); + + assertThrows(IOException.class, () -> MultipartUtil.split(msg)); + } + + @Test + void unsupportedContentTransferEncodingThrows() { + String part = "Content-Disposition: form-data; name=\"x\"" + CRLF + + "Content-Transfer-Encoding: quoted-printable" + CRLF + + CRLF + + "value"; + + assertThrows(IOException.class, + () -> MultipartUtil.split(response(multipartBody(part)))); + } +} diff --git a/core/src/test/java/com/predic8/membrane/core/util/http/SSEParserTest.java b/core/src/test/java/com/predic8/membrane/core/util/http/SSEParserTest.java new file mode 100644 index 0000000000..7738321f85 --- /dev/null +++ b/core/src/test/java/com/predic8/membrane/core/util/http/SSEParserTest.java @@ -0,0 +1,165 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.util.http; + +import com.predic8.membrane.core.http.Chunk; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.*; + +class SSEParserTest { + + @Test + void parsesSingleEvent() { + var parser = new SSEParser(Set.of("done")); + + assertFalse(parser.parse(chunk(""" + event: message + data: hello + + """))); + + var events = parser.getEvents(); + + assertEquals(1, events.size()); + assertEquals("message", events.getFirst().name()); + assertEquals("hello", events.getFirst().data()); + assertTrue(parser.getTerminalEvent().isEmpty()); + } + + @Test + void parsesMultilineData() { + var parser = new SSEParser(Set.of("done")); + + parser.parse(chunk(""" + event: message + data: first + data: second + + """)); + + assertEquals("first\nsecond", parser.getEvents().getFirst().data()); + } + + @Test + void parsesEventSplitAcrossChunks() { + var parser = new SSEParser(Set.of("done")); + + assertFalse(parser.parse(chunk(""" + event: mes"""))); + + assertFalse(parser.parse(chunk(""" + sage + data: hel"""))); + + assertFalse(parser.parse(chunk(""" + lo + + """))); + + var event = parser.getEvents().getFirst(); + + assertEquals("message", event.name()); + assertEquals("hello", event.data()); + } + + @Test + void returnsTrueWhenTerminalEventIsFound() { + var parser = new SSEParser(Set.of("done")); + + assertTrue(parser.parse(chunk(""" + event: done + data: {"usage":{"total_tokens":42}} + + """))); + + var terminal = parser.getTerminalEvent(); + + assertTrue(terminal.isPresent()); + assertEquals("done", terminal.get().name()); + assertEquals("{\"usage\":{\"total_tokens\":42}}", terminal.get().data()); + } + + @Test + void ignoresChunksAfterTerminalEvent() { + var parser = new SSEParser(Set.of("done")); + + assertTrue(parser.parse(chunk(""" + event: done + data: final + + """))); + + assertTrue(parser.parse(chunk(""" + event: message + data: ignored + + """))); + + assertEquals(1, 
parser.getEvents().size()); + assertEquals("done", parser.getEvents().getFirst().name()); + } + + @Test + void ignoresCommentsAndUnknownFields() { + var parser = new SSEParser(Set.of("done")); + + parser.parse(chunk(""" + : comment + id: 123 + retry: 1000 + event: message + data: hello + + """)); + + var event = parser.getEvents().getFirst(); + + assertEquals("message", event.name()); + assertEquals("hello", event.data()); + } + + @Test + void supportsCrLfLineEndings() { + var parser = new SSEParser(Set.of("done")); + + parser.parse(chunk("event: message\r\ndata: hello\r\n\r\n")); + + var event = parser.getEvents().getFirst(); + + assertEquals("message", event.name()); + assertEquals("hello", event.data()); + } + + @Test + void returnsUnmodifiableEventsList() { + var parser = new SSEParser(Set.of("done")); + + parser.parse(chunk(""" + event: message + data: hello + + """)); + + assertThrows(UnsupportedOperationException.class, + () -> parser.getEvents().add(new SSEParser.SSEEvent("x", "y"))); + } + + private static Chunk chunk(String content) { + return new Chunk(content.getBytes()); + } +} \ No newline at end of file diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java new file mode 100644 index 0000000000..77c674ee1e --- /dev/null +++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java @@ -0,0 +1,169 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway; + +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.AbstractInterceptor; +import com.predic8.membrane.core.interceptor.Outcome; +import com.predic8.membrane.core.interceptor.flow.ReturnInterceptor; +import com.predic8.membrane.core.interceptor.templating.StaticInterceptor; +import com.predic8.membrane.core.proxies.ServiceProxy; +import com.predic8.membrane.core.proxies.ServiceProxyKey; +import com.predic8.membrane.core.router.DefaultRouter; +import com.predic8.membrane.examples.util.DistributionExtractingTestcase; +import com.predic8.membrane.examples.util.Process2; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; + +import java.util.function.Consumer; + +import static com.predic8.membrane.core.http.MimeType.APPLICATION_JSON; + +/** + * Base class for AI tutorial tests. Starts a local Membrane mock of the upstream LLM API + * so tests run without a real API key and without network access to the LLM provider. + * + *

<p>The tutorial YAML's {@code target.url} is rewritten to point at the mock server + * before Membrane starts. Subclasses override {@link #getTutorialDir()} and + * {@link #getTutorialYaml()} to select the tutorial under test. + * + *

JUnit 5 lifecycle ordering guarantees that {@code DistributionExtractingTestcase.init()} + * (superclass {@code @BeforeEach}) runs first and sets {@code baseDir}, allowing + * {@link #startGateway()} to use {@code replaceInFile2()} safely. + */ +public abstract class AbstractAiTutorialTest extends DistributionExtractingTestcase { + + protected static final int MOCK_LLM_PORT = 3100; + + /** + * Value substituted for the {@code <>} placeholder in tutorial + * YAMLs before Membrane starts. Tests that verify upstream key-substitution assert against + * this constant instead of the raw placeholder text. + */ + protected static final String TEST_API_KEY = "test-upstream-key"; + + protected Process2 process; + protected volatile String lastRequestBody; + protected volatile String lastRequestApiKey; + + private DefaultRouter mockRouter; + + protected abstract String getTutorialDir(); + protected abstract String getTutorialYaml(); + + @Override + protected String getExampleDirName() { + return "../tutorials/%s".formatted(getTutorialDir()); + } + + @Override + protected String getParameters() { + return "-c %s".formatted(getTutorialYaml()); + } + + /** + * Runs after {@code DistributionExtractingTestcase.init()} sets {@code baseDir}. + * Starts the mock, patches the YAML, then starts Membrane. + */ + @BeforeEach + void startGateway() throws Exception { + startMockLlmApi(); + replaceInFile2(getTutorialYaml(), getUpstreamApiUrl(), mockApiUrl()); + replaceInFile2(getTutorialYaml(), "<>", TEST_API_KEY); + process = startServiceProxyScript(); + } + + @AfterEach + void stopGateway() { + if (process != null) + process.killScript(); + if (mockRouter != null) + mockRouter.stop(); + } + + /** + * The upstream API URL used in the tutorial YAML (to be replaced by the mock URL). 
+ */ + protected String getUpstreamApiUrl() { + return "https://api.anthropic.com"; + } + + protected String mockApiUrl() { + return "http://localhost:" + MOCK_LLM_PORT; + } + + /** + * The HTTP header name from which the upstream API key is read when capturing + * requests in the mock. Defaults to {@code "x-api-key"} (Claude). Override to + * {@code "authorization"} for OpenAI or {@code "x-goog-api-key"} for Google. + */ + protected String getApiKeyHeader() { + return "x-api-key"; + } + + /** + * Content-Type the mock LLM server sends back. Defaults to {@code "application/json"} + * for regular responses. Override to {@code "text/event-stream"} in streaming test classes. + */ + protected String mockContentType() { + return APPLICATION_JSON; + } + + private void startMockLlmApi() throws Exception { + var si = new StaticInterceptor(); + si.setSrc(mockResponse()); + si.setContentType(mockContentType()); + + var sp = new ServiceProxy(new ServiceProxyKey(MOCK_LLM_PORT), null, 0); + sp.getFlow().add(new BodyCaptureInterceptor( + body -> lastRequestBody = body, + apiKey -> lastRequestApiKey = apiKey, + getApiKeyHeader())); + sp.getFlow().add(si); + sp.getFlow().add(new ReturnInterceptor()); + + mockRouter = new DefaultRouter(); + mockRouter.add(sp); + mockRouter.start(); + } + + private static class BodyCaptureInterceptor extends AbstractInterceptor { + + private final Consumer bodySink; + private final Consumer apiKeySink; + private final String apiKeyHeader; + + BodyCaptureInterceptor(Consumer bodySink, Consumer apiKeySink, String apiKeyHeader) { + this.bodySink = bodySink; + this.apiKeySink = apiKeySink; + this.apiKeyHeader = apiKeyHeader; + } + + @Override + public Outcome handleRequest(Exchange exc) { + bodySink.accept(exc.getRequest().getBodyAsStringDecoded()); + apiKeySink.accept(exc.getRequest().getHeader().getFirstValue(apiKeyHeader)); + return Outcome.CONTINUE; + } + } + + protected String mockResponse() { + return """ + 
{"id":"msg_mock","type":"message","role":"assistant",\ + "content":[{"type":"text","text":"I am a mock."}],\ + "model":"claude-sonnet-4-0","stop_reason":"end_turn",\ + "usage":{"input_tokens":10,"output_tokens":5}}"""; + } +} diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java new file mode 100644 index 0000000000..32614a7431 --- /dev/null +++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java @@ -0,0 +1,116 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.claude; + +import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest; +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +import static io.restassured.RestAssured.given; +import static io.restassured.path.json.JsonPath.from; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; + +/** + * Integration test for {@code distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml}. + * + *

<p>The tutorial configures a Claude LLM gateway with: + * <ul> + * <li>{@code maxInputTokens: 100} — requests whose estimated input exceeds 100 tokens are rejected</li> + * <li>{@code maxOutputTokens: 200} — {@code max_tokens} in the forwarded request is capped to 200</li> + * </ul> + * + *

The upstream Anthropic API is replaced by a local mock server so no real API key is needed. + */ +public class BasicClaudeLLMGatewayTutorialTest extends AbstractAiTutorialTest { + + @Override + protected String getTutorialDir() { + return "ai/llm-gateway/claude"; + } + + @Override + protected String getTutorialYaml() { + return "10-Basic-LLM-Gateway.yaml"; + } + + /** + * A request within the token limits is forwarded to the upstream and its response is returned. + */ + @Test + void simpleRequestIsForwarded() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-api-key", "test-key") + .header("anthropic-version", "2023-06-01") + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/messages") + .then() + .log().ifValidationFails() + .statusCode(200) + .body("type", equalTo("message")) + .body("content[0].type", equalTo("text")); + // @formatter:on + } + + /** + * A request whose message content exceeds {@code maxInputTokens} (100) is rejected by the + * gateway before reaching the upstream. The response uses the Claude error format. + */ + @Test + void inputTokenLimitExceededIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-api-key", "test-key") + .header("anthropic-version", "2023-06-01") + .body(readFileFromBaseDir("max-input.json")) + .when() + .post(LOCALHOST_2000 + "/v1/messages") + .then() + .statusCode(400) + .body("type", equalTo("error")) + .body("error.type", equalTo("invalid_request_error")) + .body("error.message", containsString("tokens")); + // @formatter:on + } + + /** + * When the request asks for more output tokens than {@code maxOutputTokens} (200) allows, + * the gateway rewrites {@code max_tokens} to 200 before forwarding to the upstream. + * The mock captures the forwarded body so we can verify the value was actually capped. 
+ */ + @Test + void outputTokensAreCappedBeforeForwarding() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-api-key", "test-key") + .header("anthropic-version", "2023-06-01") + .body(readFileFromBaseDir("max-output.json")) + .when() + .post(LOCALHOST_2000 + "/v1/messages") + .then() + .log().everything() + .statusCode(200); + // @formatter:on + + assertThat(from(lastRequestBody).getInt("max_tokens"), equalTo(200)); + } +} diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java new file mode 100644 index 0000000000..3514870774 --- /dev/null +++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java @@ -0,0 +1,223 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.claude; + +import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest; +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +import static io.restassured.RestAssured.given; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; + +/** + * Integration tests for + * {@code distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml}. + * + *

The tutorial demonstrates sharing a single upstream API key between multiple users, + * each identified by their own gateway key and subject to individual token budgets: + *

    + *
  • alice — key {@code abc123}, budget 250 tokens
  • + *
  • bob — key {@code qwertz}, budget 10 000 tokens
  • + *
+ * Additional gateway limits: {@code maxInputTokens=100}, {@code maxOutputTokens=200}, + * allowed models: {@code claude-sonnet-4-0}, {@code claude-opus-4-0}, {@code claude-haiku-3-5}. + */ +public class SharingApiKeysTutorialTest extends AbstractAiTutorialTest { + + private static final String ALICE = "abc123"; + private static final String BOB = "qwertz"; + + @Override + protected String getTutorialDir() { + return "ai/llm-gateway/claude"; + } + + @Override + protected String getTutorialYaml() { + return "20-Sharing-API-Keys.yaml"; + } + + @Test + void aliceCanSendRequest() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-api-key", ALICE) + .header("anthropic-version", "2023-06-01") + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/messages") + .then() + .statusCode(200) + .body("type", equalTo("message")); + // @formatter:on + } + + @Test + void bobCanSendRequest() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-api-key", BOB) + .header("anthropic-version", "2023-06-01") + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/messages") + .then() + .statusCode(200) + .body("type", equalTo("message")); + // @formatter:on + } + + @Test + void unknownApiKeyIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-api-key", "invalid-key") + .header("anthropic-version", "2023-06-01") + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/messages") + .then() + .statusCode(401) + .body("type", equalTo("error")) + .body("error.type", equalTo("authentication_error")); + // @formatter:on + } + + /** + * The gateway is configured with its own upstream {@code apiKey}. When a user request + * arrives carrying the user-facing key (e.g. 
alice's {@code abc123}), the gateway must + * replace it with the configured upstream key before forwarding to the LLM provider. + */ + @Test + void userApiKeyIsReplacedWithGatewayApiKey() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-api-key", ALICE) + .header("anthropic-version", "2023-06-01") + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/messages") + .then() + .statusCode(200); + // @formatter:on + + assertThat(lastRequestApiKey, not(equalTo(ALICE))); + assertThat(lastRequestApiKey, equalTo(TEST_API_KEY)); + } + + @Test + void wrongModelIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-api-key", ALICE) + .header("anthropic-version", "2023-06-01") + .body(readFileFromBaseDir("wrong-model.json")) + .when() + .post(LOCALHOST_2000 + "/v1/messages") + .then() + .statusCode(400) + .body("type", equalTo("error")) + .body("error.type", equalTo("invalid_request_error")) + .body("error.message", containsString("gpt-5")) + .body("error.message", containsString("not allowed")); + // @formatter:on + } + + @Test + void inputTokenLimitExceededIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-api-key", ALICE) + .header("anthropic-version", "2023-06-01") + .body(readFileFromBaseDir("max-input.json")) + .when() + .post(LOCALHOST_2000 + "/v1/messages") + .then() + .statusCode(400) + .body("type", equalTo("error")) + .body("error.type", equalTo("invalid_request_error")) + .body("error.message", containsString("prompt is too long")) + .body("error.message", containsString("100 maximum")); + // @formatter:on + } + + /** + * Alice has a budget of 250 tokens. Each request with {@code max-output.json} projects + * 7 (input estimate) + 200 (capped max_tokens) = 207 tokens. 
The mock returns 15 tokens + * of actual usage per call, so the running total grows by 15 after each response. + * + *

Budget accounting per request: + *

+     *   1st: 250 - 0   - 207 =  43  → forwarded; used becomes 15
+     *   2nd: 250 - 15  - 207 =  28  → forwarded; used becomes 30
+     *   3rd: 250 - 30  - 207 =  13  → forwarded; used becomes 45
+     *   4th: 250 - 45  - 207 =  -2  → rejected with 429
+     * 
+ * + * Bob's separate budget of 10 000 tokens is unaffected, so he can still send requests + * after alice is blocked. + */ + @Test + void alicesTokenBudgetIsExhaustedWhileBobIsUnaffected() throws IOException { + for (int i = 0; i < 3; i++) { + // @formatter:off + given() + .contentType("application/json") + .header("x-api-key", ALICE) + .header("anthropic-version", "2023-06-01") + .body(readFileFromBaseDir("max-output.json")) + .when() + .post(LOCALHOST_2000 + "/v1/messages") + .then() + .statusCode(200); + // @formatter:on + } + + // Alice's budget is now exhausted + // @formatter:off + given() + .contentType("application/json") + .header("x-api-key", ALICE) + .header("anthropic-version", "2023-06-01") + .body(readFileFromBaseDir("max-output.json")) + .when() + .post(LOCALHOST_2000 + "/v1/messages") + .then() + .statusCode(429) + .body("type", equalTo("error")) + .body("error.type", equalTo("rate_limit_error")); + + // Bob's budget is independent — he can still send requests + given() + .contentType("application/json") + .header("x-api-key", BOB) + .header("anthropic-version", "2023-06-01") + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/messages") + .then() + .statusCode(200) + .body("type", equalTo("message")); + // @formatter:on + } +} diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java new file mode 100644 index 0000000000..4e39f7ae6c --- /dev/null +++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java @@ -0,0 +1,58 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.google; + +import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest; + +/** + * Base class for Google Gemini LLM-Gateway tutorial tests. + * + *

Overrides the upstream URL and the API-key header so the mock captures + * the {@code x-goog-api-key} header that Google uses. The mock response is + * formatted as a Gemini {@code generateContent} reply and reports 100 total + * tokens (50 prompt + 50 candidates) per call. + */ +public abstract class AbstractGoogleTutorialTest extends AbstractAiTutorialTest { + + /** URL prefix used in both Google tutorial YAML files. */ + @Override + protected String getUpstreamApiUrl() { + return "https://generativelanguage.googleapis.com"; + } + + @Override + protected String getTutorialDir() { + return "ai/llm-gateway/google"; + } + + /** Google authenticates via the {@code x-goog-api-key} header. */ + @Override + protected String getApiKeyHeader() { + return "x-goog-api-key"; + } + + /** + * Minimal Gemini {@code generateContent} reply with 50 prompt + 50 candidates = 100 total + * tokens. The higher per-request cost keeps the token-budget exhaustion test to three + * successful requests before alice's 500-token allowance runs out. + */ + @Override + protected String mockResponse() { + return """ + {"candidates":[{"content":{"parts":[{"text":"I am a mock."}],"role":"model"},\ + "finishReason":"STOP"}],\ + "usageMetadata":{"promptTokenCount":50,"candidatesTokenCount":50,"totalTokenCount":100}}"""; + } +} diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java new file mode 100644 index 0000000000..16f52d470b --- /dev/null +++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java @@ -0,0 +1,109 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.google; + +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +import static io.restassured.RestAssured.given; +import static io.restassured.path.json.JsonPath.from; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; + +/** + * Integration test for + * {@code distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml}. + * + *

The tutorial configures a Google Gemini LLM gateway with: + *

    + *
  • {@code maxInputTokens: 100} — requests whose estimated input exceeds 100 tokens are rejected
  • + *
  • {@code maxOutputTokens: 200} — {@code generationConfig.maxOutputTokens} in the forwarded + * request is capped to 200
  • + *
+ * + *

The upstream Google Gemini API is replaced by a local mock server so no real API key is needed. + */ +public class BasicGoogleLLMGatewayTutorialTest extends AbstractGoogleTutorialTest { + + private static final String GEMINI_ENDPOINT = + LOCALHOST_2000 + "/v1beta/models/gemini-2.5-flash:generateContent"; + + @Override + protected String getTutorialYaml() { + return "10-Basic-LLM-Gateway.yaml"; + } + + /** + * A request within the token limits is forwarded to the upstream and its response is returned. + */ + @Test + void simpleRequestIsForwarded() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-goog-api-key", "test-key") + .body(readFileFromBaseDir("simple.json")) + .when() + .post(GEMINI_ENDPOINT) + .then() + .statusCode(200) + .body("candidates[0].content.parts[0].text", equalTo("I am a mock.")); + // @formatter:on + } + + /** + * A request whose message content exceeds {@code maxInputTokens} (100) is rejected by the + * gateway before reaching the upstream. The response uses the Google error format. + */ + @Test + void inputTokenLimitExceededIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-goog-api-key", "test-key") + .body(readFileFromBaseDir("max-input.json")) + .when() + .post(GEMINI_ENDPOINT) + .then() + .statusCode(400) + .body("error.status", equalTo("INVALID_ARGUMENT")) + .body("error.message", containsString("exceeds the maximum allowed")) + .body("error.message", containsString("100")); + // @formatter:on + } + + /** + * When the request asks for more output tokens than {@code maxOutputTokens} (200) allows, + * the gateway rewrites {@code generationConfig.maxOutputTokens} to 200 before forwarding. + * The mock captures the forwarded body so we can verify the value was actually capped. 
+ */ + @Test + void outputTokensAreCappedBeforeForwarding() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-goog-api-key", "test-key") + .body(readFileFromBaseDir("max-output.json")) + .when() + .post(GEMINI_ENDPOINT) + .then() + .statusCode(200); + // @formatter:on + + assertThat(from(lastRequestBody).getInt("generationConfig.maxOutputTokens"), equalTo(200)); + } +} diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java new file mode 100644 index 0000000000..79b1a71e3e --- /dev/null +++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java @@ -0,0 +1,219 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.google; + +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +import static io.restassured.RestAssured.given; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; + +/** + * Integration tests for + * {@code distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml}. + * + *

The tutorial demonstrates sharing a single upstream API key between multiple users, + * each identified by their own gateway key and subject to individual token budgets: + *

    + *
  • alice — key {@code abc123}, budget 500 tokens
  • + *
  • bob — key {@code qwertz}, budget 10 000 tokens
  • + *
+ * Additional gateway limits: {@code maxInputTokens=100}, {@code maxOutputTokens=200}, + * allowed models: {@code gemini-2.5-pro}, {@code gemini-2.5-flash}, {@code gemini-2.5-flash-lite}, + * {@code gemini-2.0-flash}, {@code gemini-2.0-flash-lite}. + * + *

For Google Gemini the model is part of the URL path + * ({@code /v1beta/models/<model>:generateContent}), not the request body. + */ +public class SharingApiKeysGoogleTutorialTest extends AbstractGoogleTutorialTest { + + private static final String ALICE = "abc123"; + private static final String BOB = "qwertz"; + + private static final String GEMINI_FLASH_ENDPOINT = + LOCALHOST_2000 + "/v1beta/models/gemini-2.5-flash:generateContent"; + + @Override + protected String getTutorialYaml() { + return "20-Sharing-API-Keys.yaml"; + } + + @Test + void aliceCanSendRequest() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-goog-api-key", ALICE) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(GEMINI_FLASH_ENDPOINT) + .then() + .statusCode(200) + .body("candidates[0].content.parts[0].text", equalTo("I am a mock.")); + // @formatter:on + } + + @Test + void bobCanSendRequest() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-goog-api-key", BOB) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(GEMINI_FLASH_ENDPOINT) + .then() + .statusCode(200) + .body("candidates[0].content.parts[0].text", equalTo("I am a mock.")); + // @formatter:on + } + + @Test + void unknownApiKeyIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-goog-api-key", "invalid-key") + .body(readFileFromBaseDir("simple.json")) + .when() + .post(GEMINI_FLASH_ENDPOINT) + .then() + .statusCode(401) + .body("error.status", equalTo("UNAUTHENTICATED")) + .body("error.message", containsString("Invalid API key")); + // @formatter:on + } + + /** + * The gateway is configured with its own upstream {@code apiKey}. When a user request + * arrives carrying the user-facing key (e.g. alice's {@code abc123}), the gateway must + * replace it with the configured upstream key before forwarding to the LLM provider. 
+ * For Google Gemini, the key is carried in the {@code x-goog-api-key} header. + */ + @Test + void userApiKeyIsReplacedWithGatewayApiKey() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-goog-api-key", ALICE) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(GEMINI_FLASH_ENDPOINT) + .then() + .log().ifValidationFails() + .statusCode(200); + // @formatter:on + + assertThat(lastRequestApiKey, not(equalTo(ALICE))); + assertThat(lastRequestApiKey, equalTo(TEST_API_KEY)); + } + + /** + * For Google Gemini the model is extracted from the URL path. Sending a request to + * {@code /v1beta/models/gpt-5:generateContent} uses model {@code gpt-5}, which is not + * in the allowed list, so the gateway rejects it with 400. + */ + @Test + void wrongModelIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-goog-api-key", ALICE) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1beta/models/gpt-5:generateContent") + .then() + .statusCode(400) + .body("error.status", equalTo("INVALID_ARGUMENT")) + .body("error.message", containsString("gpt-5")) + .body("error.message", containsString("not allowed")); + // @formatter:on + } + + @Test + void inputTokenLimitExceededIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("x-goog-api-key", ALICE) + .body(readFileFromBaseDir("max-input.json")) + .when() + .post(GEMINI_FLASH_ENDPOINT) + .then() + .statusCode(400) + .body("error.status", equalTo("INVALID_ARGUMENT")) + .body("error.message", containsString("exceeds the maximum allowed")) + .body("error.message", containsString("100")); + // @formatter:on + } + + /** + * Alice has a budget of 500 tokens. Each request with {@code max-output.json} projects + * 9 (input estimate) + 200 (capped maxOutputTokens) = 209 tokens. 
The mock returns + * 100 tokens of actual usage per call, so the running total grows by 100 after each response. + * + *

Budget accounting per request: + *

+     *   1st: 500 - 0   - 209 = 291  → forwarded; used becomes 100
+     *   2nd: 500 - 100 - 209 = 191  → forwarded; used becomes 200
+     *   3rd: 500 - 200 - 209 =  91  → forwarded; used becomes 300
+     *   4th: 500 - 300 - 209 =  -9  → rejected with 429
+     * 
+ * + * Bob's separate budget of 10 000 tokens is unaffected, so he can still send requests + * after alice is blocked. + */ + @Test + void alicesTokenBudgetIsExhaustedWhileBobIsUnaffected() throws IOException { + for (int i = 0; i < 3; i++) { + // @formatter:off + given() + .contentType("application/json") + .header("x-goog-api-key", ALICE) + .body(readFileFromBaseDir("max-output.json")) + .when() + .post(GEMINI_FLASH_ENDPOINT) + .then() + .statusCode(200); + // @formatter:on + } + + // Alice's budget is now exhausted + // @formatter:off + given() + .contentType("application/json") + .header("x-goog-api-key", ALICE) + .body(readFileFromBaseDir("max-output.json")) + .when() + .post(GEMINI_FLASH_ENDPOINT) + .then() + .statusCode(429) + .body("error.status", equalTo("RESOURCE_EXHAUSTED")); + + // Bob's budget is independent — he can still send requests + given() + .contentType("application/json") + .header("x-goog-api-key", BOB) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(GEMINI_FLASH_ENDPOINT) + .then() + .statusCode(200) + .body("candidates[0].content.parts[0].text", equalTo("I am a mock.")); + // @formatter:on + } +} diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java new file mode 100644 index 0000000000..54136f4c2f --- /dev/null +++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java @@ -0,0 +1,61 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.openai; + +import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest; + +/** + * Base class for OpenAI LLM-Gateway tutorial tests. + * + *

Overrides the upstream URL and the API-key header so the mock captures + * the {@code Authorization} header that OpenAI uses instead of {@code x-api-key}. + * The mock response is formatted as an OpenAI Responses-API reply and reports + * 100 total tokens (50 input + 50 output) per call. + */ +public abstract class AbstractOpenAiTutorialTest extends AbstractAiTutorialTest { + + @Override + protected String getTutorialDir() { + return "ai/llm-gateway/openai"; + } + + @Override + protected String getUpstreamApiUrl() { + return "https://api.openai.com"; + } + + /** + * OpenAI authenticates via {@code Authorization: Bearer <key>}. + * The full header value (including the "Bearer " prefix) is captured. + */ + @Override + protected String getApiKeyHeader() { + return "authorization"; + } + + /** + * Minimal OpenAI Responses-API reply with 50 input + 50 output = 100 total tokens. + * The higher per-request cost (vs. the default Claude mock) keeps the token-budget + * exhaustion test to three successful requests before alice's 500-token allowance runs out. 
+ */ + @Override + protected String mockResponse() { + return """ + {"id":"resp_mock","object":"response","model":"gpt-5-nano",\ + "output":[{"type":"message","role":"assistant",\ + "content":[{"type":"output_text","text":"I am a mock."}]}],\ + "usage":{"input_tokens":50,"output_tokens":50,"total_tokens":100}}"""; + } +} diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/BasicOpenAiLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/BasicOpenAiLLMGatewayTutorialTest.java new file mode 100644 index 0000000000..6dd96ee098 --- /dev/null +++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/BasicOpenAiLLMGatewayTutorialTest.java @@ -0,0 +1,105 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.openai; + +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +import static io.restassured.RestAssured.given; +import static io.restassured.path.json.JsonPath.from; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; + +/** + * Integration test for + * {@code distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml}. + * + *

The tutorial configures an OpenAI LLM gateway with: + *

    + *
  • {@code maxInputTokens: 100} — requests whose estimated input exceeds 100 tokens are rejected
  • + *
  • {@code maxOutputTokens: 200} — {@code max_output_tokens} in the forwarded request is capped to 200
  • + *
+ * + *

The upstream OpenAI API is replaced by a local mock server so no real API key is needed. + */ +public class BasicOpenAiLLMGatewayTutorialTest extends AbstractOpenAiTutorialTest { + + @Override + protected String getTutorialYaml() { + return "10-Basic-LLM-Gateway.yaml"; + } + + /** + * A request within the token limits is forwarded to the upstream and its response is returned. + */ + @Test + void simpleRequestIsForwarded() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer test-key") + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(200) + .body("object", equalTo("response")); + // @formatter:on + } + + /** + * A request whose message content exceeds {@code maxInputTokens} (100) is rejected by the + * gateway before reaching the upstream. The response uses the OpenAI error format. + */ + @Test + void inputTokenLimitExceededIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer test-key") + .body(readFileFromBaseDir("max-input.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(400) + .body("error.type", equalTo("invalid_request_error")) + .body("error.code", equalTo("context_length_exceeded")) + .body("error.message", containsString("100")); + // @formatter:on + } + + /** + * When the request asks for more output tokens than {@code maxOutputTokens} (200) allows, + * the gateway rewrites {@code max_output_tokens} to 200 before forwarding to the upstream. + * The mock captures the forwarded body so we can verify the value was actually capped. 
+ */ + @Test + void outputTokensAreCappedBeforeForwarding() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer test-key") + .body(readFileFromBaseDir("max-output.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(200); + // @formatter:on + + assertThat(from(lastRequestBody).getInt("max_output_tokens"), equalTo(200)); + } +} diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java new file mode 100644 index 0000000000..e1821bc28c --- /dev/null +++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java @@ -0,0 +1,209 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.openai; + +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +import static io.restassured.RestAssured.given; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; + +/** + * Integration tests for + * {@code distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml}. + * + *

The tutorial demonstrates sharing a single upstream API key between multiple users, + * each identified by their own gateway key and subject to individual token budgets: + *

    + *
  • alice — key {@code abc123}, budget 500 tokens
  • + *
  • bob — key {@code qwertz}, budget 10 000 tokens
  • + *
+ * Additional gateway limits: {@code maxInputTokens=100}, {@code maxOutputTokens=200}, + * allowed models: {@code gpt-5.4}, {@code gpt-5-nano}, {@code gpt-5-mini}. + */ +public class SharingApiKeysOpenAiTutorialTest extends AbstractOpenAiTutorialTest { + + private static final String ALICE = "abc123"; + private static final String BOB = "qwertz"; + + @Override + protected String getTutorialYaml() { + return "20-Sharing-API-Keys.yaml"; + } + + @Test + void aliceCanSendRequest() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + ALICE) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(200) + .body("object", equalTo("response")); + // @formatter:on + } + + @Test + void bobCanSendRequest() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + BOB) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(200) + .body("object", equalTo("response")); + // @formatter:on + } + + @Test + void unknownApiKeyIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer invalid-key") + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(401) + .body("error.code", equalTo("invalid_authentication")); + // @formatter:on + } + + /** + * The gateway is configured with its own upstream {@code apiKey}. When a user request + * arrives carrying the user-facing key (e.g. alice's {@code abc123}), the gateway must + * replace it with the configured upstream key before forwarding to the LLM provider. + * For OpenAI, the key is carried in the {@code Authorization: Bearer <key>} header. 
+ */ + @Test + void userApiKeyIsReplacedWithGatewayApiKey() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + ALICE) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(200); + // @formatter:on + + assertThat(lastRequestApiKey, not(equalTo("Bearer " + ALICE))); + assertThat(lastRequestApiKey, equalTo("Bearer " + TEST_API_KEY)); + } + + @Test + void wrongModelIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + ALICE) + .body(readFileFromBaseDir("wrong-model.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .log().ifValidationFails() + .statusCode(400) + .body("error.type", equalTo("invalid_request_error")) + .body("error.code", equalTo("model_not_allowed")) + .body("error.message", containsString("gpt-4")) + .body("error.message", containsString("not allowed")); + // @formatter:on + } + + @Test + void inputTokenLimitExceededIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + ALICE) + .body(readFileFromBaseDir("max-input.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(400) + .body("error.type", equalTo("invalid_request_error")) + .body("error.code", equalTo("context_length_exceeded")) + .body("error.message", containsString("maximum context length")) + .body("error.message", containsString("100")); + // @formatter:on + } + + /** + * Alice has a budget of 500 tokens. Each request with {@code max-output.json} projects + * 9 (input estimate) + 200 (capped max_output_tokens) = 209 tokens. The mock returns + * 100 tokens of actual usage per call, so the running total grows by 100 after each response. + * + *

Budget accounting per request: + *

+     *   1st: 500 - 0   - 209 = 291  → forwarded; used becomes 100
+     *   2nd: 500 - 100 - 209 = 191  → forwarded; used becomes 200
+     *   3rd: 500 - 200 - 209 =  91  → forwarded; used becomes 300
+     *   4th: 500 - 300 - 209 =  -9  → rejected with 429
+     * 
+ * + * Bob's separate budget of 10 000 tokens is unaffected, so he can still send requests + * after alice is blocked. + */ + @Test + void alicesTokenBudgetIsExhaustedWhileBobIsUnaffected() throws IOException { + for (int i = 0; i < 3; i++) { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + ALICE) + .body(readFileFromBaseDir("max-output.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(200); + // @formatter:on + } + + // Alice's budget is now exhausted + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + ALICE) + .body(readFileFromBaseDir("max-output.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(429) + .body("error.type", equalTo("rate_limit_error")) + .body("error.code", equalTo("token_limit_exceeded")); + + // Bob's budget is independent — he can still send requests + given() + .contentType("application/json") + .header("Authorization", "Bearer " + BOB) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(200) + .body("object", equalTo("response")); + // @formatter:on + } +} diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java new file mode 100644 index 0000000000..679cfca6a7 --- /dev/null +++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java @@ -0,0 +1,135 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.openai; + +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.time.Duration; + +import static io.restassured.path.json.JsonPath.from; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Integration tests for the streaming (SSE) path of + * {@code distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml}. + * + *

The mock upstream returns {@code Content-Type: text/event-stream} with three + * SSE events so the gateway's SSE processing path is exercised end-to-end without + * a real OpenAI connection: + * + *

    + *
  • {@code response.created} — initial acknowledgement
  • + *
  • {@code response.output_text.delta} — incremental text chunk
  • + *
  • {@code response.completed} — terminal event carrying usage statistics
  • + *
+ * + *

Because RestAssured does not handle server-sent events well, these tests use the + * Java {@link java.net.http.HttpClient} directly — the same approach used in + * {@code ServerSentEventsTutorialTest}. + */ +public class StreamingOpenAiLLMGatewayTutorialTest extends AbstractOpenAiTutorialTest { + + private static final String RESPONSES_ENDPOINT = LOCALHOST_2000 + "/v1/responses"; + + @Override + protected String getTutorialYaml() { + return "10-Basic-LLM-Gateway.yaml"; + } + + /** Tell the mock server to respond as a finite SSE stream. */ + @Override + protected String mockContentType() { + return "text/event-stream"; + } + + /** + * A minimal but complete SSE body: {@code response.created}, one delta event and the terminal + * {@code response.completed} event that carries the usage node the gateway + * reads for token accounting. + */ + @Override + protected String mockResponse() { + return """ + event: response.created + data: {"type":"response.created","response":{"id":"resp_mock","object":"response","status":"in_progress","model":"gpt-5-nano"}} + + event: response.output_text.delta + data: {"type":"response.output_text.delta","item_id":"msg_mock","output_index":0,"content_index":0,"delta":"I am a mock."} + + event: response.completed + data: {"type":"response.completed","response":{"id":"resp_mock","object":"response","status":"completed","model":"gpt-5-nano","output":[{"type":"message","id":"msg_mock","status":"completed","role":"assistant","content":[{"type":"output_text","text":"I am a mock."}]}],"usage":{"input_tokens":50,"output_tokens":50,"total_tokens":100}}} + + """; + } + + /** + * The gateway must forward a streaming request and pass the {@code text/event-stream} + * response through to the client intact. The response body must contain the SSE events + * emitted by the upstream, including the delta text. 
+ */ + @Test + void streamingResponseIsForwarded() throws IOException, InterruptedException { + var response = sendStreamingRequest("stream.json"); + + assertEquals(200, response.statusCode()); + assertTrue(response.headers().firstValue("content-type").orElse("").contains("text/event-stream"), + "Expected Content-Type text/event-stream"); + assertTrue(response.body().contains("response.output_text.delta"), + "SSE body must contain the delta event name"); + assertTrue(response.body().contains("I am a mock."), + "SSE body must contain the delta text"); + assertTrue(response.body().contains("response.completed"), + "SSE body must contain the terminal event"); + } + + /** + * When the request carries {@code "max_output_tokens": 500} and the gateway is + * configured with {@code maxOutputTokens: 200}, the gateway must rewrite the field + * to 200 before forwarding — even for streaming requests. + * + *

The mock captures the forwarded request body so we can assert the capped value. + */ + @Test + void streamingOutputTokensAreCappedBeforeForwarding() throws IOException, InterruptedException { + var response = sendStreamingRequest("max-output-stream.json"); + + assertEquals(200, response.statusCode()); + assertThat(from(lastRequestBody).getInt("max_output_tokens"), equalTo(200)); + } + + // ------------------------------------------------------------------------- + // POSTs the given fixture file to RESPONSES_ENDPOINT (10 s timeout) and returns the response with its body buffered as a String, so callers can use String methods on body() + private HttpResponse<String> sendStreamingRequest(String fixture) throws IOException, InterruptedException { + var request = HttpRequest.newBuilder() + .uri(URI.create(RESPONSES_ENDPOINT)) + .timeout(Duration.ofSeconds(10)) + .header("Content-Type", "application/json") + .header("Authorization", "Bearer test-key") + .POST(HttpRequest.BodyPublishers.ofString(readFileFromBaseDir(fixture))) + .build(); + + try (var client = HttpClient.newHttpClient()) { + return client.send(request, HttpResponse.BodyHandlers.ofString()); + } + } +} diff --git a/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml b/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml new file mode 100644 index 0000000000..ddaaaedcf1 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml @@ -0,0 +1,28 @@ +# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json +# +# Tutorial: Basic LLM Gateway (Anthropic Claude) +# +# Replace <> with your Claude API key. +# +# 1. Hello World +# curl -v -H "Content-Type: application/json" -H "x-api-key: <>" -H "anthropic-version: 2023-06-01" -d @simple.json http://localhost:2000/v1/messages +# Check the response and the Membrane logs. +# +# 2. 
Exceed the input token limit +# curl -v -H "Content-Type: application/json" -H "x-api-key: <>" -H "anthropic-version: 2023-06-01" -d @max-input.json http://localhost:2000/v1/messages +# Returns an error because the request exceeds maxInputTokens. +# +# 3. 
Exceed the output token limit +# curl -v -H "Content-Type: application/json" -H "x-api-key: <>" -H "anthropic-version: 2023-06-01" -d @max-output.json http://localhost:2000/v1/messages +# Check the Membrane log for limiting max tokens to 200 + +api: + port: 2000 + flow: + - llmGateway: + claude: {} + policies: + maxInputTokens: 100 + maxOutputTokens: 200 + target: + url: https://api.anthropic.com \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml new file mode 100644 index 0000000000..3a6a54f2f4 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml @@ -0,0 +1,57 @@ +# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json +# +# Tutorial: Sharing LLM API Keys (Claude) +# +# Replace <> with your Claude API key. +# +# Requests: +# +# 1. Hello AI +# curl -v -H "Content-Type: application/json" -H "x-api-key: abc123" -H "anthropic-version: 2023-06-01" -d @simple.json http://localhost:2000/v1/messages +# Check: Successful response +# +# 2. Token Limit Exceeded +# Repeat the previous request until you receive: 429 Token Limit Exceeded +# User alice is blocked after the limit is exceeded. Bob should still be able to send requests. +# +# 3. Wrong Model +# curl -v -H "Content-Type: application/json" -H "x-api-key: abc123" -H "anthropic-version: 2023-06-01" -d @wrong-model.json http://localhost:2000/v1/messages +# Check: Error response +# +# 4. Max. Input Tokens Exceeded +# curl -v -H "Content-Type: application/json" -H "x-api-key: abc123" -H "anthropic-version: 2023-06-01" -d @max-input.json http://localhost:2000/v1/messages +# Check: Error response +# +# 5. Requested Max. 
Output Tokens Exceeded +# curl -v -H "Content-Type: application/json" -H "x-api-key: abc123" -H "anthropic-version: 2023-06-01" -d @max-output.json http://localhost:2000/v1/messages +# Check Membrane log: totalTokens should not exceed 200 even though it was requested in max-output.json + +api: + port: 2000 + flow: + - llmGateway: + claude: {} + apiKey: <> + policies: + # Limits per request + maxInputTokens: 100 + maxOutputTokens: 200 + models: + - claude-sonnet-4-0 + - claude-opus-4-0 + - claude-haiku-3-5 + simpleStore: + # User-facing API keys for the LLM Gateway + users: + - name: alice + apiKey: abc123 + tokens: 250 # Token limit for alice + - name: bob + apiKey: qwertz + tokens: 10000 + # Time in seconds after which the token limit is reset + limitResetPeriod: 60 + - request: + - log: {} + target: + url: https://api.anthropic.com diff --git a/distribution/tutorials/ai/llm-gateway/claude/max-input.json b/distribution/tutorials/ai/llm-gateway/claude/max-input.json new file mode 100644 index 0000000000..a51d79d50e --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/max-input.json @@ -0,0 +1,10 @@ +{ + "model": "claude-sonnet-4-0", + "max_tokens": 100, + "messages": [ + { + "role": "user", + "content": "Who are you, where do you get your information from, how do you answer questions, why were you created, what kinds of problems can you solve, where do you go when you search for information, how do you decide what is important, what do you know about programming, science, history, languages, and technology, how do you explain difficult concepts to people, why do people use AI assistants, what happens when you do not know an answer, and why should someone trust the answers you provide?" 
+ } + ] +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/claude/max-output.json b/distribution/tutorials/ai/llm-gateway/claude/max-output.json new file mode 100644 index 0000000000..b3746f34c6 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/max-output.json @@ -0,0 +1,10 @@ +{ + "model": "claude-sonnet-4-0", + "max_tokens": 500, + "messages": [ + { + "role": "user", + "content": "Explain in detail who you are?" + } + ] +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/claude/membrane.cmd b/distribution/tutorials/ai/llm-gateway/claude/membrane.cmd new file mode 100644 index 0000000000..8d2d64e9cf --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/membrane.cmd @@ -0,0 +1,24 @@ +@echo off +setlocal EnableExtensions + +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" + +set "dir=%SCRIPT_DIR%" + +:search_up +if exist "%dir%\LICENSE.txt" if exist "%dir%\scripts\run-membrane.cmd" goto found +for %%A in ("%dir%\..") do set "next=%%~fA" +if /I "%next%"=="%dir%" goto notfound +set "dir=%next%" +goto search_up + +:found +set "MEMBRANE_HOME=%dir%" +set "MEMBRANE_CALLER_DIR=%SCRIPT_DIR%" +call "%MEMBRANE_HOME%\scripts\run-membrane.cmd" %* +exit /b %ERRORLEVEL% + +:notfound +>&2 echo Could not locate Membrane root. Ensure directory structure is correct. +exit /b 1 diff --git a/distribution/tutorials/ai/llm-gateway/claude/membrane.sh b/distribution/tutorials/ai/llm-gateway/claude/membrane.sh new file mode 100755 index 0000000000..195dae51ec --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/membrane.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# Default: ./proxies.xml (next to this script); fallback -> $MEMBRANE_HOME/conf/proxies.xml +# JAVA_OPTS: relative -D paths are auto-resolved against $MEMBRANE_HOME (absolute/URI unchanged). 
+# Examples: +# export JAVA_OPTS='-Dlog4j.configurationFile=examples/logging/access/log4j2_access.xml' +# export JAVA_OPTS='-Dlog4j.configurationFile=/abs/path/log4j2.xml' + +SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd -P) + +dir="$SCRIPT_DIR" +while [ "$dir" != "/" ]; do + if [ -f "$dir/LICENSE.txt" ] && [ -f "$dir/scripts/run-membrane.sh" ]; then + export MEMBRANE_HOME="$dir" + export MEMBRANE_CALLER_DIR="$SCRIPT_DIR" + exec sh "$dir/scripts/run-membrane.sh" "$@" + fi + dir=$(dirname "$dir") +done + +echo "Could not locate Membrane root. Ensure directory structure is correct." >&2 +exit 1 \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/claude/simple.json b/distribution/tutorials/ai/llm-gateway/claude/simple.json new file mode 100644 index 0000000000..bd6b974408 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/simple.json @@ -0,0 +1,10 @@ +{ + "model": "claude-sonnet-4-0", + "max_tokens": 100, + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ] +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/claude/wrong-model.json b/distribution/tutorials/ai/llm-gateway/claude/wrong-model.json new file mode 100644 index 0000000000..d149716e51 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/wrong-model.json @@ -0,0 +1,10 @@ +{ + "model": "gpt-5", + "max_tokens": 100, + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ] +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml b/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml new file mode 100644 index 0000000000..2cbf4c236d --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml @@ -0,0 +1,28 @@ +# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json +# +# Tutorial: Basic LLM Gateway (Google Gemini) +# +# Replace <> with your Google API key. +# +# 1. 
Hello World +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: <>" -d @simple.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent +# Check the response and the Membrane logs. +# +# 2. Exceed the input token limit +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: <>" -d @max-input.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent +# Returns an error because the request exceeds maxInputTokens. +# +# 3. Exceed the output token limit +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: <>" -d @max-output.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent +# Check the Membrane log for limiting max tokens to 200 + +api: + port: 2000 + flow: + - llmGateway: + google: {} + policies: + maxInputTokens: 100 + maxOutputTokens: 200 + target: + url: https://generativelanguage.googleapis.com \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml new file mode 100644 index 0000000000..4a9ef00ba4 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml @@ -0,0 +1,57 @@ +# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json +# +# Tutorial: Sharing LLM API Keys (Google Gemini) +# +# Replace <> with your Gemini API key. +# +# Requests: +# +# 1. Hello AI +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: abc123" -d @simple.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent +# Check: Successful response +# +# 2. Token Limit Exceeded +# Repeat the previous request until you receive: 429 Token Limit Exceeded +# User alice is blocked after the limit is exceeded. Bob should still be able to send requests. +# +# 3. 
Wrong Model +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: abc123" -d @simple.json http://localhost:2000/v1beta/models/gpt-5:generateContent +# Check: Error response +# +# 4. Max. Input Tokens Exceeded +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: abc123" -d @max-input.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent +# Check: Error response +# +# 5. Requested Max. Output Tokens Exceeded +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: abc123" -d @max-output.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent +# Check Membrane log: totalTokens should not exceed 200 even though it was requested in max-output.json + +api: + port: 2000 + flow: + - llmGateway: + google: {} + apiKey: <> + policies: + # Limits per request + maxInputTokens: 100 + maxOutputTokens: 200 + models: + - gemini-2.5-pro + - gemini-2.5-flash + - gemini-2.5-flash-lite + - gemini-2.0-flash + - gemini-2.0-flash-lite + simpleStore: + # User-facing API keys for the LLM Gateway + users: + - name: alice + apiKey: abc123 + tokens: 500 # Token limit for alice + - name: bob + apiKey: qwertz + tokens: 10000 + # Time in seconds after which the token limit is reset + limitResetPeriod: 60 + target: + url: https://generativelanguage.googleapis.com diff --git a/distribution/tutorials/ai/llm-gateway/google/max-input.json b/distribution/tutorials/ai/llm-gateway/google/max-input.json new file mode 100644 index 0000000000..017608297f --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/max-input.json @@ -0,0 +1,11 @@ +{ + "contents": [ + { + "parts": [ + { + "text": "Who are you, where do you get your information from, how do you answer questions, why were you created, what kinds of problems can you solve, where do you go when you search for information, how do you decide what is important, what do you know about programming, science, history, languages, and technology, how do you explain difficult 
concepts to people, why do people use AI assistants, what happens when you do not know an answer, and why should someone trust the answers you provide?" + } + ] + } + ] +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/google/max-output.json b/distribution/tutorials/ai/llm-gateway/google/max-output.json new file mode 100644 index 0000000000..615c6db3a0 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/max-output.json @@ -0,0 +1,14 @@ +{ + "contents": [ + { + "parts": [ + { + "text": "Explain in detail who you are?" + } + ] + } + ], + "generationConfig": { + "maxOutputTokens": 500 + } +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/google/membrane.cmd b/distribution/tutorials/ai/llm-gateway/google/membrane.cmd new file mode 100644 index 0000000000..8d2d64e9cf --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/membrane.cmd @@ -0,0 +1,24 @@ +@echo off +setlocal EnableExtensions + +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" + +set "dir=%SCRIPT_DIR%" + +:search_up +if exist "%dir%\LICENSE.txt" if exist "%dir%\scripts\run-membrane.cmd" goto found +for %%A in ("%dir%\..") do set "next=%%~fA" +if /I "%next%"=="%dir%" goto notfound +set "dir=%next%" +goto search_up + +:found +set "MEMBRANE_HOME=%dir%" +set "MEMBRANE_CALLER_DIR=%SCRIPT_DIR%" +call "%MEMBRANE_HOME%\scripts\run-membrane.cmd" %* +exit /b %ERRORLEVEL% + +:notfound +>&2 echo Could not locate Membrane root. Ensure directory structure is correct. 
+exit /b 1 diff --git a/distribution/tutorials/ai/llm-gateway/google/membrane.sh b/distribution/tutorials/ai/llm-gateway/google/membrane.sh new file mode 100755 index 0000000000..195dae51ec --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/membrane.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# Default: ./proxies.xml (next to this script); fallback -> $MEMBRANE_HOME/conf/proxies.xml +# JAVA_OPTS: relative -D paths are auto-resolved against $MEMBRANE_HOME (absolute/URI unchanged). +# Examples: +# export JAVA_OPTS='-Dlog4j.configurationFile=examples/logging/access/log4j2_access.xml' +# export JAVA_OPTS='-Dlog4j.configurationFile=/abs/path/log4j2.xml' + +SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd -P) + +dir="$SCRIPT_DIR" +while [ "$dir" != "/" ]; do + if [ -f "$dir/LICENSE.txt" ] && [ -f "$dir/scripts/run-membrane.sh" ]; then + export MEMBRANE_HOME="$dir" + export MEMBRANE_CALLER_DIR="$SCRIPT_DIR" + exec sh "$dir/scripts/run-membrane.sh" "$@" + fi + dir=$(dirname "$dir") +done + +echo "Could not locate Membrane root. Ensure directory structure is correct." >&2 +exit 1 \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/google/simple.json b/distribution/tutorials/ai/llm-gateway/google/simple.json new file mode 100644 index 0000000000..3bf6c67b2e --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/simple.json @@ -0,0 +1,11 @@ +{ + "contents": [ + { + "parts": [ + { + "text": "Who are you?" + } + ] + } + ] +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml b/distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml new file mode 100644 index 0000000000..0074494b40 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml @@ -0,0 +1,27 @@ +# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json +# +# Tutorial: Basic LLM Gateway (OpenAI) +# +# Replace <> with your OpenAI API key. +# +# 1. 
Hello World +# curl -H "Content-Type: application/json" -H "Authorization: Bearer <>" -d @simple.json http://localhost:2000/v1/responses +# +# 2. Exceed the input token limit +# curl -H "Content-Type: application/json" -H "Authorization: Bearer <>" -d @max-input.json http://localhost:2000/v1/responses +# Returns an error because the request exceeds maxInputTokens. +# +# 3. Exceed the output token limit +# curl -H "Content-Type: application/json" -H "Authorization: Bearer <>" -d @max-output.json http://localhost:2000/v1/responses +# Check the max_output_tokens field in the response and the Membrane log + +api: + port: 2000 + flow: + - llmGateway: + openai: {} + policies: + maxInputTokens: 100 + maxOutputTokens: 200 + target: + url: https://api.openai.com diff --git a/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml new file mode 100644 index 0000000000..8aa3e72f4d --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml @@ -0,0 +1,55 @@ +# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json +# +# Tutorial: Sharing LLM API Keys (OpenAI) +# +# Replace <> with your OpenAI API key. +# +# Requests: +# +# 1. Hello AI +# curl -v -H "Content-Type: application/json" -H "Authorization: Bearer abc123" -d @simple.json http://localhost:2000/v1/responses +# Check: Successful response +# +# 2. Token Limit Exceeded +# Repeat the previous request until you receive: 429 Token Limit Exceeded +# User alice is blocked after the limit is exceeded. Bob should still be able to send requests. +# +# 3. Wrong Model +# curl -v -H "Content-Type: application/json" -H "Authorization: Bearer abc123" -d @wrong-model.json http://localhost:2000/v1/responses +# Check: Error response +# +# 4. Max. 
Input Tokens Exceeded +# curl -v -H "Content-Type: application/json" -H "Authorization: Bearer abc123" -d @max-input.json http://localhost:2000/v1/responses +# Check: Error response +# +# 5. Requested Max. Output Tokens Exceeded +# curl -v -H "Content-Type: application/json" -H "Authorization: Bearer abc123" -d @max-output.json http://localhost:2000/v1/responses +# Check: Field max_output_tokens in the response + +api: + port: 2000 + flow: + - llmGateway: + apiKey: <> + policies: + # Limits per request + maxInputTokens: 100 + maxOutputTokens: 200 + models: + - gpt-5.4 + - gpt-5-nano + - gpt-5-mini + openai: {} + simpleStore: + # User-facing API keys for the LLM Gateway + users: + - name: alice + apiKey: abc123 + tokens: 500 # Token limit for alice + - name: bob + apiKey: qwertz + tokens: 10000 + # Time in seconds after which the token limit is reset + limitResetPeriod: 60 + target: + url: https://api.openai.com/ diff --git a/distribution/tutorials/ai/llm-gateway/openai/max-input.json b/distribution/tutorials/ai/llm-gateway/openai/max-input.json new file mode 100644 index 0000000000..e4b0e90985 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/max-input.json @@ -0,0 +1,4 @@ +{ + "model": "gpt-5-nano", + "input": "Who are you, where do you get your information from, how do you answer questions, why were you created, what kinds of problems can you solve, where do you go when you search for information, how do you decide what is important, what do you know about programming, science, history, languages, and technology, how do you explain difficult concepts to people, why do people use AI assistants, what happens when you do not know an answer, and why should someone trust the answers you provide?" 
+} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/openai/max-output-stream.json b/distribution/tutorials/ai/llm-gateway/openai/max-output-stream.json new file mode 100644 index 0000000000..0a747d70e4 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/max-output-stream.json @@ -0,0 +1,6 @@ +{ + "model": "gpt-5-nano", + "input": "Explain in detail who you are?", + "max_output_tokens": 500, + "stream": true +} diff --git a/distribution/tutorials/ai/llm-gateway/openai/max-output.json b/distribution/tutorials/ai/llm-gateway/openai/max-output.json new file mode 100644 index 0000000000..cc7e04017f --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/max-output.json @@ -0,0 +1,5 @@ +{ + "model": "gpt-5-nano", + "input": "Explain in detail who you are?", + "max_output_tokens": 500 +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/openai/membrane.cmd b/distribution/tutorials/ai/llm-gateway/openai/membrane.cmd new file mode 100644 index 0000000000..8d2d64e9cf --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/membrane.cmd @@ -0,0 +1,24 @@ +@echo off +setlocal EnableExtensions + +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" + +set "dir=%SCRIPT_DIR%" + +:search_up +if exist "%dir%\LICENSE.txt" if exist "%dir%\scripts\run-membrane.cmd" goto found +for %%A in ("%dir%\..") do set "next=%%~fA" +if /I "%next%"=="%dir%" goto notfound +set "dir=%next%" +goto search_up + +:found +set "MEMBRANE_HOME=%dir%" +set "MEMBRANE_CALLER_DIR=%SCRIPT_DIR%" +call "%MEMBRANE_HOME%\scripts\run-membrane.cmd" %* +exit /b %ERRORLEVEL% + +:notfound +>&2 echo Could not locate Membrane root. Ensure directory structure is correct. 
+exit /b 1 diff --git a/distribution/tutorials/ai/llm-gateway/openai/membrane.sh b/distribution/tutorials/ai/llm-gateway/openai/membrane.sh new file mode 100755 index 0000000000..195dae51ec --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/membrane.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# Default: ./proxies.xml (next to this script); fallback -> $MEMBRANE_HOME/conf/proxies.xml +# JAVA_OPTS: relative -D paths are auto-resolved against $MEMBRANE_HOME (absolute/URI unchanged). +# Examples: +# export JAVA_OPTS='-Dlog4j.configurationFile=examples/logging/access/log4j2_access.xml' +# export JAVA_OPTS='-Dlog4j.configurationFile=/abs/path/log4j2.xml' + +SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd -P) + +dir="$SCRIPT_DIR" +while [ "$dir" != "/" ]; do + if [ -f "$dir/LICENSE.txt" ] && [ -f "$dir/scripts/run-membrane.sh" ]; then + export MEMBRANE_HOME="$dir" + export MEMBRANE_CALLER_DIR="$SCRIPT_DIR" + exec sh "$dir/scripts/run-membrane.sh" "$@" + fi + dir=$(dirname "$dir") +done + +echo "Could not locate Membrane root. Ensure directory structure is correct." >&2 +exit 1 \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/openai/simple.json b/distribution/tutorials/ai/llm-gateway/openai/simple.json new file mode 100644 index 0000000000..ab3c4b7bde --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/simple.json @@ -0,0 +1,4 @@ +{ + "model": "gpt-5-nano", + "input": "Who are you?" 
+} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/openai/stream.json b/distribution/tutorials/ai/llm-gateway/openai/stream.json new file mode 100644 index 0000000000..1c75ce00aa --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/stream.json @@ -0,0 +1,5 @@ +{ + "model": "gpt-5-nano", + "input": "Who are you?", + "stream": true +} diff --git a/distribution/tutorials/ai/llm-gateway/openai/wrong-model.json b/distribution/tutorials/ai/llm-gateway/openai/wrong-model.json new file mode 100644 index 0000000000..7a551564a2 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/wrong-model.json @@ -0,0 +1,4 @@ +{ + "model": "gpt-4", + "input": "Who are you?" +} \ No newline at end of file