Skip to content

Commit fae2b90

Browse files
committed
feat(server): propagate parallel_tool_calls across all OpenAI-compatible surfaces
#244 made the chat core honor parallel_tool_calls, but only the OpenAI /v1/chat/completions surface forwarded it; the alternative protocol surfaces (which translate into that same chat core) silently dropped the equivalent flag. Close the gap:

- Anthropic /v1/messages (AnthropicApiSupport.toOpenAiChatRequest): map tool_choice.disable_parallel_tool_use=true -> parallel_tool_calls=false (default stays parallel when unset/false).
- OpenAI Responses /v1/responses (ResponsesApiSupport.toOpenAiChatRequest): forward parallel_tool_calls, and also forward tool_choice (string form), which was being dropped entirely — both now reach the shared OpenAiRequestMapper.

Tests:

- AnthropicApiSupportTest / ResponsesApiSupportTest: unit-cover the new mappings (set, and omitted-when-absent).
- OpenAiServerToolCallingIntegrationTest (new): real-model end-to-end over HTTP using the Qwen2.5-1.5B tool model #244 wired into CI. tool_choice="required" forces a call, so it deterministically asserts the server returns a well-formed tool_calls array (arguments as a JSON string, llama.cpp #20198) and that parallel_tool_calls=false travels HTTP -> mapper -> native intact. Self-skips when the model is absent.

Verified locally: spotless, compile, spotbugs clean; model-free translator tests pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01JdLpWD8nedY7LwNnHefZLF
1 parent cf65c3d commit fae2b90

5 files changed

Lines changed: 182 additions & 0 deletions

File tree

src/main/java/net/ladenthin/llama/server/AnthropicApiSupport.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ static ObjectNode toOpenAiChatRequest(JsonNode request) {
8585
if (toolChoice != null) {
8686
openAi.put("tool_choice", toolChoice);
8787
}
88+
// Anthropic expresses "no parallel tool use" via tool_choice.disable_parallel_tool_use;
89+
// OpenAI's equivalent is parallel_tool_calls=false. Map it so the shared chat core honors
90+
// a client's request to serialize tool calls (default stays parallel when unset/false).
91+
if (request.path("tool_choice").path("disable_parallel_tool_use").asBoolean(false)) {
92+
openAi.put("parallel_tool_calls", false);
93+
}
8894
}
8995

9096
copyNumber(request, "max_tokens", openAi, "max_tokens");

src/main/java/net/ladenthin/llama/server/ResponsesApiSupport.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,17 @@ static ObjectNode toOpenAiChatRequest(JsonNode request) {
7979
function.set("parameters", tool.path("parameters").deepCopy());
8080
}
8181
}
82+
// The Responses API uses the same tool_choice + parallel_tool_calls fields as chat; forward
83+
// them so the shared chat core honors them. The mapper consumes the string form of
84+
// tool_choice ("auto"/"none"/"required"), which is what we forward here.
85+
if (request.path("tool_choice").isTextual()) {
86+
openAi.put("tool_choice", request.path("tool_choice").asText());
87+
}
88+
if (request.path("parallel_tool_calls").isBoolean()) {
89+
openAi.put(
90+
"parallel_tool_calls",
91+
request.path("parallel_tool_calls").asBoolean());
92+
}
8293
}
8394

8495
copyNumber(request, "temperature", openAi, "temperature");

src/test/java/net/ladenthin/llama/server/AnthropicApiSupportTest.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,4 +160,25 @@ public void sseEventBuildersAreWellFormed() throws IOException {
160160
is("assistant"));
161161
assertThat(AnthropicApiSupport.messageStopEvent().startsWith("event: message_stop"), is(true));
162162
}
163+
164+
@Test
165+
public void requestMapsDisableParallelToolUseToParallelToolCallsFalse() throws IOException {
166+
// Anthropic tool_choice.disable_parallel_tool_use=true -> OpenAI parallel_tool_calls=false.
167+
JsonNode openAi = AnthropicApiSupport.toOpenAiChatRequest(read("{\"model\":\"m\","
168+
+ "\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}],"
169+
+ "\"tools\":[{\"name\":\"get_weather\",\"input_schema\":{\"type\":\"object\"}}],"
170+
+ "\"tool_choice\":{\"type\":\"auto\",\"disable_parallel_tool_use\":true}}"));
171+
assertThat(openAi.path("parallel_tool_calls").isBoolean(), is(true));
172+
assertThat(openAi.path("parallel_tool_calls").asBoolean(), is(false));
173+
}
174+
175+
@Test
176+
public void requestOmitsParallelToolCallsWhenParallelToolUseAllowed() throws IOException {
177+
// disable_parallel_tool_use absent -> default (parallel allowed) -> no override emitted.
178+
JsonNode openAi = AnthropicApiSupport.toOpenAiChatRequest(read("{\"model\":\"m\","
179+
+ "\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}],"
180+
+ "\"tools\":[{\"name\":\"get_weather\",\"input_schema\":{\"type\":\"object\"}}],"
181+
+ "\"tool_choice\":{\"type\":\"auto\"}}"));
182+
assertThat(openAi.has("parallel_tool_calls"), is(false));
183+
}
163184
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
2+
//
3+
// SPDX-License-Identifier: MIT
4+
5+
package net.ladenthin.llama.server;
6+
7+
import static org.hamcrest.MatcherAssert.assertThat;
8+
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
9+
import static org.hamcrest.Matchers.is;
10+
11+
import com.fasterxml.jackson.databind.JsonNode;
12+
import com.fasterxml.jackson.databind.ObjectMapper;
13+
import java.io.File;
14+
import java.io.IOException;
15+
import net.ladenthin.llama.LlamaModel;
16+
import net.ladenthin.llama.TestConstants;
17+
import net.ladenthin.llama.parameters.ModelParameters;
18+
import org.junit.jupiter.api.AfterAll;
19+
import org.junit.jupiter.api.Assumptions;
20+
import org.junit.jupiter.api.BeforeAll;
21+
import org.junit.jupiter.api.Test;
22+
23+
/**
24+
* End-to-end tool-calling integration test for {@link OpenAiCompatServer}, driven over a real socket
25+
* against the Qwen2.5-1.5B-Instruct tool model — a stronger tool-calling family than the 0.6B reasoning
26+
* model used by {@link OpenAiCompatServerIntegrationTest}, so it actually emits tool calls. The model is
27+
* resolved from {@link TestConstants#PROP_TOOL_MODEL_PATH} (CI sets it; otherwise
28+
* {@link TestConstants#DEFAULT_TOOL_MODEL_PATH}) and the test self-skips when the GGUF is absent, so a
29+
* model-free checkout is never broken.
30+
*
31+
* <p>Where {@link OpenAiCompatServerIntegrationTest}'s tool test can only assert a structurally valid
32+
* message (the 0.6B model may not elect to call), these force a call via {@code tool_choice:"required"}
33+
* so the native grammar must emit one — letting us assert, deterministically, that the HTTP server
34+
* returns a well-formed OpenAI {@code tool_calls} array with {@code arguments} carried as a JSON
35+
* <em>string</em> (the agentic-client invariant, llama.cpp #20198), and that #244's
36+
* {@code parallel_tool_calls} flag travels HTTP &rarr; mapper &rarr; native without breaking the request.
37+
*/
38+
public class OpenAiServerToolCallingIntegrationTest extends OpenAiServerTestSupport {
39+
40+
private static final ObjectMapper MAPPER = new ObjectMapper();
41+
private static final String MODEL_ID = "qwen25-tools";
42+
43+
/** A trivial single-required-argument function; {@code tool_choice:"required"} forces a call. */
44+
private static final String TOOLS = "\"tools\":[{\"type\":\"function\",\"function\":{"
45+
+ "\"name\":\"get_weather\",\"description\":\"Get the weather for a city\","
46+
+ "\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},"
47+
+ "\"required\":[\"city\"]}}}]";
48+
49+
private static LlamaModel model;
50+
private static OpenAiCompatServer server;
51+
private static int port;
52+
53+
@BeforeAll
54+
public static void setup() throws IOException {
55+
String modelPath =
56+
System.getProperty(TestConstants.PROP_TOOL_MODEL_PATH, TestConstants.DEFAULT_TOOL_MODEL_PATH);
57+
Assumptions.assumeTrue(
58+
new File(modelPath).exists(),
59+
"Tool-calling model (Qwen2.5-1.5B) not found, skipping server tool-calling test: " + modelPath);
60+
int gpuLayers = Integer.getInteger(TestConstants.PROP_TEST_NGL, TestConstants.DEFAULT_TEST_NGL);
61+
model = new LlamaModel(new ModelParameters()
62+
.setModel(modelPath)
63+
.setCtxSize(4096)
64+
.setGpuLayers(gpuLayers)
65+
.setFit(false)
66+
.setParallel(1));
67+
server = new OpenAiCompatServer(
68+
model,
69+
OpenAiServerConfig.builder().port(0).modelId(MODEL_ID).build())
70+
.start();
71+
port = server.getPort();
72+
}
73+
74+
@AfterAll
75+
public static void tearDown() {
76+
if (server != null) {
77+
server.close();
78+
}
79+
if (model != null) {
80+
model.close();
81+
}
82+
}
83+
84+
@Test
85+
public void requiredToolChoiceReturnsWellFormedToolCalls() throws IOException {
86+
// tool_choice=required forces a function call, so a capable model deterministically returns a
87+
// structurally valid OpenAI tool_calls array regardless of its exact wording.
88+
String body = "{\"model\":\"" + MODEL_ID + "\",\"max_tokens\":64,\"tool_choice\":\"required\","
89+
+ "\"messages\":[{\"role\":\"user\",\"content\":\"What is the weather in Paris?\"}],"
90+
+ TOOLS + "}";
91+
Response response = post(port, "/v1/chat/completions", body, "");
92+
assertThat(response.code, is(200));
93+
JsonNode toolCalls = MAPPER.readTree(response.body)
94+
.path("choices")
95+
.get(0)
96+
.path("message")
97+
.path("tool_calls");
98+
assertThat(toolCalls.isArray(), is(true));
99+
assertThat(toolCalls.size(), greaterThanOrEqualTo(1));
100+
JsonNode function = toolCalls.get(0).path("function");
101+
assertThat(function.path("name").asText(), is("get_weather"));
102+
// arguments must be a JSON *string* (not an inlined object) — the agentic-client invariant.
103+
assertThat(function.path("arguments").isTextual(), is(true));
104+
assertThat(MAPPER.readTree(function.path("arguments").asText()).isObject(), is(true));
105+
}
106+
107+
@Test
108+
public void parallelToolCallsFalseIsAcceptedEndToEnd() throws IOException {
109+
// parallel_tool_calls=false must flow HTTP -> OpenAiRequestMapper -> native without breaking the
110+
// request; tool_choice=required still yields a well-formed tool call.
111+
String body = "{\"model\":\"" + MODEL_ID + "\",\"max_tokens\":64,\"tool_choice\":\"required\","
112+
+ "\"parallel_tool_calls\":false,"
113+
+ "\"messages\":[{\"role\":\"user\",\"content\":\"What is the weather in Paris?\"}],"
114+
+ TOOLS + "}";
115+
Response response = post(port, "/v1/chat/completions", body, "");
116+
assertThat(response.code, is(200));
117+
JsonNode toolCalls = MAPPER.readTree(response.body)
118+
.path("choices")
119+
.get(0)
120+
.path("message")
121+
.path("tool_calls");
122+
assertThat(toolCalls.isArray(), is(true));
123+
assertThat(toolCalls.size(), greaterThanOrEqualTo(1));
124+
}
125+
}

src/test/java/net/ladenthin/llama/server/ResponsesApiSupportTest.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,4 +121,23 @@ public void responseEmitsFunctionCallItemsForToolCalls() throws IOException {
121121
assertThat(functionCall.path("name").asText(), is("f"));
122122
assertThat(functionCall.path("arguments").asText(), is("{\"a\":1}"));
123123
}
124+
125+
@Test
126+
public void requestForwardsToolChoiceAndParallelToolCalls() throws IOException {
127+
// The Responses API uses the same tool_choice + parallel_tool_calls fields as chat.
128+
JsonNode openAi = ResponsesApiSupport.toOpenAiChatRequest(read("{\"model\":\"m\",\"input\":\"hi\","
129+
+ "\"tools\":[{\"type\":\"function\",\"name\":\"f\",\"parameters\":{\"type\":\"object\"}}],"
130+
+ "\"tool_choice\":\"required\",\"parallel_tool_calls\":false}"));
131+
assertThat(openAi.path("tool_choice").asText(), is("required"));
132+
assertThat(openAi.path("parallel_tool_calls").isBoolean(), is(true));
133+
assertThat(openAi.path("parallel_tool_calls").asBoolean(), is(false));
134+
}
135+
136+
@Test
137+
public void requestOmitsToolChoiceAndParallelToolCallsWhenAbsent() throws IOException {
138+
JsonNode openAi = ResponsesApiSupport.toOpenAiChatRequest(read("{\"model\":\"m\",\"input\":\"hi\","
139+
+ "\"tools\":[{\"type\":\"function\",\"name\":\"f\",\"parameters\":{\"type\":\"object\"}}]}"));
140+
assertThat(openAi.has("tool_choice"), is(false));
141+
assertThat(openAi.has("parallel_tool_calls"), is(false));
142+
}
124143
}

0 commit comments

Comments
 (0)