Skip to content

Commit 6acd081

Browse files
Merge pull request #244 from vaiju1981/tools-support
feat: parallel_tool_calls + tool-calling tests
2 parents af13cf0 + 0a56e49 commit 6acd081

17 files changed

Lines changed: 525 additions & 48 deletions

.github/validate-models.bat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ REM GGUF files start with magic bytes: 0x47 0x47 0x55 0x46 ("GGUF")
99

1010
setlocal enabledelayedexpansion
1111

12-
set "MODELS=models\codellama-7b.Q2_K.gguf" "models\jina-reranker-v1-tiny-en-Q4_0.gguf" "models\AMD-Llama-135m-code.Q2_K.gguf" "models\Qwen3-0.6B-Q4_K_M.gguf"
12+
set "MODELS=models\codellama-7b.Q2_K.gguf" "models\jina-reranker-v1-tiny-en-Q4_0.gguf" "models\AMD-Llama-135m-code.Q2_K.gguf" "models\Qwen3-0.6B-Q4_K_M.gguf" "models\Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
1313

1414
REM Vision GGUFs are validated only when present (the Windows job downloads
1515
REM them too, but the validation step must not fail when a future job opts

.github/validate-models.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ MODELS=(
1515
"models/jina-reranker-v1-tiny-en-Q4_0.gguf"
1616
"models/AMD-Llama-135m-code.Q2_K.gguf"
1717
"models/Qwen3-0.6B-Q4_K_M.gguf"
18+
"models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
1819
)
1920

2021
# Optional GGUFs validated only when present so jobs that do not download

.github/workflows/publish.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ env:
2525
DRAFT_MODEL_NAME: "AMD-Llama-135m-code.Q2_K.gguf"
2626
REASONING_MODEL_URL: "https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q4_K_M.gguf"
2727
REASONING_MODEL_NAME: "Qwen3-0.6B-Q4_K_M.gguf"
28+
TOOL_MODEL_URL: "https://huggingface.co/bartowski/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
29+
TOOL_MODEL_NAME: "Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
2830
NOMIC_EMBED_MODEL_URL: "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf"
2931
NOMIC_EMBED_MODEL_NAME: "nomic-embed-text-v1.5.f16.gguf"
3032
# Vision model + mmproj for MultimodalIntegrationTest (issues #103 / #34).
@@ -405,6 +407,8 @@ jobs:
405407
run: curl -L --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
406408
- name: Download reasoning model
407409
run: curl -L --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
410+
- name: Download tool-calling model
411+
run: curl -L --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
408412
- name: Download nomic embedding model (issue #98 regression)
409413
run: curl -L --fail --retry 5 --retry-all-errors ${NOMIC_EMBED_MODEL_URL} --create-dirs -o models/${NOMIC_EMBED_MODEL_NAME}
410414
- name: Download vision model (issues #103 / #34)
@@ -428,6 +432,7 @@ jobs:
428432
- name: Run tests
429433
run: |
430434
mvn -e --no-transfer-progress -P jcstress test \
435+
-Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
431436
-Dnet.ladenthin.llama.nomic.path=models/${NOMIC_EMBED_MODEL_NAME} \
432437
-Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
433438
-Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
@@ -526,6 +531,8 @@ jobs:
526531
run: curl -L --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
527532
- name: Download reasoning model
528533
run: curl -L --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
534+
- name: Download tool-calling model
535+
run: curl -L --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
529536
- name: Download vision model (issues #103 / #34)
530537
run: curl -L --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
531538
- name: Download vision mmproj
@@ -545,6 +552,7 @@ jobs:
545552
- name: Run tests
546553
run: |
547554
mvn -e --no-transfer-progress -Dnet.ladenthin.llama.test.ngl=0 test \
555+
-Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
548556
-Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
549557
-Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
550558
-Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
@@ -590,6 +598,8 @@ jobs:
590598
run: curl -L --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
591599
- name: Download reasoning model
592600
run: curl -L --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
601+
- name: Download tool-calling model
602+
run: curl -L --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
593603
- name: Download vision model (issues #103 / #34)
594604
run: curl -L --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
595605
- name: Download vision mmproj
@@ -609,6 +619,7 @@ jobs:
609619
- name: Run tests
610620
run: |
611621
mvn -e --no-transfer-progress test \
622+
-Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
612623
-Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
613624
-Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
614625
-Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
@@ -654,6 +665,8 @@ jobs:
654665
run: curl -L --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
655666
- name: Download reasoning model
656667
run: curl -L --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
668+
- name: Download tool-calling model
669+
run: curl -L --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
657670
- name: Download vision model (issues #103 / #34)
658671
run: curl -L --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
659672
- name: Download vision mmproj
@@ -673,6 +686,7 @@ jobs:
673686
- name: Run tests
674687
run: |
675688
mvn -e --no-transfer-progress test \
689+
-Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
676690
-Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
677691
-Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
678692
-Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
@@ -721,6 +735,8 @@ jobs:
721735
run: curl -L --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME
722736
- name: Download reasoning model
723737
run: curl -L --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
738+
- name: Download tool-calling model
739+
run: curl -L --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
724740
- name: Download vision model (issues #103 / #34)
725741
run: curl -L --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
726742
- name: Download vision mmproj
@@ -756,6 +772,7 @@ jobs:
756772
- name: Run tests
757773
run: |
758774
mvn -e --no-transfer-progress test `
775+
"-Dnet.ladenthin.llama.tool.model=models/$env:TOOL_MODEL_NAME" `
759776
"-Dnet.ladenthin.llama.vision.model=models/$env:VISION_MODEL_NAME" `
760777
"-Dnet.ladenthin.llama.vision.mmproj=models/$env:VISION_MMPROJ_NAME" `
761778
"-Dnet.ladenthin.llama.vision.image=$env:VISION_IMAGE_PATH"

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,4 +51,8 @@ src/test/resources/**/*.gbnf
5151
src/main/cpp/llama.cpp/
5252

5353
# jcstress / jqwik test outputs (generated in repo root)
54-
/.jqwik-database
54+
/.jqwik-database
55+
56+
# Local AI agent tooling (not part of the project)
57+
AGENTS.md
58+
.agents/

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,16 @@ from version 5.0.0 onward. Pre-fork releases (`1.x`–`4.2.0`) were authored by
1313
- `CODE_OF_CONDUCT.md` (Contributor Covenant 2.0).
1414
- `docs/RELEASE.md` capturing the maintainer-facing release procedure (moved out of CHANGELOG).
1515
- OpenSSF Best Practices badge (project 12862) on README.
16+
- OpenAI-compatible `parallel_tool_calls` support: `ChatRequest.withParallelToolCalls(Boolean)` / `getParallelToolCalls()`, `InferenceParameters.withParallelToolCalls(boolean)`, and pass-through in the `/v1/chat/completions` server mapper.
17+
- Real-model tool-calling integration tests for blocking and streaming required tool calls (`ToolCallingIntegrationTest`, Qwen2.5-1.5B-Instruct), wired into CI and `validate-models`.
1618

1719
### Changed
1820
- Unified `CONTRIBUTING.md` and `SECURITY.md` structure with sibling repositories in the project family.
1921
- Reconciled Java baseline to **11+** across `pom.xml`, README badge, `CLAUDE.md`, and `CONTRIBUTING.md`.
2022
- README license badge corrected from "Apache 2.0" to "MIT" (matches `LICENSE` file and `pom.xml`).
2123
- `pom.xml` SCM URL: `tree/master` → `tree/main` (default branch renamed).
2224
- Upgraded llama.cpp from b9151 to b9172.
25+
- Extracted the `chatWithTools` agent loop into `ToolCallingAgent`; tool-result errors (unknown tool / handler exception) are now JSON-serialized so tool names containing special characters remain valid JSON.
2326

2427
### Added
2528
- Reasoning-budget tests (Qwen3-0.6B).

README.md

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,8 @@ Every `net.ladenthin.llama.*` system property recognised by the library, deep-sc
259259
| `net.ladenthin.llama.lib.path` | unset (falls back to `java.library.path`) | runtime | `LlamaLoader` | Directory containing the native `jllama` shared library. Checked first, before `java.library.path`. Set with `-Dnet.ladenthin.llama.lib.path=/path/to/dir`. |
260260
| `net.ladenthin.llama.tmpdir` | unset (falls back to `java.io.tmpdir`) | runtime | `LlamaLoader` | Custom temporary directory used when extracting the native library from the JAR. |
261261
| `net.ladenthin.llama.osinfo.architecture` | unset (uses `os.arch`) | runtime | `OSInfo` | Override for the architecture string used to locate the bundled library inside the JAR. Useful when `os.arch` reports an unexpected value (e.g. inside dockcross / chrooted environments). |
262-
| `net.ladenthin.llama.test.ngl` | `43` | test | `LlamaModelTest`, `RerankingModelTest`, `ChatScenarioTest`, `ChatAdvancedTest`, `ErrorHandlingTest`, `SessionConcurrencyTest`, `ConfigureParallelInferenceTest`, `MultimodalIntegrationTest` (via `Integer.getInteger(TestConstants.PROP_TEST_NGL, TestConstants.DEFAULT_TEST_NGL)`) | Number of GPU layers used during testing. Pin to `0` on CPU-only hosts: `mvn test -Dnet.ladenthin.llama.test.ngl=0`. |
262+
| `net.ladenthin.llama.test.ngl` | `43` for the general suite; `0` for `ToolCallingIntegrationTest` | test | Model-backed integration tests | Number of GPU layers used during testing. Pin to `0` on CPU-only hosts: `mvn test -Dnet.ladenthin.llama.test.ngl=0`. The tool test also selects device `none` at zero layers so Metal/CUDA is not initialized. |
263+
| `net.ladenthin.llama.tool.model` | `models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf` (test self-skips if missing) | test | `ToolCallingIntegrationTest` | Path to a tool-capable GGUF used to verify required blocking and streaming tool calls. The default matches the Qwen2.5 model in upstream llama.cpp's tool-call test matrix. |
263264
| `net.ladenthin.llama.nomic.path` | unset (test self-skips) | test | `LlamaEmbeddingsTest#testNomicEmbedLoads` | Path to a Nomic embedding model (`nomic-embed-text-v1.5.f16.gguf` or a compatible BERT-family encoder). Regression test for upstream issue #98 (BERT-encoder `result_output` assertion). |
264265
| `net.ladenthin.llama.vision.model` | unset (test self-skips) | test | `MultimodalIntegrationTest` (closes #103 / #34) | Path to a vision-capable model GGUF. Any vision-capable GGUF works; CI default is `SmolVLM-500M-Instruct-Q8_0.gguf`. |
265266
| `net.ladenthin.llama.vision.mmproj` | unset (test self-skips) | test | `MultimodalIntegrationTest` | Matching mmproj GGUF for the vision model. |
@@ -368,6 +369,40 @@ try (LlamaModel model = new LlamaModel(modelParams)) {
368369
Reasoning/thinking models can receive custom Jinja template variables via
369370
`ModelParameters#setChatTemplateKwargs(Map)`.
370371

372+
### Tool Calling
373+
374+
Use a tool-aware instruct model and enable Jinja when loading it. A typed request can either return
375+
the model's tool calls through `chat`, or execute registered handlers until the model produces a
376+
normal assistant response through `chatWithTools`:
377+
378+
```java
379+
ToolDefinition weather = new ToolDefinition(
380+
"get_weather",
381+
"Get the current weather for a city",
382+
"{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},"
383+
+ "\"required\":[\"city\"]}");
384+
385+
ChatRequest request = ChatRequest.empty()
386+
.appendMessage("user", "What is the weather in Paris?")
387+
.appendTool(weather)
388+
.withToolChoice("auto")
389+
.withParallelToolCalls(Boolean.FALSE);
390+
391+
Map<String, ToolHandler> handlers = Collections.singletonMap(
392+
"get_weather", argumentsJson -> "{\"temperature_c\":21,\"condition\":\"sunny\"}");
393+
394+
try (LlamaModel model = new LlamaModel(new ModelParameters()
395+
.setModel("models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf")
396+
.enableJinja())) {
397+
ChatResponse response = model.chatWithTools(request, handlers);
398+
System.out.println(response.getFirstContent());
399+
}
400+
```
401+
402+
`tool_choice` is the OpenAI-compatible string form (`auto`, `none`, or `required`). Set
403+
`parallel_tool_calls` to `false` when tool calls should be issued one at a time. Handler failures and
404+
unknown tool names are returned to the model as valid `{"error":"..."}` tool-result JSON.
405+
371406
### Infilling
372407

373408
You can simply set `InferenceParameters#setInputPrefix(String)` and `InferenceParameters#setInputSuffix(String)`.

src/main/java/net/ladenthin/llama/LlamaModel.java

Lines changed: 5 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import net.ladenthin.llama.parameters.ChatRequest;
3030
import net.ladenthin.llama.parameters.InferenceParameters;
3131
import net.ladenthin.llama.parameters.ModelParameters;
32-
import net.ladenthin.llama.value.ChatMessage;
3332
import net.ladenthin.llama.value.ChatResponse;
3433
import net.ladenthin.llama.value.CompletionResult;
3534
import net.ladenthin.llama.value.LlamaOutput;
@@ -38,7 +37,6 @@
3837
import net.ladenthin.llama.value.Pair;
3938
import net.ladenthin.llama.value.ServerMetrics;
4039
import net.ladenthin.llama.value.StopReason;
41-
import net.ladenthin.llama.value.ToolCall;
4240
import org.jspecify.annotations.Nullable;
4341

4442
/**
@@ -551,6 +549,10 @@ public ChatResponse chat(ChatRequest request) {
551549
if (toolChoice.isPresent()) {
552550
params = params.withToolChoice(toolChoice.get());
553551
}
552+
Optional<Boolean> parallelToolCalls = request.getParallelToolCalls();
553+
if (parallelToolCalls.isPresent()) {
554+
params = params.withParallelToolCalls(parallelToolCalls.get());
555+
}
554556
}
555557
params = request.applyCustomizer(params);
556558
String raw = chatComplete(params);
@@ -575,42 +577,7 @@ public ChatResponse chat(ChatRequest request) {
575577
* (or the last response when the round cap is hit)
576578
*/
577579
public ChatResponse chatWithTools(ChatRequest request, java.util.Map<String, ToolHandler> handlers) {
578-
final int maxRounds = request.getMaxToolRounds();
579-
if (maxRounds < 1) {
580-
throw new IllegalArgumentException("ChatRequest.maxToolRounds must be >= 1 (got " + maxRounds + "); "
581-
+ "chatWithTools always issues at least one chat call.");
582-
}
583-
ChatRequest current = request;
584-
ChatResponse last = chat(current);
585-
for (int round = 1; round < maxRounds; round++) {
586-
Optional<ChatMessage> assistantOpt = last.getFirstMessage();
587-
// NOTE: inline !isPresent() here (not compatibilityHelper.isEmpty) so NullAway's
588-
// CheckOptionalEmptiness recognises this as null-narrowing for the .get() below.
589-
if (!assistantOpt.isPresent() || assistantOpt.get().getToolCalls().isEmpty()) {
590-
return last;
591-
}
592-
ChatMessage assistant = assistantOpt.get();
593-
current = current.appendMessage(assistant);
594-
for (ToolCall call : assistant.getToolCalls()) {
595-
ToolHandler handler = handlers.get(call.getName());
596-
String result;
597-
if (handler == null) {
598-
result = "{\"error\":\"unknown tool: " + call.getName() + "\"}";
599-
} else {
600-
try {
601-
result = handler.invoke(call.getArgumentsJson());
602-
} catch (Exception e) {
603-
result = "{\"error\":"
604-
+ net.ladenthin.llama.json.ChatResponseParser.OBJECT_MAPPER.valueToTree(
605-
e.getClass().getSimpleName() + ": " + e.getMessage())
606-
+ "}";
607-
}
608-
}
609-
current = current.appendMessage(ChatMessage.toolResult(call.getId(), result));
610-
}
611-
last = chat(current);
612-
}
613-
return last;
580+
return ToolCallingAgent.run(request, handlers, this::chat);
614581
}
615582

616583
/**
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
2+
//
3+
// SPDX-License-Identifier: MIT
4+
5+
package net.ladenthin.llama;
6+
7+
import com.fasterxml.jackson.databind.ObjectMapper;
8+
import java.util.Map;
9+
import java.util.Optional;
10+
import java.util.function.Function;
11+
import net.ladenthin.llama.callback.ToolHandler;
12+
import net.ladenthin.llama.parameters.ChatRequest;
13+
import net.ladenthin.llama.value.ChatMessage;
14+
import net.ladenthin.llama.value.ChatResponse;
15+
import net.ladenthin.llama.value.ToolCall;
16+
17+
/** Model-independent orchestration for the tool-calling agent loop. */
18+
final class ToolCallingAgent {
19+
20+
private static final ObjectMapper MAPPER = new ObjectMapper();
21+
22+
private ToolCallingAgent() {}
23+
24+
static ChatResponse run(
25+
ChatRequest request, Map<String, ToolHandler> handlers, Function<ChatRequest, ChatResponse> chatCall) {
26+
final int maxRounds = request.getMaxToolRounds();
27+
if (maxRounds < 1) {
28+
throw new IllegalArgumentException("ChatRequest.maxToolRounds must be >= 1 (got " + maxRounds + "); "
29+
+ "chatWithTools always issues at least one chat call.");
30+
}
31+
32+
ChatRequest current = request;
33+
ChatResponse last = chatCall.apply(current);
34+
for (int round = 1; round < maxRounds; round++) {
35+
Optional<ChatMessage> assistantOpt = last.getFirstMessage();
36+
if (!assistantOpt.isPresent() || assistantOpt.get().getToolCalls().isEmpty()) {
37+
return last;
38+
}
39+
40+
ChatMessage assistant = assistantOpt.get();
41+
current = current.appendMessage(assistant);
42+
for (ToolCall call : assistant.getToolCalls()) {
43+
current = current.appendMessage(ChatMessage.toolResult(call.getId(), invoke(call, handlers)));
44+
}
45+
last = chatCall.apply(current);
46+
}
47+
return last;
48+
}
49+
50+
private static String invoke(ToolCall call, Map<String, ToolHandler> handlers) {
51+
ToolHandler handler = handlers.get(call.getName());
52+
if (handler == null) {
53+
return errorJson("unknown tool: " + call.getName());
54+
}
55+
try {
56+
return handler.invoke(call.getArgumentsJson());
57+
} catch (Exception e) {
58+
return errorJson(e.getClass().getSimpleName() + ": " + e.getMessage());
59+
}
60+
}
61+
62+
private static String errorJson(String message) {
63+
return MAPPER.createObjectNode().put("error", message).toString();
64+
}
65+
}

0 commit comments

Comments
 (0)