forked from bernardladenthin/java-llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAudioInputIntegrationTest.java
More file actions
100 lines (88 loc) · 4.43 KB
/
Copy pathAudioInputIntegrationTest.java
File metadata and controls
100 lines (88 loc) · 4.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
//
// SPDX-License-Identifier: MIT
package net.ladenthin.llama;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.concurrent.TimeUnit;
import net.ladenthin.llama.parameters.InferenceParameters;
import net.ladenthin.llama.parameters.ModelParameters;
import net.ladenthin.llama.value.ChatMessage;
import net.ladenthin.llama.value.ContentPart;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
/**
* Real-model coverage for <b>audio input</b> (llama.cpp discussion #13759). Loads an audio-capable
* model (Ultravox / Qwen2.5-Omni) with its audio {@code --mmproj} and sends a multipart message
* carrying a {@link ContentPart#audioFile(java.nio.file.Path)} clip, exercising:
* <ul>
* <li>{@link ModelParameters#setMmproj(String)} wiring an audio encoder;</li>
* <li>{@code ParameterJsonSerializer.buildMessages} emitting the OAI {@code input_audio} part;</li>
* <li>the upstream {@code oaicompat_chat_params_parse} routing {@code input_audio} through the
* compiled-in {@code mtmd} audio pipeline.</li>
* </ul>
*
* <p>Self-skips when any of the three system properties
* ({@link TestConstants#PROP_AUDIO_MODEL_PATH} / {@link TestConstants#PROP_AUDIO_MMPROJ_PATH} /
* {@link TestConstants#PROP_AUDIO_PATH}) is unset or its file is missing, so it runs only in CI or on a
* dev machine where the (large) audio model and a clip have been staged.
*/
public class AudioInputIntegrationTest {

    /** Model shared by every test in this class; stays {@code null} when {@link #setup()} aborts. */
    private static LlamaModel model;

    /** Raw value of the {@link TestConstants#PROP_AUDIO_PATH} system property, read in setup. */
    private static String audioPath;

    /**
     * Reads the three path properties, skips the class when the environment is not staged, and
     * otherwise loads the model once for all tests.
     *
     * <p>The checks run in two passes: first all three properties must be non-null and non-empty,
     * then all three paths must exist. Any failing check aborts through a JUnit assumption rather
     * than failing the build. After loading, the model must report audio support.
     */
    @BeforeAll
    public static void setup() {
        final String modelPath = System.getProperty(TestConstants.PROP_AUDIO_MODEL_PATH);
        final String mmprojPath = System.getProperty(TestConstants.PROP_AUDIO_MMPROJ_PATH);
        audioPath = System.getProperty(TestConstants.PROP_AUDIO_PATH);

        // Pass 1: every property has to carry a value before any path is touched.
        Assumptions.assumeTrue(
                hasValue(modelPath),
                "Audio model path not set (-D" + TestConstants.PROP_AUDIO_MODEL_PATH + "=...)");
        Assumptions.assumeTrue(
                hasValue(mmprojPath),
                "Audio mmproj path not set (-D" + TestConstants.PROP_AUDIO_MMPROJ_PATH + "=...)");
        Assumptions.assumeTrue(
                hasValue(audioPath),
                "Audio clip path not set (-D" + TestConstants.PROP_AUDIO_PATH + "=...)");

        // Pass 2: the configured paths have to exist on this machine.
        Assumptions.assumeTrue(isPresentOnDisk(modelPath), "Audio model file missing: " + modelPath);
        Assumptions.assumeTrue(isPresentOnDisk(mmprojPath), "Audio mmproj file missing: " + mmprojPath);
        Assumptions.assumeTrue(isPresentOnDisk(audioPath), "Audio clip missing: " + audioPath);

        model = new LlamaModel(loadParameters(modelPath, mmprojPath));
        assertTrue(model.supportsAudio(), "loaded model + mmproj must advertise audio input");
    }

    /** Closes the shared model if {@link #setup()} got far enough to create one. */
    @AfterAll
    public static void tearDown() {
        if (model == null) {
            return;
        }
        model.close();
    }

    /**
     * Sends one user message made of a text instruction plus the configured audio clip and checks
     * that the reply still contains characters after trimming.
     *
     * @throws IOException propagated from the calls made while building and sending the message
     */
    @Test
    @DisplayName("an input_audio content part reaches the model and yields a non-empty reply")
    @Timeout(value = 240_000, unit = TimeUnit.MILLISECONDS)
    public void audioInputProducesNonEmptyReply() throws IOException {
        ChatMessage message = ChatMessage.userMultimodal(
                ContentPart.text("Transcribe the audio."),
                ContentPart.audioFile(Paths.get(audioPath)));

        String reply = model.chatCompleteText(
                InferenceParameters.empty()
                        .withMessages(Collections.singletonList(message))
                        .withNPredict(64));

        final String trimmedReply = reply.trim();
        assertFalse(
                trimmedReply.isEmpty(),
                "reply must be non-empty for an audio prompt; got: \"" + reply + "\"");
    }

    /** Returns {@code true} when the property value is neither {@code null} nor the empty string. */
    private static boolean hasValue(String propertyValue) {
        return !(propertyValue == null || propertyValue.isEmpty());
    }

    /** Returns {@code true} when something exists at {@code path} according to {@link File#exists()}. */
    private static boolean isPresentOnDisk(String path) {
        return new File(path).exists();
    }

    /**
     * Builds the load parameters: a 4096-token context, the given model and mmproj files, fit
     * disabled, and the GPU layer count taken from {@link TestConstants#PROP_TEST_NGL} (falling back
     * to {@link TestConstants#DEFAULT_TEST_NGL}).
     *
     * <p>With zero GPU layers the device list is additionally set to {@code "none"} and mmproj
     * offload is switched off, presumably to keep the whole run on the CPU.
     */
    private static ModelParameters loadParameters(String modelPath, String mmprojPath) {
        int gpuLayers = Integer.getInteger(TestConstants.PROP_TEST_NGL, TestConstants.DEFAULT_TEST_NGL);
        ModelParameters parameters = new ModelParameters()
                .setCtxSize(4096)
                .setModel(modelPath)
                .setMmproj(mmprojPath)
                .setGpuLayers(gpuLayers)
                .setFit(false);
        if (gpuLayers == 0) {
            parameters.setDevices("none").setMmprojOffload(false);
        }
        return parameters;
    }
}