forked from bernardladenthin/java-llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathContentPart.java
More file actions
236 lines (219 loc) · 9.32 KB
/
Copy pathContentPart.java
File metadata and controls
236 lines (219 loc) · 9.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
//
// SPDX-License-Identifier: MIT
package net.ladenthin.llama.value;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Base64;
import java.util.Locale;
import java.util.Objects;
import lombok.EqualsAndHashCode;
import lombok.ToString;
import org.jspecify.annotations.Nullable;
/**
 * One piece of a {@link ChatMessage}'s multimodal content array: a text
 * fragment, an image URL (typically a {@code data:image/...;base64,...} URI),
 * or an audio clip (base64 data plus a container format).
 * Mirrors the OpenAI-compatible {@code content} part shape understood by the
 * upstream {@code llama.cpp} server. The JNI bridge preserves decoded media
 * buffers and submits them through the upstream multimodal task path. An
 * image-bearing message is serialized to
 * <pre>
 * {"role":"user","content":[
 *   {"type":"text","text":"What is in this image?"},
 *   {"type":"image_url","image_url":{"url":"data:image/png;base64,..."}}
 * ]}
 * </pre>
 * and the upstream {@code oaicompat_chat_params_parse} routes it through the
 * compiled-in {@code mtmd} pipeline (requires
 * {@link net.ladenthin.llama.parameters.ModelParameters#setMmproj(String)} to be wired).
 * <p>
 * Instances are immutable and safe to share across threads. Use the static
 * factories — the constructor is private.
 * </p>
 */
@ToString
@EqualsAndHashCode
public final class ContentPart {

    /** Discriminator for the part kinds this class can represent. */
    public enum Type {
        /** A plain-text fragment. */
        TEXT,
        /** An image reference (data URI or remote URL). */
        IMAGE_URL,
        /** An audio clip (base64 {@code data} + {@code format}), for audio-input models. */
        INPUT_AUDIO
    }

    /** Which kind of part this is; decides which of the payload fields below is populated. */
    private final Type type;
    /** Text payload; non-null only for {@link Type#TEXT}. */
    private final @Nullable String text;
    /** URL or data URI; non-null only for {@link Type#IMAGE_URL}. */
    private final @Nullable String imageUrl;
    /** Base64-encoded audio bytes; non-null only for {@link Type#INPUT_AUDIO}. */
    private final @Nullable String audioData;
    /** Lower-cased container format; non-null only for {@link Type#INPUT_AUDIO}. */
    private final @Nullable String audioFormat;

    /**
     * Plain field-assigning constructor. It performs no validation itself; the static
     * factories are responsible for passing a non-null value only for the fields that
     * belong to {@code type}.
     */
    private ContentPart(
            Type type,
            @Nullable String text,
            @Nullable String imageUrl,
            @Nullable String audioData,
            @Nullable String audioFormat) {
        this.type = type;
        this.text = text;
        this.imageUrl = imageUrl;
        this.audioData = audioData;
        this.audioFormat = audioFormat;
    }

    /**
     * Build a text part.
     *
     * @param text the text fragment (must not be {@code null})
     * @return a TEXT part wrapping {@code text}
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static ContentPart text(String text) {
        Objects.requireNonNull(text, "text");
        return new ContentPart(Type.TEXT, text, null, null, null);
    }

    /**
     * Build an image part from a pre-formed URL or data URI. Pass either an
     * HTTP(S) URL (if the server is configured to fetch it) or a complete
     * {@code data:image/...;base64,...} string. The value is stored as given.
     *
     * @param url image URL or data URI (must not be {@code null})
     * @return an IMAGE_URL part wrapping {@code url}
     * @throws NullPointerException if {@code url} is {@code null}
     */
    public static ContentPart imageUrl(String url) {
        Objects.requireNonNull(url, "url");
        return new ContentPart(Type.IMAGE_URL, null, url, null, null);
    }

    /**
     * Build an image part from raw bytes plus an explicit MIME type. The bytes
     * are base64-encoded and wrapped in a {@code data:<mimeType>;base64,<payload>} URI.
     *
     * @param bytes raw image bytes (must not be {@code null})
     * @param mimeType MIME type, e.g. {@code "image/png"} (must not be {@code null} or empty)
     * @return an IMAGE_URL part carrying the data URI
     * @throws NullPointerException if {@code bytes} or {@code mimeType} is {@code null}
     * @throws IllegalArgumentException if {@code mimeType} is the empty string
     */
    public static ContentPart imageBytes(byte[] bytes, String mimeType) {
        Objects.requireNonNull(bytes, "bytes");
        Objects.requireNonNull(mimeType, "mimeType");
        if (mimeType.isEmpty()) {
            throw new IllegalArgumentException("mimeType must not be empty (bytes.length=" + bytes.length + ")");
        }
        String encoded = Base64.getEncoder().encodeToString(bytes);
        return new ContentPart(Type.IMAGE_URL, null, "data:" + mimeType + ";base64," + encoded, null, null);
    }

    /**
     * Build an image part by reading a file from disk and detecting its MIME
     * type from the file extension (matched case-insensitively). Recognised
     * extensions: {@code .png}, {@code .jpg}, {@code .jpeg}, {@code .webp},
     * {@code .gif}. Anything else throws {@link IllegalArgumentException}; use
     * {@link #imageBytes(byte[], String)} to force a MIME type explicitly.
     * The extension is checked before the file is read.
     *
     * @param imagePath path to the image file (must not be {@code null})
     * @return an IMAGE_URL part carrying the data URI
     * @throws NullPointerException if {@code imagePath} is {@code null}
     * @throws IllegalArgumentException if {@code imagePath} has no file name component or its
     *         extension is not one of the recognised ones
     * @throws IOException if the file cannot be read
     */
    public static ContentPart imageFile(Path imagePath) throws IOException {
        Objects.requireNonNull(imagePath, "imagePath");
        String name = lowerCaseFileName(imagePath, "imagePath");
        String mimeType;
        if (name.endsWith(".png")) {
            mimeType = "image/png";
        } else if (name.endsWith(".jpg") || name.endsWith(".jpeg")) {
            mimeType = "image/jpeg";
        } else if (name.endsWith(".webp")) {
            mimeType = "image/webp";
        } else if (name.endsWith(".gif")) {
            mimeType = "image/gif";
        } else {
            throw new IllegalArgumentException("Cannot infer MIME type from extension: " + imagePath
                    + " — use ContentPart.imageBytes(bytes, mimeType) instead");
        }
        return imageBytes(Files.readAllBytes(imagePath), mimeType);
    }

    /**
     * Build an audio part from raw bytes plus an explicit container format. Mirrors the OpenAI
     * {@code input_audio} content part the upstream {@code llama.cpp} server understands, routed
     * through the {@code mtmd} audio pipeline (requires an audio-capable {@code --mmproj}). The bytes
     * are base64-encoded and the format is stored lower-cased.
     *
     * @param audioBytes raw audio bytes (must not be {@code null})
     * @param format container format, {@code "wav"} or {@code "mp3"} (case-insensitive)
     * @return an INPUT_AUDIO part carrying the base64 data and normalised format
     * @throws NullPointerException if {@code audioBytes} or {@code format} is {@code null}
     * @throws IllegalArgumentException if {@code format} is not {@code "wav"} or {@code "mp3"}
     */
    public static ContentPart inputAudio(byte[] audioBytes, String format) {
        Objects.requireNonNull(audioBytes, "audioBytes");
        Objects.requireNonNull(format, "format");
        // Locale.ROOT keeps the case fold independent of the JVM's default locale.
        String normalized = format.toLowerCase(Locale.ROOT);
        if (!normalized.equals("wav") && !normalized.equals("mp3")) {
            throw new IllegalArgumentException("audio format must be 'wav' or 'mp3', was: " + format);
        }
        String encoded = Base64.getEncoder().encodeToString(audioBytes);
        return new ContentPart(Type.INPUT_AUDIO, null, null, encoded, normalized);
    }

    /**
     * Build an audio part by reading a file from disk and detecting its format from the file
     * extension (matched case-insensitively). Recognised extensions: {@code .wav}, {@code .mp3}.
     * Anything else throws {@link IllegalArgumentException}; use
     * {@link #inputAudio(byte[], String)} to force a format. The extension is checked before
     * the file is read.
     *
     * @param audioPath path to the audio file (must not be {@code null})
     * @return an INPUT_AUDIO part carrying the data
     * @throws NullPointerException if {@code audioPath} is {@code null}
     * @throws IllegalArgumentException if {@code audioPath} has no file name component or its
     *         extension is not one of the recognised ones
     * @throws IOException if the file cannot be read
     */
    public static ContentPart audioFile(Path audioPath) throws IOException {
        Objects.requireNonNull(audioPath, "audioPath");
        String name = lowerCaseFileName(audioPath, "audioPath");
        String format;
        if (name.endsWith(".wav")) {
            format = "wav";
        } else if (name.endsWith(".mp3")) {
            format = "mp3";
        } else {
            throw new IllegalArgumentException("Cannot infer audio format from extension: " + audioPath
                    + " — use ContentPart.inputAudio(bytes, format) instead");
        }
        return inputAudio(Files.readAllBytes(audioPath), format);
    }

    /**
     * Returns the file name component of {@code path}, lower-cased with {@link Locale#ROOT}
     * so that the extension checks in the file-based factories are case-insensitive and do not
     * depend on the JVM's default locale.
     *
     * @param path the path whose file name is wanted
     * @param parameterName the caller's parameter name, used as the prefix of the error message
     * @return the lower-cased file name
     * @throws IllegalArgumentException if {@link Path#getFileName()} returns {@code null}
     */
    private static String lowerCaseFileName(Path path, String parameterName) {
        Path fileNamePath = path.getFileName();
        if (fileNamePath == null) {
            throw new IllegalArgumentException(parameterName + " has no file name component: " + path);
        }
        return fileNamePath.toString().toLowerCase(Locale.ROOT);
    }

    /**
     * Part-kind accessor.
     * @return the discriminator telling which payload accessors are populated:
     *         {@link #getText()} for {@link Type#TEXT}, {@link #getImageUrl()} for
     *         {@link Type#IMAGE_URL}, and {@link #getAudioData()} / {@link #getAudioFormat()}
     *         for {@link Type#INPUT_AUDIO}
     */
    public Type getType() {
        return type;
    }

    /**
     * Text accessor (only set for {@link Type#TEXT}).
     * @return the text fragment, or {@code null} for non-text parts
     */
    public @Nullable String getText() {
        return text;
    }

    /**
     * Image URL accessor (only set for {@link Type#IMAGE_URL}).
     * @return the URL or data URI, or {@code null} for non-image parts
     */
    public @Nullable String getImageUrl() {
        return imageUrl;
    }

    /**
     * Base64 audio-data accessor (only set for {@link Type#INPUT_AUDIO}).
     * @return the base64-encoded audio bytes, or {@code null} for non-audio parts
     */
    public @Nullable String getAudioData() {
        return audioData;
    }

    /**
     * Audio container-format accessor (only set for {@link Type#INPUT_AUDIO}).
     * @return {@code "wav"} or {@code "mp3"}, or {@code null} for non-audio parts
     */
    public @Nullable String getAudioFormat() {
        return audioFormat;
    }
}