Skip to content

Commit bfee8be

Browse files
Merge pull request #1212 from 1wos:codex/adk-java-load-artifacts-mime-fallback
PiperOrigin-RevId: 931005823
2 parents 3abcf4f + a60c246 commit bfee8be

2 files changed

Lines changed: 208 additions & 2 deletions

File tree

core/src/main/java/com/google/adk/tools/LoadArtifactsTool.java

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
import com.google.adk.models.LlmRequest;
2323
import com.google.common.collect.ImmutableList;
2424
import com.google.common.collect.ImmutableMap;
25+
import com.google.common.collect.ImmutableSet;
2526
import com.google.common.collect.Iterables;
27+
import com.google.genai.types.Blob;
2628
import com.google.genai.types.Content;
2729
import com.google.genai.types.FunctionDeclaration;
2830
import com.google.genai.types.FunctionResponse;
@@ -31,7 +33,9 @@
3133
import io.reactivex.rxjava3.core.Completable;
3234
import io.reactivex.rxjava3.core.Observable;
3335
import io.reactivex.rxjava3.core.Single;
36+
import java.nio.charset.StandardCharsets;
3437
import java.util.List;
38+
import java.util.Locale;
3539
import java.util.Map;
3640
import java.util.Objects;
3741
import java.util.Optional;
@@ -55,6 +59,12 @@
5559
*/
5660
public final class LoadArtifactsTool extends BaseTool {
5761
public static final LoadArtifactsTool INSTANCE = new LoadArtifactsTool();
62+
private static final ImmutableList<String> GEMINI_SUPPORTED_INLINE_MIME_PREFIXES =
63+
ImmutableList.of("image/", "audio/", "video/");
64+
private static final ImmutableSet<String> GEMINI_SUPPORTED_INLINE_MIME_TYPES =
65+
ImmutableSet.of("application/pdf");
66+
private static final ImmutableSet<String> TEXT_LIKE_MIME_TYPES =
67+
ImmutableSet.of("application/csv", "application/json", "application/xml");
5868

5969
public LoadArtifactsTool() {
6070
super("load_artifacts", "Loads the artifacts and adds them to the session.");
@@ -177,15 +187,75 @@ private Completable loadAndAppendIndividualArtifact(
177187
appendArtifactToLlmRequest(
178188
llmRequestBuilder,
179189
"Artifact " + artifactName + " is:",
190+
artifactName,
180191
actualArtifact)));
181192
}
182193

183194
private void appendArtifactToLlmRequest(
184-
LlmRequest.Builder llmRequestBuilder, String prefix, Part artifact) {
195+
LlmRequest.Builder llmRequestBuilder, String prefix, String artifactName, Part artifact) {
185196
llmRequestBuilder.contents(
186197
ImmutableList.<Content>builder()
187198
.addAll(llmRequestBuilder.build().contents())
188-
.add(Content.fromParts(Part.fromText(prefix), artifact))
199+
.add(Content.fromParts(Part.fromText(prefix), asSafePartForLlm(artifact, artifactName)))
189200
.build());
190201
}
202+
203+
private static String normalizeMimeType(String mimeType) {
204+
if (mimeType == null) {
205+
return "";
206+
}
207+
int separatorIndex = mimeType.indexOf(';');
208+
if (separatorIndex >= 0) {
209+
mimeType = mimeType.substring(0, separatorIndex);
210+
}
211+
return mimeType.trim();
212+
}
213+
214+
private static boolean isInlineMimeTypeSupported(String mimeType) {
215+
String normalized = normalizeMimeType(mimeType);
216+
if (normalized.isEmpty()) {
217+
return false;
218+
}
219+
if (GEMINI_SUPPORTED_INLINE_MIME_TYPES.contains(normalized)) {
220+
return true;
221+
}
222+
return GEMINI_SUPPORTED_INLINE_MIME_PREFIXES.stream().anyMatch(normalized::startsWith);
223+
}
224+
225+
private static Part asSafePartForLlm(Part artifact, String artifactName) {
226+
Optional<Blob> inlineData = artifact.inlineData();
227+
if (inlineData.isEmpty()) {
228+
return artifact;
229+
}
230+
231+
Blob blob = inlineData.get();
232+
if (isInlineMimeTypeSupported(blob.mimeType().orElse(null))) {
233+
return artifact;
234+
}
235+
236+
String mimeType = normalizeMimeType(blob.mimeType().orElse(null));
237+
if (mimeType.isEmpty()) {
238+
mimeType = "application/octet-stream";
239+
}
240+
241+
Optional<byte[]> data = blob.data();
242+
if (data.isEmpty()) {
243+
return Part.fromText(
244+
String.format(
245+
"[Artifact: %s, type: %s. No inline data was provided.]", artifactName, mimeType));
246+
}
247+
248+
if (mimeType.startsWith("text/") || TEXT_LIKE_MIME_TYPES.contains(mimeType)) {
249+
return Part.fromText(new String(data.get(), StandardCharsets.UTF_8));
250+
}
251+
252+
double sizeKb = data.get().length / 1024.0;
253+
return Part.fromText(
254+
String.format(
255+
Locale.US,
256+
"[Binary artifact: %s, type: %s, size: %.1f KB. Content cannot be displayed inline.]",
257+
artifactName,
258+
mimeType,
259+
sizeKb));
260+
}
191261
}

core/src/test/java/com/google/adk/tools/LoadArtifactsToolTest.java

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import static org.mockito.ArgumentMatchers.anyInt;
55
import static org.mockito.ArgumentMatchers.anyString;
66
import static org.mockito.ArgumentMatchers.nullable;
7+
import static org.mockito.Mockito.doReturn;
78
import static org.mockito.Mockito.mock;
89
import static org.mockito.Mockito.never;
910
import static org.mockito.Mockito.spy;
@@ -17,13 +18,15 @@
1718
import com.google.adk.sessions.Session;
1819
import com.google.common.collect.ImmutableList;
1920
import com.google.common.collect.ImmutableMap;
21+
import com.google.genai.types.Blob;
2022
import com.google.genai.types.Content;
2123
import com.google.genai.types.FunctionDeclaration;
2224
import com.google.genai.types.FunctionResponse;
2325
import com.google.genai.types.Part;
2426
import com.google.genai.types.Schema;
2527
import io.reactivex.rxjava3.core.Maybe;
2628
import io.reactivex.rxjava3.core.Single;
29+
import java.nio.charset.StandardCharsets;
2730
import java.util.List;
2831
import java.util.Map;
2932
import java.util.Optional;
@@ -218,4 +221,137 @@ public void processLlmRequest_artifactsInContext_withOtherFunctionCall_doesNotLo
218221
.loadArtifact(anyString(), anyString(), anyString(), anyString(), anyInt());
219222
assertThat(finalRequest.contents()).containsExactly(functionCallContent);
220223
}
224+
225+
@Test
226+
public void processLlmRequest_unsupportedTextLikeMime_convertsToText() {
227+
String artifactName = "data.csv";
228+
String csvContent = "col1,col2\n1,2\n";
229+
Part artifactPart =
230+
processLoadArtifactRequest(
231+
artifactName,
232+
Part.fromBytes(
233+
csvContent.getBytes(StandardCharsets.UTF_8), "application/csv; charset=utf-8"));
234+
235+
assertThat(artifactPart.inlineData()).isEmpty();
236+
assertThat(artifactPart.text()).hasValue(csvContent);
237+
}
238+
239+
@Test
240+
public void processLlmRequest_supportedMime_keepsInlineData() {
241+
String artifactName = "file.pdf";
242+
byte[] pdfBytes = "%PDF-1.4".getBytes(StandardCharsets.UTF_8);
243+
Part artifactPart =
244+
processLoadArtifactRequest(artifactName, Part.fromBytes(pdfBytes, "application/pdf"));
245+
246+
assertThat(artifactPart.inlineData()).isPresent();
247+
assertThat(artifactPart.inlineData().get().mimeType()).hasValue("application/pdf");
248+
assertThat(artifactPart.inlineData().get().data().get()).isEqualTo(pdfBytes);
249+
}
250+
251+
@Test
252+
public void processLlmRequest_unsupportedBinaryMime_convertsToPlaceholder() {
253+
String artifactName = "slides.pptx";
254+
Part artifactPart =
255+
processLoadArtifactRequest(
256+
artifactName,
257+
Part.fromBytes(
258+
new byte[] {1, 2, 3},
259+
"application/vnd.openxmlformats-officedocument.presentationml.presentation"));
260+
261+
assertThat(artifactPart.inlineData()).isEmpty();
262+
assertThat(artifactPart.text())
263+
.hasValue(
264+
"[Binary artifact: slides.pptx, type:"
265+
+ " application/vnd.openxmlformats-officedocument.presentationml.presentation,"
266+
+ " size: 0.0 KB. Content cannot be displayed inline.]");
267+
}
268+
269+
@Test
270+
public void processLlmRequest_unsupportedMimeWithoutInlineData_convertsToNoDataPlaceholder() {
271+
String artifactName = "empty.bin";
272+
Part artifactPart =
273+
processLoadArtifactRequest(
274+
artifactName,
275+
Part.builder()
276+
.inlineData(Blob.builder().mimeType("application/octet-stream").build())
277+
.build());
278+
279+
assertThat(artifactPart.inlineData()).isEmpty();
280+
assertThat(artifactPart.text())
281+
.hasValue(
282+
"[Artifact: empty.bin, type: application/octet-stream."
283+
+ " No inline data was provided.]");
284+
}
285+
286+
@Test
287+
public void processLlmRequest_emptyMime_defaultsToOctetStream() {
288+
String artifactName = "unknown";
289+
Part artifactPart =
290+
processLoadArtifactRequest(
291+
artifactName,
292+
Part.fromBytes(new byte[] {(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF}, ""));
293+
294+
assertThat(artifactPart.inlineData()).isEmpty();
295+
assertThat(artifactPart.text())
296+
.hasValue(
297+
"[Binary artifact: unknown, type: application/octet-stream,"
298+
+ " size: 0.0 KB. Content cannot be displayed inline.]");
299+
}
300+
301+
@Test
302+
public void processLlmRequest_nullMime_defaultsToOctetStream() {
303+
String artifactName = "mystery";
304+
Part artifactPart =
305+
processLoadArtifactRequest(
306+
artifactName,
307+
Part.builder()
308+
.inlineData(
309+
Blob.builder()
310+
.data(new byte[] {(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF})
311+
.build())
312+
.build());
313+
314+
assertThat(artifactPart.inlineData()).isEmpty();
315+
assertThat(artifactPart.text())
316+
.hasValue(
317+
"[Binary artifact: mystery, type: application/octet-stream,"
318+
+ " size: 0.0 KB. Content cannot be displayed inline.]");
319+
}
320+
321+
private Part processLoadArtifactRequest(String artifactName, Part loadedArtifactPart) {
322+
ImmutableList<String> availableArtifacts = ImmutableList.of(artifactName);
323+
ImmutableList<String> artifactsToLoad = ImmutableList.of(artifactName);
324+
325+
FunctionResponse functionResponse =
326+
FunctionResponse.builder()
327+
.name("load_artifacts")
328+
.response(ImmutableMap.of("artifact_names", artifactsToLoad))
329+
.build();
330+
Content functionCallContent =
331+
Content.builder()
332+
.role("model")
333+
.parts(
334+
ImmutableList.of(
335+
Part.fromFunctionResponse(
336+
functionResponse.name().get(), functionResponse.response().get())))
337+
.build();
338+
llmRequestBuilder.contents(ImmutableList.of(functionCallContent));
339+
340+
ToolContext spiedToolContext = spy(ToolContext.builder(mockInvocationContext).build());
341+
doReturn(Single.just(availableArtifacts)).when(spiedToolContext).listArtifacts();
342+
doReturn(Maybe.just(loadedArtifactPart)).when(spiedToolContext).loadArtifact(artifactName);
343+
344+
loadArtifactsTool.processLlmRequest(llmRequestBuilder, spiedToolContext).blockingAwait();
345+
verify(spiedToolContext).loadArtifact(artifactName);
346+
347+
LlmRequest finalRequest = llmRequestBuilder.build();
348+
assertThat(finalRequest.contents()).hasSize(2);
349+
Content appendedContent = finalRequest.contents().get(1);
350+
assertThat(appendedContent.role()).hasValue("user");
351+
assertThat(appendedContent.parts()).isPresent();
352+
assertThat(appendedContent.parts().get()).hasSize(2);
353+
assertThat(appendedContent.parts().get().get(0).text())
354+
.hasValue("Artifact " + artifactName + " is:");
355+
return appendedContent.parts().get().get(1);
356+
}
221357
}

0 commit comments

Comments
 (0)