1919import io .agentscope .core .agui .model .AguiFunctionCall ;
2020import io .agentscope .core .agui .model .AguiMessage ;
2121import io .agentscope .core .agui .model .AguiToolCall ;
22+ import io .agentscope .core .message .AudioBlock ;
23+ import io .agentscope .core .message .Base64Source ;
2224import io .agentscope .core .message .ContentBlock ;
25+ import io .agentscope .core .message .ImageBlock ;
2326import io .agentscope .core .message .Msg ;
2427import io .agentscope .core .message .MsgRole ;
28+ import io .agentscope .core .message .Source ;
2529import io .agentscope .core .message .TextBlock ;
2630import io .agentscope .core .message .ToolResultBlock ;
2731import io .agentscope .core .message .ToolUseBlock ;
32+ import io .agentscope .core .message .URLSource ;
33+ import io .agentscope .core .message .VideoBlock ;
2834import io .agentscope .core .util .JsonException ;
2935import io .agentscope .core .util .JsonUtils ;
3036import java .util .ArrayList ;
3743 *
3844 * <p>This class handles the bidirectional conversion between the AG-UI protocol's
3945 * message format and AgentScope's internal message format.
46+ *
47+ * <p>Supports multimodal input per AG-UI protocol:
48+ * <ul>
49+ * <li>{@code text} → {@link TextBlock}</li>
50+ * <li>{@code image} → {@link ImageBlock}</li>
51+ * <li>{@code video} → {@link VideoBlock}</li>
52+ * <li>{@code audio} → {@link AudioBlock}</li>
53+ * <li>{@code document} → {@link TextBlock} (with description)</li>
54+ * </ul>
55+ *
56+ * <p>See https://docs.ag-ui.com/concepts/messages.md for AG-UI InputContent spec.
4057 */
4158public class AguiMessageConverter {
4259 /**
@@ -54,8 +71,20 @@ public Msg toMsg(AguiMessage aguiMessage) {
5471 MsgRole role = convertRole (aguiMessage .getRole ());
5572 List <ContentBlock > blocks = new ArrayList <>();
5673
57- // Add text content if present
58- if (aguiMessage .getContent () != null && !aguiMessage .getContent ().isEmpty ()) {
74+ // Handle multimodal content (InputContent array per AG-UI protocol)
75+ if (aguiMessage .isMultimodalContent ()) {
76+ List <Map <String , Object >> parts = aguiMessage .getMultimodalContent ();
77+ if (parts != null ) {
78+ for (Map <String , Object > part : parts ) {
79+ ContentBlock block = convertInputContent (part );
80+ if (block != null ) {
81+ blocks .add (block );
82+ }
83+ }
84+ }
85+ }
86+ // Handle simple text content (backward compatible)
87+ else if (aguiMessage .getContent () != null && !aguiMessage .getContent ().isEmpty ()) {
5988 if (aguiMessage .isToolMessage () && aguiMessage .getToolCallId () != null ) {
6089 // For tool messages, wrap content in ToolResultBlock
6190 blocks .add (
@@ -78,6 +107,91 @@ public Msg toMsg(AguiMessage aguiMessage) {
78107 return Msg .builder ().id (aguiMessage .getId ()).role (role ).content (blocks ).build ();
79108 }
80109
110+ /**
111+ * Convert a single AG-UI InputContent part to an AgentScope ContentBlock.
112+ *
113+ * @param part The InputContent map from AG-UI protocol
114+ * @return The converted ContentBlock, or null if type is unrecognized
115+ */
116+ @ SuppressWarnings ("unchecked" )
117+ private ContentBlock convertInputContent (Map <String , Object > part ) {
118+ String type = (String ) part .get ("type" );
119+ if (type == null ) {
120+ return null ;
121+ }
122+
123+ switch (type ) {
124+ case "text" :
125+ String text = (String ) part .get ("text" );
126+ return text != null ? TextBlock .builder ().text (text ).build () : null ;
127+
128+ case "image" :
129+ Source source = extractSource (part );
130+ return source != null ? ImageBlock .builder ().source (source ).build () : null ;
131+
132+ case "video" :
133+ Source videoSource = extractSource (part );
134+ return videoSource != null
135+ ? VideoBlock .builder ().source (videoSource ).build ()
136+ : null ;
137+
138+ case "audio" :
139+ Source audioSource = extractSource (part );
140+ return audioSource != null
141+ ? AudioBlock .builder ().source (audioSource ).build ()
142+ : null ;
143+
144+ case "document" :
145+ // Convert document to TextBlock with description
146+ Source docSource = extractSource (part );
147+ if (docSource != null ) {
148+ String docDesc = "[Document: " + extractMimeType (part ) + "]" ;
149+ return TextBlock .builder ().text (docDesc ).build ();
150+ }
151+ return null ;
152+
153+ default :
154+ return null ;
155+ }
156+ }
157+
158+ /**
159+ * Extract Source from an InputContent part.
160+ * Supports both 'url' and 'data' (base64) source types.
161+ */
162+ @ SuppressWarnings ("unchecked" )
163+ private Source extractSource (Map <String , Object > part ) {
164+ Map <String , Object > sourceMap = (Map <String , Object >) part .get ("source" );
165+ if (sourceMap == null ) {
166+ return null ;
167+ }
168+
169+ String sourceType = (String ) sourceMap .get ("type" );
170+ if ("url" .equals (sourceType )) {
171+ String url = (String ) sourceMap .get ("value" );
172+ return url != null ? new URLSource (url ) : null ;
173+ } else if ("data" .equals (sourceType )) {
174+ String data = (String ) sourceMap .get ("value" );
175+ String mimeType = (String ) sourceMap .get ("mimeType" );
176+ if (data != null && mimeType != null ) {
177+ return new Base64Source (data , mimeType );
178+ }
179+ }
180+ return null ;
181+ }
182+
183+ /**
184+ * Extract mimeType from an InputContent part (for document type).
185+ */
186+ private String extractMimeType (Map <String , Object > part ) {
187+ @ SuppressWarnings ("unchecked" )
188+ Map <String , Object > sourceMap = (Map <String , Object >) part .get ("source" );
189+ if (sourceMap != null ) {
190+ return (String ) sourceMap .get ("mimeType" );
191+ }
192+ return null ;
193+ }
194+
81195 /**
82196 * Convert an AgentScope message to an AG-UI message.
83197 *
0 commit comments