@@ -9,22 +9,40 @@ import {
99 isInteractive ,
1010 type Config ,
1111 type GlobalFlags ,
12+ type KnowledgeChatContentPart ,
13+ type KnowledgeChatMessage ,
1214 type KnowledgeChatRequest ,
1315 type KnowledgeChatStreamChunk ,
1416} from "bailian-cli-core" ;
1517import { failIfMissing , cmdUsage , emitResult , emitBare , promptText } from "bailian-cli-runtime" ;
1618
17- interface ParsedMessage {
18- role : "user" | "assistant" ;
19- content : string ;
20- }
21-
22- function parseMessages ( flags : GlobalFlags ) : ParsedMessage [ ] {
23- const messages : ParsedMessage [ ] = [ ] ;
19+ /**
20+ * Parse --message flags into KnowledgeChatMessage[].
21+ * Supports:
22+ * 1. Simple text: "hello" → {role:"user", content:"hello"}
23+ * 2. Role prefix: "user:hello" / "assistant:hi" → {role, content}
24+ * 3. JSON object: '{"role":"user","content":[...]}' → structured message (advanced)
25+ */
26+ function parseMessages ( flags : GlobalFlags ) : KnowledgeChatMessage [ ] {
27+ const messages : KnowledgeChatMessage [ ] = [ ] ;
2428 if ( flags . message ) {
2529 const validRoles = new Set ( [ "user" , "assistant" ] ) ;
2630 const msgs = flags . message as string [ ] ;
2731 for ( const m of msgs ) {
32+ // Try JSON object first (advanced usage)
33+ if ( m . startsWith ( "{" ) ) {
34+ try {
35+ const parsed = JSON . parse ( m ) as { role ?: string ; content ?: unknown } ;
36+ if ( parsed . role && validRoles . has ( parsed . role ) && parsed . content !== undefined ) {
37+ messages . push ( parsed as KnowledgeChatMessage ) ;
38+ continue ;
39+ }
40+ } catch {
41+ // Not valid JSON, fall through to simple parsing
42+ }
43+ }
44+
45+ // Simple role:content or plain text
2846 const colonIdx = m . indexOf ( ":" ) ;
2947 const maybeRole = colonIdx !== - 1 ? m . slice ( 0 , colonIdx ) : "" ;
3048
@@ -38,6 +56,55 @@ function parseMessages(flags: GlobalFlags): ParsedMessage[] {
3856 return messages ;
3957}
4058
/**
 * Report whether any message already carries an `image_url` content part.
 *
 * Only array-valued content is inspected; plain string content can never
 * embed an image part and is skipped.
 *
 * @param messages Chat messages to scan.
 * @returns `true` as soon as one `image_url` part is found, otherwise `false`.
 */
function hasEmbeddedImages(messages: KnowledgeChatMessage[]): boolean {
  return messages.some(
    ({ content }) =>
      Array.isArray(content) && content.some((part) => part.type === "image_url"),
  );
}
68+
/**
 * Attach `--image` URLs to the last user message, converting that message's
 * content into a multimodal content-part array.
 *
 * The target message is mutated in place:
 * - string content becomes a leading `text` part (kept even when empty);
 * - array content is copied and extended, so the original array is not mutated;
 * - one `image_url` part per URL is appended, in the order given.
 *
 * If no message has role `"user"`, an empty user message is appended to
 * `messages` to hold the images. When `imageUrls` is empty the function is a
 * no-op and `messages` is left untouched.
 *
 * @param messages Conversation to update (mutated).
 * @param imageUrls Image URLs to attach.
 * @throws TypeError if the target message's content is neither a string nor an
 *   array (possible when a message was supplied as raw JSON).
 */
function attachImagesToLastUserMessage(
  messages: KnowledgeChatMessage[],
  imageUrls: string[],
): void {
  // Nothing to attach: do not rewrite content or append a placeholder message.
  if (imageUrls.length === 0) return;

  // Walk backwards so the most recent user turn receives the images.
  let target: KnowledgeChatMessage | undefined;
  for (let i = messages.length - 1; i >= 0; i--) {
    const candidate = messages[i];
    if (candidate?.role === "user") {
      target = candidate;
      break;
    }
  }

  // No user turn at all: create an empty one to carry the images.
  if (target === undefined) {
    target = { role: "user", content: "" };
    messages.push(target);
  }

  // Content may originate from unvalidated JSON, so check its runtime shape
  // instead of trusting the static type.
  const existing: unknown = target.content;
  const contentParts: KnowledgeChatContentPart[] = [];

  if (typeof existing === "string") {
    // Always include a text part, even if the text is empty.
    contentParts.push({ type: "text", text: existing });
  } else if (Array.isArray(existing)) {
    contentParts.push(...existing);
  } else {
    const got = existing === null ? "null" : typeof existing;
    throw new TypeError(
      `Cannot attach images: message content must be a string or an array of content parts, got ${got}.`,
    );
  }

  for (const url of imageUrls) {
    contentParts.push({ type: "image_url", image_url: { url } });
  }

  target.content = contentParts;
}
107+
41108/** SSE step_change → human-friendly progress label (TTY only) */
42109const STEP_LABELS : Record < string , string > = {
43110 tool_calling : "🔍 Retrieving..." ,
@@ -67,7 +134,8 @@ export default defineCommand({
67134 } ,
68135 {
69136 flag : "--image <url>" ,
70- description : "Image URL(s) (repeatable)" ,
137+ description :
138+ "Image URL (repeatable). Attached to the last user message as multimodal content" ,
71139 type : "array" ,
72140 } ,
73141 ] ,
@@ -80,12 +148,19 @@ export default defineCommand({
80148 exampleArgs : [
81149 '--message "What is RAG?" --agent-id aid-xxx --workspace-id ws-xxx' ,
82150 '--message "user:What is RAG?" --message "assistant:RAG is..." --message "How does it work?" --agent-id aid-xxx --workspace-id ws-xxx' ,
151+ '--message "Describe these images" --image https://example.com/a.png --image https://example.com/b.png --agent-id aid-xxx --workspace-id ws-xxx' ,
83152 ] ,
84153 async run ( config : Config , flags : GlobalFlags ) {
85154 let messages = parseMessages ( flags ) ;
86155
156+ const imageUrls = flags . image as string [ ] | undefined ;
157+ const hasImages = imageUrls && imageUrls . length > 0 ;
158+
87159 if ( messages . length === 0 ) {
88- if ( isInteractive ( { nonInteractive : config . nonInteractive } ) ) {
160+ if ( hasImages ) {
161+ // --image without --message: create an empty user message to hold images
162+ messages = [ { role : "user" , content : "" } ] ;
163+ } else if ( isInteractive ( { nonInteractive : config . nonInteractive } ) ) {
89164 const hint = await promptText ( { message : "Enter your message:" } ) ;
90165 if ( ! hint ) {
91166 process . stderr . write ( "Chat cancelled.\n" ) ;
@@ -113,6 +188,17 @@ export default defineCommand({
113188 // API only supports SSE; streamOutput controls whether to print tokens in real-time
114189 const streamOutput = format === "text" && ! ! process . stdout . isTTY ;
115190
191+ // Attach --image URLs to messages (multimodal content array)
192+ if ( hasImages ) {
193+ if ( hasEmbeddedImages ( messages ) ) {
194+ throw new BailianError (
195+ "Cannot use --image when messages already contain embedded image_url content parts. Use one approach or the other." ,
196+ ExitCode . USAGE ,
197+ ) ;
198+ }
199+ attachImagesToLastUserMessage ( messages , imageUrls ! ) ;
200+ }
201+
116202 const body : KnowledgeChatRequest = {
117203 input : {
118204 messages,
@@ -125,11 +211,6 @@ export default defineCommand({
125211 stream : true ,
126212 } ;
127213
128- const imageUrls = flags . image as string [ ] | undefined ;
129- if ( imageUrls && imageUrls . length > 0 ) {
130- body . parameters . agent_options . image_list = imageUrls ;
131- }
132-
133214 const url = knowledgeChatEndpoint ( workspaceId ) ;
134215
135216 if ( config . dryRun ) {
0 commit comments