@@ -169,22 +169,17 @@ def _print_friendly_context(self):
169169
170170 async def assemble_context (self ) -> dict :
171171 """将请求(prompt 和 image_urls)包装成 OpenAI 的消息格式。"""
172- # 构建内容块列表
173172 content_blocks = []
174173
175- # 1. 用户原始发言(OpenAI 建议:用户发言在前)
176174 if self .prompt and self .prompt .strip ():
177175 content_blocks .append ({"type" : "text" , "text" : self .prompt })
178176 elif self .image_urls :
179- # 如果没有文本但有图片,添加占位文本
180177 content_blocks .append ({"type" : "text" , "text" : "[图片]" })
181178
182- # 2. 额外的内容块(系统提醒、指令等)
183179 if self .extra_user_content_parts :
184180 for part in self .extra_user_content_parts :
185181 content_blocks .append (part .model_dump ())
186182
187- # 3. 图片内容
188183 if self .image_urls :
189184 for image_url in self .image_urls :
190185 if image_url .startswith ("http" ):
@@ -202,7 +197,6 @@ async def assemble_context(self) -> dict:
202197 {"type" : "image_url" , "image_url" : {"url" : image_data }},
203198 )
204199
205- # 只有当只有一个来自 prompt 的文本块且没有额外内容块时,才降级为简单格式以保持向后兼容
206200 if (
207201 len (content_blocks ) == 1
208202 and content_blocks [0 ]["type" ] == "text"
@@ -211,17 +205,56 @@ async def assemble_context(self) -> dict:
211205 ):
212206 return {"role" : "user" , "content" : content_blocks [0 ]["text" ]}
213207
214- # 否则返回多模态格式
215208 return {"role" : "user" , "content" : content_blocks }
216209
@staticmethod
def _detect_mime_type(header_bytes: bytes) -> str:
    """Guess an image MIME type from the leading bytes of its content.

    Args:
        header_bytes: The first bytes of the image data. Twelve bytes are
            enough for every binary signature checked here; the SVG check
            looks for ``<svg`` within the first 256 bytes, so pass up to
            that many when SVG input is possible.

    Returns:
        The detected MIME type, or ``"image/jpeg"`` when no signature
        matches (including empty input).
    """
    # ISO-BMFF major brands found at offset 8, right after the "ftyp" tag.
    avif_brands = (b"avif", b"avis")
    heif_brands = (
        b"heic", b"heix", b"hevc", b"hevx", b"mif1",
        b"heim", b"heis", b"hevm", b"hevs", b"msf1",
    )

    if header_bytes.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if header_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if header_bytes.startswith(b"GIF8"):
        return "image/gif"
    # RIFF container: bytes 4-7 hold the chunk size, the form type follows.
    if header_bytes.startswith(b"RIFF") and header_bytes[8:12] == b"WEBP":
        return "image/webp"
    if header_bytes.startswith(b"BM"):
        return "image/bmp"
    # Little-endian ("II") and big-endian ("MM") TIFF headers.
    if header_bytes.startswith((b"II\x2a\x00", b"MM\x00\x2a")):
        return "image/tiff"
    if header_bytes.startswith(b"\x00\x00\x01\x00"):
        return "image/x-icon"
    # SVG is text, so the root tag may follow an XML prolog or comments.
    if b"<svg" in header_bytes[:256].lower():
        return "image/svg+xml"
    # ISO-BMFF: bytes 0-3 are the box size, then "ftyp" and the major brand.
    if header_bytes[4:8] == b"ftyp":
        brand = header_bytes[8:12]
        if brand in avif_brands:
            return "image/avif"
        if brand in heif_brands:
            return "image/heif"
    return "image/jpeg"
236+
async def _encode_image_bs64(self, image_url: str) -> str:
    """Convert an image reference into a ``data:`` URI with a sniffed MIME type.

    Args:
        image_url: Either ``base64://<payload>`` or a local file path.

    Returns:
        ``data:<mime>;base64,<payload>``, where ``<mime>`` comes from
        ``self._detect_mime_type`` applied to the leading bytes of the image.
        A ``base64://`` payload whose head cannot be decoded is labelled
        ``image/jpeg`` and passed through unchanged.

    Raises:
        OSError: If ``image_url`` is a path that cannot be opened or read.
    """
    # The detector scans up to 256 bytes (for SVG), so hand it that much.
    sniff_bytes = 256
    prefix = "base64://"

    if image_url.startswith(prefix):
        raw_b64 = image_url[len(prefix):]
        # 344 base64 characters decode to 258 bytes; a multiple of four
        # keeps a truncated sample decodable without extra padding.
        sample = raw_b64[:344]
        missing_padding = len(sample) % 4
        if missing_padding:
            sample += "=" * (4 - missing_padding)
        try:
            header_bytes = base64.b64decode(sample)
        except ValueError:
            # binascii.Error is a ValueError: malformed payload head.
            mime_type = "image/jpeg"
        else:
            mime_type = self._detect_mime_type(header_bytes[:sniff_bytes])
        return f"data:{mime_type};base64,{raw_b64}"

    # Read the file once and sniff from the in-memory bytes.
    with open(image_url, "rb") as f:
        image_bytes = f.read()
    mime_type = self._detect_mime_type(image_bytes[:sniff_bytes])
    image_bs64 = base64.b64encode(image_bytes).decode("utf-8")
    return f"data:{mime_type};base64,{image_bs64}"
225258
226259
227260@dataclass
@@ -362,7 +395,7 @@ def completion_text(self, value) -> None:
362395 comp
363396 for comp in self .result_chain .chain
364397 if not isinstance (comp , Comp .Plain )
365- ] # 清空 Plain 组件
398+ ]
366399 self .result_chain .chain .insert (0 , Comp .Plain (value ))
367400 else :
368401 self ._completion_text = value
@@ -397,7 +430,6 @@ def to_openai_to_calls_model(self) -> list[ToolCall]:
397430 name = self .tools_call_name [idx ],
398431 arguments = json .dumps (tool_call_arg ),
399432 ),
400- # the extra_content will not serialize if it's None when calling ToolCall.model_dump()
401433 extra_content = self .tools_call_extra_content .get (
402434 self .tools_call_ids [idx ]
403435 ),
0 commit comments