@@ -757,6 +757,48 @@ def _create_prompt_from_embeddings(
757757
758758 return prompt , sequence_length , embeddings_tensor
759759
async def _load_image_batch(
    self, image_mm_items: list[Dict[str, Any]]
) -> list[Any]:
    """
    Load a batch of images concurrently from multimodal data items.

    Only dict items carrying ``URL_VARIANT_KEY`` are loaded (via
    ``self.image_loader.load_image``). Dict items carrying
    ``DECODED_VARIANT_KEY`` are logged as unsupported and skipped; any
    other item is ignored.

    Args:
        image_mm_items: List of multimodal data items for images
    Returns:
        List of loaded image data, in the order of the URL items
    Raises:
        Exception: If any image fails to load. The message contains one
            line per failed URL.
    """
    # Keep the URLs in lockstep with the scheduled loads: items that are
    # skipped below never produce a result, so indexing the results back
    # into image_mm_items would attribute failures to the wrong item.
    urls = []
    pending_loads = []
    for item in image_mm_items:
        if not isinstance(item, dict):
            continue
        if URL_VARIANT_KEY in item:
            url = item[URL_VARIANT_KEY]
            urls.append(url)
            pending_loads.append(self.image_loader.load_image(url))
            logger.debug(f"Preparing to load image from URL: {url[:80]}...")
        elif DECODED_VARIANT_KEY in item:
            logger.warning(
                "Decoded multimodal data not yet supported in standard worker"
            )

    # return_exceptions=True lets every load finish so that all failures
    # can be reported together instead of only the first one.
    results = await asyncio.gather(*pending_loads, return_exceptions=True)

    loaded_images = []
    failures = []
    for url, result in zip(urls, results):
        # BaseException (not just Exception) so that a cancelled load,
        # which gather hands back as a CancelledError instance, is not
        # mistaken for image data.
        if isinstance(result, BaseException):
            message = f"Failed to load image from {url[:80]}...: {result}"
            logger.error(message)
            failures.append(message)
        else:
            loaded_images.append(result)

    if failures:
        # One line per failure, each terminated by a newline.
        raise Exception("\n".join(failures) + "\n")

    return loaded_images
801+
760802 async def _extract_multimodal_data (
761803 self , request : Dict [str , Any ]
762804 ) -> Dict [str , Any ] | None :
@@ -777,25 +819,7 @@ async def _extract_multimodal_data(
777819 vllm_mm_data = {}
778820
779821 # Process image_url entries
780- images = []
781- for item in mm_map .get (IMAGE_URL_KEY , []):
782- if isinstance (item , dict ) and URL_VARIANT_KEY in item :
783- url = item [URL_VARIANT_KEY ]
784- try :
785- # ImageLoader supports both data: and http(s): URLs with caching
786- image = await self .image_loader .load_image (url )
787- images .append (image )
788- logger .debug (f"Loaded image from URL: { url [:80 ]} ..." )
789- except Exception :
790- logger .exception (f"Failed to load image from { url [:80 ]} ..." )
791- raise
792- elif isinstance (item , dict ) and DECODED_VARIANT_KEY in item :
793- # Decoded support from PRs #3971/#3988 (frontend decoding + NIXL transfer)
794- # Will contain NIXL metadata for direct memory access
795- # TODO: Implement NIXL read when PRs merge
796- logger .warning (
797- "Decoded multimodal data not yet supported in standard worker"
798- )
822+ images = await self ._load_image_batch (mm_map .get (IMAGE_URL_KEY , []))
799823
800824 if images :
801825 # vLLM expects single image or list
0 commit comments