Skip to content

Commit 0b7e127

Browse files
authored
fix: [vLLM multimodel] launch image loading in parallel (#5444)
Signed-off-by: Guan Luo <41310872+GuanLuo@users.noreply.github.com>
1 parent 7ebd5f8 commit 0b7e127

1 file changed

Lines changed: 43 additions & 19 deletions

File tree

components/src/dynamo/vllm/handlers.py

Lines changed: 43 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -757,6 +757,48 @@ def _create_prompt_from_embeddings(
757757

758758
return prompt, sequence_length, embeddings_tensor
759759

760+
async def _load_image_batch(
    self, image_mm_items: list[Dict[str, Any]]
) -> list[Any]:
    """
    Load a batch of images concurrently from multimodal data items.

    Every item carrying URL_VARIANT_KEY is handed to
    ``self.image_loader.load_image`` and all loads are awaited together
    with ``asyncio.gather``. Items carrying DECODED_VARIANT_KEY only
    produce a warning; anything else (including non-dict entries) is
    skipped silently.

    Args:
        image_mm_items: List of multimodal data items for images
    Returns:
        List of loaded image data, in the order of the URL items
    Raises:
        Exception: If any image fails to load. The message has one
            line per failed URL, and the first underlying error is
            attached as the cause.
    """
    # Keep every URL paired with its pending load. Skipped items (decoded
    # variants, non-dict entries) shift positions, so indexing back into
    # image_mm_items by result index would report the wrong URL.
    pending_urls = []
    image_futures = []
    for item in image_mm_items:
        if not isinstance(item, dict):
            continue
        if URL_VARIANT_KEY in item:
            url = item[URL_VARIANT_KEY]
            pending_urls.append(url)
            image_futures.append(self.image_loader.load_image(url))
            logger.debug(f"Preparing to load image from URL: {url[:80]}...")
        elif DECODED_VARIANT_KEY in item:
            logger.warning(
                "Decoded multimodal data not yet supported in standard worker"
            )

    # return_exceptions=True lets every load finish so that all failures
    # can be reported together instead of only the first one.
    results = await asyncio.gather(*image_futures, return_exceptions=True)

    loaded_images = []
    failure_messages = []
    first_error = None
    for url, result in zip(pending_urls, results):
        # BaseException (not just Exception) so that a cancelled load,
        # which gather returns as a CancelledError result, is never
        # mistaken for image data.
        if isinstance(result, BaseException):
            message = f"Failed to load image from {url[:80]}...: {result}"
            logger.error(message)
            failure_messages.append(message)
            if first_error is None:
                first_error = result
            continue
        loaded_images.append(result)

    if failure_messages:
        # One line per failure, each terminated by a newline.
        collective_exceptions = "".join(f"{m}\n" for m in failure_messages)
        raise Exception(collective_exceptions) from first_error

    return loaded_images
801+
760802
async def _extract_multimodal_data(
761803
self, request: Dict[str, Any]
762804
) -> Dict[str, Any] | None:
@@ -777,25 +819,7 @@ async def _extract_multimodal_data(
777819
vllm_mm_data = {}
778820

779821
# Process image_url entries
780-
images = []
781-
for item in mm_map.get(IMAGE_URL_KEY, []):
782-
if isinstance(item, dict) and URL_VARIANT_KEY in item:
783-
url = item[URL_VARIANT_KEY]
784-
try:
785-
# ImageLoader supports both data: and http(s): URLs with caching
786-
image = await self.image_loader.load_image(url)
787-
images.append(image)
788-
logger.debug(f"Loaded image from URL: {url[:80]}...")
789-
except Exception:
790-
logger.exception(f"Failed to load image from {url[:80]}...")
791-
raise
792-
elif isinstance(item, dict) and DECODED_VARIANT_KEY in item:
793-
# Decoded support from PRs #3971/#3988 (frontend decoding + NIXL transfer)
794-
# Will contain NIXL metadata for direct memory access
795-
# TODO: Implement NIXL read when PRs merge
796-
logger.warning(
797-
"Decoded multimodal data not yet supported in standard worker"
798-
)
822+
images = await self._load_image_batch(mm_map.get(IMAGE_URL_KEY, []))
799823

800824
if images:
801825
# vLLM expects single image or list

0 commit comments

Comments
 (0)