@@ -560,23 +560,40 @@ def launch_visual_gen_server(
560560 visual_gen_args: Optional validated VisualGenArgs for model configuration.
561561 metadata_server_cfg: Optional metadata server configuration.
562562 """
563- logger .info (f"Initializing VisualGen ({ model } )" )
563+ # Reserve the listening (host, port) by binding the socket *before*
564+ # constructing the VisualGen pipeline, then hand the bound socket to
565+ # uvicorn. VisualGen initialization can take many minutes; if we deferred
566+ # the bind until uvicorn started, anything else on the host could grab the
567+ # port in that window and trtllm-serve would die at bind() time.
568+ addr_info = socket .getaddrinfo (host , port , socket .AF_UNSPEC ,
569+ socket .SOCK_STREAM )
570+ address_family = socket .AF_INET6 if all (
571+ [info [0 ] == socket .AF_INET6 for info in addr_info ]) else socket .AF_INET
572+ with socket .socket (address_family , socket .SOCK_STREAM ) as s :
573+ try :
574+ s .bind ((host , port ))
575+ except OSError as e :
576+ raise RuntimeError (f"Failed to bind socket to { host } :{ port } : { e } " )
564577
565- visual_gen_model = VisualGen ( model = model , args = visual_gen_args )
578+ logger . info ( f"Initializing VisualGen ( { model } )" )
566579
567- n_workers = visual_gen_model .args .parallel_config .n_workers
568- logger .info (f"World size: { n_workers } " )
569- logger .info (f"CFG size: { visual_gen_model .args .parallel_config .cfg_size } " )
570- logger .info (
571- f"Ulysses size: { visual_gen_model .args .parallel_config .ulysses_size } " )
580+ visual_gen_model = VisualGen (model = model , args = visual_gen_args )
572581
573- server = OpenAIServer (generator = visual_gen_model ,
574- model = model ,
575- server_role = ServerRole .VISUAL_GEN ,
576- metadata_server_cfg = metadata_server_cfg ,
577- tool_parser = None )
578- _apply_fastapi_middlewares (server .app , middleware )
579- asyncio .run (server (host , port ))
582+ n_workers = visual_gen_model .args .parallel_config .n_workers
583+ logger .info (f"World size: { n_workers } " )
584+ logger .info (
585+ f"CFG size: { visual_gen_model .args .parallel_config .cfg_size } " )
586+ logger .info (
587+ f"Ulysses size: { visual_gen_model .args .parallel_config .ulysses_size } "
588+ )
589+
590+ server = OpenAIServer (generator = visual_gen_model ,
591+ model = model ,
592+ server_role = ServerRole .VISUAL_GEN ,
593+ metadata_server_cfg = metadata_server_cfg ,
594+ tool_parser = None )
595+ _apply_fastapi_middlewares (server .app , middleware )
596+ asyncio .run (server (host , port , sockets = [s ]))
580597
581598
582599class ChoiceWithAlias (click .Choice ):
0 commit comments