@@ -106,6 +106,40 @@ def padding_resize(
106106
107107 return img_pad
108108
def use_auto_target_shape(self):
    """Report whether the target shape should be chosen automatically.

    Looks up the ``auto_target_shape`` entry of ``self.config`` and returns
    it unchanged. A missing entry is treated as ``True``.
    """
    auto_flag = self.config.get("auto_target_shape", True)
    return auto_flag
111+
def get_comfy_target_shape(self):
    """Return the configured target ``(height, width)`` floored to multiples of 16.

    ``target_height`` and ``target_width`` are read from ``self.config`` (in
    that order), converted with ``int`` and rounded down to the nearest
    multiple of 16.

    Raises:
        ValueError: if either rounded dimension is zero or negative.
    """
    snapped = []
    for key in ("target_height", "target_width"):
        # Floor division drops the remainder, so the result is a multiple of 16.
        snapped.append(16 * (int(self.config[key]) // 16))
    height, width = snapped
    if min(height, width) <= 0:
        raise ValueError(f"Invalid WanAnimate target shape: height={height} , width={width} ")
    return height, width
118+
def center_crop_to_aspect(self, img, height, width):
    """Crop ``img`` around its centre so its aspect ratio matches ``width / height``.

    Only one axis is ever trimmed: columns when the image is relatively
    wider than the target, rows when it is relatively taller. An image that
    already has the requested ratio is returned unchanged.

    Args:
        img: array whose first two axes are (rows, columns).
        height: target height used only to form the aspect ratio.
        width: target width used only to form the aspect ratio.

    Returns:
        A slice of ``img`` (or ``img`` itself when no crop is needed).
    """
    src_h, src_w = img.shape[:2]
    wanted_ratio = width / height
    current_ratio = src_w / src_h

    if current_ratio > wanted_ratio:
        # Too wide: keep all rows and trim columns evenly from both sides.
        keep_w = max(1, round(src_h * wanted_ratio))
        left = max(0, (src_w - keep_w) // 2)
        return img[:, left : left + keep_w]

    if current_ratio < wanted_ratio:
        # Too tall: keep all columns and trim rows evenly from top and bottom.
        keep_h = max(1, round(src_w / wanted_ratio))
        top = max(0, (src_h - keep_h) // 2)
        return img[top : top + keep_h]

    return img
132+
def comfy_resize(self, img, height, width, interpolation=cv2.INTER_LANCZOS4, crop=None):
    """Resize a single image to ``height`` x ``width``.

    Args:
        img: image array whose first two axes are (rows, columns).
        height: requested number of rows.
        width: requested number of columns.
        interpolation: interpolation flag forwarded to ``cv2.resize``.
        crop: when equal to ``"center"``, the image is first centre-cropped
            to the target aspect ratio via ``center_crop_to_aspect``; any
            other value skips the crop.

    Returns:
        ``img`` itself (possibly cropped) when it already has the requested
        size, otherwise the output of ``cv2.resize``.
    """
    if crop == "center":
        img = self.center_crop_to_aspect(img, height=height, width=width)

    if img.shape[0] != height or img.shape[1] != width:
        # cv2.resize takes the destination size as (width, height).
        return cv2.resize(img, (width, height), interpolation=interpolation)
    return img
139+
def comfy_resize_frames(self, frames, height, width, interpolation=cv2.INTER_LANCZOS4, crop=None):
    """Apply ``comfy_resize`` to every frame and stack the results.

    Args:
        frames: iterable of image arrays.
        height: requested number of rows for each frame.
        width: requested number of columns for each frame.
        interpolation: interpolation flag passed through to ``comfy_resize``.
        crop: crop mode passed through to ``comfy_resize``.

    Returns:
        The resized frames combined with ``np.stack`` along a new first axis.
    """
    resized = []
    for frame in frames:
        resized.append(self.comfy_resize(frame, height, width, interpolation=interpolation, crop=crop))
    return np.stack(resized)
142+
109143 def prepare_source (self , src_pose_path , src_face_path , src_ref_path ):
110144 pose_video_reader = VideoReader (src_pose_path )
111145 pose_len = len (pose_video_reader )
@@ -118,7 +152,14 @@ def prepare_source(self, src_pose_path, src_face_path, src_ref_path):
118152 face_images = face_video_reader .get_batch (face_idxs ).asnumpy ()
119153 height , width = cond_images [0 ].shape [:2 ]
120154 refer_images = cv2 .imread (src_ref_path )[..., ::- 1 ]
121- refer_images = self .padding_resize (refer_images , height = height , width = width )
155+ if self .use_auto_target_shape ():
156+ refer_images = self .padding_resize (refer_images , height = height , width = width )
157+ else :
158+ target_height , target_width = self .get_comfy_target_shape ()
159+ logger .info (f"WanAnimate uses config target shape: height={ target_height } , width={ target_width } " )
160+ cond_images = self .comfy_resize_frames (cond_images , target_height , target_width )
161+ refer_images = self .comfy_resize (refer_images , target_height , target_width )
162+ face_images = self .comfy_resize_frames (face_images , 512 , 512 , crop = "center" )
122163 return cond_images , face_images , refer_images
123164
124165 def prepare_source_for_replace (self , src_bg_path , src_mask_path ):
@@ -132,6 +173,10 @@ def prepare_source_for_replace(self, src_bg_path, src_mask_path):
132173 mask_idxs = list (range (mask_len ))
133174 mask_images = mask_video_reader .get_batch (mask_idxs ).asnumpy ()
134175 mask_images = mask_images [:, :, :, 0 ] / 255
176+ if not self .use_auto_target_shape ():
177+ target_height , target_width = self .get_comfy_target_shape ()
178+ bg_images = self .comfy_resize_frames (bg_images , target_height , target_width )
179+ mask_images = self .comfy_resize_frames (mask_images , target_height , target_width , interpolation = cv2 .INTER_NEAREST )
135180 return bg_images , mask_images
136181
137182 @ProfilingContext4DebugL2 ("Run Image Encoders" )
0 commit comments