Skip to content

Commit 83d5e2a

Browse files
committed
feat(s2v): add fixed_min_side 1080p support and audio re-mux for SekoTalk
- wan_audio_runner: add 1080p tier to fixed_min_side resize mode
- wan_audio_runner: read resize_mode/fixed_area from input_info first, fall back to config
- wan_audio_runner: re-mux with original audio after local inference to replace 16kHz VARecorder output
- input_info: add fixed_area field to S2VInputInfo
1 parent dde9808 commit 83d5e2a

2 files changed

Lines changed: 30 additions & 6 deletions

File tree

lightx2v/models/runners/wan/wan_audio_runner.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import io
33
import json
44
import os
5+
import subprocess
56
import warnings
67
from dataclasses import dataclass
78
from typing import Dict, List, Optional, Tuple, Union
@@ -100,13 +101,15 @@ def resize_image(img, resize_mode="adaptive", bucket_shape=None, fixed_area=None
100101
target_h, target_w = bucket_config[closet_ratio][0]
101102
elif resize_mode == "fixed_min_side":
102103
min_side = 720
103-
if fixed_area == "720p":
104+
if fixed_area == "1080p":
105+
min_side = 1080
106+
elif fixed_area == "720p":
104107
min_side = 720
105108
elif fixed_area == "480p":
106109
min_side = 480
107110
else:
108-
logger.warning(f"[wan_audio] fixed_area is not '480p' or '720p', using default 480p: {fixed_area}")
109-
min_side = 480
111+
logger.warning(f"[wan_audio] fixed_area is not '480p', '720p' or '1080p', using default 720p: {fixed_area}")
112+
min_side = 720
110113
if ori_ratio < 1.0:
111114
target_h = min_side
112115
target_w = round(target_h / ori_ratio)
@@ -392,9 +395,9 @@ def read_image_input(self, img_path):
392395

393396
ref_img, h, w = resize_image(
394397
ref_img,
395-
resize_mode=self.config.get("resize_mode", "adaptive"),
398+
resize_mode=getattr(self.input_info, "resize_mode", None) or self.config.get("resize_mode", "adaptive"),
396399
bucket_shape=self.config.get("bucket_shape", None),
397-
fixed_area=self.config.get("fixed_area", None),
400+
fixed_area=getattr(self.input_info, "fixed_area", None) or self.config.get("fixed_area", None),
398401
fixed_shape=self.config.get("fixed_shape", None),
399402
)
400403
logger.info(f"[wan_audio] resize_image target_h: {h}, target_w: {w}")
@@ -729,7 +732,27 @@ def run_main(self):
729732

730733
# fixed audio segments inputs
731734
if self.va_controller.reader is None:
732-
return super().run_main()
735+
# Save paths before super().run_main() clears input_info
736+
out_path = getattr(self.input_info, "save_result_path", None)
737+
orig_audio = (getattr(self.input_info, "audio_path", "") or "").split(",")[0].strip() or None
738+
result = super().run_main()
739+
# Stop VARecorder so ffmpeg finishes writing the file
740+
if self.va_controller is not None:
741+
self.va_controller.clear()
742+
self.va_controller = None
743+
# Re-mux with original audio to replace 16kHz audio
744+
if out_path and orig_audio and os.path.isfile(out_path) and os.path.isfile(orig_audio):
745+
try:
746+
tmp = out_path + ".remux.mp4"
747+
cmd = ["ffmpeg", "-y", "-i", out_path, "-i", orig_audio,
748+
"-c:v", "copy", "-c:a", "copy",
749+
"-map", "0:v:0", "-map", "1:a:0", "-shortest", tmp]
750+
subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
751+
os.replace(tmp, out_path)
752+
logger.info(f"[wan_audio] Re-muxed with original audio: {orig_audio}")
753+
except Exception as exc:
754+
logger.warning(f"[wan_audio] Re-mux failed: {exc}")
755+
return result
733756

734757
self.va_controller.start()
735758
self.init_run()

lightx2v/utils/input_info.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ class S2VInputInfo:
119119
stream_config: dict = field(default_factory=dict)
120120
# shape related
121121
resize_mode: str = field(default_factory=str)
122+
fixed_area: str = field(default_factory=str)
122123
original_shape: list = field(default_factory=list)
123124
resized_shape: list = field(default_factory=list)
124125
latent_shape: list = field(default_factory=list)

0 commit comments

Comments
 (0)