From 166438ce71ebd600d0e0d9056961709fc983ea3e Mon Sep 17 00:00:00 2001 From: zhenggf Date: Tue, 30 Jun 2026 10:43:52 +0800 Subject: [PATCH 1/3] perf: optimize ffmpeg video saving (cherry picked from commit 673a80dc1c566109f90e9b8068eba864b42c9aa9) --- lightx2v/utils/utils.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/lightx2v/utils/utils.py b/lightx2v/utils/utils.py index 2087e99e1..8a1df1126 100755 --- a/lightx2v/utils/utils.py +++ b/lightx2v/utils/utils.py @@ -260,6 +260,7 @@ def save_to_video( # Get ffmpeg executable from imageio_ffmpeg ffmpeg_exe = ffmpeg.get_ffmpeg_exe() out_pix = output_pix_fmt or "yuv420p" + ffmpeg_preset = os.environ.get("LIGHTX2V_FFMPEG_PRESET", "").strip() if lossless: command = [ @@ -283,9 +284,10 @@ def save_to_video( "libx264rgb", "-crf", "0", - "-an", # No audio - output_path, ] + if ffmpeg_preset: + command.extend(["-preset", ffmpeg_preset]) + command.extend(["-an", output_path]) # No audio else: command = [ ffmpeg_exe, @@ -308,9 +310,10 @@ def save_to_video( "libx264", "-pix_fmt", out_pix, - "-an", # No audio - output_path, ] + if ffmpeg_preset: + command.extend(["-preset", ffmpeg_preset]) + command.extend(["-an", output_path]) # No audio # Run FFmpeg (stderr to DEVNULL: avoids pipe buffer deadlock; no need to capture for errors) process = subprocess.Popen( @@ -322,14 +325,15 @@ def save_to_video( if process.stdin is None: raise BrokenPipeError("No stdin buffer received.") - # Write frames to FFmpeg - for frame in frames: - # Pad frame if needed - if frame.shape[0] < height or frame.shape[1] < width: - padded = np.zeros((height, width, 3), dtype=np.uint8) - padded[: frame.shape[0], : frame.shape[1]] = frame - frame = padded - process.stdin.write(frame.tobytes()) + if frames.shape[1] == height and frames.shape[2] == width: + process.stdin.write(np.ascontiguousarray(frames).tobytes()) + else: + for frame in frames: + if frame.shape[0] < height or frame.shape[1] < width: + padded = np.zeros((height, width, 3), dtype=np.uint8) + padded[: frame.shape[0], : frame.shape[1]] = frame + frame = padded + process.stdin.write(frame.tobytes()) process.stdin.close() process.wait() From 3e020da0c30a5dd767c33c6926d5a173d6f18df5 Mon Sep 17 00:00:00 2001 From: zhenggf Date: Tue, 30 Jun 2026 14:27:38 +0800 Subject: [PATCH 2/3] perf: avoid extra copies when streaming frames to ffmpeg --- lightx2v/utils/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightx2v/utils/utils.py b/lightx2v/utils/utils.py index 8a1df1126..e0c40165a 100755 --- a/lightx2v/utils/utils.py +++ b/lightx2v/utils/utils.py @@ -326,14 +326,14 @@ def save_to_video( raise BrokenPipeError("No stdin buffer received.") if frames.shape[1] == height and frames.shape[2] == width: - process.stdin.write(np.ascontiguousarray(frames).tobytes()) + process.stdin.write(np.ascontiguousarray(frames)) else: for frame in frames: if frame.shape[0] < height or frame.shape[1] < width: padded = np.zeros((height, width, 3), dtype=np.uint8) padded[: frame.shape[0], : frame.shape[1]] = frame frame = padded - process.stdin.write(frame.tobytes()) + process.stdin.write(np.ascontiguousarray(frame)) process.stdin.close() process.wait() From 498f09f9a559d1483d69960b14e29c94350394d0 Mon Sep 17 00:00:00 2001 From: zhenggf Date: Wed, 1 Jul 2026 17:04:20 +0800 Subject: [PATCH 3/3] docs: clarify ffmpeg preset handling --- lightx2v/utils/utils.py | 42 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/lightx2v/utils/utils.py b/lightx2v/utils/utils.py index e0c40165a..32b253fc5 100755 --- a/lightx2v/utils/utils.py +++ b/lightx2v/utils/utils.py @@ -22,6 +22,34 @@ torch_device_module = getattr(torch, AI_DEVICE) +_FFMPEG_PRESETS = ( + "ultrafast", + "superfast", + "veryfast", + "faster", + "fast", + "medium", + "slow", + "slower", + "veryslow", + "placebo", +) + + +def _get_ffmpeg_preset(): + preset = os.environ.get("LIGHTX2V_FFMPEG_PRESET", "").strip().lower() + if not preset: + return None + if preset not in _FFMPEG_PRESETS: + logger.warning( + "Invalid LIGHTX2V_FFMPEG_PRESET={!r}; expected one of {}. Ignoring it.", + preset, + ", ".join(_FFMPEG_PRESETS), + ) + return None + return preset + + def is_main_process(): return not dist.is_available() or not dist.is_initialized() or dist.get_rank() == 0 @@ -231,6 +259,12 @@ def save_to_video( method: Save method - "imageio" or "ffmpeg" lossless: Whether to use lossless encoding (ffmpeg method only) output_pix_fmt: Pixel format for output (ffmpeg method only) + + Environment: + LIGHTX2V_FFMPEG_PRESET: Optional x264 preset. Allowed values are + ultrafast, superfast, veryfast, faster, fast, medium, slow, + slower, veryslow, and placebo. Faster presets reduce encoding + latency but usually produce larger files. """ assert images.dim() == 4 and images.shape[-1] == 3, "Input must be [N, H, W, C] with C=3" @@ -260,7 +294,7 @@ def save_to_video( # Get ffmpeg executable from imageio_ffmpeg ffmpeg_exe = ffmpeg.get_ffmpeg_exe() out_pix = output_pix_fmt or "yuv420p" - ffmpeg_preset = os.environ.get("LIGHTX2V_FFMPEG_PRESET", "").strip() + ffmpeg_preset = _get_ffmpeg_preset() if lossless: command = [ @@ -287,7 +321,7 @@ def save_to_video( ] if ffmpeg_preset: command.extend(["-preset", ffmpeg_preset]) - command.extend(["-an", output_path]) # No audio + command.extend(["-an", output_path]) # Keep the existing no-audio output behavior. else: command = [ ffmpeg_exe, @@ -313,7 +347,7 @@ def save_to_video( ] if ffmpeg_preset: command.extend(["-preset", ffmpeg_preset]) - command.extend(["-an", output_path]) # No audio + command.extend(["-an", output_path]) # Keep the existing no-audio output behavior. # Run FFmpeg (stderr to DEVNULL: avoids pipe buffer deadlock; no need to capture for errors) process = subprocess.Popen( @@ -326,10 +360,12 @@ def save_to_video( raise BrokenPipeError("No stdin buffer received.") if frames.shape[1] == height and frames.shape[2] == width: + # Fast path: stream the whole contiguous frame buffer without per-frame bytes copies. process.stdin.write(np.ascontiguousarray(frames)) else: for frame in frames: if frame.shape[0] < height or frame.shape[1] < width: + # H.264/yuv420p requires even dimensions, so pad odd-sized frames before encoding. padded = np.zeros((height, width, 3), dtype=np.uint8) padded[: frame.shape[0], : frame.shape[1]] = frame frame = padded