From 166438ce71ebd600d0e0d9056961709fc983ea3e Mon Sep 17 00:00:00 2001
From: zhenggf <zhenggf@local>
Date: Tue, 30 Jun 2026 10:43:52 +0800
Subject: [PATCH 1/3] perf: optimize ffmpeg video saving

(cherry picked from commit 673a80dc1c566109f90e9b8068eba864b42c9aa9)
---
 lightx2v/utils/utils.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/lightx2v/utils/utils.py b/lightx2v/utils/utils.py
index 2087e99e1..8a1df1126 100755
--- a/lightx2v/utils/utils.py
+++ b/lightx2v/utils/utils.py
@@ -260,6 +260,7 @@ def save_to_video(
         # Get ffmpeg executable from imageio_ffmpeg
         ffmpeg_exe = ffmpeg.get_ffmpeg_exe()
         out_pix = output_pix_fmt or "yuv420p"
+        ffmpeg_preset = os.environ.get("LIGHTX2V_FFMPEG_PRESET", "").strip()
 
         if lossless:
             command = [
@@ -283,9 +284,10 @@ def save_to_video(
                 "libx264rgb",
                 "-crf",
                 "0",
-                "-an",  # No audio
-                output_path,
             ]
+            if ffmpeg_preset:
+                command.extend(["-preset", ffmpeg_preset])
+            command.extend(["-an", output_path])  # No audio
         else:
             command = [
                 ffmpeg_exe,
@@ -308,9 +310,10 @@ def save_to_video(
                 "libx264",
                 "-pix_fmt",
                 out_pix,
-                "-an",  # No audio
-                output_path,
             ]
+            if ffmpeg_preset:
+                command.extend(["-preset", ffmpeg_preset])
+            command.extend(["-an", output_path])  # No audio
 
         # Run FFmpeg (stderr to DEVNULL: avoids pipe buffer deadlock; no need to capture for errors)
         process = subprocess.Popen(
@@ -322,14 +325,15 @@ def save_to_video(
         if process.stdin is None:
             raise BrokenPipeError("No stdin buffer received.")
 
-        # Write frames to FFmpeg
-        for frame in frames:
-            # Pad frame if needed
-            if frame.shape[0] < height or frame.shape[1] < width:
-                padded = np.zeros((height, width, 3), dtype=np.uint8)
-                padded[: frame.shape[0], : frame.shape[1]] = frame
-                frame = padded
-            process.stdin.write(frame.tobytes())
+        if frames.shape[1] == height and frames.shape[2] == width:
+            process.stdin.write(np.ascontiguousarray(frames).tobytes())
+        else:
+            for frame in frames:
+                if frame.shape[0] < height or frame.shape[1] < width:
+                    padded = np.zeros((height, width, 3), dtype=np.uint8)
+                    padded[: frame.shape[0], : frame.shape[1]] = frame
+                    frame = padded
+                process.stdin.write(frame.tobytes())
 
         process.stdin.close()
         process.wait()

From 3e020da0c30a5dd767c33c6926d5a173d6f18df5 Mon Sep 17 00:00:00 2001
From: zhenggf <zhenggf@local>
Date: Tue, 30 Jun 2026 14:27:38 +0800
Subject: [PATCH 2/3] perf: avoid extra copies when streaming frames to ffmpeg

---
 lightx2v/utils/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lightx2v/utils/utils.py b/lightx2v/utils/utils.py
index 8a1df1126..e0c40165a 100755
--- a/lightx2v/utils/utils.py
+++ b/lightx2v/utils/utils.py
@@ -326,14 +326,14 @@ def save_to_video(
             raise BrokenPipeError("No stdin buffer received.")
 
         if frames.shape[1] == height and frames.shape[2] == width:
-            process.stdin.write(np.ascontiguousarray(frames).tobytes())
+            process.stdin.write(np.ascontiguousarray(frames))
         else:
             for frame in frames:
                 if frame.shape[0] < height or frame.shape[1] < width:
                     padded = np.zeros((height, width, 3), dtype=np.uint8)
                     padded[: frame.shape[0], : frame.shape[1]] = frame
                     frame = padded
-                process.stdin.write(frame.tobytes())
+                process.stdin.write(np.ascontiguousarray(frame))
 
         process.stdin.close()
         process.wait()

From 498f09f9a559d1483d69960b14e29c94350394d0 Mon Sep 17 00:00:00 2001
From: zhenggf <zhenggf@local>
Date: Wed, 1 Jul 2026 17:04:20 +0800
Subject: [PATCH 3/3] fix: validate ffmpeg preset and document its handling

---
 lightx2v/utils/utils.py | 42 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/lightx2v/utils/utils.py b/lightx2v/utils/utils.py
index e0c40165a..32b253fc5 100755
--- a/lightx2v/utils/utils.py
+++ b/lightx2v/utils/utils.py
@@ -22,6 +22,34 @@
 torch_device_module = getattr(torch, AI_DEVICE)
 
 
+# x264 presets accepted for LIGHTX2V_FFMPEG_PRESET, ordered fastest to slowest.
+_FFMPEG_PRESETS = (
+    "ultrafast",
+    "superfast",
+    "veryfast",
+    "faster",
+    "fast",
+    "medium",
+    "slow",
+    "slower",
+    "veryslow",
+    "placebo",
+)
+
+
+def _get_ffmpeg_preset():
+    """Return the x264 preset named by LIGHTX2V_FFMPEG_PRESET, or None when unset or invalid."""
+    # Normalize so that values such as " Fast " are accepted.
+    candidate = os.environ.get("LIGHTX2V_FFMPEG_PRESET", "").strip().lower()
+    if candidate in _FFMPEG_PRESETS:
+        return candidate
+    if candidate:
+        allowed = ", ".join(_FFMPEG_PRESETS)
+        message = "Invalid LIGHTX2V_FFMPEG_PRESET={!r}; expected one of {}. Ignoring it."
+        logger.warning(message, candidate, allowed)
+    return None
+
+
 def is_main_process():
     return not dist.is_available() or not dist.is_initialized() or dist.get_rank() == 0
 
@@ -231,6 +259,12 @@ def save_to_video(
         method: Save method - "imageio" or "ffmpeg"
         lossless: Whether to use lossless encoding (ffmpeg method only)
         output_pix_fmt: Pixel format for output (ffmpeg method only)
+
+    Environment:
+        LIGHTX2V_FFMPEG_PRESET: Optional x264 preset (ffmpeg method only). Allowed values
+            (case-insensitive) are ultrafast, superfast, veryfast, faster, fast, medium, slow,
+            slower, veryslow, and placebo; any other value is ignored with a warning.
+            Faster presets reduce encoding latency but usually produce larger files.
     """
     assert images.dim() == 4 and images.shape[-1] == 3, "Input must be [N, H, W, C] with C=3"
 
@@ -260,7 +294,7 @@ def save_to_video(
         # Get ffmpeg executable from imageio_ffmpeg
         ffmpeg_exe = ffmpeg.get_ffmpeg_exe()
         out_pix = output_pix_fmt or "yuv420p"
-        ffmpeg_preset = os.environ.get("LIGHTX2V_FFMPEG_PRESET", "").strip()
+        ffmpeg_preset = _get_ffmpeg_preset()
 
         if lossless:
             command = [
@@ -287,7 +321,7 @@ def save_to_video(
             ]
             if ffmpeg_preset:
                 command.extend(["-preset", ffmpeg_preset])
-            command.extend(["-an", output_path])  # No audio
+            command.extend(["-an", output_path])  # Keep the existing no-audio output behavior.
         else:
             command = [
                 ffmpeg_exe,
@@ -313,7 +347,7 @@ def save_to_video(
             ]
             if ffmpeg_preset:
                 command.extend(["-preset", ffmpeg_preset])
-            command.extend(["-an", output_path])  # No audio
+            command.extend(["-an", output_path])  # Keep the existing no-audio output behavior.
 
         # Run FFmpeg (stderr to DEVNULL: avoids pipe buffer deadlock; no need to capture for errors)
         process = subprocess.Popen(
@@ -326,10 +360,12 @@ def save_to_video(
             raise BrokenPipeError("No stdin buffer received.")
 
         if frames.shape[1] == height and frames.shape[2] == width:
+            # Fast path: stream the whole contiguous frame buffer without per-frame bytes copies.
             process.stdin.write(np.ascontiguousarray(frames))
         else:
             for frame in frames:
                 if frame.shape[0] < height or frame.shape[1] < width:
+                    # H.264/yuv420p requires even dimensions, so pad odd-sized frames before encoding.
                     padded = np.zeros((height, width, 3), dtype=np.uint8)
                     padded[: frame.shape[0], : frame.shape[1]] = frame
                     frame = padded