Skip to content

Commit 71c946a

Browse files
committed
[backends] Fix OpenCV seeking with VFR videos
1 parent 3ebc734 commit 71c946a

File tree

2 files changed

+57
-3
lines changed

2 files changed

+57
-3
lines changed

scenedetect/backends/opencv.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,12 +233,22 @@ def seek(self, target: ty.Union[FrameTimecode, float, int]):
233233
target_secs = (self.base_timecode + target).seconds
234234
self._has_grabbed = False
235235
if target_secs > 0:
236-
# Use CAP_PROP_POS_MSEC for time-accurate seeking (correct for both CFR and VFR).
237-
# Seek one frame before the target so the next read() returns the frame at target.
236+
# Seek one frame before target so the next read() returns the frame at target.
238237
one_frame_ms = 1000.0 / float(self._frame_rate)
239238
seek_ms = max(0.0, target_secs * 1000.0 - one_frame_ms)
240239
self._cap.set(cv2.CAP_PROP_POS_MSEC, seek_ms)
241240
self._has_grabbed = self._cap.grab()
241+
if self._has_grabbed:
242+
# VFR correction: set(CAP_PROP_POS_MSEC) converts time using avg_fps internally,
243+
# which can land ~1s too early for VFR video. Read forward until we reach the
244+
# intended position. The threshold (2x one_frame_ms) never triggers for CFR.
245+
actual_ms = self._cap.get(cv2.CAP_PROP_POS_MSEC)
246+
corrections = 0
247+
while actual_ms < seek_ms - 2.0 * one_frame_ms and corrections < 100:
248+
if not self._cap.grab():
249+
break
250+
actual_ms = self._cap.get(cv2.CAP_PROP_POS_MSEC)
251+
corrections += 1
242252
# If we sought past the end, back up one frame.
243253
if not self._has_grabbed:
244254
seek_pos = round(self._cap.get(cv2.CAP_PROP_POS_FRAMES) - 1.0)

tests/test_vfr.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,14 @@
1515
import os
1616
import typing as ty
1717

18+
import cv2
19+
import numpy as np
1820
import pytest
1921

2022
from scenedetect import SceneManager, open_video
21-
from scenedetect.common import FrameTimecode, Timecode
23+
from scenedetect.common import Timecode
2224
from scenedetect.detectors import ContentDetector
25+
from scenedetect.output import save_images
2326
from scenedetect.stats_manager import StatsManager
2427

2528
# Expected scene cuts for `goldeneye-vfr.mp4` detected with ContentDetector() and end_time=10.0s.
@@ -203,3 +206,44 @@ def test_cfr_frame_num_exact(self, test_movie_clip: str):
203206
for expected_frame in range(1, 11):
204207
assert video.read() is not False
205208
assert video.position.frame_num == expected_frame - 1
209+
210+
def test_vfr_save_images_opencv_matches_pyav(self, test_vfr_video: str, tmp_path):
    """Thumbnails written by save-images must agree between the OpenCV and PyAV backends.

    PyAV output is used as the ground truth. If the OpenCV seek off-by-one bug is present, a
    thumbnail ends up showing content from the wrong scene, which yields a very high mean
    squared error against the corresponding PyAV thumbnail.
    """

    def mean_squared_error(reference, candidate) -> float:
        # Compare in float32 so the subtraction cannot wrap around on uint8 pixel data.
        delta = reference.astype(np.float32) - candidate.astype(np.float32)
        return float(np.mean(delta**2))

    # Detect scenes with each backend and export a single image per scene for simplicity.
    scenes_by_backend = {}
    for name in ("pyav", "opencv"):
        image_dir = tmp_path / name
        image_dir.mkdir()
        stream = open_video(test_vfr_video, backend=name)
        manager = SceneManager()
        manager.add_detector(ContentDetector())
        manager.detect_scenes(video=stream)
        scenes_by_backend[name] = manager.get_scene_list()
        assert len(scenes_by_backend[name]) > 0
        save_images(scenes_by_backend[name], stream, num_images=1, output_dir=str(image_dir))

    pyav_imgs = sorted((tmp_path / "pyav").glob("*.jpg"))
    opencv_imgs = sorted((tmp_path / "opencv").glob("*.jpg"))
    assert len(pyav_imgs) > 0
    assert len(pyav_imgs) == len(opencv_imgs), (
        f"Image count mismatch: pyav={len(pyav_imgs)}, opencv={len(opencv_imgs)}"
    )

    # Walk the thumbnails pairwise; content from the wrong scene shows up as a very high MSE.
    mse_limit = 5000
    for pyav_path, opencv_path in zip(pyav_imgs, opencv_imgs, strict=False):
        img_pyav = cv2.imread(str(pyav_path))
        img_opencv = cv2.imread(str(opencv_path))
        assert img_pyav is not None, f"Failed to load {pyav_path}"
        assert img_opencv is not None, f"Failed to load {opencv_path}"
        if img_pyav.shape != img_opencv.shape:
            # Bring the OpenCV image to the PyAV dimensions (width, height) before comparing.
            reference_height, reference_width = img_pyav.shape[0], img_pyav.shape[1]
            img_opencv = cv2.resize(img_opencv, (reference_width, reference_height))
        mse = mean_squared_error(img_pyav, img_opencv)
        assert mse < mse_limit, (
            f"Thumbnail mismatch for {pyav_path.name} vs {opencv_path.name}: MSE={mse:.0f}"
        )

0 commit comments

Comments
 (0)