Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/cli/backends.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@ It is mostly reliable and fast, although can occasionally run into issues proces

The OpenCV backend also supports image sequences as inputs (e.g. ``frame%02d.jpg`` if you want to load frame001.jpg, frame002.jpg, frame003.jpg...). Make sure to specify the framerate manually (``-f``/``--framerate``) to ensure accurate timing calculations.

Variable framerate (VFR) video is supported. Scene detection uses PTS-derived timestamps from ``CAP_PROP_POS_MSEC`` for accurate timecodes. Seeking compensates for OpenCV's average-fps-based internal seek approximation, so output timecodes remain accurate across the full video.


=======================================================================
PyAV
=======================================================================

The `PyAV <https://github.com/PyAV-Org/PyAV>`_ backend (`av package <https://pypi.org/project/av/>`_) is a more robust backend that handles multiple audio tracks and frame decode errors gracefully.

Variable framerate (VFR) video is fully supported. PyAV uses native PTS timestamps directly from the container, giving the most accurate timecodes for VFR content.

This backend can be used by specifying ``-b pyav`` via command line, or setting ``backend = pyav`` under the ``[global]`` section of your :ref:`config file <scenedetect_cli-config_file>`.


Expand All @@ -41,4 +45,6 @@ MoviePy launches ffmpeg as a subprocess, and can be used with various types of i

The MoviePy backend is still under development and is not included with the current Windows distribution. To enable MoviePy support, you must install PySceneDetect using `python` and `pip`.

Variable framerate (VFR) video is **not supported**. MoviePy assumes a fixed framerate, so timecodes for VFR content will be inaccurate. Use the PyAV or OpenCV backend instead.

This backend can be used by specifying ``-b moviepy`` via command line, or setting ``backend = moviepy`` under the ``[global]`` section of your :ref:`config file <scenedetect_cli-config_file>`.
33 changes: 19 additions & 14 deletions scenedetect/_cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,17 +401,19 @@ def _save_xml_fcp(
sequence = ElementTree.SubElement(project, "sequence")
ElementTree.SubElement(sequence, "name").text = context.video_stream.name

fps = float(context.video_stream.frame_rate)
ntsc = "True" if context.video_stream.frame_rate.denominator != 1 else "False"
duration = scenes[-1][1] - scenes[0][0]
ElementTree.SubElement(sequence, "duration").text = f"{duration.frame_num}"
ElementTree.SubElement(sequence, "duration").text = str(round(duration.seconds * fps))

rate = ElementTree.SubElement(sequence, "rate")
ElementTree.SubElement(rate, "timebase").text = str(context.video_stream.frame_rate)
ElementTree.SubElement(rate, "ntsc").text = "False"
ElementTree.SubElement(rate, "timebase").text = str(round(fps))
ElementTree.SubElement(rate, "ntsc").text = ntsc

timecode = ElementTree.SubElement(sequence, "timecode")
tc_rate = ElementTree.SubElement(timecode, "rate")
ElementTree.SubElement(tc_rate, "timebase").text = str(context.video_stream.frame_rate)
ElementTree.SubElement(tc_rate, "ntsc").text = "False"
ElementTree.SubElement(tc_rate, "timebase").text = str(round(fps))
ElementTree.SubElement(tc_rate, "ntsc").text = ntsc
ElementTree.SubElement(timecode, "frame").text = "0"
ElementTree.SubElement(timecode, "displayformat").text = "NDF"

Expand All @@ -427,13 +429,13 @@ def _save_xml_fcp(
ElementTree.SubElement(clip, "name").text = f"Shot {i + 1}"
ElementTree.SubElement(clip, "enabled").text = "TRUE"
ElementTree.SubElement(clip, "rate").append(
ElementTree.fromstring(f"<timebase>{context.video_stream.frame_rate}</timebase>")
ElementTree.fromstring(f"<timebase>{round(fps)}</timebase>")
)
# TODO: Are these supposed to be frame numbers or another format?
ElementTree.SubElement(clip, "start").text = str(start.frame_num)
ElementTree.SubElement(clip, "end").text = str(end.frame_num)
ElementTree.SubElement(clip, "in").text = str(start.frame_num)
ElementTree.SubElement(clip, "out").text = str(end.frame_num)
# Frame numbers relative to the declared <timebase> fps, computed from PTS seconds.
ElementTree.SubElement(clip, "start").text = str(round(start.seconds * fps))
ElementTree.SubElement(clip, "end").text = str(round(end.seconds * fps))
ElementTree.SubElement(clip, "in").text = str(round(start.seconds * fps))
ElementTree.SubElement(clip, "out").text = str(round(end.seconds * fps))

file_ref = ElementTree.SubElement(clip, "file", id=f"file{i + 1}")
ElementTree.SubElement(file_ref, "name").text = context.video_stream.name
Expand Down Expand Up @@ -485,6 +487,9 @@ def save_xml(
logger.error(f"Unknown format: {format}")


# TODO: We have to export framerate as a float for OTIO's current format. When OTIO supports
# fractional timecodes, we should export the framerate as a rational number instead.
# https://github.com/AcademySoftwareFoundation/OpenTimelineIO/issues/190
def save_otio(
context: CliContext,
scenes: SceneList,
Expand All @@ -501,7 +506,7 @@ def save_otio(
video_name = context.video_stream.name
video_path = os.path.abspath(context.video_stream.path)
video_base_name = os.path.basename(context.video_stream.path)
frame_rate = context.video_stream.frame_rate
frame_rate = float(context.video_stream.frame_rate)

# List of track mapping to resource type.
# TODO(https://scenedetect.com/issues/497): Allow OTIO export without an audio track.
Expand Down Expand Up @@ -534,12 +539,12 @@ def save_otio(
"duration": {
"OTIO_SCHEMA": "RationalTime.1",
"rate": frame_rate,
"value": float((end - start).frame_num),
"value": round((end - start).seconds * frame_rate, 6),
},
"start_time": {
"OTIO_SCHEMA": "RationalTime.1",
"rate": frame_rate,
"value": float(start.frame_num),
"value": round(start.seconds * frame_rate, 6),
},
},
"enabled": True,
Expand Down
22 changes: 13 additions & 9 deletions scenedetect/backends/moviepy.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@
"""

import typing as ty
from fractions import Fraction
from logging import getLogger

import cv2
import numpy as np
from moviepy.video.io.ffmpeg_reader import FFMPEG_VideoReader

from scenedetect.backends.opencv import VideoStreamCv2
from scenedetect.common import _USE_PTS_IN_DEVELOPMENT, FrameTimecode
from scenedetect.common import FrameTimecode, Timecode, framerate_to_fraction
from scenedetect.platform import get_file_name
from scenedetect.video_stream import SeekError, VideoOpenFailure, VideoStream

Expand Down Expand Up @@ -83,9 +84,9 @@ def __init__(
"""Unique name used to identify this backend."""

@property
def frame_rate(self) -> float:
"""Framerate in frames/sec."""
return self._reader.fps
def frame_rate(self) -> Fraction:
"""Framerate in frames/sec as a rational Fraction."""
return framerate_to_fraction(self._reader.fps)

@property
def path(self) -> ty.Union[bytes, str]:
Expand Down Expand Up @@ -135,7 +136,14 @@ def position(self) -> FrameTimecode:
calling `read`. This will always return 0 (i.e. be equal to `base_timecode`) if no frames
have been `read` yet."""
frame_number = max(self._frame_number - 1, 0)
return FrameTimecode(frame_number, self.frame_rate)
# Synthesize a Timecode from the frame count and rational framerate.
# MoviePy assumes CFR, so this is equivalent to frame-based timing.
# Use the framerate denominator as the time_base denominator for exact timing.
fps = self.frame_rate
time_base = Fraction(1, fps.numerator)
pts = frame_number * fps.denominator
timecode = Timecode(pts=pts, time_base=time_base)
return FrameTimecode(timecode=timecode, fps=fps)

@property
def position_ms(self) -> float:
Expand Down Expand Up @@ -173,10 +181,6 @@ def seek(self, target: ty.Union[FrameTimecode, float, int]):
ValueError: `target` is not a valid value (i.e. it is negative).
"""
success = False
if _USE_PTS_IN_DEVELOPMENT:
# TODO(https://scenedetect.com/issue/168): Need to handle PTS here.
raise NotImplementedError()

if not isinstance(target, FrameTimecode):
target = FrameTimecode(target, self.frame_rate)
try:
Expand Down
90 changes: 47 additions & 43 deletions scenedetect/backends/opencv.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import cv2
import numpy as np

from scenedetect.common import _USE_PTS_IN_DEVELOPMENT, MAX_FPS_DELTA, FrameTimecode, Timecode
from scenedetect.common import MAX_FPS_DELTA, FrameTimecode, Timecode, framerate_to_fraction
from scenedetect.platform import get_file_name
from scenedetect.video_stream import (
FrameRateUnavailable,
Expand Down Expand Up @@ -111,7 +111,7 @@ def __init__(
self._cap: ty.Optional[cv2.VideoCapture] = (
None # Reference to underlying cv2.VideoCapture object.
)
self._frame_rate: ty.Optional[float] = None
self._frame_rate: ty.Optional[Fraction] = None

# VideoCapture state
self._has_grabbed = False
Expand Down Expand Up @@ -144,7 +144,7 @@ def capture(self) -> cv2.VideoCapture:
"""Unique name used to identify this backend."""

@property
def frame_rate(self) -> float:
def frame_rate(self) -> Fraction:
assert self._frame_rate
return self._frame_rate

Expand Down Expand Up @@ -196,30 +196,25 @@ def aspect_ratio(self) -> float:

@property
def timecode(self) -> Timecode:
"""Current position within stream as a Timecode. This is not frame accurate."""
"""Current position within stream as a Timecode."""
# *NOTE*: Although OpenCV has `CAP_PROP_PTS`, it doesn't seem to be reliable. For now, we
# use `CAP_PROP_POS_MSEC` instead, with a time base of 1/1000. Unfortunately this means that
# rounding errors will affect frame accuracy with this backend.
pts = self._cap.get(cv2.CAP_PROP_POS_MSEC)
time_base = Fraction(1, 1000)
return Timecode(pts=round(pts), time_base=time_base)
# use `CAP_PROP_POS_MSEC` instead, converting to microseconds for sufficient precision to
# avoid frame-boundary rounding errors at common framerates like 24000/1001.
ms = self._cap.get(cv2.CAP_PROP_POS_MSEC)
time_base = Fraction(1, 1000000)
return Timecode(pts=round(ms * 1000), time_base=time_base)

@property
def position(self) -> FrameTimecode:
# TODO(https://scenedetect.com/issue/168): See if there is a better way to do this, or
# add a config option before landing this.
if _USE_PTS_IN_DEVELOPMENT:
timecode = self.timecode
# If PTS is 0 but we've read frames, derive from frame number.
# This handles image sequences and cases where CAP_PROP_POS_MSEC is unreliable.
if timecode.pts == 0 and self.frame_number > 0:
time_sec = (self.frame_number - 1) / self.frame_rate
pts = round(time_sec * 1000)
timecode = Timecode(pts=pts, time_base=Fraction(1, 1000))
return FrameTimecode(timecode=timecode, fps=self.frame_rate)
if self.frame_number < 1:
return self.base_timecode
return self.base_timecode + (self.frame_number - 1)
timecode = self.timecode
# If PTS is 0 but we've read frames, derive from frame number.
# This handles image sequences and cases where CAP_PROP_POS_MSEC is unreliable.
if timecode.pts == 0 and self.frame_number > 0:
fps = self.frame_rate
time_base = Fraction(1, fps.numerator)
pts = (self.frame_number - 1) * fps.denominator
timecode = Timecode(pts=pts, time_base=time_base)
return FrameTimecode(timecode=timecode, fps=self.frame_rate)

@property
def position_ms(self) -> float:
Expand All @@ -235,23 +230,32 @@ def seek(self, target: ty.Union[FrameTimecode, float, int]):
if target < 0:
raise ValueError("Target seek position cannot be negative!")

# TODO(https://scenedetect.com/issue/168): Shouldn't use frames for VFR video here.
# Have to seek one behind and call grab() after so that the VideoCapture
# returns a valid timestamp when using CAP_PROP_POS_MSEC.
target_frame_cv2 = (self.base_timecode + target).frame_num
if target_frame_cv2 > 0:
target_frame_cv2 -= 1
self._cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame_cv2)
target_secs = (self.base_timecode + target).seconds
self._has_grabbed = False
# Preemptively grab the frame behind the target position if possible.
if target > 0:
if target_secs > 0:
# Seek one frame before target so the next read() returns the frame at target.
one_frame_ms = 1000.0 / float(self._frame_rate)
seek_ms = max(0.0, target_secs * 1000.0 - one_frame_ms)
self._cap.set(cv2.CAP_PROP_POS_MSEC, seek_ms)
self._has_grabbed = self._cap.grab()
# If we seeked past the end of the video, need to seek one frame backwards
# from the current position and grab that frame instead.
if self._has_grabbed:
# VFR correction: set(CAP_PROP_POS_MSEC) converts time using avg_fps internally,
# which can land ~1s too early for VFR video. Read forward until we reach the
# intended position. The threshold (2x one_frame_ms) never triggers for CFR.
actual_ms = self._cap.get(cv2.CAP_PROP_POS_MSEC)
corrections = 0
while actual_ms < seek_ms - 2.0 * one_frame_ms and corrections < 100:
if not self._cap.grab():
break
actual_ms = self._cap.get(cv2.CAP_PROP_POS_MSEC)
corrections += 1
# If we seeked past the end, back up one frame.
if not self._has_grabbed:
seek_pos = round(self._cap.get(cv2.CAP_PROP_POS_FRAMES) - 1.0)
self._cap.set(cv2.CAP_PROP_POS_FRAMES, max(0, seek_pos))
self._has_grabbed = self._cap.grab()
else:
self._cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

def reset(self):
"""Close and re-open the VideoStream (should be equivalent to calling `seek(0)`)."""
Expand Down Expand Up @@ -329,14 +333,11 @@ def _open_capture(self, framerate: ty.Optional[float] = None):
raise FrameRateUnavailable()

self._cap = cap
self._frame_rate = framerate
self._frame_rate = framerate_to_fraction(framerate)
self._has_grabbed = False
cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 1.0) # https://github.com/opencv/opencv/issues/26795


# TODO(https://scenedetect.com/issues/168): Support non-monotonic timing for `position`. VFR timecode
# support is a prerequisite for this. Timecodes are currently calculated by multiplying the
# framerate by number of frames. Actual elapsed time can be obtained via `position_ms` for now.
class VideoCaptureAdapter(VideoStream):
"""Adapter for existing VideoCapture objects. Unlike VideoStreamCv2, this class supports
VideoCaptures which may not support seeking.
Expand Down Expand Up @@ -378,7 +379,7 @@ def __init__(
raise FrameRateUnavailable()

self._cap = cap
self._frame_rate: float = framerate
self._frame_rate: Fraction = framerate_to_fraction(framerate)
self._num_frames = 0
self._max_read_attempts = max_read_attempts
self._decode_failures = 0
Expand Down Expand Up @@ -408,7 +409,7 @@ def capture(self) -> cv2.VideoCapture:
"""Unique name used to identify this backend."""

@property
def frame_rate(self) -> float:
def frame_rate(self) -> Fraction:
"""Framerate in frames/sec."""
assert self._frame_rate
return self._frame_rate
Expand Down Expand Up @@ -439,8 +440,6 @@ def frame_size(self) -> ty.Tuple[int, int]:
@property
def duration(self) -> ty.Optional[FrameTimecode]:
"""Duration of the stream as a FrameTimecode, or None if non terminating."""
# TODO(https://scenedetect.com/issue/168): This will be incorrect for VFR. See if there is
# another property we can use to estimate the video length correctly.
frame_count = math.trunc(self._cap.get(cv2.CAP_PROP_FRAME_COUNT))
if frame_count > 0:
return self.base_timecode + frame_count
Expand All @@ -455,7 +454,12 @@ def aspect_ratio(self) -> float:
def position(self) -> FrameTimecode:
if self.frame_number < 1:
return self.base_timecode
return self.base_timecode + (self.frame_number - 1)
# Synthesize a Timecode from frame count and rational framerate.
fps = self.frame_rate
time_base = Fraction(1, fps.numerator)
pts = (self.frame_number - 1) * fps.denominator
timecode = Timecode(pts=pts, time_base=time_base)
return FrameTimecode(timecode=timecode, fps=fps)

@property
def position_ms(self) -> float:
Expand Down
Loading
Loading