Skip to content

Commit edddcbe

Browse files
committed
[output] Replace frame_margin with temporal_margin property in save_images API
1 parent d75833f commit edddcbe

File tree

4 files changed

+311
-82
lines changed

4 files changed

+311
-82
lines changed

scenedetect.cfg

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,9 +227,13 @@
227227
# Compression amount for png images (0 to 9). Only affects size, not quality.
228228
#compression = 3
229229

230-
# Number of frames to ignore around each scene cut when selecting frames.
230+
# [DEPRECATED] Number of frames to ignore around each scene cut when selecting frames.
231+
# TODO(v0.7): Remove this and add backwards compatibility helpers.
231232
#frame-margin = 1
232233

234+
# Amount of time to ignore at the beginning/end of a scene when selecting frames.
235+
#temporal-margin = 0.04s
236+
233237
# Resize by scale factor (0.5 = half, 1.0 = same, 2.0 = double).
234238
#scale = 1.0
235239

scenedetect/common.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,44 @@ def __sub__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> "FrameTi
656656
to_return -= other
657657
return to_return
658658

659+
def __mul__(self, factor: ty.Union[int, float]) -> "FrameTimecode":
    """Scale this timecode by a numeric factor and return the result as a new object.

    Arguments:
        factor: Scalar (int or float) to multiply the timecode by.

    Returns:
        A new FrameTimecode copied from this one with its time value scaled. Returns
        `NotImplemented` when `factor` is not an int or float so Python can try the
        reflected operation on the other operand.
    """
    if not isinstance(factor, (int, float)):
        return NotImplemented
    scaled = FrameTimecode(timecode=self)
    current = scaled._time
    if isinstance(current, Timecode):
        # PTS is stored as an integer count of `time_base` units: round, and never go below 0.
        new_pts = max(0, round(current.pts * factor))
        scaled._time = Timecode(pts=new_pts, time_base=current.time_base)
    elif isinstance(current, _Seconds):
        # Seconds are kept as-is (no rounding), only clamped at zero.
        scaled._time = _Seconds(max(0.0, current.value * factor))
    else:
        # Any other representation is treated as a frame number: round and clamp at zero.
        scaled._time = _FrameNumber(max(0, round(current.value * factor)))
    return scaled
674+
675+
def __rmul__(self, factor: ty.Union[int, float]) -> "FrameTimecode":
    """Reflected multiplication (`factor * timecode`).

    Scalar multiplication is commutative, so this simply forwards to `__mul__` and
    returns whatever it produces (a new FrameTimecode, or `NotImplemented` for
    unsupported operand types).
    """
    product = self.__mul__(factor)
    return product
678+
679+
def __truediv__(self, divisor: ty.Union[int, float]) -> "FrameTimecode":
    """Divide this timecode by a numeric divisor and return the result as a new object.

    Arguments:
        divisor: Scalar (int or float) to divide the timecode by. Must be non-zero.

    Returns:
        A new FrameTimecode copied from this one with its time value divided. Returns
        `NotImplemented` when `divisor` is not an int or float.

    Raises:
        ZeroDivisionError: If `divisor` is zero.
    """
    if not isinstance(divisor, (int, float)):
        return NotImplemented
    if divisor == 0:
        raise ZeroDivisionError("Cannot divide FrameTimecode by zero")
    quotient = FrameTimecode(timecode=self)
    current = quotient._time
    if isinstance(current, Timecode):
        # PTS is stored as an integer count of `time_base` units: round, and never go below 0.
        new_pts = max(0, round(current.pts / divisor))
        quotient._time = Timecode(pts=new_pts, time_base=current.time_base)
    elif isinstance(current, _Seconds):
        # Seconds are kept as-is (no rounding), only clamped at zero.
        quotient._time = _Seconds(max(0.0, current.value / divisor))
    else:
        # Any other representation is treated as a frame number: round and clamp at zero.
        quotient._time = _FrameNumber(max(0, round(current.value / divisor)))
    return quotient
696+
659697
# TODO(v1.0): __int__ and __float__ should be removed. Mark as deprecated, and indicate
660698
# need to use relevant property instead.
661699

scenedetect/output/image.py

Lines changed: 67 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ def __init__(
7777
height: ty.Optional[int] = None,
7878
width: ty.Optional[int] = None,
7979
interpolation: Interpolation = Interpolation.CUBIC,
80+
temporal_margin: ty.Optional[FrameTimecode] = None,
8081
):
8182
"""Multi-threaded implementation of save-images functionality. Uses background threads to
8283
handle image encoding and saving images to disk to improve parallelism.
@@ -85,10 +86,10 @@ def __init__(
8586
8687
Arguments:
8788
num_images: Number of images to generate for each scene. Minimum is 1.
88-
frame_margin: Number of frames to pad each scene around the beginning
89+
frame_margin: [DEPRECATED] Number of frames to pad each scene around the beginning
8990
and end (e.g. moves the first/last image into the scene by N frames).
9091
Can set to 0, but will result in some video files failing to extract
91-
the very last frame.
92+
the very last frame. Use `temporal_margin` instead.
9293
image_extension: Type of image to save (must be one of 'jpg', 'png', or 'webp').
9394
encoder_param: Quality/compression efficiency, based on type of image:
9495
'jpg' / 'webp': Quality 0-100, higher is better quality. 100 is lossless for webp.
@@ -109,9 +110,14 @@ def __init__(
109110
Specifying only width will rescale the image to that number of pixels wide
110111
while preserving the aspect ratio.
111112
interpolation: Type of interpolation to use when resizing images.
113+
temporal_margin: Amount of time to ignore at the beginning/end of a scene when
114+
selecting frames. Can be specified as frames (int), seconds (float), or timecode
115+
string when creating the FrameTimecode. Uses presentation time (PTS) for selection.
116+
When set, takes precedence over `frame_margin`.
112117
"""
113118
self._num_images = num_images
114119
self._frame_margin = frame_margin
120+
self._temporal_margin = temporal_margin
115121
self._image_extension = image_extension
116122
self._image_name_template = image_name_template
117123
self._scale = scale
@@ -290,48 +296,55 @@ def image_save_thread(self, save_queue: queue.Queue, progress_bar: tqdm):
290296
if progress_bar is not None:
291297
progress_bar.update(1)
292298

299+
def _generate_scene_timecodes(
    self, start: FrameTimecode, end: FrameTimecode
) -> ty.Iterable[FrameTimecode]:
    """Yield the timecodes of the images to extract from one scene.

    The scene is first shrunk by a margin on both sides, then `self._num_images`
    timecodes are spread across what remains using timecode arithmetic.

    Arguments:
        start: Start of the scene.
        end: End of the scene.

    Yields:
        One FrameTimecode per image, in increasing order. A single image is placed at
        the midpoint of the shrunken range; otherwise the first and last images sit on
        the range boundaries and the rest are evenly spaced between them.
    """
    # `temporal_margin` wins when provided; otherwise the deprecated frame count is
    # converted using the scene's framerate (non-positive counts mean "no margin").
    if self._temporal_margin is not None:
        margin = self._temporal_margin
    else:
        margin_frames = self._frame_margin if self._frame_margin > 0 else 0
        margin = FrameTimecode(margin_frames, fps=start.framerate)

    # Shrink the scene by the margin, without ever leaving [start, end].
    # NOTE(review): with a zero margin the last sample equals `end`; confirm whether
    # `end` is an inclusive boundary for callers, or whether it belongs to the next scene.
    lower = min(start + margin, end)
    upper = max(end - margin, start)

    # Scene shorter than twice the margin: the bounds crossed, so collapse both onto
    # the midpoint of the scene and take every image from there.
    if lower > upper:
        lower = upper = start + (end - start) / 2

    count = self._num_images
    if count == 1:
        # A lone image comes from the centre of the usable range.
        yield lower + (upper - lower) / 2
        return
    if count == 2:
        yield lower
        yield upper
        return

    # Three or more images: pin the endpoints and interpolate the interior ones.
    span = upper - lower
    final_index = count - 1
    for index in range(count):
        if index == 0:
            yield lower
        elif index == final_index:
            yield upper
        else:
            yield lower + span * (index / final_index)
343+
293344
def generate_timecode_list(self, scene_list: SceneList) -> ty.List[ty.Iterable[FrameTimecode]]:
294345
"""Generates a list of timecodes for each scene in `scene_list` based on the current config
295346
parameters."""
296-
# TODO(v0.7): This needs to be fixed as part of PTS overhaul.
297-
framerate = scene_list[0][0].framerate
298-
# TODO(v1.0): Split up into multiple sub-expressions so auto-formatter works correctly.
299-
return [
300-
(
301-
FrameTimecode(int(f), fps=framerate)
302-
for f in (
303-
# middle frames
304-
a[len(a) // 2]
305-
if (0 < j < self._num_images - 1) or self._num_images == 1
306-
# first frame
307-
else min(a[0] + self._frame_margin, a[-1])
308-
if j == 0
309-
# last frame
310-
else max(a[-1] - self._frame_margin, a[0])
311-
# for each evenly-split array of frames in the scene list
312-
for j, a in enumerate(np.array_split(r, self._num_images))
313-
)
314-
)
315-
for r in (
316-
# pad ranges to number of images
317-
r
318-
if 1 + r[-1] - r[0] >= self._num_images
319-
else list(r) + [r[-1]] * (self._num_images - len(r))
320-
# create range of frames in scene
321-
for r in (
322-
range(
323-
start.frame_num,
324-
start.frame_num
325-
+ max(
326-
1, # guard against zero length scenes
327-
end.frame_num - start.frame_num,
328-
),
329-
)
330-
# for each scene in scene list
331-
for start, end in scene_list
332-
)
333-
)
334-
]
347+
return [self._generate_scene_timecodes(start, end) for start, end in scene_list]
335348

336349
def resize_image(
337350
self,
@@ -358,6 +371,7 @@ def save_images(
358371
width: ty.Optional[int] = None,
359372
interpolation: Interpolation = Interpolation.CUBIC,
360373
threading: bool = True,
374+
temporal_margin: ty.Optional[FrameTimecode] = None,
361375
) -> ty.Dict[int, ty.List[str]]:
362376
"""Save a set number of images from each scene, given a list of scenes
363377
and the associated video/frame source.
@@ -371,7 +385,7 @@ def save_images(
371385
frame_margin: Number of frames to pad each scene around the beginning
372386
and end (e.g. moves the first/last image into the scene by N frames).
373387
Can set to 0, but will result in some video files failing to extract
374-
the very last frame.
388+
the very last frame. Discarded if `temporal_margin` is set.
375389
image_extension: Type of image to save (must be one of 'jpg', 'png', or 'webp').
376390
encoder_param: Quality/compression efficiency, based on type of image:
377391
'jpg' / 'webp': Quality 0-100, higher is better quality. 100 is lossless for webp.
@@ -396,6 +410,9 @@ def save_images(
396410
while preserving the aspect ratio.
397411
interpolation: Type of interpolation to use when resizing images.
398412
threading: Offload image encoding and disk IO to background threads to improve performance.
413+
temporal_margin: Amount of time to pad each scene around the beginning and end. Takes
414+
precedence over `frame_margin` when set. Can be created from seconds (float), frames
415+
(int), or timecode string.
399416
400417
Returns:
401418
Dictionary of the format { scene_num : [image_paths] }, where scene_num is the
@@ -432,6 +449,7 @@ def save_images(
432449
height,
433450
width,
434451
interpolation,
452+
temporal_margin,
435453
)
436454
return extractor.run(video, scene_list, output_dir, show_progress)
437455

@@ -451,45 +469,13 @@ def save_images(
451469
image_num_format = "%0"
452470
image_num_format += str(math.floor(math.log(num_images, 10)) + 2) + "d"
453471

454-
framerate = scene_list[0][0]._rate
455-
456-
# TODO(v1.0): Split up into multiple sub-expressions so auto-formatter works correctly.
457-
timecode_list = [
458-
[
459-
FrameTimecode(int(f), fps=framerate)
460-
for f in (
461-
# middle frames
462-
a[len(a) // 2]
463-
if (0 < j < num_images - 1) or num_images == 1
464-
# first frame
465-
else min(a[0] + frame_margin, a[-1])
466-
if j == 0
467-
# last frame
468-
else max(a[-1] - frame_margin, a[0])
469-
# for each evenly-split array of frames in the scene list
470-
for j, a in enumerate(np.array_split(r, num_images))
471-
)
472-
]
473-
for i, r in enumerate(
474-
[
475-
# pad ranges to number of images
476-
r if 1 + r[-1] - r[0] >= num_images else list(r) + [r[-1]] * (num_images - len(r))
477-
# create range of frames in scene
478-
for r in (
479-
range(
480-
start.frame_num,
481-
start.frame_num
482-
+ max(
483-
1, # guard against zero length scenes
484-
end.frame_num - start.frame_num,
485-
),
486-
)
487-
# for each scene in scene list
488-
for start, end in scene_list
489-
)
490-
]
491-
)
492-
]
472+
# Use _ImageExtractor to generate timecodes (shares logic with threaded path)
473+
extractor = _ImageExtractor(
474+
num_images=num_images,
475+
frame_margin=frame_margin,
476+
temporal_margin=temporal_margin,
477+
)
478+
timecode_list = [list(tc) for tc in extractor.generate_timecode_list(scene_list)]
493479

494480
image_filenames = {i: [] for i in range(len(timecode_list))}
495481
aspect_ratio = video.aspect_ratio

0 commit comments

Comments
 (0)