Skip to content

Commit fb7590f

Browse files
committed
[api] Convert frame-based defaults to temporal values (#531)
Replace frame-based defaults with temporal (time-based) values to properly support variable frame rate (VFR) video. Rename the save-images option --frame-margin to --margin (default 0.1s), and change the min_scene_len default from 15 frames to "0.6s" across all detectors. FlashFilter now accepts temporal values directly. Deprecated options still work but emit a deprecation warning.
1 parent f0f7edb commit fb7590f

File tree

18 files changed

+362
-85
lines changed

18 files changed

+362
-85
lines changed

docs/cli.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -658,11 +658,11 @@ Options
658658

659659
Default: ``3``
660660

661-
.. option:: -m N, --frame-margin N
661+
.. option:: -m DURATION, --margin DURATION
662662

663-
Number of frames to ignore at beginning/end of scenes when saving images. Controls temporal padding on scene boundaries.
663+
Margin from scene boundary for first/last image. Accepts time (``0.1s``), frames (``3``), or timecode (``00:00:00.100``).
664664

665-
Default: ``3``
665+
Default: ``0.1s``
666666

667667
.. option:: -s S, --scale S
668668

scenedetect.cfg

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,9 @@
227227
# Compression amount for png images (0 to 9). Only affects size, not quality.
228228
#compression = 3
229229

230-
# Number of frames to ignore around each scene cut when selecting frames.
231-
#frame-margin = 1
230+
# Margin from scene boundary for first/last image. Accepts time (0.1s),
231+
# frames (3), or timecode (00:00:00.100).
232+
#margin = 0.1s
232233

233234
# Resize by scale factor (0.5 = half, 1.0 = same, 2.0 = double).
234235
#scale = 1.0

scenedetect/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
VideoMetadata,
5454
SceneMetadata,
5555
)
56-
from scenedetect.detector import SceneDetector
56+
from scenedetect.detector import DEFAULT_MIN_SCENE_LEN, SceneDetector
5757
from scenedetect.detectors import (
5858
ContentDetector,
5959
AdaptiveDetector,

scenedetect/_cli/__init__.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1396,12 +1396,18 @@ def split_video_command(
13961396
)
13971397
@click.option(
13981398
"-m",
1399+
"--margin",
1400+
metavar="DURATION",
1401+
default=None,
1402+
type=click.STRING,
1403+
help="Margin from scene boundary for first/last image. Accepts duration (0.1s), frame count (3), or HH:MM:SS.mmm format.%s"
1404+
% (USER_CONFIG.get_help_string("save-images", "margin")),
1405+
)
1406+
@click.option(
13991407
"--frame-margin",
1400-
metavar="N",
14011408
default=None,
1402-
type=click.INT,
1403-
help="Number of frames to ignore at beginning/end of scenes when saving images. Controls temporal padding on scene boundaries.%s"
1404-
% (USER_CONFIG.get_help_string("save-images", "num-images")),
1409+
type=click.STRING,
1410+
hidden=True,
14051411
)
14061412
@click.option(
14071413
"--scale",
@@ -1441,7 +1447,8 @@ def save_images_command(
14411447
quality: ty.Optional[int] = None,
14421448
png: bool = False,
14431449
compression: ty.Optional[int] = None,
1444-
frame_margin: ty.Optional[int] = None,
1450+
margin: ty.Optional[str] = None,
1451+
frame_margin: ty.Optional[str] = None,
14451452
scale: ty.Optional[float] = None,
14461453
height: ty.Optional[int] = None,
14471454
width: ty.Optional[int] = None,
@@ -1487,9 +1494,13 @@ def save_images_command(
14871494
raise click.BadParameter("\n".join(error_strs), param_hint="save-images")
14881495
output = ctx.config.get_value("save-images", "output", output)
14891496

1497+
if frame_margin is not None and margin is None:
1498+
logger.warning("--frame-margin is deprecated, use --margin instead.")
1499+
margin = frame_margin
1500+
14901501
save_images_args = {
14911502
"encoder_param": compression if png else quality,
1492-
"frame_margin": ctx.config.get_value("save-images", "frame-margin", frame_margin),
1503+
"margin": ctx.config.get_value("save-images", "margin", margin),
14931504
"height": height,
14941505
"image_extension": image_extension,
14951506
"filename": ctx.config.get_value("save-images", "filename", filename),

scenedetect/_cli/commands.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def save_images(
180180
scenes: SceneList,
181181
cuts: CutList,
182182
num_images: int,
183-
frame_margin: int,
183+
margin: ty.Union[int, float, str],
184184
image_extension: str,
185185
encoder_param: int,
186186
filename: str,
@@ -199,7 +199,7 @@ def save_images(
199199
scene_list=scenes,
200200
video=context.video_stream,
201201
num_images=num_images,
202-
frame_margin=frame_margin,
202+
margin=margin,
203203
image_extension=image_extension,
204204
encoder_param=encoder_param,
205205
image_name_template=filename,

scenedetect/_cli/config.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ class XmlFormat(Enum):
412412
"compression": RangeValue(3, min_val=0, max_val=9),
413413
"filename": "$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER",
414414
"format": "jpeg",
415-
"frame-margin": 1,
415+
"margin": TimecodeValue("0.1s"),
416416
"height": 0,
417417
"num-images": 3,
418418
"output": None,
@@ -504,6 +504,12 @@ class XmlFormat(Enum):
504504
DEPRECATED_COMMANDS: ty.Dict[str, str] = {"export-html": "save-html"}
505505
"""Deprecated config file sections that have a 1:1 mapping to a new replacement."""
506506

507+
DEPRECATED_OPTIONS: ty.Dict[ty.Tuple[str, str], str] = {
508+
("save-images", "frame-margin"): "margin",
509+
}
510+
"""Deprecated config file options that have a 1:1 mapping to a new replacement.
511+
Keys are (section, old_option) tuples, values are the new option name."""
512+
507513

508514
def _validate_structure(parser: ConfigParser) -> ty.Tuple[bool, ty.List[LogMessage]]:
509515
"""Validates the layout of the section/option mapping. Returns a bool indicating if validation
@@ -538,7 +544,16 @@ def _validate_structure(parser: ConfigParser) -> ty.Tuple[bool, ty.List[LogMessa
538544
logs.append((logging.ERROR, f"Unsupported config section: [{section_name}]"))
539545
continue
540546
for option_name, _ in parser.items(section_name):
541-
if option_name not in CONFIG_MAP[section].keys():
547+
if (section, option_name) in DEPRECATED_OPTIONS:
548+
new_option = DEPRECATED_OPTIONS[(section, option_name)]
549+
logs.append(
550+
(
551+
logging.WARNING,
552+
f"[{section_name}] option `{option_name}` is deprecated,"
553+
f" use `{new_option}` instead.",
554+
)
555+
)
556+
elif option_name not in CONFIG_MAP[section].keys():
542557
success = False
543558
logs.append(
544559
(
@@ -564,6 +579,13 @@ def _parse_config(parser: ConfigParser) -> ty.Tuple[ty.Optional[ConfigDict], ty.
564579
replacement = DEPRECATED_COMMANDS[deprecated_command]
565580
parser[replacement] = parser[deprecated_command]
566581
del parser[deprecated_command]
582+
# Re-map deprecated options to their replacements. Only remap when the new option is not
583+
# already explicitly set (the explicit value should take precedence).
584+
for (section, old_option), new_option in DEPRECATED_OPTIONS.items():
585+
if section in parser and old_option in parser[section]:
586+
if new_option not in parser[section]:
587+
parser[section][new_option] = parser[section][old_option]
588+
parser.remove_option(section, old_option)
567589
for command in CONFIG_MAP:
568590
config[command] = {}
569591
for option in CONFIG_MAP[command]:

scenedetect/_cli/context.py

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ def get_detect_content_params(
314314
self,
315315
threshold: ty.Optional[float] = None,
316316
luma_only: bool = None,
317-
min_scene_len: ty.Optional[str] = None,
317+
min_scene_len: ty.Optional[ty.Union[int, float, str]] = None,
318318
weights: ty.Optional[ty.Tuple[float, float, float, float]] = None,
319319
kernel_size: ty.Optional[int] = None,
320320
filter_mode: ty.Optional[str] = None,
@@ -325,10 +325,9 @@ def get_detect_content_params(
325325
else:
326326
if min_scene_len is None:
327327
if self.config.is_default("detect-content", "min-scene-len"):
328-
min_scene_len = self.min_scene_len.frame_num
328+
min_scene_len = self.min_scene_len.seconds
329329
else:
330330
min_scene_len = self.config.get_value("detect-content", "min-scene-len")
331-
min_scene_len = self.parse_timecode(min_scene_len).frame_num
332331

333332
if weights is not None:
334333
try:
@@ -354,7 +353,7 @@ def get_detect_adaptive_params(
354353
min_content_val: ty.Optional[float] = None,
355354
frame_window: ty.Optional[int] = None,
356355
luma_only: bool = None,
357-
min_scene_len: ty.Optional[str] = None,
356+
min_scene_len: ty.Optional[ty.Union[int, float, str]] = None,
358357
weights: ty.Optional[ty.Tuple[float, float, float, float]] = None,
359358
kernel_size: ty.Optional[int] = None,
360359
) -> ty.Dict[str, ty.Any]:
@@ -365,10 +364,9 @@ def get_detect_adaptive_params(
365364
else:
366365
if min_scene_len is None:
367366
if self.config.is_default("detect-adaptive", "min-scene-len"):
368-
min_scene_len = self.min_scene_len.frame_num
367+
min_scene_len = self.min_scene_len.seconds
369368
else:
370369
min_scene_len = self.config.get_value("detect-adaptive", "min-scene-len")
371-
min_scene_len = self.parse_timecode(min_scene_len).frame_num
372370

373371
if weights is not None:
374372
try:
@@ -395,7 +393,7 @@ def get_detect_threshold_params(
395393
threshold: ty.Optional[float] = None,
396394
fade_bias: ty.Optional[float] = None,
397395
add_last_scene: bool = None,
398-
min_scene_len: ty.Optional[str] = None,
396+
min_scene_len: ty.Optional[ty.Union[int, float, str]] = None,
399397
) -> ty.Dict[str, ty.Any]:
400398
"""Handle detect-threshold command options and return args to construct one with."""
401399

@@ -404,10 +402,9 @@ def get_detect_threshold_params(
404402
else:
405403
if min_scene_len is None:
406404
if self.config.is_default("detect-threshold", "min-scene-len"):
407-
min_scene_len = self.min_scene_len.frame_num
405+
min_scene_len = self.min_scene_len.seconds
408406
else:
409407
min_scene_len = self.config.get_value("detect-threshold", "min-scene-len")
410-
min_scene_len = self.parse_timecode(min_scene_len).frame_num
411408
# TODO(v1.0): add_last_scene cannot be disabled right now.
412409
return {
413410
"add_final_scene": add_last_scene
@@ -421,7 +418,7 @@ def get_detect_hist_params(
421418
self,
422419
threshold: ty.Optional[float] = None,
423420
bins: ty.Optional[int] = None,
424-
min_scene_len: ty.Optional[str] = None,
421+
min_scene_len: ty.Optional[ty.Union[int, float, str]] = None,
425422
) -> ty.Dict[str, ty.Any]:
426423
"""Handle detect-hist command options and return args to construct one with."""
427424

@@ -430,10 +427,9 @@ def get_detect_hist_params(
430427
else:
431428
if min_scene_len is None:
432429
if self.config.is_default("detect-hist", "min-scene-len"):
433-
min_scene_len = self.min_scene_len.frame_num
430+
min_scene_len = self.min_scene_len.seconds
434431
else:
435432
min_scene_len = self.config.get_value("detect-hist", "min-scene-len")
436-
min_scene_len = self.parse_timecode(min_scene_len).frame_num
437433
return {
438434
"bins": self.config.get_value("detect-hist", "bins", bins),
439435
"min_scene_len": min_scene_len,
@@ -445,7 +441,7 @@ def get_detect_hash_params(
445441
threshold: ty.Optional[float] = None,
446442
size: ty.Optional[int] = None,
447443
lowpass: ty.Optional[int] = None,
448-
min_scene_len: ty.Optional[str] = None,
444+
min_scene_len: ty.Optional[ty.Union[int, float, str]] = None,
449445
) -> ty.Dict[str, ty.Any]:
450446
"""Handle detect-hash command options and return args to construct one with."""
451447

@@ -454,10 +450,9 @@ def get_detect_hash_params(
454450
else:
455451
if min_scene_len is None:
456452
if self.config.is_default("detect-hash", "min-scene-len"):
457-
min_scene_len = self.min_scene_len.frame_num
453+
min_scene_len = self.min_scene_len.seconds
458454
else:
459455
min_scene_len = self.config.get_value("detect-hash", "min-scene-len")
460-
min_scene_len = self.parse_timecode(min_scene_len).frame_num
461456
return {
462457
"lowpass": self.config.get_value("detect-hash", "lowpass", lowpass),
463458
"min_scene_len": min_scene_len,

scenedetect/detector.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
event (in, out, cut, etc...).
2525
"""
2626

27+
import math
2728
import typing as ty
2829
from abc import ABC, abstractmethod
2930
from enum import Enum
@@ -33,6 +34,9 @@
3334
from scenedetect.common import FrameTimecode
3435
from scenedetect.stats_manager import StatsManager
3536

37+
DEFAULT_MIN_SCENE_LEN = "0.6s"
38+
"""Default minimum scene length for all detectors."""
39+
3640

3741
class SceneDetector(ABC):
3842
"""Base class to inherit from when implementing a scene detection algorithm.
@@ -114,26 +118,50 @@ class Mode(Enum):
114118
SUPPRESS = 1
115119
"""Suppress consecutive cuts until the filter length has passed."""
116120

117-
def __init__(self, mode: Mode, length: int):
121+
def __init__(self, mode: Mode, length: ty.Union[int, float, str]):
118122
"""
119123
Arguments:
120124
mode: The mode to use when enforcing `length`.
121-
length: Number of frames to use when filtering cuts.
125+
length: Minimum scene length. Can be an int (number of frames), float (seconds),
126+
or str (e.g. ``"0.6s"``, ``"00:00:00.600"``).
122127
"""
123128
self._mode = mode
124-
self._filter_length = length # Number of frames to use for activating the filter.
125-
self._filter_secs: ty.Optional[float] = None # Threshold in seconds, computed on first use.
129+
self._filter_length = length
130+
# Threshold in seconds. Set immediately for temporal values, or computed on first use
131+
# from the video framerate for frame-based (int) values.
132+
self._filter_secs: ty.Optional[float] = None
133+
if isinstance(length, float):
134+
self._filter_secs = length
135+
elif isinstance(length, str) and not length.strip().isdigit():
136+
# Temporal string like "0.6s" or "00:00:00.600" - parse to seconds immediately.
137+
self._filter_secs = FrameTimecode(timecode=length, fps=100.0).seconds
138+
elif isinstance(length, str):
139+
# Digit-only string - treat as frame count, defer until we know the framerate.
140+
self._filter_length = int(length)
126141
self._last_above = None # Last frame above threshold.
127142
self._merge_enabled = False # Used to disable merging until at least one cut was found.
128143
self._merge_triggered = False # True when the merge filter is active.
129144
self._merge_start = None # Frame number where we started the merge filter.
130145

131146
@property
132147
def max_behind(self) -> int:
133-
return 0 if self._mode == FlashFilter.Mode.SUPPRESS else self._filter_length
148+
if self._mode == FlashFilter.Mode.SUPPRESS:
149+
return 0
150+
if isinstance(self._filter_length, int):
151+
return self._filter_length
152+
# For temporal values, estimate using a conservative high framerate to ensure the event
153+
# buffer is large enough. ceil(seconds * 240fps) covers up to 240fps video.
154+
return math.ceil(self._filter_secs * 240.0) if self._filter_secs else 0
155+
156+
@property
157+
def _is_disabled(self) -> bool:
158+
"""Filter is disabled when length is zero."""
159+
if self._filter_secs is not None:
160+
return self._filter_secs <= 0.0
161+
return self._filter_length <= 0
134162

135163
def filter(self, timecode: FrameTimecode, above_threshold: bool) -> ty.List[FrameTimecode]:
136-
if not self._filter_length > 0:
164+
if self._is_disabled:
137165
return [timecode] if above_threshold else []
138166
if self._last_above is None:
139167
self._last_above = timecode

scenedetect/detectors/adaptive_detector.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import numpy as np
2323

2424
from scenedetect.common import FrameTimecode
25+
from scenedetect.detector import DEFAULT_MIN_SCENE_LEN
2526
from scenedetect.detectors import ContentDetector
2627

2728
logger = getLogger("pyscenedetect")
@@ -38,7 +39,7 @@ class AdaptiveDetector(ContentDetector):
3839
def __init__(
3940
self,
4041
adaptive_threshold: float = 3.0,
41-
min_scene_len: int = 15,
42+
min_scene_len: ty.Union[int, float, str] = DEFAULT_MIN_SCENE_LEN,
4243
window_width: int = 2,
4344
min_content_val: float = 15.0,
4445
weights: ContentDetector.Components = ContentDetector.DEFAULT_COMPONENT_WEIGHTS,
@@ -49,8 +50,9 @@ def __init__(
4950
Arguments:
5051
adaptive_threshold: Threshold (float) that score ratio must exceed to trigger a
5152
new scene (see frame metric adaptive_ratio in stats file).
52-
min_scene_len: Once a cut is detected, this many frames must pass before a new one can
53-
be added to the scene list. Can be an int or FrameTimecode type.
53+
min_scene_len: Once a cut is detected, this much time must pass before a new one can
54+
be added to the scene list. Can be an int (frames), float (seconds), or str
55+
(e.g. ``"0.6s"``).
5456
window_width: Size of window (number of frames) before and after each frame to
5557
average together in order to detect deviations from the mean. Must be at least 1.
5658
min_content_val: Minimum threshold (float) that the content_val must exceed in order to

0 commit comments

Comments
 (0)