Skip to content

Commit 5617561

Browse files
yonigozlan and vasqu authored
Fix redundant logic in video processing SmolVLM (huggingface#45272)
fix redundant logic video processing smolvlm

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>
1 parent 0ea540e commit 5617561

1 file changed

Lines changed: 6 additions & 20 deletions

File tree

src/transformers/models/smolvlm/video_processing_smolvlm.py

Lines changed: 6 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@
2222
IMAGENET_STANDARD_STD,
2323
PILImageResampling,
2424
SizeDict,
25-
pil_torch_interpolation_mapping,
2625
)
2726
from ...processing_utils import Unpack, VideosKwargs
2827
from ...utils import TensorType, is_torchvision_available, logging
@@ -148,21 +147,6 @@ def resize(
148147
Returns:
149148
`torch.Tensor`: The resized video.
150149
"""
151-
if resample is not None:
152-
if isinstance(resample, (PILImageResampling, int)):
153-
interpolation = pil_torch_interpolation_mapping[resample]
154-
else:
155-
interpolation = resample
156-
else:
157-
interpolation = tvF.InterpolationMode.BILINEAR
158-
if interpolation == tvF.InterpolationMode.LANCZOS:
159-
logger.warning_once(
160-
"You have used fast image processor with LANCZOS resample which not yet supported for torch.Tensor. "
161-
"BICUBIC resample will be used as an alternative. Please fall back to image processor if you "
162-
"want full consistency with the original model."
163-
)
164-
interpolation = tvF.InterpolationMode.BICUBIC
165-
166150
if size.longest_edge:
167151
# Resize the image so that the shortest edge or the longest edge is of the given size
168152
# while maintaining the aspect ratio of the original image.
@@ -175,12 +159,14 @@ def resize(
175159
else:
176160
raise ValueError(f"Size must contain 'height' and 'width' keys, or 'longest_edge' key. Got {size}.")
177161

178-
video = tvF.resize(video, new_size, interpolation=interpolation, antialias=antialias)
162+
video = super().resize(
163+
video, SizeDict(height=new_size[0], width=new_size[1]), resample=resample, antialias=antialias
164+
)
179165

180166
# Resize again to match image processor when `do_image_splitting=False`. Frames have to be squared to `max_image_size`
181-
# NOTE: videos are always processoed without image splitting
182-
max_size = self.max_image_size["longest_edge"], self.max_image_size["longest_edge"]
183-
video = tvF.resize(video, max_size, interpolation=interpolation, antialias=antialias)
167+
# NOTE: videos are always processed without image splitting
168+
max_size = SizeDict(height=self.max_image_size["longest_edge"], width=self.max_image_size["longest_edge"])
169+
video = super().resize(video, max_size, resample=resample, antialias=antialias)
184170
return video
185171

186172
def pad(

0 commit comments

Comments
 (0)