Skip to content

Commit 2ccc141

Browse files
Copilot and anxiangsir committed
Fix preprocessing order to match CLIP-style (resize then crop)
Co-authored-by: anxiangsir <31175974+anxiangsir@users.noreply.github.com>
1 parent 9cadde7 commit 2ccc141

1 file changed

Lines changed: 20 additions & 11 deletions

File tree

tests/test_consistency.py

Lines changed: 20 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,12 @@
2121
def manual_center_crop_preprocess(image: Image.Image, size: int = DEFAULT_IMAGE_SIZE,
2222
mean: list = None, std: list = None) -> torch.Tensor:
2323
"""
24-
Manual center-crop preprocessing for images.
24+
Manual center-crop preprocessing for images (CLIP-style).
25+
26+
This follows the standard CLIP preprocessing pipeline:
27+
1. Resize the shorter edge to target size (preserving aspect ratio)
28+
2. Center crop to target size x target size
29+
3. Normalize with mean and std
2530
2631
Args:
2732
image: PIL Image to preprocess
@@ -41,20 +46,24 @@ def manual_center_crop_preprocess(image: Image.Image, size: int = DEFAULT_IMAGE_
4146
if image.mode != "RGB":
4247
image = image.convert("RGB")
4348

44-
# Center crop
49+
# Step 1: Resize shorter edge to target size (CLIP-style)
4550
width, height = image.size
46-
min_dim = min(width, height)
51+
if width < height:
52+
new_width = size
53+
new_height = int(height * size / width)
54+
else:
55+
new_height = size
56+
new_width = int(width * size / height)
4757

48-
# Calculate crop box for center crop
49-
left = (width - min_dim) // 2
50-
top = (height - min_dim) // 2
51-
right = left + min_dim
52-
bottom = top + min_dim
58+
image = image.resize((new_width, new_height), Image.Resampling.BICUBIC)
5359

54-
image = image.crop((left, top, right, bottom))
60+
# Step 2: Center crop to target size
61+
left = (new_width - size) // 2
62+
top = (new_height - size) // 2
63+
right = left + size
64+
bottom = top + size
5565

56-
# Resize to target size
57-
image = image.resize((size, size), Image.Resampling.BICUBIC)
66+
image = image.crop((left, top, right, bottom))
5867

5968
# Convert to numpy array and normalize to [0, 1]
6069
img_array = np.array(image).astype(np.float32) / 255.0

0 commit comments

Comments (0)