Skip to content

Commit e285edf

Browse files
authored
Extend orientation beyond UP (pytorch#20088)
Differential Revision: D107156015 Pull Request resolved: pytorch#20088
1 parent d7f1ccb commit e285edf

11 files changed

Lines changed: 708 additions & 76 deletions

extension/apple/ExecuTorch/Exported/ExecuTorch+ImageProcessor.swift

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -64,33 +64,50 @@ public extension ImageProcessor {
6464
/// RGBA, 8-bit NV12, and 10-bit P010. Output is a `Tensor<Float>` with
6565
/// shape `[1, 3, target_height, target_width]`.
6666
///
67-
/// The buffer is treated as already upright: orientation correction is not
68-
/// applied and cannot be derived from a CVPixelBuffer, so the caller is
69-
/// responsible for supplying an upright buffer.
70-
func process(_ pixelBuffer: CVPixelBuffer) throws -> Tensor<Float> {
71-
let anyTensor = try processPixelBuffer(pixelBuffer)
67+
/// `orientation` is the EXIF orientation of the buffer's contents; the
68+
/// pipeline rotates the contents upright before resizing. The orientation cannot be derived from a
69+
/// CVPixelBuffer, so the caller supplies it (defaults to `.up`).
70+
func process(
71+
_ pixelBuffer: CVPixelBuffer,
72+
orientation: ImageOrientation = .up
73+
) throws -> Tensor<Float> {
74+
let anyTensor = try processPixelBuffer(pixelBuffer, orientation: orientation)
7275
return Tensor<Float>(anyTensor)
7376
}
7477

7578
/// Process a CVPixelBuffer into a caller-provided tensor, reusing its storage.
7679
///
77-
/// Avoids the per-call allocation of `process(_:)`, which matters for
78-
/// sustained video. `tensor` must be a `Tensor<Float>` with shape
80+
/// Avoids the per-call allocation of `process(_:orientation:)`, which matters
81+
/// for sustained video. `tensor` must be a `Tensor<Float>` with shape
7982
/// `[1, 3, target_height, target_width]`; its storage is overwritten and can
8083
/// be reused across frames. The contents are valid until the next call that
8184
/// writes into the same tensor.
8285
///
83-
/// The buffer is treated as already upright (see `process(_:)`).
84-
func process(_ pixelBuffer: CVPixelBuffer, into tensor: Tensor<Float>) throws {
85-
try processPixelBuffer(pixelBuffer, into: tensor.anyTensor)
86+
/// `orientation` matches `process(_:orientation:)` (defaults to `.up`).
87+
func process(
88+
_ pixelBuffer: CVPixelBuffer,
89+
orientation: ImageOrientation = .up,
90+
into tensor: Tensor<Float>
91+
) throws {
92+
try processPixelBuffer(
93+
pixelBuffer, orientation: orientation, into: tensor.anyTensor)
8694
}
8795

8896
/// Letterbox padding (per side, in pixels) applied for a source of the given
8997
/// size: `x` is the left/right pad and `y` the top/bottom pad of the resized
9098
/// content. Returns `(0, 0)` for the stretch resize mode or the top-left
9199
/// anchor. Lets callers map the padded output back to the source region.
92-
func computeLetterboxPadding(inputWidth: Int, inputHeight: Int) -> (x: Int, y: Int) {
93-
let padding = __computeLetterboxPadding(forInputWidth: inputWidth, height: inputHeight)
100+
///
101+
/// `orientation` is the EXIF orientation of the source (defaults to `.up`);
102+
/// the dimensions are oriented before the padding is computed, matching the
103+
/// geometry `process(_:orientation:)` produces.
104+
func computeLetterboxPadding(
105+
inputWidth: Int,
106+
inputHeight: Int,
107+
orientation: ImageOrientation = .up
108+
) -> (x: Int, y: Int) {
109+
let padding = __computeLetterboxPadding(
110+
forInputWidth: inputWidth, height: inputHeight, orientation: orientation)
94111
return (padding.x, padding.y)
95112
}
96113
}

extension/apple/ExecuTorch/Exported/ExecuTorchImageProcessor.h

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ typedef struct ExecuTorchImageLetterboxPadding {
3030
NSInteger y;
3131
} ExecuTorchImageLetterboxPadding NS_SWIFT_NAME(ImageLetterboxPadding);
3232

33+
/// EXIF orientation of the source image. The pipeline rotates the content
34+
/// upright before resizing. Only these four rotation codes are supported; the mirrored EXIF orientations (2, 4, 5, 7) have no corresponding case.
35+
typedef NS_ENUM(uint8_t, ExecuTorchImageOrientation) {
36+
ExecuTorchImageOrientationUp = 1, // no rotation
37+
ExecuTorchImageOrientationDown = 3, // 180 degrees
38+
ExecuTorchImageOrientationRight = 6, // 90 degrees clockwise
39+
ExecuTorchImageOrientationLeft = 8, // 90 degrees counter-clockwise
40+
} NS_SWIFT_NAME(ImageOrientation);
3341
NS_SWIFT_NAME(ImageNormalization)
3442
__attribute__((objc_subclassing_restricted))
3543
@interface ExecuTorchImageNormalization : NSObject
@@ -93,36 +101,52 @@ __attribute__((objc_subclassing_restricted))
93101

94102
- (instancetype)initWithConfig:(ExecuTorchImageProcessorConfig *)config;
95103

104+
/// Process a CVPixelBuffer into a normalized float tensor, treating the buffer
105+
/// as already upright (orientation `up`). Use
106+
/// processPixelBuffer:orientation:error: to specify a source orientation.
107+
- (nullable ExecuTorchTensor *)processPixelBuffer:(_Nullable CVPixelBufferRef)pixelBuffer
108+
error:(NSError **)error;
109+
110+
/// Reuse-friendly variant of processPixelBuffer:error: that writes into a
111+
/// caller-provided tensor; treats the buffer as already upright (orientation
112+
/// `up`). See processPixelBuffer:orientation:intoTensor:error:.
113+
- (BOOL)processPixelBuffer:(_Nullable CVPixelBufferRef)pixelBuffer
114+
intoTensor:(ExecuTorchTensor *)tensor
115+
error:(NSError **)error;
116+
96117
/// Process a CVPixelBuffer into a normalized float tensor.
97118
///
98119
/// Auto-detects pixel format from the buffer's metadata. Supported
99120
/// formats: BGRA, RGBA, 8-bit NV12, and 10-bit P010 (P010 is narrowed to NV12
100121
/// internally). Other formats return an error.
101122
///
102-
/// The buffer is treated as already upright. Orientation correction is not
103-
/// applied and cannot be derived from a CVPixelBuffer, so the caller is
104-
/// responsible for supplying an upright buffer (e.g. by configuring the
105-
/// capture connection's orientation).
123+
/// `orientation` is the EXIF orientation of the buffer's contents; the pipeline
124+
/// rotates the contents upright before resizing. The orientation cannot be derived from a
125+
/// CVPixelBuffer, so the caller supplies it (e.g. from capture metadata).
106126
///
107127
/// @param pixelBuffer The input pixel buffer.
128+
/// @param orientation The source orientation.
108129
/// @param error On failure, set to an NSError describing what went wrong.
109130
/// @return An ExecuTorchTensor with shape [1, 3, H, W] (CHW), or nil on failure.
110131
- (nullable ExecuTorchTensor *)processPixelBuffer:(_Nullable CVPixelBufferRef)pixelBuffer
132+
orientation:(ExecuTorchImageOrientation)orientation
111133
error:(NSError **)error;
112134

113135
/// Process a CVPixelBuffer into a caller-provided tensor, reusing its storage.
114136
///
115-
/// Avoids the per-call output allocation of processPixelBuffer:error:, which
116-
/// matters for sustained video. `tensor` must be a Float tensor shaped
137+
/// Avoids the per-call output allocation of processPixelBuffer:orientation:error:,
138+
/// which matters for sustained video. `tensor` must be a Float tensor shaped
117139
/// [1, 3, targetHeight, targetWidth]; its storage is overwritten and can be
118140
/// reused across frames. The result aliases `tensor`, so the caller must
119141
/// finish using the previous result before the next call.
120142
///
121143
/// @param pixelBuffer The input pixel buffer.
144+
/// @param orientation The source orientation (see processPixelBuffer:orientation:error:).
122145
/// @param tensor The output tensor to fill.
123146
/// @param error On failure, set to an NSError describing what went wrong.
124147
/// @return YES on success, NO on failure.
125148
- (BOOL)processPixelBuffer:(_Nullable CVPixelBufferRef)pixelBuffer
149+
orientation:(ExecuTorchImageOrientation)orientation
126150
intoTensor:(ExecuTorchTensor *)tensor
127151
error:(NSError **)error;
128152

@@ -132,11 +156,31 @@ __attribute__((objc_subclassing_restricted))
132156
/// top-left anchor. Lets callers map the padded output back to the source
133157
/// region without replicating the resize geometry.
134158
///
159+
/// Treats the source as already upright (orientation `up`). Use
160+
/// computeLetterboxPaddingForInputWidth:height:orientation: for a rotated
161+
/// source.
162+
///
163+
/// @param inputWidth The source pixel width.
164+
/// @param inputHeight The source pixel height.
165+
/// @return The {x, y} padding in pixels.
166+
- (ExecuTorchImageLetterboxPadding)computeLetterboxPaddingForInputWidth:(NSInteger)inputWidth
167+
height:(NSInteger)inputHeight
168+
NS_REFINED_FOR_SWIFT;
169+
170+
/// Letterbox padding (per side, in pixels) the processor applies for a source
171+
/// of the given size and orientation. The source dimensions are oriented
172+
/// (width/height swapped for the 90-degree rotations) before the padding is
173+
/// computed, so the result matches the geometry that
174+
/// processPixelBuffer:orientation:error: produces. Returns {0, 0} for the
175+
/// stretch resize mode or the top-left anchor.
176+
///
135177
/// @param inputWidth The source pixel width.
136178
/// @param inputHeight The source pixel height.
179+
/// @param orientation The source orientation (see processPixelBuffer:orientation:error:).
137180
/// @return The {x, y} padding in pixels.
138181
- (ExecuTorchImageLetterboxPadding)computeLetterboxPaddingForInputWidth:(NSInteger)inputWidth
139182
height:(NSInteger)inputHeight
183+
orientation:(ExecuTorchImageOrientation)orientation
140184
NS_REFINED_FOR_SWIFT;
141185

142186
+ (instancetype)new NS_UNAVAILABLE;

extension/apple/ExecuTorch/Exported/ExecuTorchImageProcessor.mm

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@
3131
static_assert((int)ExecuTorchImageResizeModeLetterbox == (int)ResizeMode::LETTERBOX, "ExecuTorchImageResizeModeLetterbox must match ResizeMode::LETTERBOX");
3232
static_assert((int)ExecuTorchImageLetterboxAnchorCenter == (int)LetterboxAnchor::CENTER, "ExecuTorchImageLetterboxAnchorCenter must match LetterboxAnchor::CENTER");
3333
static_assert((int)ExecuTorchImageLetterboxAnchorTopLeft == (int)LetterboxAnchor::TOP_LEFT, "ExecuTorchImageLetterboxAnchorTopLeft must match LetterboxAnchor::TOP_LEFT");
34+
static_assert((int)ExecuTorchImageOrientationUp == (int)Orientation::UP, "ExecuTorchImageOrientationUp must match Orientation::UP");
35+
static_assert((int)ExecuTorchImageOrientationDown == (int)Orientation::DOWN, "ExecuTorchImageOrientationDown must match Orientation::DOWN");
36+
static_assert((int)ExecuTorchImageOrientationRight == (int)Orientation::RIGHT, "ExecuTorchImageOrientationRight must match Orientation::RIGHT");
37+
static_assert((int)ExecuTorchImageOrientationLeft == (int)Orientation::LEFT, "ExecuTorchImageOrientationLeft must match Orientation::LEFT");
3438

3539
// MARK: - Private interfaces
3640

@@ -178,17 +182,36 @@ - (instancetype)initWithConfig:(ExecuTorchImageProcessorConfig *)config {
178182

179183
- (nullable ExecuTorchTensor *)processPixelBuffer:(_Nullable CVPixelBufferRef)pixelBuffer
180184
error:(NSError **)error {
185+
return [self processPixelBuffer:pixelBuffer
186+
orientation:ExecuTorchImageOrientationUp
187+
error:error];
188+
}
189+
190+
- (BOOL)processPixelBuffer:(_Nullable CVPixelBufferRef)pixelBuffer
191+
intoTensor:(ExecuTorchTensor *)tensor
192+
error:(NSError **)error {
193+
return [self processPixelBuffer:pixelBuffer
194+
orientation:ExecuTorchImageOrientationUp
195+
intoTensor:tensor
196+
error:error];
197+
}
198+
199+
- (nullable ExecuTorchTensor *)processPixelBuffer:(_Nullable CVPixelBufferRef)pixelBuffer
200+
orientation:(ExecuTorchImageOrientation)orientation
201+
error:(NSError **)error {
181202
if (!pixelBuffer) {
182203
if (error) {
183204
*error = ExecuTorchErrorWithCode(ExecuTorchErrorCodeInvalidArgument);
184205
}
185206
return nil;
186207
}
187-
auto result = process_pixelbuffer(*_processor, pixelBuffer);
208+
auto result = process_pixelbuffer(
209+
*_processor, pixelBuffer, static_cast<Orientation>(orientation));
188210
return tensorFromResult(result, error);
189211
}
190212

191213
- (BOOL)processPixelBuffer:(_Nullable CVPixelBufferRef)pixelBuffer
214+
orientation:(ExecuTorchImageOrientation)orientation
192215
intoTensor:(ExecuTorchTensor *)tensor
193216
error:(NSError **)error {
194217
if (!pixelBuffer || !tensor) {
@@ -199,7 +222,8 @@ - (BOOL)processPixelBuffer:(_Nullable CVPixelBufferRef)pixelBuffer
199222
}
200223
auto* tensorPtr = reinterpret_cast<TensorPtr*>(tensor.nativeInstance);
201224
auto err = process_pixelbuffer_into(
202-
*_processor, pixelBuffer, Orientation::UP, **tensorPtr);
225+
*_processor, pixelBuffer, static_cast<Orientation>(orientation),
226+
**tensorPtr);
203227
if (err != executorch::runtime::Error::Ok) {
204228
if (error) {
205229
*error = ExecuTorchErrorWithCode((ExecuTorchErrorCode)err);
@@ -211,8 +235,17 @@ - (BOOL)processPixelBuffer:(_Nullable CVPixelBufferRef)pixelBuffer
211235

212236
- (ExecuTorchImageLetterboxPadding)computeLetterboxPaddingForInputWidth:(NSInteger)inputWidth
213237
height:(NSInteger)inputHeight {
238+
return [self computeLetterboxPaddingForInputWidth:inputWidth
239+
height:inputHeight
240+
orientation:ExecuTorchImageOrientationUp];
241+
}
242+
243+
- (ExecuTorchImageLetterboxPadding)computeLetterboxPaddingForInputWidth:(NSInteger)inputWidth
244+
height:(NSInteger)inputHeight
245+
orientation:(ExecuTorchImageOrientation)orientation {
214246
const auto padding = _processor->compute_letterbox_padding(
215-
static_cast<int32_t>(inputWidth), static_cast<int32_t>(inputHeight));
247+
static_cast<int32_t>(inputWidth), static_cast<int32_t>(inputHeight),
248+
static_cast<Orientation>(orientation));
216249
return {padding.first, padding.second};
217250
}
218251

0 commit comments

Comments
 (0)