|
| 1 | +/* |
| 2 | + * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | + * All rights reserved. |
| 4 | + * |
| 5 | + * This source code is licensed under the BSD-style license found in the |
| 6 | + * LICENSE file in the root directory of this source tree. |
| 7 | + */ |
| 8 | + |
| 9 | +#import <CoreVideo/CoreVideo.h> |
| 10 | +#import <Foundation/Foundation.h> |
| 11 | + |
| 12 | +#import "ExecuTorchTensor.h" |
| 13 | + |
| 14 | +NS_ASSUME_NONNULL_BEGIN |
| 15 | + |
/// Selects how a source image is resized to the configured target size.
typedef NS_ENUM(uint8_t, ExecuTorchImageResizeMode) {
  /// No letterbox padding is reported for this mode
  /// (computeLetterboxPaddingForInputWidth:height: returns {0, 0}).
  ExecuTorchImageResizeModeStretch = 0,
  /// The resized content may be surrounded by letterbox padding.
  ExecuTorchImageResizeModeLetterbox = 1,
} NS_SWIFT_NAME(ImageResizeMode);
| 20 | + |
/// Anchor used when the letterbox resize mode is selected.
/// NOTE(review): exact placement semantics are inferred from the case names —
/// confirm against the implementation.
typedef NS_ENUM(uint8_t, ExecuTorchImageLetterboxAnchor) {
  ExecuTorchImageLetterboxAnchorCenter = 0,
  /// computeLetterboxPaddingForInputWidth:height: returns {0, 0} for this
  /// anchor.
  ExecuTorchImageLetterboxAnchorTopLeft = 1,
} NS_SWIFT_NAME(ImageLetterboxAnchor);
| 25 | + |
/// Letterbox padding around the resized content, expressed per side and in
/// pixels.
typedef struct ExecuTorchImageLetterboxPadding {
  /// Left/right pad of the resized content.
  NSInteger x;
  /// Top/bottom pad of the resized content.
  NSInteger y;
} ExecuTorchImageLetterboxPadding NS_SWIFT_NAME(ImageLetterboxPadding);
| 32 | + |
/// A scale factor together with per-channel RGB mean and standard deviation
/// that is applied to pixel values. Obtain an instance from one of the presets
/// or from the custom initializer; plain `init`/`new` are unavailable and the
/// class cannot be subclassed.
NS_SWIFT_NAME(ImageNormalization)
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchImageNormalization : NSObject

- (instancetype)init NS_UNAVAILABLE;
+ (instancetype)new NS_UNAVAILABLE;

/// The `zeroToOne` preset.
/// NOTE(review): presumably rescales pixel values into [0, 1] without a
/// mean/standard-deviation shift — confirm against the implementation.
+ (instancetype)zeroToOne;

/// The `imagenet` preset.
/// NOTE(review): presumably uses the ImageNet per-channel mean and standard
/// deviation — confirm the exact constants against the implementation.
+ (instancetype)imagenet;

/// Creates a custom normalization.
///
/// Each channel `c` is normalized as
/// `(pixel * scaleFactor - mean[c]) / standardDeviation[c]`.
///
/// @param scaleFactor Factor each pixel value is multiplied by before the mean
///   is subtracted.
/// @param mean Per-channel mean; must contain exactly 3 elements (R, G, B).
/// @param standardDeviation Per-channel standard deviation; must contain
///   exactly 3 elements (R, G, B), each nonzero because it is used as a
///   divisor.
- (instancetype)initWithScaleFactor:(float)scaleFactor
                               mean:(NSArray<NSNumber *> *)mean
                  standardDeviation:(NSArray<NSNumber *> *)standardDeviation
    NS_REFINED_FOR_SWIFT;

@end
| 54 | + |
/// Read-only settings for an ExecuTorchImageProcessor: output size, resize
/// behavior, padding, normalization, and the GPU/CPU selection threshold.
/// Every value is supplied once through the initializer; plain `init`/`new`
/// are unavailable and the class cannot be subclassed.
NS_SWIFT_NAME(ImageProcessorConfig)
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchImageProcessorConfig : NSObject

- (instancetype)init NS_UNAVAILABLE;
+ (instancetype)new NS_UNAVAILABLE;

/// Default value for `gpuMinInputPixels` (mirrors the C++ config default).
@property(class, readonly, nonatomic) NSInteger defaultGpuMinInputPixels;

/// Width of the processed output, in pixels.
@property(readonly, nonatomic) NSInteger targetWidth;

/// Height of the processed output, in pixels.
@property(readonly, nonatomic) NSInteger targetHeight;

/// How the source image is resized to the target size.
@property(readonly, nonatomic) ExecuTorchImageResizeMode resizeMode;

/// Anchor used with the letterbox resize mode.
@property(readonly, nonatomic) ExecuTorchImageLetterboxAnchor letterboxAnchor;

/// Value used for padding.
/// NOTE(review): presumably the fill for letterbox padding; whether it is
/// applied before or after normalization is not stated here — confirm.
@property(readonly, nonatomic) float padValue;

/// Normalization applied to pixel values.
@property(readonly, nonatomic) ExecuTorchImageNormalization *normalization;

/// Minimum source pixel count (width * height) at which the GPU path may be
/// used; smaller inputs run on the CPU. 0 forces GPU, NSIntegerMax forces CPU.
@property(readonly, nonatomic) NSInteger gpuMinInputPixels;

/// Creates a configuration with every setting given explicitly.
///
/// @param targetWidth Output width in pixels.
/// @param targetHeight Output height in pixels.
/// @param resizeMode How the source is resized to the target size.
/// @param letterboxAnchor Anchor used with the letterbox resize mode.
/// @param padValue Value used for padding.
/// @param normalization Normalization applied to pixel values.
/// @param gpuMinInputPixels Minimum source pixel count at which the GPU path
///   may be used; see `defaultGpuMinInputPixels` for the default.
- (instancetype)initWithTargetWidth:(NSInteger)targetWidth
                       targetHeight:(NSInteger)targetHeight
                         resizeMode:(ExecuTorchImageResizeMode)resizeMode
                    letterboxAnchor:(ExecuTorchImageLetterboxAnchor)letterboxAnchor
                           padValue:(float)padValue
                      normalization:(ExecuTorchImageNormalization *)normalization
                  gpuMinInputPixels:(NSInteger)gpuMinInputPixels NS_REFINED_FOR_SWIFT;

@end
| 84 | + |
/// Converts CVPixelBuffers into normalized float tensors according to an
/// ExecuTorchImageProcessorConfig.
///
/// Thread-safety: a single instance is NOT thread-safe, because internal
/// scratch buffers are mutated during processing. Give each concurrent caller
/// its own instance; separate instances may be used concurrently.
NS_SWIFT_NAME(ImageProcessor)
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchImageProcessor : NSObject

- (instancetype)init NS_UNAVAILABLE;
+ (instancetype)new NS_UNAVAILABLE;

/// Creates a processor that uses the given configuration.
///
/// @param config The configuration to process with.
- (instancetype)initWithConfig:(ExecuTorchImageProcessorConfig *)config;

/// The configuration this processor was created with.
@property(readonly, nonatomic) ExecuTorchImageProcessorConfig *config;

/// Converts a CVPixelBuffer into a newly returned, normalized float tensor.
///
/// The pixel format is auto-detected from the buffer's metadata. Supported
/// formats are BGRA, RGBA, 8-bit NV12, and 10-bit P010 (P010 is narrowed to
/// NV12 internally); any other format produces an error.
///
/// The buffer is treated as already upright. No orientation correction is
/// applied, and none can be derived from a CVPixelBuffer, so the caller must
/// supply an upright buffer (e.g. by configuring the capture connection's
/// orientation).
///
/// @param pixelBuffer The input pixel buffer.
///   NOTE(review): declared nullable, but the handling of a nil buffer is not
///   specified here — presumably reported through `error`; confirm.
/// @param error On failure, set to an NSError describing what went wrong.
/// @return An ExecuTorchTensor with shape [1, 3, H, W] (CHW), or nil on failure.
- (nullable ExecuTorchTensor *)processPixelBuffer:(nullable CVPixelBufferRef)pixelBuffer
                                            error:(NSError **)error;

/// Converts a CVPixelBuffer into a caller-provided tensor, reusing its storage.
///
/// This avoids the per-call output allocation of processPixelBuffer:error:,
/// which matters for sustained video. `tensor` must be a Float tensor shaped
/// [1, 3, targetHeight, targetWidth]. Its storage is overwritten and may be
/// reused across frames. Because the result aliases `tensor`, the caller must
/// finish using the previous result before making the next call.
///
/// @param pixelBuffer The input pixel buffer.
/// @param tensor The output tensor to fill.
/// @param error On failure, set to an NSError describing what went wrong.
/// @return YES on success, NO on failure.
- (BOOL)processPixelBuffer:(nullable CVPixelBufferRef)pixelBuffer
                intoTensor:(ExecuTorchTensor *)tensor
                     error:(NSError **)error;

/// Reports the letterbox padding (per side, in pixels) that the processor
/// applies for a source of the given size, so callers can map the padded
/// output back to the source region without replicating the resize geometry.
///
/// `x` is the left/right pad and `y` the top/bottom pad of the resized
/// content. The result is {0, 0} for the stretch resize mode or the top-left
/// anchor.
///
/// @param inputWidth The source pixel width.
/// @param inputHeight The source pixel height.
/// @return The {x, y} padding in pixels.
- (ExecuTorchImageLetterboxPadding)computeLetterboxPaddingForInputWidth:(NSInteger)inputWidth
                                                                 height:(NSInteger)inputHeight
    NS_REFINED_FOR_SWIFT;

@end
| 146 | + |
| 147 | +NS_ASSUME_NONNULL_END |
0 commit comments