Skip to content

Commit 933b63b

Browse files
feat: use TensorPtrish type for Pixel data input
1 parent 1227dac commit 933b63b

9 files changed

Lines changed: 79 additions & 132 deletions

File tree

apps/computer-vision/app/object_detection/index.tsx

Lines changed: 40 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -4,69 +4,15 @@ import {
44
Detection,
55
useObjectDetection,
66
SSDLITE_320_MOBILENET_V3_LARGE,
7+
ScalarType,
8+
PixelData,
79
} from 'react-native-executorch';
810
import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native';
911
import ImageWithBboxes from '../../components/ImageWithBboxes';
1012
import React, { useContext, useEffect, useState } from 'react';
1113
import { GeneratingContext } from '../../context';
1214
import ScreenWrapper from '../../ScreenWrapper';
1315
import ColorPalette from '../../colors';
14-
import { Images } from 'react-native-nitro-image';
15-
16-
// Helper function to convert BGRA to RGB
17-
function convertBGRAtoRGB(
18-
buffer: ArrayBuffer,
19-
width: number,
20-
height: number
21-
): ArrayBuffer {
22-
const source = new Uint8Array(buffer);
23-
const rgb = new Uint8Array(width * height * 3);
24-
25-
for (let i = 0; i < width * height; i++) {
26-
// BGRA format: [B, G, R, A] → RGB: [R, G, B]
27-
rgb[i * 3 + 0] = source[i * 4 + 2]; // R
28-
rgb[i * 3 + 1] = source[i * 4 + 1]; // G
29-
rgb[i * 3 + 2] = source[i * 4 + 0]; // B
30-
}
31-
32-
return rgb.buffer;
33-
}
34-
35-
// Helper function to convert image URI to raw RGB pixel data
36-
async function imageUriToPixelData(
37-
uri: string,
38-
targetWidth: number,
39-
targetHeight: number
40-
): Promise<{
41-
data: ArrayBuffer;
42-
width: number;
43-
height: number;
44-
channels: number;
45-
}> {
46-
try {
47-
// Load image and resize to target dimensions
48-
const image = await Images.loadFromFileAsync(uri);
49-
const resized = image.resize(targetWidth, targetHeight);
50-
51-
// Get pixel data as ArrayBuffer (BGRA format from NitroImage)
52-
const rawPixelData = resized.toRawPixelData();
53-
const buffer =
54-
rawPixelData instanceof ArrayBuffer ? rawPixelData : rawPixelData.buffer;
55-
56-
// Convert BGRA to RGB as required by the native API
57-
const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight);
58-
59-
return {
60-
data: rgbBuffer,
61-
width: targetWidth,
62-
height: targetHeight,
63-
channels: 3, // RGB
64-
};
65-
} catch (error) {
66-
console.error('Error loading image with NitroImage:', error);
67-
throw error;
68-
}
69-
}
7016

7117
export default function ObjectDetectionScreen() {
7218
const [imageUri, setImageUri] = useState('');
@@ -109,30 +55,45 @@ export default function ObjectDetectionScreen() {
10955
};
11056

11157
const runForwardPixels = async () => {
112-
if (imageUri && imageDimensions) {
113-
try {
114-
console.log('Converting image to pixel data...');
115-
// Use original dimensions - let the model resize internally
116-
const pixelData = await imageUriToPixelData(
117-
imageUri,
118-
imageDimensions.width,
119-
imageDimensions.height
120-
);
121-
122-
console.log('Running forward with pixel data...', {
123-
width: pixelData.width,
124-
height: pixelData.height,
125-
channels: pixelData.channels,
126-
dataSize: pixelData.data.byteLength,
127-
});
128-
129-
// Run inference using unified forward() API
130-
const output = await ssdLite.forward(pixelData, 0.3);
131-
console.log('Pixel data result:', output.length, 'detections');
132-
setResults(output);
133-
} catch (e) {
134-
console.error('Error in runForwardPixels:', e);
58+
try {
59+
console.log('Testing with hardcoded pixel data...');
60+
61+
// Create a simple 320x320 test image (all zeros - black image)
62+
// In a real scenario, you would load actual image pixel data here
63+
const width = 320;
64+
const height = 320;
65+
const channels = 3; // RGB
66+
67+
// Create a black image (you can replace this with actual pixel data)
68+
const rgbData = new Uint8Array(width * height * channels);
69+
70+
// Draw a test pattern: a white 120x120 square centered in the image
71+
for (let y = 100; y < 220; y++) {
72+
for (let x = 100; x < 220; x++) {
73+
const idx = (y * width + x) * 3;
74+
rgbData[idx + 0] = 255; // R
75+
rgbData[idx + 1] = 255; // G
76+
rgbData[idx + 2] = 255; // B
77+
}
13578
}
79+
80+
const pixelData: PixelData = {
81+
dataPtr: rgbData,
82+
sizes: [height, width, channels],
83+
scalarType: ScalarType.BYTE,
84+
};
85+
86+
console.log('Running forward with hardcoded pixel data...', {
87+
sizes: pixelData.sizes,
88+
dataSize: pixelData.dataPtr.byteLength,
89+
});
90+
91+
// Run inference using unified forward() API
92+
const output = await ssdLite.forward(pixelData, 0.3);
93+
console.log('Pixel data result:', output.length, 'detections');
94+
setResults(output);
95+
} catch (e) {
96+
console.error('Error in runForwardPixels:', e);
13697
}
13798
};
13899

packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
#include <rnexecutorch/Error.h>
33
#include <rnexecutorch/ErrorCodes.h>
44
#include <rnexecutorch/Log.h>
5-
#include <rnexecutorch/host_objects/JSTensorViewIn.h>
6-
#include <rnexecutorch/host_objects/JsiConversions.h>
75
#include <rnexecutorch/utils/FrameProcessor.h>
86

97
namespace rnexecutorch {
@@ -21,12 +19,7 @@ cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime,
2119
return preprocessFrame(frame);
2220
}
2321

24-
cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime,
25-
const jsi::Object &pixelData) const {
26-
// PixelData follows TensorPtr structure (dataPtr, sizes, scalarType)
27-
// Use JSI conversion helper to extract the data
28-
auto tensorView = jsi::fromHostObject<JSTensorViewIn>(runtime, pixelData);
29-
22+
cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const {
3023
// Validate dimensions: sizes must be [height, width, channels]
3124
if (tensorView.sizes.size() != 3) {
3225
char errorMessage[100];
@@ -59,11 +52,11 @@ cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime,
5952
}
6053

6154
// Create cv::Mat directly from dataPtr (zero-copy view)
55+
// Data is valid for the duration of this synchronous call
6256
uint8_t *dataPtr = static_cast<uint8_t *>(tensorView.dataPtr);
6357
cv::Mat image(height, width, CV_8UC3, dataPtr);
6458

65-
// Clone to own the data, since JS memory may be GC'd
66-
return image.clone();
59+
return image;
6760
}
6861

6962
} // namespace models

packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -129,40 +129,35 @@ class VisionModel : public BaseModel {
129129
const jsi::Value &frameData) const;
130130

131131
/**
132-
* @brief Extract cv::Mat from raw pixel data (ArrayBuffer) sent from
132+
* @brief Extract cv::Mat from raw pixel data (TensorPtr) sent from
133133
* JavaScript
134134
*
135135
* This method enables users to run inference on raw pixel data without file
136136
* I/O. Useful for processing images already in memory (e.g., from canvas,
137137
* image library).
138138
*
139-
* @param runtime JSI runtime
140-
* @param pixelData JSI object containing:
141-
* - data: ArrayBuffer with raw pixel values
142-
* - width: number - image width
143-
* - height: number - image height
144-
* - channels: number - number of channels (3 for RGB, 4 for
145-
* RGBA)
139+
* @param tensorView JSTensorViewIn containing:
140+
* - dataPtr: Pointer to raw pixel values (RGB format)
141+
* - sizes: [height, width, channels] - must be 3D
142+
* - scalarType: Must be ScalarType::Byte (Uint8Array)
146143
*
147144
* @return cv::Mat containing the pixel data
148145
*
149-
* @throws std::runtime_error if pixelData format is invalid
146+
* @throws RnExecutorchError if tensorView format is invalid
150147
*
151148
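* @note The returned cv::Mat is a zero-copy view of tensorView's data and does not own it; the underlying buffer must stay valid while the Mat is in use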
152-
* @note Expected pixel format: RGB or RGBA, row-major order
149+
* @note Expected pixel format: RGB (3 channels), row-major order
153150
* @note Typical usage from JS:
154151
* @code
155-
* const pixels = new Uint8Array([...]); // Raw pixel data
152+
* const pixels = new Uint8Array([...]); // Raw RGB pixel data
156153
* const result = model.generateFromPixels({
157-
* data: pixels.buffer,
158-
* width: 640,
159-
* height: 480,
160-
* channels: 3
154+
* dataPtr: pixels,
155+
* sizes: [480, 640, 3],
156+
* scalarType: ScalarType.BYTE
161157
* }, 0.5);
162158
* @endcode
163159
*/
164-
cv::Mat extractFromPixels(jsi::Runtime &runtime,
165-
const jsi::Object &pixelData) const;
160+
cv::Mat extractFromPixels(const JSTensorViewIn &tensorView) const;
166161
};
167162

168163
} // namespace models

packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <rnexecutorch/ErrorCodes.h>
55
#include <rnexecutorch/Log.h>
66
#include <rnexecutorch/data_processing/ImageProcessing.h>
7+
#include <rnexecutorch/host_objects/JsiConversions.h>
78
#include <rnexecutorch/utils/FrameProcessor.h>
89

910
namespace rnexecutorch::models::object_detection {
@@ -176,9 +177,12 @@ std::vector<types::Detection>
176177
ObjectDetection::generateFromPixels(jsi::Runtime &runtime,
177178
const jsi::Value &pixelData,
178179
double detectionThreshold) {
179-
// Extract raw pixel data from JavaScript
180-
auto pixelObj = pixelData.asObject(runtime);
181-
cv::Mat image = extractFromPixels(runtime, pixelObj);
180+
// Convert JSI value to JSTensorViewIn
181+
auto tensorView =
182+
jsi_conversion::getValue<JSTensorViewIn>(pixelData, runtime);
183+
184+
// Extract raw pixel data to cv::Mat
185+
cv::Mat image = extractFromPixels(tensorView);
182186

183187
// Use the internal helper - it handles locking, preprocessing, and inference
184188
return runInference(image, detectionThreshold);

packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) {
8686
buffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, nullptr, &data);
8787

8888
if (lockResult != 0) {
89-
throw RnExecutorchError(RnExecutorchErrorCode::AccessFailed,
89+
throw RnExecutorchError(RnExecutorchErrorCode::UnknownError,
9090
"Failed to lock AHardwareBuffer");
9191
}
9292

packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,8 @@ namespace utils {
99

1010
cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime,
1111
const jsi::Object &frameData) {
12-
// Get frame dimensions
13-
int width =
14-
static_cast<int>(frameData.getProperty(runtime, "width").asNumber());
15-
int height =
16-
static_cast<int>(frameData.getProperty(runtime, "height").asNumber());
17-
1812
// Try zero-copy path first (nativeBuffer)
13+
// Native buffer contains dimensions, so we don't need width/height properties
1914
if (hasNativeBuffer(runtime, frameData)) {
2015
try {
2116
return extractFromNativeBuffer(runtime, frameData);
@@ -25,7 +20,12 @@ cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime,
2520
}
2621

2722
// Fallback to ArrayBuffer path (with copy)
23+
// Get frame dimensions for ArrayBuffer path
2824
if (frameData.hasProperty(runtime, "data")) {
25+
int width =
26+
static_cast<int>(frameData.getProperty(runtime, "width").asNumber());
27+
int height =
28+
static_cast<int>(frameData.getProperty(runtime, "height").asNumber());
2929
return extractFromArrayBuffer(runtime, frameData, width, height);
3030
}
3131

packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ export abstract class VisionModule<TOutput> extends BaseModule {
7373
* 1. **String path/URI**: File path, URL, or Base64-encoded string
7474
* 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage)
7575
*
76-
* **Note**: For VisionCamera frame processing, use `forwardSync` instead.
76+
* **Note**: For VisionCamera frame processing, use `runOnFrame` instead.
7777
* This method is async and cannot be called in worklet context.
7878
*
7979
* @param input - Image source (string path or PixelData object)

packages/react-native-executorch/src/types/common.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,5 +191,5 @@ export interface Frame {
191191
*
192192
* Obtain from Vision Camera v5: `frame.getNativeBuffer().pointer`
193193
*/
194-
getNativeBuffer(): { pointer: number; release(): void };
194+
getNativeBuffer(): { pointer: bigint; release(): void };
195195
}

packages/react-native-executorch/src/types/objectDetection.ts

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { RnExecutorchError } from '../errors/errorUtils';
2-
import { ResourceSource } from './common';
2+
import { ResourceSource, PixelData, Frame } from './common';
33

44
/**
55
* Represents a bounding box for a detected object in an image.
@@ -190,22 +190,14 @@ export interface ObjectDetectionType {
190190
*
191191
* // Pixel data
192192
* const detections2 = await model.forward({
193-
* data: pixelBuffer,
194-
* width: 640,
195-
* height: 480,
196-
* channels: 3
193+
* dataPtr: new Uint8Array(rgbPixels),
194+
* sizes: [480, 640, 3],
195+
* scalarType: ScalarType.BYTE
197196
* });
198197
* ```
199198
*/
200199
forward: (
201-
input:
202-
| string
203-
| {
204-
data: ArrayBuffer;
205-
width: number;
206-
height: number;
207-
channels: number;
208-
},
200+
input: string | PixelData,
209201
detectionThreshold?: number
210202
) => Promise<Detection[]>;
211203

@@ -236,5 +228,7 @@ export interface ObjectDetectionType {
236228
* @param detectionThreshold - The threshold for detection sensitivity. Default is 0.7.
237229
* @returns Array of Detection objects representing detected items in the frame.
238230
*/
239-
runOnFrame: ((frame: any, detectionThreshold?: number) => Detection[]) | null;
231+
runOnFrame:
232+
| ((frame: Frame, detectionThreshold?: number) => Detection[])
233+
| null;
240234
}

0 commit comments

Comments
 (0)