@@ -93,350 +93,6 @@ Java_com_executorch_webrtc_ExecutorchFrameProcessor_loadModel(
9393 }
9494}
9595
96- /* *
97- * Process I420 frame - does segmentation and applies blur in one call.
98- *
99- * @param yData Y plane data
100- * @param uData U plane data
101- * @param vData V plane data
102- * @param width Frame width
103- * @param height Frame height
104- * @param yStride Y plane stride
105- * @param uvStride U/V plane stride
106- * @param rotation Frame rotation in degrees (0, 90, 180, 270)
107- * @return Array of 3 byte arrays [Y, U, V] with background blurred (or null on
108- * error)
109- */
110- JNIEXPORT jobjectArray JNICALL
111- Java_com_executorch_webrtc_ExecutorchFrameProcessor_processI420Frame (
112- JNIEnv *env, jobject thiz, jbyteArray yData, jbyteArray uData,
113- jbyteArray vData, jint width, jint height, jint yStride, jint uvStride,
114- jint rotation) {
115-
116- // Get input buffers and their actual sizes
117- jsize yDataSize = env->GetArrayLength (yData);
118- jsize uDataSize = env->GetArrayLength (uData);
119- jsize vDataSize = env->GetArrayLength (vData);
120-
121- jbyte *yPtr = env->GetByteArrayElements (yData, nullptr );
122- jbyte *uPtr = env->GetByteArrayElements (uData, nullptr );
123- jbyte *vPtr = env->GetByteArrayElements (vData, nullptr );
124-
125- if (!yPtr || !uPtr || !vPtr) {
126- LOGE (" Failed to get buffer pointers" );
127- // I'm not sure why we're releasing this here, I mean this is still used in
128- // the C++ for regular frames, no? Or we are copying?
129- if (yPtr)
130- env->ReleaseByteArrayElements (yData, yPtr, JNI_ABORT);
131- if (uPtr)
132- env->ReleaseByteArrayElements (uData, uPtr, JNI_ABORT);
133- if (vPtr)
134- env->ReleaseByteArrayElements (vData, vPtr, JNI_ABORT);
135- return nullptr ;
136- }
137-
138- // Determine actual stride based on buffer sizes
139- // what the fuck is stride?
140- int actualYStride = (yDataSize >= yStride * height) ? yStride : width;
141- int actualUVStride =
142- (uDataSize >= uvStride * (height / 2 )) ? uvStride : (width / 2 );
143-
144- // Rate-limited logging of buffer info
145- static long long lastBufferLogTime = 0 ;
146- auto now = std::chrono::duration_cast<std::chrono::milliseconds>(
147- std::chrono::system_clock::now ().time_since_epoch ())
148- .count ();
149- if (now - lastBufferLogTime > 2000 ) {
150- LOGD (" Buffer sizes: Y=%d, U=%d, V=%d, actualYStride=%d, actualUVStride=%d" ,
151- yDataSize, uDataSize, vDataSize, actualYStride, actualUVStride);
152- lastBufferLogTime = now;
153- }
154-
155- // Create output buffers for Y, U, V
156- jbyteArray outYData = env->NewByteArray (actualYStride * height);
157- jbyteArray outUData = env->NewByteArray (actualUVStride * (height / 2 ));
158- jbyteArray outVData = env->NewByteArray (actualUVStride * (height / 2 ));
159- if (!outYData || !outUData || !outVData) {
160- env->ReleaseByteArrayElements (yData, yPtr, JNI_ABORT);
161- env->ReleaseByteArrayElements (uData, uPtr, JNI_ABORT);
162- env->ReleaseByteArrayElements (vData, vPtr, JNI_ABORT);
163- return nullptr ;
164- }
165-
166- // Merge I420 to single buffer for cvtColor
167- cv::Mat i420 (height * 3 / 2 , width, CV_8UC1);
168-
169- // Copy Y plane row by row (handle stride correctly)
170- uint8_t *ySrc = reinterpret_cast <uint8_t *>(yPtr);
171- for (int row = 0 ; row < height; row++) {
172- memcpy (i420.ptr (row), ySrc + row * actualYStride, width);
173- }
174-
175- // Copy U and V planes
176- auto *uSrc = reinterpret_cast <uint8_t *>(uPtr);
177- auto *vSrc = reinterpret_cast <uint8_t *>(vPtr);
178- uint8_t *uvDst = i420.ptr (height);
179- int uvWidth = width / 2 ;
180- int uvHeight = height / 2 ;
181-
182- for (int row = 0 ; row < uvHeight; row++) {
183- memcpy (uvDst + row * uvWidth, uSrc + row * actualUVStride, uvWidth);
184- }
185- for (int row = 0 ; row < uvHeight; row++) {
186- memcpy (uvDst + uvHeight * uvWidth + row * uvWidth,
187- vSrc + row * actualUVStride, uvWidth);
188- }
189-
190- // Convert to RGB
191- cv::Mat rgbFull;
192- cv::cvtColor (i420, rgbFull, cv::COLOR_YUV2RGB_I420);
193- // Im not sure why we need all the copying, cant we just do sumn like this?
194- // cv::cvtColor(i420, rgbFull, cv::COLOR_YUV2RGB);
195-
196- // Rotate image to upright for model inference
197- cv::Mat rgbRotated;
198- int rotateCode = -1 ;
199- if (rotation == 90 ) {
200- rotateCode = cv::ROTATE_90_CLOCKWISE;
201- } else if (rotation == 180 ) {
202- rotateCode = cv::ROTATE_180;
203- } else if (rotation == 270 ) {
204- rotateCode = cv::ROTATE_90_COUNTERCLOCKWISE;
205- }
206-
207- if (rotateCode >= 0 ) {
208- cv::rotate (rgbFull, rgbRotated, rotateCode);
209- } else {
210- rgbRotated = rgbFull;
211- }
212-
213- // Run segmentation
214- cv::Mat mask;
215-
216- if (!g_modelLoaded || !g_segmentation) {
217- // Rate-limited logging for missing model
218- if (now - g_lastDebugLogTime > 1000 ) {
219- LOGD (" Model not loaded, using placeholder ellipse mask" );
220- g_lastDebugLogTime = now;
221- }
222-
223- // Placeholder ellipse mask
224- mask = cv::Mat (g_modelHeight, g_modelWidth, CV_32FC1);
225- const float centerY = g_modelHeight / 2 .0f ;
226- const float centerX = g_modelWidth / 2 .0f ;
227- const float radiusY = g_modelHeight * 0 .4f ;
228- const float radiusX = g_modelWidth * 0 .35f ;
229-
230- for (int y = 0 ; y < g_modelHeight; y++) {
231- float *row = mask.ptr <float >(y);
232- for (int x = 0 ; x < g_modelWidth; x++) {
233- float dy = (y - centerY) / radiusY;
234- float dx = (x - centerX) / radiusX;
235- float dist = dx * dx + dy * dy;
236- row[x] = (dist < 1 .0f )
237- ? 1 .0f
238- : ((dist < 1 .3f ) ? (1 .0f - ((dist - 1 .0f ) / 0 .3f )) : 0 .0f );
239- }
240- }
241- } else {
242- // Use BaseSemanticSegmentation via generateFromPixels
243- try {
244- // Create JSTensorViewIn from the rotated RGB image
245- // generateFromPixels expects [height, width, 3] RGB uint8 data
246- JSTensorViewIn pixelData;
247- pixelData.dataPtr = rgbRotated.data ;
248- pixelData.sizes = {rgbRotated.rows , rgbRotated.cols , 3 };
249- pixelData.scalarType = executorch::aten::ScalarType::Byte;
250-
251- // Run inference - returns foreground probability mask
252- std::set<std::string, std::less<>> classesOfInterest = {" foreground" };
253- auto result = g_segmentation->generateFromPixels (
254- pixelData, classesOfInterest, false );
255-
256- // Extract foreground mask from result
257- if (result.classBuffers && result.classBuffers ->count (" foreground" )) {
258- auto &fgBuffer = result.classBuffers ->at (" foreground" );
259- auto *fgData = reinterpret_cast <float *>(fgBuffer->data ());
260-
261- // The mask is at model input size, need to get its dimensions
262- // For now, assume it's the model input size
263- mask = cv::Mat (g_modelHeight, g_modelWidth, CV_32FC1, fgData).clone ();
264-
265- // Rate-limited debug logging
266- if (now - g_lastDebugLogTime > 1000 ) {
267- double minVal, maxVal;
268- cv::minMaxLoc (mask, &minVal, &maxVal);
269- LOGD (" Segmentation result: size=%dx%d, min=%.4f, max=%.4f" , mask.cols ,
270- mask.rows , minVal, maxVal);
271- g_lastDebugLogTime = now;
272- }
273- } else {
274- LOGE (" No foreground mask in result, using fallback" );
275- mask = cv::Mat::ones (g_modelHeight, g_modelWidth, CV_32FC1);
276- }
277- } catch (const std::exception &e) {
278- LOGE (" Segmentation failed: %s" , e.what ());
279- mask = cv::Mat::ones (g_modelHeight, g_modelWidth, CV_32FC1);
280- }
281- }
282-
283- // Resize mask to rotated frame size, then rotate back to original orientation
284- cv::Mat fullMask;
285- if (rotation == 90 || rotation == 270 ) {
286- cv::Mat rotatedMask;
287- cv::resize (mask, rotatedMask, cv::Size (height, width), 0 , 0 ,
288- cv::INTER_LINEAR);
289- int inverseRotateCode = (rotation == 90 ) ? cv::ROTATE_90_COUNTERCLOCKWISE
290- : cv::ROTATE_90_CLOCKWISE;
291- cv::rotate (rotatedMask, fullMask, inverseRotateCode);
292- } else if (rotation == 180 ) {
293- cv::resize (mask, fullMask, cv::Size (width, height), 0 , 0 , cv::INTER_LINEAR);
294- cv::rotate (fullMask, fullMask, cv::ROTATE_180);
295- } else {
296- cv::resize (mask, fullMask, cv::Size (width, height), 0 , 0 , cv::INTER_LINEAR);
297- }
298-
299- // Apply smoothstep to mask
300- const float lowThresh = 0 .3f ;
301- const float highThresh = 0 .7f ;
302- cv::Mat t;
303- cv::subtract (fullMask, lowThresh, t);
304- cv::multiply (t, 1 .0f / (highThresh - lowThresh), t);
305- cv::min (t, 1 .0f , t);
306- cv::max (t, 0 .0f , t);
307- cv::Mat t2, smoothMask;
308- cv::multiply (t, t, t2);
309- cv::multiply (t, -2 .0f , smoothMask);
310- cv::add (smoothMask, 3 .0f , smoothMask);
311- cv::multiply (t2, smoothMask, fullMask);
312-
313- // Blur the mask edges for smoother blending
314- cv::GaussianBlur (fullMask, fullMask, cv::Size (15 , 15 ), 0 );
315-
316- // Create Y plane Mat (packed, no stride padding)
317- cv::Mat yMat (height, width, CV_8UC1);
318- for (int row = 0 ; row < height; row++) {
319- memcpy (yMat.ptr (row), ySrc + row * actualYStride, width);
320- }
321-
322- // Create blurred Y using downscale-blur-upscale for performance
323- // 4x downscale for speed, stackBlur is O(1)
324- cv::Mat ySmall, yBlurredSmall, yBlurred;
325- int smallW = width / 4 ;
326- int smallH = height / 4 ;
327- cv::resize (yMat, ySmall, cv::Size (smallW, smallH), 0 , 0 , cv::INTER_AREA);
328- cv::stackBlur (ySmall, yBlurredSmall, cv::Size (21 , 21 ));
329- cv::resize (yBlurredSmall, yBlurred, cv::Size (width, height), 0 , 0 ,
330- cv::INTER_LINEAR);
331-
332- // Create U and V mats from input (packed, no stride padding)
333- cv::Mat uMat (uvHeight, uvWidth, CV_8UC1);
334- cv::Mat vMat (uvHeight, uvWidth, CV_8UC1);
335- for (int row = 0 ; row < uvHeight; row++) {
336- memcpy (uMat.ptr (row), uSrc + row * actualUVStride, uvWidth);
337- memcpy (vMat.ptr (row), vSrc + row * actualUVStride, uvWidth);
338- }
339-
340- // Blur U and V using same downscale-blur-upscale approach for performance
341- // U/V are already at half res, so 2x downscale = quarter res
342- cv::Mat uSmall, vSmall, uBlurredSmall, vBlurredSmall, uBlurred, vBlurred;
343- int uvSmallW = uvWidth / 2 ;
344- int uvSmallH = uvHeight / 2 ;
345- cv::resize (uMat, uSmall, cv::Size (uvSmallW, uvSmallH), 0 , 0 , cv::INTER_AREA);
346- cv::resize (vMat, vSmall, cv::Size (uvSmallW, uvSmallH), 0 , 0 , cv::INTER_AREA);
347- cv::stackBlur (uSmall, uBlurredSmall, cv::Size (11 , 11 ));
348- cv::stackBlur (vSmall, vBlurredSmall, cv::Size (11 , 11 ));
349- cv::resize (uBlurredSmall, uBlurred, cv::Size (uvWidth, uvHeight), 0 , 0 ,
350- cv::INTER_LINEAR);
351- cv::resize (vBlurredSmall, vBlurred, cv::Size (uvWidth, uvHeight), 0 , 0 ,
352- cv::INTER_LINEAR);
353-
354- // Downscale mask for UV blending (UV is half resolution)
355- cv::Mat uvMask;
356- cv::resize (fullMask, uvMask, cv::Size (uvWidth, uvHeight), 0 , 0 ,
357- cv::INTER_LINEAR);
358-
359- // Blend Y: foreground (mask=1) uses original, background (mask=0) uses
360- // blurred
361- std::vector<uint8_t > outY (actualYStride * height);
362-
363- for (int row = 0 ; row < height; row++) {
364- const uint8_t *srcY = yMat.ptr <uint8_t >(row);
365- const uint8_t *blurY = yBlurred.ptr <uint8_t >(row);
366- const float *maskRow = fullMask.ptr <float >(row);
367- uint8_t *dstY = outY.data () + row * actualYStride;
368-
369- for (int col = 0 ; col < width; col++) {
370- float prob = maskRow[col];
371- dstY[col] =
372- static_cast <uint8_t >(blurY[col] * (1 .0f - prob) + srcY[col] * prob);
373- }
374- if (actualYStride > width) {
375- memcpy (dstY + width, ySrc + row * actualYStride + width,
376- actualYStride - width);
377- }
378- }
379-
380- // Blend U plane
381- std::vector<uint8_t > outU (actualUVStride * uvHeight);
382- for (int row = 0 ; row < uvHeight; row++) {
383- const uint8_t *srcU = uMat.ptr <uint8_t >(row);
384- const uint8_t *blurU = uBlurred.ptr <uint8_t >(row);
385- const float *maskRow = uvMask.ptr <float >(row);
386- uint8_t *dstU = outU.data () + row * actualUVStride;
387-
388- for (int col = 0 ; col < uvWidth; col++) {
389- float prob = maskRow[col];
390- dstU[col] =
391- static_cast <uint8_t >(blurU[col] * (1 .0f - prob) + srcU[col] * prob);
392- }
393- if (actualUVStride > uvWidth) {
394- memcpy (dstU + uvWidth, uSrc + row * actualUVStride + uvWidth,
395- actualUVStride - uvWidth);
396- }
397- }
398-
399- // Blend V plane
400- std::vector<uint8_t > outV (actualUVStride * uvHeight);
401- for (int row = 0 ; row < uvHeight; row++) {
402- const uint8_t *srcV = vMat.ptr <uint8_t >(row);
403- const uint8_t *blurV = vBlurred.ptr <uint8_t >(row);
404- const float *maskRow = uvMask.ptr <float >(row);
405- uint8_t *dstV = outV.data () + row * actualUVStride;
406-
407- for (int col = 0 ; col < uvWidth; col++) {
408- float prob = maskRow[col];
409- dstV[col] =
410- static_cast <uint8_t >(blurV[col] * (1 .0f - prob) + srcV[col] * prob);
411- }
412- if (actualUVStride > uvWidth) {
413- memcpy (dstV + uvWidth, vSrc + row * actualUVStride + uvWidth,
414- actualUVStride - uvWidth);
415- }
416- }
417-
418- // Copy data to output arrays
419- env->SetByteArrayRegion (outYData, 0 , actualYStride * height,
420- reinterpret_cast <jbyte *>(outY.data ()));
421- env->SetByteArrayRegion (outUData, 0 , actualUVStride * uvHeight,
422- reinterpret_cast <jbyte *>(outU.data ()));
423- env->SetByteArrayRegion (outVData, 0 , actualUVStride * uvHeight,
424- reinterpret_cast <jbyte *>(outV.data ()));
425-
426- env->ReleaseByteArrayElements (yData, yPtr, JNI_ABORT);
427- env->ReleaseByteArrayElements (uData, uPtr, JNI_ABORT);
428- env->ReleaseByteArrayElements (vData, vPtr, JNI_ABORT);
429-
430- // Create result array of 3 byte arrays [Y, U, V]
431- jclass byteArrayClass = env->FindClass (" [B" );
432- jobjectArray result = env->NewObjectArray (3 , byteArrayClass, nullptr );
433- env->SetObjectArrayElement (result, 0 , outYData);
434- env->SetObjectArrayElement (result, 1 , outUData);
435- env->SetObjectArrayElement (result, 2 , outVData);
436-
437- return result;
438- }
439-
44096/* *
44197 * Run segmentation on RGBA pixels, returns grayscale mask (0-255 bytes).
44298 * Used by GL-based blur pipeline.
0 commit comments