@@ -120,8 +120,10 @@ namespace cuda {
120120 this ->frame = frame;
121121
122122 auto hwframe_ctx = (AVHWFramesContext *) hw_frames_ctx->data ;
123- if (hwframe_ctx->sw_format != AV_PIX_FMT_NV12 ) {
124- BOOST_LOG (error) << " cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12" sv;
123+
124+ if (hwframe_ctx->sw_format != AV_PIX_FMT_NV12 &&
125+ hwframe_ctx->sw_format != AV_PIX_FMT_YUV444P ) {
126+ BOOST_LOG (error) << " cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12 and AV_PIX_FMT_YUV444P" sv;
125127 return -1 ;
126128 }
127129
@@ -132,6 +134,8 @@ namespace cuda {
132134 }
133135 }
134136
137+ is_yuv444 = (hwframe_ctx->sw_format == AV_PIX_FMT_YUV444P );
138+
135139 auto cuda_ctx = (AVCUDADeviceContext *) hwframe_ctx->device_ctx ->hwctx ;
136140
137141 stream = make_stream ();
@@ -178,7 +182,11 @@ namespace cuda {
178182 return ;
179183 }
180184
181- sws.convert (frame->data [0 ], frame->data [1 ], frame->linesize [0 ], frame->linesize [1 ], tex->texture .linear , stream.get (), {frame->width , frame->height , 0 , 0 });
185+ if (is_yuv444) {
186+ sws.convert_yuv444 (frame->data [0 ], frame->data [1 ], frame->data [2 ], frame->linesize [0 ], tex->texture .linear , stream.get (), {frame->width , frame->height , 0 , 0 });
187+ } else {
188+ sws.convert_nv12 (frame->data [0 ], frame->data [1 ], frame->linesize [0 ], frame->linesize [1 ], tex->texture .linear , stream.get (), {frame->width , frame->height , 0 , 0 });
189+ }
182190 }
183191
184192 cudaTextureObject_t tex_obj (const tex_t &tex) const {
@@ -194,13 +202,18 @@ namespace cuda {
194202 // When height and width don't change, it's not necessary to use linear interpolation
195203 bool linear_interpolation;
196204
205+ bool is_yuv444;
206+
197207 sws_t sws;
198208 };
199209
200210 class cuda_ram_t : public cuda_t {
201211 public:
202212 int convert (platf::img_t &img) override { // Loads img into tex.array, then converts it into the frame's planes in the layout selected by is_yuv444.
203- return sws.load_ram (img, tex.array ) || sws.convert (frame->data [0 ], frame->data [1 ], frame->linesize [0 ], frame->linesize [1 ], tex_obj (tex), stream.get ());
213+ if (is_yuv444) { // three-plane output: data[0], data[1] and data[2] are all passed to the converter
214+ return sws.load_ram (img, tex.array ) || sws.convert_yuv444 (frame->data [0 ], frame->data [1 ], frame->data [2 ], frame->linesize [0 ], tex_obj (tex), stream.get ()); // `||` skips the conversion when load_ram yields a truthy (nonzero) value. NOTE(review): only linesize[0] is passed for three planes - presumably assumes linesize[1] and linesize[2] equal linesize[0]; confirm.
215+ }
216+ return sws.load_ram (img, tex.array ) || sws.convert_nv12 (frame->data [0 ], frame->data [1 ], frame->linesize [0 ], frame->linesize [1 ], tex_obj (tex), stream.get ()); // two-plane output: data[0]/data[1] with their own linesizes; same `||` short-circuit as above
204217 }
205218
206219 int set_frame (AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
@@ -224,7 +237,10 @@ namespace cuda {
224237 class cuda_vram_t : public cuda_t { // cuda_t variant whose convert() samples the texture carried by the incoming image instead of loading pixels first.
225238 public:
226239 int convert (platf::img_t &img) override { // Converts img's texture into the frame's planes; returns whatever the selected sws converter returns.
227- return sws.convert (frame->data [0 ], frame->data [1 ], frame->linesize [0 ], frame->linesize [1 ], tex_obj (((img_t *) &img)->tex ), stream.get ());
240+ if (is_yuv444) { // three-plane output path
241+ return sws.convert_yuv444 (frame->data [0 ], frame->data [1 ], frame->data [2 ], frame->linesize [0 ], tex_obj (((img_t *) &img)->tex ), stream.get ()); // NOTE(review): unchecked C-style cast - assumes every img passed here is really an img_t; only linesize[0] is passed for all three planes - confirm both.
242+ }
243+ return sws.convert_nv12 (frame->data [0 ], frame->data [1 ], frame->linesize [0 ], frame->linesize [1 ], tex_obj (((img_t *) &img)->tex ), stream.get ()); // two-plane output path; same unchecked cast of img to img_t
228244 }
229245 };
230246
@@ -274,6 +290,13 @@ namespace cuda {
274290 return -1 ;
275291 }
276292
293+ struct cu_resources { // Groups the GL textures registered via cuGraphicsGLRegisterImage; the YUV444 path maps y/u/v, the NV12 path maps y/uv.
294+ registered_resource_t y_res; // registered from yuv444->tex[0] or nv12->tex[0], depending on the format
295+ registered_resource_t u_res; // registered from yuv444->tex[1]; only registered on the YUV444 path
296+ registered_resource_t v_res; // registered from yuv444->tex[2]; only registered on the YUV444 path
297+ registered_resource_t uv_res; // registered from nv12->tex[1]; only registered on the NV12 path
298+ };
299+
277300 class gl_cuda_vram_t : public platf ::avcodec_encode_device_t {
278301 public:
279302 /* *
@@ -335,28 +358,44 @@ namespace cuda {
335358 this ->hwframe .reset (frame);
336359 this ->frame = frame;
337360
361+ auto hw_frames_ctx = (AVHWFramesContext *) hw_frames_ctx_buf->data ;
362+
363+ if (hw_frames_ctx->sw_format != AV_PIX_FMT_NV12 &&
364+ hw_frames_ctx->sw_format != AV_PIX_FMT_YUV444P ) {
365+ BOOST_LOG (error) << " cuda::gl_cuda_vram_t doesn't support any format other than AV_PIX_FMT_NV12 and AV_PIX_FMT_YUV444P" sv;
366+ return -1 ;
367+ }
368+
338369 if (!frame->buf [0 ]) {
339370 if (av_hwframe_get_buffer (hw_frames_ctx_buf, frame, 0 )) {
340- BOOST_LOG (error) << " Couldn't get hwframe for VAAPI " sv;
371+ BOOST_LOG (error) << " Couldn't get hwframe for NVENC_GL " sv;
341372 return -1 ;
342373 }
343374 }
344375
345- auto hw_frames_ctx = (AVHWFramesContext *) hw_frames_ctx_buf->data ;
346376 sw_format = hw_frames_ctx->sw_format ;
377+ is_yuv444 = (sw_format == AV_PIX_FMT_YUV444P );
347378
348- auto nv12_opt = egl::create_target (frame->width , frame->height , sw_format);
349- if (!nv12_opt) {
350- return -1 ;
351- }
352-
353- auto sws_opt = egl::sws_t::make (width, height, frame->width , frame->height , sw_format);
379+ auto sws_opt = egl::sws_t::make (width, height, frame->width , frame->height , sw_format, is_yuv444);
354380 if (!sws_opt) {
355381 return -1 ;
356382 }
357383
358384 this ->sws = std::move (*sws_opt);
359- this ->nv12 = std::move (*nv12_opt);
385+
386+ if (is_yuv444) {
387+ auto yuv444_opt = egl::create_yuv444_target (frame->width , frame->height , sw_format);
388+ if (!yuv444_opt) {
389+ return -1 ;
390+ }
391+ this ->yuv444 = std::move (*yuv444_opt);
392+ } else {
393+ auto nv12_opt = egl::create_nv12_target (frame->width , frame->height , sw_format);
394+ if (!nv12_opt) {
395+ return -1 ;
396+ }
397+ this ->nv12 = std::move (*nv12_opt);
398+ }
360399
361400 auto cuda_ctx = (AVCUDADeviceContext *) hw_frames_ctx->device_ctx ->hwctx ;
362401
@@ -367,9 +406,14 @@ namespace cuda {
367406
368407 cuda_ctx->stream = stream.get ();
369408
370- CU_CHECK (cdf->cuGraphicsGLRegisterImage (&y_res, nv12->tex [0 ], GL_TEXTURE_2D , CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY ), " Couldn't register Y plane texture" );
371- CU_CHECK (cdf->cuGraphicsGLRegisterImage (&uv_res, nv12->tex [1 ], GL_TEXTURE_2D , CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY ), " Couldn't register UV plane texture" );
372-
409+ if (is_yuv444) {
410+ CU_CHECK (cdf->cuGraphicsGLRegisterImage (&cu_res.y_res , yuv444->tex [0 ], GL_TEXTURE_2D , CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY ), " Couldn't register Y texture" );
411+ CU_CHECK (cdf->cuGraphicsGLRegisterImage (&cu_res.u_res , yuv444->tex [1 ], GL_TEXTURE_2D , CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY ), " Couldn't register U texture" );
412+ CU_CHECK (cdf->cuGraphicsGLRegisterImage (&cu_res.v_res , yuv444->tex [2 ], GL_TEXTURE_2D , CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY ), " Couldn't register V texture" );
413+ } else {
414+ CU_CHECK (cdf->cuGraphicsGLRegisterImage (&cu_res.y_res , nv12->tex [0 ], GL_TEXTURE_2D , CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY ), " Couldn't register Y plane texture" );
415+ CU_CHECK (cdf->cuGraphicsGLRegisterImage (&cu_res.uv_res , nv12->tex [1 ], GL_TEXTURE_2D , CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY ), " Couldn't register UV plane texture" );
416+ }
373417 return 0 ;
374418 }
375419
@@ -398,33 +442,61 @@ namespace cuda {
398442 rgb = std::move (*rgb_opt);
399443 }
400444
401- // Perform the color conversion and scaling in GL
402- sws.load_vram (descriptor, offset_x, offset_y, rgb->tex [0 ]);
403- sws.convert (nv12->buf );
404-
405445 auto fmt_desc = av_pix_fmt_desc_get (sw_format);
406446
407- // Map the GL textures to read for CUDA
408- CUgraphicsResource resources[2 ] = {y_res.get (), uv_res.get ()};
409- CU_CHECK (cdf->cuGraphicsMapResources (2 , resources, stream.get ()), " Couldn't map GL textures in CUDA" );
447+ sws.load_vram (descriptor, offset_x, offset_y, rgb->tex [0 ], is_yuv444);
448+
449+ if (is_yuv444) {
450+ // Perform the color conversion and scaling in GL
451+ sws.convert_yuv444 (yuv444->buf );
410452
411- // Copy from the GL textures to the target CUDA frame
412- for (int i = 0 ; i < 2 ; i++) {
413- CUDA_MEMCPY2D cpy = {};
414- cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY ;
415- CU_CHECK (cdf->cuGraphicsSubResourceGetMappedArray (&cpy.srcArray , resources[i], 0 , 0 ), " Couldn't get mapped plane array" );
453+ // Map the GL textures to read for CUDA
454+ std::array<CUgraphicsResource, 3 > resources = {{cu_res.y_res .get (), cu_res.u_res .get (), cu_res.v_res .get ()}};
455+ CU_CHECK (cdf->cuGraphicsMapResources (resources.size (), resources.data (), stream.get ()), " Couldn't map GL textures in CUDA" );
416456
417- cpy. dstMemoryType = CU_MEMORYTYPE_DEVICE ;
418- cpy. dstDevice = (CUdeviceptr) frame-> data [i];
419- cpy. dstPitch = frame-> linesize [i] ;
420- cpy.WidthInBytes = (frame-> width * fmt_desc-> comp [i]. step ) >> (i ? fmt_desc-> log2_chroma_w : 0 ) ;
421- cpy. Height = frame-> height >> (i ? fmt_desc-> log2_chroma_h : 0 );
457+ // Copy from the GL textures to the target CUDA frame
458+ for ( int i = 0 ; i < 3 ; i++) {
459+ CUDA_MEMCPY2D cpy = {} ;
460+ cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY ;
461+ CU_CHECK (cdf-> cuGraphicsSubResourceGetMappedArray (&cpy. srcArray , resources[i], 0 , 0 ), " Couldn't get mapped plane array " );
422462
423- CU_CHECK_IGNORE (cdf->cuMemcpy2DAsync (&cpy, stream.get ()), " Couldn't copy texture to CUDA frame" );
463+ cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE ;
464+ cpy.dstDevice = (CUdeviceptr) frame->data [i];
465+ cpy.dstPitch = frame->linesize [i];
466+ cpy.WidthInBytes = (frame->width * fmt_desc->comp [i].step );
467+ cpy.Height = frame->height ;
468+
469+ CU_CHECK_IGNORE (cdf->cuMemcpy2DAsync (&cpy, stream.get ()), " Couldn't copy texture to CUDA frame" );
470+ }
471+ // Unmap the textures to allow modification from GL again
472+ CU_CHECK (cdf->cuGraphicsUnmapResources (resources.size (), resources.data (), stream.get ()), " Couldn't unmap GL textures from CUDA" );
473+
474+ } else {
475+ // Perform the color conversion and scaling in GL
476+ sws.convert_nv12 (nv12->buf );
477+
478+ // Map the GL textures to read for CUDA
479+ std::array<CUgraphicsResource, 2 > resources = {{cu_res.y_res .get (), cu_res.uv_res .get ()}};
480+ CU_CHECK (cdf->cuGraphicsMapResources (resources.size (), resources.data (), stream.get ()), " Couldn't map GL textures in CUDA" );
481+
482+ // Copy from the GL textures to the target CUDA frame
483+ for (int i = 0 ; i < 2 ; i++) {
484+ CUDA_MEMCPY2D cpy = {};
485+ cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY ;
486+ CU_CHECK (cdf->cuGraphicsSubResourceGetMappedArray (&cpy.srcArray , resources[i], 0 , 0 ), " Couldn't get mapped plane array" );
487+
488+ cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE ;
489+ cpy.dstDevice = (CUdeviceptr) frame->data [i];
490+ cpy.dstPitch = frame->linesize [i];
491+ cpy.WidthInBytes = (frame->width * fmt_desc->comp [i].step ) >> (i ? fmt_desc->log2_chroma_w : 0 );
492+ cpy.Height = frame->height >> (i ? fmt_desc->log2_chroma_h : 0 );
493+
494+ CU_CHECK_IGNORE (cdf->cuMemcpy2DAsync (&cpy, stream.get ()), " Couldn't copy texture to CUDA frame" );
495+ }
496+ // Unmap the textures to allow modification from GL again
497+ CU_CHECK (cdf->cuGraphicsUnmapResources (resources.size (), resources.data (), stream.get ()), " Couldn't unmap GL textures from CUDA" );
424498 }
425499
426- // Unmap the textures to allow modification from GL again
427- CU_CHECK (cdf->cuGraphicsUnmapResources (2 , resources, stream.get ()), " Couldn't unmap GL textures from CUDA" );
428500 return 0 ;
429501 }
430502
@@ -446,6 +518,7 @@ namespace cuda {
446518
447519 egl::sws_t sws;
448520 egl::nv12_t nv12;
521+ egl::yuv444_t yuv444;
449522 AVPixelFormat sw_format;
450523
451524 int height;
@@ -454,11 +527,12 @@ namespace cuda {
454527 std::uint64_t sequence;
455528 egl::rgb_t rgb;
456529
457- registered_resource_t y_res;
458- registered_resource_t uv_res;
530+ cu_resources cu_res;
459531
460532 int offset_x;
461533 int offset_y;
534+
535+ bool is_yuv444;
462536 };
463537
464538 std::unique_ptr<platf::avcodec_encode_device_t > make_avcodec_encode_device (int width, int height, bool vram) {
0 commit comments