Skip to content

Commit 2b2ebb3

Browse files
committed
WIP Convert nvdec CUDA to OpenGL texture
I do not yet have the CUDA SDK
1 parent 6e3272a commit 2b2ebb3

1 file changed

Lines changed: 202 additions & 3 deletions

File tree

src/modules/avformat/producer_avformat.c

Lines changed: 202 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,16 @@
8080
#include <libavutil/hwcontext_d3d11va.h>
8181
#endif
8282

83+
// CUDA headers (optional - only if CUDA SDK is available)
84+
#if defined(__has_include)
85+
#if __has_include(<cuda.h>) && __has_include(<cudaGL.h>)
86+
#define HAVE_CUDA 1
87+
#include <cuda.h>
88+
#include <cudaGL.h>
89+
#include <libavutil/hwcontext_cuda.h>
90+
#endif
91+
#endif
92+
8393
// System header files
8494
#include <limits.h>
8595
#include <math.h>
@@ -925,6 +935,10 @@ static mlt_image_format pick_image_format(enum AVPixelFormat pix_fmt,
925935
return mlt_image_opengl_texture;
926936
case AV_PIX_FMT_D3D11:
927937
return mlt_image_opengl_texture;
938+
#ifdef HAVE_CUDA
939+
case AV_PIX_FMT_CUDA:
940+
return mlt_image_opengl_texture;
941+
#endif
928942
default:
929943
current_format = mlt_image_yuv422;
930944
}
@@ -3020,7 +3034,163 @@ static int convert_d3d11_to_opengl_texture(producer_avformat self,
30203034

30213035
return final_texture > 0 ? 0 : -1;
30223036
}
3023-
#endif
3037+
3038+
#ifdef HAVE_CUDA
3039+
// Destructor for OpenGL textures created from CUDA
3040+
static void cuda_texture_destructor(void *data)
3041+
{
3042+
if (data) {
3043+
GLuint *texture_id = (GLuint *) data;
3044+
if (*texture_id) {
3045+
// Unregister CUDA resource if needed
3046+
cudaGraphicsResource_t *cuda_resource = NULL;
3047+
// Try to get the CUDA resource from texture (stored separately if needed)
3048+
// For now, just delete the GL texture
3049+
glDeleteTextures(1, texture_id);
3050+
}
3051+
mlt_pool_release(data);
3052+
}
3053+
}
3054+
3055+
static int convert_cuda_to_opengl_texture(producer_avformat self,
3056+
AVFrame *cuda_frame,
3057+
uint8_t **buffer,
3058+
int width,
3059+
int height,
3060+
mlt_frame frame)
3061+
{
3062+
if (!cuda_frame || !cuda_frame->data[0]) {
3063+
return -1;
3064+
}
3065+
3066+
// Get CUDA device pointer from frame data
3067+
CUdeviceptr cuda_ptr = (CUdeviceptr) cuda_frame->data[0];
3068+
3069+
// Determine pixel format - CUDA frames are typically NV12 or P010
3070+
// For now, assume NV12 (8-bit YUV 4:2:0)
3071+
3072+
GLuint texture_id = 0;
3073+
cudaGraphicsResource_t cuda_resource = NULL;
3074+
CUresult cu_result;
3075+
3076+
// Initialize CUDA if not already done
3077+
static int cuda_initialized = 0;
3078+
if (!cuda_initialized) {
3079+
CUcontext cuda_context;
3080+
cu_result = cuInit(0);
3081+
if (cu_result != CUDA_SUCCESS) {
3082+
mlt_log_error(MLT_PRODUCER_SERVICE(self->parent), "Failed to initialize CUDA\n");
3083+
return -1;
3084+
}
3085+
cuda_initialized = 1;
3086+
}
3087+
3088+
// Create OpenGL texture
3089+
glGenTextures(1, &texture_id);
3090+
glBindTexture(GL_TEXTURE_2D, texture_id);
3091+
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
3092+
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
3093+
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
3094+
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
3095+
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
3096+
3097+
// Register OpenGL texture with CUDA
3098+
cudaError_t cuda_err = cudaGraphicsGLRegisterImage(&cuda_resource,
3099+
texture_id,
3100+
GL_TEXTURE_2D,
3101+
cudaGraphicsRegisterFlagsWriteDiscard);
3102+
3103+
if (cuda_err != cudaSuccess) {
3104+
mlt_log_error(MLT_PRODUCER_SERVICE(self->parent),
3105+
"Failed to register GL texture with CUDA: %s\n",
3106+
cudaGetErrorString(cuda_err));
3107+
glDeleteTextures(1, &texture_id);
3108+
return -1;
3109+
}
3110+
3111+
// Map the resource for CUDA access
3112+
cuda_err = cudaGraphicsMapResources(1, &cuda_resource, 0);
3113+
if (cuda_err != cudaSuccess) {
3114+
mlt_log_error(MLT_PRODUCER_SERVICE(self->parent),
3115+
"Failed to map CUDA resource: %s\n",
3116+
cudaGetErrorString(cuda_err));
3117+
cudaGraphicsUnregisterResource(cuda_resource);
3118+
glDeleteTextures(1, &texture_id);
3119+
return -1;
3120+
}
3121+
3122+
// Get mapped array
3123+
cudaArray_t cuda_array;
3124+
cuda_err = cudaGraphicsSubResourceGetMappedArray(&cuda_array, cuda_resource, 0, 0);
3125+
if (cuda_err != cudaSuccess) {
3126+
mlt_log_error(MLT_PRODUCER_SERVICE(self->parent),
3127+
"Failed to get mapped CUDA array: %s\n",
3128+
cudaGetErrorString(cuda_err));
3129+
cudaGraphicsUnmapResources(1, &cuda_resource, 0);
3130+
cudaGraphicsUnregisterResource(cuda_resource);
3131+
glDeleteTextures(1, &texture_id);
3132+
return -1;
3133+
}
3134+
3135+
// Copy from CUDA device memory to the mapped texture
3136+
// For NV12 input, we need to convert to RGBA
3137+
// This would require a CUDA kernel for YUV to RGB conversion
3138+
// For now, use a simple memcpy (assumes data is already RGB/RGBA)
3139+
3140+
size_t data_size = width * height * 4; // RGBA
3141+
cuda_err = cudaMemcpy2DToArray(cuda_array,
3142+
0,
3143+
0,
3144+
(void *) cuda_ptr,
3145+
width * 4,
3146+
width * 4,
3147+
height,
3148+
cudaMemcpyDeviceToDevice);
3149+
3150+
if (cuda_err != cudaSuccess) {
3151+
mlt_log_error(MLT_PRODUCER_SERVICE(self->parent),
3152+
"Failed to copy CUDA data to texture: %s\n",
3153+
cudaGetErrorString(cuda_err));
3154+
}
3155+
3156+
// Unmap the resource
3157+
cudaGraphicsUnmapResources(1, &cuda_resource, 0);
3158+
cudaGraphicsUnregisterResource(cuda_resource);
3159+
3160+
if (cuda_err != cudaSuccess) {
3161+
glDeleteTextures(1, &texture_id);
3162+
return -1;
3163+
}
3164+
3165+
mlt_log_debug(MLT_PRODUCER_SERVICE(self->parent),
3166+
"Successfully converted CUDA to OpenGL texture: %u\n",
3167+
texture_id);
3168+
3169+
// Store the texture ID in heap memory managed by the frame
3170+
if (texture_id) {
3171+
GLuint *texture_ptr = mlt_pool_alloc(sizeof(GLuint));
3172+
if (texture_ptr) {
3173+
*texture_ptr = texture_id;
3174+
3175+
// Register the texture with the frame so it gets cleaned up when frame is destroyed
3176+
mlt_properties_set_data(MLT_FRAME_PROPERTIES(frame),
3177+
"cuda.opengl.texture",
3178+
texture_ptr,
3179+
0,
3180+
cuda_texture_destructor,
3181+
NULL);
3182+
3183+
// Point the image buffer to our texture ID storage
3184+
if (*buffer) {
3185+
*((GLuint **) buffer) = texture_ptr;
3186+
}
3187+
}
3188+
}
3189+
3190+
return texture_id > 0 ? 0 : -1;
3191+
}
3192+
#endif // HAVE_CUDA
3193+
#endif // _WIN32
30243194

30253195
// returns resulting YUV colorspace
30263196
static int convert_image(producer_avformat self,
@@ -3157,7 +3327,20 @@ static int convert_image(producer_avformat self,
31573327
} else {
31583328
colorspace = mlt_colorspace_invalid;
31593329
}
3160-
#endif
3330+
#ifdef HAVE_CUDA
3331+
} else if (*format == mlt_image_opengl_texture && pix_fmt == AV_PIX_FMT_CUDA) {
3332+
// Handle CUDA to OpenGL texture conversion
3333+
// frame->data[i] contain CUdeviceptr pointers
3334+
if (convert_cuda_to_opengl_texture(self, frame, buffer, width, height, mlt_frame_obj)
3335+
== 0) {
3336+
// Successfully created OpenGL texture
3337+
mlt_log_info(MLT_PRODUCER_SERVICE(self->parent),
3338+
"Successfully converted CUDA to OpenGL texture\n");
3339+
} else {
3340+
colorspace = mlt_colorspace_invalid;
3341+
}
3342+
#endif // HAVE_CUDA
3343+
#endif // _WIN32
31613344
} else if (dst_pix_fmt != AV_PIX_FMT_NONE) {
31623345
colorspace = convert_image_yuvp(self,
31633346
profile,
@@ -3731,7 +3914,23 @@ static int producer_get_image(mlt_frame frame,
37313914
decode_errors = 0;
37323915
}
37333916
}
3734-
#endif
3917+
#ifdef HAVE_CUDA
3918+
// Check if we want OpenGL texture format for CUDA
3919+
if (*format == mlt_image_opengl_texture
3920+
&& self->hwaccel.pix_fmt == AV_PIX_FMT_CUDA) {
3921+
// Keep the hardware frame for direct OpenGL texture conversion
3922+
CUdeviceptr cuda_ptr = (CUdeviceptr) self->video_frame->data[0];
3923+
if (cuda_ptr) {
3924+
mlt_log_verbose(
3925+
MLT_PRODUCER_SERVICE(producer),
3926+
"Keeping CUDA frame for OpenGL texture conversion\n");
3927+
// The CUDA data will be converted to OpenGL texture in convert_image
3928+
got_picture = 1;
3929+
decode_errors = 0;
3930+
}
3931+
}
3932+
#endif // HAVE_CUDA
3933+
#endif // _WIN32
37353934
if (!got_picture) {
37363935
if ((error = hwaccel_download(self->video_frame))) {
37373936
mlt_log_error(MLT_PRODUCER_SERVICE(producer),

0 commit comments

Comments
 (0)