From ae97ae1baa0c62a232ee20982230f0d74e7d3972 Mon Sep 17 00:00:00 2001 From: Robert Mader Date: Fri, 9 May 2025 15:35:35 +0200 Subject: [PATCH 1/2] drm: drm_fourcc: add 10/12/16bit software decoder YCbCr formats This adds FOURCCs for 3-plane 10/12/16bit YCbCr formats used by software decoders like ffmpeg, dav1d and libvpx. The intended use-case is buffer sharing between decoders and GPUs by allocating buffers with e.g. udmabuf or dma-heaps, avoiding unnecessary copies and format conversions in various scenarios. Unlike formats typically used by hardware decoders the 10/12bit formats use a LSB alignment. In order to allow fast implementations in GL and Vulkan the padding must contain only zeros, so the float representation can be calculated by multiplying with 2^6=64 or 2^4=16 respectively. MRs or branches for Mesa, Vulkan, Gstreamer, Weston and Mutter can be found at: - https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34303 - https://github.com/rmader/Vulkan-Docs/commits/ycbcr-16bit-lsb-formats/ - https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8540 - https://gitlab.freedesktop.org/wayland/weston/-/merge_requests/1753 - https://gitlab.gnome.org/GNOME/mutter/-/merge_requests/4348 The naming scheme follows the 'P' and 'Q' formats. The 'S' stands for 'software' and was selected in order to make remembering easy. The 'Sx16' formats could as well be 'Qx16'. We stick with 'S' as 16bit software decoders are likely much more common than hardware ones for the foreseeable future. 
Note that these formats already have Vulkan equivalents: - VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM - VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM - VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM Signed-off-by: Robert Mader Reviewed-by: Daniel Stone Link: https://lore.kernel.org/r/20250509133535.60330-1-robert.mader@collabora.com Signed-off-by: Daniel Stone --- drivers/gpu/drm/drm_fourcc.c | 27 ++++++++++++++++++++++++++ include/uapi/drm/drm_fourcc.h | 36 +++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c index 3a94ca211f9ce9..55ddd99fd7f6a8 100644 --- a/drivers/gpu/drm/drm_fourcc.c +++ b/drivers/gpu/drm/drm_fourcc.c @@ -346,6 +346,33 @@ const struct drm_format_info *__drm_format_info(u32 format) { .format = DRM_FORMAT_P030, .depth = 0, .num_planes = 2, .char_per_block = { 4, 8, 0 }, .block_w = { 3, 3, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true}, + { .format = DRM_FORMAT_S010, .depth = 0, .num_planes = 3, + .char_per_block = { 2, 2, 2 }, .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, + .hsub = 2, .vsub = 2, .is_yuv = true}, + { .format = DRM_FORMAT_S210, .depth = 0, .num_planes = 3, + .char_per_block = { 2, 2, 2 }, .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, + .hsub = 2, .vsub = 1, .is_yuv = true}, + { .format = DRM_FORMAT_S410, .depth = 0, .num_planes = 3, + .char_per_block = { 2, 2, 2 }, .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, + .hsub = 1, .vsub = 1, .is_yuv = true}, + { .format = DRM_FORMAT_S012, .depth = 0, .num_planes = 3, + .char_per_block = { 2, 2, 2 }, .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, + .hsub = 2, .vsub = 2, .is_yuv = true}, + { .format = DRM_FORMAT_S212, .depth = 0, .num_planes = 3, + .char_per_block = { 2, 2, 2 }, .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, + .hsub = 2, .vsub = 1, .is_yuv = true}, + { .format = DRM_FORMAT_S412, .depth = 0, .num_planes = 3, + .char_per_block = { 2, 2, 2 }, .block_w = { 1, 1, 1 }, .block_h = { 
1, 1, 1 }, + .hsub = 1, .vsub = 1, .is_yuv = true}, + { .format = DRM_FORMAT_S016, .depth = 0, .num_planes = 3, + .char_per_block = { 2, 2, 2 }, .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, + .hsub = 2, .vsub = 2, .is_yuv = true}, + { .format = DRM_FORMAT_S216, .depth = 0, .num_planes = 3, + .char_per_block = { 2, 2, 2 }, .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, + .hsub = 2, .vsub = 1, .is_yuv = true}, + { .format = DRM_FORMAT_S416, .depth = 0, .num_planes = 3, + .char_per_block = { 2, 2, 2 }, .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, + .hsub = 1, .vsub = 1, .is_yuv = true}, }; unsigned int i; diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 78abd819fd62e5..dffae521914842 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -377,6 +377,42 @@ extern "C" { */ #define DRM_FORMAT_Q401 fourcc_code('Q', '4', '0', '1') +/* + * 3 plane YCbCr LSB aligned + * In order to use these formats in a similar fashion to MSB aligned ones + * implementation can multiply the values by 2^6=64. For that reason the padding + * must only contain zeros. + * index 0 = Y plane, [15:0] z:Y [6:10] little endian + * index 1 = Cb plane, [15:0] z:Cb [6:10] little endian + * index 2 = Cr plane, [15:0] z:Cr [6:10] little endian + */ +#define DRM_FORMAT_S010 fourcc_code('S', '0', '1', '0') /* 2x2 subsampled Cb (1) and Cr (2) planes 10 bits per channel */ +#define DRM_FORMAT_S210 fourcc_code('S', '2', '1', '0') /* 2x1 subsampled Cb (1) and Cr (2) planes 10 bits per channel */ +#define DRM_FORMAT_S410 fourcc_code('S', '4', '1', '0') /* non-subsampled Cb (1) and Cr (2) planes 10 bits per channel */ + +/* + * 3 plane YCbCr LSB aligned + * In order to use these formats in a similar fashion to MSB aligned ones + * implementation can multiply the values by 2^4=16. For that reason the padding + * must only contain zeros. 
+ * index 0 = Y plane, [15:0] z:Y [4:12] little endian + * index 1 = Cb plane, [15:0] z:Cb [4:12] little endian + * index 2 = Cr plane, [15:0] z:Cr [4:12] little endian + */ +#define DRM_FORMAT_S012 fourcc_code('S', '0', '1', '2') /* 2x2 subsampled Cb (1) and Cr (2) planes 12 bits per channel */ +#define DRM_FORMAT_S212 fourcc_code('S', '2', '1', '2') /* 2x1 subsampled Cb (1) and Cr (2) planes 12 bits per channel */ +#define DRM_FORMAT_S412 fourcc_code('S', '4', '1', '2') /* non-subsampled Cb (1) and Cr (2) planes 12 bits per channel */ + +/* + * 3 plane YCbCr + * index 0 = Y plane, [15:0] Y little endian + * index 1 = Cb plane, [15:0] Cb little endian + * index 2 = Cr plane, [15:0] Cr little endian + */ +#define DRM_FORMAT_S016 fourcc_code('S', '0', '1', '6') /* 2x2 subsampled Cb (1) and Cr (2) planes 16 bits per channel */ +#define DRM_FORMAT_S216 fourcc_code('S', '2', '1', '6') /* 2x1 subsampled Cb (1) and Cr (2) planes 16 bits per channel */ +#define DRM_FORMAT_S416 fourcc_code('S', '4', '1', '6') /* non-subsampled Cb (1) and Cr (2) planes 16 bits per channel */ + /* * 3 plane YCbCr * index 0: Y plane, [7:0] Y From fac588936be6ced9550be7633a09463a086f4c9e Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Tue, 15 Jul 2025 11:57:57 +0100 Subject: [PATCH 2/2] drm/vc4: plane: Add support for P01[026] and S01[026] formats There are now formats defined for 2-plane YUV420 at 10, 12, and 16 bit depth using the most significant bits of the 16bit word (P010, P012, and P016), and 3-plane YUV420 at those depths using the least significant bits of the 16 bit word (S010, S012, and S016). VC4_GEN_6 can support all those formats although only composing using at most 10bits of resolution, so add them as supported formats for all planes. 
Signed-off-by: Dave Stevenson --- drivers/gpu/drm/vc4/vc4_plane.c | 54 ++++++++++++++++++++++++++++++--- drivers/gpu/drm/vc4/vc4_regs.h | 9 ++++++ 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 675082a97bceb9..4eb9a63255fb12 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -36,6 +36,7 @@ static const struct hvs_format { u32 pixel_order; u32 pixel_order_hvs5; bool hvs5_only; + bool hvs6_only; } hvs_formats[] = { { .drm = DRM_FORMAT_XRGB8888, @@ -247,6 +248,42 @@ static const struct hvs_format { .pixel_order = HVS_PIXEL_ORDER_BGRA, .pixel_order_hvs5 = HVS_PIXEL_ORDER_RGBA, }, + { + .drm = DRM_FORMAT_P010, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_15_6_2PLANE, + .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, + .hvs6_only = true, + }, + { + .drm = DRM_FORMAT_P012, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_15_6_2PLANE, + .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, + .hvs6_only = true, + }, + { + .drm = DRM_FORMAT_P016, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_15_6_2PLANE, + .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, + .hvs6_only = true, + }, + { + .drm = DRM_FORMAT_S010, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_9_0_3PLANE, + .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, + .hvs6_only = true, + }, + { + .drm = DRM_FORMAT_S012, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_11_2_3PLANE, + .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, + .hvs6_only = true, + }, + { + .drm = DRM_FORMAT_S016, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_15_6_3PLANE, + .pixel_order_hvs5 = HVS_PIXEL_ORDER_XYCBCR, + .hvs6_only = true, + }, }; static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) @@ -2635,6 +2672,12 @@ static bool vc4_format_mod_supported(struct drm_plane *plane, case DRM_FORMAT_YVU420: case DRM_FORMAT_NV16: case DRM_FORMAT_NV61: + case DRM_FORMAT_P010: + case DRM_FORMAT_P012: + case DRM_FORMAT_P016: + case DRM_FORMAT_S010: + case DRM_FORMAT_S012: + case DRM_FORMAT_S016: 
default: return (modifier == DRM_FORMAT_MOD_LINEAR); } @@ -2669,10 +2712,13 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, }; for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { - if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) { - formats[num_formats] = hvs_formats[i].drm; - num_formats++; - } + if (hvs_formats[i].hvs5_only && vc4->gen < VC4_GEN_5) + continue; + if (hvs_formats[i].hvs6_only && vc4->gen < VC4_GEN_6_C) + continue; + + formats[num_formats] = hvs_formats[i].drm; + num_formats++; } vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base, diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 27158be19952c8..68d83d27c32ad9 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -1079,6 +1079,15 @@ enum hvs_pixel_format { HVS_PIXEL_FORMAT_AYUV444_RGB = 15, HVS_PIXEL_FORMAT_RGBA1010102 = 16, HVS_PIXEL_FORMAT_YCBCR_10BIT = 17, + /* 10 bit YUV420 formats with data with various different alignments */ + HVS_PIXEL_FORMAT_YCBCR_YUV420_15_6_2PLANE = 24, + HVS_PIXEL_FORMAT_YCBCR_YUV420_15_6_3PLANE = 25, + HVS_PIXEL_FORMAT_YCBCR_YUV420_13_4_2PLANE = 26, + HVS_PIXEL_FORMAT_YCBCR_YUV420_13_4_3PLANE = 27, + HVS_PIXEL_FORMAT_YCBCR_YUV420_11_2_2PLANE = 28, + HVS_PIXEL_FORMAT_YCBCR_YUV420_11_2_3PLANE = 29, + HVS_PIXEL_FORMAT_YCBCR_YUV420_9_0_2PLANE = 30, + HVS_PIXEL_FORMAT_YCBCR_YUV420_9_0_3PLANE = 31, }; /* Note: the LSB is the rightmost character shown. Only valid for