From 293009424f4a2500c3335a5486da397190b8f7f5 Mon Sep 17 00:00:00 2001 From: Srikanth Kiran Kotagiri Date: Fri, 25 Jul 2025 16:10:01 -0700 Subject: [PATCH 1/2] Adding support for tiles images --- .gitignore | 1 + fftools/ffmpeg_filter.c | 65 +++ libavcodec/hevc/hevcdec.c | 10 + libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/vf_heif_tile_compositor_auto.c | 631 +++++++++++++++++++++ libavformat/mov.c | 77 +++ libavutil/Makefile | 2 + libavutil/heif_color.c | 159 ++++++ libavutil/heif_color.h | 65 +++ 10 files changed, 1012 insertions(+) create mode 100644 libavfilter/vf_heif_tile_compositor_auto.c create mode 100644 libavutil/heif_color.c create mode 100644 libavutil/heif_color.h diff --git a/.gitignore b/.gitignore index 4aa49c52c7299..cdd9c67a098ad 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,4 @@ /libavfilter/vulkan/*.c /.*/ !/.forgejo/ +reference.heic diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c index d6f9c610d60f5..8dc4f237b8ccd 100644 --- a/fftools/ffmpeg_filter.c +++ b/fftools/ffmpeg_filter.c @@ -68,6 +68,9 @@ typedef struct FilterGraphPriv { Scheduler *sch; unsigned sch_idx; + + // Metadata for stream group information (e.g., HEIF tiles) + AVDictionary *metadata; } FilterGraphPriv; static FilterGraphPriv *fgp_from_fg(FilterGraph *fg) @@ -1039,6 +1042,7 @@ void fg_free(FilterGraph **pfg) av_frame_free(&fgp->frame); av_frame_free(&fgp->frame_enc); + av_dict_free(&fgp->metadata); av_freep(pfg); } @@ -1333,6 +1337,63 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter) } } + // Extract HEIF metadata for auto-compositor filter when using stream group syntax + if ((ss.stream_list == STREAM_LIST_GROUP_IDX || ss.stream_list == STREAM_LIST_GROUP_ID) && + ifilter->graph && strstr(ifilter->graph->graph_desc, "heif_auto_compositor")) { + + AVStreamGroup *g = NULL; + if (ss.stream_list == STREAM_LIST_GROUP_IDX && + ss.list_id >= 0 && ss.list_id < s->nb_stream_groups) { + g = 
s->stream_groups[ss.list_id]; + } else if (ss.stream_list == STREAM_LIST_GROUP_ID) { + for (unsigned i = 0; i < s->nb_stream_groups; i++) { + if (ss.list_id == s->stream_groups[i]->id) { + g = s->stream_groups[i]; + break; + } + } + } + + if (g && g->type == AV_STREAM_GROUP_PARAMS_TILE_GRID) { + AVStreamGroupTileGrid *tile = g->params.tile_grid; + + av_log(fg, AV_LOG_INFO, "HEIF stream group %d found: %d tiles, canvas %dx%d, presentation %dx%d\n", + ss.list_id, tile->nb_tiles, tile->coded_width, tile->coded_height, tile->width, tile->height); + + // Store metadata with stream group specific keys for access by the filter + FilterGraphPriv *fgp = fgp_from_fg(ifilter->graph); + char key_prefix[32]; + snprintf(key_prefix, sizeof(key_prefix), "heif_g%d", (int)ss.list_id); + + av_dict_set(&fgp->metadata, av_asprintf("%s_canvas_w", key_prefix), av_asprintf("%d", tile->coded_width), AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL); + av_dict_set(&fgp->metadata, av_asprintf("%s_canvas_h", key_prefix), av_asprintf("%d", tile->coded_height), AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL); + av_dict_set(&fgp->metadata, av_asprintf("%s_presentation_w", key_prefix), av_asprintf("%d", tile->width), AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL); + av_dict_set(&fgp->metadata, av_asprintf("%s_presentation_h", key_prefix), av_asprintf("%d", tile->height), AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL); + av_dict_set(&fgp->metadata, av_asprintf("%s_horizontal_offset", key_prefix), av_asprintf("%d", tile->horizontal_offset), AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL); + av_dict_set(&fgp->metadata, av_asprintf("%s_vertical_offset", key_prefix), av_asprintf("%d", tile->vertical_offset), AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL); + av_dict_set(&fgp->metadata, av_asprintf("%s_total_tiles", key_prefix), av_asprintf("%d", tile->nb_tiles), AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL); + + // Store the stream group index for the filter to identify which 
group it's using + av_dict_set(&fgp->metadata, "heif_active_group", av_asprintf("%d", (int)ss.list_id), AV_DICT_DONT_STRDUP_VAL); + + // Store tile positioning information using the offsets array + for (unsigned t = 0; t < tile->nb_tiles && t < g->nb_streams; t++) { + char key[64]; + + snprintf(key, sizeof(key), "%s_tile_%u_x", key_prefix, t); + av_dict_set(&fgp->metadata, key, av_asprintf("%d", tile->offsets[t].horizontal), AV_DICT_DONT_STRDUP_VAL); + + snprintf(key, sizeof(key), "%s_tile_%u_y", key_prefix, t); + av_dict_set(&fgp->metadata, key, av_asprintf("%d", tile->offsets[t].vertical), AV_DICT_DONT_STRDUP_VAL); + + snprintf(key, sizeof(key), "%s_tile_%u_index", key_prefix, t); + av_dict_set(&fgp->metadata, key, av_asprintf("%u", t), AV_DICT_DONT_STRDUP_VAL); + } + + av_log(fg, AV_LOG_INFO, "Stored HEIF stream group %d metadata in filter graph for auto-compositor\n", (int)ss.list_id); + } + } + for (i = 0; i < s->nb_streams; i++) { enum AVMediaType stream_type = s->streams[i]->codecpar->codec_type; if (stream_type != type && @@ -1915,6 +1976,10 @@ static int configure_filtergraph(FilterGraph *fg, FilterGraphThread *fgt) fgt->graph = avfilter_graph_alloc(); if (!fgt->graph) return AVERROR(ENOMEM); + + // Pass HEIF metadata to the filter graph opaque field for filter access + if (fgp->metadata) + fgt->graph->opaque = fgp->metadata; if (simple) { OutputFilterPriv *ofp = ofp_from_ofilter(fg->outputs[0]); diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c index 21ecf063c5af6..b55191c5cfb2c 100644 --- a/libavcodec/hevc/hevcdec.c +++ b/libavcodec/hevc/hevcdec.c @@ -3879,6 +3879,16 @@ static int hevc_receive_frame(AVCodecContext *avctx, AVFrame *frame) if (!(avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)) av_frame_remove_side_data(frame, AV_FRAME_DATA_FILM_GRAIN_PARAMS); + // Propagate packet metadata to frame metadata + av_log(avctx, AV_LOG_DEBUG, "HEVC decoder: Attempting to propagate packet metadata (packet has %d side data entries)\n", 
avpkt->side_data_elems); + ret = ff_decode_frame_props_from_pkt(avctx, frame, avpkt); + if (ret < 0) { + av_log(avctx, AV_LOG_WARNING, "Failed to propagate packet metadata to frame\n"); + } else { + av_log(avctx, AV_LOG_DEBUG, "HEVC decoder: Successfully propagated metadata, frame now has %d metadata entries\n", + frame->metadata ? av_dict_count(frame->metadata) : 0); + } + return 0; } diff --git a/libavfilter/Makefile b/libavfilter/Makefile index e19f67a3a7911..64dd7c1e4909b 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -337,6 +337,7 @@ OBJS-$(CONFIG_GRAYWORLD_FILTER) += vf_grayworld.o OBJS-$(CONFIG_GREYEDGE_FILTER) += vf_colorconstancy.o OBJS-$(CONFIG_GUIDED_FILTER) += vf_guided.o framesync.o OBJS-$(CONFIG_HALDCLUT_FILTER) += vf_lut3d.o framesync.o +OBJS-$(CONFIG_HEIF_AUTO_COMPOSITOR_FILTER) += vf_heif_tile_compositor_auto.o framesync.o OBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o OBJS-$(CONFIG_HFLIP_VULKAN_FILTER) += vf_flip_vulkan.o vulkan.o OBJS-$(CONFIG_HISTEQ_FILTER) += vf_histeq.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index f3c2092b15234..43325ed4fe95c 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -312,6 +312,7 @@ extern const FFFilter ff_vf_grayworld; extern const FFFilter ff_vf_greyedge; extern const FFFilter ff_vf_guided; extern const FFFilter ff_vf_haldclut; +extern const FFFilter ff_vf_heif_auto_compositor; extern const FFFilter ff_vf_hflip; extern const FFFilter ff_vf_hflip_vulkan; extern const FFFilter ff_vf_histeq; diff --git a/libavfilter/vf_heif_tile_compositor_auto.c b/libavfilter/vf_heif_tile_compositor_auto.c new file mode 100644 index 0000000000000..30620cbcf3907 --- /dev/null +++ b/libavfilter/vf_heif_tile_compositor_auto.c @@ -0,0 +1,631 @@ +/* + * HEIF automatic tile compositor filter + * Copyright (c) 2025 FFmpeg developers + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * HEIF automatic tile compositor filter + * + * Reads tile positioning metadata from input streams and automatically + * composites them into the correct layout according to HEIF tile grid + * specifications. Supports dynamic tile count detection and proper + * presentation cropping. 
+ */ + +#include "libavutil/heif_color.h" +#include "libavutil/imgutils.h" +#include "libavutil/opt.h" +#include "libavutil/pixfmt.h" +#include "libavutil/mem.h" +#include "libavutil/parseutils.h" +#include "libavutil/mathematics.h" +#include "libavutil/avstring.h" +#include +#include "avfilter.h" +#include "filters.h" +#include "formats.h" +#include "framesync.h" +#include "video.h" + +#define MAX_TILES 96 + +typedef struct TileInfo { + int grid_id; ///< Tile grid ID + int tile_index; ///< Index within the grid + int x, y; ///< Position in coded canvas + int has_metadata; ///< Whether metadata was found +} TileInfo; + +typedef struct HEIFAutoCompositorContext { + const AVClass *class; + FFFrameSync fs; + + // Options + int nb_inputs; ///< Number of input streams + int target_grid_id; ///< Specific grid ID to composite (-1 = auto) + int convert_p3_to_srgb; ///< Color space conversion + int auto_crop; ///< Crop to presentation dimensions + + // Discovered from tile grid + int canvas_width, canvas_height; ///< Canvas size from tile layout + int presentation_width, presentation_height; ///< Presentation size for cropping + int horizontal_offset, vertical_offset; ///< Offset from canvas to presentation area + int total_tiles; ///< Total tiles in grid + int valid_inputs; ///< Number of inputs with tile data + + TileInfo tiles[MAX_TILES]; ///< Tile information per input + int initialized; ///< Setup complete +} HEIFAutoCompositorContext; + +#define OFFSET(x) offsetof(HEIFAutoCompositorContext, x) +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM + +static const AVOption heif_auto_compositor_options[] = { + { "inputs", "set number of input streams (0 = auto-detect)", OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64=0}, 0, MAX_TILES, FLAGS }, + { "grid_id", "specific tile grid ID to composite (-1 = auto)", OFFSET(target_grid_id), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX, FLAGS }, + { "convert_p3", "convert Display P3 to sRGB", OFFSET(convert_p3_to_srgb), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, 
FLAGS }, + { "auto_crop", "crop output to presentation dimensions", OFFSET(auto_crop), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(heif_auto_compositor); + +static int composite_tiles(FFFrameSync *fs); + +static int parse_tile_metadata(AVFilterContext *ctx, int input_idx, AVDictionary *metadata) +{ + HEIFAutoCompositorContext *s = ctx->priv; + TileInfo *tile = &s->tiles[input_idx]; + const AVDictionaryEntry *entry; + + // Check if this stream has HEIF tile metadata + entry = av_dict_get(metadata, "heif_tile_grid_id", NULL, 0); + if (!entry) { + av_log(ctx, AV_LOG_DEBUG, "Input %d: No HEIF tile metadata found\n", input_idx); + return 0; + } + + tile->grid_id = atoi(entry->value); + + // If we're targeting a specific grid, skip others + if (s->target_grid_id >= 0 && tile->grid_id != s->target_grid_id) { + av_log(ctx, AV_LOG_DEBUG, "Input %d: Grid ID %d doesn't match target %d\n", + input_idx, tile->grid_id, s->target_grid_id); + return 0; + } + + // Parse tile positioning + if ((entry = av_dict_get(metadata, "heif_tile_index", NULL, 0))) + tile->tile_index = atoi(entry->value); + + if ((entry = av_dict_get(metadata, "heif_tile_x", NULL, 0))) + tile->x = atoi(entry->value); + + if ((entry = av_dict_get(metadata, "heif_tile_y", NULL, 0))) + tile->y = atoi(entry->value); + + // Parse grid information (from first tile that has it) + if (!s->initialized) { + if ((entry = av_dict_get(metadata, "heif_canvas_size", NULL, 0))) { + sscanf(entry->value, "%dx%d", &s->canvas_width, &s->canvas_height); + } + + if ((entry = av_dict_get(metadata, "heif_total_tiles", NULL, 0))) { + s->total_tiles = atoi(entry->value); + } + + s->initialized = 1; + + av_log(ctx, AV_LOG_INFO, "HEIF grid %d: %d tiles, canvas=%dx%d\n", + tile->grid_id, s->total_tiles, s->canvas_width, s->canvas_height); + } + + tile->has_metadata = 1; + s->valid_inputs++; + + av_log(ctx, AV_LOG_DEBUG, "Input %d: Tile %d at (%d,%d) in grid %d\n", + input_idx, tile->tile_index, 
tile->x, tile->y, tile->grid_id); + + return 1; +} + +static int query_formats(const AVFilterContext *ctx, + AVFilterFormatsConfig **cfg_in, + AVFilterFormatsConfig **cfg_out) +{ + static const enum AVPixelFormat pix_fmts[] = { + AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_GRAY8, + AV_PIX_FMT_NONE + }; + static const enum AVPixelFormat out_pix_fmts[] = { AV_PIX_FMT_RGBA, AV_PIX_FMT_NONE }; + + /* Pin the output to RGBA first: the common-list helper below only fills format lists that are still unset, so it then covers just the inputs and never has its reference to the output slot overwritten. */ + int ret = ff_formats_ref(ff_make_format_list(out_pix_fmts), &cfg_out[0]->formats); + if (ret < 0) + return ret; + + return ff_set_common_formats_from_list2(ctx, cfg_in, cfg_out, pix_fmts); +} + +static int config_output(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + HEIFAutoCompositorContext *s = ctx->priv; + int ret; + + av_log(ctx, AV_LOG_INFO, "Configuring HEIF auto-compositor with %d configured inputs, %d actual inputs\n", + s->nb_inputs, ctx->nb_inputs); + + // Use the actual number of inputs provided by the filter graph + if (ctx->nb_inputs > 0 && ctx->nb_inputs != s->nb_inputs) { + av_log(ctx, AV_LOG_INFO, "Adjusting input count from %d to %d based on actual inputs\n", + s->nb_inputs, ctx->nb_inputs); + s->nb_inputs = ctx->nb_inputs; + } + + // Ensure we don't exceed the number of input pads that were created + if (s->nb_inputs > ctx->nb_inputs) { + av_log(ctx, AV_LOG_WARNING, "Detected more inputs (%d) than available pads (%d), limiting to %d\n", + s->nb_inputs, ctx->nb_inputs, ctx->nb_inputs); + s->nb_inputs = ctx->nb_inputs; + } + + // Try to get HEIF metadata from the filter graph (injected during stream group parsing) + AVDictionary *metadata = NULL; + if (ctx->graph && ctx->graph->opaque) { + metadata = (AVDictionary*)ctx->graph->opaque; + } + + if (metadata) { + AVDictionaryEntry *entry; + + // Determine which stream group this filter is processing + int active_group = 0; // Default to group 0 + if ((entry = av_dict_get(metadata, "heif_active_group", NULL, 0))) { + active_group = atoi(entry->value); + } + + 
// Build key prefix for this stream group + char key_prefix[32]; + snprintf(key_prefix, sizeof(key_prefix), "heif_g%d", active_group); + + // Get metadata using stream group specific keys + char key[64]; + snprintf(key, sizeof(key), "%s_canvas_w", key_prefix); + if ((entry = av_dict_get(metadata, key, NULL, 0))) + s->canvas_width = atoi(entry->value); + + snprintf(key, sizeof(key), "%s_canvas_h", key_prefix); + if ((entry = av_dict_get(metadata, key, NULL, 0))) + s->canvas_height = atoi(entry->value); + + snprintf(key, sizeof(key), "%s_total_tiles", key_prefix); + if ((entry = av_dict_get(metadata, key, NULL, 0))) + s->total_tiles = atoi(entry->value); + + av_log(ctx, AV_LOG_INFO, "Retrieved HEIF metadata from stream group %d: canvas=%dx%d, tiles=%d\n", + active_group, s->canvas_width, s->canvas_height, s->total_tiles); + + // Store presentation dimensions and offsets for auto-crop functionality + snprintf(key, sizeof(key), "%s_presentation_w", key_prefix); + if ((entry = av_dict_get(metadata, key, NULL, 0))) { + s->presentation_width = atoi(entry->value); + snprintf(key, sizeof(key), "%s_presentation_h", key_prefix); + if ((entry = av_dict_get(metadata, key, NULL, 0))) { + s->presentation_height = atoi(entry->value); + + // Get presentation area offsets + snprintf(key, sizeof(key), "%s_horizontal_offset", key_prefix); + if ((entry = av_dict_get(metadata, key, NULL, 0))) + s->horizontal_offset = atoi(entry->value); + snprintf(key, sizeof(key), "%s_vertical_offset", key_prefix); + if ((entry = av_dict_get(metadata, key, NULL, 0))) + s->vertical_offset = atoi(entry->value); + + av_log(ctx, AV_LOG_INFO, "HEIF presentation: %dx%d at offset (%d,%d) within %dx%d canvas\n", + s->presentation_width, s->presentation_height, + s->horizontal_offset, s->vertical_offset, + s->canvas_width, s->canvas_height); + } + } + + // Mark as initialized if we got valid metadata + if (s->canvas_width > 0 && s->canvas_height > 0) { + s->initialized = 1; + } + } + + // Set default canvas 
size if metadata wasn't available + if (!s->initialized) { + s->canvas_width = 1024; // Will be updated from metadata + s->canvas_height = 1024; // Will be updated from metadata + av_log(ctx, AV_LOG_WARNING, "No HEIF metadata found, using defaults\n"); + } + + // Set output dimensions based on auto_crop option + if (s->auto_crop && s->presentation_width > 0 && s->presentation_height > 0) { + outlink->w = s->presentation_width; + outlink->h = s->presentation_height; + av_log(ctx, AV_LOG_INFO, "Auto-crop enabled: output will be %dx%d (cropped from %dx%d canvas)\n", + s->presentation_width, s->presentation_height, s->canvas_width, s->canvas_height); + } else { + outlink->w = s->canvas_width; + outlink->h = s->canvas_height; + av_log(ctx, AV_LOG_INFO, "Auto-crop disabled: output will be full canvas %dx%d\n", + s->canvas_width, s->canvas_height); + } + + av_log(ctx, AV_LOG_INFO, "HEIF auto-compositor initialized with %d inputs\n", s->nb_inputs); + outlink->format = AV_PIX_FMT_RGBA; + + if (ctx->nb_inputs > 0 && ctx->inputs[0]) + outlink->time_base = ctx->inputs[0]->time_base; + + // Initialize frame sync + ret = ff_framesync_init(&s->fs, ctx, s->nb_inputs); + if (ret < 0) + return ret; + + s->fs.opaque = s; + s->fs.on_event = composite_tiles; + + for (int i = 0; i < s->nb_inputs; i++) { + FFFrameSyncIn *in = &s->fs.in[i]; + in->time_base = ctx->inputs[i]->time_base; + in->sync = 1; + in->before = EXT_STOP; + in->after = EXT_STOP; + } + + ret = ff_framesync_configure(&s->fs); + if (ret < 0) + return ret; + + av_log(ctx, AV_LOG_INFO, "HEIF auto-compositor output: %dx%d RGBA\n", + outlink->w, outlink->h); + + return 0; +} + +static int composite_tiles(FFFrameSync *fs) +{ + AVFilterContext *ctx = fs->parent; + HEIFAutoCompositorContext *s = ctx->priv; + AVFrame *out = NULL; + AVFrame *first_frame = NULL; + int ret = 0; + + // Only process up to the expected number of inputs based on stream group metadata + int max_inputs = FFMIN(s->nb_inputs, ctx->nb_inputs); + + 
av_log(ctx, AV_LOG_DEBUG, "composite_tiles called with %d inputs (processing %d)\n", + ctx->nb_inputs, max_inputs); + + // Get all frames and extract metadata + for (int i = 0; i < max_inputs; i++) { + AVFrame *frame = NULL; + ret = ff_framesync_get_frame(fs, i, &frame, 0); + if (ret >= 0 && frame) { + if (!first_frame) + first_frame = frame; + + // Check for frame side data + av_log(ctx, AV_LOG_DEBUG, "Frame %d has %d side data entries\n", i, frame->nb_side_data); + for (int j = 0; j < frame->nb_side_data; j++) { + AVFrameSideData *sd = frame->side_data[j]; + av_log(ctx, AV_LOG_DEBUG, " Side data %d: type=%d, size=%zu\n", j, sd->type, sd->size); /* sd->size is size_t, so it needs %zu rather than %d */ + } + + // Use tile positioning from stream group metadata instead of frame metadata + AVDictionary *metadata = NULL; + if (ctx->graph && ctx->graph->opaque) { + metadata = (AVDictionary*)ctx->graph->opaque; + } + + if (metadata && !s->tiles[i].has_metadata) { + // Determine which stream group this filter is processing + int active_group = 0; // Default to group 0 + AVDictionaryEntry *entry; + if ((entry = av_dict_get(metadata, "heif_active_group", NULL, 0))) { + active_group = atoi(entry->value); + } + + // Build stream group specific keys + char key_x[64], key_y[64], key_idx[64]; + snprintf(key_x, sizeof(key_x), "heif_g%d_tile_%d_x", active_group, i); + snprintf(key_y, sizeof(key_y), "heif_g%d_tile_%d_y", active_group, i); + snprintf(key_idx, sizeof(key_idx), "heif_g%d_tile_%d_index", active_group, i); + + AVDictionaryEntry *entry_x = av_dict_get(metadata, key_x, NULL, 0); + AVDictionaryEntry *entry_y = av_dict_get(metadata, key_y, NULL, 0); + AVDictionaryEntry *entry_idx = av_dict_get(metadata, key_idx, NULL, 0); + + if (entry_x && entry_y && entry_idx) { + s->tiles[i].x = atoi(entry_x->value); + s->tiles[i].y = atoi(entry_y->value); + s->tiles[i].tile_index = atoi(entry_idx->value); + s->tiles[i].has_metadata = 1; + s->valid_inputs++; + + av_log(ctx, AV_LOG_DEBUG, "Input %d: Tile %d at (%d,%d) from stream group %d 
metadata\n", + i, s->tiles[i].tile_index, s->tiles[i].x, s->tiles[i].y, active_group); + } + } + + // Fallback: try to parse metadata from frame if stream group data wasn't available + if (!s->tiles[i].has_metadata && frame->metadata) { + AVDictionaryEntry *entry = NULL; + av_log(ctx, AV_LOG_DEBUG, "Frame %d has metadata:\n", i); + while ((entry = av_dict_get(frame->metadata, "", entry, AV_DICT_IGNORE_SUFFIX))) { + av_log(ctx, AV_LOG_DEBUG, " %s = %s\n", entry->key, entry->value); + } + int parsed = parse_tile_metadata(ctx, i, frame->metadata); + if (parsed && !s->initialized) { + s->initialized = 1; + av_log(ctx, AV_LOG_INFO, "HEIF metadata successfully parsed from frame metadata\n"); + } + } else { + av_log(ctx, AV_LOG_DEBUG, "Frame %d has no metadata\n", i); + } + + // Set up default tile positions if metadata parsing didn't work + if (!s->initialized && i == 0) { + // Fallback: use calculated canvas size from config_output + // Canvas size should already be set from tile grid calculation + + // No additional processing needed - canvas size is already set + s->total_tiles = s->nb_inputs; + s->initialized = 1; + + av_log(ctx, AV_LOG_INFO, "HEIF grid setup complete: canvas=%dx%d, tiles=%d\n", + s->canvas_width, s->canvas_height, s->total_tiles); + + // Set up tile positions using standard grid layout + for (int j = 0; j < s->nb_inputs && j < MAX_TILES; j++) { + TileInfo *tile = &s->tiles[j]; + tile->grid_id = 49; + tile->tile_index = j; + tile->x = (j % 8) * 512; // 8 tiles per row + tile->y = (j / 8) * 512; + tile->has_metadata = 1; + s->valid_inputs++; + } + } + } + } + + if (!first_frame) { + av_log(ctx, AV_LOG_ERROR, "No frames available\n"); + return AVERROR(EAGAIN); + } + + av_log(ctx, AV_LOG_INFO, "Processing with %d valid inputs\n", s->valid_inputs); + + // Determine output dimensions based on auto_crop setting + int out_width, out_height; + if (s->auto_crop && s->presentation_width > 0 && s->presentation_height > 0) { + out_width = s->presentation_width; 
+ out_height = s->presentation_height; + av_log(ctx, AV_LOG_INFO, "Output size (auto-crop): %dx%d\n", out_width, out_height); + } else { + out_width = s->canvas_width > 0 ? s->canvas_width : 1024; + out_height = s->canvas_height > 0 ? s->canvas_height : 1024; + av_log(ctx, AV_LOG_INFO, "Output canvas size: %dx%d\n", out_width, out_height); + } + + // Allocate output frame + out = ff_get_video_buffer(ctx->outputs[0], out_width, out_height); + if (!out) + return AVERROR(ENOMEM); + + // Copy properties from reference frame + av_frame_copy_props(out, first_frame); + out->width = out_width; + out->height = out_height; + out->format = AV_PIX_FMT_RGBA; + + uint32_t *out_pixels = (uint32_t*)out->data[0]; + int out_stride = out->linesize[0] / 4; + + // Initialize background (black) + memset(out->data[0], 0, out->linesize[0] * out->height); + + av_log(ctx, AV_LOG_DEBUG, "Compositing tiles into %dx%d output\n", out_width, out_height); + + // Composite each tile + for (int i = 0; i < max_inputs; i++) { + TileInfo *tile = &s->tiles[i]; + + if (!tile->has_metadata) + continue; + + AVFrame *tile_frame = NULL; + ret = ff_framesync_get_frame(fs, i, &tile_frame, 0); + if (ret < 0 || !tile_frame) + continue; + + // Calculate tile position based on auto_crop setting + int dest_x, dest_y; + if (s->auto_crop) { + // Adjust tile position by subtracting presentation offset to crop to presentation area + dest_x = tile->x - s->horizontal_offset; + dest_y = tile->y - s->vertical_offset; + } else { + // Use tile position directly on full canvas + dest_x = tile->x; + dest_y = tile->y; + } + + // Skip tiles outside output area + if (dest_x >= out_width || dest_y >= out_height || + dest_x + tile_frame->width <= 0 || dest_y + tile_frame->height <= 0) { + av_log(ctx, AV_LOG_DEBUG, "Skipping tile %d at (%d,%d) - outside output %dx%d\n", + tile->tile_index, dest_x, dest_y, out_width, out_height); + continue; + } + + av_log(ctx, AV_LOG_DEBUG, "Compositing tile %d at (%d,%d), frame size %dx%d\n", + 
tile->tile_index, dest_x, dest_y, tile_frame->width, tile_frame->height); + + // Composite YUV tile + if (tile_frame->format == AV_PIX_FMT_YUV420P || tile_frame->format == AV_PIX_FMT_YUVJ420P) { + uint8_t *y_plane = tile_frame->data[0]; + uint8_t *u_plane = tile_frame->data[1]; + uint8_t *v_plane = tile_frame->data[2]; + + for (int ty = 0; ty < tile_frame->height; ty++) { + for (int tx = 0; tx < tile_frame->width; tx++) { + int out_x = dest_x + tx; + int out_y = dest_y + ty; + + // Bounds check + if (out_x < 0 || out_x >= out_width || out_y < 0 || out_y >= out_height) + continue; + + // YUV to RGB conversion + int y_idx = ty * tile_frame->linesize[0] + tx; + int uv_idx = (ty/2) * tile_frame->linesize[1] + (tx/2); + + int Y = y_plane[y_idx]; + int U = u_plane[uv_idx]; + int V = v_plane[uv_idx]; + + uint8_t r, g, b; + av_heif_yuv_to_rgb_pixel(Y, V, U, &r, &g, &b); + + if (s->convert_p3_to_srgb) { + av_heif_convert_p3_to_srgb_pixel(&r, &g, &b); + } + + int out_idx = out_y * out_stride + out_x; + out_pixels[out_idx] = (255u << 24) | ((unsigned)r << 16) | ((unsigned)g << 8) | b; /* unsigned arithmetic: 255 << 24 overflows a signed int. NOTE(review): this native-endian 0xAARRGGBB store lands as B,G,R,A bytes on little-endian hosts - confirm against AV_PIX_FMT_RGBA and the (Y, V, U) argument order above */ + } + } + } + } + + return ff_filter_frame(ctx->outputs[0], out); +} + +static int init(AVFilterContext *ctx) +{ + HEIFAutoCompositorContext *s = ctx->priv; + int i, ret; + + // If inputs=0, try to auto-detect the number from stream group metadata + if (s->nb_inputs == 0) { + // Check if filter graph and its opaque data are available + AVDictionary *metadata = NULL; + if (ctx->graph && ctx->graph->opaque) { + metadata = (AVDictionary*)ctx->graph->opaque; + } + + if (metadata) { + AVDictionaryEntry *entry; + + // Determine which stream group this filter is processing + int active_group = 0; // Default to group 0 + if ((entry = av_dict_get(metadata, "heif_active_group", NULL, 0))) { + active_group = atoi(entry->value); + } + + // Get the tile count for this specific stream group + char key[64]; + snprintf(key, sizeof(key), "heif_g%d_total_tiles", active_group); + if ((entry = av_dict_get(metadata, key, NULL, 0))) { + 
s->nb_inputs = av_clip(atoi(entry->value), 1, MAX_TILES); /* clamp the metadata-supplied count: tiles[] has MAX_TILES slots and is indexed per input */ + av_log(ctx, AV_LOG_INFO, "Auto-detected %d inputs from stream group %d metadata\n", + s->nb_inputs, active_group); + } else { + s->nb_inputs = 48; // Fallback default + av_log(ctx, AV_LOG_WARNING, "Could not auto-detect input count, using default %d\n", s->nb_inputs); + } + } else { + s->nb_inputs = 48; // Fallback default + av_log(ctx, AV_LOG_WARNING, "No metadata available for auto-detection, using default %d inputs\n", s->nb_inputs); + } + } + + av_log(ctx, AV_LOG_INFO, "Initializing HEIF auto-compositor (inputs=%d, target_grid=%d)\n", + s->nb_inputs, s->target_grid_id); + + // Initialize tile info + for (i = 0; i < MAX_TILES; i++) { + s->tiles[i].has_metadata = 0; + s->tiles[i].grid_id = -1; + } + + // Create dynamic input pads + for (i = 0; i < s->nb_inputs; i++) { + AVFilterPad pad = { 0 }; + char *name; + + pad.type = AVMEDIA_TYPE_VIDEO; + name = av_asprintf("tile%d", i); + if (!name) + return AVERROR(ENOMEM); + pad.name = name; + + if ((ret = ff_append_inpad_free_name(ctx, &pad)) < 0) + return ret; + } + + av_log(ctx, AV_LOG_INFO, "Created %d input pads\n", s->nb_inputs); + + return 0; +} + +static int config_input(AVFilterLink *inlink) +{ + return 0; +} + +static int activate(AVFilterContext *ctx) +{ + HEIFAutoCompositorContext *s = ctx->priv; + return ff_framesync_activate(&s->fs); +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + HEIFAutoCompositorContext *s = ctx->priv; + ff_framesync_uninit(&s->fs); +} + + +static const AVFilterPad heif_auto_compositor_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_output, + }, +}; + +const FFFilter ff_vf_heif_auto_compositor = { + .p.name = "heif_auto_compositor", + .p.description = NULL_IF_CONFIG_SMALL("Automatically composite HEIF tiles using stream metadata."), + .p.priv_class = &heif_auto_compositor_class, + .priv_size = sizeof(HEIFAutoCompositorContext), + .init = init, + .uninit = uninit, + .activate = activate, + 
FILTER_OUTPUTS(heif_auto_compositor_outputs), + FILTER_QUERY_FUNC2(query_formats), +}; \ No newline at end of file diff --git a/libavformat/mov.c b/libavformat/mov.c index ccaa988e4be14..c1ab39a9ee8c3 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -55,6 +55,7 @@ #include "libavcodec/hevc/hevc.h" #include "libavcodec/mpegaudiodecheader.h" #include "libavcodec/mlp_parse.h" + #include "avformat.h" #include "internal.h" #include "avio_internal.h" @@ -10319,6 +10320,43 @@ static int mov_parse_tiles(AVFormatContext *s) return err; + // Add tile positioning metadata to each stream + for (int j = 0; j < tile_grid->nb_tiles; j++) { + if (j < stg->nb_streams) { + AVStream *tile_st = stg->streams[j]; + char meta_buf[32]; + + // Add tile grid information as stream metadata + snprintf(meta_buf, sizeof(meta_buf), "%d", grid->item->item_id); + av_dict_set(&tile_st->metadata, "heif_tile_grid_id", meta_buf, 0); + + snprintf(meta_buf, sizeof(meta_buf), "%d", j); + av_dict_set(&tile_st->metadata, "heif_tile_index", meta_buf, 0); + + snprintf(meta_buf, sizeof(meta_buf), "%d", tile_grid->offsets[j].horizontal); + av_dict_set(&tile_st->metadata, "heif_tile_x", meta_buf, 0); + + snprintf(meta_buf, sizeof(meta_buf), "%d", tile_grid->offsets[j].vertical); + av_dict_set(&tile_st->metadata, "heif_tile_y", meta_buf, 0); + + snprintf(meta_buf, sizeof(meta_buf), "%dx%d", tile_grid->coded_width, tile_grid->coded_height); + av_dict_set(&tile_st->metadata, "heif_canvas_size", meta_buf, 0); + + snprintf(meta_buf, sizeof(meta_buf), "%dx%d", tile_grid->width, tile_grid->height); + av_dict_set(&tile_st->metadata, "heif_presentation_size", meta_buf, 0); + + snprintf(meta_buf, sizeof(meta_buf), "%d,%d", tile_grid->horizontal_offset, tile_grid->vertical_offset); + av_dict_set(&tile_st->metadata, "heif_crop_offset", meta_buf, 0); + + snprintf(meta_buf, sizeof(meta_buf), "%d", tile_grid->nb_tiles); + av_dict_set(&tile_st->metadata, "heif_total_tiles", meta_buf, 0); + + av_log(s, AV_LOG_DEBUG, 
"Added tile metadata: stream=%d, grid_id=%d, pos=(%d,%d)\n", + tile_st->index, grid->item->item_id, + tile_grid->offsets[j].horizontal, tile_grid->offsets[j].vertical); + } + } + if (grid->item->name) av_dict_set(&stg->metadata, "title", grid->item->name, 0); if (grid->item->item_id == mov->primary_item_id) @@ -10728,6 +10766,7 @@ static AVIndexEntry *mov_find_next_sample(AVFormatContext *s, AVStream **st) AVStream *avst = s->streams[i]; FFStream *const avsti = ffstream(avst); MOVStreamContext *msc = avst->priv_data; + if (msc->pb && msc->current_sample < avsti->nb_index_entries) { AVIndexEntry *current_sample = &avsti->index_entries[msc->current_sample]; int64_t dts = av_rescale(current_sample->timestamp, AV_TIME_BASE, msc->time_scale); @@ -10924,6 +10963,28 @@ static int mov_finalize_packet(AVFormatContext *s, AVStream *st, AVIndexEntry *s } pkt->flags |= sample->flags & AVINDEX_KEYFRAME ? AV_PKT_FLAG_KEY : 0; pkt->pos = sample->pos; + + // Add HEIF tile metadata to packet as side data + if (st->metadata) { + AVDictionaryEntry *entry = NULL; + AVDictionary *metadata_dict = NULL; + + // Copy all HEIF tile metadata entries + while ((entry = av_dict_get(st->metadata, "heif_", entry, AV_DICT_IGNORE_SUFFIX))) { + av_dict_set(&metadata_dict, entry->key, entry->value, 0); + } + + if (metadata_dict) { + /* Pack into the form AV_PKT_DATA_STRINGS_METADATA carries; av_dict_get_string() returns 0 on success, not a length, so it cannot supply the payload size. */ + size_t metadata_size = 0; + uint8_t *metadata_buf = av_packet_pack_dictionary(metadata_dict, &metadata_size); + if (!metadata_buf || av_packet_add_side_data(pkt, AV_PKT_DATA_STRINGS_METADATA, metadata_buf, metadata_size) < 0) { + av_log(s, AV_LOG_WARNING, "Failed to attach HEIF metadata to packet: stream=%d\n", st->index); + av_freep(&metadata_buf); /* the packet did not take ownership of the buffer */ + } + av_dict_free(&metadata_dict); + } + } /* Multiple stsd handling. 
*/
     if (sc->stsc_data) {
@@ -10947,6 +11008,24 @@ static int mov_finalize_packet(AVFormatContext *s, AVStream *st, AVIndexEntry *s
     return 0;
 }
 
+/**
+ * Return the index entry st would be read from next, or NULL once the
+ * stream's index is exhausted.
+ *
+ * The index is addressed with current_sample, as mov_find_next_sample() does.
+ * NOTE(review): no caller is added by the hunks shown here; confirm it is needed.
+ */
+static AVIndexEntry *mov_find_next_sample_for_stream(AVFormatContext *s, AVStream *st)
+{
+    MOVStreamContext *sc = st->priv_data;
+    FFStream *avsti = ffstream(st);
+
+    if (sc->current_sample >= avsti->nb_index_entries)
+        return NULL;
+
+    return &avsti->index_entries[sc->current_sample];
+}
+
 static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     MOVContext *mov = s->priv_data;
@@ -11002,6 +11078,7 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
         goto retry;
     }
     sc = st->priv_data;
+
     /* must be done just before reading, to avoid infinite loop on sample */
     current_index = sc->current_index;
     mov_current_sample_inc(sc);
diff --git a/libavutil/Makefile b/libavutil/Makefile
index ee77e51c08062..2abc48c7e4fa7 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -38,6 +38,7 @@ HEADERS = adler32.h \
           film_grain_params.h                                           \
           frame.h                                                       \
           hash.h                                                        \
           hdr_dynamic_metadata.h                                        \
           hdr_dynamic_vivid_metadata.h                                  \
+          heif_color.h                                                  \
           hmac.h                                                        \
@@ -146,6 +147,7 @@ OBJS = adler32.o \
        fixed_dsp.o                                                      \
        frame.o                                                          \
        hash.o                                                           \
        hdr_dynamic_metadata.o                                           \
        hdr_dynamic_vivid_metadata.o                                     \
+       heif_color.o                                                     \
        hmac.o                                                           \
diff --git a/libavutil/heif_color.c b/libavutil/heif_color.c
new file mode 100644
index 0000000000000..6d7a38728efd2
--- /dev/null
+++ b/libavutil/heif_color.c
@@ -0,0 +1,174 @@
+/*
+ * HEIF color space conversion utilities
+ * Copyright (c) 2025 FFmpeg developers
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "heif_color.h"
+#include "common.h"
+#include "log.h"
+#include <math.h>
+
+// NOTE(review): external libraries are normally exposed by FFmpeg's configure
+// as CONFIG_* macros; confirm HAVE_LCMS2 is really defined by the build.
+#if HAVE_LCMS2
+#include <lcms2.h>
+#endif
+
+/**
+ * Divide num by den, rounding to nearest with ties away from zero.
+ *
+ * C division truncates toward zero, so the usual "+ den / 2" bias only rounds
+ * correctly for non-negative numerators; negative ones are mirrored here.
+ */
+static int heif_div_round(int num, int den)
+{
+    return num >= 0 ? (num + den / 2) / den : -((-num + den / 2) / den);
+}
+
+void av_heif_convert_p3_to_srgb_pixel(uint8_t *r, uint8_t *g, uint8_t *b)
+{
+    // Pre-calculated Display P3 RGB -> XYZ matrix
+    static const float p3_to_xyz[3][3] = {
+        {0.4865709f, 0.2656677f, 0.1982173f},
+        {0.2289746f, 0.6917385f, 0.0792869f},
+        {0.0000000f, 0.0451134f, 1.0439444f}
+    };
+    // Pre-calculated XYZ -> sRGB matrix
+    static const float xyz_to_srgb[3][3] = {
+        { 3.2404542f, -1.5371385f, -0.4985314f},
+        {-0.9692660f,  1.8760108f,  0.0415560f},
+        { 0.0556434f, -0.2040259f,  1.0572252f}
+    };
+
+    // Convert RGB to linear values (assuming gamma 2.2 approximation)
+    float rf = powf(*r / 255.0f, 2.2f);
+    float gf = powf(*g / 255.0f, 2.2f);
+    float bf = powf(*b / 255.0f, 2.2f);
+
+    // Convert P3 RGB to XYZ
+    float x = p3_to_xyz[0][0] * rf + p3_to_xyz[0][1] * gf + p3_to_xyz[0][2] * bf;
+    float y = p3_to_xyz[1][0] * rf + p3_to_xyz[1][1] * gf + p3_to_xyz[1][2] * bf;
+    float z = p3_to_xyz[2][0] * rf + p3_to_xyz[2][1] * gf + p3_to_xyz[2][2] * bf;
+
+    // Convert XYZ to linear sRGB
+    float r_lin = xyz_to_srgb[0][0] * x + xyz_to_srgb[0][1] * y + xyz_to_srgb[0][2] * z;
+    float g_lin = xyz_to_srgb[1][0] * x + xyz_to_srgb[1][1] * y + xyz_to_srgb[1][2] * z;
+    float b_lin = xyz_to_srgb[2][0] * x + xyz_to_srgb[2][1] * y + xyz_to_srgb[2][2] * z;
+
+    // P3 colors outside the sRGB gamut fall outside [0, 1]; clamp them
+    r_lin = FFMAX(0.0f, FFMIN(1.0f, r_lin));
+    g_lin = FFMAX(0.0f, FFMIN(1.0f, g_lin));
+    b_lin = FFMAX(0.0f, FFMIN(1.0f, b_lin));
+
+    // Re-apply gamma (inverse of 2.2) and round to the nearest 8-bit value
+    *r = (uint8_t)(powf(r_lin, 1.0f/2.2f) * 255.0f + 0.5f);
+    *g = (uint8_t)(powf(g_lin, 1.0f/2.2f) * 255.0f + 0.5f);
+    *b = (uint8_t)(powf(b_lin, 1.0f/2.2f) * 255.0f + 0.5f);
+}
+
+void av_heif_yuv_to_rgb_pixel(int Y, int U, int V, uint8_t *r, uint8_t *g, uint8_t *b)
+{
+    // Full-range BT.601 (JPEG/JFIF) coefficients, scaled by 10^6:
+    //   R = Y                 + 1.402000 * Cr
+    //   G = Y - 0.344136 * Cb - 0.714136 * Cr
+    //   B = Y + 1.772000 * Cb
+    const int Cb = U - 128;
+    const int Cr = V - 128;
+
+    // Cb/Cr are signed, so the products need symmetric rounding
+    int R_i = Y + heif_div_round(1402000 * Cr, 1000000);
+    int G_i = Y - heif_div_round(344136 * Cb + 714136 * Cr, 1000000);
+    int B_i = Y + heif_div_round(1772000 * Cb, 1000000);
+
+    // Clamp to valid range
+    *r = av_clip_uint8(R_i);
+    *g = av_clip_uint8(G_i);
+    *b = av_clip_uint8(B_i);
+}
+
+#if HAVE_LCMS2
+int av_heif_apply_icc_profile(uint8_t *rgba_data, int width, int height,
+                              const uint8_t *icc_data, size_t icc_size)
+{
+    cmsHPROFILE input_profile = NULL;
+    cmsHPROFILE output_profile = NULL;
+    cmsHTRANSFORM transform = NULL;
+    cmsColorSpaceSignature input_space;
+    uint64_t nb_pixels;
+    int ret = 0;
+
+    if (!rgba_data || !icc_data || !icc_size || width <= 0 || height <= 0)
+        return AVERROR(EINVAL);
+
+    // lcms2 takes 32-bit byte and pixel counts; refuse values that would wrap
+    nb_pixels = (uint64_t)width * height;
+    if (icc_size > UINT32_MAX || nb_pixels > UINT32_MAX)
+        return AVERROR(EINVAL);
+
+    // Open the input ICC profile from the HEIC file
+    input_profile = cmsOpenProfileFromMem(icc_data, (cmsUInt32Number)icc_size);
+    if (!input_profile) {
+        av_log(NULL, AV_LOG_WARNING, "Failed to open input ICC profile\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    // Create sRGB output profile
+    output_profile = cmsCreate_sRGBProfile();
+    if (!output_profile) {
+        av_log(NULL, AV_LOG_WARNING, "Failed to create sRGB profile\n");
+        ret = AVERROR_EXTERNAL;
+        goto cleanup;
+    }
+
+    // Get color space information
+    input_space = cmsGetColorSpace(input_profile);
+    av_log(NULL, AV_LOG_DEBUG, "Input profile color space: 0x%08X\n", (unsigned)input_space);
+
+    // Create color transform based on input color space
+    if (input_space == cmsSigGrayData) {
+        // For grayscale profiles, skip transformation as we've already converted to RGB
+        av_log(NULL, AV_LOG_DEBUG, "Skipping ICC transform for grayscale profile\n");
+        goto cleanup;
+    }
+
+    transform = cmsCreateTransform(input_profile, TYPE_RGBA_8,
+                                   output_profile, TYPE_RGBA_8,
+                                   INTENT_PERCEPTUAL, 0);
+    if (!transform) {
+        av_log(NULL, AV_LOG_WARNING, "Failed to create color transform (color space: 0x%08X)\n", (unsigned)input_space);
+        ret = AVERROR_EXTERNAL;
+        goto cleanup;
+    }
+
+    // Apply the color transform in-place
+    cmsDoTransform(transform, rgba_data, rgba_data, (cmsUInt32Number)nb_pixels);
+
+    av_log(NULL, AV_LOG_DEBUG, "Applied ICC profile transformation to %dx%d image\n", width, height);
+
+cleanup:
+    if (transform)
+        cmsDeleteTransform(transform);
+    if (output_profile)
+        cmsCloseProfile(output_profile);
+    if (input_profile)
+        cmsCloseProfile(input_profile);
+
+    return ret;
+}
+#endif
\ No newline at end of file
diff --git a/libavutil/heif_color.h b/libavutil/heif_color.h
new file mode 100644
index 0000000000000..54eef1c2e9f53
--- /dev/null
+++ b/libavutil/heif_color.h
@@ -0,0 +1,65 @@
+/*
+ * HEIF color space conversion utilities
+ * Copyright (c) 2025 FFmpeg developers
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_HEIF_COLOR_H
+#define AVUTIL_HEIF_COLOR_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Convert a single RGB pixel from Display P3 to sRGB color space.
+ *
+ * @param r pointer to red component (0-255), will be modified in place
+ * @param g pointer to green component (0-255), will be modified in place
+ * @param b pointer to blue component (0-255), will be modified in place
+ */
+void av_heif_convert_p3_to_srgb_pixel(uint8_t *r, uint8_t *g, uint8_t *b);
+
+/**
+ * Convert YUV pixel to RGB using HEIF/JPEG full-range conversion.
+ * Uses the BT.601 coefficients of the JPEG/JFIF full-range definition.
+ *
+ * @param Y luma component (0-255)
+ * @param U chroma U component (0-255)
+ * @param V chroma V component (0-255)
+ * @param r pointer to output red component (0-255)
+ * @param g pointer to output green component (0-255)
+ * @param b pointer to output blue component (0-255)
+ */
+void av_heif_yuv_to_rgb_pixel(int Y, int U, int V, uint8_t *r, uint8_t *g, uint8_t *b);
+
+#if HAVE_LCMS2
+/**
+ * Apply ICC color profile transformation to RGBA image data.
+ *
+ * @param rgba_data image data in RGBA format, modified in place
+ * @param width image width in pixels
+ * @param height image height in pixels
+ * @param icc_data ICC profile data
+ * @param icc_size size of ICC profile data in bytes
+ * @return 0 on success, negative value on error
+ */
+int av_heif_apply_icc_profile(uint8_t *rgba_data, int width, int height,
+                              const uint8_t *icc_data, size_t icc_size);
+#endif
+
+#endif /* AVUTIL_HEIF_COLOR_H */
\ No newline at end of file

From eddef976ef18f4dbca99bc9b2850910374ac36bf Mon Sep 17 00:00:00 2001
From: Srikanth Kiran Kotagiri
Date: Sat, 26 Jul 2025 20:29:58 -0700
Subject: [PATCH 2/2] Color conversion

---
 libavfilter/vf_heif_tile_compositor_auto.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/libavfilter/vf_heif_tile_compositor_auto.c b/libavfilter/vf_heif_tile_compositor_auto.c
index 30620cbcf3907..e7a189b91420a 100644
--- a/libavfilter/vf_heif_tile_compositor_auto.c
+++ b/libavfilter/vf_heif_tile_compositor_auto.c
@@ -29,7 +29,6 @@
  * presentation cropping.
*/ -#include "libavutil/heif_color.h" #include "libavutil/imgutils.h" #include "libavutil/opt.h" #include "libavutil/pixfmt.h" @@ -60,7 +59,6 @@ typedef struct HEIFAutoCompositorContext { // Options int nb_inputs; ///< Number of input streams int target_grid_id; ///< Specific grid ID to composite (-1 = auto) - int convert_p3_to_srgb; ///< Color space conversion int auto_crop; ///< Crop to presentation dimensions // Discovered from tile grid @@ -80,7 +78,6 @@ typedef struct HEIFAutoCompositorContext { static const AVOption heif_auto_compositor_options[] = { { "inputs", "set number of input streams (0 = auto-detect)", OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64=0}, 0, MAX_TILES, FLAGS }, { "grid_id", "specific tile grid ID to composite (-1 = auto)", OFFSET(target_grid_id), AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX, FLAGS }, - { "convert_p3", "convert Display P3 to sRGB", OFFSET(convert_p3_to_srgb), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS }, { "auto_crop", "crop output to presentation dimensions", OFFSET(auto_crop), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, { NULL } }; @@ -507,12 +504,10 @@ static int composite_tiles(FFFrameSync *fs) int U = u_plane[uv_idx]; int V = v_plane[uv_idx]; - uint8_t r, g, b; - av_heif_yuv_to_rgb_pixel(Y, V, U, &r, &g, &b); - - if (s->convert_p3_to_srgb) { - av_heif_convert_p3_to_srgb_pixel(&r, &g, &b); - } + // YUV to RGB conversion using FFmpeg macros + int r = av_clip_uint8((298 * (Y - 16) + 409 * (V - 128) + 128) >> 8); + int g = av_clip_uint8((298 * (Y - 16) - 100 * (U - 128) - 208 * (V - 128) + 128) >> 8); + int b = av_clip_uint8((298 * (Y - 16) + 516 * (U - 128) + 128) >> 8); int out_idx = out_y * out_stride + out_x; out_pixels[out_idx] = (255 << 24) | (r << 16) | (g << 8) | b;