From 9b1db5bcdb6814cf9f21ad6bb52b9fb04d36e60a Mon Sep 17 00:00:00 2001 From: dregsist Date: Sun, 5 Apr 2026 09:03:12 +0900 Subject: [PATCH 1/5] feat: add Menon (2007) and ARI demosaicing algorithms Add two new Bayer demosaicing methods: - Menon (2007) DDFAPD: directional filtering with a posteriori decision - ARI: adaptive residual interpolation with quality levels (fast/balanced/quality) Both are CPU-only, OpenMP parallelized. Introspection v6->v7 migration added. Co-Authored-By: Claude Opus 4.6 --- src/iop/demosaic.c | 64 ++++- src/iop/demosaicing/ari.c | 553 ++++++++++++++++++++++++++++++++++++ src/iop/demosaicing/menon.c | 551 +++++++++++++++++++++++++++++++++++ 3 files changed, 1165 insertions(+), 3 deletions(-) create mode 100644 src/iop/demosaicing/ari.c create mode 100644 src/iop/demosaicing/menon.c diff --git a/src/iop/demosaic.c b/src/iop/demosaic.c index 2c3ebc8b609a..36e4b2e2230a 100644 --- a/src/iop/demosaic.c +++ b/src/iop/demosaic.c @@ -47,7 +47,7 @@ #include #include -DT_MODULE_INTROSPECTION(6, dt_iop_demosaic_params_t) +DT_MODULE_INTROSPECTION(7, dt_iop_demosaic_params_t) #define DT_DEMOSAIC_XTRANS 1024 // masks for non-Bayer demosaic ops #define DT_DEMOSAIC_DUAL 2048 // masks for dual demosaicing methods @@ -60,6 +60,8 @@ typedef enum dt_iop_demosaic_method_t DT_IOP_DEMOSAIC_VNG4 = 2, // $DESCRIPTION: "VNG4" DT_IOP_DEMOSAIC_RCD = 5, // $DESCRIPTION: "RCD" DT_IOP_DEMOSAIC_LMMSE = 6, // $DESCRIPTION: "LMMSE" + DT_IOP_DEMOSAIC_MENON = 8, // $DESCRIPTION: "Menon" + DT_IOP_DEMOSAIC_ARI = 9, // $DESCRIPTION: "ARI" DT_IOP_DEMOSAIC_RCD_DUAL = DT_DEMOSAIC_DUAL | DT_IOP_DEMOSAIC_RCD, // $DESCRIPTION: "RCD (dual)" DT_IOP_DEMOSAIC_AMAZE_DUAL = DT_DEMOSAIC_DUAL | DT_IOP_DEMOSAIC_AMAZE, // $DESCRIPTION: "AMaZE (dual)"" DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME = 3, // $DESCRIPTION: "passthrough (monochrome)" @@ -90,6 +92,10 @@ static const char *_method_str(const int method) return "RCD"; case DT_IOP_DEMOSAIC_LMMSE: return "LMMSE"; + case DT_IOP_DEMOSAIC_MENON: + return "MENON"; + case DT_IOP_DEMOSAIC_ARI: + return "ARI"; case DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME: return "PASSTHROUGH_MONOCHROME"; case DT_IOP_DEMOSAIC_PASSTHROUGH_COLOR: @@ -140,6 +146,13 @@ typedef enum dt_iop_demosaic_lmmse_t DT_LMMSE_REFINE_4 = 4, // $DESCRIPTION: "2x refine + medians" } dt_iop_demosaic_lmmse_t; +typedef enum dt_iop_demosaic_ari_quality_t +{ + DT_ARI_QUALITY_FAST = 1, // $DESCRIPTION: "fast" + DT_ARI_QUALITY_BALANCED = 2, // $DESCRIPTION: "balanced" + DT_ARI_QUALITY_FULL = 3, // $DESCRIPTION: "quality" +} dt_iop_demosaic_ari_quality_t; + typedef struct dt_iop_demosaic_params_t { dt_iop_demosaic_greeneq_t green_eq; // $DEFAULT: DT_IOP_GREEN_EQ_NO $DESCRIPTION: "match greens" @@ -154,6 +167,7 @@ typedef struct dt_iop_demosaic_params_t int cs_iter; // $MIN: 1 $MAX: 25 $DEFAULT: 8 $DESCRIPTION: "iterations" float cs_center; // $MIN: 0.0 $MAX: 1.0 $DEFAULT: 0.0 $DESCRIPTION: "sharp center" gboolean cs_enabled; // $DEFAULT: FALSE $DESCRIPTION: "capture sharpen" + dt_iop_demosaic_ari_quality_t ari_quality; // $DEFAULT: DT_ARI_QUALITY_BALANCED $DESCRIPTION: "ARI quality" } dt_iop_demosaic_params_t; typedef struct dt_iop_demosaic_gui_data_t @@ -167,6 +181,7 @@ typedef struct dt_iop_demosaic_gui_data_t GtkWidget *demosaic_method_mono; GtkWidget *dual_thrs; GtkWidget *lmmse_refine; + GtkWidget *ari_quality; GtkWidget *cs_thrs; GtkWidget *cs_radius; GtkWidget *cs_boost; @@ -257,6 +272,7 @@ typedef struct dt_iop_demosaic_data_t int cs_iter; float cs_center; gboolean cs_enabled; + dt_iop_demosaic_ari_quality_t ari_quality; } dt_iop_demosaic_data_t; static gboolean _get_thumb_quality(const int width, const int height) @@ -303,6 +319,8 @@ void amaze_demosaic(const float *const in, #include "iop/demosaicing/ppg.c" #include "iop/demosaicing/rcd.c" #include "iop/demosaicing/lmmse.c" +#include "iop/demosaicing/menon.c" +#include "iop/demosaicing/ari.c" #include "iop/demosaicing/capture.c" #include "iop/demosaicing/dual.c" @@ -462,6 +480,19 @@ int legacy_params(dt_iop_module_t *self, return 0; } + if(old_version == 6) + { + const dt_iop_demosaic_params_v6_t *o = (dt_iop_demosaic_params_v6_t *)old_params; + dt_iop_demosaic_params_t *n = malloc(sizeof(dt_iop_demosaic_params_t)); + memcpy(n, o, sizeof *o); + n->ari_quality = DT_ARI_QUALITY_BALANCED; + + *new_params = n; + *new_params_size = sizeof(dt_iop_demosaic_params_t); + *new_version = 7; + return 0; + } + return 1; } @@ -576,6 +607,14 @@ static gboolean _tiling_requirements(dt_iop_module_t *self, perpix = 2; border = 10; break; + case DT_IOP_DEMOSAIC_MENON: + perpix = 12; + border = 8; + break; + case DT_IOP_DEMOSAIC_ARI: + perpix = 40; /* multiple candidate buffers + guided filter temps */ + border = 10; + break; case DT_IOP_DEMOSAIC_PPG: perpix = 4; border = 8; @@ -865,6 +904,10 @@ void process(dt_iop_module_t *self, dt_colorspaces_cygm_to_rgb(pipe->dsc.processed_maximum, 1, d->CAM_to_RGB); } } + else if(method == DT_IOP_DEMOSAIC_MENON) + menon_demosaic(t_out, t_in, width, t_rows, filters); + else if(method == DT_IOP_DEMOSAIC_ARI) + ari_demosaic(t_out, t_in, width, t_rows, filters, d->ari_quality); else if(method == DT_IOP_DEMOSAIC_RCD) rcd_demosaic(t_out, t_in, width, t_rows, filters, procmax); else if(method == DT_IOP_DEMOSAIC_LMMSE) @@ -1373,6 +1416,7 @@ void commit_params(dt_iop_module_t *self, d->median_thrs = p->median_thrs; d->dual_thrs = p->dual_thrs; d->lmmse_refine = p->lmmse_refine; + d->ari_quality = p->ari_quality; dt_iop_demosaic_method_t use_method = p->demosaicing_method; d->cs_radius = p->cs_radius; d->cs_thrs = p->cs_thrs; @@ -1433,6 +1477,12 @@ void commit_params(dt_iop_module_t *self, case DT_IOP_DEMOSAIC_FDC: piece->process_cl_ready = FALSE; break; + case DT_IOP_DEMOSAIC_MENON: + piece->process_cl_ready = FALSE; + break; + case DT_IOP_DEMOSAIC_ARI: + piece->process_cl_ready = FALSE; + break; default: piece->process_cl_ready = TRUE; } @@ -1580,6 +1630,7 @@ void gui_changed(dt_iop_module_t *self, GtkWidget *w, void *previous) gtk_widget_set_visible(g->color_smoothing, !passing && !bayer4 && !isdual && !true_monochrome); gtk_widget_set_visible(g->dual_thrs, isdual); gtk_widget_set_visible(g->lmmse_refine, islmmse); + gtk_widget_set_visible(g->ari_quality, use_method == DT_IOP_DEMOSAIC_ARI); const gboolean monomode = use_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME || use_method == DT_IOP_DEMOSAIC_PASSTHR_MONOX; @@ -1737,7 +1788,7 @@ void gui_init(dt_iop_module_t *self) const int xtranspos = dt_bauhaus_combobox_get_from_value(g->demosaic_method_bayer, DT_DEMOSAIC_XTRANS); for(int i=0;i<8;i++) dt_bauhaus_combobox_remove_at(g->demosaic_method_bayer, xtranspos); - gtk_widget_set_tooltip_text(g->demosaic_method_bayer, _("Bayer sensor demosaicing method, PPG and RCD are fast, AMaZE and LMMSE are slow.\nLMMSE is suited best for high ISO images.\nVNG4 is good in rare cases avoiding maze patterns.\ndual demosaicers increase processing time by blending a VNG variant in a second pass.")); + gtk_widget_set_tooltip_text(g->demosaic_method_bayer, _("Bayer sensor demosaicing method, PPG and RCD are fast, AMaZE, LMMSE and Menon are slow.\nLMMSE is suited best for high ISO images.\nMenon uses directional filtering with a posteriori decision.\ndual demosaicers increase processing time by blending a VNG variant in a second pass.")); g->demosaic_method_xtrans = dt_bauhaus_combobox_from_params(self, "demosaicing_method"); for(int i=0;idemosaic_method_xtrans, 0); @@ -1747,7 +1798,7 @@ void gui_init(dt_iop_module_t *self) g->demosaic_method_bayerfour = dt_bauhaus_combobox_from_params(self, "demosaicing_method"); for(int i=0;i<8;i++) dt_bauhaus_combobox_remove_at(g->demosaic_method_bayerfour, xtranspos); for(int i=0;i<2;i++) dt_bauhaus_combobox_remove_at(g->demosaic_method_bayerfour, 0); - for(int i=0;i<4;i++) dt_bauhaus_combobox_remove_at(g->demosaic_method_bayerfour, 1); + for(int i=0;i<5;i++) dt_bauhaus_combobox_remove_at(g->demosaic_method_bayerfour, 1); gtk_widget_set_tooltip_text(g->demosaic_method_bayerfour, _("Bayer4 sensor demosaicing methods.")); g->demosaic_method_mono = dt_bauhaus_combobox_from_params(self, "demosaicing_method"); @@ -1769,6 +1820,13 @@ void gui_init(dt_iop_module_t *self) g->lmmse_refine = dt_bauhaus_combobox_from_params(self, "lmmse_refine"); gtk_widget_set_tooltip_text(g->lmmse_refine, _("LMMSE refinement steps. the median steps average the output,\nrefine adds some recalculation of red & blue channels")); + g->ari_quality = dt_bauhaus_combobox_from_params(self, "ari_quality"); + gtk_widget_set_tooltip_text(g->ari_quality, + _("ARI quality level:\n" + "fast: MLRI only (sharpest, fastest)\n" + "balanced: MLRI + guided-filter RI adaptive selection\n" + "quality: full 3-candidate adaptive selection")); + g->color_smoothing = dt_bauhaus_combobox_from_params(self, "color_smoothing"); gtk_widget_set_tooltip_text(g->color_smoothing, _("how many color smoothing median steps after demosaicing")); diff --git a/src/iop/demosaicing/ari.c b/src/iop/demosaicing/ari.c new file mode 100644 index 000000000000..5638d51edf12 --- /dev/null +++ b/src/iop/demosaicing/ari.c @@ -0,0 +1,553 @@ +/* + ARI (Adaptive Residual Interpolation) Custom Demosaic + + Based on Monno et al. 2017 "Adaptive Residual Interpolation for Color + and Multispectral Image Demosaicking" with customizations: + - Three candidate generators: MLRI, RI, GF-RI + - Per-pixel adaptive selection based on residual variance + - Automatic noise estimation for adaptive threshold + + References: + - Kiku et al. 2013 "Residual Interpolation for Color Image Demosaicking" + - Kiku et al. 2014 "Minimized-Laplacian Residual Interpolation..." + - Monno et al. 2017 "Adaptive Residual Interpolation..." + - Hamilton & Adams 1997 "Adaptive color plan interpolation..." + - He et al. 2013 "Guided Image Filtering" +*/ + +#define ARI_BORDER 10 +#define ARI_GF_RADIUS 2 +#define ARI_SEL_RADIUS 2 + +/* ===== float comparator for qsort ===== */ +static int _ari_compare_float(const void *a, const void *b) +{ + const float fa = *(const float *)a, fb = *(const float *)b; + return (fa > fb) - (fa < fb); +} + +/* ===== Noise estimation from dark pixels ===== */ +static float _ari_estimate_noise(const float *const restrict in, + const int width, const int height, + const uint32_t filters) +{ + /* Find 10th percentile intensity */ + float vmin = FLT_MAX, vmax = 0.0f; + for(int y = 0; y < height; y += 8) + for(int x = 0; x < width; x += 8) + { + const float v = in[(size_t)y * width + x]; + if(v > 1e-8f && v < vmin) vmin = v; + if(v > vmax) vmax = v; + } + if(vmax <= vmin) return 0.001f; + + /* Histogram for percentile */ +#define ARI_NOISE_BINS 256 + int hist[ARI_NOISE_BINS]; + memset(hist, 0, sizeof(hist)); + const float hscale = (float)(ARI_NOISE_BINS - 1) / fmaxf(vmax - vmin, 1e-10f); + int total = 0; + + for(int y = 0; y < height; y += 4) + for(int x = 0; x < width; x += 4) + { + const float v = in[(size_t)y * width + x]; + if(v > 0.0f) + { + hist[CLAMP((int)((v - vmin) * hscale), 0, ARI_NOISE_BINS - 1)]++; + total++; + } + } + + const int target = total / 10; + int cumsum = 0; + float p10 = vmin; + for(int b = 0; b < ARI_NOISE_BINS; b++) + { + cumsum += hist[b]; + if(cumsum >= target) { p10 = vmin + (float)b / hscale; break; } + } +#undef ARI_NOISE_BINS + + /* Collect squared same-color differences in dark region */ +#define ARI_MAX_NOISE_SAMPLES 50000 + float *dsq = dt_alloc_align_float(ARI_MAX_NOISE_SAMPLES); + if(!dsq) return 0.001f; + + int ns = 0; + for(int y = 2; y < height - 2 && ns < ARI_MAX_NOISE_SAMPLES; y++) + for(int x = 2; x < width - 4 && ns < ARI_MAX_NOISE_SAMPLES; x += 2) + { + const float v1 = in[(size_t)y * width + x]; + const float v2 = in[(size_t)y * width + x + 2]; + if(v1 > 0.0f && v1 < p10 && v2 > 0.0f && v2 < p10) + { + const float d = v2 - v1; + dsq[ns++] = d * d; + } + } + + if(ns < 100) { dt_free_align(dsq); return 0.001f; } + + qsort(dsq, ns, sizeof(float), _ari_compare_float); + const float sigma = sqrtf(dsq[ns / 2] / 0.9098f); + dt_free_align(dsq); + return fmaxf(sigma, 1e-6f); +#undef ARI_MAX_NOISE_SAMPLES +} + +/* ===== Box filter via integral image ===== */ +static void _ari_box_filter(const float *const restrict src, + float *const restrict dst, + const int width, const int height, + const int radius) +{ + double *integral = calloc((size_t)(width + 1) * (height + 1), sizeof(double)); + if(!integral) { memcpy(dst, src, sizeof(float) * (size_t)width * height); return; } + + /* Build integral image */ + for(int y = 0; y < height; y++) + for(int x = 0; x < width; x++) + integral[(size_t)(y + 1) * (width + 1) + (x + 1)] = + (double)src[(size_t)y * width + x] + + integral[(size_t)y * (width + 1) + (x + 1)] + + integral[(size_t)(y + 1) * (width + 1) + x] + - integral[(size_t)y * (width + 1) + x]; + + /* Query */ + DT_OMP_FOR() + for(int y = 0; y < height; y++) + { + const int y0 = MAX(0, y - radius), y1 = MIN(height - 1, y + radius); + for(int x = 0; x < width; x++) + { + const int x0 = MAX(0, x - radius), x1 = MIN(width - 1, x + radius); + const double sum = integral[(size_t)(y1 + 1) * (width + 1) + (x1 + 1)] + - integral[(size_t)y0 * (width + 1) + (x1 + 1)] + - integral[(size_t)(y1 + 1) * (width + 1) + x0] + + integral[(size_t)y0 * (width + 1) + x0]; + const int cnt = (y1 - y0 + 1) * (x1 - x0 + 1); + dst[(size_t)y * width + x] = (float)(sum / cnt); + } + } + free(integral); +} + +/* ===== Guided filter -- single channel (He et al. 2013) ===== */ +static void _ari_guided_filter_ch(const float *const restrict input, + const float *const restrict guide, + float *const restrict output, + const int width, const int height, + const int radius, const float eps) +{ + const size_t npix = (size_t)width * height; + + float *mean_I = dt_alloc_align_float(npix); + float *mean_p = dt_alloc_align_float(npix); + float *mean_Ip = dt_alloc_align_float(npix); + float *mean_II = dt_alloc_align_float(npix); + float *aa = dt_alloc_align_float(npix); + float *bb = dt_alloc_align_float(npix); + float *mean_a = dt_alloc_align_float(npix); + float *mean_b = dt_alloc_align_float(npix); + float *tmp1 = dt_alloc_align_float(npix); + float *tmp2 = dt_alloc_align_float(npix); + + if(!mean_I || !mean_p || !mean_Ip || !mean_II || !aa || !bb + || !mean_a || !mean_b || !tmp1 || !tmp2) + { + memcpy(output, input, sizeof(float) * npix); + goto cleanup_gf; + } + + /* Products */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + tmp1[i] = guide[i] * input[i]; + tmp2[i] = guide[i] * guide[i]; + } + + /* Box-filtered means */ + _ari_box_filter(guide, mean_I, width, height, radius); + _ari_box_filter(input, mean_p, width, height, radius); + _ari_box_filter(tmp1, mean_Ip, width, height, radius); + _ari_box_filter(tmp2, mean_II, width, height, radius); + + /* a, b coefficients */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float cov = mean_Ip[i] - mean_I[i] * mean_p[i]; + const float var = mean_II[i] - mean_I[i] * mean_I[i]; + aa[i] = cov / (var + eps); + bb[i] = mean_p[i] - aa[i] * mean_I[i]; + } + + /* Final: mean(a) * guide + mean(b) */ + _ari_box_filter(aa, mean_a, width, height, radius); + _ari_box_filter(bb, mean_b, width, height, radius); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + output[i] = mean_a[i] * guide[i] + mean_b[i]; + +cleanup_gf: + dt_free_align(mean_I); dt_free_align(mean_p); + dt_free_align(mean_Ip); dt_free_align(mean_II); + dt_free_align(aa); dt_free_align(bb); + dt_free_align(mean_a); dt_free_align(mean_b); + dt_free_align(tmp1); dt_free_align(tmp2); +} + +/* ===== Green interpolation: Hamilton-Adams (for RI) ===== */ +static void _ari_green_ha(float *const restrict green, + const float *const restrict in, + const int width, const int height, + const uint32_t filters) +{ + /* Copy known G values, zero non-G */ + DT_OMP_FOR() + for(int y = 0; y < height; y++) + for(int x = 0; x < width; x++) + { + const size_t pos = (size_t)y * width + x; + green[pos] = (FC(y, x, filters) == 1) ? in[pos] : 0.0f; + } + + /* Interpolate G at R/B positions (HA: gradient + Laplacian correction) */ + DT_OMP_FOR() + for(int y = 2; y < height - 2; y++) + for(int x = 2; x < width - 2; x++) + { + if(FC(y, x, filters) == 1) continue; + const size_t p = (size_t)y * width + x; + + const float dH = fabsf(in[p - 2] - in[p]) + + fabsf(in[p - 1] - in[p + 1]); + const float dV = fabsf(in[p - 2 * width] - in[p]) + + fabsf(in[p - width] - in[p + width]); + + const float Gh = (in[p - 1] + in[p + 1]) * 0.5f + + (2.0f * in[p] - in[p - 2] - in[p + 2]) * 0.25f; + const float Gv = (in[p - width] + in[p + width]) * 0.5f + + (2.0f * in[p] - in[p - 2 * width] - in[p + 2 * width]) * 0.25f; + + green[p] = (dH < dV) ? Gh : (dV < dH) ? Gv : (Gh + Gv) * 0.5f; + } +} + +/* ===== Green interpolation: MLRI (Laplacian energy direction) ===== */ +static void _ari_green_mlri(float *const restrict green, + const float *const restrict in, + const int width, const int height, + const uint32_t filters) +{ + /* Copy known G values */ + DT_OMP_FOR() + for(int y = 0; y < height; y++) + for(int x = 0; x < width; x++) + { + const size_t p = (size_t)y * width + x; + green[p] = (FC(y, x, filters) == 1) ? in[p] : 0.0f; + } + + /* Border region: HA (needs only 2-pixel border) */ + for(int y = 2; y < height - 2; y++) + for(int x = 2; x < width - 2; x++) + { + if(y >= 4 && y < height - 4 && x >= 4 && x < width - 4) continue; + if(FC(y, x, filters) == 1) continue; + const size_t p = (size_t)y * width + x; + const float dH = fabsf(in[p - 2] - in[p]) + fabsf(in[p - 1] - in[p + 1]); + const float dV = fabsf(in[p - 2 * width] - in[p]) + fabsf(in[p - width] - in[p + width]); + const float Gh = (in[p - 1] + in[p + 1]) * 0.5f + (2.0f * in[p] - in[p - 2] - in[p + 2]) * 0.25f; + const float Gv = (in[p - width] + in[p + width]) * 0.5f + (2.0f * in[p] - in[p - 2 * width] - in[p + 2 * width]) * 0.25f; + green[p] = (dH < dV) ? Gh : (dV < dH) ? Gv : (Gh + Gv) * 0.5f; + } + + /* Interior: MLRI (5x5 Laplacian energy direction selection) */ + DT_OMP_FOR() + for(int y = 4; y < height - 4; y++) + for(int x = 4; x < width - 4; x++) + { + if(FC(y, x, filters) == 1) continue; + const size_t p = (size_t)y * width + x; + const int w = width; + + /* Directional green estimates with Laplacian correction */ + const float Gh = (in[p - 1] + in[p + 1]) * 0.5f + + (2.0f * in[p] - in[p - 2] - in[p + 2]) * 0.25f; + const float Gv = (in[p - w] + in[p + w]) * 0.5f + + (2.0f * in[p] - in[p - 2*w] - in[p + 2*w]) * 0.25f; + + /* Horizontal residual at center and same-color neighbors */ + const float rh = in[p] - Gh; + const float rh_l = in[p - 2] - (in[p - 3] + in[p - 1]) * 0.5f; + const float rh_r = in[p + 2] - (in[p + 1] + in[p + 3]) * 0.5f; + + /* Vertical residual */ + const float rv = in[p] - Gv; + const float rv_u = in[p - 2*w] - (in[p - 3*w] + in[p - w]) * 0.5f; + const float rv_d = in[p + 2*w] - (in[p + w] + in[p + 3*w]) * 0.5f; + + /* Laplacian energy of residuals + 2nd difference of raw */ + const float Lh = fabsf(rh_l - 2.0f * rh + rh_r) + + fabsf(in[p - 2] - 2.0f * in[p] + in[p + 2]); + const float Lv = fabsf(rv_u - 2.0f * rv + rv_d) + + fabsf(in[p - 2*w] - 2.0f * in[p] + in[p + 2*w]); + + green[p] = (Lh < Lv) ? Gh : (Lv < Lh) ? Gv : (Gh + Gv) * 0.5f; + } +} + +/* ===== Bilinear color-difference interpolation ===== */ +static void _ari_interpolate_cd(float *const restrict diff_r, + float *const restrict diff_b, + const float *const restrict in, + const float *const restrict green, + const int width, const int height, + const uint32_t filters) +{ + const size_t npix = (size_t)width * height; + memset(diff_r, 0, sizeof(float) * npix); + memset(diff_b, 0, sizeof(float) * npix); + + /* Compute known color differences */ + DT_OMP_FOR() + for(int y = 0; y < height; y++) + for(int x = 0; x < width; x++) + { + const size_t p = (size_t)y * width + x; + const int c = FC(y, x, filters); + if(c == 0) diff_r[p] = in[p] - green[p]; + else if(c == 2) diff_b[p] = in[p] - green[p]; + } + + /* Interpolate R-G at non-Red positions + * All interpolation uses only KNOWN Red positions: + * Gr (Green in Red row): horizontal R neighbors + * Gb (Green in Blue row): vertical R neighbors + * B: diagonal R neighbors */ + DT_OMP_FOR() + for(int y = 2; y < height - 2; y++) + for(int x = 2; x < width - 2; x++) + { + const size_t p = (size_t)y * width + x; + const int c = FC(y, x, filters); + if(c == 0) continue; /* R: known */ + + if(c == 1) /* Green */ + { + if(FC(y, x - 1, filters) == 0) /* Gr: R left/right */ + diff_r[p] = (diff_r[p - 1] + diff_r[p + 1]) * 0.5f; + else /* Gb: R above/below */ + diff_r[p] = (diff_r[p - width] + diff_r[p + width]) * 0.5f; + } + else /* Blue: diagonal R */ + { + diff_r[p] = (diff_r[p - width - 1] + diff_r[p - width + 1] + + diff_r[p + width - 1] + diff_r[p + width + 1]) * 0.25f; + } + } + + /* Interpolate B-G at non-Blue positions */ + DT_OMP_FOR() + for(int y = 2; y < height - 2; y++) + for(int x = 2; x < width - 2; x++) + { + const size_t p = (size_t)y * width + x; + const int c = FC(y, x, filters); + if(c == 2) continue; /* B: known */ + + if(c == 1) /* Green */ + { + if(FC(y, x - 1, filters) == 2) /* Gb: B left/right */ + diff_b[p] = (diff_b[p - 1] + diff_b[p + 1]) * 0.5f; + else /* Gr: B above/below */ + diff_b[p] = (diff_b[p - width] + diff_b[p + width]) * 0.5f; + } + else /* Red: diagonal B */ + { + diff_b[p] = (diff_b[p - width - 1] + diff_b[p - width + 1] + + diff_b[p + width - 1] + diff_b[p + width + 1]) * 0.25f; + } + } +} + +/* ===== Write RGB output from green + color differences ===== */ +static void _ari_write_rgb(float *const restrict out, + const float *const restrict green, + const float *const restrict diff_r, + const float *const restrict diff_b, + const int width, const int height) +{ + DT_OMP_FOR() + for(int y = 0; y < height; y++) + for(int x = 0; x < width; x++) + { + const size_t p = (size_t)y * width + x; + const size_t o = 4 * p; + out[o + 0] = fmaxf(green[p] + diff_r[p], 0.0f); + out[o + 1] = fmaxf(green[p], 0.0f); + out[o + 2] = fmaxf(green[p] + diff_b[p], 0.0f); + out[o + 3] = 0.0f; + } +} + +/* ===== Per-pixel adaptive selection ===== */ +/* Selects per-pixel among n_candidates based on local color-difference variance. + * Candidate arrays: greens[i], diff_rs[i], diff_bs[i] for i in [0, n_candidates). + * Candidates are ordered sharpest-first: MLRI, RI, GF-RI. + * When variance difference < noise_thr^2, prefer the smoother (later) candidate. */ +static void _ari_adaptive_select(float *const restrict out, + float *const *const greens, + float *const *const diff_rs, + float *const *const diff_bs, + const int n_candidates, + const int width, const int height, + const float noise_thr) +{ + const int r = ARI_SEL_RADIUS; + const float thr_sq = noise_thr * noise_thr * 4.0f; + + DT_OMP_FOR() + for(int y = 0; y < height; y++) + for(int x = 0; x < width; x++) + { + const size_t p = (size_t)y * width + x; + + /* Compute local variance of color differences for each candidate */ + float best_var = FLT_MAX; + int best = n_candidates - 1; /* default: smoothest */ + + for(int ci = 0; ci < n_candidates; ci++) + { + const float *dr = diff_rs[ci]; + const float *db = diff_bs[ci]; + + /* Local variance in (2r+1)x(2r+1) window */ + float sum_r = 0, sum_r2 = 0, sum_b = 0, sum_b2 = 0; + int cnt = 0; + const int y0 = MAX(0, y - r), y1 = MIN(height - 1, y + r); + const int x0 = MAX(0, x - r), x1 = MIN(width - 1, x + r); + for(int yy = y0; yy <= y1; yy++) + for(int xx = x0; xx <= x1; xx++) + { + const size_t pp = (size_t)yy * width + xx; + sum_r += dr[pp]; sum_r2 += dr[pp] * dr[pp]; + sum_b += db[pp]; sum_b2 += db[pp] * db[pp]; + cnt++; + } + const float inv = 1.0f / (float)cnt; + const float var_r = sum_r2 * inv - (sum_r * inv) * (sum_r * inv); + const float var_b = sum_b2 * inv - (sum_b * inv) * (sum_b * inv); + const float var = fmaxf(var_r, 0.0f) + fmaxf(var_b, 0.0f); + + if(var < best_var - thr_sq) + { + best_var = var; + best = ci; + } + } + + /* Write selected candidate to output */ + const size_t o = 4 * p; + out[o + 0] = fmaxf(greens[best][p] + diff_rs[best][p], 0.0f); + out[o + 1] = fmaxf(greens[best][p], 0.0f); + out[o + 2] = fmaxf(greens[best][p] + diff_bs[best][p], 0.0f); + out[o + 3] = 0.0f; + } +} + +/* ===== Main entry point ===== */ +static void ari_demosaic(float *const restrict out, + const float *const restrict in, + const int width, const int height, + const uint32_t filters, + const int quality) +{ + const size_t npix = (size_t)width * height; + + /* === Quality 1: MLRI only, no selection === */ + if(quality <= 1) + { + float *green = dt_alloc_align_float(npix); + float *dr = dt_alloc_align_float(npix); + float *db = dt_alloc_align_float(npix); + if(!green || !dr || !db) + { + dt_free_align(green); dt_free_align(dr); dt_free_align(db); + memset(out, 0, sizeof(float) * npix * 4); + return; + } + _ari_green_mlri(green, in, width, height, filters); + _ari_interpolate_cd(dr, db, in, green, width, height, filters); + _ari_write_rgb(out, green, dr, db, width, height); + dt_free_align(green); dt_free_align(dr); dt_free_align(db); + return; + } + + /* === Quality 2-3: adaptive selection === */ + const float noise_sigma = _ari_estimate_noise(in, width, height, filters); + const float gf_eps = noise_sigma * noise_sigma * 4.0f; + + /* Candidate 0: MLRI (sharpest) */ + float *green_mlri = dt_alloc_align_float(npix); + float *dr_mlri = dt_alloc_align_float(npix); + float *db_mlri = dt_alloc_align_float(npix); + + /* Candidate for RI / GF-RI */ + float *green_ri = dt_alloc_align_float(npix); + float *dr_ri = dt_alloc_align_float(npix); + float *db_ri = dt_alloc_align_float(npix); + float *dr_gfri = dt_alloc_align_float(npix); + float *db_gfri = dt_alloc_align_float(npix); + + if(!green_mlri || !dr_mlri || !db_mlri || !green_ri || !dr_ri || !db_ri + || !dr_gfri || !db_gfri) + { + dt_free_align(green_mlri); dt_free_align(dr_mlri); dt_free_align(db_mlri); + dt_free_align(green_ri); dt_free_align(dr_ri); dt_free_align(db_ri); + dt_free_align(dr_gfri); dt_free_align(db_gfri); + memset(out, 0, sizeof(float) * npix * 4); + return; + } + + /* Generate MLRI candidate */ + _ari_green_mlri(green_mlri, in, width, height, filters); + _ari_interpolate_cd(dr_mlri, db_mlri, in, green_mlri, width, height, filters); + + /* Generate RI candidate (HA green + bilinear CD) */ + _ari_green_ha(green_ri, in, width, height, filters); + _ari_interpolate_cd(dr_ri, db_ri, in, green_ri, width, height, filters); + + /* Generate GF-RI: guided filter on RI color differences */ + _ari_guided_filter_ch(dr_ri, green_ri, dr_gfri, width, height, ARI_GF_RADIUS, gf_eps); + _ari_guided_filter_ch(db_ri, green_ri, db_gfri, width, height, ARI_GF_RADIUS, gf_eps); + + /* Adaptive selection */ + if(quality >= 3) + { + /* 3 candidates: MLRI, RI, GF-RI (sharpest to smoothest) */ + float *greens[3] = { green_mlri, green_ri, green_ri }; + float *diff_rs[3] = { dr_mlri, dr_ri, dr_gfri }; + float *diff_bs[3] = { db_mlri, db_ri, db_gfri }; + _ari_adaptive_select(out, greens, diff_rs, diff_bs, 3, + width, height, noise_sigma); + } + else + { + /* 2 candidates: MLRI, GF-RI */ + float *greens[2] = { green_mlri, green_ri }; + float *diff_rs[2] = { dr_mlri, dr_gfri }; + float *diff_bs[2] = { db_mlri, db_gfri }; + _ari_adaptive_select(out, greens, diff_rs, diff_bs, 2, + width, height, noise_sigma); + } + + dt_free_align(green_mlri); dt_free_align(dr_mlri); dt_free_align(db_mlri); + dt_free_align(green_ri); dt_free_align(dr_ri); dt_free_align(db_ri); + dt_free_align(dr_gfri); dt_free_align(db_gfri); +} diff --git a/src/iop/demosaicing/menon.c b/src/iop/demosaicing/menon.c new file mode 100644 index 000000000000..175a19afc6a5 --- /dev/null +++ b/src/iop/demosaicing/menon.c @@ -0,0 +1,551 @@ +/* + This file is part of darktable, + Copyright (C) 2024-2025 darktable developers. + + darktable is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + darktable is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with darktable. If not, see . + + Menon (2007) DDFAPD demosaicing algorithm. + Ported from the colour-demosaicing Python library (BSD-3-Clause). + Reference: D. Menon, S. Andriani and G. Calvagno, + "Demosaicing With Directional Filtering and a posteriori Decision", + IEEE Trans. Image Processing, vol. 16, no. 1, pp. 132-141, Jan. 2007. +*/ + +#define MENON_BORDER 6 + +/* Helper: mirror-boundary 1D horizontal convolution with a 5-tap kernel. + kernel[] is indexed [-2..+2] stored as kernel[0..4]. */ +static inline float _menon_cnv_h5(const float *const restrict buf, + const int row, const int col, + const int width, const int height, + const float *const restrict kernel) +{ + float sum = 0.0f; + for(int k = -2; k <= 2; k++) + { + int c = col + k; + if(c < 0) c = -c; + if(c >= width) c = 2 * (width - 1) - c; + sum += buf[(size_t)row * width + c] * kernel[k + 2]; + } + return sum; +} + +/* Helper: mirror-boundary 1D vertical convolution with a 5-tap kernel. */ +static inline float _menon_cnv_v5(const float *const restrict buf, + const int row, const int col, + const int width, const int height, + const float *const restrict kernel) +{ + float sum = 0.0f; + for(int k = -2; k <= 2; k++) + { + int r = row + k; + if(r < 0) r = -r; + if(r >= height) r = 2 * (height - 1) - r; + sum += buf[(size_t)r * width + col] * kernel[k + 2]; + } + return sum; +} + +/* Helper: mirror-boundary 1D horizontal convolution with a 3-tap kernel. + kernel[] is indexed [-1..+1] stored as kernel[0..2]. */ +static inline float _menon_cnv_h3(const float *const restrict buf, + const int row, const int col, + const int width, const int height, + const float *const restrict kernel) +{ + float sum = 0.0f; + for(int k = -1; k <= 1; k++) + { + int c = col + k; + if(c < 0) c = -c; + if(c >= width) c = 2 * (width - 1) - c; + sum += buf[(size_t)row * width + c] * kernel[k + 1]; + } + return sum; +} + +/* Helper: mirror-boundary 1D vertical convolution with a 3-tap kernel. */ +static inline float _menon_cnv_v3(const float *const restrict buf, + const int row, const int col, + const int width, const int height, + const float *const restrict kernel) +{ + float sum = 0.0f; + for(int k = -1; k <= 1; k++) + { + int r = row + k; + if(r < 0) r = -r; + if(r >= height) r = 2 * (height - 1) - r; + sum += buf[(size_t)r * width + col] * kernel[k + 1]; + } + return sum; +} + +/* Helper: 5x5 2D convolution with constant (zero-pad) boundary for the classifier kernel. */ +static inline float _menon_cnv_2d(const float *const restrict buf, + const int row, const int col, + const int width, const int height, + const float kernel[5][5]) +{ + float sum = 0.0f; + for(int dr = -2; dr <= 2; dr++) + { + const int r = row + dr; + if(r < 0 || r >= height) continue; + for(int dc = -2; dc <= 2; dc++) + { + const int c = col + dc; + if(c < 0 || c >= width) continue; + sum += buf[(size_t)r * width + c] * kernel[dr + 2][dc + 2]; + } + } + return sum; +} + +DT_OMP_DECLARE_SIMD(aligned(in, out : 64)) +static void menon_demosaic(float *const restrict out, + const float *const restrict in, + const int width, + const int height, + const uint32_t filters) +{ + const size_t numpix = (size_t)width * height; + + /* ---- Allocate working buffers ---- */ + float *const restrict CFA = dt_alloc_align_float(numpix); + float *const restrict R = dt_alloc_align_float(numpix); + float *const restrict G = dt_alloc_align_float(numpix); + float *const restrict B = dt_alloc_align_float(numpix); + float *const restrict G_H = dt_alloc_align_float(numpix); + float *const restrict G_V = dt_alloc_align_float(numpix); + float *const restrict C_H = dt_alloc_align_float(numpix); + float *const restrict C_V = dt_alloc_align_float(numpix); + float *const restrict D_H = dt_alloc_align_float(numpix); + float *const restrict D_V = dt_alloc_align_float(numpix); + uint8_t *const restrict M = dt_alloc_aligned(numpix); // direction mask: 1=horizontal, 0=vertical + + if(!CFA || !R || !G || !B || !G_H || !G_V || !C_H || !C_V || !D_H || !D_V || !M) + { + dt_free_align(CFA); dt_free_align(R); dt_free_align(G); dt_free_align(B); + dt_free_align(G_H); dt_free_align(G_V); + dt_free_align(C_H); dt_free_align(C_V); + dt_free_align(D_H); dt_free_align(D_V); + dt_free_align(M); + // fallback: zero output + memset(out, 0, sizeof(float) * numpix * 4); + return; + } + + /* ---- Filter kernels ---- */ + const float h_0[5] = { 0.0f, 0.5f, 0.0f, 0.5f, 0.0f }; + const float h_1[5] = { -0.25f, 0.0f, 0.5f, 0.0f, -0.25f }; + const float k_b[3] = { 0.5f, 0.0f, 0.5f }; + const float FIR[3] = { 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f }; + + /* Directional classifier kernel (5x5) for horizontal. + scipy.ndimage.convolve flips the kernel (true convolution), but our + _menon_cnv_2d does correlation, so we store the 180-degree-rotated + version of the original kernel k from the paper / reference code. */ + const float clf_h[5][5] = { + { 1, 0, 1, 0, 0 }, + { 0, 1, 0, 0, 0 }, + { 3, 0, 3, 0, 0 }, + { 0, 1, 0, 0, 0 }, + { 1, 0, 1, 0, 0 } + }; + /* Transposed kernel for vertical (also rotated 180) */ + const float clf_v[5][5] = { + { 1, 0, 3, 0, 1 }, + { 0, 1, 0, 1, 0 }, + { 1, 0, 3, 0, 1 }, + { 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0 } + }; + + /* ---- Step 1: Extract CFA and initial R, G, B channels ---- */ + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(in, CFA, R, G, B, width, height, filters, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + { + const int row = idx / width; + const int col = idx % width; + const float val = in[idx]; // raw data is 1 channel per pixel for Bayer + CFA[idx] = val; + const int fc = FC(row, col, filters); + R[idx] = (fc == 0) ? val : 0.0f; + G[idx] = (fc == 1) ? val : 0.0f; + B[idx] = (fc == 2) ? val : 0.0f; + } + + /* ---- Step 2: Tentative horizontal and vertical green estimates ---- */ + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(CFA, G, G_H, G_V, width, height, filters, h_0, h_1, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + { + const int row = idx / width; + const int col = idx % width; + const int fc = FC(row, col, filters); + if(fc == 1) + { + // Green pixel: keep original + G_H[idx] = G[idx]; + G_V[idx] = G[idx]; + } + else + { + // Non-green pixel: interpolate green directionally + G_H[idx] = _menon_cnv_h5(CFA, row, col, width, height, h_0) + + _menon_cnv_h5(CFA, row, col, width, height, h_1); + G_V[idx] = _menon_cnv_v5(CFA, row, col, width, height, h_0) + + _menon_cnv_v5(CFA, row, col, width, height, h_1); + } + } + + /* ---- Step 3: Compute color differences for classification ---- */ + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(R, B, G_H, G_V, C_H, C_V, width, height, filters, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + { + const int row = idx / width; + const int col = idx % width; + const int fc = FC(row, col, filters); + if(fc == 0) // Red pixel + { + C_H[idx] = R[idx] - G_H[idx]; + C_V[idx] = R[idx] - G_V[idx]; + } + else if(fc == 2) // Blue pixel + { + C_H[idx] = B[idx] - G_H[idx]; + C_V[idx] = B[idx] - G_V[idx]; + } + else + { + C_H[idx] = 0.0f; + C_V[idx] = 0.0f; + } + } + + /* ---- Step 4: Compute directional gradients D_H, D_V ---- */ + /* D_H[row][col] = |C_H[row][col] - C_H[row][col+2]| (forward shift by 2) + D_V[row][col] = |C_V[row][col] - C_V[row+2][col]| (forward shift by 2) + Boundary: reflect (numpy pad mode='reflect') */ + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(C_H, C_V, D_H, D_V, width, height, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + { + const int row = idx / width; + const int col = idx % width; + + // Horizontal gradient with reflect padding + { + int c2 = col + 2; + if(c2 >= width) c2 = 2 * (width - 1) - c2; // reflect + D_H[idx] = fabsf(C_H[idx] - C_H[(size_t)row * width + c2]); + } + // Vertical gradient with reflect padding + { + int r2 = row + 2; + if(r2 >= height) r2 = 2 * (height - 1) - r2; // reflect + D_V[idx] = fabsf(C_V[idx] - C_V[(size_t)r2 * width + col]); + } + } + + /* ---- Step 5: Classify direction using 5x5 weighted sum ---- */ + /* d_H = convolve2d(D_H, clf_h), d_V = convolve2d(D_V, clf_v) */ + /* Step 6: Choose direction: if d_V >= d_H => horizontal (M=1), else vertical (M=0) */ + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(D_H, D_V, G_H, G_V, G, M, width, height, clf_h, clf_v, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + { + const int row = idx / width; + const int col = idx % width; + const float d_h = _menon_cnv_2d(D_H, row, col, width, height, clf_h); + const float d_v = _menon_cnv_2d(D_V, row, col, width, height, clf_v); + if(d_v >= d_h) + { + M[idx] = 1; // choose horizontal + G[idx] = G_H[idx]; + } + else + { + M[idx] = 0; // choose vertical + G[idx] = G_V[idx]; + } + } + + /* ---- Step 7: Red/Blue channel interpolation ---- */ + /* We need row masks: R_r (row contains red), B_r (row contains blue). + For standard RGGB: row 0 has R (even rows), row 1 has B (odd rows). + We use FC to determine this per-row. */ + + /* 7a: At green pixels on red rows, interpolate R horizontally */ + /* 7b: At green pixels on blue rows, interpolate R vertically */ + /* 7c: At green pixels on blue rows, interpolate B horizontally */ + /* 7d: At green pixels on red rows, interpolate B vertically */ + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(R, G, B, width, height, filters, k_b, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + { + const int row = idx / width; + const int col = idx % width; + const int fc = FC(row, col, filters); + + if(fc != 1) continue; // only process green pixels + + // Determine if this row is a "red row" or "blue row" + // A "red row" has red pixels on it; a "blue row" has blue pixels + const gboolean red_row = (FC(row, 0, filters) == 0) || (FC(row, 1, filters) == 0); + + if(red_row) + { + // R at green pixel on red row: interpolate horizontally + R[idx] = G[idx] + + _menon_cnv_h3(R, row, col, width, height, k_b) + - _menon_cnv_h3(G, row, col, width, height, k_b); + // B at green pixel on red row: interpolate vertically + B[idx] = G[idx] + + _menon_cnv_v3(B, row, col, width, height, k_b) + - _menon_cnv_v3(G, row, col, width, height, k_b); + } + else // blue_row + { + // R at green pixel on blue row: interpolate vertically + R[idx] = G[idx] + + _menon_cnv_v3(R, row, col, width, height, k_b) + - _menon_cnv_v3(G, row, col, width, height, k_b); + // B at green pixel on blue row: interpolate horizontally + B[idx] = G[idx] + + _menon_cnv_h3(B, row, col, width, height, k_b) + - _menon_cnv_h3(G, row, col, width, height, k_b); + } + } + + /* 7e: At blue pixels, interpolate R using direction mask M + 7f: At red pixels, interpolate B using direction mask M */ + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(R, G, B, M, width, height, filters, k_b, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + { + const int row = idx / width; + const int col = idx % width; + const int fc = FC(row, col, filters); + + if(fc == 2) // Blue pixel: interpolate R + { + if(M[idx]) + R[idx] = B[idx] + + _menon_cnv_h3(R, row, col, width, height, k_b) + - _menon_cnv_h3(B, row, col, width, height, k_b); + else + R[idx] = B[idx] + + _menon_cnv_v3(R, row, col, width, height, k_b) + - _menon_cnv_v3(B, row, col, width, height, k_b); + } + else if(fc == 0) // Red pixel: interpolate B + { + if(M[idx]) + B[idx] = R[idx] + + _menon_cnv_h3(B, row, col, width, height, k_b) + - _menon_cnv_h3(R, row, col, width, height, k_b); + else + B[idx] = R[idx] + + _menon_cnv_v3(B, row, col, width, height, k_b) + - _menon_cnv_v3(R, row, col, width, height, k_b); + } + } + + /* ---- Step 8: Refinement ---- */ + + /* 8a: Update green at R/B locations using FIR filter on color differences */ + { + /* Recompute R-G and B-G into temporary buffers (reuse C_H for R_G, C_V for B_G) */ + float *const restrict R_G = C_H; + float *const restrict B_G = C_V; + + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(R, G, B, R_G, B_G, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + { + R_G[idx] = R[idx] - G[idx]; + B_G[idx] = B[idx] - G[idx]; + } + + /* At R pixels: G = R - directional_FIR(R_G) + At B pixels: G = B - directional_FIR(B_G) */ + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(R, G, B, R_G, B_G, M, width, height, filters, FIR, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + { + const int row = idx / width; + const int col = idx % width; + const int fc = FC(row, col, filters); + if(fc == 0) // Red pixel + { + const float r_g_m = M[idx] + ? _menon_cnv_h3(R_G, row, col, width, height, FIR) + : _menon_cnv_v3(R_G, row, col, width, height, FIR); + G[idx] = R[idx] - r_g_m; + } + else if(fc == 2) // Blue pixel + { + const float b_g_m = M[idx] + ? _menon_cnv_h3(B_G, row, col, width, height, FIR) + : _menon_cnv_v3(B_G, row, col, width, height, FIR); + G[idx] = B[idx] - b_g_m; + } + } + + /* Recompute R_G after green update */ + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(R, G, R_G, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + R_G[idx] = R[idx] - G[idx]; + + /* Recompute B_G after green update */ + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(B, G, B_G, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + B_G[idx] = B[idx] - G[idx]; + + /* 8b: Update R at green pixels on blue rows (vertical averaging of R-G) + Update R at green pixels on blue columns (horizontal averaging of R-G) + Update B at green pixels on red rows (vertical averaging of B-G) + Update B at green pixels on red columns (horizontal averaging of B-G) */ + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(R, G, B, R_G, B_G, width, height, filters, k_b, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + { + const int row = idx / width; + const int col = idx % width; + const int fc = FC(row, col, filters); + if(fc != 1) continue; // only green pixels + + const gboolean red_row = (FC(row, 0, filters) == 0) || (FC(row, 1, filters) == 0); + const gboolean red_col = (FC(0, col, filters) == 0) || (FC(1, col, filters) == 0); + + if(!red_row) // blue row: update R vertically + R[idx] = G[idx] + _menon_cnv_v3(R_G, row, col, width, height, k_b); + if(!red_col) // blue column: update R horizontally + R[idx] = G[idx] + _menon_cnv_h3(R_G, row, col, width, height, k_b); + + if(red_row) // red row: update B vertically + B[idx] = G[idx] + _menon_cnv_v3(B_G, row, col, width, height, k_b); + if(red_col) // red column: update B horizontally + B[idx] = G[idx] + _menon_cnv_h3(B_G, row, col, width, height, k_b); + } + + /* 8c: Update R at blue pixels and B at red pixels using R-B differences */ + /* Compute R-B into D_H (reuse buffer) */ + float *const restrict R_B = D_H; + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(R, B, R_B, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + R_B[idx] = R[idx] - B[idx]; + + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(R, B, R_B, M, width, height, filters, FIR, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + { + const int row = idx / width; + const int col = idx % width; + const int fc = FC(row, col, filters); + if(fc == 2) // Blue pixel: update R from R-B + { + const float r_b_m = M[idx] + ? _menon_cnv_h3(R_B, row, col, width, height, FIR) + : _menon_cnv_v3(R_B, row, col, width, height, FIR); + R[idx] = B[idx] + r_b_m; + } + else if(fc == 0) // Red pixel: update B from R-B + { + const float r_b_m = M[idx] + ? _menon_cnv_h3(R_B, row, col, width, height, FIR) + : _menon_cnv_v3(R_B, row, col, width, height, FIR); + B[idx] = R[idx] - r_b_m; + } + } + } + + /* ---- Step 9: Write output in planar RGBX format ---- */ + DT_OMP_PRAGMA(parallel for schedule(static) default(none) + dt_omp_firstprivate(out, R, G, B, numpix)) + for(size_t idx = 0; idx < numpix; idx++) + { + out[idx * 4 + 0] = R[idx]; + out[idx * 4 + 1] = G[idx]; + out[idx * 4 + 2] = B[idx]; + out[idx * 4 + 3] = 0.0f; + } + + /* ---- Border handling: simple bilinear for border pixels ---- */ + for(int row = 0; row < height; row++) + for(int col = 0; col < width; col++) + { + if(row == MENON_BORDER && col >= MENON_BORDER && col < width - MENON_BORDER) + { + col = width - MENON_BORDER - 1; + continue; + } + if(row >= MENON_BORDER && row < height - MENON_BORDER + && col == MENON_BORDER) + { + col = width - MENON_BORDER - 1; + continue; + } + + float sum[8] = { 0.0f }; + for(int y = row - 1; y <= row + 1; y++) + for(int x = col - 1; x <= col + 1; x++) + { + if(y >= 0 && x >= 0 && y < height && x < width) + { + const int f = FC(y, x, filters); + sum[f] += in[(size_t)y * width + x]; + sum[f + 4]++; + } + } + const int f = FC(row, col, filters); + const size_t oidx = ((size_t)row * width + col) * 4; + for_each_channel(c) + { + if(c == (size_t)f) + out[oidx + c] = in[(size_t)row * width + col]; + else + out[oidx + c] = sum[c] / MAX(1.0f, sum[c + 4]); + } + out[oidx + 3] = 0.0f; + } + + /* ---- Cleanup ---- */ + dt_free_align(CFA); + dt_free_align(R); + dt_free_align(G); + dt_free_align(B); + dt_free_align(G_H); + dt_free_align(G_V); + dt_free_align(C_H); + dt_free_align(C_V); + dt_free_align(D_H); + dt_free_align(D_V); + dt_free_align(M); +} + +// clang-format off +// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py +// vim: shiftwidth=2 expandtab tabstop=2 cindent +// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified; +// clang-format on From 1806f96c86a266bcedd93207b4dba8254ccbc3eb Mon Sep 17 00:00:00 2001 From: dregsist Date: Mon, 13 Apr 2026 00:45:12 +0900 Subject: [PATCH 2/5] demosaic: clamp negative Bayer values in Menon demosaicing rawprepare can produce negative pixel values; clamp to zero at the start of Menon processing, consistent with how RCD handles this case. Co-Authored-By: Claude Sonnet 4.6 --- src/iop/demosaicing/menon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/iop/demosaicing/menon.c b/src/iop/demosaicing/menon.c index 175a19afc6a5..69a30dc7d7cd 100644 --- a/src/iop/demosaicing/menon.c +++ b/src/iop/demosaicing/menon.c @@ -182,7 +182,7 @@ static void menon_demosaic(float *const restrict out, { const int row = idx / width; const int col = idx % width; - const float val = in[idx]; // raw data is 1 channel per pixel for Bayer + const float val = fmaxf(in[idx], 0.0f); // clamp negative values from rawprepare CFA[idx] = val; const int fc = FC(row, col, filters); R[idx] = (fc == 0) ? val : 0.0f; From 7bb5b6a2d1e4365bfc0d4a2a79c633faf30e320d Mon Sep 17 00:00:00 2001 From: dregsist Date: Mon, 13 Apr 2026 00:48:39 +0900 Subject: [PATCH 3/5] demosaic: clamp negative Bayer values in ARI demosaicing rawprepare can produce negative pixel values. Create a clamped copy of the input at the entry of ari_demosaic so that all gradient and residual computations (which depend on consistent non-negative data) see a uniform view of the CFA. Mirrors the approach used in RCD. Co-Authored-By: Claude Sonnet 4.6 --- src/iop/demosaicing/ari.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/iop/demosaicing/ari.c b/src/iop/demosaicing/ari.c index 5638d51edf12..4a99aa19eec3 100644 --- a/src/iop/demosaicing/ari.c +++ b/src/iop/demosaicing/ari.c @@ -470,6 +470,13 @@ static void ari_demosaic(float *const restrict out, { const size_t npix = (size_t)width * height; + /* Clamp negative Bayer values (can occur after rawprepare) so all + * subsequent gradient and residual computations see consistent data. */ + float *cfa = dt_alloc_align_float(npix); + if(!cfa) { memset(out, 0, sizeof(float) * npix * 4); return; } + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) cfa[i] = fmaxf(in[i], 0.0f); + /* === Quality 1: MLRI only, no selection === */ if(quality <= 1) { @@ -479,18 +486,20 @@ static void ari_demosaic(float *const restrict out, if(!green || !dr || !db) { dt_free_align(green); dt_free_align(dr); dt_free_align(db); + dt_free_align(cfa); memset(out, 0, sizeof(float) * npix * 4); return; } - _ari_green_mlri(green, in, width, height, filters); - _ari_interpolate_cd(dr, db, in, green, width, height, filters); + _ari_green_mlri(green, cfa, width, height, filters); + _ari_interpolate_cd(dr, db, cfa, green, width, height, filters); _ari_write_rgb(out, green, dr, db, width, height); dt_free_align(green); dt_free_align(dr); dt_free_align(db); + dt_free_align(cfa); return; } /* === Quality 2-3: adaptive selection === */ - const float noise_sigma = _ari_estimate_noise(in, width, height, filters); + const float noise_sigma = _ari_estimate_noise(cfa, width, height, filters); const float gf_eps = noise_sigma * noise_sigma * 4.0f; /* Candidate 0: MLRI (sharpest) */ @@ -511,17 +520,18 @@ static void ari_demosaic(float *const restrict out, dt_free_align(green_mlri); dt_free_align(dr_mlri); dt_free_align(db_mlri); dt_free_align(green_ri); dt_free_align(dr_ri); dt_free_align(db_ri); dt_free_align(dr_gfri); dt_free_align(db_gfri); + dt_free_align(cfa); memset(out, 0, sizeof(float) * npix * 4); return; } /* Generate MLRI candidate */ - _ari_green_mlri(green_mlri, in, width, height, filters); - _ari_interpolate_cd(dr_mlri, db_mlri, in, green_mlri, width, height, filters); + _ari_green_mlri(green_mlri, cfa, width, height, filters); + _ari_interpolate_cd(dr_mlri, db_mlri, cfa, green_mlri, width, height, filters); /* Generate RI candidate (HA green + bilinear CD) */ - _ari_green_ha(green_ri, in, width, height, filters); - _ari_interpolate_cd(dr_ri, db_ri, in, green_ri, width, height, filters); + _ari_green_ha(green_ri, cfa, width, height, filters); + _ari_interpolate_cd(dr_ri, db_ri, cfa, green_ri, width, height, filters); /* Generate GF-RI: guided filter on RI color differences */ _ari_guided_filter_ch(dr_ri, green_ri, dr_gfri, width, height, ARI_GF_RADIUS, gf_eps); @@ -550,4 +560,5 @@ static void ari_demosaic(float *const restrict out, dt_free_align(green_mlri); dt_free_align(dr_mlri); dt_free_align(db_mlri); dt_free_align(green_ri); dt_free_align(dr_ri); dt_free_align(db_ri); dt_free_align(dr_gfri); dt_free_align(db_gfri); + dt_free_align(cfa); } From 3ebae5b6c11203a420b4ece296b7f1c79e98dbd6 Mon Sep 17 00:00:00 2001 From: dregsist Date: Mon, 13 Apr 2026 09:03:02 +0900 Subject: [PATCH 4/5] demosaic: fix style and copyright header per review feedback - Update copyright year to 2026 in menon.c - Replace custom header in ari.c with standard darktable GPL header - Split multi-parameter function signatures to one parameter per line in both ari.c and menon.c (style requirement) Co-Authored-By: Claude Sonnet 4.6 --- src/iop/demosaicing/ari.c | 59 ++++++++++++++++++++++--------------- src/iop/demosaicing/menon.c | 32 +++++++++++++------- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/src/iop/demosaicing/ari.c b/src/iop/demosaicing/ari.c index 4a99aa19eec3..91eeb75c85b0 100644 --- a/src/iop/demosaicing/ari.c +++ b/src/iop/demosaicing/ari.c @@ -1,18 +1,19 @@ /* - ARI (Adaptive Residual Interpolation) Custom Demosaic - - Based on Monno et al. 2017 "Adaptive Residual Interpolation for Color - and Multispectral Image Demosaicking" with customizations: - - Three candidate generators: MLRI, RI, GF-RI - - Per-pixel adaptive selection based on residual variance - - Automatic noise estimation for adaptive threshold - - References: - - Kiku et al. 2013 "Residual Interpolation for Color Image Demosaicking" - - Kiku et al. 2014 "Minimized-Laplacian Residual Interpolation..." - - Monno et al. 2017 "Adaptive Residual Interpolation..." - - Hamilton & Adams 1997 "Adaptive color plan interpolation..." - - He et al. 2013 "Guided Image Filtering" + This file is part of darktable, + Copyright (C) 2026 darktable developers. + + darktable is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + darktable is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with darktable. If not, see . */ #define ARI_BORDER 10 @@ -28,7 +29,8 @@ static int _ari_compare_float(const void *a, const void *b) /* ===== Noise estimation from dark pixels ===== */ static float _ari_estimate_noise(const float *const restrict in, - const int width, const int height, + const int width, + const int height, const uint32_t filters) { /* Find 10th percentile intensity */ @@ -100,7 +102,8 @@ static float _ari_estimate_noise(const float *const restrict in, /* ===== Box filter via integral image ===== */ static void _ari_box_filter(const float *const restrict src, float *const restrict dst, - const int width, const int height, + const int width, + const int height, const int radius) { double *integral = calloc((size_t)(width + 1) * (height + 1), sizeof(double)); @@ -138,8 +141,10 @@ static void _ari_box_filter(const float *const restrict src, static void _ari_guided_filter_ch(const float *const restrict input, const float *const restrict guide, float *const restrict output, - const int width, const int height, - const int radius, const float eps) + const int width, + const int height, + const int radius, + const float eps) { const size_t npix = (size_t)width * height; @@ -204,7 +209,8 @@ static void _ari_guided_filter_ch(const float *const restrict input, /* ===== Green interpolation: Hamilton-Adams (for RI) ===== */ static void _ari_green_ha(float *const restrict green, const float *const restrict in, - const int width, const int height, + const int width, + const int height, const uint32_t filters) { /* Copy known G values, zero non-G */ @@ -241,7 +247,8 @@ static void _ari_green_ha(float *const restrict green, /* ===== Green interpolation: MLRI (Laplacian energy direction) ===== */ static void _ari_green_mlri(float *const restrict green, const float *const restrict in, - const int width, const int height, + const int width, + const int height, const uint32_t filters) { /* Copy known G values */ @@ -307,7 +314,8 @@ static void _ari_interpolate_cd(float *const restrict diff_r, float *const restrict diff_b, const float *const restrict in, const float *const restrict green, - const int width, const int height, + const int width, + const int height, const uint32_t filters) { const size_t npix = (size_t)width * height; @@ -381,7 +389,8 @@ static void _ari_write_rgb(float *const restrict out, const float *const restrict green, const float *const restrict diff_r, const float *const restrict diff_b, - const int width, const int height) + const int width, + const int height) { DT_OMP_FOR() for(int y = 0; y < height; y++) @@ -406,7 +415,8 @@ static void _ari_adaptive_select(float *const restrict out, float *const *const diff_rs, float *const *const diff_bs, const int n_candidates, - const int width, const int height, + const int width, + const int height, const float noise_thr) { const int r = ARI_SEL_RADIUS; @@ -464,7 +474,8 @@ static void _ari_adaptive_select(float *const restrict out, /* ===== Main entry point ===== */ static void ari_demosaic(float *const restrict out, const float *const restrict in, - const int width, const int height, + const int width, + const int height, const uint32_t filters, const int quality) { diff --git a/src/iop/demosaicing/menon.c b/src/iop/demosaicing/menon.c index 69a30dc7d7cd..bf67797c2074 100644 --- a/src/iop/demosaicing/menon.c +++ b/src/iop/demosaicing/menon.c @@ -1,6 +1,6 @@ /* This file is part of darktable, - Copyright (C) 2024-2025 darktable developers. + Copyright (C) 2026 darktable developers. darktable is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,8 +27,10 @@ /* Helper: mirror-boundary 1D horizontal convolution with a 5-tap kernel. kernel[] is indexed [-2..+2] stored as kernel[0..4]. */ static inline float _menon_cnv_h5(const float *const restrict buf, - const int row, const int col, - const int width, const int height, + const int row, + const int col, + const int width, + const int height, const float *const restrict kernel) { float sum = 0.0f; @@ -44,8 +46,10 @@ static inline float _menon_cnv_h5(const float *const restrict buf, /* Helper: mirror-boundary 1D vertical convolution with a 5-tap kernel. */ static inline float _menon_cnv_v5(const float *const restrict buf, - const int row, const int col, - const int width, const int height, + const int row, + const int col, + const int width, + const int height, const float *const restrict kernel) { float sum = 0.0f; @@ -62,8 +66,10 @@ static inline float _menon_cnv_v5(const float *const restrict buf, /* Helper: mirror-boundary 1D horizontal convolution with a 3-tap kernel. kernel[] is indexed [-1..+1] stored as kernel[0..2]. */ static inline float _menon_cnv_h3(const float *const restrict buf, - const int row, const int col, - const int width, const int height, + const int row, + const int col, + const int width, + const int height, const float *const restrict kernel) { float sum = 0.0f; @@ -79,8 +85,10 @@ static inline float _menon_cnv_h3(const float *const restrict buf, /* Helper: mirror-boundary 1D vertical convolution with a 3-tap kernel. */ static inline float _menon_cnv_v3(const float *const restrict buf, - const int row, const int col, - const int width, const int height, + const int row, + const int col, + const int width, + const int height, const float *const restrict kernel) { float sum = 0.0f; @@ -96,8 +104,10 @@ static inline float _menon_cnv_v3(const float *const restrict buf, /* Helper: 5x5 2D convolution with constant (zero-pad) boundary for the classifier kernel. */ static inline float _menon_cnv_2d(const float *const restrict buf, - const int row, const int col, - const int width, const int height, + const int row, + const int col, + const int width, + const int height, const float kernel[5][5]) { float sum = 0.0f; From de30883f714c8e85280ad2d82954e168f238a70d Mon Sep 17 00:00:00 2001 From: dregsist Date: Sun, 19 Apr 2026 22:27:44 +0900 Subject: [PATCH 5/5] demosaic: rewrite ARI to paper-exact, optimize; honest about outcome The previous ARI implementation had quality issues (color fringing, maze patterns on mire1) because it cut too many corners in the adaptive selection heuristic. This rewrites ARI to match the Monno 2015 paper line-by-line (structurally equivalent to the authors' MATLAB reference, within 0.002 dB CPSNR of a Python port). Quality is indeed better: - Kodak low-ISO CPSNR 39.94 dB, +0.8 dB over amaze - Chroma zipper smallest across all tested methods and ISOs - SIDD real raw iso_clean: +0.38 dB over amaze+dual However, the paper-exact pipeline is slow: - 30-80x slower than existing methods - 53s per 15 MP patch at 16 threads (standalone) - 113s through darktable pipeline on mire1.cr2 - Memory-bandwidth bound; OpenCL is unlikely to reach practical speeds Optimizations applied (all preserve paper-exact output): - Rolling sum box filter replacing double integral image - Row-major vertical pass for cache-line reuse on wide images - Paired GF sum sharing (~33 % box-call reduction) - Separable Gaussian 5x5 for criterion smoothing - Separable bicubic 7x7 for R/B residual upsample - Removed redundant refined-estimate recomputation in RI-H - Criterion-side adjacent-loop fusion Menon (2007) is separately included as a reference implementation. It is fast (~amaze speed), but benchmarking across Kodak-24 + SIDD-medium did not show a clear advantage over amaze - chroma zipper is similar or worse (especially cz_peak), and CPSNR is within 0.1 dB. Both algorithms looked strong on paper; extensive benchmarking showed neither clearly beats the existing methods across the tested conditions. I'll leave the merge decision to maintainers. The benchmark numbers themselves should be useful regardless; full writeup + reproducible scripts + Python reference ported from the original MATLAB are available in the sibling darktable-rawforge/demosaic_test/ directory (BENCHMARK.md). menon.c: keep the unused FIR/k_b locals (they're referenced via OpenMP firstprivate clauses) to fix -Werror=unused-variable with GCC 15. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/iop/demosaic.c | 48 +- src/iop/demosaicing/ari.c | 2153 ++++++++++++++++++++++++++++------- src/iop/demosaicing/menon.c | 502 ++++---- 3 files changed, 2087 insertions(+), 616 deletions(-) diff --git a/src/iop/demosaic.c b/src/iop/demosaic.c index 36e4b2e2230a..6c6b16c646a0 100644 --- a/src/iop/demosaic.c +++ b/src/iop/demosaic.c @@ -47,7 +47,7 @@ #include #include -DT_MODULE_INTROSPECTION(7, dt_iop_demosaic_params_t) +DT_MODULE_INTROSPECTION(8, dt_iop_demosaic_params_t) #define DT_DEMOSAIC_XTRANS 1024 // masks for non-Bayer demosaic ops #define DT_DEMOSAIC_DUAL 2048 // masks for dual demosaicing methods @@ -168,6 +168,7 @@ typedef struct dt_iop_demosaic_params_t float cs_center; // $MIN: 0.0 $MAX: 1.0 $DEFAULT: 0.0 $DESCRIPTION: "sharp center" gboolean cs_enabled; // $DEFAULT: FALSE $DESCRIPTION: "capture sharpen" dt_iop_demosaic_ari_quality_t ari_quality; // $DEFAULT: DT_ARI_QUALITY_BALANCED $DESCRIPTION: "ARI quality" + gboolean menon_refine; // $DEFAULT: TRUE $DESCRIPTION: "Menon refining step" } dt_iop_demosaic_params_t; typedef struct dt_iop_demosaic_gui_data_t @@ -182,6 +183,7 @@ typedef struct dt_iop_demosaic_gui_data_t GtkWidget *dual_thrs; GtkWidget *lmmse_refine; GtkWidget *ari_quality; + GtkWidget *menon_refine; GtkWidget *cs_thrs; GtkWidget *cs_radius; GtkWidget *cs_boost; @@ -273,6 +275,7 @@ typedef struct dt_iop_demosaic_data_t float cs_center; gboolean cs_enabled; dt_iop_demosaic_ari_quality_t ari_quality; + gboolean menon_refine; } dt_iop_demosaic_data_t; static gboolean _get_thumb_quality(const int width, const int height) @@ -480,19 +483,49 @@ int legacy_params(dt_iop_module_t *self, return 0; } + typedef struct dt_iop_demosaic_params_v7_t + { + dt_iop_demosaic_greeneq_t green_eq; + float median_thrs; + dt_iop_demosaic_smooth_t color_smoothing; + dt_iop_demosaic_method_t demosaicing_method; + dt_iop_demosaic_lmmse_t lmmse_refine; + float dual_thrs; + float cs_radius; + float cs_thrs; + float cs_boost; + int cs_iter; + float cs_center; + gboolean cs_enabled; + dt_iop_demosaic_ari_quality_t ari_quality; + } dt_iop_demosaic_params_v7_t; + if(old_version == 6) { const dt_iop_demosaic_params_v6_t *o = (dt_iop_demosaic_params_v6_t *)old_params; - dt_iop_demosaic_params_t *n = malloc(sizeof(dt_iop_demosaic_params_t)); + dt_iop_demosaic_params_v7_t *n = malloc(sizeof(dt_iop_demosaic_params_v7_t)); memcpy(n, o, sizeof *o); n->ari_quality = DT_ARI_QUALITY_BALANCED; *new_params = n; - *new_params_size = sizeof(dt_iop_demosaic_params_t); + *new_params_size = sizeof(dt_iop_demosaic_params_v7_t); *new_version = 7; return 0; } + if(old_version == 7) + { + const dt_iop_demosaic_params_v7_t *o = (dt_iop_demosaic_params_v7_t *)old_params; + dt_iop_demosaic_params_t *n = malloc(sizeof(dt_iop_demosaic_params_t)); + memcpy(n, o, sizeof *o); + n->menon_refine = TRUE; // default: always refine (preserves previous behavior) + + *new_params = n; + *new_params_size = sizeof(dt_iop_demosaic_params_t); + *new_version = 8; + return 0; + } + return 1; } @@ -905,7 +938,7 @@ void process(dt_iop_module_t *self, } } else if(method == DT_IOP_DEMOSAIC_MENON) - menon_demosaic(t_out, t_in, width, t_rows, filters); + menon_demosaic(t_out, t_in, width, t_rows, filters, d->menon_refine); else if(method == DT_IOP_DEMOSAIC_ARI) ari_demosaic(t_out, t_in, width, t_rows, filters, d->ari_quality); else if(method == DT_IOP_DEMOSAIC_RCD) @@ -1417,6 +1450,7 @@ void commit_params(dt_iop_module_t *self, d->dual_thrs = p->dual_thrs; d->lmmse_refine = p->lmmse_refine; d->ari_quality = p->ari_quality; + d->menon_refine = p->menon_refine; dt_iop_demosaic_method_t use_method = p->demosaicing_method; d->cs_radius = p->cs_radius; d->cs_thrs = p->cs_thrs; @@ -1631,6 +1665,7 @@ void gui_changed(dt_iop_module_t *self, GtkWidget *w, void *previous) gtk_widget_set_visible(g->dual_thrs, isdual); gtk_widget_set_visible(g->lmmse_refine, islmmse); gtk_widget_set_visible(g->ari_quality, use_method == DT_IOP_DEMOSAIC_ARI); + gtk_widget_set_visible(g->menon_refine, use_method == DT_IOP_DEMOSAIC_MENON); const gboolean monomode = use_method == DT_IOP_DEMOSAIC_PASSTHROUGH_MONOCHROME || use_method == DT_IOP_DEMOSAIC_PASSTHR_MONOX; @@ -1827,6 +1862,11 @@ void gui_init(dt_iop_module_t *self) "balanced: MLRI + guided-filter RI adaptive selection\n" "quality: full 3-candidate adaptive selection")); + g->menon_refine = dt_bauhaus_toggle_from_params(self, "menon_refine"); + gtk_widget_set_tooltip_text(g->menon_refine, + _("Menon refining step: iterative refinement of G, R, and B channels.\n" + "Improves quality at the cost of slightly more processing time.")); + g->color_smoothing = dt_bauhaus_combobox_from_params(self, "color_smoothing"); gtk_widget_set_tooltip_text(g->color_smoothing, _("how many color smoothing median steps after demosaicing")); diff --git a/src/iop/demosaicing/ari.c b/src/iop/demosaicing/ari.c index 91eeb75c85b0..d744e8a6b374 100644 --- a/src/iop/demosaicing/ari.c +++ b/src/iop/demosaicing/ari.c @@ -14,464 +14,1798 @@ You should have received a copy of the GNU General Public License along with darktable. If not, see . -*/ -#define ARI_BORDER 10 -#define ARI_GF_RADIUS 2 -#define ARI_SEL_RADIUS 2 + Adaptive Residual Interpolation (ARI) demosaicing, following: + Y. Monno, D. Kiku, M. Tanaka, M. Okutomi, + "Adaptive Residual Interpolation for Color and Multispectral Image + Demosaicking", IEEE ICIP 2015 (and the extended Sensors 2017 paper). + The implementation is a direct C port of the authors' MATLAB reference + code (ok.sc.e.titech.ac.jp/res/DM/ARI.zip), keeping the same 11-iteration + per-pixel argmin selection and the error-weighted a/b smoothing inside + the guided filters that is the paper's key contribution. +*/ -/* ===== float comparator for qsort ===== */ -static int _ari_compare_float(const void *a, const void *b) +#define ARI_K_MAX 11 /* paper default iteration count */ +#define ARI_EPS 1e-10f + +/* ---------- rectangular box filter via 2-pass separable rolling sum ---------- + * Replaces integral-image approach for better cache behavior on large images. + * + * Pass 1 horizontal: for each row, sliding-window sum over [x-rh, x+rh] with + * replicate-boundary. Output to `tmp` (float). + * Pass 2 vertical: for each column, sliding sum over [y-rv, y+rv] reading + * tmp and writing dst (float). Column strides are handled + * per-column; the acc is a double register for precision. + * + * Memory BW per call: src read + tmp write + tmp read + dst write + * ~ 4 passes × npix × 4 B (vs integral's ~15 passes × 8 B). + * Working set per tile naturally fits L2/L3 since we only need 2 float buffers. + * + * `integral` is the scratch buffer previously used for the double integral + * image; we reinterpret it as a float temp big enough for npix floats + * (it's allocated as (W+1)*(H+1) doubles, i.e. >> W*H floats). */ +static void _ari_box_sum_rect(const float *const restrict src, + float *const restrict dst, + const int width, + const int height, + const int rh, + const int rv, + double *const restrict integral) { - const float fa = *(const float *)a, fb = *(const float *)b; - return (fa > fb) - (fa < fb); -} + float *const restrict tmp = (float *)integral; /* enough space */ + const int W1 = width - 1; + const int H1 = height - 1; -/* ===== Noise estimation from dark pixels ===== */ -static float _ari_estimate_noise(const float *const restrict in, - const int width, - const int height, - const uint32_t filters) -{ - /* Find 10th percentile intensity */ - float vmin = FLT_MAX, vmax = 0.0f; - for(int y = 0; y < height; y += 8) - for(int x = 0; x < width; x += 8) + /* Pass 1: horizontal rolling sum, src -> tmp. */ + DT_OMP_FOR() + for(int y = 0; y < height; y++) + { + const float *const restrict sr = &src[(size_t)y * width]; + float *const restrict dr = &tmp[(size_t)y * width]; + double acc = 0.0; + for(int k = -rh; k <= rh; k++) { - const float v = in[(size_t)y * width + x]; - if(v > 1e-8f && v < vmin) vmin = v; - if(v > vmax) vmax = v; + const int xx = k < 0 ? 0 : (k > W1 ? W1 : k); + acc += (double)sr[xx]; } - if(vmax <= vmin) return 0.001f; + dr[0] = (float)acc; + for(int x = 1; x < width; x++) + { + const int drop = x - 1 - rh; + const int add = x + rh; + const float v_drop = sr[drop < 0 ? 0 : drop]; + const float v_add = sr[add > W1 ? W1 : add]; + acc += (double)v_add - (double)v_drop; + dr[x] = (float)acc; + } + } - /* Histogram for percentile */ -#define ARI_NOISE_BINS 256 - int hist[ARI_NOISE_BINS]; - memset(hist, 0, sizeof(hist)); - const float hscale = (float)(ARI_NOISE_BINS - 1) / fmaxf(vmax - vmin, 1e-10f); - int total = 0; + /* Pass 2: vertical rolling sum, tmp -> dst. + * Cache-friendly variant: within each thread's column strip, process + * row-by-row (row-major inner loop) so we read/write contiguous memory. + * Previous column-major variant had 21KB stride per element and paid full + * cache-line cost per read. Thread-local `acc` array carries rolling state + * across rows for each of the thread's assigned columns. */ + #pragma omp parallel + { + const int tid = omp_get_thread_num(); + const int nt = omp_get_num_threads(); + const int x_start = (int)(((long long)width * tid) / nt); + const int x_end = (int)(((long long)width * (tid + 1)) / nt); + const int ncols = x_end - x_start; - for(int y = 0; y < height; y += 4) - for(int x = 0; x < width; x += 4) + if(ncols > 0) { - const float v = in[(size_t)y * width + x]; - if(v > 0.0f) + double *const acc = (double *)malloc((size_t)ncols * sizeof(double)); + if(acc) { - hist[CLAMP((int)((v - vmin) * hscale), 0, ARI_NOISE_BINS - 1)]++; - total++; + for(int i = 0; i < ncols; i++) acc[i] = 0.0; + + /* Initial row sum = sum over k=-rv..rv of tmp[clamp(k,0,H1), x]. */ + for(int k = -rv; k <= rv; k++) + { + const int yy = k < 0 ? 0 : (k > H1 ? H1 : k); + const float *const restrict t_row = &tmp[(size_t)yy * width]; + for(int x = x_start; x < x_end; x++) + acc[x - x_start] += (double)t_row[x]; + } + { + float *const restrict d_row = &dst[0]; + for(int x = x_start; x < x_end; x++) + d_row[x] = (float)acc[x - x_start]; + } + + /* Rolling slide for rows y=1..H-1. */ + for(int y = 1; y < height; y++) + { + const int drop = y - 1 - rv; + const int add = y + rv; + const int drop_y = drop < 0 ? 0 : drop; + const int add_y = add > H1 ? H1 : add; + const float *const restrict t_add = &tmp[(size_t)add_y * width]; + const float *const restrict t_drop = &tmp[(size_t)drop_y * width]; + float *const restrict d_row = &dst[(size_t)y * width]; + for(int x = x_start; x < x_end; x++) + { + const double a = acc[x - x_start] + (double)t_add[x] - (double)t_drop[x]; + acc[x - x_start] = a; + d_row[x] = (float)a; + } + } + free(acc); } } - - const int target = total / 10; - int cumsum = 0; - float p10 = vmin; - for(int b = 0; b < ARI_NOISE_BINS; b++) - { - cumsum += hist[b]; - if(cumsum >= target) { p10 = vmin + (float)b / hscale; break; } } -#undef ARI_NOISE_BINS - - /* Collect squared same-color differences in dark region */ -#define ARI_MAX_NOISE_SAMPLES 50000 - float *dsq = dt_alloc_align_float(ARI_MAX_NOISE_SAMPLES); - if(!dsq) return 0.001f; +} - int ns = 0; - for(int y = 2; y < height - 2 && ns < ARI_MAX_NOISE_SAMPLES; y++) - for(int x = 2; x < width - 4 && ns < ARI_MAX_NOISE_SAMPLES; x += 2) +/* ---------- utility: apply 1D horizontal kernel ---------- */ +static void _ari_imfilter_h1d(const float *const restrict src, + float *const restrict dst, + const int width, + const int height, + const float *const restrict kernel, + const int taps) +{ + const int half = taps / 2; + DT_OMP_FOR() + for(int y = 0; y < height; y++) + for(int x = 0; x < width; x++) { - const float v1 = in[(size_t)y * width + x]; - const float v2 = in[(size_t)y * width + x + 2]; - if(v1 > 0.0f && v1 < p10 && v2 > 0.0f && v2 < p10) + float s = 0.0f; + for(int k = 0; k < taps; k++) { - const float d = v2 - v1; - dsq[ns++] = d * d; + int xx = x + k - half; + if(xx < 0) xx = -xx; + else if(xx >= width) xx = 2 * width - 2 - xx; + s += kernel[k] * src[(size_t)y * width + xx]; } + dst[(size_t)y * width + x] = s; } - - if(ns < 100) { dt_free_align(dsq); return 0.001f; } - - qsort(dsq, ns, sizeof(float), _ari_compare_float); - const float sigma = sqrtf(dsq[ns / 2] / 0.9098f); - dt_free_align(dsq); - return fmaxf(sigma, 1e-6f); -#undef ARI_MAX_NOISE_SAMPLES } -/* ===== Box filter via integral image ===== */ -static void _ari_box_filter(const float *const restrict src, - float *const restrict dst, - const int width, - const int height, - const int radius) +static void _ari_imfilter_v1d(const float *const restrict src, + float *const restrict dst, + const int width, + const int height, + const float *const restrict kernel, + const int taps) { - double *integral = calloc((size_t)(width + 1) * (height + 1), sizeof(double)); - if(!integral) { memcpy(dst, src, sizeof(float) * (size_t)width * height); return; } + const int half = taps / 2; + DT_OMP_FOR() + for(int y = 0; y < height; y++) + { + for(int x = 0; x < width; x++) + { + float s = 0.0f; + for(int k = 0; k < taps; k++) + { + int yy = y + k - half; + if(yy < 0) yy = -yy; + else if(yy >= height) yy = 2 * height - 2 - yy; + s += kernel[k] * src[(size_t)yy * width + x]; + } + dst[(size_t)y * width + x] = s; + } + } +} - /* Build integral image */ +/* ---------- 5x5 cross Laplacian (red/blue interpolation) ---------- */ +static void _ari_laplacian_5x5_cross(const float *const restrict src, + float *const restrict dst, + const int width, + const int height) +{ + DT_OMP_FOR() for(int y = 0; y < height; y++) for(int x = 0; x < width; x++) - integral[(size_t)(y + 1) * (width + 1) + (x + 1)] = - (double)src[(size_t)y * width + x] - + integral[(size_t)y * (width + 1) + (x + 1)] - + integral[(size_t)(y + 1) * (width + 1) + x] - - integral[(size_t)y * (width + 1) + x]; + { + const int xl = (x >= 2) ? x - 2 : x + 2; + const int xr = (x < width - 2) ? x + 2 : x - 2; + const int yu = (y >= 2) ? y - 2 : y + 2; + const int yd = (y < height - 2) ? y + 2 : y - 2; + dst[(size_t)y * width + x] = + 4.0f * src[(size_t)y * width + x] + - src[(size_t)y * width + xl] + - src[(size_t)y * width + xr] + - src[(size_t)yu * width + x] + - src[(size_t)yd * width + x]; + } +} - /* Query */ +/* ---------- 2D convolution with an arbitrary kernel (mirror boundary) ---------- */ +__attribute__((unused)) static void _ari_conv2d(const float *const restrict src, + float *const restrict dst, + const int width, + const int height, + const float *const restrict kernel, + const int kh, + const int kw) +{ + const int half_h = kh / 2; + const int half_w = kw / 2; DT_OMP_FOR() for(int y = 0; y < height; y++) - { - const int y0 = MAX(0, y - radius), y1 = MIN(height - 1, y + radius); for(int x = 0; x < width; x++) { - const int x0 = MAX(0, x - radius), x1 = MIN(width - 1, x + radius); - const double sum = integral[(size_t)(y1 + 1) * (width + 1) + (x1 + 1)] - - integral[(size_t)y0 * (width + 1) + (x1 + 1)] - - integral[(size_t)(y1 + 1) * (width + 1) + x0] - + integral[(size_t)y0 * (width + 1) + x0]; - const int cnt = (y1 - y0 + 1) * (x1 - x0 + 1); - dst[(size_t)y * width + x] = (float)(sum / cnt); + float s = 0.0f; + for(int ky = 0; ky < kh; ky++) + { + int yy = y + ky - half_h; + if(yy < 0) yy = -yy; + else if(yy >= height) yy = 2 * height - 2 - yy; + for(int kx = 0; kx < kw; kx++) + { + int xx = x + kx - half_w; + if(xx < 0) xx = -xx; + else if(xx >= width) xx = 2 * width - 2 - xx; + s += kernel[ky * kw + kx] * src[(size_t)yy * width + xx]; + } + } + dst[(size_t)y * width + x] = s; } +} + +/* ---------- ARI's guided filter (Eq. 2) ===================================== + * Port of ARI.zip/guidedfilter.m. Signature mirrors the MATLAB one: + * guidedfilter(I, p, M, rh, rv, eps) + * with I = guide, p = input, M = sample-validity mask for the regression, + * and (rh, rv) the rectangular radius of the support window. + * Differences vs the textbook He et al. guided filter: + * - masked statistics: mean_I = box(I*M)/N where N = box(M); + * - output-phase a,b smoothing is weighted by the per-pixel inverse + * regression residual (the `dif` block below), which is what the + * paper calls "weighted averaging" and is the crucial piece for + * reaching the paper's CPSNR numbers. + */ +__attribute__((unused)) static void _ari_guidedfilter(const float *const restrict gI, + const float *const restrict p, + const float *const restrict M, + float *const restrict out, + const int width, + const int height, + const int rh, + const int rv, + const float eps, + /* workspace buffers, caller-managed */ + float *const restrict work0, + float *const restrict work1, + float *const restrict work2, + float *const restrict work3, + float *const restrict work4, + float *const restrict work5, + float *const restrict work6, + float *const restrict work7, + float *const restrict work8, + float *const restrict work9, + float *const restrict work10, + double *const restrict integral) +{ + const size_t npix = (size_t)width * height; + + /* Cache the pre-division box sums so the dif computation can reuse them + * instead of boxing the same products a second time. */ + float *N = work0; /* box(M) */ + float *sI = work1; /* box(I*M), reused as mean_a late */ + float *sp = work2; /* box(p*M), reused as mean_b late */ + float *sIp = work3; /* box(I*p*M) */ + float *sII = work4; /* box(I*I*M) */ + float *spp = work5; /* box(p*p*M) */ + float *aa = work6; + float *bb = work7; + float *dif = work8; + float *wdif = work9; + float *tmp = work10; + + _ari_box_sum_rect(M, N, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) if(N[i] == 0.0f) N[i] = 1.0f; + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = gI[i] * M[i]; + _ari_box_sum_rect(tmp, sI, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = p[i] * M[i]; + _ari_box_sum_rect(tmp, sp, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = gI[i] * p[i] * M[i]; + _ari_box_sum_rect(tmp, sIp, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = gI[i] * gI[i] * M[i]; + _ari_box_sum_rect(tmp, sII, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = p[i] * p[i] * M[i]; + _ari_box_sum_rect(tmp, spp, width, height, rh, rv, integral); + + /* a, b from cached sums (fused). */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float n = N[i]; + const float mI = sI[i] / n; + const float mp = sp[i] / n; + const float cov = sIp[i] / n - mI * mp; + const float var = sII[i] / n - mI * mI; + aa[i] = cov / (var + eps); + bb[i] = mp - aa[i] * mI; + } + + /* dif = E[(aI+b-p)^2 | mask] via cached sums, fused into a single pass. */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float a = aa[i], b = bb[i]; + float d = a*a*sII[i] + b*b*N[i] + spp[i] + + 2.0f*a*b*sI[i] - 2.0f*b*sp[i] - 2.0f*a*sIp[i]; + d /= N[i]; + if(d < 0.0f) d = 0.0f; + d = sqrtf(d); + if(d < 1e-3f) d = 1e-3f; + dif[i] = 1.0f / d; + } + + _ari_box_sum_rect(dif, wdif, width, height, rh, rv, integral); + + /* mean_a, mean_b via weighted average. Reuse sI/sp buffers. */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = aa[i] * dif[i]; + _ari_box_sum_rect(tmp, sI /*= mean_a*/, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = bb[i] * dif[i]; + _ari_box_sum_rect(tmp, sp /*= mean_b*/, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float denom = wdif[i] + 1e-4f; + const float ma = sI[i] / denom; + const float mb = sp[i] / denom; + out[i] = ma * gI[i] + mb; } - free(integral); } -/* ===== Guided filter -- single channel (He et al. 2013) ===== */ -static void _ari_guided_filter_ch(const float *const restrict input, - const float *const restrict guide, - float *const restrict output, - const int width, - const int height, - const int radius, - const float eps) +/* ---------- Paired RI guided filter (shares the 6 pre-aa box sums between + * GF(U,V,M) and GF(V,U,M) since those compute identical box sums + * modulo pointer relabelling). Used for iteration-loop calls where + * (riGrH, riRH) and (riGbH, riBH) etc. form natural swap pairs. */ +static void _ari_gf_pair(const float *const restrict U, + const float *const restrict V, + const float *const restrict M, + float *const restrict out_UV, /* GF(I=U, p=V, M) */ + float *const restrict out_VU, /* GF(I=V, p=U, M) */ + const int width, + const int height, + const int rh, + const int rv, + const float eps, + /* workspace: 13 npix buffers */ + float *const restrict N, /* box(M) */ + float *const restrict sU, /* box(U*M) */ + float *const restrict sV, /* box(V*M) */ + float *const restrict sUV, /* box(U*V*M) */ + float *const restrict sUU, /* box(U*U*M) */ + float *const restrict sVV, /* box(V*V*M) */ + float *const restrict aa, + float *const restrict bb, + float *const restrict dif, + float *const restrict wdif, + float *const restrict tmp, + float *const restrict mean_a, + float *const restrict mean_b, + double *const restrict integral) { const size_t npix = (size_t)width * height; - float *mean_I = dt_alloc_align_float(npix); - float *mean_p = dt_alloc_align_float(npix); - float *mean_Ip = dt_alloc_align_float(npix); - float *mean_II = dt_alloc_align_float(npix); - float *aa = dt_alloc_align_float(npix); - float *bb = dt_alloc_align_float(npix); - float *mean_a = dt_alloc_align_float(npix); - float *mean_b = dt_alloc_align_float(npix); - float *tmp1 = dt_alloc_align_float(npix); - float *tmp2 = dt_alloc_align_float(npix); + /* --- Shared box sums (6): computed once for both calls --- */ + _ari_box_sum_rect(M, N, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) if(N[i] == 0.0f) N[i] = 1.0f; + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = U[i] * M[i]; + _ari_box_sum_rect(tmp, sU, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = V[i] * M[i]; + _ari_box_sum_rect(tmp, sV, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = U[i] * V[i] * M[i]; + _ari_box_sum_rect(tmp, sUV, width, height, rh, rv, integral); - if(!mean_I || !mean_p || !mean_Ip || !mean_II || !aa || !bb - || !mean_a || !mean_b || !tmp1 || !tmp2) + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = U[i] * U[i] * M[i]; + _ari_box_sum_rect(tmp, sUU, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = V[i] * V[i] * M[i]; + _ari_box_sum_rect(tmp, sVV, width, height, rh, rv, integral); + + /* --- Call 1: I=U, p=V (sI=sU, sp=sV, sII=sUU, spp=sVV; sIp=sUV) --- */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) { - memcpy(output, input, sizeof(float) * npix); - goto cleanup_gf; + const float n = N[i]; + const float mI = sU[i] / n; + const float mp = sV[i] / n; + const float cov = sUV[i] / n - mI * mp; + const float var = sUU[i] / n - mI * mI; + aa[i] = cov / (var + eps); + bb[i] = mp - aa[i] * mI; } - /* Products */ DT_OMP_FOR() for(size_t i = 0; i < npix; i++) { - tmp1[i] = guide[i] * input[i]; - tmp2[i] = guide[i] * guide[i]; + const float a = aa[i], b = bb[i]; + float d = a*a*sUU[i] + b*b*N[i] + sVV[i] + + 2.0f*a*b*sU[i] - 2.0f*b*sV[i] - 2.0f*a*sUV[i]; + d /= N[i]; + if(d < 0.0f) d = 0.0f; + d = sqrtf(d); + if(d < 1e-3f) d = 1e-3f; + dif[i] = 1.0f / d; } - /* Box-filtered means */ - _ari_box_filter(guide, mean_I, width, height, radius); - _ari_box_filter(input, mean_p, width, height, radius); - _ari_box_filter(tmp1, mean_Ip, width, height, radius); - _ari_box_filter(tmp2, mean_II, width, height, radius); + _ari_box_sum_rect(dif, wdif, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = aa[i] * dif[i]; + _ari_box_sum_rect(tmp, mean_a, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = bb[i] * dif[i]; + _ari_box_sum_rect(tmp, mean_b, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float denom = wdif[i] + 1e-4f; + out_UV[i] = mean_a[i] / denom * U[i] + mean_b[i] / denom; + } - /* a, b coefficients */ + /* --- Call 2: I=V, p=U (swap: sI=sV, sp=sU, sII=sVV, spp=sUU; sIp=sUV) --- */ DT_OMP_FOR() for(size_t i = 0; i < npix; i++) { - const float cov = mean_Ip[i] - mean_I[i] * mean_p[i]; - const float var = mean_II[i] - mean_I[i] * mean_I[i]; + const float n = N[i]; + const float mI = sV[i] / n; + const float mp = sU[i] / n; + const float cov = sUV[i] / n - mI * mp; + const float var = sVV[i] / n - mI * mI; aa[i] = cov / (var + eps); - bb[i] = mean_p[i] - aa[i] * mean_I[i]; + bb[i] = mp - aa[i] * mI; + } + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float a = aa[i], b = bb[i]; + float d = a*a*sVV[i] + b*b*N[i] + sUU[i] + + 2.0f*a*b*sV[i] - 2.0f*b*sU[i] - 2.0f*a*sUV[i]; + d /= N[i]; + if(d < 0.0f) d = 0.0f; + d = sqrtf(d); + if(d < 1e-3f) d = 1e-3f; + dif[i] = 1.0f / d; } - /* Final: mean(a) * guide + mean(b) */ - _ari_box_filter(aa, mean_a, width, height, radius); - _ari_box_filter(bb, mean_b, width, height, radius); + _ari_box_sum_rect(dif, wdif, width, height, rh, rv, integral); + + /* mean_a, mean_b for call 2 reuse sU, sV buffers (no longer needed). */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = aa[i] * dif[i]; + _ari_box_sum_rect(tmp, sU /*= mean_a2*/, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = bb[i] * dif[i]; + _ari_box_sum_rect(tmp, sV /*= mean_b2*/, width, height, rh, rv, integral); DT_OMP_FOR() for(size_t i = 0; i < npix; i++) - output[i] = mean_a[i] * guide[i] + mean_b[i]; - -cleanup_gf: - dt_free_align(mean_I); dt_free_align(mean_p); - dt_free_align(mean_Ip); dt_free_align(mean_II); - dt_free_align(aa); dt_free_align(bb); - dt_free_align(mean_a); dt_free_align(mean_b); - dt_free_align(tmp1); dt_free_align(tmp2); + { + const float denom = wdif[i] + 1e-4f; + out_VU[i] = sU[i] / denom * V[i] + sV[i] / denom; + } } -/* ===== Green interpolation: Hamilton-Adams (for RI) ===== */ -static void _ari_green_ha(float *const restrict green, - const float *const restrict in, - const int width, - const int height, - const uint32_t filters) +/* ---------- ARI's MLRI guided filter (Kiku 2014) ============================ + * Port of ARI.zip/guidedfilter_MLRI.m. Signature: + * guidedfilter_MLRI(G, R, mask_intensity, I, p, mask_laplace, rh, rv, eps) + * where G, R are the intensity-domain guide and input (used for the b + * offset and the dif weighting), and I, p are the Laplacian-filtered + * guide and input (used to solve the origin-through slope + * a = box(I*p*Mdif) / (box(I*I*Mdif) + eps) ). + */ +static void _ari_guidedfilter_mlri(const float *const restrict G, + const float *const restrict R, + const float *const restrict mask_int, + const float *const restrict gI, + const float *const restrict p, + const float *const restrict mask_lap, + float *const restrict out, + const int width, + const int height, + const int rh, + const int rv, + const float eps, + float *const restrict work0, + float *const restrict work1, + float *const restrict work2, + float *const restrict work3, + float *const restrict work4, + float *const restrict work5, + float *const restrict work6, + float *const restrict work7, + float *const restrict work8, + float *const restrict work9, + float *const restrict work10, + double *const restrict integral) { - /* Copy known G values, zero non-G */ + const size_t npix = (size_t)width * height; + + float *N3 = work0; /* box(mask_int) */ + float *N = work1; /* box(mask_lap) */ + float *aa = work2; + float *bb = work3; + float *sG = work4; /* box(G*mask_int) - kept for dif and mean_G */ + float *sR = work5; /* box(R*mask_int) - kept for dif and mean_R */ + float *sGG = work6; /* box(G*G*mask_int) */ + float *dif = work7; + float *wdif = work8; + float *tmp = work9; + float *aux = work10; + + _ari_box_sum_rect(mask_int, N3, width, height, rh, rv, integral); + _ari_box_sum_rect(mask_lap, N, width, height, rh, rv, integral); DT_OMP_FOR() - for(int y = 0; y < height; y++) - for(int x = 0; x < width; x++) - { - const size_t pos = (size_t)y * width + x; - green[pos] = (FC(y, x, filters) == 1) ? in[pos] : 0.0f; - } + for(size_t i = 0; i < npix; i++) + { + if(N3[i] == 0.0f) N3[i] = 1.0f; + if(N[i] == 0.0f) N[i] = 1.0f; + } - /* Interpolate G at R/B positions (HA: gradient + Laplacian correction) */ + /* a = box(I*p*Mdif) / (box(I*I*Mdif) + eps) (origin-through) */ DT_OMP_FOR() - for(int y = 2; y < height - 2; y++) - for(int x = 2; x < width - 2; x++) - { - if(FC(y, x, filters) == 1) continue; - const size_t p = (size_t)y * width + x; + for(size_t i = 0; i < npix; i++) tmp[i] = gI[i] * p[i] * mask_lap[i]; + _ari_box_sum_rect(tmp, aa, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = gI[i] * gI[i] * mask_lap[i]; + _ari_box_sum_rect(tmp, aux, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float num = aa[i] / N[i]; + const float den = aux[i] / N[i]; + aa[i] = num / (den + eps); + } - const float dH = fabsf(in[p - 2] - in[p]) - + fabsf(in[p - 1] - in[p + 1]); - const float dV = fabsf(in[p - 2 * width] - in[p]) - + fabsf(in[p - width] - in[p + width]); + /* Cached sums over mask_int: sG, sR, sGG computed once and reused in dif. */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = G[i] * mask_int[i]; + _ari_box_sum_rect(tmp, sG, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = R[i] * mask_int[i]; + _ari_box_sum_rect(tmp, sR, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = G[i] * G[i] * mask_int[i]; + _ari_box_sum_rect(tmp, sGG, width, height, rh, rv, integral); - const float Gh = (in[p - 1] + in[p + 1]) * 0.5f - + (2.0f * in[p] - in[p - 2] - in[p + 2]) * 0.25f; - const float Gv = (in[p - width] + in[p + width]) * 0.5f - + (2.0f * in[p] - in[p - 2 * width] - in[p + 2 * width]) * 0.25f; + /* bb = mean_R - a*mean_G. */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float n = N3[i]; + bb[i] = sR[i] / n - aa[i] * sG[i] / n; + } - green[p] = (dH < dV) ? Gh : (dV < dH) ? Gv : (Gh + Gv) * 0.5f; - } + /* dif accumulator. The aux buffer is reused for sRR and sRG one-shot sums. */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = R[i] * R[i] * mask_int[i]; + _ari_box_sum_rect(tmp, aux /* = sRR */, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float a = aa[i], b = bb[i]; + dif[i] = a*a*sGG[i] + b*b*N3[i] + aux[i] + 2.0f*a*b*sG[i] - 2.0f*b*sR[i]; + } + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = R[i] * G[i] * mask_int[i]; + _ari_box_sum_rect(tmp, aux /* = sRG */, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + float d = (dif[i] - 2.0f * aa[i] * aux[i]) / N3[i]; + if(d < 0.0f) d = 0.0f; + d = sqrtf(d); + if(d < 1e-3f) d = 1e-3f; + dif[i] = 1.0f / d; + } + + _ari_box_sum_rect(dif, wdif, width, height, rh, rv, integral); + + /* Weighted mean_a, mean_b. Reuse sG as mean_a, sR as mean_b (no longer needed). */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = aa[i] * dif[i]; + _ari_box_sum_rect(tmp, sG /*= mean_a*/, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = bb[i] * dif[i]; + _ari_box_sum_rect(tmp, sR /*= mean_b*/, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float denom = wdif[i] + 1e-4f; + const float ma = sG[i] / denom; + const float mb = sR[i] / denom; + out[i] = ma * G[i] + mb; + } } -/* ===== Green interpolation: MLRI (Laplacian energy direction) ===== */ -static void _ari_green_mlri(float *const restrict green, - const float *const restrict in, - const int width, - const int height, - const uint32_t filters) +/* ---------- Paired MLRI guided filter (shares the 6 intensity-mask box sums + * between GF_MLRI(U,V,...) and GF_MLRI(V,U,...). The Laplacian- + * domain sums differ per call (mask_lap, I, p change) and are + * computed separately. Used for (mlRH, mlGrH) etc. pairs. */ +static void _ari_gfmlri_pair(const float *const restrict U, /* intensity guide, call 1 */ + const float *const restrict V, /* intensity input, call 1 */ + const float *const restrict mask_int, + const float *const restrict I1, /* Laplacian guide, call 1 */ + const float *const restrict p1, /* Laplacian input, call 1 */ + const float *const restrict mask_lap1, + const float *const restrict I2, /* Laplacian guide, call 2 (swap) */ + const float *const restrict p2, /* Laplacian input, call 2 */ + const float *const restrict mask_lap2, + float *const restrict out_UV, /* MLRI(G=U, R=V, I=I1, p=p1, mask_lap=mask_lap1) */ + float *const restrict out_VU, /* MLRI(G=V, R=U, I=I2, p=p2, mask_lap=mask_lap2) */ + const int width, + const int height, + const int rh, + const int rv, + const float eps, + /* workspace: 14 npix buffers */ + float *const restrict N3, /* box(mask_int), shared */ + float *const restrict sU, /* box(U*mask_int), shared */ + float *const restrict sV, /* box(V*mask_int), shared */ + float *const restrict sUV, /* box(U*V*mask_int), shared */ + float *const restrict sUU, /* box(U*U*mask_int), shared */ + float *const restrict sVV, /* box(V*V*mask_int), shared */ + float *const restrict N, /* box(mask_lap), per-call */ + float *const restrict aa, + float *const restrict bb, + float *const restrict dif, + float *const restrict wdif, + float *const restrict tmp, + float *const restrict mean_a, + float *const restrict mean_b, + double *const restrict integral) +{ + const size_t npix = (size_t)width * height; + + /* --- Shared intensity-mask box sums (6): computed once for both calls --- */ + _ari_box_sum_rect(mask_int, N3, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) if(N3[i] == 0.0f) N3[i] = 1.0f; + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = U[i] * mask_int[i]; + _ari_box_sum_rect(tmp, sU, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = V[i] * mask_int[i]; + _ari_box_sum_rect(tmp, sV, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = U[i] * V[i] * mask_int[i]; + _ari_box_sum_rect(tmp, sUV, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = U[i] * U[i] * mask_int[i]; + _ari_box_sum_rect(tmp, sUU, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = V[i] * V[i] * mask_int[i]; + _ari_box_sum_rect(tmp, sVV, width, height, rh, rv, integral); + + /* --- Call 1: G=U, R=V, I=I1, p=p1, mask_lap=mask_lap1 --- */ + _ari_box_sum_rect(mask_lap1, N, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) if(N[i] == 0.0f) N[i] = 1.0f; + + /* aa = box(I1*p1*mask_lap1) / (box(I1*I1*mask_lap1) + eps*N) */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = I1[i] * p1[i] * mask_lap1[i]; + _ari_box_sum_rect(tmp, aa, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = I1[i] * I1[i] * mask_lap1[i]; + _ari_box_sum_rect(tmp, bb /* = aa_denom temp */, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float num = aa[i] / N[i]; + const float den = bb[i] / N[i]; + aa[i] = num / (den + eps); + bb[i] = sV[i] / N3[i] - aa[i] * sU[i] / N3[i]; + } + + /* dif = a²*sUU + b²*N3 + sVV + 2ab*sU - 2b*sV - 2a*sUV (G=U, R=V) */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float a = aa[i], b = bb[i]; + float d = a*a*sUU[i] + b*b*N3[i] + sVV[i] + + 2.0f*a*b*sU[i] - 2.0f*b*sV[i] - 2.0f*a*sUV[i]; + d /= N3[i]; + if(d < 0.0f) d = 0.0f; + d = sqrtf(d); + if(d < 1e-3f) d = 1e-3f; + dif[i] = 1.0f / d; + } + + _ari_box_sum_rect(dif, wdif, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = aa[i] * dif[i]; + _ari_box_sum_rect(tmp, mean_a, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = bb[i] * dif[i]; + _ari_box_sum_rect(tmp, mean_b, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float denom = wdif[i] + 1e-4f; + out_UV[i] = mean_a[i] / denom * U[i] + mean_b[i] / denom; + } + + /* --- Call 2: G=V, R=U, I=I2, p=p2, mask_lap=mask_lap2 --- */ + _ari_box_sum_rect(mask_lap2, N, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) if(N[i] == 0.0f) N[i] = 1.0f; + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = I2[i] * p2[i] * mask_lap2[i]; + _ari_box_sum_rect(tmp, aa, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = I2[i] * I2[i] * mask_lap2[i]; + _ari_box_sum_rect(tmp, bb, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float num = aa[i] / N[i]; + const float den = bb[i] / N[i]; + aa[i] = num / (den + eps); + /* swap: G=V, R=U */ + bb[i] = sU[i] / N3[i] - aa[i] * sV[i] / N3[i]; + } + + /* dif = a²*sVV + b²*N3 + sUU + 2ab*sV - 2b*sU - 2a*sUV (G=V, R=U) */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float a = aa[i], b = bb[i]; + float d = a*a*sVV[i] + b*b*N3[i] + sUU[i] + + 2.0f*a*b*sV[i] - 2.0f*b*sU[i] - 2.0f*a*sUV[i]; + d /= N3[i]; + if(d < 0.0f) d = 0.0f; + d = sqrtf(d); + if(d < 1e-3f) d = 1e-3f; + dif[i] = 1.0f / d; + } + + _ari_box_sum_rect(dif, wdif, width, height, rh, rv, integral); + /* mean_a, mean_b for call 2 reuse sU, sV (shared sums no longer needed). */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = aa[i] * dif[i]; + _ari_box_sum_rect(tmp, sU /*= mean_a2*/, width, height, rh, rv, integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tmp[i] = bb[i] * dif[i]; + _ari_box_sum_rect(tmp, sV /*= mean_b2*/, width, height, rh, rv, integral); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float denom = wdif[i] + 1e-4f; + out_VU[i] = sU[i] / denom * V[i] + sV[i] / denom; + } +} + +/* ---------- Gaussian 5x5 sigma=2 ========================================== + * fspecial('gaussian', [5,5], 2) from MATLAB. Used for smoothing the + * per-iteration criterion maps before the argmin selection. */ +static const float ARI_GAUSS5_SIGMA2[25] = { + 0.02324684f, 0.03382395f, 0.03832756f, 0.03382395f, 0.02324684f, + 0.03382395f, 0.04921356f, 0.05576627f, 0.04921356f, 0.03382395f, + 0.03832756f, 0.05576627f, 0.06319146f, 0.05576627f, 0.03832756f, + 0.03382395f, 0.04921356f, 0.05576627f, 0.04921356f, 0.03382395f, + 0.02324684f, 0.03382395f, 0.03832756f, 0.03382395f, 0.02324684f, +}; + +/* 1D factorization of the above 5x5 Gaussian (outer product). Using this + * with _ari_imfilter_h1d + _ari_imfilter_v1d yields the exact same result + * with 10 MAC/pix instead of 25. */ +static const float ARI_GAUSS5_SIGMA2_1D[5] = { + 0.15246898f, 0.22184129f, 0.25137911f, 0.22184129f, 0.15246898f +}; + +/* Wrapper: separable Gaussian 5x5 sigma=2. Uses `scratch` as an intermediate + * npix buffer. Callers may pass src == dst (overwrite-in-place); the scratch + * buffer mediates. So src/dst are not restrict-qualified here. */ +static inline void _ari_gauss5_separable(const float *const src, + float *const dst, + float *const restrict scratch, + const int width, + const int height) +{ + _ari_imfilter_h1d(src, scratch, width, height, ARI_GAUSS5_SIGMA2_1D, 5); + _ari_imfilter_v1d(scratch, dst, width, height, ARI_GAUSS5_SIGMA2_1D, 5); +} + +/* ---------- Bayer masks ==================================================== */ +static void _ari_build_masks(float *const restrict mR, + float *const restrict mG, + float *const restrict mB, + float *const restrict mGr, + float *const restrict mGb, + const int width, + const int height, + const uint32_t filters) { - /* Copy known G values */ DT_OMP_FOR() for(int y = 0; y < height; y++) + { + /* A row is a "red row" if either of its first two pixels is R. */ + const gboolean red_row = + (FC(y, 0, filters) == 0) || (FC(y, 1, filters) == 0); for(int x = 0; x < width; x++) { const size_t p = (size_t)y * width + x; - green[p] = (FC(y, x, filters) == 1) ? in[p] : 0.0f; + const int fc = FC(y, x, filters); + mR[p] = (fc == 0) ? 1.0f : 0.0f; + mG[p] = (fc == 1) ? 1.0f : 0.0f; + mB[p] = (fc == 2) ? 1.0f : 0.0f; + mGr[p] = (fc == 1 && red_row) ? 1.0f : 0.0f; + mGb[p] = (fc == 1 && !red_row) ? 1.0f : 0.0f; } + } +} - /* Border region: HA (needs only 2-pixel border) */ - for(int y = 2; y < height - 2; y++) - for(int x = 2; x < width - 2; x++) - { - if(y >= 4 && y < height - 4 && x >= 4 && x < width - 4) continue; - if(FC(y, x, filters) == 1) continue; - const size_t p = (size_t)y * width + x; - const float dH = fabsf(in[p - 2] - in[p]) + fabsf(in[p - 1] - in[p + 1]); - const float dV = fabsf(in[p - 2 * width] - in[p]) + fabsf(in[p - width] - in[p + width]); - const float Gh = (in[p - 1] + in[p + 1]) * 0.5f + (2.0f * in[p] - in[p - 2] - in[p + 2]) * 0.25f; - const float Gv = (in[p - width] + in[p + width]) * 0.5f + (2.0f * in[p] - in[p - 2 * width] - in[p + 2 * width]) * 0.25f; - green[p] = (dH < dV) ? Gh : (dV < dH) ? Gv : (Gh + Gv) * 0.5f; - } +/* ---------- Color-channel split (sparse R, G, B) =========================== */ +static void _ari_split_channels(const float *const restrict raw, + const float *const restrict mR, + const float *const restrict mG, + const float *const restrict mB, + float *const restrict R, + float *const restrict G, + float *const restrict B, + const size_t npix) +{ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + R[i] = raw[i] * mR[i]; + G[i] = raw[i] * mG[i]; + B[i] = raw[i] * mB[i]; + } +} - /* Interior: MLRI (5x5 Laplacian energy direction selection) */ +/* ---------- Compose green at every pixel from the 4 best candidates ======== + * Final ARI green = weighted average of RI_Gh, RI_Gv, MLRI_Gh, MLRI_Gv + * with weights 1/(best_criterion + 1e-10). Measured G values are passed + * through unchanged. */ +static void _ari_combine_green(const float *const restrict raw, + const float *const restrict mG, + const float *const restrict RI_Gh, + const float *const restrict RI_Gv, + const float *const restrict MLRI_Gh, + const float *const restrict MLRI_Gv, + const float *const restrict RI_w2h, + const float *const restrict RI_w2v, + const float *const restrict MLRI_w2h, + const float *const restrict MLRI_w2v, + float *const restrict green, + const size_t npix) +{ DT_OMP_FOR() - for(int y = 4; y < height - 4; y++) - for(int x = 4; x < width - 4; x++) - { - if(FC(y, x, filters) == 1) continue; - const size_t p = (size_t)y * width + x; - const int w = width; - - /* Directional green estimates with Laplacian correction */ - const float Gh = (in[p - 1] + in[p + 1]) * 0.5f - + (2.0f * in[p] - in[p - 2] - in[p + 2]) * 0.25f; - const float Gv = (in[p - w] + in[p + w]) * 0.5f - + (2.0f * in[p] - in[p - 2*w] - in[p + 2*w]) * 0.25f; - - /* Horizontal residual at center and same-color neighbors */ - const float rh = in[p] - Gh; - const float rh_l = in[p - 2] - (in[p - 3] + in[p - 1]) * 0.5f; - const float rh_r = in[p + 2] - (in[p + 1] + in[p + 3]) * 0.5f; - - /* Vertical residual */ - const float rv = in[p] - Gv; - const float rv_u = in[p - 2*w] - (in[p - 3*w] + in[p - w]) * 0.5f; - const float rv_d = in[p + 2*w] - (in[p + w] + in[p + 3*w]) * 0.5f; - - /* Laplacian energy of residuals + 2nd difference of raw */ - const float Lh = fabsf(rh_l - 2.0f * rh + rh_r) - + fabsf(in[p - 2] - 2.0f * in[p] + in[p + 2]); - const float Lv = fabsf(rv_u - 2.0f * rv + rv_d) - + fabsf(in[p - 2*w] - 2.0f * in[p] + in[p + 2*w]); - - green[p] = (Lh < Lv) ? Gh : (Lv < Lh) ? Gv : (Gh + Gv) * 0.5f; - } + for(size_t i = 0; i < npix; i++) + { + const float wr_h = 1.0f / (RI_w2h[i] + 1e-10f); + const float wr_v = 1.0f / (RI_w2v[i] + 1e-10f); + const float wm_h = 1.0f / (MLRI_w2h[i] + 1e-10f); + const float wm_v = 1.0f / (MLRI_w2v[i] + 1e-10f); + const float wsum = wr_h + wr_v + wm_h + wm_v + 1e-32f; + const float gmix = (wr_h * RI_Gh[i] + wr_v * RI_Gv[i] + + wm_h * MLRI_Gh[i] + wm_v * MLRI_Gv[i]) / wsum; + green[i] = (mG[i] > 0.0f) ? raw[i] : gmix; + } } -/* ===== Bilinear color-difference interpolation ===== */ -static void _ari_interpolate_cd(float *const restrict diff_r, - float *const restrict diff_b, - const float *const restrict in, - const float *const restrict green, - const int width, - const int height, - const uint32_t filters) +/* ============================================================================ + * Paper-exact ARI green interpolation (port of green_interpolation.m). + * + * Runs 11 iterations of joint RI + MLRI directional tentative estimates + * (8 guided-filter calls per method per direction = 16 per iteration = 32 + * per iteration total). Each iteration: + * (1) Tentative estimates for Gr, Gb, R, B (both methods, both dirs) + * (2) Residuals at the observed sample sites, upsampled with a 3-tap + * [1/2, 1, 1/2] kernel (paper's Eq. 5) + * (3) Criterion map = (|guide - tentative|)^2 * |gradient(guide - tentative)| + * Summed across {Gr+R, Gb+B} pairs, h and v, Gaussian-smoothed (sigma=2) + * (4) Per-pixel argmin across iterations: if the current criterion is + * smaller, replace the best-green-so-far and best-criterion-so-far + * (5) Guide update: the next iteration uses the refined estimates + * (6) Window grows: RI (h+=2, v+=1), MLRI (h2+=2, v2+=1) + * Final output combines the 4 best-greens (RI h/v, MLRI h/v) by + * inverse-criterion weighted average. + * ============================================================================ */ +static void _ari_green_interpolation(float *const restrict green, + const float *const restrict raw, + const int width, + const int height, + const uint32_t filters, + const float eps, + const int itnum, + double *const restrict integral) { const size_t npix = (size_t)width * height; - memset(diff_r, 0, sizeof(float) * npix); - memset(diff_b, 0, sizeof(float) * npix); - /* Compute known color differences */ - DT_OMP_FOR() - for(int y = 0; y < height; y++) - for(int x = 0; x < width; x++) + /* --- mask channels & split raw per colour (sparse) --- */ + float *mR = dt_alloc_align_float(npix); + float *mG = dt_alloc_align_float(npix); + float *mB = dt_alloc_align_float(npix); + float *mGr = dt_alloc_align_float(npix); + float *mGb = dt_alloc_align_float(npix); + float *R_ch = dt_alloc_align_float(npix); + float *G_ch = dt_alloc_align_float(npix); + float *B_ch = dt_alloc_align_float(npix); + + /* --- regression masks: Mrh = R+Gr, Mbh = B+Gb, etc. --- */ + float *Mrh = dt_alloc_align_float(npix); + float *Mbh = dt_alloc_align_float(npix); + float *Mrv = dt_alloc_align_float(npix); + float *Mbv = dt_alloc_align_float(npix); + + /* --- initial 1D bilinear rawH, rawV (for guide construction) --- */ + float *rawH = dt_alloc_align_float(npix); + float *rawV = dt_alloc_align_float(npix); + + /* --- per-direction, per-method guides (8 per method = 16 total) --- */ + float *RI_Guidegrh = dt_alloc_align_float(npix); + float *RI_Guidegbh = dt_alloc_align_float(npix); + float *RI_Guiderh = dt_alloc_align_float(npix); + float *RI_Guidebh = dt_alloc_align_float(npix); + float *RI_Guidegrv = dt_alloc_align_float(npix); + float *RI_Guidegbv = dt_alloc_align_float(npix); + float *RI_Guiderv = dt_alloc_align_float(npix); + float *RI_Guidebv = dt_alloc_align_float(npix); + float *ML_Guidegrh = dt_alloc_align_float(npix); + float *ML_Guidegbh = dt_alloc_align_float(npix); + float *ML_Guiderh = dt_alloc_align_float(npix); + float *ML_Guidebh = dt_alloc_align_float(npix); + float *ML_Guidegrv = dt_alloc_align_float(npix); + float *ML_Guidegbv = dt_alloc_align_float(npix); + float *ML_Guiderv = dt_alloc_align_float(npix); + float *ML_Guidebv = dt_alloc_align_float(npix); + + /* --- best (per-pixel argmin) state --- */ + float *RI_Gh = dt_alloc_align_float(npix); + float *RI_Gv = dt_alloc_align_float(npix); + float *ML_Gh = dt_alloc_align_float(npix); + float *ML_Gv = dt_alloc_align_float(npix); + float *RI_w2h = dt_alloc_align_float(npix); + float *RI_w2v = dt_alloc_align_float(npix); + float *ML_w2h = dt_alloc_align_float(npix); + float *ML_w2v = dt_alloc_align_float(npix); + + /* --- transient per-iteration buffers (tentative, residual, refined, + * criterion, difcri, sums) --- reused across 4 channels, 2 dirs, + * 2 methods. We allocate a pool of 16 npix-sized floats and reuse. */ + float *t0 = dt_alloc_align_float(npix); + float *t1 = dt_alloc_align_float(npix); + float *t2 = dt_alloc_align_float(npix); + float *t3 = dt_alloc_align_float(npix); + float *t4 = dt_alloc_align_float(npix); + float *t5 = dt_alloc_align_float(npix); + float *t6 = dt_alloc_align_float(npix); + float *t7 = dt_alloc_align_float(npix); + /* guided-filter internal workspace (14 buffers: 11 legacy GF wrapper work + * slots + 3 extra for the shared-sums pair variants). */ + float *w0 = dt_alloc_align_float(npix); + float *w1 = dt_alloc_align_float(npix); + float *w2 = dt_alloc_align_float(npix); + float *w3 = dt_alloc_align_float(npix); + float *w4 = dt_alloc_align_float(npix); + float *w5 = dt_alloc_align_float(npix); + float *w6 = dt_alloc_align_float(npix); + float *w7 = dt_alloc_align_float(npix); + float *w8 = dt_alloc_align_float(npix); + float *w9 = dt_alloc_align_float(npix); + float *w10 = dt_alloc_align_float(npix); + float *w11 = dt_alloc_align_float(npix); + float *w12 = dt_alloc_align_float(npix); + float *w13 = dt_alloc_align_float(npix); + /* direction-sum and per-iter scratch */ + float *crih = dt_alloc_align_float(npix); + float *criv = dt_alloc_align_float(npix); + float *dcrih = dt_alloc_align_float(npix); + float *dcriv = dt_alloc_align_float(npix); + float *whMap = dt_alloc_align_float(npix); + float *wvMap = dt_alloc_align_float(npix); + + /* Group pointers so OOM handling can release them in bulk */ + float *const alloc_list[] = { + mR, mG, mB, mGr, mGb, R_ch, G_ch, B_ch, Mrh, Mbh, Mrv, Mbv, rawH, rawV, + RI_Guidegrh, RI_Guidegbh, RI_Guiderh, RI_Guidebh, + RI_Guidegrv, RI_Guidegbv, RI_Guiderv, RI_Guidebv, + ML_Guidegrh, ML_Guidegbh, ML_Guiderh, ML_Guidebh, + ML_Guidegrv, ML_Guidegbv, ML_Guiderv, ML_Guidebv, + RI_Gh, RI_Gv, ML_Gh, ML_Gv, + RI_w2h, RI_w2v, ML_w2h, ML_w2v, + t0, t1, t2, t3, t4, t5, t6, t7, + w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, + crih, criv, dcrih, dcriv, whMap, wvMap, + }; + const int n_alloc = (int)(sizeof(alloc_list) / sizeof(alloc_list[0])); + for(int i = 0; i < n_alloc; i++) + if(!alloc_list[i]) { - const size_t p = (size_t)y * width + x; - const int c = FC(y, x, filters); - if(c == 0) diff_r[p] = in[p] - green[p]; - else if(c == 2) diff_b[p] = in[p] - green[p]; + /* fallback: copy G at G sites, zero elsewhere */ + DT_OMP_FOR() + for(int y = 0; y < height; y++) + for(int x = 0; x < width; x++) + { + const size_t p = (size_t)y * width + x; + green[p] = (FC(y, x, filters) == 1) ? raw[p] : 0.0f; + } + for(int j = 0; j < n_alloc; j++) dt_free_align(alloc_list[j]); + return; } - /* Interpolate R-G at non-Red positions - * All interpolation uses only KNOWN Red positions: - * Gr (Green in Red row): horizontal R neighbors - * Gb (Green in Blue row): vertical R neighbors - * B: diagonal R neighbors */ + /* ---- Step 0: masks, split, initial guides ---- */ + _ari_build_masks(mR, mG, mB, mGr, mGb, width, height, filters); + _ari_split_channels(raw, mR, mG, mB, R_ch, G_ch, B_ch, npix); + DT_OMP_FOR() - for(int y = 2; y < height - 2; y++) - for(int x = 2; x < width - 2; x++) + for(size_t i = 0; i < npix; i++) + { + Mrh[i] = mR[i] + mGr[i]; + Mbh[i] = mB[i] + mGb[i]; + Mrv[i] = mR[i] + mGb[i]; + Mbv[i] = mB[i] + mGr[i]; + } + + const float kH_half01_half[3] = { 0.5f, 0.0f, 0.5f }; + _ari_imfilter_h1d(raw, rawH, width, height, kH_half01_half, 3); + _ari_imfilter_v1d(raw, rawV, width, height, kH_half01_half, 3); + + /* Initial guides (paper eq. 1 style) */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + RI_Guidegrh[i] = G_ch[i] * mGr[i] + rawH[i] * mR[i]; + RI_Guidegbh[i] = G_ch[i] * mGb[i] + rawH[i] * mB[i]; + RI_Guiderh[i] = R_ch[i] + rawH[i] * mGr[i]; + RI_Guidebh[i] = B_ch[i] + rawH[i] * mGb[i]; + RI_Guidegrv[i] = G_ch[i] * mGb[i] + rawV[i] * mR[i]; + RI_Guidegbv[i] = G_ch[i] * mGr[i] + rawV[i] * mB[i]; + RI_Guiderv[i] = R_ch[i] + rawV[i] * mGb[i]; + RI_Guidebv[i] = B_ch[i] + rawV[i] * mGr[i]; + + ML_Guidegrh[i] = RI_Guidegrh[i]; + ML_Guidegbh[i] = RI_Guidegbh[i]; + ML_Guiderh[i] = RI_Guiderh[i]; + ML_Guidebh[i] = RI_Guidebh[i]; + ML_Guidegrv[i] = RI_Guidegrv[i]; + ML_Guidegbv[i] = RI_Guidegbv[i]; + ML_Guiderv[i] = RI_Guiderv[i]; + ML_Guidebv[i] = RI_Guidebv[i]; + + RI_Gh[i] = RI_Guidegrh[i] + RI_Guidegbh[i]; + RI_Gv[i] = RI_Guidegrv[i] + RI_Guidegbv[i]; + ML_Gh[i] = ML_Guidegrh[i] + ML_Guidegbh[i]; + ML_Gv[i] = ML_Guidegrv[i] + ML_Guidegbv[i]; + + RI_w2h[i] = 1e32f; + RI_w2v[i] = 1e32f; + ML_w2h[i] = 1e32f; + ML_w2v[i] = 1e32f; + } + + /* Window sizes (paper) */ + int h_ri = 2, v_ri = 1; /* RI window */ + int h_ml = 4, v_ml = 0; /* MLRI window */ + + const float kRes3[3] = { 0.5f, 1.0f, 0.5f }; /* Eq. 5 residual upsample */ + const float kLap5[5] = { -1.0f, 0.0f, 2.0f, 0.0f, -1.0f }; /* Fh for MLRI */ + const float kGrad3[3] = { -1.0f, 0.0f, 1.0f }; + + for(int iter = 0; iter < itnum; iter++) + { + /* ========================================================= + * RI branch (h, v) — 8 GF calls per iter for R, B, Gr, Gb + * ========================================================= */ + + /* H direction: window (h_ri, v_ri) */ + /* We compute tentatives sequentially; need storage for Gr, Gb, R, B */ + float *riGrH = t0, *riGbH = t1, *riRH = t2, *riBH = t3; + float *riGrV = t4, *riGbV = t5, *riRV = t6, *riBV = t7; + + /* RI H: pair (riGrH, riRH) shares sums over Mrh; pair (riGbH, riBH) over Mbh */ + _ari_gf_pair(RI_Guiderh, RI_Guidegrh, Mrh, riGrH /*I=Guiderh*/, riRH /*I=Guidegrh*/, + width, height, h_ri, v_ri, eps, + w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, integral); + _ari_gf_pair(RI_Guidebh, RI_Guidegbh, Mbh, riGbH, riBH, + width, height, h_ri, v_ri, eps, + w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, integral); + + /* RI V: pair (riGrV, riRV) / (riGbV, riBV) with swapped window */ + _ari_gf_pair(RI_Guiderv, RI_Guidegrv, Mrv, riGrV, riRV, + width, height, v_ri, h_ri, eps, + w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, integral); + _ari_gf_pair(RI_Guidebv, RI_Guidegbv, Mbv, riGbV, riBV, + width, height, v_ri, h_ri, eps, + w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, integral); + + /* Process H RI channel: residual + bilinear up + final refined + criterion. + * Use crih/criv/dcrih/dcriv buffers to accumulate Gr+R / Gb+B (and h+v) sums. + * Local scratch reused through w0..w9. */ + + /* === RI Horizontal: Gr (cri/difcri/refined), R, Gb, B === */ { - const size_t p = (size_t)y * width + x; - const int c = FC(y, x, filters); - if(c == 0) continue; /* R: known */ + /* For Gr: res = (G - tentGrH) * mGr, upsampled, refined (tent+res)*mR + * cri = (Guide - tent) * Mrh, difcri = grad(cri, h), then abs. */ + float *res = w0; + float *resUp = w1; + float *refined_Grh = w2; /* kept for guide update below */ + float *cri = w3; + float *grad = w4; + float *refined_Rh = w5; /* kept for guide update below */ + + /* ---- Gr ---- */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (G_ch[i] - riGrH[i]) * mGr[i]; + _ari_imfilter_h1d(res, resUp, width, height, kRes3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = riGrH[i]; + refined_Grh[i] = (t + resUp[i]) * mR[i]; /* RI_Grh */ + cri[i] = (RI_Guidegrh[i] - t) * Mrh[i]; + } + _ari_imfilter_h1d(cri, grad, width, height, kGrad3, 3); + /* crih/dcrih accumulate: crih = (|cri_Gr| + |cri_R|)*Mrh summed with Gb+B later */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + crih[i] = fabsf(cri[i]); /* start with Gr contribution */ + dcrih[i] = fabsf(grad[i]); + } + + /* ---- R (uses Mrh as mask, maskGr on refined) ---- */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (R_ch[i] - riRH[i]) * mR[i]; + _ari_imfilter_h1d(res, resUp, width, height, kRes3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = riRH[i]; + refined_Rh[i] = (t + resUp[i]) * mGr[i]; /* RI_Rh */ + cri[i] = (RI_Guiderh[i] - t) * Mrh[i]; + } + _ari_imfilter_h1d(cri, grad, width, height, kGrad3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + crih[i] = (crih[i] + fabsf(cri[i])) * Mrh[i]; /* (|cri_Gr|+|cri_R|)*Mrh */ + dcrih[i] = (dcrih[i] + fabsf(grad[i])) * Mrh[i]; + } - if(c == 1) /* Green */ + /* ---- Gb ---- */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (G_ch[i] - riGbH[i]) * mGb[i]; + _ari_imfilter_h1d(res, resUp, width, height, kRes3, 3); + float *refined_Gbh = w6; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) { - if(FC(y, x - 1, filters) == 0) /* Gr: R left/right */ - diff_r[p] = (diff_r[p - 1] + diff_r[p + 1]) * 0.5f; - else /* Gb: R above/below */ - diff_r[p] = (diff_r[p - width] + diff_r[p + width]) * 0.5f; + const float t = riGbH[i]; + refined_Gbh[i] = (t + resUp[i]) * mB[i]; + cri[i] = (RI_Guidegbh[i] - t) * Mbh[i]; } - else /* Blue: diagonal R */ + _ari_imfilter_h1d(cri, grad, width, height, kGrad3, 3); + float *gbh_cri = w7; + float *gbh_dcri = w8; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) { - diff_r[p] = (diff_r[p - width - 1] + diff_r[p - width + 1] - + diff_r[p + width - 1] + diff_r[p + width + 1]) * 0.25f; + gbh_cri[i] = fabsf(cri[i]); + gbh_dcri[i] = fabsf(grad[i]); + } + + /* ---- B ---- */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (B_ch[i] - riBH[i]) * mB[i]; + _ari_imfilter_h1d(res, resUp, width, height, kRes3, 3); + float *refined_Bh = w9; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = riBH[i]; + refined_Bh[i] = (t + resUp[i]) * mGb[i]; + cri[i] = (RI_Guidebh[i] - t) * Mbh[i]; + } + _ari_imfilter_h1d(cri, grad, width, height, kGrad3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + gbh_cri[i] = (gbh_cri[i] + fabsf(cri[i])) * Mbh[i]; + gbh_dcri[i] = (gbh_dcri[i] + fabsf(grad[i])) * Mbh[i]; + } + /* Sum horizontal criteria across Gr+R and Gb+B */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + crih[i] += gbh_cri[i]; + dcrih[i] += gbh_dcri[i]; + } + + /* Gaussian sigma=2 smoothing of crih and dcrih (separable, cri as scratch) */ + _ari_gauss5_separable(crih, crih, cri, width, height); + _ari_gauss5_separable(dcrih, dcrih, cri, width, height); + + /* RI_wh = crih^2 * dcrih */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) whMap[i] = crih[i] * crih[i] * dcrih[i]; + + /* Update guides using stored refined estimates (no recomputation). */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + RI_Guidegrh[i] = G_ch[i] * mGr[i] + refined_Grh[i]; + RI_Guiderh[i] = R_ch[i] + refined_Rh[i]; + RI_Guidegbh[i] = G_ch[i] * mGb[i] + refined_Gbh[i]; + RI_Guidebh[i] = B_ch[i] + refined_Bh[i]; + } + + /* New green estimate = RI_Guidegrh + RI_Guidegbh (dense green) */ + /* Per-pixel argmin: if whMap < RI_w2h, update RI_Gh and RI_w2h */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float new_g = RI_Guidegrh[i] + RI_Guidegbh[i]; + if(whMap[i] < RI_w2h[i]) + { + RI_Gh[i] = new_g; + RI_w2h[i] = whMap[i]; + } } } - /* Interpolate B-G at non-Blue positions */ - DT_OMP_FOR() - for(int y = 2; y < height - 2; y++) - for(int x = 2; x < width - 2; x++) + /* === RI Vertical: same as H but with V kernels === */ { - const size_t p = (size_t)y * width + x; - const int c = FC(y, x, filters); - if(c == 2) continue; /* B: known */ + float *res = w0; + float *resUp = w1; + float *cri = w3; + float *grad = w4; + + /* Accumulate criv / dcriv in the same manner as crih / dcrih */ + /* Gr vertical */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (G_ch[i] - riGrV[i]) * mGb[i]; + _ari_imfilter_v1d(res, resUp, width, height, kRes3, 3); + float *ri_Grv = w2; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = riGrV[i]; + ri_Grv[i] = (t + resUp[i]) * mR[i]; + cri[i] = (RI_Guidegrv[i] - t) * Mrv[i]; + } + _ari_imfilter_v1d(cri, grad, width, height, kGrad3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + criv[i] = fabsf(cri[i]); + dcriv[i] = fabsf(grad[i]); + } + + /* R vertical */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (R_ch[i] - riRV[i]) * mR[i]; + _ari_imfilter_v1d(res, resUp, width, height, kRes3, 3); + float *ri_Rv = w5; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = riRV[i]; + ri_Rv[i] = (t + resUp[i]) * mGb[i]; + cri[i] = (RI_Guiderv[i] - t) * Mrv[i]; + } + _ari_imfilter_v1d(cri, grad, width, height, kGrad3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + criv[i] = (criv[i] + fabsf(cri[i])) * Mrv[i]; + dcriv[i] = (dcriv[i] + fabsf(grad[i])) * Mrv[i]; + } + + /* Gb vertical */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (G_ch[i] - riGbV[i]) * mGr[i]; + _ari_imfilter_v1d(res, resUp, width, height, kRes3, 3); + float *ri_Gbv = w6; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = riGbV[i]; + ri_Gbv[i] = (t + resUp[i]) * mB[i]; + cri[i] = (RI_Guidegbv[i] - t) * Mbv[i]; + } + _ari_imfilter_v1d(cri, grad, width, height, kGrad3, 3); + float *gbv_cri = w7; + float *gbv_dcri = w8; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + gbv_cri[i] = fabsf(cri[i]); + gbv_dcri[i] = fabsf(grad[i]); + } - if(c == 1) /* Green */ + /* B vertical */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (B_ch[i] - riBV[i]) * mB[i]; + _ari_imfilter_v1d(res, resUp, width, height, kRes3, 3); + float *ri_Bv = w9; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) { - if(FC(y, x - 1, filters) == 2) /* Gb: B left/right */ - diff_b[p] = (diff_b[p - 1] + diff_b[p + 1]) * 0.5f; - else /* Gr: B above/below */ - diff_b[p] = (diff_b[p - width] + diff_b[p + width]) * 0.5f; + const float t = riBV[i]; + ri_Bv[i] = (t + resUp[i]) * mGr[i]; + cri[i] = (RI_Guidebv[i] - t) * Mbv[i]; } - else /* Red: diagonal B */ + _ari_imfilter_v1d(cri, grad, width, height, kGrad3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) { - diff_b[p] = (diff_b[p - width - 1] + diff_b[p - width + 1] - + diff_b[p + width - 1] + diff_b[p + width + 1]) * 0.25f; + gbv_cri[i] = (gbv_cri[i] + fabsf(cri[i])) * Mbv[i]; + gbv_dcri[i] = (gbv_dcri[i] + fabsf(grad[i])) * Mbv[i]; + } + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + criv[i] += gbv_cri[i]; + dcriv[i] += gbv_dcri[i]; + } + + /* Gaussian sigma=2 smoothing of criv and dcriv (separable) */ + _ari_gauss5_separable(criv, criv, cri, width, height); + _ari_gauss5_separable(dcriv, dcriv, cri, width, height); + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) wvMap[i] = criv[i] * criv[i] * dcriv[i]; + + /* Update V guides */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + RI_Guidegrv[i] = G_ch[i] * mGb[i] + ri_Grv[i]; + RI_Guidegbv[i] = G_ch[i] * mGr[i] + ri_Gbv[i]; + RI_Guiderv[i] = R_ch[i] + ri_Rv[i]; + RI_Guidebv[i] = B_ch[i] + ri_Bv[i]; + } + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float new_g = RI_Guidegrv[i] + RI_Guidegbv[i]; + if(wvMap[i] < RI_w2v[i]) + { + RI_Gv[i] = new_g; + RI_w2v[i] = wvMap[i]; + } } } -} -/* ===== Write RGB output from green + color differences ===== */ -static void _ari_write_rgb(float *const restrict out, - const float *const restrict green, - const float *const restrict diff_r, - const float *const restrict diff_b, - const int width, - const int height) -{ - DT_OMP_FOR() - for(int y = 0; y < height; y++) - for(int x = 0; x < width; x++) + /* ========================================================= + * MLRI branch (h, v) — same structure but Laplacian GF + * ========================================================= */ + /* H direction: need difR, difGr, difB, difGb via 1D Laplacian, then GF_MLRI */ + float *difR_ = t0, *difGr_ = t1, *difB_ = t2, *difGb_ = t3; + _ari_imfilter_h1d(ML_Guiderh, difR_, width, height, kLap5, 5); + _ari_imfilter_h1d(ML_Guidegrh, difGr_, width, height, kLap5, 5); + _ari_imfilter_h1d(ML_Guidebh, difB_, width, height, kLap5, 5); + _ari_imfilter_h1d(ML_Guidegbh, difGb_, width, height, kLap5, 5); + + float *mlRH = t4, *mlBH = t5, *mlGrH = t6, *mlGbH = t7; + /* MLRI-H: pair (mlRH, mlGrH) shares intensity sums over Mrh; (mlBH, mlGbH) over Mbh */ + _ari_gfmlri_pair(ML_Guidegrh /*U*/, ML_Guiderh /*V*/, Mrh, + difGr_, difR_, mR, /* call 1: G=U, R=V, I=difGr, p=difR, mask_lap=mR */ + difR_, difGr_, mGr, /* call 2: G=V, R=U, I=difR, p=difGr, mask_lap=mGr */ + mlRH, mlGrH, width, height, h_ml, v_ml, eps, + w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, integral); + _ari_gfmlri_pair(ML_Guidegbh /*U*/, ML_Guidebh /*V*/, Mbh, + difGb_, difB_, mB, + difB_, difGb_, mGb, + mlBH, mlGbH, width, height, h_ml, v_ml, eps, + w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, integral); + + /* Process MLRI-H: compute residuals, refined, criterion, update best */ { - const size_t p = (size_t)y * width + x; - const size_t o = 4 * p; - out[o + 0] = fmaxf(green[p] + diff_r[p], 0.0f); - out[o + 1] = fmaxf(green[p], 0.0f); - out[o + 2] = fmaxf(green[p] + diff_b[p], 0.0f); - out[o + 3] = 0.0f; + float *res = w0; + float *resUp = w1; + float *cri = w3; + float *grad = w4; + + /* Gr */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (G_ch[i] - mlGrH[i]) * mGr[i]; + _ari_imfilter_h1d(res, resUp, width, height, kRes3, 3); + float *ml_Grh = w2; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = mlGrH[i]; + ml_Grh[i] = (t + resUp[i]) * mR[i]; + cri[i] = (ML_Guidegrh[i] - t) * Mrh[i]; + } + _ari_imfilter_h1d(cri, grad, width, height, kGrad3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + crih[i] = fabsf(cri[i]); + dcrih[i] = fabsf(grad[i]); + } + + /* R */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (R_ch[i] - mlRH[i]) * mR[i]; + _ari_imfilter_h1d(res, resUp, width, height, kRes3, 3); + float *ml_Rh = w5; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = mlRH[i]; + ml_Rh[i] = (t + resUp[i]) * mGr[i]; + cri[i] = (ML_Guiderh[i] - t) * Mrh[i]; + } + _ari_imfilter_h1d(cri, grad, width, height, kGrad3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + crih[i] = (crih[i] + fabsf(cri[i])) * Mrh[i]; + dcrih[i] = (dcrih[i] + fabsf(grad[i])) * Mrh[i]; + } + + /* Gb */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (G_ch[i] - mlGbH[i]) * mGb[i]; + _ari_imfilter_h1d(res, resUp, width, height, kRes3, 3); + float *ml_Gbh = w6; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = mlGbH[i]; + ml_Gbh[i] = (t + resUp[i]) * mB[i]; + cri[i] = (ML_Guidegbh[i] - t) * Mbh[i]; + } + _ari_imfilter_h1d(cri, grad, width, height, kGrad3, 3); + float *gbh_cri = w7; + float *gbh_dcri = w8; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + gbh_cri[i] = fabsf(cri[i]); + gbh_dcri[i] = fabsf(grad[i]); + } + + /* B */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (B_ch[i] - mlBH[i]) * mB[i]; + _ari_imfilter_h1d(res, resUp, width, height, kRes3, 3); + float *ml_Bh = w9; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = mlBH[i]; + ml_Bh[i] = (t + resUp[i]) * mGb[i]; + cri[i] = (ML_Guidebh[i] - t) * Mbh[i]; + } + _ari_imfilter_h1d(cri, grad, width, height, kGrad3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + gbh_cri[i] = (gbh_cri[i] + fabsf(cri[i])) * Mbh[i]; + gbh_dcri[i] = (gbh_dcri[i] + fabsf(grad[i])) * Mbh[i]; + } + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + crih[i] += gbh_cri[i]; + dcrih[i] += gbh_dcri[i]; + } + /* Gaussian sigma=2 smoothing (separable) */ + _ari_gauss5_separable(crih, crih, cri, width, height); + _ari_gauss5_separable(dcrih, dcrih, cri, width, height); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) whMap[i] = crih[i] * crih[i] * dcrih[i]; + + /* Update MLRI H guides and argmin */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + ML_Guidegrh[i] = G_ch[i] * mGr[i] + ml_Grh[i]; + ML_Guidegbh[i] = G_ch[i] * mGb[i] + ml_Gbh[i]; + ML_Guiderh[i] = R_ch[i] + ml_Rh[i]; + ML_Guidebh[i] = B_ch[i] + ml_Bh[i]; + } + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float new_g = ML_Guidegrh[i] + ML_Guidegbh[i]; + if(whMap[i] < ML_w2h[i]) + { + ML_Gh[i] = new_g; + ML_w2h[i] = whMap[i]; + } + } } -} -/* ===== Per-pixel adaptive selection ===== */ -/* Selects per-pixel among n_candidates based on local color-difference variance. - * Candidate arrays: greens[i], diff_rs[i], diff_bs[i] for i in [0, n_candidates). - * Candidates are ordered sharpest-first: MLRI, RI, GF-RI. - * When variance difference < noise_thr^2, prefer the smoother (later) candidate. */ -static void _ari_adaptive_select(float *const restrict out, - float *const *const greens, - float *const *const diff_rs, - float *const *const diff_bs, - const int n_candidates, - const int width, - const int height, - const float noise_thr) -{ - const int r = ARI_SEL_RADIUS; - const float thr_sq = noise_thr * noise_thr * 4.0f; + /* MLRI V: Laplacian of V guides */ + float *difR_v = t0, *difGr_v = t1, *difB_v = t2, *difGb_v = t3; + _ari_imfilter_v1d(ML_Guiderv, difR_v, width, height, kLap5, 5); + _ari_imfilter_v1d(ML_Guidegrv, difGr_v, width, height, kLap5, 5); + _ari_imfilter_v1d(ML_Guidebv, difB_v, width, height, kLap5, 5); + _ari_imfilter_v1d(ML_Guidegbv, difGb_v, width, height, kLap5, 5); + + float *mlRV = t4, *mlBV = t5, *mlGrV = t6, *mlGbV = t7; + /* MLRI-V: pair (mlRV, mlGrV) over Mrv; (mlBV, mlGbV) over Mbv. Note the V + * direction uses swapped (mGb, mGr) masks for the Gr/Gb-flavored calls. */ + _ari_gfmlri_pair(ML_Guidegrv /*U*/, ML_Guiderv /*V*/, Mrv, + difGr_v, difR_v, mR, /* call 1: G=U, R=V, mask_lap=mR */ + difR_v, difGr_v, mGb, /* call 2: G=V, R=U, mask_lap=mGb */ + mlRV, mlGrV, width, height, v_ml, h_ml, eps, + w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, integral); + _ari_gfmlri_pair(ML_Guidegbv /*U*/, ML_Guidebv /*V*/, Mbv, + difGb_v, difB_v, mB, + difB_v, difGb_v, mGr, + mlBV, mlGbV, width, height, v_ml, h_ml, eps, + w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, integral); - DT_OMP_FOR() - for(int y = 0; y < height; y++) - for(int x = 0; x < width; x++) { - const size_t p = (size_t)y * width + x; - - /* Compute local variance of color differences for each candidate */ - float best_var = FLT_MAX; - int best = n_candidates - 1; /* default: smoothest */ + float *res = w0; + float *resUp = w1; + float *cri = w3; + float *grad = w4; + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (G_ch[i] - mlGrV[i]) * mGb[i]; + _ari_imfilter_v1d(res, resUp, width, height, kRes3, 3); + float *ml_Grv = w2; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = mlGrV[i]; + ml_Grv[i] = (t + resUp[i]) * mR[i]; + cri[i] = (ML_Guidegrv[i] - t) * Mrv[i]; + } + _ari_imfilter_v1d(cri, grad, width, height, kGrad3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + criv[i] = fabsf(cri[i]); + dcriv[i] = fabsf(grad[i]); + } - for(int ci = 0; ci < n_candidates; ci++) + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (R_ch[i] - mlRV[i]) * mR[i]; + _ari_imfilter_v1d(res, resUp, width, height, kRes3, 3); + float *ml_Rv = w5; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = mlRV[i]; + ml_Rv[i] = (t + resUp[i]) * mGb[i]; + cri[i] = (ML_Guiderv[i] - t) * Mrv[i]; + } + _ari_imfilter_v1d(cri, grad, width, height, kGrad3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) { - const float *dr = diff_rs[ci]; - const float *db = diff_bs[ci]; + criv[i] = (criv[i] + fabsf(cri[i])) * Mrv[i]; + dcriv[i] = (dcriv[i] + fabsf(grad[i])) * Mrv[i]; + } - /* Local variance in (2r+1)x(2r+1) window */ - float sum_r = 0, sum_r2 = 0, sum_b = 0, sum_b2 = 0; - int cnt = 0; - const int y0 = MAX(0, y - r), y1 = MIN(height - 1, y + r); - const int x0 = MAX(0, x - r), x1 = MIN(width - 1, x + r); - for(int yy = y0; yy <= y1; yy++) - for(int xx = x0; xx <= x1; xx++) - { - const size_t pp = (size_t)yy * width + xx; - sum_r += dr[pp]; sum_r2 += dr[pp] * dr[pp]; - sum_b += db[pp]; sum_b2 += db[pp] * db[pp]; - cnt++; - } - const float inv = 1.0f / (float)cnt; - const float var_r = sum_r2 * inv - (sum_r * inv) * (sum_r * inv); - const float var_b = sum_b2 * inv - (sum_b * inv) * (sum_b * inv); - const float var = fmaxf(var_r, 0.0f) + fmaxf(var_b, 0.0f); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (G_ch[i] - mlGbV[i]) * mGr[i]; + _ari_imfilter_v1d(res, resUp, width, height, kRes3, 3); + float *ml_Gbv = w6; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = mlGbV[i]; + ml_Gbv[i] = (t + resUp[i]) * mB[i]; + cri[i] = (ML_Guidegbv[i] - t) * Mbv[i]; + } + _ari_imfilter_v1d(cri, grad, width, height, kGrad3, 3); + float *gbv_cri = w7; + float *gbv_dcri = w8; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + gbv_cri[i] = fabsf(cri[i]); + gbv_dcri[i] = fabsf(grad[i]); + } - if(var < best_var - thr_sq) + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) res[i] = (B_ch[i] - mlBV[i]) * mB[i]; + _ari_imfilter_v1d(res, resUp, width, height, kRes3, 3); + float *ml_Bv = w9; + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float t = mlBV[i]; + ml_Bv[i] = (t + resUp[i]) * mGr[i]; + cri[i] = (ML_Guidebv[i] - t) * Mbv[i]; + } + _ari_imfilter_v1d(cri, grad, width, height, kGrad3, 3); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + gbv_cri[i] = (gbv_cri[i] + fabsf(cri[i])) * Mbv[i]; + gbv_dcri[i] = (gbv_dcri[i] + fabsf(grad[i])) * Mbv[i]; + } + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + criv[i] += gbv_cri[i]; + dcriv[i] += gbv_dcri[i]; + } + /* Gaussian sigma=2 smoothing of criv and dcriv (separable) */ + _ari_gauss5_separable(criv, criv, cri, width, height); + _ari_gauss5_separable(dcriv, dcriv, cri, width, height); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) wvMap[i] = criv[i] * criv[i] * dcriv[i]; + + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + ML_Guidegrv[i] = G_ch[i] * mGb[i] + ml_Grv[i]; + ML_Guidegbv[i] = G_ch[i] * mGr[i] + ml_Gbv[i]; + ML_Guiderv[i] = R_ch[i] + ml_Rv[i]; + ML_Guidebv[i] = B_ch[i] + ml_Bv[i]; + } + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + { + const float new_g = ML_Guidegrv[i] + ML_Guidegbv[i]; + if(wvMap[i] < ML_w2v[i]) { - best_var = var; - best = ci; + ML_Gv[i] = new_g; + ML_w2v[i] = wvMap[i]; } } - - /* Write selected candidate to output */ - const size_t o = 4 * p; - out[o + 0] = fmaxf(greens[best][p] + diff_rs[best][p], 0.0f); - out[o + 1] = fmaxf(greens[best][p], 0.0f); - out[o + 2] = fmaxf(greens[best][p] + diff_bs[best][p], 0.0f); - out[o + 3] = 0.0f; } + + /* Grow window sizes for next iteration */ + h_ri += 2; v_ri += 1; + h_ml += 2; v_ml += 1; + } + + /* Final combine by inverse-criterion weighted average */ + _ari_combine_green(raw, mG, + RI_Gh, RI_Gv, ML_Gh, ML_Gv, + RI_w2h, RI_w2v, ML_w2h, ML_w2v, + green, npix); + + /* Clamp to valid range */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + green[i] = fmaxf(fminf(green[i], 1.0f), 0.0f); + + for(int j = 0; j < n_alloc; j++) dt_free_align(alloc_list[j]); +} + +/* ---------- 7x7 bicubic residual kernel (red/blue_interpolation.m, a=-0.5) */ +static inline float _ari_cubic_s(const float x) +{ + const float a = -0.5f; + const float ax = fabsf(x); + if(ax < 1.0f) return (a + 2.0f) * ax * ax * ax - (a + 3.0f) * ax * ax + 1.0f; + if(ax < 2.0f) return a * ax * ax * ax - 5.0f * a * ax * ax + 8.0f * a * ax - 4.0f * a; + return 0.0f; +} + +__attribute__((unused)) static void _ari_build_bicubic7x7(float kernel[49]) +{ + const float s32 = _ari_cubic_s(1.5f); + const float s12 = _ari_cubic_s(0.5f); + const float s0 = _ari_cubic_s(0.0f); + const float s1 = _ari_cubic_s(1.0f); + const float A = s32 * s32; + const float B = s32 * s12; + const float C = s12 * s12; + const float D = s0 * s12; + const float E = s0 * s32; + const float F = s1 * s12; + const float G = s1 * s32; + const float H[49] = { + A, G, B, E, B, G, A, + G, 0.0f, F, 0.0f, F, 0.0f, G, + B, F, C, D, C, F, B, + E, 0.0f, D, 1.0f, D, 0.0f, E, + B, F, C, D, C, F, B, + G, 0.0f, F, 0.0f, F, 0.0f, G, + A, G, B, E, B, G, A, + }; + memcpy(kernel, H, sizeof(H)); +} + +/* 1D separable factorization: the above 7x7 is the outer product of + * k1d = [s32, 0, s12, s0, s12, 0, s32] (s1=0, s0=1). */ +static void _ari_build_bicubic1d(float kernel[7]) +{ + kernel[0] = _ari_cubic_s(1.5f); + kernel[1] = 0.0f; /* s_fn(1) = 0 */ + kernel[2] = _ari_cubic_s(0.5f); + kernel[3] = _ari_cubic_s(0.0f); /* = 1 */ + kernel[4] = kernel[2]; + kernel[5] = 0.0f; + kernel[6] = kernel[0]; } -/* ===== Main entry point ===== */ +/* ---------- R/B interpolation (port of red/blue_interpolation.m) ========== */ +static void _ari_rb_interpolation(float *const restrict out_channel, + const float *const restrict green, + const float *const restrict mosaic_c, + const float *const restrict mask_c, + const int width, + const int height, + const int rh, + const int rv, + const float eps, + float *const restrict work_buffers[17], + double *const restrict integral) +{ + const size_t npix = (size_t)width * height; + float *lap_c = work_buffers[0]; + float *lap_g = work_buffers[1]; + float *green_masked = work_buffers[2]; + float *tentative = work_buffers[3]; + float *residual = work_buffers[4]; + float *kernel_buf = work_buffers[5]; (void)kernel_buf; + + /* Laplacian of mosaic_c (Bayer sparse color) and green*mask_c */ + _ari_laplacian_5x5_cross(mosaic_c, lap_c, width, height); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) green_masked[i] = green[i] * mask_c[i]; + _ari_laplacian_5x5_cross(green_masked, lap_g, width, height); + + /* Tentative via MLRI guided filter: G guide, C input, Laplacians for slope */ + _ari_guidedfilter_mlri(green, mosaic_c, mask_c, lap_g, lap_c, mask_c, tentative, + width, height, rh, rv, eps, + work_buffers[6], work_buffers[7], work_buffers[8], + work_buffers[9], work_buffers[10], work_buffers[11], + work_buffers[12], work_buffers[13], work_buffers[14], + work_buffers[15], work_buffers[16], integral); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) tentative[i] = fmaxf(fminf(tentative[i], 1.0f), 0.0f); + + /* Residual at known C sites */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) residual[i] = mask_c[i] * (mosaic_c[i] - tentative[i]); + + /* 7x7 bicubic residual upsample via separable 1D (tensor product) */ + float bicubic1d[7]; + _ari_build_bicubic1d(bicubic1d); + _ari_imfilter_h1d(residual, work_buffers[5] /*scratch*/, width, height, bicubic1d, 7); + _ari_imfilter_v1d(work_buffers[5], work_buffers[6], width, height, bicubic1d, 7); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) + out_channel[i] = fmaxf(fminf(work_buffers[6][i] + tentative[i], 1.0f), 0.0f); +} + +/* ---------- ari_demosaic entry point ====================================== + * q1 = q2 = q3 all invoke the same paper-exact pipeline here. The quality + * parameter is retained for backward-compat with the existing enum but has + * no effect -- the paper's adaptive selection subsumes the tiered "fast / + * balanced / full" distinction the original simplified variant had. */ static void ari_demosaic(float *const restrict out, const float *const restrict in, const int width, @@ -479,97 +1813,76 @@ static void ari_demosaic(float *const restrict out, const uint32_t filters, const int quality) { + /* Interpret `quality` as the iteration count when >= 3 (experimental + * knob). Paper default is 11; this lets benchmark scripts sweep iter + * counts without rebuilding. */ + const int itnum = (quality >= 3 && quality <= 99) ? quality : ARI_K_MAX; const size_t npix = (size_t)width * height; - /* Clamp negative Bayer values (can occur after rawprepare) so all - * subsequent gradient and residual computations see consistent data. */ + /* Shared integral-image buffer reused across every box_sum_rect call. */ + double *integral = (double *)dt_alloc_aligned((size_t)(width + 1) * (height + 1) * sizeof(double)); + if(!integral) { memset(out, 0, sizeof(float) * npix * 4); return; } + + /* Clamp negative inputs (rawprepare can produce them) */ float *cfa = dt_alloc_align_float(npix); - if(!cfa) { memset(out, 0, sizeof(float) * npix * 4); return; } + if(!cfa) { dt_free_align(integral); memset(out, 0, sizeof(float) * npix * 4); return; } DT_OMP_FOR() for(size_t i = 0; i < npix; i++) cfa[i] = fmaxf(in[i], 0.0f); - /* === Quality 1: MLRI only, no selection === */ - if(quality <= 1) + /* Interpolate green via full paper ARI */ + float *green = dt_alloc_align_float(npix); + if(!green) { dt_free_align(cfa); dt_free_align(integral); memset(out, 0, sizeof(float) * npix * 4); return; } + _ari_green_interpolation(green, cfa, width, height, filters, ARI_EPS, itnum, integral); + + /* Prepare masks for R and B interpolation */ + float *mR = dt_alloc_align_float(npix); + float *mG = dt_alloc_align_float(npix); + float *mB = dt_alloc_align_float(npix); + float *mGr = dt_alloc_align_float(npix); + float *mGb = dt_alloc_align_float(npix); + float *R_ch = dt_alloc_align_float(npix); + float *B_ch = dt_alloc_align_float(npix); + float *red = dt_alloc_align_float(npix); + float *blue = dt_alloc_align_float(npix); + /* scratch pool for rb_interpolation (17 buffers: +1 for guidedfilter_mlri work10) */ + float *sb[17]; + for(int i = 0; i < 17; i++) sb[i] = dt_alloc_align_float(npix); + + if(!mR || !mG || !mB || !mGr || !mGb || !R_ch || !B_ch || !red || !blue) { - float *green = dt_alloc_align_float(npix); - float *dr = dt_alloc_align_float(npix); - float *db = dt_alloc_align_float(npix); - if(!green || !dr || !db) - { - dt_free_align(green); dt_free_align(dr); dt_free_align(db); - dt_free_align(cfa); - memset(out, 0, sizeof(float) * npix * 4); - return; - } - _ari_green_mlri(green, cfa, width, height, filters); - _ari_interpolate_cd(dr, db, cfa, green, width, height, filters); - _ari_write_rgb(out, green, dr, db, width, height); - dt_free_align(green); dt_free_align(dr); dt_free_align(db); - dt_free_align(cfa); - return; - } - - /* === Quality 2-3: adaptive selection === */ - const float noise_sigma = _ari_estimate_noise(cfa, width, height, filters); - const float gf_eps = noise_sigma * noise_sigma * 4.0f; - - /* Candidate 0: MLRI (sharpest) */ - float *green_mlri = dt_alloc_align_float(npix); - float *dr_mlri = dt_alloc_align_float(npix); - float *db_mlri = dt_alloc_align_float(npix); - - /* Candidate for RI / GF-RI */ - float *green_ri = dt_alloc_align_float(npix); - float *dr_ri = dt_alloc_align_float(npix); - float *db_ri = dt_alloc_align_float(npix); - float *dr_gfri = dt_alloc_align_float(npix); - float *db_gfri = dt_alloc_align_float(npix); - - if(!green_mlri || !dr_mlri || !db_mlri || !green_ri || !dr_ri || !db_ri - || !dr_gfri || !db_gfri) - { - dt_free_align(green_mlri); dt_free_align(dr_mlri); dt_free_align(db_mlri); - dt_free_align(green_ri); dt_free_align(dr_ri); dt_free_align(db_ri); - dt_free_align(dr_gfri); dt_free_align(db_gfri); - dt_free_align(cfa); memset(out, 0, sizeof(float) * npix * 4); - return; + goto cleanup; } + for(int i = 0; i < 17; i++) if(!sb[i]) { memset(out, 0, sizeof(float) * npix * 4); goto cleanup; } - /* Generate MLRI candidate */ - _ari_green_mlri(green_mlri, cfa, width, height, filters); - _ari_interpolate_cd(dr_mlri, db_mlri, cfa, green_mlri, width, height, filters); - - /* Generate RI candidate (HA green + bilinear CD) */ - _ari_green_ha(green_ri, cfa, width, height, filters); - _ari_interpolate_cd(dr_ri, db_ri, cfa, green_ri, width, height, filters); - - /* Generate GF-RI: guided filter on RI color differences */ - _ari_guided_filter_ch(dr_ri, green_ri, dr_gfri, width, height, ARI_GF_RADIUS, gf_eps); - _ari_guided_filter_ch(db_ri, green_ri, db_gfri, width, height, ARI_GF_RADIUS, gf_eps); - - /* Adaptive selection */ - if(quality >= 3) + _ari_build_masks(mR, mG, mB, mGr, mGb, width, height, filters); + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) { - /* 3 candidates: MLRI, RI, GF-RI (sharpest to smoothest) */ - float *greens[3] = { green_mlri, green_ri, green_ri }; - float *diff_rs[3] = { dr_mlri, dr_ri, dr_gfri }; - float *diff_bs[3] = { db_mlri, db_ri, db_gfri }; - _ari_adaptive_select(out, greens, diff_rs, diff_bs, 3, - width, height, noise_sigma); + R_ch[i] = cfa[i] * mR[i]; + B_ch[i] = cfa[i] * mB[i]; } - else + + _ari_rb_interpolation(red, green, R_ch, mR, width, height, 5, 5, ARI_EPS, sb, integral); + _ari_rb_interpolation(blue, green, B_ch, mB, width, height, 5, 5, ARI_EPS, sb, integral); + + /* Write planar RGBX */ + DT_OMP_FOR() + for(size_t i = 0; i < npix; i++) { - /* 2 candidates: MLRI, GF-RI */ - float *greens[2] = { green_mlri, green_ri }; - float *diff_rs[2] = { dr_mlri, dr_gfri }; - float *diff_bs[2] = { db_mlri, db_gfri }; - _ari_adaptive_select(out, greens, diff_rs, diff_bs, 2, - width, height, noise_sigma); + const size_t o = 4 * i; + out[o + 0] = red[i]; + out[o + 1] = green[i]; + out[o + 2] = blue[i]; + out[o + 3] = 0.0f; } - dt_free_align(green_mlri); dt_free_align(dr_mlri); dt_free_align(db_mlri); - dt_free_align(green_ri); dt_free_align(dr_ri); dt_free_align(db_ri); - dt_free_align(dr_gfri); dt_free_align(db_gfri); - dt_free_align(cfa); +cleanup: + dt_free_align(cfa); dt_free_align(green); + dt_free_align(mR); dt_free_align(mG); dt_free_align(mB); + dt_free_align(mGr); dt_free_align(mGb); + dt_free_align(R_ch); dt_free_align(B_ch); + dt_free_align(red); dt_free_align(blue); + for(int i = 0; i < 17; i++) dt_free_align(sb[i]); + dt_free_align(integral); } diff --git a/src/iop/demosaicing/menon.c b/src/iop/demosaicing/menon.c index bf67797c2074..ec0df3f6f27b 100644 --- a/src/iop/demosaicing/menon.c +++ b/src/iop/demosaicing/menon.c @@ -125,12 +125,100 @@ static inline float _menon_cnv_2d(const float *const restrict buf, return sum; } +/* Specialized 3-tap k_b convolution: kernel = {0.5, 0, 0.5}. + For interior columns uses direct average; for boundary cols applies reflect. + Matches baseline _menon_cnv_h3(buf, row, col, w, h, k_b). */ +static inline float _menon_cnv_h3_kb(const float *const restrict buf, + const int row, const int col, + const int width) +{ + const size_t rs = (size_t)row * width; + if(__builtin_expect(col > 0 && col < width - 1, 1)) + return 0.5f * (buf[rs + col - 1] + buf[rs + col + 1]); + int cl = col - 1, cr = col + 1; + if(cl < 0) cl = -cl; + if(cr >= width) cr = 2*(width-1) - cr; + return 0.5f * (buf[rs + cl] + buf[rs + cr]); +} + +static inline float _menon_cnv_v3_kb(const float *const restrict buf, + const int row, const int col, + const int width, const int height) +{ + if(__builtin_expect(row > 0 && row < height - 1, 1)) + return 0.5f * (buf[(size_t)(row-1) * width + col] + buf[(size_t)(row+1) * width + col]); + int rt = row - 1, rb = row + 1; + if(rt < 0) rt = -rt; + if(rb >= height) rb = 2*(height-1) - rb; + return 0.5f * (buf[(size_t)rt * width + col] + buf[(size_t)rb * width + col]); +} + +/* Specialized 3-tap FIR convolution: kernel = {1/3, 1/3, 1/3}. + Matches baseline _menon_cnv_h3(buf, row, col, w, h, FIR). */ +static inline float _menon_cnv_h3_fir(const float *const restrict buf, + const int row, const int col, + const int width) +{ + const size_t rs = (size_t)row * width; + if(__builtin_expect(col > 0 && col < width - 1, 1)) + return (buf[rs + col - 1] + buf[rs + col] + buf[rs + col + 1]) * (1.0f/3.0f); + int cl = col - 1, cr = col + 1; + if(cl < 0) cl = -cl; + if(cr >= width) cr = 2*(width-1) - cr; + return (buf[rs + cl] + buf[rs + col] + buf[rs + cr]) * (1.0f/3.0f); +} + +static inline float _menon_cnv_v3_fir(const float *const restrict buf, + const int row, const int col, + const int width, const int height) +{ + const size_t rs = (size_t)row * width; + if(__builtin_expect(row > 0 && row < height - 1, 1)) + return (buf[rs - width + col] + buf[rs + col] + buf[rs + width + col]) * (1.0f/3.0f); + int rt = row - 1, rb = row + 1; + if(rt < 0) rt = -rt; + if(rb >= height) rb = 2*(height-1) - rb; + return (buf[(size_t)rt * width + col] + buf[rs + col] + buf[(size_t)rb * width + col]) * (1.0f/3.0f); +} + +/* Inner-region sparse classifier kernels (no boundary check). + clf_h has 8 non-zero taps: positions (dr,dc,weight) + (-2,-2,1), (-2,0,1), (-1,-1,1), (0,-2,3), (0,0,3), + (+1,-1,1), (+2,-2,1), (+2,0,1). + Caller must guarantee 2 <= row < height-2 AND 2 <= col < width-2. */ +static inline float _menon_clf_h_inner(const float *const restrict buf, + const int row, const int col, const int width) +{ + const size_t w = (size_t)width; + const size_t idx = (size_t)row * w + (size_t)col; + return buf[idx - 2*w - 2] + buf[idx - 2*w] + + buf[idx - w - 1] + + 3.f*(buf[idx - 2] + buf[idx]) + + buf[idx + w - 1] + + buf[idx + 2*w - 2] + buf[idx + 2*w]; +} + +/* clf_v has 8 non-zero taps (mirror of clf_h taps along anti-diagonal): + (-2,-2,1), (-2,0,3), (-2,+2,1), + (-1,-1,1), (-1,+1,1), + (0,-2,1), (0,0,3), (0,+2,1). */ +static inline float _menon_clf_v_inner(const float *const restrict buf, + const int row, const int col, const int width) +{ + const size_t w = (size_t)width; + const size_t idx = (size_t)row * w + (size_t)col; + return buf[idx - 2*w - 2] + 3.f*buf[idx - 2*w] + buf[idx - 2*w + 2] + + buf[idx - w - 1] + buf[idx - w + 1] + + buf[idx - 2] + 3.f*buf[idx] + buf[idx + 2]; +} + DT_OMP_DECLARE_SIMD(aligned(in, out : 64)) static void menon_demosaic(float *const restrict out, const float *const restrict in, const int width, const int height, - const uint32_t filters) + const uint32_t filters, + const int refining_step) { const size_t numpix = (size_t)width * height; @@ -160,10 +248,12 @@ static void menon_demosaic(float *const restrict out, } /* ---- Filter kernels ---- */ - const float h_0[5] = { 0.0f, 0.5f, 0.0f, 0.5f, 0.0f }; - const float h_1[5] = { -0.25f, 0.0f, 0.5f, 0.0f, -0.25f }; + /* Step 2 used two separate kernels h_0 + h_1; by linearity we merge them + into one 5-tap kernel (saves one 5-tap convolution per non-green pixel). */ + const float h_sum[5] = { -0.25f, 0.5f, 0.5f, 0.5f, -0.25f }; const float k_b[3] = { 0.5f, 0.0f, 0.5f }; const float FIR[3] = { 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f }; + (void)k_b; (void)FIR; /* only used via OpenMP firstprivate clauses below */ /* Directional classifier kernel (5x5) for horizontal. scipy.ndimage.convolve flips the kernel (true convolution), but our @@ -187,65 +277,70 @@ static void menon_demosaic(float *const restrict out, /* ---- Step 1: Extract CFA and initial R, G, B channels ---- */ DT_OMP_PRAGMA(parallel for schedule(static) default(none) - dt_omp_firstprivate(in, CFA, R, G, B, width, height, filters, numpix)) - for(size_t idx = 0; idx < numpix; idx++) + dt_omp_firstprivate(in, CFA, R, G, B, width, height, filters)) + for(int row = 0; row < height; row++) { - const int row = idx / width; - const int col = idx % width; - const float val = fmaxf(in[idx], 0.0f); // clamp negative values from rawprepare - CFA[idx] = val; - const int fc = FC(row, col, filters); - R[idx] = (fc == 0) ? val : 0.0f; - G[idx] = (fc == 1) ? val : 0.0f; - B[idx] = (fc == 2) ? val : 0.0f; + const size_t rowstart = (size_t)row * width; + for(int col = 0; col < width; col++) + { + const size_t idx = rowstart + col; + const float val = fmaxf(in[idx], 0.0f); + CFA[idx] = val; + const int fc = FC(row, col, filters); + R[idx] = (fc == 0) ? val : 0.0f; + G[idx] = (fc == 1) ? val : 0.0f; + B[idx] = (fc == 2) ? val : 0.0f; + } } /* ---- Step 2: Tentative horizontal and vertical green estimates ---- */ DT_OMP_PRAGMA(parallel for schedule(static) default(none) - dt_omp_firstprivate(CFA, G, G_H, G_V, width, height, filters, h_0, h_1, numpix)) - for(size_t idx = 0; idx < numpix; idx++) + dt_omp_firstprivate(CFA, G, G_H, G_V, width, height, filters, h_sum)) + for(int row = 0; row < height; row++) { - const int row = idx / width; - const int col = idx % width; - const int fc = FC(row, col, filters); - if(fc == 1) - { - // Green pixel: keep original - G_H[idx] = G[idx]; - G_V[idx] = G[idx]; - } - else + const size_t rowstart = (size_t)row * width; + for(int col = 0; col < width; col++) { - // Non-green pixel: interpolate green directionally - G_H[idx] = _menon_cnv_h5(CFA, row, col, width, height, h_0) - + _menon_cnv_h5(CFA, row, col, width, height, h_1); - G_V[idx] = _menon_cnv_v5(CFA, row, col, width, height, h_0) - + _menon_cnv_v5(CFA, row, col, width, height, h_1); + const size_t idx = rowstart + col; + const int fc = FC(row, col, filters); + if(fc == 1) + { + G_H[idx] = G[idx]; + G_V[idx] = G[idx]; + } + else + { + G_H[idx] = _menon_cnv_h5(CFA, row, col, width, height, h_sum); + G_V[idx] = _menon_cnv_v5(CFA, row, col, width, height, h_sum); + } } } /* ---- Step 3: Compute color differences for classification ---- */ DT_OMP_PRAGMA(parallel for schedule(static) default(none) - dt_omp_firstprivate(R, B, G_H, G_V, C_H, C_V, width, height, filters, numpix)) - for(size_t idx = 0; idx < numpix; idx++) + dt_omp_firstprivate(R, B, G_H, G_V, C_H, C_V, width, height, filters)) + for(int row = 0; row < height; row++) { - const int row = idx / width; - const int col = idx % width; - const int fc = FC(row, col, filters); - if(fc == 0) // Red pixel - { - C_H[idx] = R[idx] - G_H[idx]; - C_V[idx] = R[idx] - G_V[idx]; - } - else if(fc == 2) // Blue pixel - { - C_H[idx] = B[idx] - G_H[idx]; - C_V[idx] = B[idx] - G_V[idx]; - } - else + const size_t rowstart = (size_t)row * width; + for(int col = 0; col < width; col++) { - C_H[idx] = 0.0f; - C_V[idx] = 0.0f; + const size_t idx = rowstart + col; + const int fc = FC(row, col, filters); + if(fc == 0) + { + C_H[idx] = R[idx] - G_H[idx]; + C_V[idx] = R[idx] - G_V[idx]; + } + else if(fc == 2) + { + C_H[idx] = B[idx] - G_H[idx]; + C_V[idx] = B[idx] - G_V[idx]; + } + else + { + C_H[idx] = 0.0f; + C_V[idx] = 0.0f; + } } } @@ -254,23 +349,29 @@ static void menon_demosaic(float *const restrict out, D_V[row][col] = |C_V[row][col] - C_V[row+2][col]| (forward shift by 2) Boundary: reflect (numpy pad mode='reflect') */ DT_OMP_PRAGMA(parallel for schedule(static) default(none) - dt_omp_firstprivate(C_H, C_V, D_H, D_V, width, height, numpix)) - for(size_t idx = 0; idx < numpix; idx++) + dt_omp_firstprivate(C_H, C_V, D_H, D_V, width, height)) + for(int row = 0; row < height; row++) { - const int row = idx / width; - const int col = idx % width; - - // Horizontal gradient with reflect padding + const size_t rowstart = (size_t)row * width; + int r2 = row + 2; + if(r2 >= height) r2 = 2 * (height - 1) - r2; + const size_t r2start = (size_t)r2 * width; + /* Interior cols: col+2 < width (col < width-2) */ + const int inner_end = width - 2; + for(int col = 0; col < inner_end; col++) { - int c2 = col + 2; - if(c2 >= width) c2 = 2 * (width - 1) - c2; // reflect - D_H[idx] = fabsf(C_H[idx] - C_H[(size_t)row * width + c2]); + const size_t idx = rowstart + col; + D_H[idx] = fabsf(C_H[idx] - C_H[rowstart + col + 2]); + D_V[idx] = fabsf(C_V[idx] - C_V[r2start + col]); } - // Vertical gradient with reflect padding + /* Border cols: reflect */ + for(int col = inner_end; col < width; col++) { - int r2 = row + 2; - if(r2 >= height) r2 = 2 * (height - 1) - r2; // reflect - D_V[idx] = fabsf(C_V[idx] - C_V[(size_t)r2 * width + col]); + const size_t idx = rowstart + col; + int c2 = col + 2; + if(c2 >= width) c2 = 2 * (width - 1) - c2; + D_H[idx] = fabsf(C_H[idx] - C_H[rowstart + c2]); + D_V[idx] = fabsf(C_V[idx] - C_V[r2start + col]); } } @@ -278,22 +379,35 @@ static void menon_demosaic(float *const restrict out, /* d_H = convolve2d(D_H, clf_h), d_V = convolve2d(D_V, clf_v) */ /* Step 6: Choose direction: if d_V >= d_H => horizontal (M=1), else vertical (M=0) */ DT_OMP_PRAGMA(parallel for schedule(static) default(none) - dt_omp_firstprivate(D_H, D_V, G_H, G_V, G, M, width, height, clf_h, clf_v, numpix)) - for(size_t idx = 0; idx < numpix; idx++) + dt_omp_firstprivate(D_H, D_V, G_H, G_V, G, M, width, height, clf_h, clf_v)) + for(int row = 0; row < height; row++) { - const int row = idx / width; - const int col = idx % width; - const float d_h = _menon_cnv_2d(D_H, row, col, width, height, clf_h); - const float d_v = _menon_cnv_2d(D_V, row, col, width, height, clf_v); - if(d_v >= d_h) - { - M[idx] = 1; // choose horizontal - G[idx] = G_H[idx]; - } - else + const size_t rowstart = (size_t)row * width; + const int row_inner = (row >= 2 && row < height - 2); + for(int col = 0; col < width; col++) { - M[idx] = 0; // choose vertical - G[idx] = G_V[idx]; + const size_t idx = rowstart + col; + float d_h, d_v; + if(row_inner && col >= 2 && col < width - 2) + { + d_h = _menon_clf_h_inner(D_H, row, col, width); + d_v = _menon_clf_v_inner(D_V, row, col, width); + } + else + { + d_h = _menon_cnv_2d(D_H, row, col, width, height, clf_h); + d_v = _menon_cnv_2d(D_V, row, col, width, height, clf_v); + } + if(d_v >= d_h) + { + M[idx] = 1; + G[idx] = G_H[idx]; + } + else + { + M[idx] = 0; + G[idx] = G_V[idx]; + } } } @@ -307,78 +421,76 @@ static void menon_demosaic(float *const restrict out, /* 7c: At green pixels on blue rows, interpolate B horizontally */ /* 7d: At green pixels on red rows, interpolate B vertically */ DT_OMP_PRAGMA(parallel for schedule(static) default(none) - dt_omp_firstprivate(R, G, B, width, height, filters, k_b, numpix)) - for(size_t idx = 0; idx < numpix; idx++) + dt_omp_firstprivate(R, G, B, width, height, filters, k_b)) + for(int row = 0; row < height; row++) { - const int row = idx / width; - const int col = idx % width; - const int fc = FC(row, col, filters); - - if(fc != 1) continue; // only process green pixels - - // Determine if this row is a "red row" or "blue row" - // A "red row" has red pixels on it; a "blue row" has blue pixels + const size_t rowstart = (size_t)row * width; const gboolean red_row = (FC(row, 0, filters) == 0) || (FC(row, 1, filters) == 0); - - if(red_row) - { - // R at green pixel on red row: interpolate horizontally - R[idx] = G[idx] - + _menon_cnv_h3(R, row, col, width, height, k_b) - - _menon_cnv_h3(G, row, col, width, height, k_b); - // B at green pixel on red row: interpolate vertically - B[idx] = G[idx] - + _menon_cnv_v3(B, row, col, width, height, k_b) - - _menon_cnv_v3(G, row, col, width, height, k_b); - } - else // blue_row + for(int col = 0; col < width; col++) { - // R at green pixel on blue row: interpolate vertically - R[idx] = G[idx] - + _menon_cnv_v3(R, row, col, width, height, k_b) - - _menon_cnv_v3(G, row, col, width, height, k_b); - // B at green pixel on blue row: interpolate horizontally - B[idx] = G[idx] - + _menon_cnv_h3(B, row, col, width, height, k_b) - - _menon_cnv_h3(G, row, col, width, height, k_b); + const int fc = FC(row, col, filters); + if(fc != 1) continue; + const size_t idx = rowstart + col; + if(red_row) + { + R[idx] = G[idx] + + _menon_cnv_h3_kb(R, row, col, width) + - _menon_cnv_h3_kb(G, row, col, width); + B[idx] = G[idx] + + _menon_cnv_v3_kb(B, row, col, width, height) + - _menon_cnv_v3_kb(G, row, col, width, height); + } + else + { + R[idx] = G[idx] + + _menon_cnv_v3_kb(R, row, col, width, height) + - _menon_cnv_v3_kb(G, row, col, width, height); + B[idx] = G[idx] + + _menon_cnv_h3_kb(B, row, col, width) + - _menon_cnv_h3_kb(G, row, col, width); + } } } /* 7e: At blue pixels, interpolate R using direction mask M 7f: At red pixels, interpolate B using direction mask M */ DT_OMP_PRAGMA(parallel for schedule(static) default(none) - dt_omp_firstprivate(R, G, B, M, width, height, filters, k_b, numpix)) - for(size_t idx = 0; idx < numpix; idx++) + dt_omp_firstprivate(R, G, B, M, width, height, filters, k_b)) + for(int row = 0; row < height; row++) { - const int row = idx / width; - const int col = idx % width; - const int fc = FC(row, col, filters); - - if(fc == 2) // Blue pixel: interpolate R - { - if(M[idx]) - R[idx] = B[idx] - + _menon_cnv_h3(R, row, col, width, height, k_b) - - _menon_cnv_h3(B, row, col, width, height, k_b); - else - R[idx] = B[idx] - + _menon_cnv_v3(R, row, col, width, height, k_b) - - _menon_cnv_v3(B, row, col, width, height, k_b); - } - else if(fc == 0) // Red pixel: interpolate B + const size_t rowstart = (size_t)row * width; + for(int col = 0; col < width; col++) { - if(M[idx]) - B[idx] = R[idx] - + _menon_cnv_h3(B, row, col, width, height, k_b) - - _menon_cnv_h3(R, row, col, width, height, k_b); - else - B[idx] = R[idx] - + _menon_cnv_v3(B, row, col, width, height, k_b) - - _menon_cnv_v3(R, row, col, width, height, k_b); + const int fc = FC(row, col, filters); + const size_t idx = rowstart + col; + if(fc == 2) + { + if(M[idx]) + R[idx] = B[idx] + + _menon_cnv_h3_kb(R, row, col, width) + - _menon_cnv_h3_kb(B, row, col, width); + else + R[idx] = B[idx] + + _menon_cnv_v3_kb(R, row, col, width, height) + - _menon_cnv_v3_kb(B, row, col, width, height); + } + else if(fc == 0) + { + if(M[idx]) + B[idx] = R[idx] + + _menon_cnv_h3_kb(B, row, col, width) + - _menon_cnv_h3_kb(R, row, col, width); + else + B[idx] = R[idx] + + _menon_cnv_v3_kb(B, row, col, width, height) + - _menon_cnv_v3_kb(R, row, col, width, height); + } } } - /* ---- Step 8: Refinement ---- */ + /* ---- Step 8: Refinement (optional) ---- */ + if(refining_step) + { /* 8a: Update green at R/B locations using FIR filter on color differences */ { @@ -397,65 +509,67 @@ static void menon_demosaic(float *const restrict out, /* At R pixels: G = R - directional_FIR(R_G) At B pixels: G = B - directional_FIR(B_G) */ DT_OMP_PRAGMA(parallel for schedule(static) default(none) - dt_omp_firstprivate(R, G, B, R_G, B_G, M, width, height, filters, FIR, numpix)) - for(size_t idx = 0; idx < numpix; idx++) + dt_omp_firstprivate(R, G, B, R_G, B_G, M, width, height, filters, FIR)) + for(int row = 0; row < height; row++) { - const int row = idx / width; - const int col = idx % width; - const int fc = FC(row, col, filters); - if(fc == 0) // Red pixel - { - const float r_g_m = M[idx] - ? _menon_cnv_h3(R_G, row, col, width, height, FIR) - : _menon_cnv_v3(R_G, row, col, width, height, FIR); - G[idx] = R[idx] - r_g_m; - } - else if(fc == 2) // Blue pixel + const size_t rowstart = (size_t)row * width; + for(int col = 0; col < width; col++) { - const float b_g_m = M[idx] - ? _menon_cnv_h3(B_G, row, col, width, height, FIR) - : _menon_cnv_v3(B_G, row, col, width, height, FIR); - G[idx] = B[idx] - b_g_m; + const int fc = FC(row, col, filters); + const size_t idx = rowstart + col; + if(fc == 0) + { + const float r_g_m = M[idx] + ? _menon_cnv_h3_fir(R_G, row, col, width) + : _menon_cnv_v3_fir(R_G, row, col, width, height); + G[idx] = R[idx] - r_g_m; + } + else if(fc == 2) + { + const float b_g_m = M[idx] + ? _menon_cnv_h3_fir(B_G, row, col, width) + : _menon_cnv_v3_fir(B_G, row, col, width, height); + G[idx] = B[idx] - b_g_m; + } } } - /* Recompute R_G after green update */ + /* Recompute R_G and B_G after green update (fused into one pass) */ DT_OMP_PRAGMA(parallel for schedule(static) default(none) - dt_omp_firstprivate(R, G, R_G, numpix)) + dt_omp_firstprivate(R, G, B, R_G, B_G, numpix)) for(size_t idx = 0; idx < numpix; idx++) + { R_G[idx] = R[idx] - G[idx]; - - /* Recompute B_G after green update */ - DT_OMP_PRAGMA(parallel for schedule(static) default(none) - dt_omp_firstprivate(B, G, B_G, numpix)) - for(size_t idx = 0; idx < numpix; idx++) B_G[idx] = B[idx] - G[idx]; + } /* 8b: Update R at green pixels on blue rows (vertical averaging of R-G) Update R at green pixels on blue columns (horizontal averaging of R-G) Update B at green pixels on red rows (vertical averaging of B-G) Update B at green pixels on red columns (horizontal averaging of B-G) */ DT_OMP_PRAGMA(parallel for schedule(static) default(none) - dt_omp_firstprivate(R, G, B, R_G, B_G, width, height, filters, k_b, numpix)) - for(size_t idx = 0; idx < numpix; idx++) + dt_omp_firstprivate(R, G, B, R_G, B_G, width, height, filters, k_b)) + for(int row = 0; row < height; row++) { - const int row = idx / width; - const int col = idx % width; - const int fc = FC(row, col, filters); - if(fc != 1) continue; // only green pixels - + const size_t rowstart = (size_t)row * width; const gboolean red_row = (FC(row, 0, filters) == 0) || (FC(row, 1, filters) == 0); - const gboolean red_col = (FC(0, col, filters) == 0) || (FC(1, col, filters) == 0); - - if(!red_row) // blue row: update R vertically - R[idx] = G[idx] + _menon_cnv_v3(R_G, row, col, width, height, k_b); - if(!red_col) // blue column: update R horizontally - R[idx] = G[idx] + _menon_cnv_h3(R_G, row, col, width, height, k_b); - - if(red_row) // red row: update B vertically - B[idx] = G[idx] + _menon_cnv_v3(B_G, row, col, width, height, k_b); - if(red_col) // red column: update B horizontally - B[idx] = G[idx] + _menon_cnv_h3(B_G, row, col, width, height, k_b); + for(int col = 0; col < width; col++) + { + const int fc = FC(row, col, filters); + if(fc != 1) continue; + const size_t idx = rowstart + col; + const gboolean red_col = (FC(0, col, filters) == 0) || (FC(1, col, filters) == 0); + + if(!red_row) + R[idx] = G[idx] + _menon_cnv_v3_kb(R_G, row, col, width, height); + if(!red_col) + R[idx] = G[idx] + _menon_cnv_h3_kb(R_G, row, col, width); + + if(red_row) + B[idx] = G[idx] + _menon_cnv_v3_kb(B_G, row, col, width, height); + if(red_col) + B[idx] = G[idx] + _menon_cnv_h3_kb(B_G, row, col, width); + } } /* 8c: Update R at blue pixels and B at red pixels using R-B differences */ @@ -467,28 +581,32 @@ static void menon_demosaic(float *const restrict out, R_B[idx] = R[idx] - B[idx]; DT_OMP_PRAGMA(parallel for schedule(static) default(none) - dt_omp_firstprivate(R, B, R_B, M, width, height, filters, FIR, numpix)) - for(size_t idx = 0; idx < numpix; idx++) + dt_omp_firstprivate(R, B, R_B, M, width, height, filters, FIR)) + for(int row = 0; row < height; row++) { - const int row = idx / width; - const int col = idx % width; - const int fc = FC(row, col, filters); - if(fc == 2) // Blue pixel: update R from R-B - { - const float r_b_m = M[idx] - ? _menon_cnv_h3(R_B, row, col, width, height, FIR) - : _menon_cnv_v3(R_B, row, col, width, height, FIR); - R[idx] = B[idx] + r_b_m; - } - else if(fc == 0) // Red pixel: update B from R-B + const size_t rowstart = (size_t)row * width; + for(int col = 0; col < width; col++) { - const float r_b_m = M[idx] - ? _menon_cnv_h3(R_B, row, col, width, height, FIR) - : _menon_cnv_v3(R_B, row, col, width, height, FIR); - B[idx] = R[idx] - r_b_m; + const int fc = FC(row, col, filters); + const size_t idx = rowstart + col; + if(fc == 2) + { + const float r_b_m = M[idx] + ? _menon_cnv_h3_fir(R_B, row, col, width) + : _menon_cnv_v3_fir(R_B, row, col, width, height); + R[idx] = B[idx] + r_b_m; + } + else if(fc == 0) + { + const float r_b_m = M[idx] + ? _menon_cnv_h3_fir(R_B, row, col, width) + : _menon_cnv_v3_fir(R_B, row, col, width, height); + B[idx] = R[idx] - r_b_m; + } } } } + } // end if(refining_step) /* ---- Step 9: Write output in planar RGBX format ---- */ DT_OMP_PRAGMA(parallel for schedule(static) default(none)