darktable-org
diff --git a/data/kernels/basic.cl b/data/kernels/basic.cl
Lines changed: 9 additions & 9 deletions
diff --git a/data/kernels/blendop.cl b/data/kernels/blendop.cl
Lines changed: 8 additions & 8 deletions
diff --git a/data/kernels/colorequal.cl b/data/kernels/colorequal.cl
Lines changed: 4 additions & 15 deletions
diff --git a/data/kernels/colorharmonizer.cl b/data/kernels/colorharmonizer.cl
Lines changed: 2 additions & 2 deletions
diff --git a/data/kernels/colorreconstruction.cl b/data/kernels/colorreconstruction.cl
Lines changed: 1 addition & 1 deletion
diff --git a/data/kernels/colorspace.h b/data/kernels/colorspace.h
Lines changed: 8 additions & 8 deletions
diff --git a/data/kernels/common.h b/data/kernels/common.h
Lines changed: 36 additions & 7 deletions
diff --git a/data/kernels/demosaic_rcd.cl b/data/kernels/demosaic_rcd.cl
Lines changed: 8 additions & 12 deletions
diff --git a/src/chart/thinplate.c b/src/chart/thinplate.c
Lines changed: 1 addition & 1 deletion
@@ -3243,12 +3243,12 @@ colorzones_v3 (read_only image2d_t in,
 
   if(x >= width || y >= height) return;
 
-  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
+  float4 pixel = readpixel(in, x, y);
 
   const float a = pixel.y;
   const float b = pixel.z;
-  const float h = fmod(atan2(b, a) + 2.0f*M_PI_F, 2.0f*M_PI_F)/(2.0f*M_PI_F);
-  const float C = sqrt(b*b + a*a);
+  const float h = fmod(atan2(b, a) + DT_2PI_F, DT_2PI_F) / DT_2PI_F;
+  const float C = dt_fast_hypot(b, a);
 
   float select = 0.0f;
   float blend = 0.0f;
@@ -3264,7 +3264,7 @@ colorzones_v3 (read_only image2d_t in,
     default:
     case DT_IOP_COLORZONES_h:
       select = h;
-      blend = pow(1.0f - C/128.0f, 2.0f);
+      blend = dtcl_pow(1.0f - C/128.0f, 2.0f);
       break;
   }
 
@@ -3273,11 +3273,11 @@ colorzones_v3 (read_only image2d_t in,
   blend *= blend; // saturation isn't as prone to artifacts:
   // const float Cm = 2.0f* (blend*0.5f + (1.0f-blend)*lookup(d->lut[1], select));
   const float Cm = 2.0f * lookup(table_a, select);
-  const float L = pixel.x * pow(2.0f, 4.0f*Lm);
+  const float L = pixel.x * dtcl_pow(2.0f, 4.0f*Lm);
 
   pixel.x = L;
-  pixel.y = cos(2.0f*M_PI_F*(h + hm)) * Cm * C;
-  pixel.z = sin(2.0f*M_PI_F*(h + hm)) * Cm * C;
+  pixel.y = dtcl_cos(DT_2PI_F*(h + hm)) * Cm * C;
+  pixel.z = dtcl_sin(DT_2PI_F*(h + hm)) * Cm * C;
 
   write_imagef (out, (int2)(x, y), pixel);
 }
@@ -3297,10 +3297,10 @@ colorzones (read_only image2d_t in,
 
   if(x >= width || y >= height) return;
 
-  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
+  float4 pixel = readpixel(in, x, y);
 
   float4 LCh;
-  const float normalize_C = 1.f / (128.0f * sqrt(2.f));
+  const float normalize_C = 1.f / (128.0f * M_SQRT2_F);
 
   LCh = Lab_2_LCH(pixel);
 
 
@@ -194,10 +194,10 @@ blendif_factor_Lab(const float4 input, const float4 output,
     float4 LCH_input = Lab_2_LCH(input);
     float4 LCH_output = Lab_2_LCH(output);
 
-    scaled[DEVELOP_BLENDIF_C_in] = LCH_input.y / (128.0f*sqrt(2.0f));        // C scaled to 0..1
+    scaled[DEVELOP_BLENDIF_C_in] = LCH_input.y / (128.0f*M_SQRT2_F);        // C scaled to 0..1
     scaled[DEVELOP_BLENDIF_h_in] = LCH_input.z;		                // h scaled to 0..1
 
-    scaled[DEVELOP_BLENDIF_C_out] = LCH_output.y / (128.0f*sqrt(2.0f));       // C scaled to 0..1
+    scaled[DEVELOP_BLENDIF_C_out] = LCH_output.y / (128.0f*M_SQRT2_F);       // C scaled to 0..1
     scaled[DEVELOP_BLENDIF_h_out] = LCH_output.z;		                // h scaled to 0..1
   }
 
@@ -1266,10 +1266,10 @@ blendop_rgb_hsl(__read_only image2d_t in_a, __read_only image2d_t in_b, __read_o
       ta = RGB_2_HSV(a);
       tb = RGB_2_HSV(b);
       // blend color vectors of input and output
-      d = ta.y*cos(2.0f*M_PI_F*ta.x) * (1.0f - opacity) + tb.y*cos(2.0f*M_PI_F*tb.x) * opacity;
-      s = ta.y*sin(2.0f*M_PI_F*ta.x) * (1.0f - opacity) + tb.y*sin(2.0f*M_PI_F*tb.x) * opacity;
-      to.x = fmod(atan2(s, d)/(2.0f*M_PI_F)+1.0f, 1.0f);
-      to.y = sqrt(s*s + d*d);
+      d = ta.y*cos(DT_2PI_F*ta.x) * (1.0f - opacity) + tb.y*cos(DT_2PI_F*tb.x) * opacity;
+      s = ta.y*sin(DT_2PI_F*ta.x) * (1.0f - opacity) + tb.y*sin(DT_2PI_F*tb.x) * opacity;
+      to.x = fmod(atan2(s, d)/DT_2PI_F+1.0f, 1.0f);
+      to.y = dt_fast_hypot(s, d);
       to.z = ta.z;
       o = HSV_2_RGB(to);
       break;
@@ -1560,12 +1560,12 @@ blendop_display_channel(__read_only image2d_t in_a, __read_only image2d_t in_b,
       break;
     case DT_DEV_PIXELPIPE_DISPLAY_LCH_C:
       LCH = Lab_2_LCH(a);
-      c = clipf(LCH.y / (128.0f * sqrt(2.0f) / exp2(boost_factors[DEVELOP_BLENDIF_C_in])));
+      c = clipf(LCH.y / (128.0f * M_SQRT2_F / exp2(boost_factors[DEVELOP_BLENDIF_C_in])));
       is_lab = 1;
       break;
     case (DT_DEV_PIXELPIPE_DISPLAY_LCH_C | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT):
       LCH = Lab_2_LCH(b);
-      c = clipf(LCH.y / (128.0f * sqrt(2.0f)) / exp2(boost_factors[DEVELOP_BLENDIF_C_out]));
+      c = clipf(LCH.y / (128.0f * M_SQRT2_F) / exp2(boost_factors[DEVELOP_BLENDIF_C_out]));
       is_lab = 1;
       break;
     case DT_DEV_PIXELPIPE_DISPLAY_LCH_h:
 
@@ -37,23 +37,12 @@ typedef enum dt_iop_colorequal_channel_t
 
 static inline float _get_satweight(const float sat, global float *weights)
 {
-  const float isat = SATSIZE * (1.0f + clamp(sat, -1.0f, 1.0f - (1.0f / SATSIZE)));
+  const float isat = (float)SATSIZE * (1.0f + clamp(sat, -1.0f, 1.0f - (1.0f / SATSIZE)));
   const float base = floor(isat);
   const int i = (int)base;
   return weights[i] + (isat - base) * (weights[i+1] - weights[i]);
 }
 
-static inline float _scharr_gradient(global float *in,
-                                     const size_t k,
-                                     const int w)
-{
-  const float gx = 47.0f / 255.0f * (in[k-w-1] - in[k-w+1] + in[k+w-1] - in[k+w+1])
-                + 162.0f / 255.0f * (in[k-1]   - in[k+1]);
-  const float gy = 47.0f / 255.0f * (in[k-w-1] - in[k+w-1] + in[k-w+1] - in[k+w+1])
-                + 162.0f / 255.0f * (in[k-w]   - in[k+w]);
-  return dt_fast_hypot(gx, gy);
-}
-
 static inline float gamut_map_HSB(const float4 HSB, global float *gamut_LUT, const float L_white)
 {
   const float4 JCH = dt_UCS_HSB_to_JCH(HSB);
@@ -258,9 +247,9 @@ __kernel void apply_guided(global float2 *uv,
   const float2 CV = { a[k].x * uv[k].x + a[k].y * uv[k].y + b[k].x,
                       a[k].z * uv[k].x + a[k].w * uv[k].y + b[k].y };
 
-  corrections[k].y = mix(1.0f, CV.x, _get_satweight(saturation[k] - sat_shift, weights));
+  corrections[k].y = 1.0f + (CV.x - 1.0f) * _get_satweight(saturation[k] - sat_shift, weights);
   const float gradient_weight = 1.0f - clipf(scharr[k]);
-  b_corrections[k] = mix(0.0f, CV.y, gradient_weight * _get_satweight(saturation[k] - bright_shift, weights));
+  b_corrections[k] = CV.y * gradient_weight * _get_satweight(saturation[k] - bright_shift, weights);
 }
 
 __kernel void sample_input(__read_only image2d_t dev_in,
@@ -443,7 +432,7 @@ __kernel void process_data(global float2 *uv,
   {
     const int kk = mad24(clamp(row, 1, height - 2), width, clamp(col, 1, width - 2));
 
-    const float kscharr = fmax(0.0f, _scharr_gradient(saturation, kk, width) - 0.02f);
+    const float kscharr = fmax(0.0f, scharr_gradient(saturation, kk, width) - 0.02f);
     Lscharr[k] = gradient_amp * kscharr * kscharr;
   }
 
 
@@ -99,7 +99,7 @@ kernel void colorharmonizer_map(read_only  image2d_t  in,
   float4 xyY = dt_D65_XYZ_to_xyY(XYZ_D65);
   float4 JCH = xyY_to_dt_UCS_JCH(xyY, L_white);
 
-  const float hue = (JCH.z + M_PI_F) / (2.0f * M_PI_F);
+  const float hue = (JCH.z + M_PI_F) / DT_2PI_F;
 
   const int idx = y * width + x;
   jch_out[idx] = (float4)(JCH.x, JCH.y, hue, pix_in.w);
@@ -146,7 +146,7 @@ kernel void colorharmonizer_apply(write_only image2d_t  out,
   float4 JCH;
   JCH.x = J;
   JCH.y = fmax(chroma * (1.0f + corr.y * chroma_weight), 0.0f);
-  JCH.z = wrap_hue(hue + corr.x * effect_strength * chroma_weight) * 2.0f * M_PI_F - M_PI_F;
+  JCH.z = wrap_hue(hue + corr.x * effect_strength * chroma_weight) * DT_2PI_F - M_PI_F;
 
   float4 xyY = dt_UCS_JCH_to_xyY(JCH, L_white);
   float4 XYZ_D65 = dt_xyY_to_XYZ(xyY);
 
@@ -99,7 +99,7 @@ colorreconstruction_splat(
     case COLORRECONSTRUCT_PRECEDENCE_HUE:
       m = atan2(pixel.z, pixel.y) - params.x;
       // readjust m into [-pi, +pi] interval
-      m = m > M_PI_F ? m - 2*M_PI_F : (m < -M_PI_F ? m + 2*M_PI_F : m);
+      m = m > M_PI_F ? m - DT_2PI_F : (m < -M_PI_F ? m + DT_2PI_F : m);
       weight = exp(-m*m/params.y);
       break;
 
 
@@ -56,10 +56,10 @@ static inline float4 Lab_2_LCH(float4 Lab)
 {
   float H = atan2(Lab.z, Lab.y);
 
-  H = (H > 0.0f) ? H / (2.0f*M_PI_F) : 1.0f - fabs(H) / (2.0f*M_PI_F);
+  H = (H > 0.0f) ? H / DT_2PI_F : 1.0f - fabs(H) / DT_2PI_F;
 
   const float L = Lab.x;
-  const float C = hypot(Lab.y, Lab.z);
+  const float C = dt_fast_hypot(Lab.y, Lab.z);
 
   return (float4)(L, C, H, Lab.w);
 }
@@ -68,8 +68,8 @@ static inline float4 Lab_2_LCH(float4 Lab)
 static inline float4 LCH_2_Lab(float4 LCH)
 {
   const float L = LCH.x;
-  const float a = cos(2.0f*M_PI_F*LCH.z) * LCH.y;
-  const float b = sin(2.0f*M_PI_F*LCH.z) * LCH.y;
+  const float a = cos(DT_2PI_F*LCH.z) * LCH.y;
+  const float b = sin(DT_2PI_F*LCH.z) * LCH.y;
 
   return (float4)(L, a, b, LCH.w);
 }
@@ -434,10 +434,10 @@ static inline float4 JzAzBz_2_XYZ(const float4 JzAzBz)
 
 static inline float4 JzAzBz_to_JzCzhz(float4 JzAzBz)
 {
-  const float h = atan2(JzAzBz.z, JzAzBz.y) / (2.0f * M_PI_F);
+  const float h = atan2(JzAzBz.z, JzAzBz.y) / DT_2PI_F;
   float4 JzCzhz;
   JzCzhz.x = JzAzBz.x;
-  JzCzhz.y = hypot(JzAzBz.y, JzAzBz.z);
+  JzCzhz.y = dt_fast_hypot(JzAzBz.y, JzAzBz.z);
   JzCzhz.z = (h >= 0.0f) ? h : 1.0f + h;
   JzCzhz.w = JzAzBz.w;
   return JzCzhz;
@@ -561,7 +561,7 @@ static inline float4 Yrg_to_Ych(const float4 Yrg)
   // -> grading RGB conversion.
   const float r = Yrg.y - 0.21902143f;
   const float g = Yrg.z - 0.54371398f;
-  const float c = hypot(g, r);
+  const float c = dt_fast_hypot(g, r);
   const float cos_h = c != 0.f ? r / c : 1.f;
   const float sin_h = c != 0.f ? g / c : 0.f;
   return (float4)(Y, c, cos_h, sin_h);
@@ -955,7 +955,7 @@ static inline float lookup_gamut(global const float *gamut_lut, const float x)
 
   // Linearly interpolate the value of the gamut LUT at the hue angle in radians.
   // convert in LUT coordinate
-  const float x_test = (float)LUT_ELEM * (x + M_PI_F) / (2.f * M_PI_F);
+  const float x_test = (float)LUT_ELEM * (x + M_PI_F) / DT_2PI_F;
 
   // find the 2 closest integer coordinates (next/previous)
   const float x_prev = floor(x_test);
 
@@ -31,10 +31,26 @@ constant sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
 
 
 #ifndef M_PI_F
-#define M_PI_F           3.14159265358979323846f  // should be defined by the OpenCL compiler acc. to standard
+#define M_PI_F 3.14159265358979323846f
 #endif
 
+#ifndef M_LN2f
 #define M_LN2f 0.69314718055994530942f
+#endif
+
+#ifndef M_PI_2f
+#define M_PI_2f 1.57079632679489661923f
+#endif
+
+#ifndef M_PI_4f
+#define M_PI_4f 0.78539816339744830962f
+#endif
+
+#ifndef M_SQRT2_F
+#define M_SQRT2_F 1.41421356237309504880f
+#endif
+
+#define DT_2PI_F 6.28318530717958647693f
 
 #define LUT_ELEM 512 // gamut LUT number of elements:
 
@@ -55,6 +71,11 @@ constant sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
   #define dtcl_sin(A) native_sin(A)
   #define dtcl_cos(A) native_cos(A)
 
+  static inline float dt_fast_hypot(const float x, const float y)
+  {
+    return native_sqrt(x * x + y * y);
+  }
+
   // Allow the compiler to convert a * b + c to fused multiply-add to use hardware acceleration
   // on compatible platforms
   #pragma OPENCL FP_CONTRACT ON
@@ -70,6 +91,11 @@ constant sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
   #define dtcl_sin(A) sin(A)
   #define dtcl_cos(A) cos(A)
 
+  static inline float dt_fast_hypot(const float x, const float y)
+  {
+    return hypot(x, y);
+  }
+
   #pragma OPENCL FP_CONTRACT OFF
 #endif
 
@@ -82,6 +108,15 @@ constant sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
     m = t2;                         \
   }
 
+static inline float scharr_gradient(global float *in, const int k, const int w)
+{
+  const float gx = 47.0f / 255.0f * (in[k-w-1] - in[k-w+1] + in[k+w-1] - in[k+w+1])
+                + 162.0f / 255.0f * (in[k-1]   - in[k+1]);
+  const float gy = 47.0f / 255.0f * (in[k-w-1] - in[k+w-1] + in[k-w+1] - in[k+w+1])
+                + 162.0f / 255.0f * (in[k-w]   - in[k+w]);
+  return dt_fast_hypot(gx, gy);
+}
+
 static inline int
 FC(const int row, const int col, const unsigned int filters)
 {
@@ -143,12 +178,6 @@ atomic_add_f(
 #endif
 }
 
-static inline float
-dt_fast_hypot(const float x, const float y)
-{
-  return dtcl_sqrt(x * x + y * y);
-}
-
 /* we use this exp approximation to maintain full identity with cpu path */
 static inline float
 dt_fast_expf(const float x)
 
@@ -32,7 +32,7 @@ __kernel void rcd_populate (__read_only image2d_t in, global float *cfa, global
   const int col = get_global_id(0);
   const int row = get_global_id(1);
   if(col >= w || row >= height) return;
-  const float val = scale * fmax(0.0f, readsingle(in, col, row));
+  const float val = scale * fmax(0.0f, Areadsingle(in, col, row));
   const int color = FC(row, col, filters);
 
   global float *rgbcol = rgb0;
@@ -51,7 +51,7 @@ __kernel void rcd_write_output (__write_only image2d_t out, global float *rgb0,
   if(!(col >= border && col < w - border && row >= border && row < height - border)) return;
   const int idx = mad24(row, w, col);
 
-  write_imagef(out, (int2)(col, row), (float4)(fmax(scale * rgb0[idx], 0.0f), fmax(scale * rgb1[idx], 0.0f), fmax(scale * rgb2[idx], 0.0f), 0.0f));
+  write_imagef(out, (int2)(col, row), fmax(0.0f, (float4)(scale * rgb0[idx], scale * rgb1[idx], scale * rgb2[idx], 0.0f)));
 }
 
 #define eps 1e-5f              // Tolerance to avoid dividing by zero
@@ -278,8 +278,8 @@ __kernel void write_blended_dual(__read_only image2d_t high,
   const int row = get_global_id(1);
   if((col >= w) || (row >= height)) return;
 
-  const float4 high_val = readpixel(high, col, row);
-  const float4 low_val = readpixel(low, col, row);
+  const float4 high_val = Areadpixel(high, col, row);
+  const float4 low_val = Areadpixel(low, col, row);
   const float4 blender = (float4)clipf(mask[mad24(row, w, col)]);
   float4 data = mix(low_val, high_val, blender);
   data.w = showmask ? blender.x : 0.0f;
@@ -297,8 +297,8 @@ __kernel void calc_Y0_mask(global float *mask,
   if((col >= w) || (row >= height)) return;
   const int idx = mad24(row, w, col);
 
-  const float4 pt = wb * fmax(0.0f, readpixel(in, col, row));
-  mask[idx] = dtcl_sqrt(0.33333333f * (pt.x + pt.y + pt.z));
+  const float4 pt = wb * fmax(0.0f, Areadpixel(in, col, row));
+  mask[idx] = dtcl_sqrt((pt.x + pt.y + pt.z) / 3.0f);
 }
 
 __kernel void calc_scharr_mask(global float *in, global float *out, const int w, const int height)
@@ -311,11 +311,7 @@ __kernel void calc_scharr_mask(global float *in, global float *out, const int w,
   const int incol = clamp(col, 1, w - 2);
   const int inrow = clamp(row, 1, height -2);
   const int idx = mad24(inrow, w, incol);
-  const float gx = 47.0f / 255.0f * (in[idx-w-1] - in[idx-w+1] + in[idx+w-1] - in[idx+w+1])
-                + 162.0f / 255.0f * (in[idx-1]   - in[idx+1]);
-  const float gy = 47.0f / 255.0f * (in[idx-w-1] - in[idx+w-1] + in[idx-w+1] - in[idx+w+1])
-                + 162.0f / 255.0f * (in[idx-w]   - in[idx+w]);
-  const float gradient_magnitude = dt_fast_hypot(gx, gy);
+  const float gradient_magnitude = scharr_gradient(in, idx, w);
   out[oidx] = clipf(gradient_magnitude / 16.0f);
 }
 
@@ -350,7 +346,7 @@ kernel void demosaic_box3(read_only image2d_t in,
       if(x >= 0 && y >= 0 && x < width && y < height)
       {
         const int color = fcol(y, x, filters, xtrans);
-        sum[color] += fmax(0.0f, read_imagef(in, sampleri, (int2)(x, y)).x);
+        sum[color] += fmax(0.0f, Areadsingle(in, x, y));
         cnt[color] += 1.0f;
       }
     }
 
@@ -440,7 +440,7 @@ int thinplate_match(const tonecurve_t *curve, // tonecurve to apply after this (
 float thinplate_color_pos(const float L, const float a, const float b)
 {
   const float h = atan2f(b, a) + M_PI_F;
-  const int sector = 4.0f * h / (2.0f * M_PI_F);
+  const int sector = 4.0f * h / DT_2PI_F;
   return 256.0 * sector + L; // C;
 }
Original file line number	Diff line number	Diff line change
`@@ -440,7 +440,7 @@ int thinplate_match(const tonecurve_t *curve, // tonecurve to apply after this (`
`440`	`440`	`float thinplate_color_pos(const float L, const float a, const float b)`
`441`	`441`	`{`
`442`	`442`	`const float h = atan2f(b, a) + M_PI_F;`
`443`		`- const int sector = 4.0f * h / (2.0f * M_PI_F);`
	`443`	`+ const int sector = 4.0f * h / DT_2PI_F;`
`444`	`444`	`return 256.0 * sector + L; // C;`
`445`	`445`	`}`
`446`	`446`