Skip to content

Commit c356afe

Browse files
No OpenCL advantage hints
Until now we had the per-device advantage setting; it was used to decide whether tiled processing was worth doing via OpenCL. That decision was based on the assumption that comparing the overall amount of data processed on CPU vs. OpenCL, given a known "performance ratio", would be a good bet. Unfortunately, this didn't work very well. 1. The "calibration" was difficult and thus the advantage feature was used by only very few people. 2. The assumption was often misleading. Still, on some systems, like those with very small OpenCL memory but decent CPUs, it might be worthwhile to have non-demanding modules (like exposure, temperature ...) running on the GPU but exclude those requiring a lot of graphics memory. So let's 1. remove the advantage feature 2. introduce a per-device entry in the conf database, '***device***_nocl'. This can hold a comma-separated list of module->so names. Any module in this list will be executed on the CPU instead of OpenCL. It would be tempting to allow users to toggle OpenCL processing on/off in the module header, but that would only make sense for systems with a single OpenCL device. Some minor code cleanup when reading/writing per-device conf, simplifications rwvb
1 parent fc677e6 commit c356afe

6 files changed

Lines changed: 46 additions & 184 deletions

File tree

src/common/guided_filter.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -484,10 +484,8 @@ static int _guided_filter_cl_impl(int devid,
484484
const gboolean tiling = num_tiles > 1;
485485

486486
// When should we avoid internal tiling and thus use CPU fallback code?
487-
// Lets use advantage hint if provided or assume OpenCL is 10 times faster
488-
const float hint = darktable.opencl->dev[devid].advantage;
489-
const float advantage = hint > 1.0f ? 1.0f / hint : 0.1f;
490-
const gboolean possible = ((float)valid_rows / (float)tile_height) > advantage;
487+
// Lets assume OpenCL is 10 times faster
488+
const gboolean possible = ((float)valid_rows / (float)tile_height) > 0.1f;
491489

492490
if(tiling || (darktable.unmuted & DT_DEBUG_VERBOSE))
493491
dt_print(DT_DEBUG_PIPE | DT_DEBUG_TILING,

src/common/opencl.c

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -359,26 +359,33 @@ static void _opencl_write_device_config(const int devid)
359359

360360
gchar key[256] = { 0 };
361361
gchar dat[512] = { 0 };
362-
g_snprintf(key, 254, "%s%s", DT_CLDEVICE_HEAD, cl->dev[devid].cname);
363-
g_snprintf(dat, 510, "%i %i %i %i %i %.3f %.3f",
362+
g_snprintf(key, sizeof(key), "%s%s", DT_CLDEVICE_HEAD, cl->dev[devid].cname);
363+
g_snprintf(dat, sizeof(dat), "%i %i %i %i %i %.3f %.3f",
364364
cl->dev[devid].micro_nap,
365365
cl->dev[devid].pinned_memory,
366366

367367
// this used to define the number of slots, now a bool and using DT_OPENCL_EVENTS if true
368368
cl->dev[devid].use_events ? 1 : 0,
369369
cl->dev[devid].asyncmode,
370370
cl->dev[devid].disabled,
371-
cl->dev[devid].advantage,
371+
0.0f,
372372
cl->dev[devid].unified_fraction);
373373
dt_print_nts(DT_DEBUG_OPENCL | DT_DEBUG_VERBOSE,
374-
"\n[opencl_write_device_config] writing data '%s' for '%s'\n", dat, key);
374+
"[opencl_write_device_config] writing data '%s' for '%s'\n", dat, key);
375+
dt_conf_set_string(key, dat);
376+
377+
// write per device list of modules that should not use OpenCL
378+
g_snprintf(key, sizeof(key), "%s%s_nocl", DT_CLDEVICE_HEAD, cl->dev[devid].cname);
379+
g_snprintf(dat, sizeof(dat), "%s", cl->dev[devid].avoid ? cl->dev[devid].avoid : "");
380+
dt_print_nts(DT_DEBUG_OPENCL | DT_DEBUG_VERBOSE,
381+
"[opencl_write_device_config] writing data '%s' for '%s'\n", dat, key);
375382
dt_conf_set_string(key, dat);
376383

377384
// Also take care of extended device data, these are not only device
378385
// specific but also depend on the devid to support systems with two
379386
// similar cards.
380-
g_snprintf(key, 254, "%s%s_id%i", DT_CLDEVICE_HEAD, cl->dev[devid].cname, devid);
381-
g_snprintf(dat, 510, "%i", cl->dev[devid].headroom);
387+
g_snprintf(key, sizeof(key), "%s%s_id%i", DT_CLDEVICE_HEAD, cl->dev[devid].cname, devid);
388+
g_snprintf(dat, sizeof(dat), "%i", cl->dev[devid].headroom);
382389
dt_print_nts(DT_DEBUG_OPENCL | DT_DEBUG_VERBOSE,
383390
"[opencl_write_device_config] writing data '%s' for '%s'\n", dat, key);
384391
dt_conf_set_string(key, dat);
@@ -413,7 +420,7 @@ static gboolean _opencl_read_device_config(const int devid)
413420
dt_opencl_t *cl = darktable.opencl;
414421
dt_opencl_device_t *cldid = &cl->dev[devid];
415422
gchar key[256] = { 0 };
416-
g_snprintf(key, 254, "%s%s", DT_CLDEVICE_HEAD, cl->dev[devid].cname);
423+
g_snprintf(key, sizeof(key), "%s%s", DT_CLDEVICE_HEAD, cl->dev[devid].cname);
417424

418425
const gboolean existing_device = dt_conf_key_not_empty(key);
419426
gboolean safety_ok = TRUE;
@@ -435,7 +442,6 @@ static gboolean _opencl_read_device_config(const int devid)
435442
cldid->pinned_memory = pinned_memory ? TRUE : FALSE;
436443
cldid->asyncmode = asyncmode ? TRUE : FALSE;
437444
cldid->disabled = disabled ? TRUE : FALSE;
438-
cldid->advantage = advantage;
439445
cldid->unified_fraction = unified_fraction;
440446
}
441447

@@ -444,12 +450,14 @@ static gboolean _opencl_read_device_config(const int devid)
444450
cldid->unified_fraction = 0.25f;
445451
if((cldid->micro_nap < 0) || (cldid->micro_nap > 1000000))
446452
cldid->micro_nap = 250;
447-
if((cldid->advantage < 0.0f) || (cldid->advantage > 10000.0f))
448-
cldid->advantage = 0.0f;
453+
454+
// Also read the per-device list of modules to be avoided for OpenCL
455+
g_snprintf(key, sizeof(key), "%s%s_nocl", DT_CLDEVICE_HEAD, cl->dev[devid].cname);
456+
cldid->avoid = dt_conf_key_not_empty(key) ? dt_conf_get_string(key) : NULL;
449457

450458
// Also take care of extended device data, these are not only device
451459
// specific but also depend on the devid
452-
g_snprintf(key, 254, "%s%s_id%i", DT_CLDEVICE_HEAD, cldid->cname, devid);
460+
g_snprintf(key, sizeof(key), "%s%s_id%i", DT_CLDEVICE_HEAD, cldid->cname, devid);
453461
if(dt_conf_key_not_empty(key))
454462
{
455463
const gchar *dat = dt_conf_get_string_const(key);
@@ -516,6 +524,7 @@ static gboolean _opencl_device_init(dt_opencl_t *cl,
516524
cl->dev[dev].cname = NULL;
517525
cl->dev[dev].options = NULL;
518526
cl->dev[dev].cflags = NULL;
527+
cl->dev[dev].avoid = NULL;
519528
cl->dev[dev].memory_in_use = 0;
520529
cl->dev[dev].peak_memory = 0;
521530
cl->dev[dev].used_available = 0;
@@ -528,7 +537,6 @@ static gboolean _opencl_device_init(dt_opencl_t *cl,
528537
cl->dev[dev].clmem_error = FALSE;
529538
cl->dev[dev].clroundup_wd = 16;
530539
cl->dev[dev].clroundup_ht = 16;
531-
cl->dev[dev].advantage = 0.0f;
532540
cl->dev[dev].use_events = TRUE;
533541
cl->dev[dev].asyncmode = FALSE;
534542
cl->dev[dev].disabled = FALSE;
@@ -853,8 +861,8 @@ static gboolean _opencl_device_init(dt_opencl_t *cl,
853861
if(cl->dev[dev].max_global_mem < (uint64_t)800ul * DT_MEGA)
854862
{
855863
dt_print_nts(DT_DEBUG_OPENCL,
856-
" *** insufficient global memory (%" PRIu64 "MB) ***\n",
857-
cl->dev[dev].max_global_mem / DT_MEGA);
864+
" *** insufficient global memory %zu MB) ***\n",
865+
(size_t)cl->dev[dev].max_global_mem / DT_MEGA);
858866
res = TRUE;
859867
cl->dev[dev].disabled |= TRUE;
860868
goto end;
@@ -875,18 +883,15 @@ static gboolean _opencl_device_init(dt_opencl_t *cl,
875883
}
876884

877885
dt_print_nts(DT_DEBUG_OPENCL,
878-
" GLOBAL MEM SIZE: %.0f MB\n",
879-
(double)cl->dev[dev].max_global_mem / (double)DT_MEGA);
886+
" GLOBAL MEM SIZE: %zu MB\n", (size_t)(cl->dev[dev].max_global_mem / DT_MEGA));
880887
dt_print_nts(DT_DEBUG_OPENCL,
881-
" MAX IMAGE ALLOC: %.0f MB\n",
882-
(double)cl->dev[dev].max_mem_alloc / (double)DT_MEGA);
888+
" MAX IMAGE ALLOC: %zu MB\n", (size_t)(cl->dev[dev].max_mem_alloc / DT_MEGA));
883889
dt_print_nts(DT_DEBUG_OPENCL,
884-
" MAX IMAGE SIZE: %zd x %zd\n",
885-
cl->dev[dev].max_image_width, cl->dev[dev].max_image_height);
890+
" MAX IMAGE SIZE: %zu x %zu\n", cl->dev[dev].max_image_width, cl->dev[dev].max_image_height);
886891
dt_print_nts(DT_DEBUG_OPENCL,
887-
" MAX CONSTANT BUFFER: %.0f KB\n", (double)cl->dev[dev].max_mem_constant / 1024.0);
892+
" MAX CONSTANT BUFFER: %zu KB\n", (size_t)(cl->dev[dev].max_mem_constant / 1024));
888893
dt_print_nts(DT_DEBUG_OPENCL,
889-
" LOCAL MEM SIZE: %zu KB\n", cl->dev[dev].local_size / 1024lu);
894+
" LOCAL MEM SIZE: %zu KB\n", (size_t)(cl->dev[dev].local_size / 1024));
890895
dt_print_nts(DT_DEBUG_OPENCL,
891896
" ADDRESS ALIGN: %d B\n", cl->dev[dev].alignsize / 8);
892897
dt_print_nts(DT_DEBUG_OPENCL,
@@ -961,10 +966,10 @@ static gboolean _opencl_device_init(dt_opencl_t *cl,
961966
" EVENTS HANDLED: %s\n", STR_YESNO(cl->dev[dev].use_events));
962967
dt_print_nts(DT_DEBUG_OPENCL,
963968
" OPENCL FAST MODE: %s\n", STR_YESNO(fastopencl));
964-
dt_print_nts(DT_DEBUG_OPENCL,
965-
" TILING ADVANTAGE: %.3f\n", cl->dev[dev].advantage);
966969
dt_print_nts(DT_DEBUG_OPENCL,
967970
" DEFAULT DEVICE: %s\n", STR_YESNO(type & CL_DEVICE_TYPE_DEFAULT));
971+
dt_print_nts(DT_DEBUG_OPENCL,
972+
" AVOIDED MODULES: %s\n", cl->dev[dev].avoid ? cl->dev[dev].avoid : "none");
968973

969974
if(cl->dev[dev].disabled)
970975
{
@@ -1221,6 +1226,7 @@ static void _cleanup_cl_device_mem(dt_opencl_t *cl, const int i)
12211226
free((void *)(cl->dev[i].cname));
12221227
free((void *)(cl->dev[i].options));
12231228
free((void *)(cl->dev[i].cflags));
1229+
g_free((void *)(cl->dev[i].avoid));
12241230
}
12251231

12261232
void dt_opencl_init(dt_opencl_t *cl,
@@ -1607,7 +1613,7 @@ void dt_opencl_init(dt_opencl_t *cl,
16071613
dt_opencl_scheduling_profile_t profile = _opencl_get_scheduling_profile();
16081614
_opencl_apply_scheduling_profile(profile);
16091615

1610-
// let's keep track on unified memory devices
1616+
// let's report unified memory per device
16111617
dt_sys_resources_t *res = &darktable.dtresources;
16121618
for(int i = 0; i < cl->num_devs; i++)
16131619
{
@@ -3598,9 +3604,9 @@ void dt_opencl_memory_statistics(int devid,
35983604
{
35993605
dt_print(DT_DEBUG_OPENCL,"[opencl memory] device '%s' id=%d: %.1fMB in use, %.1fMB available GPU mem of %.1fMB",
36003606
cl->dev[devid].fullname, devid,
3601-
(float)cl->dev[devid].memory_in_use/(1024*1024),
3602-
(float)cl->dev[devid].used_available/(1024*1024),
3603-
(float)cl->dev[devid].max_global_mem/(1024*1024));
3607+
(float)cl->dev[devid].memory_in_use / DT_MEGA,
3608+
(float)cl->dev[devid].used_available / DT_MEGA,
3609+
(float)cl->dev[devid].max_global_mem / DT_MEGA);
36043610
if(cl->dev[devid].memory_in_use > darktable.opencl->dev[devid].used_available)
36053611
{
36063612
dt_print(DT_DEBUG_OPENCL,

src/common/opencl.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ typedef struct dt_opencl_device_t
151151
const char *cname;
152152
const char *options;
153153
const char *cflags;
154+
const char *avoid;
154155
cl_int summary;
155156
size_t memory_in_use;
156157
size_t peak_memory;
@@ -211,8 +212,6 @@ typedef struct dt_opencl_device_t
211212

212213
// lets keep the vendor for runtime checks
213214
int vendor_id;
214-
215-
float advantage;
216215
} dt_opencl_device_t;
217216

218217
struct dt_bilateral_cl_global_t;

src/develop/pixelpipe_hb.c

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1694,6 +1694,13 @@ static void _opencl_dump_diff_pipe_pfm(dt_dev_pixelpipe_t *pipe,
16941694
dt_free_align(clin);
16951695
}
16961696
}
1697+
1698+
static inline gboolean _avoid_cl_module(const dt_dev_pixelpipe_iop_t *piece)
1699+
{
1700+
const dt_opencl_device_t *cldid = &darktable.opencl->dev[piece->pipe->devid];
1701+
return cldid->avoid && dt_str_commasubstring(cldid->avoid, piece->module->op);
1702+
}
1703+
16971704
#endif
16981705

16991706
static inline gboolean _skip_piece_on_tags(const dt_dev_pixelpipe_iop_t *piece)
@@ -2140,7 +2147,8 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
21402147
gboolean possible_cl =
21412148
module->process_cl
21422149
&& piece->process_cl_ready
2143-
&& !(dt_pipe_is_preview(pipe) && (module->flags() & IOP_FLAGS_PREVIEW_NON_OPENCL));
2150+
&& !(dt_pipe_is_preview(pipe) && (module->flags() & IOP_FLAGS_PREVIEW_NON_OPENCL))
2151+
&& !_avoid_cl_module(piece);
21442152

21452153
const uint32_t m_bpp = MAX(in_bpp, bpp);
21462154
const size_t m_width = MAX(roi_in.width, roi_out->width);
@@ -2154,24 +2162,6 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
21542162
{
21552163
if(!_piece_may_tile(piece))
21562164
possible_cl = FALSE;
2157-
2158-
const float advantage = darktable.opencl->dev[pipe->devid].advantage;
2159-
if(possible_cl && (advantage > 0.0f))
2160-
{
2161-
const float tilemem_cl = dt_tiling_estimate_clmem(&tiling, piece,
2162-
&roi_in, roi_out, m_bpp);
2163-
const float tilemem_cpu = dt_tiling_estimate_cpumem(&tiling, piece,
2164-
&roi_in, roi_out, m_bpp);
2165-
if((tilemem_cpu * advantage) < tilemem_cl)
2166-
{
2167-
dt_print(DT_DEBUG_OPENCL | DT_DEBUG_TILING,
2168-
"[dt_dev_pixelpipetiling_cl] [%s] estimates cpu"
2169-
" advantage in `%s', (dev=%i, adv=%.2f, GPU %.2f CPU %.2f)",
2170-
dt_dev_pixelpipe_type_to_str(pipe->type), module->op, pipe->devid,
2171-
advantage, tilemem_cl / 1e9, tilemem_cpu / 1e9);
2172-
possible_cl = FALSE;
2173-
}
2174-
}
21752165
}
21762166

21772167
if(possible_cl)

src/develop/tiling.c

Lines changed: 0 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,129 +1181,7 @@ void default_process_tiling(dt_iop_module_t *self,
11811181
return;
11821182
}
11831183

1184-
float dt_tiling_estimate_cpumem(const dt_develop_tiling_t *tiling,
1185-
const dt_dev_pixelpipe_iop_t *piece,
1186-
const dt_iop_roi_t *const roi_in,
1187-
const dt_iop_roi_t *const roi_out,
1188-
const int max_bpp)
1189-
{
1190-
const int m_dx = MAX(roi_in->width, roi_out->width);
1191-
const int m_dy = MAX(roi_in->height, roi_out->height);
1192-
if(dt_tiling_piece_fits_host_memory(piece, m_dx, m_dy, max_bpp, tiling->factor, tiling->overhead))
1193-
return (float)m_dx * m_dy * max_bpp * tiling->factor + tiling->overhead;
1194-
1195-
const float fullscale = fmaxf(roi_in->scale / roi_out->scale, sqrtf(((float)roi_in->width * roi_in->height)
1196-
/ ((float)roi_out->width * roi_out->height)));
1197-
float available = dt_get_available_pipe_mem(piece->pipe);
1198-
available = fmaxf(available - ((float)roi_out->width * roi_out->height * max_bpp)
1199-
- ((float)roi_in->width * roi_in->height * max_bpp) - tiling->overhead, 0.0f);
1200-
1201-
float singlebuffer = dt_get_singlebuffer_mem();
1202-
const float factor = fmaxf(tiling->factor, 1.0f);
1203-
const float maxbuf = fmaxf(tiling->maxbuf, 1.0f);
1204-
singlebuffer = fmaxf(available / factor, singlebuffer);
1205-
1206-
int width = MAX(roi_in->width, roi_out->width);
1207-
int height = MAX(roi_in->height, roi_out->height);
1208-
1209-
const unsigned int align = tiling->align;
1210-
if((float)width * height * max_bpp * maxbuf > singlebuffer)
1211-
{
1212-
const float scale = singlebuffer / ((float)width * height * max_bpp * maxbuf);
1213-
if(width < height && scale >= 0.333f)
1214-
height = _align_down((int)floorf(height * scale), align);
1215-
else if(height <= width && scale >= 0.333f)
1216-
width = _align_down((int)floorf(width * scale), align);
1217-
else
1218-
{
1219-
width = _align_down((int)floorf(width * sqrtf(scale)), align);
1220-
height = _align_down((int)floorf(height * sqrtf(scale)), align);
1221-
}
1222-
}
1223-
1224-
if(3 * tiling->overlap > width || 3 * tiling->overlap > height)
1225-
width = height = _align_down((int)floorf(sqrtf((float)width * height)), align);
1226-
const int overlap_in = _align_up(tiling->overlap, align);
1227-
const int overlap_out = ceilf((float)overlap_in / fullscale);
1228-
1229-
int tiles_x = 1, tiles_y = 1;
1230-
1231-
if(roi_in->width > roi_out->width)
1232-
tiles_x = (width < roi_in->width) ? ceilf((float)roi_in->width / (float)MAX(width - 2 * overlap_in, 1)) : 1;
1233-
else
1234-
tiles_x = (width < roi_out->width) ? ceilf((float)roi_out->width / (float)MAX(width - 2 * overlap_out, 1)) : 1;
1235-
1236-
if(roi_in->height > roi_out->height)
1237-
tiles_y = (height < roi_in->height) ? ceilf((float)roi_in->height / (float)MAX(height - 2 * overlap_in, 1)) : 1;
1238-
else
1239-
tiles_y = (height < roi_out->height) ? ceilf((float)roi_out->height / (float)MAX(height - 2 * overlap_out, 1)) : 1;
1240-
dt_print(DT_DEBUG_TILING, "tilex = %i, tiley = %i", tiles_x, tiles_y);
1241-
return (float)tiles_x * tiles_y * singlebuffer ;
1242-
}
1243-
12441184
#ifdef HAVE_OPENCL
1245-
float dt_tiling_estimate_clmem(const dt_develop_tiling_t *tiling,
1246-
const dt_dev_pixelpipe_iop_t *piece,
1247-
const dt_iop_roi_t *const roi_in,
1248-
const dt_iop_roi_t *const roi_out,
1249-
const int max_bpp)
1250-
{
1251-
const int devid = piece->pipe->devid;
1252-
const float fullscale = fmaxf(roi_in->scale / roi_out->scale, sqrtf(((float)roi_in->width * roi_in->height)
1253-
/ ((float)roi_out->width * roi_out->height)));
1254-
const gboolean use_pinned_memory = dt_opencl_use_pinned_memory(devid);
1255-
/* If using pinned transfer on devices with dedicated GPU mem there is an additional
1256-
mem pressure as they will allocate also on device as cache for performance
1257-
*/
1258-
const float pinned_buffer_overhead = use_pinned_memory && !dt_opencl_unified_memory(devid) ? 2.0f : 0.0f;
1259-
const float pinned_buffer_slack = use_pinned_memory ? 0.85f : 1.0f;
1260-
const float available = (float)dt_opencl_get_device_available(devid);
1261-
const float factor = fmaxf(tiling->factor_cl + pinned_buffer_overhead, 1.0f);
1262-
const float singlebuffer = fminf(fmaxf((available - tiling->overhead) / factor, 0.0f),
1263-
pinned_buffer_slack * (float)(dt_opencl_get_device_memalloc(devid)));
1264-
const float maxbuf = fmaxf(tiling->maxbuf_cl, 1.0f);
1265-
1266-
int width = MIN(MAX(roi_in->width, roi_out->width), darktable.opencl->dev[devid].max_image_width);
1267-
int height = MIN(MAX(roi_in->height, roi_out->height), darktable.opencl->dev[devid].max_image_height);
1268-
1269-
const unsigned int align = _lcm(tiling->align, dt_opencl_tiling_align(devid));
1270-
1271-
if((float)width * height * max_bpp * maxbuf > singlebuffer)
1272-
{
1273-
const float scale = singlebuffer / ((float)width * height * max_bpp * maxbuf);
1274-
1275-
if(width < height && scale >= 0.333f)
1276-
height = _align_down((int)floorf(height * scale), align);
1277-
else if(height <= width && scale >= 0.333f)
1278-
width = _align_down((int)floorf(width * scale), align);
1279-
else
1280-
{
1281-
width = _align_down((int)floorf(width * sqrtf(scale)), align);
1282-
height = _align_down((int)floorf(height * sqrtf(scale)), align);
1283-
}
1284-
}
1285-
1286-
if(3 * tiling->overlap > width || 3 * tiling->overlap > height)
1287-
width = height = _align_down((int)floorf(sqrtf((float)width * height)), align);
1288-
1289-
const int overlap_in = _align_up(tiling->overlap, align);
1290-
const int overlap_out = ceilf((float)overlap_in / fullscale);
1291-
1292-
int tiles_x = 1, tiles_y = 1;
1293-
1294-
if(roi_in->width > roi_out->width)
1295-
tiles_x = (width < roi_in->width) ? ceilf((float)roi_in->width / (float)MAX(width - 2 * overlap_in, 1)) : 1;
1296-
else
1297-
tiles_x = (width < roi_out->width) ? ceilf((float)roi_out->width / (float)MAX(width - 2 * overlap_out, 1)) : 1;
1298-
1299-
if(roi_in->height > roi_out->height)
1300-
tiles_y = (height < roi_in->height) ? ceilf((float)roi_in->height / (float)MAX(height - 2 * overlap_in, 1)) : 1;
1301-
else
1302-
tiles_y = (height < roi_out->height) ? ceilf((float)roi_out->height / (float)MAX(height - 2 * overlap_out, 1)) : 1;
1303-
1304-
return (float)tiles_x * tiles_y * singlebuffer * factor;
1305-
}
1306-
13071185
/* simple tiling algorithm for roi_in == roi_out, i.e. for pixel to pixel modules/operations */
13081186
static int _default_process_tiling_cl_ptp(dt_iop_module_t *self,
13091187
dt_dev_pixelpipe_iop_t *piece,

src/develop/tiling.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,6 @@ void tiling_callback(struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t
7171
gboolean dt_tiling_piece_fits_host_memory(const struct dt_dev_pixelpipe_iop_t *piece, const size_t width, const size_t height, const unsigned bpp,
7272
const float factor, const size_t overhead);
7373

74-
float dt_tiling_estimate_cpumem(const dt_develop_tiling_t *tiling, const struct dt_dev_pixelpipe_iop_t *piece,
75-
const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
76-
const int max_bpp);
77-
78-
#ifdef HAVE_OPENCL
79-
float dt_tiling_estimate_clmem(const dt_develop_tiling_t *tiling, const struct dt_dev_pixelpipe_iop_t *piece,
80-
const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
81-
const int max_bpp);
82-
#endif
8374
// clang-format off
8475
// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
8576
// vim: shiftwidth=2 expandtab tabstop=2 cindent

0 commit comments

Comments
 (0)