@@ -1606,23 +1606,6 @@ void dt_opencl_init(dt_opencl_t *cl,
16061606 // priorities and pixelpipe synchronization timeout
16071607 dt_opencl_scheduling_profile_t profile = _opencl_get_scheduling_profile ();
16081608 _opencl_apply_scheduling_profile (profile );
1609-
1610- // let's keep track on unified memory devices
1611- dt_sys_resources_t * res = & darktable .dtresources ;
1612- for (int i = 0 ; i < cl -> num_devs ; i ++ )
1613- {
1614- if (cl -> dev [i ].unified_memory )
1615- {
1616- const size_t reserved = MIN (cl -> dev [i ].max_global_mem , res -> total_memory * cl -> dev [i ].unified_fraction );
1617- cl -> dev [i ].max_global_mem = reserved ;
1618- cl -> dev [i ].max_mem_alloc = MIN (cl -> dev [i ].max_mem_alloc , reserved );
1619- dt_print_nts (DT_DEBUG_OPENCL ,
1620- " UNIFIED MEM SIZE: %.0f MB (%i%%) reserved for '%s' id=%d\n" ,
1621- (double )reserved / 1024.0 / 1024.0 , (int )(100.0f * cl -> dev [i ].unified_fraction ),
1622- cl -> dev [i ].cname , i );
1623- res -> total_memory -= reserved ;
1624- }
1625- }
16261609 }
16271610 else // initialization failed
16281611 {
@@ -3412,13 +3395,19 @@ void *dt_opencl_alloc_device(const int devid,
34123395 return dev ;
34133396}
34143397
3398+ static cl_ulong _opencl_get_device_memalloc (const int devid ) // limit the visible allocators compare a requested buffer size against
3399+ {
3400+ dt_opencl_t * cl = darktable .opencl ;
3401+ return MIN (cl -> dev [devid ].used_available , cl -> dev [devid ].max_mem_alloc ); // smaller of both per-device limits; devid is not range-checked here
3402+ }
3403+
34153404void * dt_opencl_alloc_device_buffer (const int devid ,
34163405 const size_t size )
34173406{
34183407 if (!_cldev_running (devid ))
34193408 return NULL ;
34203409 dt_opencl_t * cl = darktable .opencl ;
3421- if (cl -> dev [ devid ]. max_mem_alloc < size )
3410+ if (_opencl_get_device_memalloc ( devid ) < size )
34223411 return NULL ;
34233412 cl_int err = CL_SUCCESS ;
34243413
@@ -3443,7 +3432,7 @@ void *dt_opencl_alloc_device_buffer_with_flags(const int devid,
34433432 if (!_cldev_running (devid ))
34443433 return NULL ;
34453434 dt_opencl_t * cl = darktable .opencl ;
3446- if (cl -> dev [ devid ]. max_mem_alloc < size )
3435+ if (_opencl_get_device_memalloc ( devid ) < size )
34473436 return NULL ;
34483437
34493438 cl_int err = CL_SUCCESS ;
@@ -3610,61 +3599,12 @@ void dt_opencl_memory_statistics(int devid,
36103599 }
36113600}
36123601
3613- /* amount of graphics memory declared as available depends on max_global_mem and
3614- "resourcelevel". We garantee
3615- - a headroom of DT_OPENCL_DEFAULT_HEADROOM MB in all cases not using tuned cl
3616- - 256MB to simulate a minimum system
3617- - 2GB to simulate a reference system
3618- */
3619- void dt_opencl_check_tuning (const int devid )
3620- {
3621- dt_sys_resources_t * res = & darktable .dtresources ;
3622- dt_opencl_t * cl = darktable .opencl ;
3623- if (!_cldev_running (devid )) return ;
3624-
3625- const int level = res -> level ;
3626- const gboolean tunehead = cl -> num_devs > 1
3627- && level >= 0
3628- && !dt_gimpmode ()
3629- && dt_conf_get_bool ("opencl_tune_headroom" );
3630-
3631- cl -> dev [devid ].tunehead = tunehead ;
3632-
3633- if (level < 0 )
3634- {
3635- cl -> dev [devid ].used_available = res -> refresource [4 * (- level - 1 ) + 3 ] * DT_MEGA ;
3636- }
3637- else
3638- {
3639- const size_t allmem = cl -> dev [devid ].max_global_mem ;
3640- const size_t lowmem = 256ul * DT_MEGA ;
3641- const size_t dhead = DT_OPENCL_DEFAULT_HEADROOM * DT_MEGA ;
3642- if (cl -> dev [devid ].tunehead )
3643- {
3644- const size_t headroom = (cl -> dev [devid ].headroom ? DT_MEGA * cl -> dev [devid ].headroom : dhead )
3645- + (cl -> dev [devid ].clmem_error ? dhead : 0 );
3646- cl -> dev [devid ].used_available = allmem > headroom ? allmem - headroom : lowmem ;
3647- }
3648- else
3649- {
3650- const size_t disposable = allmem > dhead ? allmem - dhead : 0 ;
3651- const int fraction = MIN (1024 , res -> fractions [4 * res -> level + 3 ]);
3652- cl -> dev [devid ].used_available = MAX (lowmem , disposable / 1024ul * fraction );
3653- }
3654- }
3655- }
3656-
36573602cl_ulong dt_opencl_get_device_available (const int devid )
36583603{
36593604 if (!darktable .opencl -> inited || devid <= DT_DEVICE_CPU ) return 0 ;
36603605 return darktable .opencl -> dev [devid ].used_available ;
36613606}
36623607
3663- static cl_ulong _opencl_get_device_memalloc (const int devid )
3664- {
3665- return darktable .opencl -> dev [devid ].max_mem_alloc ;
3666- }
3667-
36683608cl_ulong dt_opencl_get_device_memalloc (const int devid )
36693609{
36703610 if (!darktable .opencl -> inited || devid <= DT_DEVICE_CPU ) return 0 ;
@@ -3756,6 +3696,59 @@ void dt_opencl_update_settings(void)
37563696 const char * pstr = dt_conf_get_string_const ("opencl_scheduling_profile" );
37573697 dt_print (DT_DEBUG_OPENCL | DT_DEBUG_VERBOSE ,
37583698 "[opencl_update_settings] scheduling profile set to %s" , pstr );
3699+
3700+ dt_sys_resources_t * res = & darktable .dtresources ;
3701+ /* If we have cl devices with unified memory, we should not use that part
3702+ for general dt use.
3703+ As that part might change with a different resource level, we have to
3704+ recompute it whenever the resource level changes.
3705+ */
3706+ res -> cl_uni_memory = 0 ;
3707+ const int level = res -> level ;
3708+ const gboolean tunehead = cl -> num_devs > 1
3709+ && level >= 0
3710+ && !dt_gimpmode ()
3711+ && dt_conf_get_bool ("opencl_tune_headroom" );
3712+
3713+ for (int i = 0 ; i < cl -> num_devs ; i ++ )
3714+ {
3715+ cl -> dev [i ].tunehead = tunehead ;
3716+ if (level < 0 )
3717+ {
3718+ cl -> dev [i ].used_available = res -> refresource [4 * (- level - 1 ) + 3 ] * DT_MEGA ;
3719+ }
3720+ else
3721+ {
3722+ const size_t allmem = cl -> dev [i ].max_global_mem ;
3723+ const size_t lowmem = 256ul * DT_MEGA ;
3724+ const size_t dhead = DT_OPENCL_DEFAULT_HEADROOM * DT_MEGA ;
3725+ if (cl -> dev [i ].tunehead )
3726+ {
3727+ const size_t headroom = cl -> dev [i ].headroom ? DT_MEGA * cl -> dev [i ].headroom : dhead ;
3728+ cl -> dev [i ].used_available = allmem > headroom ? allmem - headroom : lowmem ;
3729+ }
3730+ else
3731+ {
3732+ const size_t disposable = allmem > dhead ? allmem - dhead : 0 ;
3733+ const int fraction = MIN (1024 , res -> fractions [4 * res -> level + 3 ]);
3734+ cl -> dev [i ].used_available = MAX (lowmem , disposable / 1024ul * fraction );
3735+ }
3736+ }
3737+
3738+ if (cl -> dev [i ].unified_memory )
3739+ {
3740+ cl -> dev [i ].used_available = MIN (cl -> dev [i ].used_available , res -> total_memory * cl -> dev [i ].unified_fraction );
3741+ res -> cl_uni_memory += cl -> dev [i ].used_available ;
3742+ }
3743+ dt_print_nts (DT_DEBUG_OPENCL ,
3744+ " AVAILABLE CLMEM SIZE: %zu MB%s%s\n" ,
3745+ (size_t )(cl -> dev [i ].used_available / DT_MEGA ),
3746+ cl -> dev [i ].tunehead ? ", tuned" : "" ,
3747+ cl -> dev [i ].pinned_memory ? ", pinned" : "" );
3748+ }
3749+ if (res -> cl_uni_memory )
3750+ dt_print_nts (DT_DEBUG_OPENCL ,
3751+ " UNIFIED SYSMEM SIZE: %zu MB\n" , (size_t )(res -> cl_uni_memory / DT_MEGA ));
37593752}
37603753
37613754/** read scheduling profile for config variables */
0 commit comments