File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -24,7 +24,7 @@ if(Halide_TARGET MATCHES "cuda|metal")
2424 list (APPEND _camera_pipe_autoscheduler_params
2525 autoscheduler.last_level_cache_size=10000
2626 )
27- elseif (Halide_TARGET MATCHES "opencl" )
27+ elseif (Halide_TARGET MATCHES "opencl|vulkan" )
2828 # Set last_level_cache per GPU block to an extremely small value. This
2929 # eliminates all `.compute_at` in the generated schedules, which in turn
3030 # eliminates all GPU shared memory allocations.
Original file line number Diff line number Diff line change @@ -14,12 +14,23 @@ find_package(Halide REQUIRED)
1414# Generator
1515add_halide_generator (harris.generator SOURCES harris_generator.cpp )
1616
17+ set (_harris_autoscheduler_params autoscheduler.experimental_gpu_schedule=1)
18+
19+ if (Halide_TARGET MATCHES "opencl|metal" )
20+ # Set last_level_cache per GPU block to an extremely small value. This
21+ # eliminates all `.compute_at` in the generated schedules, which in turn
22+ # eliminates all GPU shared memory allocations.
23+ list (APPEND _harris_autoscheduler_params
24+ autoscheduler.last_level_cache_size=1000
25+ )
26+ endif ()
27+
1728# Filters
1829add_halide_library (harris FROM harris.generator )
1930add_halide_library (harris_auto_schedule FROM harris.generator
2031 GENERATOR harris
2132 AUTOSCHEDULER Halide::Mullapudi2016
22- PARAMS autoscheduler.experimental_gpu_schedule=1 )
33+ PARAMS ${_harris_autoscheduler_params} )
2334
2435# Main executable
2536add_executable (harris_filter filter.cpp )
Original file line number Diff line number Diff line change @@ -24,12 +24,12 @@ if(Halide_TARGET MATCHES "cuda")
2424 list (APPEND _local_laplacian_autoscheduler_params
2525 autoscheduler.last_level_cache_size=10000
2626 )
27- elseif (Halide_TARGET MATCHES "metal|opencl" )
28- # Set last_level_cache per GPU block to an extremely small value. This
29- # eliminates all `.compute_at` in the generated schedules, which in turn
30- # eliminates all GPU shared memory allocations.
27+ elseif (Halide_TARGET MATCHES "metal|opencl|vulkan" )
28+ # The pipeline is bounded by GPU shared memory. Limit the parallelism to
29+ # a minimal value (=32) to cap the GPU shared memory size.
3130 list (APPEND _local_laplacian_autoscheduler_params
3231 autoscheduler.last_level_cache_size=1000
32+ autoscheduler.parallelism=32
3333 )
3434endif ()
3535
You can’t perform that action at this time.
0 commit comments