Skip to content

Commit 56bff19

Browse files
committed
Also estimate the last_level_cache_size for Vulkan devices
1 parent b3d2f7f commit 56bff19

3 files changed

Lines changed: 17 additions & 6 deletions

File tree

apps/camera_pipe/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ if(Halide_TARGET MATCHES "cuda|metal")
2424
list(APPEND _camera_pipe_autoscheduler_params
2525
autoscheduler.last_level_cache_size=10000
2626
)
27-
elseif(Halide_TARGET MATCHES "opencl")
27+
elseif(Halide_TARGET MATCHES "opencl|vulkan")
2828
# Set last_level_cache per GPU block to an extremely small value. This
2929
# eliminates all `.compute_at` in the generated schedules, which in turn
3030
# eliminates all GPU shared memory allocations.

apps/harris/CMakeLists.txt

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,23 @@ find_package(Halide REQUIRED)
1414
# Generator
1515
add_halide_generator(harris.generator SOURCES harris_generator.cpp)
1616

17+
set(_harris_autoscheduler_params autoscheduler.experimental_gpu_schedule=1)
18+
19+
if(Halide_TARGET MATCHES "opencl|metal")
20+
# Set last_level_cache per GPU block to an extremely small value. This
21+
# eliminates all `.compute_at` in the generated schedules, which in turn
22+
# eliminates all GPU shared memory allocations.
23+
list(APPEND _harris_autoscheduler_params
24+
autoscheduler.last_level_cache_size=1000
25+
)
26+
endif()
27+
1728
# Filters
1829
add_halide_library(harris FROM harris.generator)
1930
add_halide_library(harris_auto_schedule FROM harris.generator
2031
GENERATOR harris
2132
AUTOSCHEDULER Halide::Mullapudi2016
22-
PARAMS autoscheduler.experimental_gpu_schedule=1)
33+
PARAMS ${_harris_autoscheduler_params})
2334

2435
# Main executable
2536
add_executable(harris_filter filter.cpp)

apps/local_laplacian/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@ if(Halide_TARGET MATCHES "cuda")
2424
list(APPEND _local_laplacian_autoscheduler_params
2525
autoscheduler.last_level_cache_size=10000
2626
)
27-
elseif(Halide_TARGET MATCHES "metal|opencl")
28-
# Set last_level_cache per GPU block to an extremely small value. This
29-
# eliminates all `.compute_at` in the generated schedules, which in turn
30-
# eliminates all GPU shared memory allocations.
27+
elseif(Halide_TARGET MATCHES "metal|opencl|vulkan")
28+
# The pipeline is bounded by shared GPU memory. Limit the parallelism to
29+
# the minimal value (=32) to cap the shared GPU memory size.
3130
list(APPEND _local_laplacian_autoscheduler_params
3231
autoscheduler.last_level_cache_size=1000
32+
autoscheduler.parallelism=32
3333
)
3434
endif()
3535

0 commit comments

Comments
 (0)