From f9c904ab963a05beeddecdaa47cedac9667732f5 Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Wed, 18 Feb 2026 15:25:39 -0800 Subject: [PATCH 1/3] Add an index offset during codegen to allow arbitrary offsets when accessing buffers to avoid restrictive alignment constraints. CodeGen now adds one int32 buffer offset param for each buffer after all other scalar args The runtime packs these params into the uniform buffer for each storage buffer Crop device now computes an index offset (instead of a byte offset Copy to/from device recomputes a byte offset from this index offset --- src/CodeGen_Vulkan_Dev.cpp | 90 +++++++++++++++++++++++-- src/runtime/internal/memory_resources.h | 1 + src/runtime/vulkan.cpp | 2 + src/runtime/vulkan_memory.h | 8 +-- src/runtime/vulkan_resources.h | 85 ++++++++++++++++++----- 5 files changed, 161 insertions(+), 25 deletions(-) diff --git a/src/CodeGen_Vulkan_Dev.cpp b/src/CodeGen_Vulkan_Dev.cpp index 671f923ec183..f65a57005175 100644 --- a/src/CodeGen_Vulkan_Dev.cpp +++ b/src/CodeGen_Vulkan_Dev.cpp @@ -147,6 +147,8 @@ class CodeGen_Vulkan_Dev : public CodeGen_GPU_Dev { void store_at_scalar_index(const Store *op, SpvId index_id, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class, SpvId value_id); void store_at_vector_index(const Store *op, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class, SpvId value_id); + SpvId apply_storage_buffer_offset(SpvId variable_id, SpvId index_id); + SpvFactory::Components split_vector(Type type, SpvId value_id); SpvId join_vector(Type type, const SpvFactory::Components &value_components); SpvId fill_vector(Type type, SpvId value_id); @@ -237,6 +239,9 @@ class CodeGen_Vulkan_Dev : public CodeGen_GPU_Dev { using StorageAccessMap = std::unordered_map; StorageAccessMap storage_access_map; + using StorageBufferOffsetMap = std::unordered_map; + StorageBufferOffsetMap storage_buffer_offset_map; + // Defines the binding information for a specialization constant // that is exported by the module and can be overriden at runtime struct SpecializationBinding { @@ -1373,6 +1378,27 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Select *op) { builder.update_id(result_id); } +SpvId CodeGen_Vulkan_Dev::SPIRV_Emitter::apply_storage_buffer_offset(SpvId variable_id, SpvId index_id) { + auto offset_map_it = storage_buffer_offset_map.find(variable_id); + if (offset_map_it == storage_buffer_offset_map.end()) { + return index_id; + } + + SpvId offset_id = offset_map_it->second; + SpvId index_type_id = builder.declare_type(Int(32)); + SpvId adjusted_index_id = builder.reserve_id(SpvResultId); + + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::apply_storage_buffer_offset(): " + << "variable_id=" << variable_id << " " + << "index_type_id=" << index_type_id << " " + << "index_id=" << index_id << " " + << "offset_id=" << offset_id << " " + << "adjusted_index_id=" << adjusted_index_id << "\n"; + + builder.append(SpvFactory::integer_add(index_type_id, adjusted_index_id, index_id, offset_id)); + return adjusted_index_id; +} + void CodeGen_Vulkan_Dev::SPIRV_Emitter::load_from_scalar_index(const Load *op, SpvId index_id, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class) { debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::load_from_scalar_index(): " << "index_id=" << index_id << " " @@ -1392,7 +1418,7 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::load_from_scalar_index(const Load *op, S uint32_t zero = 0; SpvId src_id = SpvInvalidId; - SpvId src_index_id = index_id; + SpvId src_index_id = apply_storage_buffer_offset(variable_id, index_id); if (storage_class == SpvStorageClassUniform) { if (builder.is_struct_type(base_type_id)) { SpvId zero_id = builder.declare_constant(UInt(32), &zero); @@ -1490,7 +1516,7 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::store_at_scalar_index(const Store *op, S uint32_t zero = 0; SpvId dst_id = SpvInvalidId; - SpvId dst_index_id = index_id; + SpvId dst_index_id = apply_storage_buffer_offset(variable_id, index_id); SpvId ptr_type_id = builder.declare_pointer_type(storage_type, storage_class); if (storage_class == SpvStorageClassUniform) { @@ -2259,6 +2285,7 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::reset() { SymbolScope empty; symbol_table.swap(empty); storage_access_map.clear(); + storage_buffer_offset_map.clear(); descriptor_set_table.clear(); reset_workgroup_size(); } @@ -2598,6 +2625,14 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_entry_point(const Stmt &s, SpvId builder.add_entry_point(kernel_func_id, SpvExecutionModelGLCompute, entry_point_variables); } +namespace { + +uint32_t align_offset(uint32_t offset, uint32_t alignment) { + return (offset + (alignment - 1)) & ~(alignment - 1); +} + +} // namespace + void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint32_t entry_point_index, const std::string &entry_point_name, const std::vector &args) { @@ -2621,7 +2656,9 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 // GLSL-style: each input buffer is a runtime array in a buffer struct // All other params get passed in as a single uniform block - // First, need to count scalar parameters to construct the uniform struct + // First, need to count scalar parameters and buffer parameters to construct the uniform struct + uint32_t scalar_arg_count = 0; + uint32_t buffer_arg_count = 0; SpvBuilder::StructMemberTypes param_struct_members; for (const auto &arg : args) { if (!arg.is_buffer) { @@ -2634,11 +2671,21 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 SpvId arg_type_id = builder.declare_type(arg.type); param_struct_members.push_back(arg_type_id); + scalar_arg_count++; + } else { + buffer_arg_count++; } } + // Add a buffer offset parameter for each buffer (one Int32 per buffer) + // to support crops at arbitrary index offsets. + Type offset_type = Int(32); + SpvId offset_type_id = builder.declare_type(offset_type); + param_struct_members.insert(param_struct_members.end(), size_t(buffer_arg_count), offset_type_id); + // Add a binding for a uniform buffer packed with all scalar args uint32_t binding_counter = 0; + SpvId param_pack_var_id = SpvInvalidId; if (!param_struct_members.empty()) { const std::string struct_name = std::string("k") + std::to_string(kernel_index) + std::string("_args_struct"); @@ -2647,6 +2694,8 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 // Add a decoration describing the offset for each parameter struct member uint32_t param_member_index = 0; uint32_t param_member_offset = 0; + + // First, add decorations for each scalar arg for (const auto &arg : args) { if (!arg.is_buffer) { SpvBuilder::Literals param_offset_literals = {param_member_offset}; @@ -2656,13 +2705,24 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 } } + // Force alignment for the parameter offset (e.g. all Int32 members in Uniform blocks must be 4-byte aligned) + param_member_offset = align_offset(param_member_offset, offset_type.bytes()); + + // Next, add a decoration for the storage buffer offsets + for (uint32_t b = 0; b < buffer_arg_count; b++) { + SpvBuilder::Literals param_offset_literals = {param_member_offset}; + builder.add_struct_annotation(param_struct_type_id, param_member_index, SpvDecorationOffset, param_offset_literals); + param_member_offset += offset_type.bytes(); + param_member_index++; + } + // Add a Block decoration for the parameter pack itself builder.add_annotation(param_struct_type_id, SpvDecorationBlock); // Add a variable for the parameter pack const std::string param_pack_var_name = std::string("k") + std::to_string(kernel_index) + std::string("_args_var"); SpvId param_pack_ptr_type_id = builder.declare_pointer_type(param_struct_type_id, SpvStorageClassUniform); - SpvId param_pack_var_id = builder.declare_global_variable(param_pack_var_name, param_pack_ptr_type_id, SpvStorageClassUniform); + param_pack_var_id = builder.declare_global_variable(param_pack_var_name, param_pack_ptr_type_id, SpvStorageClassUniform); // We always pass in the parameter pack as the first binding SpvBuilder::Literals binding_index = {0}; @@ -2672,7 +2732,7 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 descriptor_set.uniform_buffer_count++; binding_counter++; - // Declare all the args with appropriate offsets into the parameter struct + // Declare all the scalar args with appropriate offsets into the parameter struct uint32_t scalar_index = 0; for (const auto &arg : args) { if (!arg.is_buffer) { @@ -2692,6 +2752,8 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 } // Add bindings for all device buffers declared as GLSL-style buffer blocks in uniform storage + // and adjust the indices with the appropriate storage buffer offsets (to support arbitrary crops) + uint32_t buffer_index = 0; for (const auto &arg : args) { if (arg.is_buffer) { @@ -2741,6 +2803,24 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 access.storage_class = storage_class; storage_access_map[buffer_block_var_id] = access; descriptor_set.storage_buffer_count++; + + // Load the storage buffer offset for this buffer from the uniform struct + // These offsets are stored *after* all scalar args in the uniform struct + if (param_pack_var_id != SpvInvalidId) { + uint32_t buffer_offset_index_param = scalar_arg_count + buffer_index; + SpvId buffer_offset_index_param_id = builder.declare_constant(UInt(32), &buffer_offset_index_param); + SpvId index_ptr_type_id = builder.declare_pointer_type(offset_type_id, SpvStorageClassUniform); + SpvFactory::Indices buffer_offset_index_access_indices = {buffer_offset_index_param_id}; + SpvId buffer_offset_index_access_chain = builder.declare_access_chain(index_ptr_type_id, param_pack_var_id, buffer_offset_index_access_indices); + + SpvId buffer_offset_index_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::load(offset_type_id, buffer_offset_index_id, buffer_offset_index_access_chain)); + + // Store the mapping from the parameter defining the buffer index offset to the variable it should be applied to + storage_buffer_offset_map[buffer_block_var_id] = buffer_offset_index_id; + } + + buffer_index++; } } diff --git a/src/runtime/internal/memory_resources.h b/src/runtime/internal/memory_resources.h index 0be6041519a1..92d95c3a532b 100644 --- a/src/runtime/internal/memory_resources.h +++ b/src/runtime/internal/memory_resources.h @@ -80,6 +80,7 @@ struct MemoryRegion { void *handle = nullptr; //< client data storing native handle (managed by alloc_block_region/free_block_region) or a pointer to region owning allocation size_t offset = 0; //< offset from base address in block (in bytes) size_t size = 0; //< allocated size (in bytes) + uint32_t index_offset = 0; //< index offset from start of region (used to adjust indices in compute shader to avoid alignment constraints for arbitrary crops) MemoryRange range; //< optional range (e.g. for handling crops, etc) bool dedicated = false; //< flag indicating whether allocation is one dedicated resource (or split/shared into other resources) bool is_owner = true; //< flag indicating whether allocation is owned by this region, in which case handle is a native handle. Otherwise handle points to owning region of alloction. diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp index 36fc3047ddfb..7398b36feb10 100644 --- a/src/runtime/vulkan.cpp +++ b/src/runtime/vulkan.cpp @@ -489,6 +489,7 @@ WEAK int halide_vulkan_copy_to_device(void *user_context, halide_buffer_t *halid copy_helper.dst = (uint64_t)(device_buffer); uint64_t src_offset = copy_helper.src_begin; uint64_t dst_offset = copy_helper.dst_begin + device_region->range.head_offset; + dst_offset += device_region->index_offset * halide_buffer->type.bytes(); // enqueue the copy operation, using the allocated buffers error_code = vk_do_multidimensional_copy(user_context, cmds.command_buffer, copy_helper, @@ -657,6 +658,7 @@ WEAK int halide_vulkan_copy_to_host(void *user_context, halide_buffer_t *halide_ copy_helper.src = (uint64_t)(device_buffer); copy_helper.dst = (uint64_t)(staging_buffer); uint64_t src_offset = copy_helper.src_begin + device_region->range.head_offset; + src_offset += device_region->index_offset * halide_buffer->type.bytes(); uint64_t dst_offset = copy_helper.dst_begin; // enqueue the copy operation, using the allocated buffers diff --git a/src/runtime/vulkan_memory.h b/src/runtime/vulkan_memory.h index 9b32de9a15c0..ecf89ef9e074 100644 --- a/src/runtime/vulkan_memory.h +++ b/src/runtime/vulkan_memory.h @@ -69,7 +69,7 @@ class VulkanMemoryAllocator { void *map(void *user_context, MemoryRegion *region); int unmap(void *user_context, MemoryRegion *region); - MemoryRegion *create_crop(void *user_context, MemoryRegion *region, uint64_t offset); + MemoryRegion *create_crop(void *user_context, MemoryRegion *region, uint32_t index_offset); int destroy_crop(void *user_context, MemoryRegion *region); MemoryRegion *owner_of(void *user_context, MemoryRegion *region); @@ -352,7 +352,7 @@ int VulkanMemoryAllocator::unmap(void *user_context, MemoryRegion *region) { return halide_error_code_success; } -MemoryRegion *VulkanMemoryAllocator::create_crop(void *user_context, MemoryRegion *region, uint64_t offset) { +MemoryRegion *VulkanMemoryAllocator::create_crop(void *user_context, MemoryRegion *region, uint32_t index_offset) { #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Cropping region (" << "user_context=" << user_context << " " @@ -361,7 +361,7 @@ MemoryRegion *VulkanMemoryAllocator::create_crop(void *user_context, MemoryRegio << "region=" << (void *)(region) << " " << "region_size=" << (uint32_t)region->size << " " << "region_offset=" << (uint32_t)region->offset << " " - << "crop_offset=" << (int64_t)offset << ") ...\n"; + << "index_offset=" << (int64_t)index_offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to crop region! Invalid device handle!\n"; @@ -401,7 +401,7 @@ MemoryRegion *VulkanMemoryAllocator::create_crop(void *user_context, MemoryRegio // point the handle to the owner of the allocated region, and update the head offset memory_region->is_owner = false; memory_region->handle = (void *)owner; - memory_region->range.head_offset = owner->range.head_offset + offset; + memory_region->index_offset += owner->index_offset + index_offset; return memory_region; } diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h index c5c3c6620a9f..0143516e173b 100644 --- a/src/runtime/vulkan_resources.h +++ b/src/runtime/vulkan_resources.h @@ -284,12 +284,10 @@ bool vk_needs_scalar_uniform_buffer(void *user_context, size_t arg_sizes[], void *args[], int8_t arg_is_buffer[]) { - int i = 0; - while (arg_sizes[i] > 0) { - if (!arg_is_buffer[i]) { - return true; - } - i++; + + // if any args exist, we need at least 1x uniform buffer + if (arg_sizes[0] > 0) { + return true; } return false; } @@ -650,14 +648,41 @@ size_t vk_estimate_scalar_uniform_buffer_size(void *user_context, size_t arg_sizes[], void *args[], int8_t arg_is_buffer[]) { - int i = 0; - int scalar_uniform_buffer_size = 0; - while (arg_sizes[i] > 0) { + size_t scalar_arg_sizes = 0; + uint32_t buffer_count = 0; + + // Add up all arg sizes for scalar params + for (size_t i = 0; arg_sizes[i] > 0; i++) { if (!arg_is_buffer[i]) { - scalar_uniform_buffer_size += arg_sizes[i]; + scalar_arg_sizes += arg_sizes[i]; + } else { + buffer_count++; + } + } + + // Add space for index offsets for storage buffers (to support arbitrary crops) + size_t buffer_offset_arg_sizes = 0; + if (buffer_count) { + for (size_t i = 0; arg_sizes[i] > 0; i++) { + if (arg_is_buffer[i]) { + buffer_offset_arg_sizes += sizeof(int32_t); + } } - i++; } + + size_t end_of_scalar_args = aligned_offset(scalar_arg_sizes, sizeof(int32_t)); + size_t scalar_uniform_buffer_size = end_of_scalar_args + buffer_offset_arg_sizes; + +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_estimate_scalar_uniform_buffer_size (user_context: " << user_context << "): " + << "scalar_uniform_buffer_size: " << (uint32_t)scalar_uniform_buffer_size << ", " + << "scalar_arg_sizes: " << (uint32_t)scalar_arg_sizes << ", " + << "end_of_scalar_args: " << (uint32_t)end_of_scalar_args << ", " + << "buffer_count: " << (uint32_t)buffer_count << ", " + << "buffer_offset_arg_sizes: " << (uint32_t)buffer_offset_arg_sizes << ")\n"; +#endif + return scalar_uniform_buffer_size; } @@ -724,12 +749,37 @@ int vk_update_scalar_uniform_buffer(void *user_context, return halide_error_code_internal_error; } - // copy to the (host-visible/coherent) scalar uniform buffer + // Copy all scalar params to the (host-visible/coherent) scalar uniform buffer size_t arg_offset = 0; + size_t buffer_count = 0; for (size_t i = 0; arg_sizes[i] > 0; i++) { if (!arg_is_buffer[i]) { + halide_debug_assert(user_context, (arg_offset + arg_sizes[i]) <= region->size); memcpy(host_ptr + arg_offset, args[i], arg_sizes[i]); arg_offset += arg_sizes[i]; + } else { + buffer_count++; + } + } + + // Copy all storage buffer offsets (to support arbitrary crops) + if (buffer_count) { + + // Force alignment for the buffer offsets (e.g. all Int32 members in Uniform blocks must be 4-byte aligned) + arg_offset = aligned_offset(arg_offset, sizeof(int32_t)); + + // Copy all storage buffer offsets (to support arbitrary crops) + for (size_t i = 0; arg_sizes[i] > 0; i++) { + if (arg_is_buffer[i]) { + + // get the allocated region for the buffer + MemoryRegion *device_region = reinterpret_cast(((halide_buffer_t *)args[i])->device); + halide_debug_assert(user_context, device_region != nullptr); + int32_t index_offset = device_region->index_offset; + halide_debug_assert(user_context, (arg_offset + sizeof(int32_t)) <= region->size); + memcpy(host_ptr + arg_offset, &index_offset, sizeof(int32_t)); + arg_offset += sizeof(int32_t); + } } } @@ -1791,7 +1841,7 @@ int vk_do_multidimensional_copy(void *user_context, VkCommandBuffer command_buff int vk_device_crop_from_offset(void *user_context, const struct halide_buffer_t *src, - int64_t offset, + int64_t byte_offset, struct halide_buffer_t *dst) { VulkanContext ctx(user_context); @@ -1804,7 +1854,7 @@ int vk_device_crop_from_offset(void *user_context, uint64_t t_before = halide_current_time_ns(user_context); #endif - if (offset < 0) { + if (byte_offset < 0) { error(user_context) << "Vulkan: Invalid offset for device crop!\n"; return halide_error_code_device_crop_failed; } @@ -1816,8 +1866,11 @@ int vk_device_crop_from_offset(void *user_context, return halide_error_code_device_crop_failed; } - // create the croppeg region from the allocated region - MemoryRegion *cropped_region = ctx.allocator->create_crop(user_context, device_region, (uint64_t)offset); + // create the cropped region from the allocated region by computing a relative offset + // from the start of the region as an index based on the declared type size which + // will be passed as a shader parameter to adjust the indices during loads/stores + uint32_t index_offset = byte_offset / src->type.bytes(); + MemoryRegion *cropped_region = ctx.allocator->create_crop(user_context, device_region, index_offset); if ((cropped_region == nullptr) || (cropped_region->handle == nullptr)) { error(user_context) << "Vulkan: Failed to crop region! Unable to create memory region!\n"; return halide_error_code_device_crop_failed; From 70830d4aaa89e49ecf320880e7a853de8158cc4d Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Thu, 19 Feb 2026 12:42:46 -0800 Subject: [PATCH 2/3] Refactor cleanup to remove unused MemoryRegion.range. Adjusted relative offsets to use indexing for buffer copies. Added RegionAllocation and RegionIndexing to clarify mapping. Updated all affected interfaces. --- src/runtime/internal/memory_resources.h | 17 ++--- src/runtime/internal/region_allocator.h | 86 ++++++++++++------------- src/runtime/vulkan.cpp | 26 ++++---- src/runtime/vulkan_memory.h | 84 +++++++++++------------- src/runtime/vulkan_resources.h | 21 +++--- test/runtime/block_allocator.cpp | 8 +-- 6 files changed, 119 insertions(+), 123 deletions(-) diff --git a/src/runtime/internal/memory_resources.h b/src/runtime/internal/memory_resources.h index 92d95c3a532b..c24ea32aeb2d 100644 --- a/src/runtime/internal/memory_resources.h +++ b/src/runtime/internal/memory_resources.h @@ -69,19 +69,20 @@ struct MemoryBlock { MemoryProperties properties; //< properties for the allocated block }; -// Client-facing struct for specifying a range of a memory region (eg for crops) -struct MemoryRange { - size_t head_offset = 0; //< byte offset from start of region - size_t tail_offset = 0; //< byte offset from end of region +struct RegionAllocation { + size_t offset = 0; //< offset from base address in block (in bytes) + size_t size = 0; //< allocated size in block (in bytes) +}; + +struct RegionIndexing { + int32_t offset = 0; //< indexing offset from start of region (used to adjust indices in compute shader to avoid alignment constraints for arbitrary crops) }; // Client-facing struct for exchanging memory region allocation requests struct MemoryRegion { void *handle = nullptr; //< client data storing native handle (managed by alloc_block_region/free_block_region) or a pointer to region owning allocation - size_t offset = 0; //< offset from base address in block (in bytes) - size_t size = 0; //< allocated size (in bytes) - uint32_t index_offset = 0; //< index offset from start of region (used to adjust indices in compute shader to avoid alignment constraints for arbitrary crops) - MemoryRange range; //< optional range (e.g. for handling crops, etc) + RegionAllocation allocation; //< allocation in parent block for region + RegionIndexing indexing; //< indexing adjustments for controlling access bool dedicated = false; //< flag indicating whether allocation is one dedicated resource (or split/shared into other resources) bool is_owner = true; //< flag indicating whether allocation is owned by this region, in which case handle is a native handle. Otherwise handle points to owning region of alloction. MemoryProperties properties; //< properties for the allocated region diff --git a/src/runtime/internal/region_allocator.h b/src/runtime/internal/region_allocator.h index 6f39991ff79c..03001dc3761d 100644 --- a/src/runtime/internal/region_allocator.h +++ b/src/runtime/internal/region_allocator.h @@ -197,7 +197,7 @@ MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest & if (can_split(block_region, region_request)) { #ifdef DEBUG_RUNTIME_INTERNAL - debug(user_context) << "RegionAllocator: Splitting region of size ( " << (int32_t)(block_region->memory.size) << ") " + debug(user_context) << "RegionAllocator: Splitting region of size ( " << (int32_t)(block_region->memory.allocation.size) << ") " << "to accomodate requested size (" << (int32_t)(region_request.size) << " bytes)"; #endif split_block_region(user_context, block_region, region_request); @@ -257,7 +257,7 @@ bool RegionAllocator::is_block_region_suitable_for_request(void *user_context, c #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << " skipping block region ... not available! (" << " block_region=" << (void *)region - << " region_size=" << (uint32_t)(region->memory.size) + << " region_size=" << (uint32_t)(region->memory.allocation.size) << ")"; #endif return false; @@ -277,20 +277,20 @@ bool RegionAllocator::is_block_region_suitable_for_request(void *user_context, c #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << " skipping block region ... incompatible properties! (" << " block_region=" << (void *)region - << " region_size=" << (uint32_t)(region->memory.size) + << " region_size=" << (uint32_t)(region->memory.allocation.size) << ")"; #endif return false; } // is the adjusted size larger than the current region? - if (region_request.size > region->memory.size) { + if (region_request.size > region->memory.allocation.size) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << " skipping block region ... not enough space for adjusted size! (" << " block_region=" << (void *)region << " request_size=" << (uint32_t)(request.size) << " actual_size=" << (uint32_t)(region_request.size) - << " region_size=" << (uint32_t)(region->memory.size) + << " region_size=" << (uint32_t)(region->memory.allocation.size) << ")"; #endif return false; @@ -303,7 +303,7 @@ bool RegionAllocator::is_block_region_suitable_for_request(void *user_context, c << " block_region=" << (void *)region << " request_size=" << (uint32_t)(request.size) << " actual_size=" << (uint32_t)(region_request.size) - << " region_size=" << (uint32_t)(region->memory.size) + << " region_size=" << (uint32_t)(region->memory.allocation.size) << ")"; #endif return true; // you betcha @@ -329,7 +329,7 @@ BlockRegion *RegionAllocator::find_block_region(void *user_context, const Memory debug(user_context) << "RegionAllocator: found suitable region ( " << "user_context=" << (void *)(user_context) << " " << "block_resource=" << (void *)block << " " - << "block_size=" << (uint32_t)block->memory.size << " " + << "block_size=" << (uint32_t)block->memory.allocation.size << " " << "block_reserved=" << (uint32_t)block->reserved << " " << "requested_size=" << (uint32_t)request.size << " " << "requested_is_dedicated=" << (request.dedicated ? "true" : "false") << " " @@ -395,7 +395,7 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe debug(user_context) << "RegionAllocator: Freeing unused region to coalesce (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; #endif @@ -410,15 +410,15 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Coalescing " - << "previous region (offset=" << (int32_t)prev_region->memory.offset << " size=" << (int32_t)(prev_region->memory.size) << " bytes) " - << "into current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)!"; + << "previous region (offset=" << (int32_t)prev_region->memory.allocation.offset << " size=" << (int32_t)(prev_region->memory.allocation.size) << " bytes) " + << "into current region (offset=" << (int32_t)block_region->memory.allocation.offset << " size=" << (int32_t)(block_region->memory.allocation.size) << " bytes)!"; #endif prev_region->next_ptr = block_region->next_ptr; if (block_region->next_ptr) { block_region->next_ptr->prev_ptr = prev_region; } - prev_region->memory.size += block_region->memory.size; + prev_region->memory.allocation.size += block_region->memory.allocation.size; destroy_block_region(user_context, block_region); block_region = prev_region; } @@ -428,15 +428,15 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Coalescing " - << "next region (offset=" << (int32_t)next_region->memory.offset << " size=" << (int32_t)(next_region->memory.size) << " bytes) " - << "into current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)"; + << "next region (offset=" << (int32_t)next_region->memory.allocation.offset << " size=" << (int32_t)(next_region->memory.allocation.size) << " bytes) " + << "into current region (offset=" << (int32_t)block_region->memory.allocation.offset << " size=" << (int32_t)(block_region->memory.allocation.size) << " bytes)"; #endif if (next_region->next_ptr) { next_region->next_ptr->prev_ptr = block_region; } block_region->next_ptr = next_region->next_ptr; - block_region->memory.size += next_region->memory.size; + block_region->memory.allocation.size += next_region->memory.allocation.size; destroy_block_region(user_context, next_region); } @@ -444,7 +444,7 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe } bool RegionAllocator::can_split(const BlockRegion *block_region, const MemoryRequest &split_request) const { - return (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0)); + return (block_region && (block_region->memory.allocation.size > split_request.size) && (block_region->usage_count == 0)); } BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion *block_region, const MemoryRequest &request) { @@ -454,7 +454,7 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion debug(user_context) << "RegionAllocator: Split deallocate region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block_region->block_ptr->reserved << " " << ")"; #endif @@ -465,12 +465,12 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion } MemoryRequest split_request = request; - split_request.size = block_region->memory.size - request.size; - split_request.offset = block_region->memory.offset + request.size; + split_request.size = block_region->memory.allocation.size - request.size; + split_request.offset = block_region->memory.allocation.offset + request.size; #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Splitting " - << "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) " + << "current region (offset=" << (int32_t)block_region->memory.allocation.offset << " size=" << (int32_t)(block_region->memory.allocation.size) << " bytes) " << "to create empty region (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)"; #endif BlockRegion *next_region = block_region->next_ptr; @@ -483,7 +483,7 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion } empty_region->prev_ptr = block_region; block_region->next_ptr = empty_region; - block_region->memory.size -= empty_region->memory.size; + block_region->memory.allocation.size -= empty_region->memory.allocation.size; return empty_region; } @@ -525,8 +525,8 @@ BlockRegion *RegionAllocator::create_block_region(void *user_context, const Memo } block_region->memory.handle = nullptr; - block_region->memory.offset = region_request.offset; - block_region->memory.size = region_request.size; + block_region->memory.allocation.offset = region_request.offset; + block_region->memory.allocation.size = region_request.size; block_region->memory.properties = region_request.properties; block_region->memory.dedicated = region_request.dedicated; block_region->status = AllocationStatus::Available; @@ -538,8 +538,8 @@ BlockRegion *RegionAllocator::create_block_region(void *user_context, const Memo << "user_context=" << (void *)(user_context) << " " << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_offset=" << (uint32_t)(block_region->memory.offset) << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_offset=" << (uint32_t)(block_region->memory.allocation.offset) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << ")"; #endif @@ -553,8 +553,8 @@ int RegionAllocator::release_block_region(void *user_context, BlockRegion *block << "block_ptr=" << ((block_region) ? ((void *)block_region->block_ptr) : nullptr) << " " << "block_region=" << (void *)block_region << " " << "usage_count=" << ((block_region) ? (uint32_t)(block_region->usage_count) : 0) << " " - << "memory_offset=" << ((block_region) ? (uint32_t)(block_region->memory.offset) : 0) << " " - << "memory_size=" << ((block_region) ? (uint32_t)(block_region->memory.size) : 0) << " " + << "memory_offset=" << ((block_region) ? (uint32_t)(block_region->memory.allocation.offset) : 0) << " " + << "memory_size=" << ((block_region) ? (uint32_t)(block_region->memory.allocation.size) : 0) << " " << "block_reserved=" << (uint32_t)(block->reserved) << ") ... "; #endif if (block_region == nullptr) { @@ -571,13 +571,13 @@ int RegionAllocator::release_block_region(void *user_context, BlockRegion *block debug(user_context) << " releasing region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_offset=" << (uint32_t)(block_region->memory.offset) << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " - << "block_reserved=" << (uint32_t)(block->reserved - block_region->memory.size) << " " + << "memory_offset=" << (uint32_t)(block_region->memory.allocation.offset) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " + << "block_reserved=" << (uint32_t)(block->reserved - block_region->memory.allocation.size) << " " << ")"; #endif - block->reserved -= block_region->memory.size; + block->reserved -= block_region->memory.allocation.size; } block_region->status = AllocationStatus::Available; return 0; @@ -600,8 +600,8 @@ int RegionAllocator::destroy_block_region(void *user_context, BlockRegion *block int RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_region) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Allocating region (user_context=" << (void *)(user_context) - << " size=" << (int32_t)(block_region->memory.size) - << " offset=" << (int32_t)block_region->memory.offset << ")"; + << " size=" << (int32_t)(block_region->memory.allocation.size) + << " offset=" << (int32_t)block_region->memory.allocation.offset << ")"; #endif halide_abort_if_false(user_context, allocators.region.allocate != nullptr); halide_abort_if_false(user_context, block_region->status == AllocationStatus::Available); @@ -615,8 +615,8 @@ int RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_r debug(user_context) << " allocating region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_offset=" << (uint32_t)(block_region->memory.offset) << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_offset=" << (uint32_t)(block_region->memory.allocation.offset) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; #endif @@ -627,15 +627,15 @@ int RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_r debug(user_context) << " re-using region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_offset=" << (uint32_t)(block_region->memory.offset) << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_offset=" << (uint32_t)(block_region->memory.allocation.offset) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; #endif } if (error_code == 0) { block_region->status = block_region->memory.dedicated ? AllocationStatus::Dedicated : AllocationStatus::InUse; - block->reserved += block_region->memory.size; + block->reserved += block_region->memory.allocation.size; } return error_code; } @@ -646,7 +646,7 @@ int RegionAllocator::free_block_region(void *user_context, BlockRegion *block_re << "user_context=" << (void *)(user_context) << " " << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)(block_region) << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "status=" << (uint32_t)block_region->status << " " << "usage_count=" << (uint32_t)block_region->usage_count << " " << "block_reserved=" << (uint32_t)block->reserved << ")"; @@ -657,7 +657,7 @@ int RegionAllocator::free_block_region(void *user_context, BlockRegion *block_re debug(user_context) << " deallocating region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; #endif @@ -709,13 +709,13 @@ bool RegionAllocator::collect(void *user_context) { BlockRegion *block_region = block->regions; while (block_region != nullptr) { #ifdef DEBUG_RUNTIME_INTERNAL - scanned_bytes += block_region->memory.size; + scanned_bytes += block_region->memory.allocation.size; debug(user_context) << " checking region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " << "usage_count=" << (uint32_t)(block_region->usage_count) << " " << "status=" << (uint32_t)(block_region->status) << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; #endif @@ -726,7 +726,7 @@ bool RegionAllocator::collect(void *user_context) { debug(user_context) << " collecting region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; #endif @@ -738,7 +738,7 @@ bool RegionAllocator::collect(void *user_context) { #endif } #ifdef DEBUG_RUNTIME_INTERNAL - available_bytes += is_available(block_region) ? block_region->memory.size : 0; + available_bytes += is_available(block_region) ? block_region->memory.allocation.size : 0; #endif if (is_last_block_region(user_context, block_region)) { break; diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp index 7398b36feb10..df4a8453ba10 100644 --- a/src/runtime/vulkan.cpp +++ b/src/runtime/vulkan.cpp @@ -126,7 +126,7 @@ WEAK int halide_vulkan_device_free(void *user_context, halide_buffer_t *halide_b debug(user_context) << "Vulkan: Released memory for device region (" << "user_context: " << user_context << ", " << "buffer: " << halide_buffer << ", " - << "size_in_bytes: " << (uint64_t)device_region->size << ")\n"; + << "size_in_bytes: " << (uint64_t)device_region->allocation.size << ")\n"; uint64_t t_after = halide_current_time_ns(user_context); debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n"; @@ -272,7 +272,7 @@ WEAK int halide_vulkan_device_malloc(void *user_context, halide_buffer_t *buf) { size_t size = buf->size_in_bytes(); if (buf->device) { MemoryRegion *device_region = (MemoryRegion *)(buf->device); - if (device_region->size >= size) { + if (device_region->allocation.size >= size) { debug(user_context) << "Vulkan: Requested allocation for existing device memory ... using existing buffer!\n"; return halide_error_code_success; } else { @@ -485,11 +485,12 @@ WEAK int halide_vulkan_copy_to_device(void *user_context, halide_buffer_t *halid // define the src and dst config bool from_host = true; bool to_host = false; + + uint64_t src_offset = copy_helper.src_begin; + uint64_t dst_offset = copy_helper.dst_begin + (device_region->indexing.offset * halide_buffer->type.bytes()); + copy_helper.src = (uint64_t)(staging_buffer); copy_helper.dst = (uint64_t)(device_buffer); - uint64_t src_offset = copy_helper.src_begin; - uint64_t dst_offset = copy_helper.dst_begin + device_region->range.head_offset; - dst_offset += device_region->index_offset * halide_buffer->type.bytes(); // enqueue the copy operation, using the allocated buffers error_code = vk_do_multidimensional_copy(user_context, cmds.command_buffer, copy_helper, @@ -655,11 +656,11 @@ WEAK int halide_vulkan_copy_to_host(void *user_context, halide_buffer_t *halide_ bool from_host = false; bool to_host = true; uint64_t copy_dst = copy_helper.dst; + uint64_t src_offset = copy_helper.src_begin + (device_region->indexing.offset * halide_buffer->type.bytes()); + uint64_t dst_offset = copy_helper.dst_begin; + copy_helper.src = (uint64_t)(device_buffer); copy_helper.dst = (uint64_t)(staging_buffer); - uint64_t src_offset = copy_helper.src_begin + device_region->range.head_offset; - src_offset += device_region->index_offset * halide_buffer->type.bytes(); - uint64_t dst_offset = copy_helper.dst_begin; // enqueue the copy operation, using the allocated buffers int error_code = vk_do_multidimensional_copy(user_context, cmds.command_buffer, copy_helper, @@ -936,13 +937,14 @@ WEAK int halide_vulkan_buffer_copy(void *user_context, struct halide_buffer_t *s // define the src and dst config uint64_t copy_dst = copy_helper.dst; + uint64_t src_offset = copy_helper.src_begin + (src_buffer_region->indexing.offset * src->type.bytes()); + uint64_t dst_offset = copy_helper.dst_begin + (dst_buffer_region->indexing.offset * dst->type.bytes()); + copy_helper.src = (uint64_t)(src_device_buffer); copy_helper.dst = (uint64_t)(dst_device_buffer); - uint64_t src_offset = copy_helper.src_begin + src_buffer_region->range.head_offset; - uint64_t dst_offset = copy_helper.dst_begin + dst_buffer_region->range.head_offset; - debug(user_context) << " src region=" << (void *)src_memory_region << " buffer=" << (void *)src_device_buffer << " crop_offset=" << (uint64_t)src_buffer_region->range.head_offset << " copy_offset=" << src_offset << "\n"; - debug(user_context) << " dst region=" << (void *)dst_memory_region << " buffer=" << (void *)dst_device_buffer << " crop_offset=" << (uint64_t)dst_buffer_region->range.head_offset << " copy_offset=" << dst_offset << "\n"; + debug(user_context) << " src region=" << (void *)src_memory_region << " buffer=" << (void *)src_device_buffer << " copy_offset=" << src_offset << "\n"; + debug(user_context) << " dst region=" << (void *)dst_memory_region << " buffer=" << (void *)dst_device_buffer << " copy_offset=" << dst_offset << "\n"; // enqueue the copy operation, using the allocated buffers error_code = vk_do_multidimensional_copy(user_context, cmds.command_buffer, copy_helper, diff --git a/src/runtime/vulkan_memory.h b/src/runtime/vulkan_memory.h index ecf89ef9e074..994a69c0a801 100644 --- a/src/runtime/vulkan_memory.h +++ b/src/runtime/vulkan_memory.h @@ -69,7 +69,7 @@ class VulkanMemoryAllocator { void *map(void *user_context, MemoryRegion *region); int unmap(void *user_context, MemoryRegion *region); - MemoryRegion *create_crop(void *user_context, MemoryRegion *region, uint32_t index_offset); + MemoryRegion *create_crop(void *user_context, MemoryRegion *region, const RegionIndexing &indexing); int destroy_crop(void *user_context, MemoryRegion *region); MemoryRegion *owner_of(void *user_context, MemoryRegion *region); @@ -254,9 +254,8 @@ void *VulkanMemoryAllocator::map(void *user_context, MemoryRegion *region) { << "device=" << (void *)(device) << " " << "physical_device=" << (void *)(physical_device) << " " << "region=" << (void *)(region) << " " - << "region_size=" << (uint32_t)region->size << " " - << "region_offset=" << (uint32_t)region->offset << " " - << "crop_offset=" << (uint32_t)region->range.head_offset << ") ...\n"; + << "region_size=" << (uint32_t)region->allocation.size << " " + << "region_offset=" << (uint32_t)region->allocation.offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to map memory! Invalid device handle!\n"; @@ -288,19 +287,13 @@ void *VulkanMemoryAllocator::map(void *user_context, MemoryRegion *region) { } void *mapped_ptr = nullptr; - VkDeviceSize memory_offset = region->offset + region->range.head_offset; - VkDeviceSize memory_size = region->size - region->range.tail_offset - region->range.head_offset; - if (((double)region->size - (double)region->range.tail_offset - (double)region->range.head_offset) <= 0.0) { - error(user_context) << "VulkanMemoryAllocator: Unable to map region! Invalid memory range !\n"; - return nullptr; - } + VkDeviceSize memory_offset = region->allocation.offset; + VkDeviceSize memory_size = region->allocation.size; #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: MapMemory (" << "user_context=" << user_context << "\n" - << " region_size=" << (uint32_t)region->size << "\n" - << " region_offset=" << (uint32_t)region->offset << "\n" - << " region_range.head_offset=" << (uint32_t)region->range.head_offset << "\n" - << " region_range.tail_offset=" << (uint32_t)region->range.tail_offset << "\n" + << " region_size=" << (uint32_t)region->allocation.size << "\n" + << " region_offset=" << (uint32_t)region->allocation.offset << "\n" << " memory_offset=" << (uint32_t)memory_offset << "\n" << " memory_size=" << (uint32_t)memory_size << "\n)\n"; #endif @@ -320,9 +313,8 @@ int VulkanMemoryAllocator::unmap(void *user_context, MemoryRegion *region) { << "device=" << (void *)(device) << " " << "physical_device=" << (void *)(physical_device) << " " << "region=" << (void *)(region) << " " - << "region_size=" << (uint32_t)region->size << " " - << "region_offset=" << (uint32_t)region->offset << " " - << "crop_offset=" << (uint32_t)region->range.head_offset << ") ...\n"; + << "region_size=" << (uint32_t)region->allocation.size << " " + << "region_offset=" << (uint32_t)region->allocation.offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to unmap region! Invalid device handle!\n"; @@ -352,16 +344,16 @@ int VulkanMemoryAllocator::unmap(void *user_context, MemoryRegion *region) { return halide_error_code_success; } -MemoryRegion *VulkanMemoryAllocator::create_crop(void *user_context, MemoryRegion *region, uint32_t index_offset) { +MemoryRegion *VulkanMemoryAllocator::create_crop(void *user_context, MemoryRegion *region, const RegionIndexing &indexing) { #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Cropping region (" << "user_context=" << user_context << " " << "device=" << (void *)(device) << " " << "physical_device=" << (void *)(physical_device) << " " << "region=" << (void *)(region) << " " - << "region_size=" << (uint32_t)region->size << " " - << "region_offset=" << (uint32_t)region->offset << " " - << "index_offset=" << (int64_t)index_offset << ") ...\n"; + << "region_size=" << (uint32_t)region->allocation.size << " " + << "region_offset=" << (uint32_t)region->allocation.offset << " " + << "indexing_offset=" << (int32_t)indexing.offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to crop region! Invalid device handle!\n"; @@ -398,10 +390,10 @@ MemoryRegion *VulkanMemoryAllocator::create_crop(void *user_context, MemoryRegio } memcpy(memory_region, owner, sizeof(MemoryRegion)); - // point the handle to the owner of the allocated region, and update the head offset + // point the handle to the owner of the allocated region, and update the indexing offset memory_region->is_owner = false; memory_region->handle = (void *)owner; - memory_region->index_offset += owner->index_offset + index_offset; + memory_region->indexing.offset += owner->indexing.offset + indexing.offset; return memory_region; } @@ -449,8 +441,8 @@ int VulkanMemoryAllocator::release(void *user_context, MemoryRegion *region) { debug(nullptr) << "VulkanMemoryAllocator: Releasing region (" << "user_context=" << user_context << " " << "region=" << (void *)(region) << " " - << "size=" << (uint32_t)region->size << " " - << "offset=" << (uint32_t)region->offset << ") ...\n"; + << "size=" << (uint32_t)region->allocation.size << " " + << "offset=" << (uint32_t)region->allocation.offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to release region! Invalid device handle!\n"; @@ -468,8 +460,8 @@ int VulkanMemoryAllocator::reclaim(void *user_context, MemoryRegion *region) { debug(nullptr) << "VulkanMemoryAllocator: Reclaiming region (" << "user_context=" << user_context << " " << "region=" << (void *)(region) << " " - << "size=" << (uint32_t)region->size << " " - << "offset=" << (uint32_t)region->offset << ") ...\n"; + << "size=" << (uint32_t)region->allocation.size << " " + << "offset=" << (uint32_t)region->allocation.offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to reclaim region! Invalid device handle!\n"; @@ -487,8 +479,8 @@ int VulkanMemoryAllocator::retain(void *user_context, MemoryRegion *region) { debug(nullptr) << "VulkanMemoryAllocator: Retaining region (" << "user_context=" << user_context << " " << "region=" << (void *)(region) << " " - << "size=" << (uint32_t)region->size << " " - << "offset=" << (uint32_t)region->offset << ") ...\n"; + << "size=" << (uint32_t)region->allocation.size << " " + << "offset=" << (uint32_t)region->allocation.offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to retain region! Invalid device handle!\n"; @@ -556,7 +548,7 @@ int VulkanMemoryAllocator::lookup_requirements(void *user_context, size_t size, #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Looking up requirements (" << "user_context=" << user_context << " " - << "size=" << (uint32_t)block->size << ", " + << "size=" << (uint32_t)size << ", " << "usage_flags=" << usage_flags << ") ... \n"; #endif VkBufferCreateInfo create_info = { @@ -998,7 +990,7 @@ int VulkanMemoryAllocator::conform(void *user_context, MemoryRequest *request) { #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Buffer requirements (" - << "requested_size=" << (uint32_t)region->size << ", " + << "requested_size=" << (uint32_t)request->size << ", " << "required_alignment=" << (uint32_t)memory_requirements.alignment << ", " << "required_size=" << (uint32_t)memory_requirements.size << ")\n"; #endif @@ -1051,7 +1043,7 @@ int VulkanMemoryAllocator::conform_region_request(void *instance_ptr, MemoryRequ #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Conforming region request (" << "user_context=" << user_context << " " - << "request=" << (void *)(region) << ") ... \n"; + << "request=" << (void *)(request) << ") ... \n"; #endif if ((instance->device == nullptr) || (instance->physical_device == nullptr)) { @@ -1098,8 +1090,8 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanRegionAllocator: Allocating region (" - << "size=" << (uint32_t)region->size << ", " - << "offset=" << (uint32_t)region->offset << ", " + << "size=" << (uint32_t)region->allocation.size << ", " + << "offset=" << (uint32_t)region->allocation.offset << ", " << "dedicated=" << (region->dedicated ? "true" : "false") << " " << "usage=" << halide_memory_usage_name(region->properties.usage) << " " << "caching=" << halide_memory_caching_name(region->properties.caching) << " " @@ -1112,7 +1104,7 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // struct type nullptr, // struct extending this 0, // create flags - region->size, // buffer size (in bytes) + region->allocation.size, // buffer size (in bytes) usage_flags, // buffer usage flags VK_SHARING_MODE_EXCLUSIVE, // sharing mode 0, nullptr}; @@ -1147,16 +1139,16 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Buffer requirements (" - << "requested_size=" << (uint32_t)region->size << ", " + << "requested_size=" << (uint32_t)region->allocation.size << ", " << "required_alignment=" << (uint32_t)memory_requirements.alignment << ", " << "required_size=" << (uint32_t)memory_requirements.size << ")\n"; #endif - if (memory_requirements.size > region->size) { + if (memory_requirements.size > region->allocation.size) { vkDestroyBuffer(instance->device, *buffer, instance->alloc_callbacks); #ifdef DEBUG_RUNTIME debug(nullptr) << "VulkanMemoryAllocator: Reallocating buffer to match required size (" - << (uint64_t)region->size << " => " << (uint64_t)memory_requirements.size << " bytes) ...\n"; + << (uint64_t)region->allocation.size << " => " << (uint64_t)memory_requirements.size << " bytes) ...\n"; #endif create_info.size = memory_requirements.size; VkResult result = vkCreateBuffer(instance->device, &create_info, instance->alloc_callbacks, buffer); @@ -1168,7 +1160,7 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg } #ifdef DEBUG_RUNTIME - debug(nullptr) << "vkCreateBuffer: Created buffer for device region (" << (uint64_t)region->size << " bytes) ...\n"; + debug(nullptr) << "vkCreateBuffer: Created buffer for device region (" << (uint64_t)region->allocation.size << " bytes) ...\n"; #endif RegionAllocator *region_allocator = RegionAllocator::find_allocator(user_context, region); @@ -1190,7 +1182,7 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg } // Finally, bind buffer to the device memory - result = vkBindBufferMemory(instance->device, *buffer, *device_memory, region->offset); + result = vkBindBufferMemory(instance->device, *buffer, *device_memory, region->allocation.offset); if (result != VK_SUCCESS) { error(user_context) << "VulkanRegionAllocator: Failed to bind buffer!\n\t" << "vkBindBufferMemory returned: " << vk_get_error_name(result) << "\n"; @@ -1199,7 +1191,7 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg region->handle = (void *)buffer; region->is_owner = true; - instance->region_byte_count += region->size; + instance->region_byte_count += region->allocation.size; instance->region_count++; return halide_error_code_success; } @@ -1229,8 +1221,8 @@ int VulkanMemoryAllocator::deallocate_region(void *instance_ptr, MemoryRegion *r #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanRegionAllocator: Deallocating region (" - << "size=" << (uint32_t)region->size << ", " - << "offset=" << (uint32_t)region->offset << ", " + << "size=" << (uint32_t)region->allocation.size << ", " + << "offset=" << (uint32_t)region->allocation.offset << ", " << "dedicated=" << (region->dedicated ? "true" : "false") << " " << "usage=" << halide_memory_usage_name(region->properties.usage) << " " << "caching=" << halide_memory_caching_name(region->properties.caching) << " " @@ -1250,7 +1242,7 @@ int VulkanMemoryAllocator::deallocate_region(void *instance_ptr, MemoryRegion *r vkDestroyBuffer(instance->device, *buffer, instance->alloc_callbacks); #ifdef DEBUG_RUNTIME - debug(nullptr) << "vkDestroyBuffer: Destroyed buffer for device region (" << (uint64_t)region->size << " bytes) ...\n"; + debug(nullptr) << "vkDestroyBuffer: Destroyed buffer for device region (" << (uint64_t)region->allocation.size << " bytes) ...\n"; #endif halide_error_code_t error_code = halide_error_code_success; region->handle = nullptr; @@ -1262,8 +1254,8 @@ int VulkanMemoryAllocator::deallocate_region(void *instance_ptr, MemoryRegion *r error_code = halide_error_code_internal_error; } - if (int64_t(instance->region_byte_count) - int64_t(region->size) >= 0) { - instance->region_byte_count -= region->size; + if (int64_t(instance->region_byte_count) - int64_t(region->allocation.size) >= 0) { + instance->region_byte_count -= region->allocation.size; } else { error(user_context) << "VulkanRegionAllocator: Region byte counter invalid ... reseting to zero!\n"; instance->region_byte_count = 0; diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h index 0143516e173b..95d1ae8d4a9b 100644 --- a/src/runtime/vulkan_resources.h +++ b/src/runtime/vulkan_resources.h @@ -592,7 +592,8 @@ int vk_update_descriptor_set(void *user_context, if (arg_is_buffer[i]) { // get the allocated region for the buffer - MemoryRegion *device_region = reinterpret_cast(((halide_buffer_t *)args[i])->device); + halide_buffer_t *halide_buffer = (halide_buffer_t *)args[i]; + MemoryRegion *device_region = reinterpret_cast(halide_buffer->device); MemoryRegion *owner = allocator->owner_of(user_context, device_region); // retrieve the buffer from the region @@ -602,9 +603,8 @@ int vk_update_descriptor_set(void *user_context, return halide_error_code_internal_error; } - VkDeviceSize range_offset = device_region->range.head_offset; - VkDeviceSize range_size = device_region->size - device_region->range.head_offset - device_region->range.tail_offset; - halide_abort_if_false(user_context, (device_region->size - device_region->range.head_offset - device_region->range.tail_offset) > 0); + VkDeviceSize range_offset = 0; + VkDeviceSize range_size = device_region->allocation.size; VkDescriptorBufferInfo device_buffer_info = { *device_buffer, // the buffer range_offset, // range offset @@ -754,7 +754,7 @@ int vk_update_scalar_uniform_buffer(void *user_context, size_t buffer_count = 0; for (size_t i = 0; arg_sizes[i] > 0; i++) { if (!arg_is_buffer[i]) { - halide_debug_assert(user_context, (arg_offset + arg_sizes[i]) <= region->size); + halide_debug_assert(user_context, (arg_offset + arg_sizes[i]) <= region->allocation.size); memcpy(host_ptr + arg_offset, args[i], arg_sizes[i]); arg_offset += arg_sizes[i]; } else { @@ -775,9 +775,9 @@ int vk_update_scalar_uniform_buffer(void *user_context, // get the allocated region for the buffer MemoryRegion *device_region = reinterpret_cast(((halide_buffer_t *)args[i])->device); halide_debug_assert(user_context, device_region != nullptr); - int32_t index_offset = device_region->index_offset; - halide_debug_assert(user_context, (arg_offset + sizeof(int32_t)) <= region->size); - memcpy(host_ptr + arg_offset, &index_offset, sizeof(int32_t)); + RegionIndexing region_indexing = device_region->indexing; + halide_debug_assert(user_context, (arg_offset + sizeof(int32_t)) <= region->allocation.size); + memcpy(host_ptr + arg_offset, &(region_indexing.offset), sizeof(int32_t)); arg_offset += sizeof(int32_t); } } @@ -1869,8 +1869,9 @@ int vk_device_crop_from_offset(void *user_context, // create the cropped region from the allocated region by computing a relative offset // from the start of the region as an index based on the declared type size which // will be passed as a shader parameter to adjust the indices during loads/stores - uint32_t index_offset = byte_offset / src->type.bytes(); - MemoryRegion *cropped_region = ctx.allocator->create_crop(user_context, device_region, index_offset); + RegionIndexing region_indexing = {}; + region_indexing.offset = byte_offset / src->type.bytes(); + MemoryRegion *cropped_region = ctx.allocator->create_crop(user_context, device_region, region_indexing); if ((cropped_region == nullptr) || (cropped_region->handle == nullptr)) { error(user_context) << "Vulkan: Failed to crop region! Unable to create memory region!\n"; return halide_error_code_device_crop_failed; diff --git a/test/runtime/block_allocator.cpp b/test/runtime/block_allocator.cpp index 26ce8066e118..94f3a31f3e6e 100644 --- a/test/runtime/block_allocator.cpp +++ b/test/runtime/block_allocator.cpp @@ -56,11 +56,11 @@ int conform_block(void *user_context, MemoryRequest *request) { int allocate_region(void *user_context, MemoryRegion *region) { region->handle = (void *)1; - allocated_region_memory += region->size; + allocated_region_memory += region->allocation.size; debug(user_context) << "Test : allocate_region (" << "region=" << (void *)(region) << " " - << "region_size=" << int32_t(region->size) << " " + << "region_size=" << int32_t(region->allocation.size) << " " << "allocated_region_memory=" << int32_t(allocated_region_memory) << " " << ") ..."; @@ -69,11 +69,11 @@ int allocate_region(void *user_context, MemoryRegion *region) { int deallocate_region(void *user_context, MemoryRegion *region) { region->handle = (void *)0; - allocated_region_memory -= region->size; + allocated_region_memory -= region->allocation.size; debug(user_context) << "Test : deallocate_region (" << "region=" << (void *)(region) << " " - << "region_size=" << int32_t(region->size) << " " + << "region_size=" << int32_t(region->allocation.size) << " " << "allocated_region_memory=" << int32_t(allocated_region_memory) << " " << ") ..."; From bc65d2107c736629eb1f978c028f8c1983a2466b Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Thu, 19 Feb 2026 12:56:08 -0800 Subject: [PATCH 3/3] Clang formatting pass --- src/runtime/internal/memory_resources.h | 6 +++--- src/runtime/vulkan.cpp | 4 ++-- src/runtime/vulkan_memory.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/runtime/internal/memory_resources.h b/src/runtime/internal/memory_resources.h index c24ea32aeb2d..3da39bda0919 100644 --- a/src/runtime/internal/memory_resources.h +++ b/src/runtime/internal/memory_resources.h @@ -70,12 +70,12 @@ struct MemoryBlock { }; struct RegionAllocation { - size_t offset = 0; //< offset from base address in block (in bytes) - size_t size = 0; //< allocated size in block (in bytes) + size_t offset = 0; //< offset from base address in block (in bytes) + size_t size = 0; //< allocated size in block (in bytes) }; struct RegionIndexing { - int32_t offset = 0; //< indexing offset from start of region (used to adjust indices in compute shader to avoid alignment constraints for arbitrary crops) + int32_t offset = 0; //< indexing offset from start of region (used to adjust indices in compute shader to avoid alignment constraints for arbitrary crops) }; // Client-facing struct for exchanging memory region allocation requests diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp index df4a8453ba10..f44b60b90e94 100644 --- a/src/runtime/vulkan.cpp +++ b/src/runtime/vulkan.cpp @@ -937,8 +937,8 @@ WEAK int halide_vulkan_buffer_copy(void *user_context, struct halide_buffer_t *s // define the src and dst config uint64_t copy_dst = copy_helper.dst; - uint64_t src_offset = copy_helper.src_begin + (src_buffer_region->indexing.offset * src->type.bytes()); - uint64_t dst_offset = copy_helper.dst_begin + (dst_buffer_region->indexing.offset * dst->type.bytes()); + uint64_t src_offset = copy_helper.src_begin + (src_buffer_region->indexing.offset * src->type.bytes()); + uint64_t dst_offset = copy_helper.dst_begin + (dst_buffer_region->indexing.offset * dst->type.bytes()); copy_helper.src = (uint64_t)(src_device_buffer); copy_helper.dst = (uint64_t)(dst_device_buffer); diff --git a/src/runtime/vulkan_memory.h b/src/runtime/vulkan_memory.h index 994a69c0a801..a6f522294670 100644 --- a/src/runtime/vulkan_memory.h +++ b/src/runtime/vulkan_memory.h @@ -288,7 +288,7 @@ void *VulkanMemoryAllocator::map(void *user_context, MemoryRegion *region) { void *mapped_ptr = nullptr; VkDeviceSize memory_offset = region->allocation.offset; - VkDeviceSize memory_size = region->allocation.size; + VkDeviceSize memory_size = region->allocation.size; #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: MapMemory (" << "user_context=" << user_context << "\n" @@ -1104,7 +1104,7 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // struct type nullptr, // struct extending this 0, // create flags - region->allocation.size, // buffer size (in bytes) + region->allocation.size, // buffer size (in bytes) usage_flags, // buffer usage flags VK_SHARING_MODE_EXCLUSIVE, // sharing mode 0, nullptr};