diff --git a/src/CodeGen_Vulkan_Dev.cpp b/src/CodeGen_Vulkan_Dev.cpp index 671f923ec183..f65a57005175 100644 --- a/src/CodeGen_Vulkan_Dev.cpp +++ b/src/CodeGen_Vulkan_Dev.cpp @@ -147,6 +147,8 @@ class CodeGen_Vulkan_Dev : public CodeGen_GPU_Dev { void store_at_scalar_index(const Store *op, SpvId index_id, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class, SpvId value_id); void store_at_vector_index(const Store *op, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class, SpvId value_id); + SpvId apply_storage_buffer_offset(SpvId variable_id, SpvId index_id); + SpvFactory::Components split_vector(Type type, SpvId value_id); SpvId join_vector(Type type, const SpvFactory::Components &value_components); SpvId fill_vector(Type type, SpvId value_id); @@ -237,6 +239,9 @@ class CodeGen_Vulkan_Dev : public CodeGen_GPU_Dev { using StorageAccessMap = std::unordered_map; StorageAccessMap storage_access_map; + using StorageBufferOffsetMap = std::unordered_map; + StorageBufferOffsetMap storage_buffer_offset_map; + // Defines the binding information for a specialization constant // that is exported by the module and can be overriden at runtime struct SpecializationBinding { @@ -1373,6 +1378,27 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Select *op) { builder.update_id(result_id); } +SpvId CodeGen_Vulkan_Dev::SPIRV_Emitter::apply_storage_buffer_offset(SpvId variable_id, SpvId index_id) { + auto offset_map_it = storage_buffer_offset_map.find(variable_id); + if (offset_map_it == storage_buffer_offset_map.end()) { + return index_id; + } + + SpvId offset_id = offset_map_it->second; + SpvId index_type_id = builder.declare_type(Int(32)); + SpvId adjusted_index_id = builder.reserve_id(SpvResultId); + + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::apply_storage_buffer_offset(): " + << "variable_id=" << variable_id << " " + << "index_type_id=" << index_type_id << " " + << "index_id=" << index_id << " " + << "offset_id=" << offset_id << " " + << "adjusted_index_id=" << adjusted_index_id << "\n"; + + builder.append(SpvFactory::integer_add(index_type_id, adjusted_index_id, index_id, offset_id)); + return adjusted_index_id; +} + void CodeGen_Vulkan_Dev::SPIRV_Emitter::load_from_scalar_index(const Load *op, SpvId index_id, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class) { debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::load_from_scalar_index(): " << "index_id=" << index_id << " " @@ -1392,7 +1418,7 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::load_from_scalar_index(const Load *op, S uint32_t zero = 0; SpvId src_id = SpvInvalidId; - SpvId src_index_id = index_id; + SpvId src_index_id = apply_storage_buffer_offset(variable_id, index_id); if (storage_class == SpvStorageClassUniform) { if (builder.is_struct_type(base_type_id)) { SpvId zero_id = builder.declare_constant(UInt(32), &zero); @@ -1490,7 +1516,7 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::store_at_scalar_index(const Store *op, S uint32_t zero = 0; SpvId dst_id = SpvInvalidId; - SpvId dst_index_id = index_id; + SpvId dst_index_id = apply_storage_buffer_offset(variable_id, index_id); SpvId ptr_type_id = builder.declare_pointer_type(storage_type, storage_class); if (storage_class == SpvStorageClassUniform) { @@ -2259,6 +2285,7 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::reset() { SymbolScope empty; symbol_table.swap(empty); storage_access_map.clear(); + storage_buffer_offset_map.clear(); descriptor_set_table.clear(); reset_workgroup_size(); } @@ -2598,6 +2625,14 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_entry_point(const Stmt &s, SpvId builder.add_entry_point(kernel_func_id, SpvExecutionModelGLCompute, entry_point_variables); } +namespace { + +uint32_t align_offset(uint32_t offset, uint32_t alignment) { + return (offset + (alignment - 1)) & ~(alignment - 1); +} + +} // namespace + void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint32_t entry_point_index, const std::string &entry_point_name, const std::vector &args) { @@ -2621,7 +2656,9 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 // GLSL-style: each input buffer is a runtime array in a buffer struct // All other params get passed in as a single uniform block - // First, need to count scalar parameters to construct the uniform struct + // First, need to count scalar parameters and buffer parameters to construct the uniform struct + uint32_t scalar_arg_count = 0; + uint32_t buffer_arg_count = 0; SpvBuilder::StructMemberTypes param_struct_members; for (const auto &arg : args) { if (!arg.is_buffer) { @@ -2634,11 +2671,21 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 SpvId arg_type_id = builder.declare_type(arg.type); param_struct_members.push_back(arg_type_id); + scalar_arg_count++; + } else { + buffer_arg_count++; } } + // Add a buffer offset parameter for each buffer (one Int32 per buffer) + // to support crops at arbitrary index offsets. + Type offset_type = Int(32); + SpvId offset_type_id = builder.declare_type(offset_type); + param_struct_members.insert(param_struct_members.end(), size_t(buffer_arg_count), offset_type_id); + // Add a binding for a uniform buffer packed with all scalar args uint32_t binding_counter = 0; + SpvId param_pack_var_id = SpvInvalidId; if (!param_struct_members.empty()) { const std::string struct_name = std::string("k") + std::to_string(kernel_index) + std::string("_args_struct"); @@ -2647,6 +2694,8 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 // Add a decoration describing the offset for each parameter struct member uint32_t param_member_index = 0; uint32_t param_member_offset = 0; + + // First, add decorations for each scalar arg for (const auto &arg : args) { if (!arg.is_buffer) { SpvBuilder::Literals param_offset_literals = {param_member_offset}; @@ -2656,13 +2705,24 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 } } + // Force alignment for the parameter offset (e.g. all Int32 members in Uniform blocks must be 4-byte aligned) + param_member_offset = align_offset(param_member_offset, offset_type.bytes()); + + // Next, add a decoration for the storage buffer offsets + for (uint32_t b = 0; b < buffer_arg_count; b++) { + SpvBuilder::Literals param_offset_literals = {param_member_offset}; + builder.add_struct_annotation(param_struct_type_id, param_member_index, SpvDecorationOffset, param_offset_literals); + param_member_offset += offset_type.bytes(); + param_member_index++; + } + // Add a Block decoration for the parameter pack itself builder.add_annotation(param_struct_type_id, SpvDecorationBlock); // Add a variable for the parameter pack const std::string param_pack_var_name = std::string("k") + std::to_string(kernel_index) + std::string("_args_var"); SpvId param_pack_ptr_type_id = builder.declare_pointer_type(param_struct_type_id, SpvStorageClassUniform); - SpvId param_pack_var_id = builder.declare_global_variable(param_pack_var_name, param_pack_ptr_type_id, SpvStorageClassUniform); + param_pack_var_id = builder.declare_global_variable(param_pack_var_name, param_pack_ptr_type_id, SpvStorageClassUniform); // We always pass in the parameter pack as the first binding SpvBuilder::Literals binding_index = {0}; @@ -2672,7 +2732,7 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 descriptor_set.uniform_buffer_count++; binding_counter++; - // Declare all the args with appropriate offsets into the parameter struct + // Declare all the scalar args with appropriate offsets into the parameter struct uint32_t scalar_index = 0; for (const auto &arg : args) { if (!arg.is_buffer) { @@ -2692,6 +2752,8 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 } // Add bindings for all device buffers declared as GLSL-style buffer blocks in uniform storage + // and adjust the indices with the appropriate storage buffer offsets (to support arbitrary crops) + uint32_t buffer_index = 0; for (const auto &arg : args) { if (arg.is_buffer) { @@ -2741,6 +2803,24 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint3 access.storage_class = storage_class; storage_access_map[buffer_block_var_id] = access; descriptor_set.storage_buffer_count++; + + // Load the storage buffer offset for this buffer from the uniform struct + // These offsets are stored *after* all scalar args in the uniform struct + if (param_pack_var_id != SpvInvalidId) { + uint32_t buffer_offset_index_param = scalar_arg_count + buffer_index; + SpvId buffer_offset_index_param_id = builder.declare_constant(UInt(32), &buffer_offset_index_param); + SpvId index_ptr_type_id = builder.declare_pointer_type(offset_type_id, SpvStorageClassUniform); + SpvFactory::Indices buffer_offset_index_access_indices = {buffer_offset_index_param_id}; + SpvId buffer_offset_index_access_chain = builder.declare_access_chain(index_ptr_type_id, param_pack_var_id, buffer_offset_index_access_indices); + + SpvId buffer_offset_index_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::load(offset_type_id, buffer_offset_index_id, buffer_offset_index_access_chain)); + + // Store the mapping from the parameter defining the buffer index offset to the variable it should be applied to + storage_buffer_offset_map[buffer_block_var_id] = buffer_offset_index_id; + } + + buffer_index++; } } diff --git a/src/runtime/internal/memory_resources.h b/src/runtime/internal/memory_resources.h index 0be6041519a1..3da39bda0919 100644 --- a/src/runtime/internal/memory_resources.h +++ b/src/runtime/internal/memory_resources.h @@ -69,18 +69,20 @@ struct MemoryBlock { MemoryProperties properties; //< properties for the allocated block }; -// Client-facing struct for specifying a range of a memory region (eg for crops) -struct MemoryRange { - size_t head_offset = 0; //< byte offset from start of region - size_t tail_offset = 0; //< byte offset from end of region +struct RegionAllocation { + size_t offset = 0; //< offset from base address in block (in bytes) + size_t size = 0; //< allocated size in block (in bytes) +}; + +struct RegionIndexing { + int32_t offset = 0; //< indexing offset from start of region (used to adjust indices in compute shader to avoid alignment constraints for arbitrary crops) }; // Client-facing struct for exchanging memory region allocation requests struct MemoryRegion { void *handle = nullptr; //< client data storing native handle (managed by alloc_block_region/free_block_region) or a pointer to region owning allocation - size_t offset = 0; //< offset from base address in block (in bytes) - size_t size = 0; //< allocated size (in bytes) - MemoryRange range; //< optional range (e.g. for handling crops, etc) + RegionAllocation allocation; //< allocation in parent block for region + RegionIndexing indexing; //< indexing adjustments for controlling access bool dedicated = false; //< flag indicating whether allocation is one dedicated resource (or split/shared into other resources) bool is_owner = true; //< flag indicating whether allocation is owned by this region, in which case handle is a native handle. Otherwise handle points to owning region of alloction. MemoryProperties properties; //< properties for the allocated region diff --git a/src/runtime/internal/region_allocator.h b/src/runtime/internal/region_allocator.h index 6f39991ff79c..03001dc3761d 100644 --- a/src/runtime/internal/region_allocator.h +++ b/src/runtime/internal/region_allocator.h @@ -197,7 +197,7 @@ MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest & if (can_split(block_region, region_request)) { #ifdef DEBUG_RUNTIME_INTERNAL - debug(user_context) << "RegionAllocator: Splitting region of size ( " << (int32_t)(block_region->memory.size) << ") " + debug(user_context) << "RegionAllocator: Splitting region of size ( " << (int32_t)(block_region->memory.allocation.size) << ") " << "to accomodate requested size (" << (int32_t)(region_request.size) << " bytes)"; #endif split_block_region(user_context, block_region, region_request); @@ -257,7 +257,7 @@ bool RegionAllocator::is_block_region_suitable_for_request(void *user_context, c #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << " skipping block region ... not available! (" << " block_region=" << (void *)region - << " region_size=" << (uint32_t)(region->memory.size) + << " region_size=" << (uint32_t)(region->memory.allocation.size) << ")"; #endif return false; @@ -277,20 +277,20 @@ bool RegionAllocator::is_block_region_suitable_for_request(void *user_context, c #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << " skipping block region ... incompatible properties! (" << " block_region=" << (void *)region - << " region_size=" << (uint32_t)(region->memory.size) + << " region_size=" << (uint32_t)(region->memory.allocation.size) << ")"; #endif return false; } // is the adjusted size larger than the current region? - if (region_request.size > region->memory.size) { + if (region_request.size > region->memory.allocation.size) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << " skipping block region ... not enough space for adjusted size! (" << " block_region=" << (void *)region << " request_size=" << (uint32_t)(request.size) << " actual_size=" << (uint32_t)(region_request.size) - << " region_size=" << (uint32_t)(region->memory.size) + << " region_size=" << (uint32_t)(region->memory.allocation.size) << ")"; #endif return false; @@ -303,7 +303,7 @@ bool RegionAllocator::is_block_region_suitable_for_request(void *user_context, c << " block_region=" << (void *)region << " request_size=" << (uint32_t)(request.size) << " actual_size=" << (uint32_t)(region_request.size) - << " region_size=" << (uint32_t)(region->memory.size) + << " region_size=" << (uint32_t)(region->memory.allocation.size) << ")"; #endif return true; // you betcha @@ -329,7 +329,7 @@ BlockRegion *RegionAllocator::find_block_region(void *user_context, const Memory debug(user_context) << "RegionAllocator: found suitable region ( " << "user_context=" << (void *)(user_context) << " " << "block_resource=" << (void *)block << " " - << "block_size=" << (uint32_t)block->memory.size << " " + << "block_size=" << (uint32_t)block->memory.allocation.size << " " << "block_reserved=" << (uint32_t)block->reserved << " " << "requested_size=" << (uint32_t)request.size << " " << "requested_is_dedicated=" << (request.dedicated ? "true" : "false") << " " @@ -395,7 +395,7 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe debug(user_context) << "RegionAllocator: Freeing unused region to coalesce (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; #endif @@ -410,15 +410,15 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Coalescing " - << "previous region (offset=" << (int32_t)prev_region->memory.offset << " size=" << (int32_t)(prev_region->memory.size) << " bytes) " - << "into current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)!"; + << "previous region (offset=" << (int32_t)prev_region->memory.allocation.offset << " size=" << (int32_t)(prev_region->memory.allocation.size) << " bytes) " + << "into current region (offset=" << (int32_t)block_region->memory.allocation.offset << " size=" << (int32_t)(block_region->memory.allocation.size) << " bytes)!"; #endif prev_region->next_ptr = block_region->next_ptr; if (block_region->next_ptr) { block_region->next_ptr->prev_ptr = prev_region; } - prev_region->memory.size += block_region->memory.size; + prev_region->memory.allocation.size += block_region->memory.allocation.size; destroy_block_region(user_context, block_region); block_region = prev_region; } @@ -428,15 +428,15 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Coalescing " - << "next region (offset=" << (int32_t)next_region->memory.offset << " size=" << (int32_t)(next_region->memory.size) << " bytes) " - << "into current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)"; + << "next region (offset=" << (int32_t)next_region->memory.allocation.offset << " size=" << (int32_t)(next_region->memory.allocation.size) << " bytes) " + << "into current region (offset=" << (int32_t)block_region->memory.allocation.offset << " size=" << (int32_t)(block_region->memory.allocation.size) << " bytes)"; #endif if (next_region->next_ptr) { next_region->next_ptr->prev_ptr = block_region; } block_region->next_ptr = next_region->next_ptr; - block_region->memory.size += next_region->memory.size; + block_region->memory.allocation.size += next_region->memory.allocation.size; destroy_block_region(user_context, next_region); } @@ -444,7 +444,7 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe } bool RegionAllocator::can_split(const BlockRegion *block_region, const MemoryRequest &split_request) const { - return (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0)); + return (block_region && (block_region->memory.allocation.size > split_request.size) && (block_region->usage_count == 0)); } BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion *block_region, const MemoryRequest &request) { @@ -454,7 +454,7 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion debug(user_context) << "RegionAllocator: Split deallocate region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block_region->block_ptr->reserved << " " << ")"; #endif @@ -465,12 +465,12 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion } MemoryRequest split_request = request; - split_request.size = block_region->memory.size - request.size; - split_request.offset = block_region->memory.offset + request.size; + split_request.size = block_region->memory.allocation.size - request.size; + split_request.offset = block_region->memory.allocation.offset + request.size; #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Splitting " - << "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) " + << "current region (offset=" << (int32_t)block_region->memory.allocation.offset << " size=" << (int32_t)(block_region->memory.allocation.size) << " bytes) " << "to create empty region (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)"; #endif BlockRegion *next_region = block_region->next_ptr; @@ -483,7 +483,7 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion } empty_region->prev_ptr = block_region; block_region->next_ptr = empty_region; - block_region->memory.size -= empty_region->memory.size; + block_region->memory.allocation.size -= empty_region->memory.allocation.size; return empty_region; } @@ -525,8 +525,8 @@ BlockRegion *RegionAllocator::create_block_region(void *user_context, const Memo } block_region->memory.handle = nullptr; - block_region->memory.offset = region_request.offset; - block_region->memory.size = region_request.size; + block_region->memory.allocation.offset = region_request.offset; + block_region->memory.allocation.size = region_request.size; block_region->memory.properties = region_request.properties; block_region->memory.dedicated = region_request.dedicated; block_region->status = AllocationStatus::Available; @@ -538,8 +538,8 @@ BlockRegion *RegionAllocator::create_block_region(void *user_context, const Memo << "user_context=" << (void *)(user_context) << " " << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_offset=" << (uint32_t)(block_region->memory.offset) << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_offset=" << (uint32_t)(block_region->memory.allocation.offset) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << ")"; #endif @@ -553,8 +553,8 @@ int RegionAllocator::release_block_region(void *user_context, BlockRegion *block << "block_ptr=" << ((block_region) ? ((void *)block_region->block_ptr) : nullptr) << " " << "block_region=" << (void *)block_region << " " << "usage_count=" << ((block_region) ? (uint32_t)(block_region->usage_count) : 0) << " " - << "memory_offset=" << ((block_region) ? (uint32_t)(block_region->memory.offset) : 0) << " " - << "memory_size=" << ((block_region) ? (uint32_t)(block_region->memory.size) : 0) << " " + << "memory_offset=" << ((block_region) ? (uint32_t)(block_region->memory.allocation.offset) : 0) << " " + << "memory_size=" << ((block_region) ? (uint32_t)(block_region->memory.allocation.size) : 0) << " " << "block_reserved=" << (uint32_t)(block->reserved) << ") ... "; #endif if (block_region == nullptr) { @@ -571,13 +571,13 @@ int RegionAllocator::release_block_region(void *user_context, BlockRegion *block debug(user_context) << " releasing region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_offset=" << (uint32_t)(block_region->memory.offset) << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " - << "block_reserved=" << (uint32_t)(block->reserved - block_region->memory.size) << " " + << "memory_offset=" << (uint32_t)(block_region->memory.allocation.offset) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " + << "block_reserved=" << (uint32_t)(block->reserved - block_region->memory.allocation.size) << " " << ")"; #endif - block->reserved -= block_region->memory.size; + block->reserved -= block_region->memory.allocation.size; } block_region->status = AllocationStatus::Available; return 0; @@ -600,8 +600,8 @@ int RegionAllocator::destroy_block_region(void *user_context, BlockRegion *block int RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_region) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Allocating region (user_context=" << (void *)(user_context) - << " size=" << (int32_t)(block_region->memory.size) - << " offset=" << (int32_t)block_region->memory.offset << ")"; + << " size=" << (int32_t)(block_region->memory.allocation.size) + << " offset=" << (int32_t)block_region->memory.allocation.offset << ")"; #endif halide_abort_if_false(user_context, allocators.region.allocate != nullptr); halide_abort_if_false(user_context, block_region->status == AllocationStatus::Available); @@ -615,8 +615,8 @@ int RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_r debug(user_context) << " allocating region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_offset=" << (uint32_t)(block_region->memory.offset) << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_offset=" << (uint32_t)(block_region->memory.allocation.offset) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; #endif @@ -627,15 +627,15 @@ int RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_r debug(user_context) << " re-using region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_offset=" << (uint32_t)(block_region->memory.offset) << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_offset=" << (uint32_t)(block_region->memory.allocation.offset) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; #endif } if (error_code == 0) { block_region->status = block_region->memory.dedicated ? AllocationStatus::Dedicated : AllocationStatus::InUse; - block->reserved += block_region->memory.size; + block->reserved += block_region->memory.allocation.size; } return error_code; } @@ -646,7 +646,7 @@ int RegionAllocator::free_block_region(void *user_context, BlockRegion *block_re << "user_context=" << (void *)(user_context) << " " << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)(block_region) << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "status=" << (uint32_t)block_region->status << " " << "usage_count=" << (uint32_t)block_region->usage_count << " " << "block_reserved=" << (uint32_t)block->reserved << ")"; @@ -657,7 +657,7 @@ int RegionAllocator::free_block_region(void *user_context, BlockRegion *block_re debug(user_context) << " deallocating region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; #endif @@ -709,13 +709,13 @@ bool RegionAllocator::collect(void *user_context) { BlockRegion *block_region = block->regions; while (block_region != nullptr) { #ifdef DEBUG_RUNTIME_INTERNAL - scanned_bytes += block_region->memory.size; + scanned_bytes += block_region->memory.allocation.size; debug(user_context) << " checking region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " << "usage_count=" << (uint32_t)(block_region->usage_count) << " " << "status=" << (uint32_t)(block_region->status) << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; #endif @@ -726,7 +726,7 @@ bool RegionAllocator::collect(void *user_context) { debug(user_context) << " collecting region (" << "block_ptr=" << (void *)block_region->block_ptr << " " << "block_region=" << (void *)block_region << " " - << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "memory_size=" << (uint32_t)(block_region->memory.allocation.size) << " " << "block_reserved=" << (uint32_t)block->reserved << " " << ")"; #endif @@ -738,7 +738,7 @@ bool RegionAllocator::collect(void *user_context) { #endif } #ifdef DEBUG_RUNTIME_INTERNAL - available_bytes += is_available(block_region) ? block_region->memory.size : 0; + available_bytes += is_available(block_region) ? block_region->memory.allocation.size : 0; #endif if (is_last_block_region(user_context, block_region)) { break; diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp index 36fc3047ddfb..f44b60b90e94 100644 --- a/src/runtime/vulkan.cpp +++ b/src/runtime/vulkan.cpp @@ -126,7 +126,7 @@ WEAK int halide_vulkan_device_free(void *user_context, halide_buffer_t *halide_b debug(user_context) << "Vulkan: Released memory for device region (" << "user_context: " << user_context << ", " << "buffer: " << halide_buffer << ", " - << "size_in_bytes: " << (uint64_t)device_region->size << ")\n"; + << "size_in_bytes: " << (uint64_t)device_region->allocation.size << ")\n"; uint64_t t_after = halide_current_time_ns(user_context); debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n"; @@ -272,7 +272,7 @@ WEAK int halide_vulkan_device_malloc(void *user_context, halide_buffer_t *buf) { size_t size = buf->size_in_bytes(); if (buf->device) { MemoryRegion *device_region = (MemoryRegion *)(buf->device); - if (device_region->size >= size) { + if (device_region->allocation.size >= size) { debug(user_context) << "Vulkan: Requested allocation for existing device memory ... using existing buffer!\n"; return halide_error_code_success; } else { @@ -485,10 +485,12 @@ WEAK int halide_vulkan_copy_to_device(void *user_context, halide_buffer_t *halid // define the src and dst config bool from_host = true; bool to_host = false; + + uint64_t src_offset = copy_helper.src_begin; + uint64_t dst_offset = copy_helper.dst_begin + (device_region->indexing.offset * halide_buffer->type.bytes()); + copy_helper.src = (uint64_t)(staging_buffer); copy_helper.dst = (uint64_t)(device_buffer); - uint64_t src_offset = copy_helper.src_begin; - uint64_t dst_offset = copy_helper.dst_begin + device_region->range.head_offset; // enqueue the copy operation, using the allocated buffers error_code = vk_do_multidimensional_copy(user_context, cmds.command_buffer, copy_helper, @@ -654,10 +656,11 @@ WEAK int halide_vulkan_copy_to_host(void *user_context, halide_buffer_t *halide_ bool from_host = false; bool to_host = true; uint64_t copy_dst = copy_helper.dst; + uint64_t src_offset = copy_helper.src_begin + (device_region->indexing.offset * halide_buffer->type.bytes()); + uint64_t dst_offset = copy_helper.dst_begin; + copy_helper.src = (uint64_t)(device_buffer); copy_helper.dst = (uint64_t)(staging_buffer); - uint64_t src_offset = copy_helper.src_begin + device_region->range.head_offset; - uint64_t dst_offset = copy_helper.dst_begin; // enqueue the copy operation, using the allocated buffers int error_code = vk_do_multidimensional_copy(user_context, cmds.command_buffer, copy_helper, @@ -934,13 +937,14 @@ WEAK int halide_vulkan_buffer_copy(void *user_context, struct halide_buffer_t *s // define the src and dst config uint64_t copy_dst = copy_helper.dst; + uint64_t src_offset = copy_helper.src_begin + (src_buffer_region->indexing.offset * src->type.bytes()); + uint64_t dst_offset = copy_helper.dst_begin + (dst_buffer_region->indexing.offset * dst->type.bytes()); + copy_helper.src = (uint64_t)(src_device_buffer); copy_helper.dst = (uint64_t)(dst_device_buffer); - uint64_t src_offset = copy_helper.src_begin + src_buffer_region->range.head_offset; - uint64_t dst_offset = copy_helper.dst_begin + dst_buffer_region->range.head_offset; - debug(user_context) << " src region=" << (void *)src_memory_region << " buffer=" << (void *)src_device_buffer << " crop_offset=" << (uint64_t)src_buffer_region->range.head_offset << " copy_offset=" << src_offset << "\n"; - debug(user_context) << " dst region=" << (void *)dst_memory_region << " buffer=" << (void *)dst_device_buffer << " crop_offset=" << (uint64_t)dst_buffer_region->range.head_offset << " copy_offset=" << dst_offset << "\n"; + debug(user_context) << " src region=" << (void *)src_memory_region << " buffer=" << (void *)src_device_buffer << " copy_offset=" << src_offset << "\n"; + debug(user_context) << " dst region=" << (void *)dst_memory_region << " buffer=" << (void *)dst_device_buffer << " copy_offset=" << dst_offset << "\n"; // enqueue the copy operation, using the allocated buffers error_code = vk_do_multidimensional_copy(user_context, cmds.command_buffer, copy_helper, diff --git a/src/runtime/vulkan_memory.h b/src/runtime/vulkan_memory.h index 9b32de9a15c0..a6f522294670 100644 --- a/src/runtime/vulkan_memory.h +++ b/src/runtime/vulkan_memory.h @@ -69,7 +69,7 @@ class VulkanMemoryAllocator { void *map(void *user_context, MemoryRegion *region); int unmap(void *user_context, MemoryRegion *region); - MemoryRegion *create_crop(void *user_context, MemoryRegion *region, uint64_t offset); + MemoryRegion *create_crop(void *user_context, MemoryRegion *region, const RegionIndexing &indexing); int destroy_crop(void *user_context, MemoryRegion *region); MemoryRegion *owner_of(void *user_context, MemoryRegion *region); @@ -254,9 +254,8 @@ void *VulkanMemoryAllocator::map(void *user_context, MemoryRegion *region) { << "device=" << (void *)(device) << " " << "physical_device=" << (void *)(physical_device) << " " << "region=" << (void *)(region) << " " - << "region_size=" << (uint32_t)region->size << " " - << "region_offset=" << (uint32_t)region->offset << " " - << "crop_offset=" << (uint32_t)region->range.head_offset << ") ...\n"; + << "region_size=" << (uint32_t)region->allocation.size << " " + << "region_offset=" << (uint32_t)region->allocation.offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to map memory! Invalid device handle!\n"; @@ -288,19 +287,13 @@ void *VulkanMemoryAllocator::map(void *user_context, MemoryRegion *region) { } void *mapped_ptr = nullptr; - VkDeviceSize memory_offset = region->offset + region->range.head_offset; - VkDeviceSize memory_size = region->size - region->range.tail_offset - region->range.head_offset; - if (((double)region->size - (double)region->range.tail_offset - (double)region->range.head_offset) <= 0.0) { - error(user_context) << "VulkanMemoryAllocator: Unable to map region! Invalid memory range !\n"; - return nullptr; - } + VkDeviceSize memory_offset = region->allocation.offset; + VkDeviceSize memory_size = region->allocation.size; #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: MapMemory (" << "user_context=" << user_context << "\n" - << " region_size=" << (uint32_t)region->size << "\n" - << " region_offset=" << (uint32_t)region->offset << "\n" - << " region_range.head_offset=" << (uint32_t)region->range.head_offset << "\n" - << " region_range.tail_offset=" << (uint32_t)region->range.tail_offset << "\n" + << " region_size=" << (uint32_t)region->allocation.size << "\n" + << " region_offset=" << (uint32_t)region->allocation.offset << "\n" << " memory_offset=" << (uint32_t)memory_offset << "\n" << " memory_size=" << (uint32_t)memory_size << "\n)\n"; #endif @@ -320,9 +313,8 @@ int VulkanMemoryAllocator::unmap(void *user_context, MemoryRegion *region) { << "device=" << (void *)(device) << " " << "physical_device=" << (void *)(physical_device) << " " << "region=" << (void *)(region) << " " - << "region_size=" << (uint32_t)region->size << " " - << "region_offset=" << (uint32_t)region->offset << " " - << "crop_offset=" << (uint32_t)region->range.head_offset << ") ...\n"; + << "region_size=" << (uint32_t)region->allocation.size << " " + << "region_offset=" << (uint32_t)region->allocation.offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to unmap region! Invalid device handle!\n"; @@ -352,16 +344,16 @@ int VulkanMemoryAllocator::unmap(void *user_context, MemoryRegion *region) { return halide_error_code_success; } -MemoryRegion *VulkanMemoryAllocator::create_crop(void *user_context, MemoryRegion *region, uint64_t offset) { +MemoryRegion *VulkanMemoryAllocator::create_crop(void *user_context, MemoryRegion *region, const RegionIndexing &indexing) { #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Cropping region (" << "user_context=" << user_context << " " << "device=" << (void *)(device) << " " << "physical_device=" << (void *)(physical_device) << " " << "region=" << (void *)(region) << " " - << "region_size=" << (uint32_t)region->size << " " - << "region_offset=" << (uint32_t)region->offset << " " - << "crop_offset=" << (int64_t)offset << ") ...\n"; + << "region_size=" << (uint32_t)region->allocation.size << " " + << "region_offset=" << (uint32_t)region->allocation.offset << " " + << "indexing_offset=" << (int32_t)indexing.offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to crop region! Invalid device handle!\n"; @@ -398,10 +390,10 @@ MemoryRegion *VulkanMemoryAllocator::create_crop(void *user_context, MemoryRegio } memcpy(memory_region, owner, sizeof(MemoryRegion)); - // point the handle to the owner of the allocated region, and update the head offset + // point the handle to the owner of the allocated region, and update the indexing offset memory_region->is_owner = false; memory_region->handle = (void *)owner; - memory_region->range.head_offset = owner->range.head_offset + offset; + memory_region->indexing.offset += owner->indexing.offset + indexing.offset; return memory_region; } @@ -449,8 +441,8 @@ int VulkanMemoryAllocator::release(void *user_context, MemoryRegion *region) { debug(nullptr) << "VulkanMemoryAllocator: Releasing region (" << "user_context=" << user_context << " " << "region=" << (void *)(region) << " " - << "size=" << (uint32_t)region->size << " " - << "offset=" << (uint32_t)region->offset << ") ...\n"; + << "size=" << (uint32_t)region->allocation.size << " " + << "offset=" << (uint32_t)region->allocation.offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to release region! Invalid device handle!\n"; @@ -468,8 +460,8 @@ int VulkanMemoryAllocator::reclaim(void *user_context, MemoryRegion *region) { debug(nullptr) << "VulkanMemoryAllocator: Reclaiming region (" << "user_context=" << user_context << " " << "region=" << (void *)(region) << " " - << "size=" << (uint32_t)region->size << " " - << "offset=" << (uint32_t)region->offset << ") ...\n"; + << "size=" << (uint32_t)region->allocation.size << " " + << "offset=" << (uint32_t)region->allocation.offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to reclaim region! Invalid device handle!\n"; @@ -487,8 +479,8 @@ int VulkanMemoryAllocator::retain(void *user_context, MemoryRegion *region) { debug(nullptr) << "VulkanMemoryAllocator: Retaining region (" << "user_context=" << user_context << " " << "region=" << (void *)(region) << " " - << "size=" << (uint32_t)region->size << " " - << "offset=" << (uint32_t)region->offset << ") ...\n"; + << "size=" << (uint32_t)region->allocation.size << " " + << "offset=" << (uint32_t)region->allocation.offset << ") ...\n"; #endif if ((device == nullptr) || (physical_device == nullptr)) { error(user_context) << "VulkanMemoryAllocator: Unable to retain region! Invalid device handle!\n"; @@ -556,7 +548,7 @@ int VulkanMemoryAllocator::lookup_requirements(void *user_context, size_t size, #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Looking up requirements (" << "user_context=" << user_context << " " - << "size=" << (uint32_t)block->size << ", " + << "size=" << (uint32_t)size << ", " << "usage_flags=" << usage_flags << ") ... \n"; #endif VkBufferCreateInfo create_info = { @@ -998,7 +990,7 @@ int VulkanMemoryAllocator::conform(void *user_context, MemoryRequest *request) { #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Buffer requirements (" - << "requested_size=" << (uint32_t)region->size << ", " + << "requested_size=" << (uint32_t)request->size << ", " << "required_alignment=" << (uint32_t)memory_requirements.alignment << ", " << "required_size=" << (uint32_t)memory_requirements.size << ")\n"; #endif @@ -1051,7 +1043,7 @@ int VulkanMemoryAllocator::conform_region_request(void *instance_ptr, MemoryRequ #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Conforming region request (" << "user_context=" << user_context << " " - << "request=" << (void *)(region) << ") ... \n"; + << "request=" << (void *)(request) << ") ... \n"; #endif if ((instance->device == nullptr) || (instance->physical_device == nullptr)) { @@ -1098,8 +1090,8 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanRegionAllocator: Allocating region (" - << "size=" << (uint32_t)region->size << ", " - << "offset=" << (uint32_t)region->offset << ", " + << "size=" << (uint32_t)region->allocation.size << ", " + << "offset=" << (uint32_t)region->allocation.offset << ", " << "dedicated=" << (region->dedicated ? "true" : "false") << " " << "usage=" << halide_memory_usage_name(region->properties.usage) << " " << "caching=" << halide_memory_caching_name(region->properties.caching) << " " @@ -1112,7 +1104,7 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // struct type nullptr, // struct extending this 0, // create flags - region->size, // buffer size (in bytes) + region->allocation.size, // buffer size (in bytes) usage_flags, // buffer usage flags VK_SHARING_MODE_EXCLUSIVE, // sharing mode 0, nullptr}; @@ -1147,16 +1139,16 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Buffer requirements (" - << "requested_size=" << (uint32_t)region->size << ", " + << "requested_size=" << (uint32_t)region->allocation.size << ", " << "required_alignment=" << (uint32_t)memory_requirements.alignment << ", " << "required_size=" << (uint32_t)memory_requirements.size << ")\n"; #endif - if (memory_requirements.size > region->size) { + if (memory_requirements.size > region->allocation.size) { vkDestroyBuffer(instance->device, *buffer, instance->alloc_callbacks); #ifdef DEBUG_RUNTIME debug(nullptr) << "VulkanMemoryAllocator: Reallocating buffer to match required size (" - << (uint64_t)region->size << " => " << (uint64_t)memory_requirements.size << " bytes) ...\n"; + << (uint64_t)region->allocation.size << " => " << (uint64_t)memory_requirements.size << " bytes) ...\n"; #endif create_info.size = memory_requirements.size; VkResult result = vkCreateBuffer(instance->device, &create_info, instance->alloc_callbacks, buffer); @@ -1168,7 +1160,7 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg } #ifdef DEBUG_RUNTIME - debug(nullptr) << "vkCreateBuffer: Created buffer for device region (" << (uint64_t)region->size << " bytes) ...\n"; + debug(nullptr) << "vkCreateBuffer: Created buffer for device region (" << (uint64_t)region->allocation.size << " bytes) ...\n"; #endif RegionAllocator *region_allocator = RegionAllocator::find_allocator(user_context, region); @@ -1190,7 +1182,7 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg } // Finally, bind buffer to the device memory - result = vkBindBufferMemory(instance->device, *buffer, *device_memory, region->offset); + result = vkBindBufferMemory(instance->device, *buffer, *device_memory, region->allocation.offset); if (result != VK_SUCCESS) { error(user_context) << "VulkanRegionAllocator: Failed to bind buffer!\n\t" << "vkBindBufferMemory returned: " << vk_get_error_name(result) << "\n"; @@ -1199,7 +1191,7 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg region->handle = (void *)buffer; region->is_owner = true; - instance->region_byte_count += region->size; + instance->region_byte_count += region->allocation.size; instance->region_count++; return halide_error_code_success; } @@ -1229,8 +1221,8 @@ int VulkanMemoryAllocator::deallocate_region(void *instance_ptr, MemoryRegion *r #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanRegionAllocator: Deallocating region (" - << "size=" << (uint32_t)region->size << ", " - << "offset=" << (uint32_t)region->offset << ", " + << "size=" << (uint32_t)region->allocation.size << ", " + << "offset=" << (uint32_t)region->allocation.offset << ", " << "dedicated=" << (region->dedicated ? "true" : "false") << " " << "usage=" << halide_memory_usage_name(region->properties.usage) << " " << "caching=" << halide_memory_caching_name(region->properties.caching) << " " @@ -1250,7 +1242,7 @@ int VulkanMemoryAllocator::deallocate_region(void *instance_ptr, MemoryRegion *r vkDestroyBuffer(instance->device, *buffer, instance->alloc_callbacks); #ifdef DEBUG_RUNTIME - debug(nullptr) << "vkDestroyBuffer: Destroyed buffer for device region (" << (uint64_t)region->size << " bytes) ...\n"; + debug(nullptr) << "vkDestroyBuffer: Destroyed buffer for device region (" << (uint64_t)region->allocation.size << " bytes) ...\n"; #endif halide_error_code_t error_code = halide_error_code_success; region->handle = nullptr; @@ -1262,8 +1254,8 @@ int VulkanMemoryAllocator::deallocate_region(void *instance_ptr, MemoryRegion *r error_code = halide_error_code_internal_error; } - if (int64_t(instance->region_byte_count) - int64_t(region->size) >= 0) { - instance->region_byte_count -= region->size; + if (int64_t(instance->region_byte_count) - int64_t(region->allocation.size) >= 0) { + instance->region_byte_count -= region->allocation.size; } else { error(user_context) << "VulkanRegionAllocator: Region byte counter invalid ... reseting to zero!\n"; instance->region_byte_count = 0; diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h index c5c3c6620a9f..95d1ae8d4a9b 100644 --- a/src/runtime/vulkan_resources.h +++ b/src/runtime/vulkan_resources.h @@ -284,12 +284,10 @@ bool vk_needs_scalar_uniform_buffer(void *user_context, size_t arg_sizes[], void *args[], int8_t arg_is_buffer[]) { - int i = 0; - while (arg_sizes[i] > 0) { - if (!arg_is_buffer[i]) { - return true; - } - i++; + + // if any args exist, we need at least 1x uniform buffer + if (arg_sizes[0] > 0) { + return true; } return false; } @@ -594,7 +592,8 @@ int vk_update_descriptor_set(void *user_context, if (arg_is_buffer[i]) { // get the allocated region for the buffer - MemoryRegion *device_region = reinterpret_cast(((halide_buffer_t *)args[i])->device); + halide_buffer_t *halide_buffer = (halide_buffer_t *)args[i]; + MemoryRegion *device_region = reinterpret_cast(halide_buffer->device); MemoryRegion *owner = allocator->owner_of(user_context, device_region); // retrieve the buffer from the region @@ -604,9 +603,8 @@ int vk_update_descriptor_set(void *user_context, return halide_error_code_internal_error; } - VkDeviceSize range_offset = device_region->range.head_offset; - VkDeviceSize range_size = device_region->size - device_region->range.head_offset - device_region->range.tail_offset; - halide_abort_if_false(user_context, (device_region->size - device_region->range.head_offset - device_region->range.tail_offset) > 0); + VkDeviceSize range_offset = 0; + VkDeviceSize range_size = device_region->allocation.size; VkDescriptorBufferInfo device_buffer_info = { *device_buffer, // the buffer range_offset, // range offset @@ -650,14 +648,41 @@ size_t vk_estimate_scalar_uniform_buffer_size(void *user_context, size_t arg_sizes[], void *args[], int8_t arg_is_buffer[]) { - int i = 0; - int scalar_uniform_buffer_size = 0; - while (arg_sizes[i] > 0) { + size_t scalar_arg_sizes = 0; + uint32_t buffer_count = 0; + + // Add up all arg sizes for scalar params + for (size_t i = 0; arg_sizes[i] > 0; i++) { if (!arg_is_buffer[i]) { - scalar_uniform_buffer_size += arg_sizes[i]; + scalar_arg_sizes += arg_sizes[i]; + } else { + buffer_count++; + } + } + + // Add space for index offsets for storage buffers (to support arbitrary crops) + size_t buffer_offset_arg_sizes = 0; + if (buffer_count) { + for (size_t i = 0; arg_sizes[i] > 0; i++) { + if (arg_is_buffer[i]) { + buffer_offset_arg_sizes += sizeof(int32_t); + } } - i++; } + + size_t end_of_scalar_args = aligned_offset(scalar_arg_sizes, sizeof(int32_t)); + size_t scalar_uniform_buffer_size = end_of_scalar_args + buffer_offset_arg_sizes; + +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_estimate_scalar_uniform_buffer_size (user_context: " << user_context << "): " + << "scalar_uniform_buffer_size: " << (uint32_t)scalar_uniform_buffer_size << ", " + << "scalar_arg_sizes: " << (uint32_t)scalar_arg_sizes << ", " + << "end_of_scalar_args: " << (uint32_t)end_of_scalar_args << ", " + << "buffer_count: " << (uint32_t)buffer_count << ", " + << "buffer_offset_arg_sizes: " << (uint32_t)buffer_offset_arg_sizes << ")\n"; +#endif + return scalar_uniform_buffer_size; } @@ -724,12 +749,37 @@ int vk_update_scalar_uniform_buffer(void *user_context, return halide_error_code_internal_error; } - // copy to the (host-visible/coherent) scalar uniform buffer + // Copy all scalar params to the (host-visible/coherent) scalar uniform buffer size_t arg_offset = 0; + size_t buffer_count = 0; for (size_t i = 0; arg_sizes[i] > 0; i++) { if (!arg_is_buffer[i]) { + halide_debug_assert(user_context, (arg_offset + arg_sizes[i]) <= region->allocation.size); memcpy(host_ptr + arg_offset, args[i], arg_sizes[i]); arg_offset += arg_sizes[i]; + } else { + buffer_count++; + } + } + + // Copy all storage buffer offsets (to support arbitrary crops) + if (buffer_count) { + + // Force alignment for the buffer offsets (e.g. all Int32 members in Uniform blocks must be 4-byte aligned) + arg_offset = aligned_offset(arg_offset, sizeof(int32_t)); + + // Copy all storage buffer offsets (to support arbitrary crops) + for (size_t i = 0; arg_sizes[i] > 0; i++) { + if (arg_is_buffer[i]) { + + // get the allocated region for the buffer + MemoryRegion *device_region = reinterpret_cast(((halide_buffer_t *)args[i])->device); + halide_debug_assert(user_context, device_region != nullptr); + RegionIndexing region_indexing = device_region->indexing; + halide_debug_assert(user_context, (arg_offset + sizeof(int32_t)) <= region->allocation.size); + memcpy(host_ptr + arg_offset, &(region_indexing.offset), sizeof(int32_t)); + arg_offset += sizeof(int32_t); + } } } @@ -1791,7 +1841,7 @@ int vk_do_multidimensional_copy(void *user_context, VkCommandBuffer command_buff int vk_device_crop_from_offset(void *user_context, const struct halide_buffer_t *src, - int64_t offset, + int64_t byte_offset, struct halide_buffer_t *dst) { VulkanContext ctx(user_context); @@ -1804,7 +1854,7 @@ int vk_device_crop_from_offset(void *user_context, uint64_t t_before = halide_current_time_ns(user_context); #endif - if (offset < 0) { + if (byte_offset < 0) { error(user_context) << "Vulkan: Invalid offset for device crop!\n"; return halide_error_code_device_crop_failed; } @@ -1816,8 +1866,12 @@ int vk_device_crop_from_offset(void *user_context, return halide_error_code_device_crop_failed; } - // create the croppeg region from the allocated region - MemoryRegion *cropped_region = ctx.allocator->create_crop(user_context, device_region, (uint64_t)offset); + // create the cropped region from the allocated region by computing a relative offset + // from the start of the region as an index based on the declared type size which + // will be passed as a shader parameter to adjust the indices during loads/stores + RegionIndexing region_indexing = {}; + region_indexing.offset = byte_offset / src->type.bytes(); + MemoryRegion *cropped_region = ctx.allocator->create_crop(user_context, device_region, region_indexing); if ((cropped_region == nullptr) || (cropped_region->handle == nullptr)) { error(user_context) << "Vulkan: Failed to crop region! Unable to create memory region!\n"; return halide_error_code_device_crop_failed; diff --git a/test/runtime/block_allocator.cpp b/test/runtime/block_allocator.cpp index 26ce8066e118..94f3a31f3e6e 100644 --- a/test/runtime/block_allocator.cpp +++ b/test/runtime/block_allocator.cpp @@ -56,11 +56,11 @@ int conform_block(void *user_context, MemoryRequest *request) { int allocate_region(void *user_context, MemoryRegion *region) { region->handle = (void *)1; - allocated_region_memory += region->size; + allocated_region_memory += region->allocation.size; debug(user_context) << "Test : allocate_region (" << "region=" << (void *)(region) << " " - << "region_size=" << int32_t(region->size) << " " + << "region_size=" << int32_t(region->allocation.size) << " " << "allocated_region_memory=" << int32_t(allocated_region_memory) << " " << ") ..."; @@ -69,11 +69,11 @@ int allocate_region(void *user_context, MemoryRegion *region) { int deallocate_region(void *user_context, MemoryRegion *region) { region->handle = (void *)0; - allocated_region_memory -= region->size; + allocated_region_memory -= region->allocation.size; debug(user_context) << "Test : deallocate_region (" << "region=" << (void *)(region) << " " - << "region_size=" << int32_t(region->size) << " " + << "region_size=" << int32_t(region->allocation.size) << " " << "allocated_region_memory=" << int32_t(allocated_region_memory) << " " << ") ...";