diff --git a/antora/modules/ROOT/nav.adoc b/antora/modules/ROOT/nav.adoc index 91e07bc3d4..4375b1c220 100644 --- a/antora/modules/ROOT/nav.adoc +++ b/antora/modules/ROOT/nav.adoc @@ -58,6 +58,7 @@ ** xref:samples/extensions/debug_utils/README.adoc[Debug utils] ** xref:samples/extensions/descriptor_buffer_basic/README.adoc[Descriptor buffer basic] ** xref:samples/extensions/descriptor_indexing/README.adoc[Descriptor indexing] +** xref:samples/extensions/device_fault/README.adoc[Device Fault] ** xref:samples/extensions/dynamic_line_rasterization/README.adoc[Dynamic line rasterization] ** xref:samples/extensions/dynamic_primitive_clipping/README.adoc[Dynamic primitive clipping] ** xref:samples/extensions/dynamic_rendering/README.adoc[Dynamic rendering] diff --git a/framework/vulkan_type_mapping.h b/framework/vulkan_type_mapping.h index 7431917c8d..5d8968ae79 100644 --- a/framework/vulkan_type_mapping.h +++ b/framework/vulkan_type_mapping.h @@ -97,6 +97,18 @@ struct HPPType using Type = vk::PhysicalDeviceBufferDeviceAddressFeatures; }; +template <> +struct HPPType +{ + using Type = vk::PhysicalDeviceFaultFeaturesEXT; +}; + +template <> +struct HPPType +{ + using Type = vk::PhysicalDeviceAddressBindingReportFeaturesEXT; +}; + template <> struct HPPType { diff --git a/samples/extensions/device_fault/CMakeLists.txt b/samples/extensions/device_fault/CMakeLists.txt new file mode 100644 index 0000000000..5727720776 --- /dev/null +++ b/samples/extensions/device_fault/CMakeLists.txt @@ -0,0 +1,31 @@ +# Copyright (c) 2026, Arm Limited and Contributors +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 the "License"; +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +get_filename_component(FOLDER_NAME ${CMAKE_CURRENT_LIST_DIR} NAME) +get_filename_component(PARENT_DIR ${CMAKE_CURRENT_LIST_DIR} PATH) +get_filename_component(CATEGORY_NAME ${PARENT_DIR} NAME) + +add_sample( + ID ${FOLDER_NAME} + CATEGORY ${CATEGORY_NAME} + AUTHOR "Arm" + NAME "Device Fault" + DESCRIPTION "Using VK_EXT_device_fault extension to get more information when VK_ERROR_DEVICE_LOST error occurs." + SHADER_FILES_GLSL + "device_fault/glsl/render.vert" + "device_fault/glsl/render.frag" + "device_fault/glsl/update_vbo.comp") diff --git a/samples/extensions/device_fault/README.adoc b/samples/extensions/device_fault/README.adoc new file mode 100644 index 0000000000..958fa57ad4 --- /dev/null +++ b/samples/extensions/device_fault/README.adoc @@ -0,0 +1,246 @@ +//// +- Copyright (c) 2026, Arm Limited and Contributors +- +- SPDX-License-Identifier: Apache-2.0 +- +- Licensed under the Apache License, Version 2.0 the "License"; +- you may not use this file except in compliance with the License. +- You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. 
+- +//// += Device Fault + +ifdef::site-gen-antora[] +TIP: The source for this sample can be found in the https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/extensions/device_fault[Khronos Vulkan samples github repository]. +endif::[] + + +== Overview + +When developing Vulkan applications, understanding and handling GPU errors is crucial. Currently, traditional graphics debugging methods do not give detailed information about GPU faults. +The VK_EXT_device_fault extension provides detailed information when VK_ERROR_DEVICE_LOST occurs, while the VK_EXT_device_address_binding_report extension helps monitor GPU memory usage by reporting +allocated and bound/unbound addresses in a Vulkan application. + + +=== VK_EXT_device_fault Extension Setup + +[,cpp] +---- +REQUEST_OPTIONAL_FEATURE(gpu, + VkPhysicalDeviceFaultFeaturesEXT, + deviceFault); +---- + +This code enables the device fault feature, which allows us to query detailed information about device lost errors. +The feature must be enabled before creating the logical device. + +=== Address Binding Report Extension Setup + +[,cpp] +---- +REQUEST_OPTIONAL_FEATURE(gpu, + VkPhysicalDeviceAddressBindingReportFeaturesEXT, + reportAddressBinding); +---- + +This feature enables monitoring of GPU memory address bindings, providing insights into memory allocation. + +=== Vulkan Validation Layer Configuration + +[,cpp] +---- +add_instance_extension(VK_EXT_LAYER_SETTINGS_EXTENSION_NAME, true); + +VkLayerSettingEXT layerSetting; +layerSetting.pLayerName = "VK_LAYER_KHRONOS_validation"; +layerSetting.pSettingName = "enables"; +layerSetting.type = VK_LAYER_SETTING_TYPE_STRING_EXT; +layerSetting.valueCount = 1; +static const char *layerEnables = "VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT"; +layerSetting.pValues = &layerEnables; +---- + +=== VK_EXT_device_fault Implementation + +The core of device fault handling is implemented in the `check_device_fault` function. 
+Let's break it down: + +[,cpp] +---- +void DeviceFault::check_device_fault(VkResult result) +{ + VkDevice vk_device = get_device().get_handle(); + if (result != VK_ERROR_DEVICE_LOST) { + return; + } + + VkDeviceFaultCountsEXT faultCount = { VK_STRUCTURE_TYPE_DEVICE_FAULT_COUNTS_EXT }; + VkResult countResult = vkGetDeviceFaultInfoEXT(vk_device, &faultCount, nullptr); +---- + +This initial part checks if we have a VK_ERROR_DEVICE_LOST and queries the number of faults. The `VkDeviceFaultCountsEXT` structure will be filled with counts of different types of fault information. + +[,cpp] +---- +if (faultCount.addressInfoCount > 0 || faultCount.vendorInfoCount > 0) { + std::vector addressInfos(faultCount.addressInfoCount); + std::vector vendorInfos(faultCount.vendorInfoCount); + std::vector vendorBinaryData(faultCount.vendorBinarySize); + + VkDeviceFaultInfoEXT faultInfo = { VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT }; + faultInfo.pAddressInfos = addressInfos.data(); + faultInfo.pVendorInfos = vendorInfos.data(); + faultInfo.pVendorBinaryData = vendorBinaryData.data(); +---- + +This section allocates storage for the fault information. We create vectors to store address information, vendor-specific information, and any binary data provided by the vendor. + +[, cpp] +---- +VkResult faultResult = vkGetDeviceFaultInfoEXT(vk_device, &faultCount, &faultInfo); + + // Log the description and address info if it was able to catch the fault. + if (faultResult == VK_SUCCESS) { + + // Some vendors may provide additional information + LOGE("Vendor Fault Description: {}", faultInfo.pVendorInfos ? 
faultInfo.pVendorInfos->description : "No Vendor Information available.") + // Log each address info with decimal and hexadecimal representations + for (uint32_t i = 0; i < faultCount.addressInfoCount; i++) { + LOGE("Fault Address Info Address Type: {}", vk::to_string(static_cast(addressInfos[i].addressType))); + LOGE("Fault Address Info Reported Address -> Decimal: {} | Hex: 0x{:X}", addressInfos[i].reportedAddress, static_cast(addressInfos[i].reportedAddress)); + } + } +} +---- + +Finally, we query the detailed fault information and log it. This includes vendor-provided descriptions and information about any memory addresses involved in the fault. + +=== VK_EXT_device_address_binding_report Implementation +The address binding report functionality is implemented through a callback system. +Here's the handler function: + +[,cpp] +---- +void DeviceFault::handle_address_binding(const VkDeviceAddressBindingCallbackDataEXT& callbackData) +{ + if (!deviceBindingReportEnabled) { + return; + } + + // Report current memory operations with a label, an address and size of the memory + switch (callbackData.bindingType) { + case VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT: + LOGI("{} : Address Bound -> Decimal: {} | Hex: 0x{:X} | Size = {} Bytes", current_memory_label, std::to_string(callbackData.baseAddress), static_cast(callbackData.baseAddress), callbackData.size); + break; + + case VK_DEVICE_ADDRESS_BINDING_TYPE_UNBIND_EXT: + LOGI("{} : Address Unbound -> Decimal: {} | Hex: 0x{:X} | Size = {} Bytes", current_memory_label, std::to_string(callbackData.baseAddress), static_cast(callbackData.baseAddress), callbackData.size); + break; + + default: + LOGE("No address binding/unbinding information!"); + break; + } +} +---- +This handler processes memory binding events. It logs when memory is bound or unbound, including a label, the address and size of the memory region. +This information is invaluable when debugging memory-related issues. 
+ +=== Debug Utils Integration +The debug utils messenger needs to be configured to receive binding reports: + +[,cpp] +---- +VkDebugUtilsMessengerCreateInfoEXT debug_utils_create_info{ + VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT +}; + +debug_utils_create_info.messageSeverity = + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; + +debug_utils_create_info.messageType = + VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_DEVICE_ADDRESS_BINDING_BIT_EXT; + +debug_utils_create_info.pfnUserCallback = debug_callback; +debug_utils_create_info.pUserData = this; +---- + +The debug messenger is configured to receive various types of debug messages, including address binding reports. +The callback function will be called whenever relevant events occur. + +== Testing the Implementation +The implementation includes a mechanism to trigger the VK_ERROR_DEVICE_LOST error in a basic way: an invalid address access. + +=== Triggering VK_ERROR_DEVICE_LOST in Shader + +One of the many ways of triggering VK_ERROR_DEVICE_LOST is to perform an invalid memory access in shaders. +Since this sample is based on buffer_device_address, we'll only add the extra code needed for the sake of this tutorial. + +https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/extensions/buffer_device_address + +In the update_vbo.comp shader, we add the section below. + +[,glsl] +---- +// 1. Accessing an invalid address. +if (registers.trigger_device_fault_invalid_address) +{ + positions.positions[1000000] = pos / (vec2(gl_WorkGroupSize.xy) * vec2(gl_NumWorkGroups.xy) - 1.0) - 0.5; +} +---- + +By using UI elements, we can manually trigger VK_ERROR_DEVICE_LOST by accessing an invalid address that will trigger an MMU page fault. 
+ +image::./images/device_fault_ui.png[DeviceFault] + +[,cpp] +---- +void DeviceFault::on_update_ui_overlay(vkb::Drawer &drawer) +{ + if (drawer.header("Settings")) + { + if (drawer.button("Trigger Device Fault with Invalid Address")) { + triggerDeviceFaultInvalidAddress = true; + } + } +} +---- + +=== Debugging Details + +Concrete out-of-bounds example: +In our setup, a compute shader writes directly into a Shader Storage Buffer Object (SSBO) via buffer device addresses. +We are using the VK_EXT_device_address_binding_report extension in this sample to report our memory allocation and binding addresses as shown below: + +image::./images/binding_address_info.png[DeviceFault] + +One of the SSBO's was bound at `0x5FF96E4000 (decimal 412,206,645,248)` with a size of `2048 bytes`. +We deliberately issued a store to `positions.positions[1,000,000]`, where `Position` is a `std430` array of `vec2` so each element is `8 bytes`. +The shader therefore targets `base + index * 8 = base + 8,000,000 bytes (hex 0x007A1200)`. + +image::./images/failed_logcat_output.png[DeviceFault] + +The device fault report shows `0x5ff9e85200 (decimal 412,214,645,248)`, and the difference from the bound base is exactly `8,000,000 bytes`. +There is no rounding in this report. Just straightforward pointer arithmetic, making it trivial to correlate the fault with the offending SSBO and confirm the write was `~7.63 MiB` past a `2 KiB` allocation. + +Keep in mind that there are no guarantees around the granularity of the fault addresses (and no guarantee that an out of bounds access generates a fault at all!), so this will vary across vendors. + +== Conclusion +VK_EXT_device_fault and VK_EXT_device_address_binding_report are two important extensions that help developers debug VK_ERROR_DEVICE_LOST crashes. 
+While the fault reports generated by VK_EXT_device_fault on GPUs may initially seem abstract, containing GPU virtual addresses and faulting instruction offsets, this information still holds meaningful clues for developers. +In cases like out-of-bounds memory access or infinite shader loops, the extension identifies the affected shader stage and provides the memory address involved in the violation. +Although it's currently not possible to map these addresses directly back to SPIR-V lines or high-level shader code, developers can still use patterns in fault addresses and resource usage history (implemented via VK_EXT_device_address_binding_report) to narrow down the root cause. +In this way, the extension doesn't just flag that something went wrong; it also helps developers start asking the right questions about where and why it happened. + diff --git a/samples/extensions/device_fault/device_fault.cpp b/samples/extensions/device_fault/device_fault.cpp new file mode 100644 index 0000000000..c8953f7496 --- /dev/null +++ b/samples/extensions/device_fault/device_fault.cpp @@ -0,0 +1,615 @@ +/* Copyright (c) 2026, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device_fault.h" + +DeviceFault::DeviceFault() +{ + title = "Device Fault"; + + // Need to enable buffer device address extension. 
+ // add_instance_extension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + add_device_extension(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME); + + // Provides support for VkAllocateMemoryFlagsInfo. Otherwise, core in Vulkan 1.1. + add_device_extension(VK_KHR_DEVICE_GROUP_EXTENSION_NAME); + + // Provides additional info when VK_DEVICE_LOST error happens. + add_device_extension(VK_EXT_DEVICE_FAULT_EXTENSION_NAME); + + // Gives more information about GPU Virtual Address space + add_device_extension(VK_EXT_DEVICE_ADDRESS_BINDING_REPORT_EXTENSION_NAME, true); +} + +DeviceFault::~DeviceFault() +{ + if (has_device()) + { + VkDevice vk_device = get_device().get_handle(); + vkDestroyPipelineLayout(vk_device, pipelines.compute_pipeline_layout, nullptr); + vkDestroyPipelineLayout(vk_device, pipelines.graphics_pipeline_layout, nullptr); + vkDestroyPipeline(vk_device, pipelines.bindless_vbo_pipeline, nullptr); + vkDestroyPipeline(vk_device, pipelines.compute_update_pipeline, nullptr); + + for (auto &buffer : test_buffers) + { + vkDestroyBuffer(vk_device, buffer.buffer, nullptr); + vkFreeMemory(vk_device, buffer.memory, nullptr); + } + vkDestroyBuffer(vk_device, pointer_buffer.buffer, nullptr); + vkFreeMemory(vk_device, pointer_buffer.memory, nullptr); + vkDestroyDebugUtilsMessengerEXT(get_instance().get_handle(), debug_utils_messenger, nullptr); + } +} + + +void DeviceFault::build_command_buffers() +{ +} + +void DeviceFault::on_update_ui_overlay(vkb::Drawer &drawer) +{ + if (drawer.header("Settings")) + { + if (drawer.button("Trigger Device Fault with Invalid Address")) + { + triggerDeviceFaultInvalidAddress = true; + } + } +} + +void DeviceFault::check_device_fault() +{ + VkDevice vk_device = get_device().get_handle(); + + // First query just the counts + VkDeviceFaultCountsEXT faultCount = { VK_STRUCTURE_TYPE_DEVICE_FAULT_COUNTS_EXT }; + VkResult countResult = vkGetDeviceFaultInfoEXT(vk_device, &faultCount, nullptr); + + if (countResult != VK_SUCCESS && countResult 
!= VK_ERROR_DEVICE_LOST) { + LOGE("Failed to query fault counts: ", std::to_string(countResult)); + return; + } + + // Only allocate and query full fault info if we have faults + if (faultCount.addressInfoCount > 0 || faultCount.vendorInfoCount > 0) { + LOGE("Device Fault encountered."); + + std::vector addressInfos(faultCount.addressInfoCount); + std::vector vendorInfos(faultCount.vendorInfoCount); + std::vector vendorBinaryData(faultCount.vendorBinarySize); + + VkDeviceFaultInfoEXT faultInfo = { VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT }; + + faultInfo.pAddressInfos = addressInfos.data(); + faultInfo.pVendorInfos = vendorInfos.data(); + faultInfo.pVendorBinaryData = vendorBinaryData.data(); + + VkResult faultResult = vkGetDeviceFaultInfoEXT(vk_device, &faultCount, &faultInfo); + + // Log the description and address info if it was able to catch the fault. + if (faultResult == VK_SUCCESS) { + // Some vendors may provide additional information + LOGE("Vendor Fault Description: {}", faultInfo.pVendorInfos ? 
faultInfo.pVendorInfos->description : "No Vendor Information available.") + // Log each address info + for (uint32_t i = 0; i < faultCount.addressInfoCount; i++) { + LOGE("Fault Address Info Address Type: {}", vk::to_string(static_cast(addressInfos[i].addressType))); + LOGE("Fault Address Info Reported Address -> Decimal: {} | Hex: 0x{:X}", addressInfos[i].reportedAddress, static_cast(addressInfos[i].reportedAddress)); + } + } + } else { + LOGI("No device faults detected."); + } +} + +void DeviceFault::handle_address_binding(const VkDeviceAddressBindingCallbackDataEXT& callbackData) +{ + if (!deviceBindingReportEnabled) { + return; + } + + // Report current memory operations with a label and an address + switch (callbackData.bindingType) { + case VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT: + LOGI("{} : Address Bound -> Decimal: {} | Hex: 0x{:X} | Size = {} Bytes", current_memory_label, std::to_string(callbackData.baseAddress), static_cast(callbackData.baseAddress), callbackData.size); + break; + + case VK_DEVICE_ADDRESS_BINDING_TYPE_UNBIND_EXT: + LOGI("{} : Address Unbound -> Decimal: {} | Hex: 0x{:X} | Size = {} Bytes", current_memory_label, std::to_string(callbackData.baseAddress), static_cast(callbackData.baseAddress), callbackData.size); + break; + + default: + LOGE("No address binding/unbinding information!"); + break; + } +} + +VKAPI_ATTR VkBool32 VKAPI_CALL DeviceFault::debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, + void* pUserData) +{ + // Check if this is an address binding callback + auto* bindingData = static_cast(pCallbackData->pNext); + if (bindingData && bindingData->sType == VK_STRUCTURE_TYPE_DEVICE_ADDRESS_BINDING_CALLBACK_DATA_EXT) + { + auto* app = static_cast(pUserData); + app->handle_address_binding(*bindingData); + } + return VK_FALSE; +} + +bool DeviceFault::prepare(const vkb::ApplicationOptions &options) +{ + if 
(!ApiVulkanSample::prepare(options)) + { + return false; + } + + // Set up debug utils messenger with proper user data pointer + VkDebugUtilsMessengerCreateInfoEXT debug_utils_create_info{VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT}; + + debug_utils_create_info.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT; + debug_utils_create_info.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_DEVICE_ADDRESS_BINDING_BIT_EXT; + + debug_utils_create_info.pfnUserCallback = debug_callback; + debug_utils_create_info.pUserData = this; // Pass 'this' pointer to access instance methods + + vkCreateDebugUtilsMessengerEXT = reinterpret_cast(vkGetInstanceProcAddr(get_instance().get_handle(), "vkCreateDebugUtilsMessengerEXT")); + + if (!vkCreateDebugUtilsMessengerEXT) + { + LOGE("Failed to get vkCreateDebugUtilsMessengerEXT function pointer"); + return false; + } + + VkResult res = (vkCreateDebugUtilsMessengerEXT( + get_instance().get_handle(), + &debug_utils_create_info, + nullptr, + &debug_utils_messenger)); + + create_vbo_buffers(); + index_buffer = create_index_buffer(); + + create_pipelines(); + + prepared = true; + return true; +} + +struct PushCompute +{ + // This type is 8 bytes, and maps to a buffer_reference in Vulkan GLSL. + VkDeviceAddress table; + float fract_time; + VkBool32 trigger_device_fault_invalid_address; +}; + +struct PushVertex +{ + glm::mat4 view_projection; + VkDeviceAddress table; +}; + +VkPipelineLayout DeviceFault::create_pipeline_layout(bool graphics) +{ + // For simplicity, we avoid any use of descriptor sets here. + // We can just push a single pointer instead, which references all the buffers we need to work with. + VkPipelineLayout layout{}; + + VkPipelineLayoutCreateInfo layout_create_info = vkb::initializers::pipeline_layout_create_info(nullptr, 0); + + const std::vector ranges = { + vkb::initializers::push_constant_range(graphics ? VK_SHADER_STAGE_VERTEX_BIT : VK_SHADER_STAGE_COMPUTE_BIT, + graphics ? 
sizeof(PushVertex) : sizeof(PushCompute), 0), + }; + layout_create_info.pushConstantRangeCount = static_cast(ranges.size()); + layout_create_info.pPushConstantRanges = ranges.data(); + VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &layout_create_info, nullptr, &layout)); + return layout; +} + +void DeviceFault::create_compute_pipeline() +{ + pipelines.compute_pipeline_layout = create_pipeline_layout(false); + VkComputePipelineCreateInfo info = vkb::initializers::compute_pipeline_create_info(pipelines.compute_pipeline_layout); + info.stage = load_shader("device_fault", "update_vbo.comp.spv", VK_SHADER_STAGE_COMPUTE_BIT); + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), VK_NULL_HANDLE, 1, &info, nullptr, &pipelines.compute_update_pipeline)); +} + +void DeviceFault::create_graphics_pipeline() +{ + pipelines.graphics_pipeline_layout = create_pipeline_layout(true); + VkGraphicsPipelineCreateInfo info = vkb::initializers::pipeline_create_info(pipelines.graphics_pipeline_layout, render_pass); + + // No VBOs, everything is fetched from buffer device addresses. + VkPipelineVertexInputStateCreateInfo vertex_input_state = vkb::initializers::pipeline_vertex_input_state_create_info(); + + // Going to render a simple quad mesh here with index buffer strip and primitive restart, + // otherwise nothing interesting here. 
+ VkPipelineInputAssemblyStateCreateInfo input_assembly_state = + vkb::initializers::pipeline_input_assembly_state_create_info(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 0, VK_TRUE); + + VkPipelineRasterizationStateCreateInfo rasterization_state = + vkb::initializers::pipeline_rasterization_state_create_info(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE, 0); + VkPipelineColorBlendAttachmentState blend_attachment_state = + vkb::initializers::pipeline_color_blend_attachment_state(0xf, VK_FALSE); + VkPipelineColorBlendStateCreateInfo color_blend_state = + vkb::initializers::pipeline_color_blend_state_create_info(1, &blend_attachment_state); + VkPipelineDepthStencilStateCreateInfo depth_stencil_state = + vkb::initializers::pipeline_depth_stencil_state_create_info(VK_FALSE, VK_FALSE, VK_COMPARE_OP_GREATER); + VkPipelineViewportStateCreateInfo viewport_state = + vkb::initializers::pipeline_viewport_state_create_info(1, 1, 0); + VkPipelineMultisampleStateCreateInfo multisample_state = + vkb::initializers::pipeline_multisample_state_create_info(VK_SAMPLE_COUNT_1_BIT, 0); + std::vector dynamic_state_enables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamic_state = + vkb::initializers::pipeline_dynamic_state_create_info(dynamic_state_enables); + + info.pVertexInputState = &vertex_input_state; + info.pInputAssemblyState = &input_assembly_state; + info.pRasterizationState = &rasterization_state; + info.pColorBlendState = &color_blend_state; + info.pDepthStencilState = &depth_stencil_state; + info.pViewportState = &viewport_state; + info.pMultisampleState = &multisample_state; + info.pDynamicState = &dynamic_state; + + VkPipelineShaderStageCreateInfo stages[2]; + info.pStages = stages; + info.stageCount = 2; + + stages[0] = load_shader("device_fault", "render.vert.spv", VK_SHADER_STAGE_VERTEX_BIT); + stages[1] = load_shader("device_fault", "render.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT); + 
VK_CHECK(vkCreateGraphicsPipelines(get_device().get_handle(), VK_NULL_HANDLE, 1, &info, nullptr, &pipelines.bindless_vbo_pipeline)); +} + +void DeviceFault::create_pipelines() +{ + set_memory_debug_label("Creating Compute Pipeline"); + create_compute_pipeline(); + + set_memory_debug_label("Creating Graphics Pipeline"); + create_graphics_pipeline(); +} + +// A straight forward way of creating a "tessellated" quad mesh. +// Choose a low resolution per mesh so it's more visible in the vertex shader what is happening. +static constexpr unsigned mesh_width = 16; +static constexpr unsigned mesh_height = 16; +static constexpr unsigned mesh_strips = mesh_height - 1; +static constexpr unsigned mesh_indices_per_strip = 2 * mesh_width; +static constexpr unsigned mesh_num_indices = mesh_strips * (mesh_indices_per_strip + 1); // Add one index to handle primitive restart. + +std::unique_ptr DeviceFault::create_index_buffer() +{ + constexpr size_t size = mesh_num_indices * sizeof(uint16_t); + + // Build a simple subdivided quad mesh. We can tweak the vertices later in compute to create a simple cloth-y/wave-like effect. 
+ auto index_buffer_ = std::make_unique(get_device(), + size, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VMA_MEMORY_USAGE_GPU_ONLY); + + auto staging_buffer = vkb::core::BufferC::create_staging_buffer(get_device(), size, nullptr); + + auto *buffer = reinterpret_cast(staging_buffer.map()); + for (unsigned strip = 0; strip < mesh_strips; strip++) + { + for (unsigned x = 0; x < mesh_width; x++) + { + *buffer++ = strip * mesh_width + x; + *buffer++ = (strip + 1) * mesh_width + x; + } + *buffer++ = 0xffff; + } + + staging_buffer.flush(); + staging_buffer.unmap(); + + auto cmd = get_device().get_command_pool().request_command_buffer(); + cmd->begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); + cmd->copy_buffer(staging_buffer, *index_buffer_, size); + + vkb::BufferMemoryBarrier memory_barrier; + memory_barrier.src_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; + memory_barrier.dst_access_mask = VK_ACCESS_INDEX_READ_BIT; + memory_barrier.src_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + memory_barrier.dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + cmd->buffer_memory_barrier(*index_buffer_, 0, VK_WHOLE_SIZE, memory_barrier); + cmd->end(); + + auto const &graphicsQueue = get_device().get_queue_by_flags(VK_QUEUE_GRAPHICS_BIT, 0); + graphicsQueue.submit(*cmd, VK_NULL_HANDLE); + graphicsQueue.wait_idle(); + return index_buffer_; +} + +void DeviceFault::create_vbo_buffers() +{ + test_buffers.resize(64); + for (auto &buffer : test_buffers) + { + buffer = create_vbo_buffer(); + } + + pointer_buffer = create_pointer_buffer(); +} + +DeviceFault::TestBuffer DeviceFault::create_vbo_buffer() +{ + TestBuffer buffer; + + // Here we represent each "meshlet" as its own buffer to demonstrate maximum allocation flexibility. + VkDevice device = get_device().get_handle(); + constexpr size_t mesh_size = mesh_width * mesh_height * sizeof(glm::vec2); + + // To be able to query the buffer device address, we must use the SHADER_DEVICE_ADDRESS_BIT usage flag. 
+ // STORAGE_BUFFER is also required. + VkBufferCreateInfo create_info = vkb::initializers::buffer_create_info( + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR, mesh_size); + + VK_CHECK(vkCreateBuffer(device, &create_info, nullptr, &buffer.buffer)); + + VkMemoryAllocateInfo memory_allocation_info = vkb::initializers::memory_allocate_info(); + VkMemoryRequirements memory_requirements; + vkGetBufferMemoryRequirements(device, buffer.buffer, &memory_requirements); + + // Another change is that the memory we allocate must be marked as buffer device address capable. + VkMemoryAllocateFlagsInfoKHR flags_info{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR}; + flags_info.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; + memory_allocation_info.pNext = &flags_info; + + memory_allocation_info.allocationSize = memory_requirements.size; + memory_allocation_info.memoryTypeIndex = get_device().get_gpu().get_memory_type(memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + set_memory_debug_label("Allocating Vertex Buffer Object"); + VK_CHECK(vkAllocateMemory(get_device().get_handle(), &memory_allocation_info, nullptr, &buffer.memory)); + + set_memory_debug_label("Binding Vertex Buffer Object"); + VK_CHECK(vkBindBufferMemory(get_device().get_handle(), buffer.buffer, buffer.memory, 0)); + + // Once we've bound the buffer, we query the buffer device address. + // We can now place this address (or any offset of said address) into a buffer and access data as a raw pointer in shaders. + VkBufferDeviceAddressInfoKHR address_info{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR}; + address_info.buffer = buffer.buffer; + buffer.gpu_address = vkGetBufferDeviceAddressKHR(device, &address_info); + + // The buffer content will be computed at runtime, so don't upload anything. 
+ + return buffer; +} + +DeviceFault::TestBuffer DeviceFault::create_pointer_buffer() +{ + // Just like create_vbo_buffer(), we create a buffer which holds other pointers. + TestBuffer buffer; + + VkDevice device = get_device().get_handle(); + size_t buffer_size = test_buffers.size() * sizeof(VkDeviceAddress); // 64 * 8 + + // We use TRANSFER_DST since we will upload to the buffer later. + VkBufferCreateInfo create_info = vkb::initializers::buffer_create_info( + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR, buffer_size); + + VK_CHECK(vkCreateBuffer(device, &create_info, nullptr, &buffer.buffer)); + + VkMemoryAllocateInfo memory_allocation_info = vkb::initializers::memory_allocate_info(); + VkMemoryAllocateFlagsInfoKHR flags_info{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR}; + VkMemoryRequirements memory_requirements; + vkGetBufferMemoryRequirements(device, buffer.buffer, &memory_requirements); + + flags_info.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; + memory_allocation_info.pNext = &flags_info; + + memory_allocation_info.allocationSize = memory_requirements.size; + memory_allocation_info.memoryTypeIndex = get_device().get_gpu().get_memory_type(memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + set_memory_debug_label("Allocating Pointer Buffer "); + VK_CHECK(vkAllocateMemory(get_device().get_handle(), &memory_allocation_info, nullptr, &buffer.memory)); + + set_memory_debug_label("Binding Pointer Buffer "); + VK_CHECK(vkBindBufferMemory(get_device().get_handle(), buffer.buffer, buffer.memory, 0)); + + VkBufferDeviceAddressInfoKHR address_info{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR}; + address_info.buffer = buffer.buffer; + buffer.gpu_address = vkGetBufferDeviceAddressKHR(device, &address_info); + + return buffer; +} + +void DeviceFault::update_pointer_buffer(VkCommandBuffer cmd) +{ + // Wait with updating the pointer buffer until previous 
frame's vertex shading is complete. + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + 0, nullptr, 0, nullptr, 0, nullptr); + + std::vector pointers; + pointers.reserve(test_buffers.size()); + for (auto &test_buffer : test_buffers) + { + pointers.push_back(test_buffer.gpu_address); + } + + // Simple approach. A proxy for a compute shader which culls meshlets. + vkCmdUpdateBuffer(cmd, pointer_buffer.buffer, 0, test_buffers.size() * sizeof(VkDeviceAddress), pointers.data()); + + VkMemoryBarrier global_memory_barrier = vkb::initializers::memory_barrier(); + global_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + global_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, 0, + 1, &global_memory_barrier, 0, nullptr, 0, nullptr); +} + +void DeviceFault::update_meshlets(VkCommandBuffer cmd) +{ + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipelines.compute_update_pipeline); + + PushCompute push_compute{}; + + // Here we push a pointer to a buffer, which holds pointers to all the VBO "meshlets". + push_compute.table = pointer_buffer.gpu_address; + + // So we can create a wave-like animation. + push_compute.fract_time = accumulated_time; + + // So we can trigger device fault using invalid address + push_compute.trigger_device_fault_invalid_address = triggerDeviceFaultInvalidAddress; + + vkCmdPushConstants(cmd, pipelines.compute_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(push_compute), &push_compute); + + // Write-after-read hazard is implicitly handled by the earlier pointer buffer update where + // we did VERTEX -> TRANSFER -> COMPUTE chain of barriers. + // Update all meshlets. 
+ vkCmdDispatch(cmd, mesh_width / 8, mesh_height / 8, static_cast<uint32_t>(test_buffers.size())); + + VkMemoryBarrier global_memory_barrier = vkb::initializers::memory_barrier(); + global_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + global_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + 0, 1, &global_memory_barrier, 0, nullptr, 0, nullptr); +} + +void DeviceFault::render(float delta_time) +{ + // Since it will be reporting each buffers binding information in every frame, + // we don't want to print everything for better visibility when VK_ERROR_DEVICE_LOST occurs. + deviceBindingReportEnabled = false; + + ApiVulkanSample::prepare_frame(); + VK_CHECK(vkWaitForFences(get_device().get_handle(), 1, &wait_fences[current_buffer], VK_TRUE, UINT64_MAX)); + VK_CHECK(vkResetFences(get_device().get_handle(), 1, &wait_fences[current_buffer])); + + VkViewport viewport = {0.0f, 0.0f, static_cast<float>(width), static_cast<float>(height), 0.0f, 1.0f}; + VkRect2D scissor = {{0, 0}, {width, height}}; + + recreate_current_command_buffer(); + auto cmd = draw_cmd_buffers[current_buffer]; + auto begin_info = vkb::initializers::command_buffer_begin_info(); + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + vkBeginCommandBuffer(cmd, &begin_info); + + // First thing is to update the pointer buffer. + // We could use a compute shader here if we're doing + // GPU-driven rendering for example. + update_pointer_buffer(cmd); + + // Arbitrary value between 0 and 1 to create some animation. + accumulated_time += 0.2f * delta_time; + accumulated_time = glm::fract(accumulated_time); + + // Update VBOs through buffer_device_address. 
+ update_meshlets(cmd); + + VkRenderPassBeginInfo render_pass_begin = vkb::initializers::render_pass_begin_info(); + render_pass_begin.renderPass = render_pass; + render_pass_begin.renderArea.extent.width = width; + render_pass_begin.renderArea.extent.height = height; + render_pass_begin.clearValueCount = 2; + VkClearValue clears[2] = {}; + clears[0].color.float32[0] = 0.033f; + clears[0].color.float32[1] = 0.073f; + clears[0].color.float32[2] = 0.133f; + render_pass_begin.pClearValues = clears; + render_pass_begin.framebuffer = framebuffers[current_buffer]; + + vkCmdBeginRenderPass(cmd, &render_pass_begin, VK_SUBPASS_CONTENTS_INLINE); + + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelines.bindless_vbo_pipeline); + vkCmdSetViewport(cmd, 0, 1, &viewport); + vkCmdSetScissor(cmd, 0, 1, &scissor); + + PushVertex push_vertex{}; + + // Create an ad-hoc perspective matrix. + push_vertex.view_projection = + glm::perspective(0.5f * glm::pi<float>(), static_cast<float>(width) / static_cast<float>(height), 1.0f, 100.0f) * + glm::lookAt(glm::vec3(0.0f, 0.0f, 5.0f), glm::vec3(0.0f), glm::vec3(0.0f, 1.0f, 0.0f)); + + // Push pointer to array of meshlets. + // Every instance renders its own meshlet. + push_vertex.table = pointer_buffer.gpu_address; + vkCmdPushConstants(cmd, pipelines.graphics_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(push_vertex), &push_vertex); + vkCmdBindIndexBuffer(cmd, index_buffer->get_handle(), 0, VK_INDEX_TYPE_UINT16); + vkCmdDrawIndexed(cmd, mesh_num_indices, static_cast<uint32_t>(test_buffers.size()), 0, 0, 0); + + draw_ui(cmd); + + vkCmdEndRenderPass(cmd); + + VK_CHECK(vkEndCommandBuffer(cmd)); + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &draw_cmd_buffers[current_buffer]; + + VK_CHECK(vkQueueSubmit(queue, 1, &submit_info, wait_fences[current_buffer])); + + // A bit of a hack. This is usually seated in ApiVulkanSample::submit_frame(), but that throws immediately if the device enters an error state. 
+ // So we incorrectly call wait_idle here, so we can get the GPU in error state, and we can query it for device_fault before an exception is thrown. + VkResult error = get_device().get_queue_by_present(0).wait_idle(); + + try + { + ApiVulkanSample::submit_frame(); + } + catch (std::runtime_error const &e) + { + check_device_fault(); + std::rethrow_exception(std::current_exception()); + } + +} + +void DeviceFault::request_gpu_features(vkb::core::PhysicalDeviceC &gpu) +{ + LOGI("Requesting features from GPU."); + // Need to enable the bufferDeviceAddress feature. + REQUEST_REQUIRED_FEATURE(gpu, + VkPhysicalDeviceBufferDeviceAddressFeaturesKHR, + bufferDeviceAddress); + + // Enable the deviceFault feature for handling hardware faults. + REQUEST_REQUIRED_FEATURE(gpu, + VkPhysicalDeviceFaultFeaturesEXT, + deviceFault); + + // Enable binding report for getting more information on GPU virtual address spaces + REQUEST_OPTIONAL_FEATURE(gpu, + VkPhysicalDeviceAddressBindingReportFeaturesEXT, + reportAddressBinding); +} + +void DeviceFault::request_instance_extensions(std::unordered_map<const char *, vkb::RequestMode> &requested_extensions) const +{ + ApiVulkanSample::request_instance_extensions(requested_extensions); + requested_extensions[VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME] = vkb::RequestMode::Required; + requested_extensions[VK_KHR_DEVICE_GROUP_CREATION_EXTENSION_NAME] = vkb::RequestMode::Required; + requested_extensions[VK_EXT_DEBUG_UTILS_EXTENSION_NAME] = vkb::RequestMode::Required; +} + +std::unique_ptr<vkb::VulkanSampleC> create_device_fault() +{ + return std::make_unique<DeviceFault>(); +} + +void DeviceFault::set_memory_debug_label(std::string current_address_reporter) +{ + this->current_memory_label = std::move(current_address_reporter); +} \ No newline at end of file diff --git a/samples/extensions/device_fault/device_fault.h b/samples/extensions/device_fault/device_fault.h new file mode 100644 index 0000000000..e2fe657059 --- /dev/null +++ b/samples/extensions/device_fault/device_fault.h @@ -0,0 +1,95 @@ 
+/* Copyright (c) 2026, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "api_vulkan_sample.h" +#include "vulkan/vulkan_to_string.hpp" +#include <memory> +#include <random> + +class DeviceFault : public ApiVulkanSample +{ +public: + DeviceFault(); + ~DeviceFault(); + +private: + virtual void request_gpu_features(vkb::core::PhysicalDeviceC &gpu) override; + virtual void render(float delta_time) override; + virtual void build_command_buffers() override; + virtual void on_update_ui_overlay(vkb::Drawer &drawer) override; + virtual bool prepare(const vkb::ApplicationOptions &options) override; + + void create_pipelines(); + VkPipelineLayout create_pipeline_layout(bool graphics); + void create_compute_pipeline(); + void create_graphics_pipeline(); + void check_device_fault(); + void update_pointer_buffer(VkCommandBuffer cmd); + void update_meshlets(VkCommandBuffer cmd); + + bool triggerDeviceFaultInvalidAddress = false; + bool deviceBindingReportEnabled = true; + + VkDebugUtilsMessengerEXT debug_utils_messenger{VK_NULL_HANDLE}; + void handle_address_binding(const VkDeviceAddressBindingCallbackDataEXT& callbackData); + + static VKAPI_ATTR VkBool32 VKAPI_CALL debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, + void* pUserData); + + struct Pipelines + { + 
VkPipelineLayout compute_pipeline_layout{}; + VkPipelineLayout graphics_pipeline_layout{}; + VkPipeline bindless_vbo_pipeline{}; + VkPipeline compute_update_pipeline{}; + } pipelines; + + struct TestBuffer + { + VkBuffer buffer{}; + VkDeviceMemory memory{}; + VkDeviceAddress gpu_address{}; + }; + + std::vector<TestBuffer> test_buffers; + void create_vbo_buffers(); + TestBuffer create_vbo_buffer(); + TestBuffer create_pointer_buffer(); + TestBuffer pointer_buffer; + + // Simple tagging variable to follow memory address changes in pipeline + std::string current_memory_label; + void set_memory_debug_label(std::string current_address_reporter); + + std::unique_ptr<vkb::core::BufferC> create_index_buffer(); + std::unique_ptr<vkb::core::BufferC> index_buffer; + + std::default_random_engine rnd{42}; + std::uniform_real_distribution<float> distribution{0.0f, 0.1f}; + uint32_t descriptor_offset{}; + float accumulated_time{}; + uint32_t num_indices_per_mesh{}; + +protected: + void request_instance_extensions(std::unordered_map<const char *, vkb::RequestMode> &requested_extensions) const override; +}; + +std::unique_ptr<vkb::VulkanSampleC> create_device_fault(); diff --git a/samples/extensions/device_fault/images/binding_address_info.png b/samples/extensions/device_fault/images/binding_address_info.png new file mode 100644 index 0000000000..53f07629c2 Binary files /dev/null and b/samples/extensions/device_fault/images/binding_address_info.png differ diff --git a/samples/extensions/device_fault/images/device_fault_ui.png b/samples/extensions/device_fault/images/device_fault_ui.png new file mode 100644 index 0000000000..573743540b Binary files /dev/null and b/samples/extensions/device_fault/images/device_fault_ui.png differ diff --git a/samples/extensions/device_fault/images/failed_logcat_output.png b/samples/extensions/device_fault/images/failed_logcat_output.png new file mode 100644 index 0000000000..4090f76be5 Binary files /dev/null and b/samples/extensions/device_fault/images/failed_logcat_output.png differ diff --git a/samples/extensions/device_fault/images/logcat_output.png 
b/samples/extensions/device_fault/images/logcat_output.png new file mode 100644 index 0000000000..72c9143e50 Binary files /dev/null and b/samples/extensions/device_fault/images/logcat_output.png differ diff --git a/shaders/device_fault/glsl/render.frag b/shaders/device_fault/glsl/render.frag new file mode 100644 index 0000000000..3c556328d8 --- /dev/null +++ b/shaders/device_fault/glsl/render.frag @@ -0,0 +1,27 @@ +/* Copyright (c) 2026, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#version 450 + +// Nothing interesting, just interpolate color from vertex. 
+layout(location = 0) flat in vec4 in_color; +layout(location = 0) out vec4 out_color; + +void main() +{ + // DEFAULT FRAGMENT SHADER + out_color = in_color; +} diff --git a/shaders/device_fault/glsl/render.frag.spv b/shaders/device_fault/glsl/render.frag.spv new file mode 100644 index 0000000000..0b85e325cb Binary files /dev/null and b/shaders/device_fault/glsl/render.frag.spv differ diff --git a/shaders/device_fault/glsl/render.vert b/shaders/device_fault/glsl/render.vert new file mode 100644 index 0000000000..e5f38adc68 --- /dev/null +++ b/shaders/device_fault/glsl/render.vert @@ -0,0 +1,83 @@ +/* Copyright (c) 2026, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#version 450 + +// Allows buffer_reference. +#extension GL_EXT_buffer_reference : require + +// Since we did not enable vertexPipelineStoresAndAtomics, we must mark everything readonly. +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer Position +{ + vec2 positions[]; +}; + +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer PositionReferences +{ + // Represents an array of pointers, where each pointer points to its own VBO (Position). + // The size of a pointer (VkDeviceAddress) is always 8 in Vulkan. 
+ Position buffers[]; +}; + +layout(push_constant) uniform Registers +{ + mat4 view_projection; + + // This is a pointer to an array of pointers, essentially: + // const VBO * const *vbos + PositionReferences references; +} registers; + +// Flat shading looks a little cooler here :) +layout(location = 0) flat out vec4 out_color; + +void main() +{ + int slice = gl_InstanceIndex; + + // One VBO per instance, load the VBO pointer. + // The cool thing here is that a compute shader could hypothetically + // write the pointer list where vertices are stored. + // With vertex attributes we do not have the luxury to modify VBO bindings on the GPU. + // The best we can do is to just modify the vertexOffset in an indirect draw call, + // but that's not always flexible enough, and enforces a very specific engine design to work. + // We can even modify the attribute layout per slice here, since we can just cast the pointer + // to something else if we want. + restrict Position positions = registers.references.buffers[slice]; + + // Load the vertex based on VertexIndex instead of an attribute. Fully flexible. + // Only downside is that we do not get format conversion for free like we do with normal vertex attributes. + vec2 pos = positions.positions[gl_VertexIndex] * 2.5; + + // Place the quad meshes on screen and center it. + pos += 3.0 * (vec2(slice % 8, slice / 8) - 3.5); + + // Normal projection. + gl_Position = registers.view_projection * vec4(pos, 0.0, 1.0); + + // Color the vertex. Use a combination of a wave and checkerboard, completely arbitrary. 
+ int index_x = gl_VertexIndex % 16; + int index_y = gl_VertexIndex / 16; + + float r = 0.5 + 0.3 * sin(float(index_x)); + float g = 0.5 + 0.3 * sin(float(index_y)); + + int checkerboard = (index_x ^ index_y) & 1; + r *= float(checkerboard) * 0.8 + 0.2; + g *= float(checkerboard) * 0.8 + 0.2; + + out_color = vec4(r, g, 0.15, 1.0); +} diff --git a/shaders/device_fault/glsl/render.vert.spv b/shaders/device_fault/glsl/render.vert.spv new file mode 100644 index 0000000000..1943da836d Binary files /dev/null and b/shaders/device_fault/glsl/render.vert.spv differ diff --git a/shaders/device_fault/glsl/update_vbo.comp b/shaders/device_fault/glsl/update_vbo.comp new file mode 100644 index 0000000000..3da8c0be68 --- /dev/null +++ b/shaders/device_fault/glsl/update_vbo.comp @@ -0,0 +1,116 @@ +/* Copyright (c) 2026, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#version 450 + +// Allows buffer_reference. +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require + +layout(local_size_x = 8, local_size_y = 8) in; + +// If we mark a buffer as buffer_reference, this is treated as a pointer type. +// A variable with the type Position is a 64-bit pointer to the data within. +// We can freely cast between pointer types if we wish, but that is not necessary in this sample. 
+// buffer_reference_align is used to let the underlying implementation know which alignment to expect. +// The pointer can have scalar alignment, which is something the compiler cannot know unless you tell it. +// It is best to use vector alignment when you can for optimal performance, but scalar alignment is sometimes useful. +// With SSBOs, the API has a minimum offset alignment which guarantees a minimum level of alignment from API side. + +// It is possible to forward reference a pointer, so you can contain a pointer to yourself inside a struct. +// Useful if you need something like a linked list on the GPU. +// Here it's not particularly useful, but something to know about. +layout(buffer_reference) buffer Position; + +layout(std430, buffer_reference, buffer_reference_align = 8) writeonly buffer Position +{ + vec2 positions[]; +}; + +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer PositionReferences +{ + // This buffer contains an array of pointers to other buffers. + Position buffers[]; +}; + +// In push constant we place a pointer to VBO pointers, spicy! +// This way we don't need any descriptor sets, but there's nothing wrong with combining use of descriptor sets and buffer device addresses. +// It is mostly done for convenience here. +layout(push_constant) uniform Registers +{ + PositionReferences references; + // A buffer reference is 64-bit, so offset of fract_time is 8 bytes. + float fract_time; + bool trigger_device_fault_invalid_address; +} registers; + +void main() +{ + // Every slice is a 8x8 grid of vertices which we update here in compute. + uvec2 local_offset = gl_GlobalInvocationID.xy; + uint local_index = local_offset.y * gl_WorkGroupSize.x * gl_NumWorkGroups.x + local_offset.x; + uint slice = gl_WorkGroupID.z; + + restrict Position positions = registers.references.buffers[slice]; + + // This is a trivial wave-like function. Arbitrary for demonstration purposes. 
+ const float TWO_PI = 3.14159265 * 2.0; + float offset = TWO_PI * fract(registers.fract_time + float(slice) * 0.1); + + // Simple grid. + vec2 pos = vec2(local_offset); + + // Wobble, wobble. + pos.x += 0.2 * sin(2.2 * pos.x + offset); + pos.y += 0.2 * sin(2.25 * pos.y + 2.0 * offset); + pos.x += 0.2 * cos(1.8 * pos.y + 3.0 * offset); + pos.y += 0.2 * cos(2.85 * pos.x + 4.0 * offset); + pos.x += 0.5 * sin(offset); + pos.y += 0.5 * sin(offset + 0.3); + + // Center the mesh in [-0.5, 0.5] range. + // Here we write to a raw pointer. + // Be aware, there is no robustness support for buffer_device_address since we don't have a complete descriptor! + positions.positions[local_index] = pos / (vec2(gl_WorkGroupSize.xy) * vec2(gl_NumWorkGroups.xy) - 1.0) - 0.5; + + // --- VK_ERROR_DEVICE_LOST demo triggers ------------------------------------- + // Prereqs (host-side): + // • Enable the extension: VK_EXT_device_fault. + // • Enable its feature struct at device creation (so fault capture works). + // • If you want the OOB write to reliably fault, DISABLE robust buffer access + // (robust/robust2 can clamp OOB and prevent a device fault). + // + // What you’ll get (host-side) after a device loss: + // • vkGetDeviceFaultInfoEXT(...) often returns a human-readable description, + // plus optional address and vendor info. Exact wording is driver-specific. + // + + // ACCESSING AN INVALID ADDRESS (storage-buffer OOB write) + // What it does: + // Forces an out-of-bounds write into the positions SSBO by indexing far + // beyond the bound (index 1000000). On drivers without robust + // buffer access, this typically touches unmapped VA and trips the GPU. + // + // What VK_EXT_device_fault usually reports: + // • Description mentioning "invalid address", "page fault", or "memory fault". + // • An address info entry (operation = LOAD/WRITE to a storage buffer address). + // • Sometimes a resource/binding hint and an instruction address. 
+ // Sample below: + if (registers.trigger_device_fault_invalid_address) + { + positions.positions[1000000] = pos / (vec2(gl_WorkGroupSize.xy) * vec2(gl_NumWorkGroups.xy) - 1.0) - 0.5; + } +} diff --git a/shaders/device_fault/glsl/update_vbo.comp.spv b/shaders/device_fault/glsl/update_vbo.comp.spv new file mode 100644 index 0000000000..2a1c99d215 Binary files /dev/null and b/shaders/device_fault/glsl/update_vbo.comp.spv differ