diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h index 6cdd8b6a39779..7ee6c86256a4d 100644 --- a/include/SDL3/SDL_gpu.h +++ b/include/SDL3/SDL_gpu.h @@ -597,6 +597,20 @@ typedef struct SDL_GPUCopyPass SDL_GPUCopyPass; */ typedef struct SDL_GPUFence SDL_GPUFence; +/** + * An opaque handle representing a query pool. + * + * \since This struct is available since SDL 3.6.0. + * + * \sa SDL_CreateGPUQueryPool + * \sa SDL_ReleaseGPUQueryPool + * \sa SDL_BeginGPUQuery + * \sa SDL_EndGPUQuery + * \sa SDL_DownloadGPUQueryResults + * \sa SDL_GetGPUTimestampFrequency + */ +typedef struct SDL_GPUQueryPool SDL_GPUQueryPool; + /** * Specifies the primitive topology of a graphics pipeline. * @@ -1370,6 +1384,20 @@ typedef enum SDL_GPUSwapchainComposition SDL_GPU_SWAPCHAINCOMPOSITION_HDR10_ST2084 } SDL_GPUSwapchainComposition; +/** + * Specifies a kind of GPU Query. + * + * \since This enum is available since SDL 3.6.0. + * + * \sa SDL_CreateGPUQueryPool + */ +typedef enum SDL_GPUQueryType +{ + SDL_GPU_QUERY_TIMESTAMP, + SDL_GPU_QUERY_BINARY_OCCLUSION, + SDL_GPU_QUERY_PRECISE_OCCLUSION +} SDL_GPUQueryType; + /* Structures */ /** @@ -1816,6 +1844,21 @@ typedef struct SDL_GPUTransferBufferCreateInfo SDL_PropertiesID props; /**< A properties ID for extensions. Should be 0 if no extensions are needed. */ } SDL_GPUTransferBufferCreateInfo; +/** + * A structure specifying the parameters of a query pool. + * + * \since This struct is available since SDL 3.6.0. + * + * \sa SDL_CreateGPUQueryPool + */ +typedef struct SDL_GPUQueryPoolCreateInfo +{ + SDL_GPUQueryType type; /**< The type of query intended to be used by the client. */ + Uint32 query_count; /**< The maximum number of queries in the pool. */ + + SDL_PropertiesID props; /**< A properties ID for extensions. Should be 0 if no extensions are needed. 
*/ +} SDL_GPUQueryPoolCreateInfo; + /* Pipeline state structures */ /** @@ -4028,6 +4071,30 @@ extern SDL_DECLSPEC void SDLCALL SDL_DownloadFromGPUBuffer( const SDL_GPUBufferRegion *source, const SDL_GPUTransferBufferLocation *destination); +/** + * Copies results of a GPU query to a transfer buffer. + * + * This data is not guaranteed to be copied until the command buffer fence is + * signaled. + * + * After this function is called, the data in the query pool is no longer valid, + * so don't call this function multiple times before performing another query. + * + * \param copy_pass a copy pass handle. + * \param pool a query pool handle. + * \param first_query starting index of the queries to copy. + * \param count the number of queries to copy. + * \param destination the destination transfer buffer and offset. + * + * \since This function is available since SDL 3.6.0. + */ +extern SDL_DECLSPEC void SDLCALL SDL_DownloadGPUQueryResults( + SDL_GPUCopyPass *copy_pass, + SDL_GPUQueryPool *pool, + Uint32 first_query, + Uint32 count, + SDL_GPUTransferBufferLocation *destination); + /** * Ends the current copy pass. * @@ -4492,6 +4559,94 @@ extern SDL_DECLSPEC void SDLCALL SDL_ReleaseGPUFence( SDL_GPUDevice *device, SDL_GPUFence *fence); +/** + * Gets GPU timestamp frequency. + * + * Use this to compute wall clock times from timestamps: multiply the difference between two timestamps by this value to get nanoseconds. + * + * \param device a GPU context. + * \returns the number of nanoseconds required for a timestamp query to be incremented by 1 (a tick period, not a rate in Hz). + * + * \since This function is available since SDL 3.6.0. + * + * \sa SDL_CreateGPUQueryPool + */ +extern SDL_DECLSPEC float SDLCALL SDL_GetGPUTimestampFrequency(SDL_GPUDevice *device); + +/** + * Creates a query pool object to be used in queries. + * + * \param device a GPU context. + * \param createinfo a struct describing the state of the pool to create. + * \returns a query pool object on success, or NULL on failure; call + * SDL_GetError() for more information. + * + * \since This function is available since SDL 3.6.0. 
+ * + * \sa SDL_GetGPUTimestampFrequency + * \sa SDL_BeginGPUQuery + * \sa SDL_EndGPUQuery + * \sa SDL_DownloadGPUQueryResults + * \sa SDL_ReleaseGPUQueryPool + */ +extern SDL_DECLSPEC SDL_GPUQueryPool * SDLCALL SDL_CreateGPUQueryPool( + SDL_GPUDevice *device, + SDL_GPUQueryPoolCreateInfo *createinfo); + +/** + * Begins a query on a command buffer. + * + * For timestamp queries, this will produce a timestamp as soon as all previous commands are taken by the command queue. + * Note that this means for timestamp queries you should use a different index from the one you use in SDL_EndGPUQuery. + * + * \param command_buffer a command buffer. + * \param pool a query pool. + * \param index the index within the pool for the query. + * + * \since This function is available since SDL 3.6.0. + * + * \sa SDL_EndGPUQuery + */ +extern SDL_DECLSPEC void SDLCALL SDL_BeginGPUQuery( + SDL_GPUCommandBuffer *command_buffer, + SDL_GPUQueryPool *pool, + Uint32 index); + +/** + * Ends a query on a command buffer. + * + * For timestamp queries, this will produce a timestamp as soon as all previous commands are finished in the command queue. + * Note that this means for timestamp queries you should use a different index from the one you used in SDL_BeginGPUQuery. + * + * \param command_buffer a command buffer. + * \param pool a query pool. + * \param index the index within the pool for the query. + * + * \since This function is available since SDL 3.6.0. + * + * \sa SDL_BeginGPUQuery + */ +extern SDL_DECLSPEC void SDLCALL SDL_EndGPUQuery( + SDL_GPUCommandBuffer *command_buffer, + SDL_GPUQueryPool *pool, + Uint32 index); + +/** + * Frees the given query pool as soon as it is safe to do so. + * + * You must not reference the query pool after calling this function. + * + * \param device a GPU context. + * \param pool a query pool. + * + * \since This function is available since SDL 3.6.0. 
+ * + * \sa SDL_CreateGPUQueryPool + */ +extern SDL_DECLSPEC void SDLCALL SDL_ReleaseGPUQueryPool( + SDL_GPUDevice *device, + SDL_GPUQueryPool *pool); + /* Format Info */ /** diff --git a/src/dynapi/SDL_dynapi.exports b/src/dynapi/SDL_dynapi.exports index 9864557071c3e..87e4072ecd8ff 100644 --- a/src/dynapi/SDL_dynapi.exports +++ b/src/dynapi/SDL_dynapi.exports @@ -1290,3 +1290,9 @@ _SDL_LoadJPG _SDL_HasSVE2 _SDL_GamepadHasCapSense _SDL_GetGamepadCapSense +_SDL_DownloadGPUQueryResults +_SDL_GetGPUTimestampFrequency +_SDL_CreateGPUQueryPool +_SDL_BeginGPUQuery +_SDL_EndGPUQuery +_SDL_ReleaseGPUQueryPool diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym index 3958a52aa60af..4871a32753526 100644 --- a/src/dynapi/SDL_dynapi.sym +++ b/src/dynapi/SDL_dynapi.sym @@ -1291,6 +1291,12 @@ SDL3_0.0.0 { SDL_HasSVE2; SDL_GamepadHasCapSense; SDL_GetGamepadCapSense; + SDL_DownloadGPUQueryResults; + SDL_GetGPUTimestampFrequency; + SDL_CreateGPUQueryPool; + SDL_BeginGPUQuery; + SDL_EndGPUQuery; + SDL_ReleaseGPUQueryPool; # extra symbols go here (don't modify this line) local: *; }; diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h index b54d32ae6dcf5..16435fbe87dbb 100644 --- a/src/dynapi/SDL_dynapi_overrides.h +++ b/src/dynapi/SDL_dynapi_overrides.h @@ -1317,3 +1317,9 @@ #define SDL_HasSVE2 SDL_HasSVE2_REAL #define SDL_GamepadHasCapSense SDL_GamepadHasCapSense_REAL #define SDL_GetGamepadCapSense SDL_GetGamepadCapSense_REAL +#define SDL_DownloadGPUQueryResults SDL_DownloadGPUQueryResults_REAL +#define SDL_GetGPUTimestampFrequency SDL_GetGPUTimestampFrequency_REAL +#define SDL_CreateGPUQueryPool SDL_CreateGPUQueryPool_REAL +#define SDL_BeginGPUQuery SDL_BeginGPUQuery_REAL +#define SDL_EndGPUQuery SDL_EndGPUQuery_REAL +#define SDL_ReleaseGPUQueryPool SDL_ReleaseGPUQueryPool_REAL diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h index 4f8ac0ba0cbb4..c099b31c77287 100644 --- a/src/dynapi/SDL_dynapi_procs.h +++ 
b/src/dynapi/SDL_dynapi_procs.h @@ -1325,3 +1325,9 @@ SDL_DYNAPI_PROC(SDL_Surface*,SDL_LoadJPG,(const char *a),(a),return) SDL_DYNAPI_PROC(bool,SDL_HasSVE2,(void),(),return) SDL_DYNAPI_PROC(bool,SDL_GamepadHasCapSense,(SDL_Gamepad *a,SDL_GamepadCapSenseType b),(a,b),return) SDL_DYNAPI_PROC(bool,SDL_GetGamepadCapSense,(SDL_Gamepad *a,SDL_GamepadCapSenseType b),(a,b),return) +SDL_DYNAPI_PROC(void,SDL_DownloadGPUQueryResults,(SDL_GPUCopyPass *a,SDL_GPUQueryPool *b,Uint32 c,Uint32 d,SDL_GPUTransferBufferLocation *e),(a,b,c,d,e),) +SDL_DYNAPI_PROC(float,SDL_GetGPUTimestampFrequency,(SDL_GPUDevice *a),(a),return) +SDL_DYNAPI_PROC(SDL_GPUQueryPool*,SDL_CreateGPUQueryPool,(SDL_GPUDevice *a,SDL_GPUQueryPoolCreateInfo *b),(a,b),return) +SDL_DYNAPI_PROC(void,SDL_BeginGPUQuery,(SDL_GPUCommandBuffer *a,SDL_GPUQueryPool *b,Uint32 c),(a,b,c),) +SDL_DYNAPI_PROC(void,SDL_EndGPUQuery,(SDL_GPUCommandBuffer *a,SDL_GPUQueryPool *b,Uint32 c),(a,b,c),) +SDL_DYNAPI_PROC(void,SDL_ReleaseGPUQueryPool,(SDL_GPUDevice *a,SDL_GPUQueryPool *b),(a,b),) diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index 01e1eb5e9cfa7..fa57b49d333de 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -3020,6 +3020,36 @@ void SDL_DownloadFromGPUBuffer( destination); } +void SDL_DownloadGPUQueryResults( + SDL_GPUCopyPass *copy_pass, + SDL_GPUQueryPool *pool, + Uint32 first_query, + Uint32 count, + SDL_GPUTransferBufferLocation *destination) +{ + CHECK_PARAM(copy_pass == NULL) { + SDL_InvalidParamError("copy_pass"); + return; + } + + CHECK_PARAM(pool == NULL) { + SDL_InvalidParamError("pool"); + return; + } + + CHECK_PARAM(destination == NULL) { + SDL_InvalidParamError("destination"); + return; + } + + COPYPASS_DEVICE->DownloadQueryResults( + COPYPASS_COMMAND_BUFFER, + pool, + first_query, + count, + destination); +} + void SDL_EndGPUCopyPass( SDL_GPUCopyPass *copy_pass) { @@ -3511,6 +3541,86 @@ void SDL_ReleaseGPUFence( fence); } +float SDL_GetGPUTimestampFrequency(SDL_GPUDevice *device) +{ + 
CHECK_DEVICE_MAGIC(device, 0); + + return device->GetTimestampFrequency(device->driverData); +} + +SDL_GPUQueryPool *SDL_CreateGPUQueryPool( + SDL_GPUDevice *device, + SDL_GPUQueryPoolCreateInfo *createinfo) +{ + CHECK_DEVICE_MAGIC(device, NULL); + + CHECK_PARAM(createinfo == NULL) { + SDL_InvalidParamError("createinfo"); + return NULL; + } + + return device->CreateQueryPool( + device->driverData, + createinfo); +} + +void SDL_BeginGPUQuery( + SDL_GPUCommandBuffer *command_buffer, + SDL_GPUQueryPool *pool, + Uint32 index) +{ + CHECK_PARAM(command_buffer == NULL) { + SDL_InvalidParamError("command_buffer"); + return; + } + + CHECK_PARAM(pool == NULL) { + SDL_InvalidParamError("pool"); + return; + } + + COMMAND_BUFFER_DEVICE->BeginQuery( + command_buffer, + pool, + index); +} + +void SDL_EndGPUQuery( + SDL_GPUCommandBuffer *command_buffer, + SDL_GPUQueryPool *pool, + Uint32 index) +{ + CHECK_PARAM(command_buffer == NULL) { + SDL_InvalidParamError("command_buffer"); + return; + } + + CHECK_PARAM(pool == NULL) { + SDL_InvalidParamError("pool"); + return; + } + + COMMAND_BUFFER_DEVICE->EndQuery( + command_buffer, + pool, + index); +} + +void SDL_ReleaseGPUQueryPool( + SDL_GPUDevice *device, + SDL_GPUQueryPool *pool) +{ + CHECK_DEVICE_MAGIC(device, ); + + CHECK_PARAM(pool == NULL) { + return; + } + + device->ReleaseQueryPool( + device->driverData, + pool); +} + Uint32 SDL_CalculateGPUTextureFormatSize( SDL_GPUTextureFormat format, Uint32 width, diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h index 604be78b5b9b2..d5deb16666c1e 100644 --- a/src/gpu/SDL_sysgpu.h +++ b/src/gpu/SDL_sysgpu.h @@ -995,6 +995,13 @@ struct SDL_GPUDevice Uint32 size, bool cycle); + void (*DownloadQueryResults)( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUQueryPool *pool, + Uint32 first_query, + Uint32 count, + const SDL_GPUTransferBufferLocation *destination); + void (*GenerateMipmaps)( SDL_GPUCommandBuffer *commandBuffer, SDL_GPUTexture *texture); @@ -1097,6 +1104,28 @@ struct 
SDL_GPUDevice SDL_GPURenderer *driverData, SDL_GPUFence *fence); + float (*GetTimestampFrequency)( + SDL_GPURenderer *device + ); + + SDL_GPUQueryPool *(*CreateQueryPool)( + SDL_GPURenderer *driverData, + SDL_GPUQueryPoolCreateInfo *createinfo); + + void (*BeginQuery)( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUQueryPool *pool, + Uint32 index); + + void (*EndQuery)( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUQueryPool *pool, + Uint32 index); + + void (*ReleaseQueryPool)( + SDL_GPURenderer *driverData, + SDL_GPUQueryPool *pool); + // Feature Queries bool (*SupportsTextureFormat)( @@ -1193,6 +1222,7 @@ struct SDL_GPUDevice ASSIGN_DRIVER_FUNC(DownloadFromBuffer, name) \ ASSIGN_DRIVER_FUNC(CopyTextureToTexture, name) \ ASSIGN_DRIVER_FUNC(CopyBufferToBuffer, name) \ + ASSIGN_DRIVER_FUNC(DownloadQueryResults, name) \ ASSIGN_DRIVER_FUNC(GenerateMipmaps, name) \ ASSIGN_DRIVER_FUNC(EndCopyPass, name) \ ASSIGN_DRIVER_FUNC(Blit, name) \ @@ -1214,6 +1244,11 @@ struct SDL_GPUDevice ASSIGN_DRIVER_FUNC(WaitForFences, name) \ ASSIGN_DRIVER_FUNC(QueryFence, name) \ ASSIGN_DRIVER_FUNC(ReleaseFence, name) \ + ASSIGN_DRIVER_FUNC(GetTimestampFrequency, name) \ + ASSIGN_DRIVER_FUNC(CreateQueryPool, name) \ + ASSIGN_DRIVER_FUNC(BeginQuery, name) \ + ASSIGN_DRIVER_FUNC(EndQuery, name) \ + ASSIGN_DRIVER_FUNC(ReleaseQueryPool, name) \ ASSIGN_DRIVER_FUNC(SupportsTextureFormat, name) \ ASSIGN_DRIVER_FUNC(SupportsSampleCount, name) diff --git a/src/gpu/vulkan/SDL_gpu_vulkan.c b/src/gpu/vulkan/SDL_gpu_vulkan.c index 3b9efe5c45be2..fb1f28fab59db 100644 --- a/src/gpu/vulkan/SDL_gpu_vulkan.c +++ b/src/gpu/vulkan/SDL_gpu_vulkan.c @@ -468,6 +468,12 @@ static VkSamplerAddressMode SDLToVK_SamplerAddressMode[] = { VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE }; +static VkQueryType SDLToVK_QueryType[] = { + VK_QUERY_TYPE_TIMESTAMP, + VK_QUERY_TYPE_OCCLUSION, + VK_QUERY_TYPE_OCCLUSION +}; + // Structures typedef struct VulkanRenderer VulkanRenderer; @@ -967,6 +973,13 @@ typedef struct 
FramebufferHashTableKey Uint32 height; } FramebufferHashTableKey; +typedef struct VulkanQueryPool +{ + VkQueryPool pool; + SDL_GPUQueryType type; + SDL_AtomicInt referenceCount; +} VulkanQueryPool; + // Command structures typedef struct VulkanFencePool @@ -1110,6 +1123,10 @@ typedef struct VulkanCommandBuffer Sint32 usedComputePipelineCount; Sint32 usedComputePipelineCapacity; + VulkanQueryPool **usedQueryPools; + Sint32 usedQueryPoolCount; + Sint32 usedQueryPoolCapacity; + VulkanFramebuffer **usedFramebuffers; Sint32 usedFramebufferCount; Sint32 usedFramebufferCapacity; @@ -1251,6 +1268,10 @@ struct VulkanRenderer Uint32 shadersToDestroyCount; Uint32 shadersToDestroyCapacity; + VulkanQueryPool **queryPoolsToDestroy; + Uint32 queryPoolsToDestroyCount; + Uint32 queryPoolsToDestroyCapacity; + VulkanFramebuffer **framebuffersToDestroy; Uint32 framebuffersToDestroyCount; Uint32 framebuffersToDestroyCapacity; @@ -2558,6 +2579,19 @@ static void VULKAN_INTERNAL_TrackComputePipeline( computePipeline->referenceCount); } +static void VULKAN_INTERNAL_TrackQueryPool( + VulkanCommandBuffer *commandBuffer, + VulkanQueryPool *pool) +{ + TRACK_RESOURCE( + pool, + VulkanQueryPool *, + usedQueryPools, + usedQueryPoolCount, + usedQueryPoolCapacity, + pool->referenceCount); +} + static void VULKAN_INTERNAL_TrackFramebuffer( VulkanCommandBuffer *commandBuffer, VulkanFramebuffer *framebuffer) @@ -3246,6 +3280,7 @@ static void VULKAN_INTERNAL_DestroyCommandPool( SDL_free(commandBuffer->usedSamplers); SDL_free(commandBuffer->usedGraphicsPipelines); SDL_free(commandBuffer->usedComputePipelines); + SDL_free(commandBuffer->usedQueryPools); SDL_free(commandBuffer->usedFramebuffers); SDL_free(commandBuffer->usedUniformBuffers); @@ -3335,6 +3370,18 @@ static void VULKAN_INTERNAL_DestroySampler( SDL_free(vulkanSampler); } +static void VULKAN_INTERNAL_DestroyQueryPool( + VulkanRenderer *renderer, + VulkanQueryPool *vulkanQueryPool) +{ + renderer->vkDestroyQueryPool( + renderer->logicalDevice, + 
vulkanQueryPool->pool, + NULL); + + SDL_free(vulkanQueryPool); +} + static void VULKAN_INTERNAL_DestroySwapchainImage( VulkanRenderer *renderer, WindowData *windowData) @@ -5095,6 +5142,7 @@ static void VULKAN_DestroyDevice( SDL_free(renderer->computePipelinesToDestroy); SDL_free(renderer->shadersToDestroy); SDL_free(renderer->samplersToDestroy); + SDL_free(renderer->queryPoolsToDestroy); SDL_free(renderer->framebuffersToDestroy); SDL_free(renderer->allocationsToDefrag); @@ -7075,6 +7123,50 @@ static SDL_GPUTransferBuffer *VULKAN_CreateTransferBuffer( debugName); } +static float VULKAN_GetTimestampFrequency(SDL_GPURenderer *driverData) +{ + VulkanRenderer *renderer = (VulkanRenderer *)driverData; + return renderer->physicalDeviceProperties.properties.limits.timestampPeriod; +} + +static SDL_GPUQueryPool *VULKAN_CreateQueryPool( + SDL_GPURenderer *driverData, + SDL_GPUQueryPoolCreateInfo *createinfo) +{ + VulkanRenderer *renderer = (VulkanRenderer *)driverData; + VkQueryPoolCreateInfo vkQueryPoolCreateInfo; + VkResult result; + VulkanQueryPool *pool = SDL_malloc(sizeof(VulkanQueryPool)); + + if (pool == NULL) { + return NULL; + } + + vkQueryPoolCreateInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + vkQueryPoolCreateInfo.pNext = NULL; + vkQueryPoolCreateInfo.flags = 0; + vkQueryPoolCreateInfo.pipelineStatistics = 0; + vkQueryPoolCreateInfo.queryCount = createinfo->query_count; + vkQueryPoolCreateInfo.queryType = SDLToVK_QueryType[createinfo->type]; + + result = renderer->vkCreateQueryPool( + renderer->logicalDevice, + &vkQueryPoolCreateInfo, + NULL, + &pool->pool + ); + + if (result != VK_SUCCESS) { + SDL_free(pool); + CHECK_VULKAN_ERROR_AND_RETURN(result, vkCreateQueryPool, NULL); + } + + SDL_SetAtomicInt(&pool->referenceCount, 0); + pool->type = createinfo->type; + + return (SDL_GPUQueryPool *)pool; +} + static void VULKAN_INTERNAL_ReleaseTexture( VulkanRenderer *renderer, VulkanTexture *vulkanTexture) @@ -7285,6 +7373,28 @@ static void VULKAN_ReleaseGraphicsPipeline( 
SDL_UnlockMutex(renderer->disposeLock); } +static void VULKAN_ReleaseQueryPool( + SDL_GPURenderer *driverData, + SDL_GPUQueryPool *pool) +{ + VulkanRenderer *renderer = (VulkanRenderer *)driverData; + VulkanQueryPool *vulkanQueryPool = (VulkanQueryPool *)pool; + + SDL_LockMutex(renderer->disposeLock); + + EXPAND_ARRAY_IF_NEEDED( + renderer->queryPoolsToDestroy, + VulkanQueryPool *, + renderer->queryPoolsToDestroyCount + 1, + renderer->queryPoolsToDestroyCapacity, + renderer->queryPoolsToDestroyCapacity * 2); + + renderer->queryPoolsToDestroy[renderer->queryPoolsToDestroyCount] = vulkanQueryPool; + renderer->queryPoolsToDestroyCount += 1; + + SDL_UnlockMutex(renderer->disposeLock); +} + // Command Buffer render state static VkRenderPass VULKAN_INTERNAL_FetchRenderPass( @@ -9258,6 +9368,38 @@ static void VULKAN_CopyBufferToBuffer( SDL_UnlockRWLock(renderer->defragLock); } +static void VULKAN_DownloadQueryResults( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUQueryPool *pool, + Uint32 firstQuery, + Uint32 count, + const SDL_GPUTransferBufferLocation *destination) +{ + VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer; + VulkanRenderer *renderer = vulkanCommandBuffer->renderer; + VulkanQueryPool *vulkanQueryPool = (VulkanQueryPool *)pool; + VulkanBufferContainer *dstContainer = (VulkanBufferContainer *)destination->transfer_buffer; + + SDL_LockRWLockForReading(renderer->defragLock); + + // Note that the transfer buffer does not need a barrier, as it is synced by the client + + renderer->vkCmdCopyQueryPoolResults( + vulkanCommandBuffer->commandBuffer, + vulkanQueryPool->pool, + firstQuery, + count, + dstContainer->activeBuffer->buffer, + destination->offset, + 8, // Result for timing and occlusion is one 64-bit integer + VK_QUERY_RESULT_64_BIT); + + VULKAN_INTERNAL_TrackQueryPool(vulkanCommandBuffer, vulkanQueryPool); + VULKAN_INTERNAL_TrackBuffer(vulkanCommandBuffer, dstContainer->activeBuffer); + + 
SDL_UnlockRWLock(renderer->defragLock); +} + static void VULKAN_GenerateMipmaps( SDL_GPUCommandBuffer *commandBuffer, SDL_GPUTexture *texture) @@ -9597,6 +9739,11 @@ static bool VULKAN_INTERNAL_AllocateCommandBuffer( commandBuffer->usedComputePipelines = SDL_malloc( commandBuffer->usedComputePipelineCapacity * sizeof(VulkanComputePipeline *)); + commandBuffer->usedQueryPoolCapacity = 4; + commandBuffer->usedQueryPoolCount = 0; + commandBuffer->usedQueryPools = SDL_malloc( + commandBuffer->usedQueryPoolCapacity * sizeof(VulkanQueryPool *)); + commandBuffer->usedFramebufferCapacity = 4; commandBuffer->usedFramebufferCount = 0; commandBuffer->usedFramebuffers = SDL_malloc( @@ -9864,6 +10011,78 @@ static void VULKAN_ReleaseFence( } } +static void VULKAN_BeginQuery( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUQueryPool *pool, + Uint32 index) +{ + VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer; + VulkanRenderer *renderer = vulkanCommandBuffer->renderer; + VulkanQueryPool *vulkanQueryPool = (VulkanQueryPool *)pool; + + // Reference the pool from this command buffer so a pending release is deferred until the command buffer is cleaned. + VULKAN_INTERNAL_TrackQueryPool(vulkanCommandBuffer, vulkanQueryPool); + + renderer->vkCmdResetQueryPool( + vulkanCommandBuffer->commandBuffer, + vulkanQueryPool->pool, + index, + 1); + + // Timestamp queries don't begin and end, we just need a distinction between + // a timestamp written when preceding commands are taken and when preceding commands are finished. + if (vulkanQueryPool->type == SDL_GPU_QUERY_TIMESTAMP) { + renderer->vkCmdWriteTimestamp( + vulkanCommandBuffer->commandBuffer, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + vulkanQueryPool->pool, + index); + } + else { + renderer->vkCmdBeginQuery( + vulkanCommandBuffer->commandBuffer, + vulkanQueryPool->pool, + index, + vulkanQueryPool->type == SDL_GPU_QUERY_PRECISE_OCCLUSION ? 
VK_QUERY_CONTROL_PRECISE_BIT : 0); + } +} + +static void VULKAN_EndQuery( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUQueryPool *pool, + Uint32 index) +{ + VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer; + VulkanRenderer *renderer = vulkanCommandBuffer->renderer; + VulkanQueryPool *vulkanQueryPool = (VulkanQueryPool *)pool; + + // Reference the pool from this command buffer so a pending release is deferred until the command buffer is cleaned. + VULKAN_INTERNAL_TrackQueryPool(vulkanCommandBuffer, vulkanQueryPool); + + // Timestamp queries don't begin and end, we just need a distinction between + // a timestamp written when preceding commands are taken and when preceding commands are finished. + if (vulkanQueryPool->type == SDL_GPU_QUERY_TIMESTAMP) { + // The end timestamp uses its own index, which VULKAN_BeginQuery did not reset. + renderer->vkCmdResetQueryPool( + vulkanCommandBuffer->commandBuffer, + vulkanQueryPool->pool, + index, + 1); + + renderer->vkCmdWriteTimestamp( + vulkanCommandBuffer->commandBuffer, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + vulkanQueryPool->pool, + index); + } + else { + renderer->vkCmdEndQuery( + vulkanCommandBuffer->commandBuffer, + vulkanQueryPool->pool, + index); + } +} + static WindowData *VULKAN_INTERNAL_FetchWindowData( SDL_Window *window) { @@ -10610,6 +10816,17 @@ static void VULKAN_INTERNAL_PerformPendingDestroys( } } + for (Sint32 i = renderer->queryPoolsToDestroyCount - 1; i >= 0; i -= 1) { + if (SDL_GetAtomicInt(&renderer->queryPoolsToDestroy[i]->referenceCount) == 0) { + VULKAN_INTERNAL_DestroyQueryPool( + renderer, + renderer->queryPoolsToDestroy[i]); + + renderer->queryPoolsToDestroy[i] = renderer->queryPoolsToDestroy[renderer->queryPoolsToDestroyCount - 1]; + renderer->queryPoolsToDestroyCount -= 1; + } + } + for (Sint32 i = renderer->framebuffersToDestroyCount - 1; i >= 0; i -= 1) { if (SDL_GetAtomicInt(&renderer->framebuffersToDestroy[i]->referenceCount) == 0) { VULKAN_INTERNAL_DestroyFramebuffer( @@ -10687,6 +10904,11 @@ static void VULKAN_INTERNAL_CleanCommandBuffer( } commandBuffer->usedComputePipelineCount = 0; + for (Sint32 i = 0; i < commandBuffer->usedQueryPoolCount; i += 1) { + (void)(SDL_AtomicDecRef(&commandBuffer->usedQueryPools[i]->referenceCount)); + } + commandBuffer->usedQueryPoolCount = 0; + for (Sint32 i = 0; i < 
commandBuffer->usedFramebufferCount; i += 1) { (void)SDL_AtomicDecRef(&commandBuffer->usedFramebuffers[i]->referenceCount); } @@ -13741,6 +13963,13 @@ static SDL_GPUDevice *VULKAN_CreateDevice(bool debugMode, bool preferLowPower, S sizeof(VulkanShader *) * renderer->shadersToDestroyCapacity); + renderer->queryPoolsToDestroyCapacity = 16; + renderer->queryPoolsToDestroyCount = 0; + + renderer->queryPoolsToDestroy = SDL_malloc( + sizeof(VulkanQueryPool *) * + renderer->queryPoolsToDestroyCapacity); + renderer->framebuffersToDestroyCapacity = 16; renderer->framebuffersToDestroyCount = 0; renderer->framebuffersToDestroy = SDL_malloc( diff --git a/src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h b/src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h index a56beff11eff1..518523311f9d2 100644 --- a/src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h +++ b/src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h @@ -86,6 +86,7 @@ VULKAN_DEVICE_FUNCTION(vkBeginCommandBuffer) VULKAN_DEVICE_FUNCTION(vkBindBufferMemory) VULKAN_DEVICE_FUNCTION(vkBindImageMemory) VULKAN_DEVICE_FUNCTION(vkCmdBeginRenderPass) +VULKAN_DEVICE_FUNCTION(vkCmdBeginQuery) VULKAN_DEVICE_FUNCTION(vkCmdBindDescriptorSets) VULKAN_DEVICE_FUNCTION(vkCmdBindIndexBuffer) VULKAN_DEVICE_FUNCTION(vkCmdBindPipeline) @@ -98,6 +99,7 @@ VULKAN_DEVICE_FUNCTION(vkCmdCopyBuffer) VULKAN_DEVICE_FUNCTION(vkCmdCopyImage) VULKAN_DEVICE_FUNCTION(vkCmdCopyBufferToImage) VULKAN_DEVICE_FUNCTION(vkCmdCopyImageToBuffer) +VULKAN_DEVICE_FUNCTION(vkCmdCopyQueryPoolResults) VULKAN_DEVICE_FUNCTION(vkCmdDispatch) VULKAN_DEVICE_FUNCTION(vkCmdDispatchIndirect) VULKAN_DEVICE_FUNCTION(vkCmdDraw) @@ -105,13 +107,16 @@ VULKAN_DEVICE_FUNCTION(vkCmdDrawIndexed) VULKAN_DEVICE_FUNCTION(vkCmdDrawIndexedIndirect) VULKAN_DEVICE_FUNCTION(vkCmdDrawIndirect) VULKAN_DEVICE_FUNCTION(vkCmdEndRenderPass) +VULKAN_DEVICE_FUNCTION(vkCmdEndQuery) VULKAN_DEVICE_FUNCTION(vkCmdPipelineBarrier) +VULKAN_DEVICE_FUNCTION(vkCmdResetQueryPool) VULKAN_DEVICE_FUNCTION(vkCmdResolveImage) 
VULKAN_DEVICE_FUNCTION(vkCmdSetBlendConstants) VULKAN_DEVICE_FUNCTION(vkCmdSetDepthBias) VULKAN_DEVICE_FUNCTION(vkCmdSetScissor) VULKAN_DEVICE_FUNCTION(vkCmdSetStencilReference) VULKAN_DEVICE_FUNCTION(vkCmdSetViewport) +VULKAN_DEVICE_FUNCTION(vkCmdWriteTimestamp) VULKAN_DEVICE_FUNCTION(vkCreateBuffer) VULKAN_DEVICE_FUNCTION(vkCreateCommandPool) VULKAN_DEVICE_FUNCTION(vkCreateDescriptorPool) @@ -128,6 +133,7 @@ VULKAN_DEVICE_FUNCTION(vkCreateRenderPass) VULKAN_DEVICE_FUNCTION(vkCreateSampler) VULKAN_DEVICE_FUNCTION(vkCreateSemaphore) VULKAN_DEVICE_FUNCTION(vkCreateShaderModule) +VULKAN_DEVICE_FUNCTION(vkCreateQueryPool) VULKAN_DEVICE_FUNCTION(vkDestroyBuffer) VULKAN_DEVICE_FUNCTION(vkDestroyCommandPool) VULKAN_DEVICE_FUNCTION(vkDestroyDescriptorPool) @@ -144,6 +150,7 @@ VULKAN_DEVICE_FUNCTION(vkDestroyRenderPass) VULKAN_DEVICE_FUNCTION(vkDestroySampler) VULKAN_DEVICE_FUNCTION(vkDestroySemaphore) VULKAN_DEVICE_FUNCTION(vkDestroyShaderModule) +VULKAN_DEVICE_FUNCTION(vkDestroyQueryPool) VULKAN_DEVICE_FUNCTION(vkDeviceWaitIdle) VULKAN_DEVICE_FUNCTION(vkEndCommandBuffer) VULKAN_DEVICE_FUNCTION(vkFreeCommandBuffers)