|
10 | 10 |
|
11 | 11 | #include <assert.h> |
12 | 12 |
|
/*
 * Puts the device's execution-fence bookkeeping into its initial state:
 * every slot is marked unused, no execution is recorded as completed, and
 * the first execution index to be handed out is 1.
 */
static void create_execution_fence(kore_gpu_device *device) {
	kore_metal_execution_fence *fence = &device->metal.execution_fence;

	fence->completed_index      = 0;
	fence->next_execution_index = 1;

	// A stored execution index of 0 is what the other fence helpers treat as "slot unused".
	for (uint32_t slot = 0; slot != KORE_METAL_EXECUTION_FENCE_COUNT; ++slot) {
		fence->commend_buffer_execution_indices[slot] = 0;
	}
}
| 23 | + |
/*
 * Polls every occupied fence slot without blocking. Each command buffer that
 * reports MTLCommandBufferStatusCompleted is retired: its execution index is
 * folded into completed_index (keeping the maximum), the stored reference is
 * released and the slot is marked unused.
 *
 * Returns the highest execution index known to have completed.
 *
 * NOTE(review): only the Completed status retires a slot here; a buffer that
 * ends in an error state stays in its slot until something else waits on it.
 * Confirm that this is intended.
 */
static uint64_t find_completed_execution(kore_gpu_device *device) {
	kore_metal_execution_fence *fence = &device->metal.execution_fence;

	for (uint32_t slot = 0; slot < KORE_METAL_EXECUTION_FENCE_COUNT; ++slot) {
		const uint64_t slot_index = fence->commend_buffer_execution_indices[slot];
		if (slot_index == 0) {
			continue; // unused slot, nothing to poll
		}

		id<MTLCommandBuffer> pending = (__bridge id<MTLCommandBuffer>)fence->command_buffers[slot];
		if ([pending status] != MTLCommandBufferStatusCompleted) {
			continue; // still in flight (or not in the Completed state)
		}

		if (slot_index > fence->completed_index) {
			fence->completed_index = slot_index;
		}

		// Drop the reference held by the slot and hand the slot back.
		CFRelease(fence->command_buffers[slot]);
		fence->command_buffers[slot]                  = NULL;
		fence->commend_buffer_execution_indices[slot] = 0;
	}

	return fence->completed_index;
}
| 45 | + |
/*
 * Blocks until the execution identified by `index` has completed.
 *
 * The cached completed_index is consulted first, then the slots are polled
 * via find_completed_execution(). Only if both report an older index is the
 * slot holding exactly `index` looked up and waited on; that slot is then
 * released, marked unused, and completed_index is set to `index`.
 *
 * Asserts that a slot for `index` exists when a blocking wait is required.
 */
static void wait_for_execution(kore_gpu_device *device, uint64_t index) {
	kore_metal_execution_fence *fence = &device->metal.execution_fence;

	// Cheap checks first: cached value, then a non-blocking poll of all slots.
	if (fence->completed_index >= index || find_completed_execution(device) >= index) {
		return;
	}

	bool fence_found = false;

	for (uint32_t slot = 0; slot < KORE_METAL_EXECUTION_FENCE_COUNT; ++slot) {
		if (fence->commend_buffer_execution_indices[slot] != index) {
			continue;
		}

		id<MTLCommandBuffer> pending = (__bridge id<MTLCommandBuffer>)fence->command_buffers[slot];
		[pending waitUntilCompleted];

		// The slot's reference is no longer needed once the wait has returned.
		CFRelease(fence->command_buffers[slot]);
		fence->command_buffers[slot]                  = NULL;
		fence->commend_buffer_execution_indices[slot] = 0;

		fence->completed_index = index;

		fence_found = true;
		break;
	}

	assert(fence_found);
}
| 85 | + |
/*
 * Registers `fence` (an opaque command-buffer pointer) under the next
 * execution index.
 *
 * The fence bookkeeping releases every stored pointer with CFRelease once it
 * retires the slot, so the caller hands over one owned reference with this
 * call.
 *
 * If a slot is unused it is taken directly. Otherwise the slot with the
 * lowest execution index is evicted: this function blocks until that command
 * buffer has completed, records its index as completed, releases it, and
 * reuses the slot.
 */
static void set_next_fence(kore_gpu_device *device, void *fence) {
	kore_metal_execution_fence *execution_fence = &device->metal.execution_fence;

	// Fast path: take the first unused slot (execution index 0 marks "unused").
	for (uint32_t fence_index = 0; fence_index < KORE_METAL_EXECUTION_FENCE_COUNT; ++fence_index) {
		if (execution_fence->commend_buffer_execution_indices[fence_index] == 0) {
			execution_fence->commend_buffer_execution_indices[fence_index] = execution_fence->next_execution_index;
			++execution_fence->next_execution_index;

			execution_fence->command_buffers[fence_index] = fence;
			return;
		}
	}

	// Every slot is occupied: pick the one with the lowest execution index.
	uint64_t lowest_value       = UINT64_MAX;
	void    *lowest_fence       = NULL;
	uint32_t lowest_fence_index = UINT32_MAX;

	for (uint32_t fence_index = 0; fence_index < KORE_METAL_EXECUTION_FENCE_COUNT; ++fence_index) {
		uint64_t value = execution_fence->commend_buffer_execution_indices[fence_index];

		if (value < lowest_value) {
			lowest_fence       = execution_fence->command_buffers[fence_index];
			lowest_value       = value;
			lowest_fence_index = fence_index;
		}
	}

	id<MTLCommandBuffer> command_buffer = (__bridge id<MTLCommandBuffer>)lowest_fence;
	[command_buffer waitUntilCompleted];

	// The evicted execution has finished; remember that before its slot is
	// recycled. Otherwise a later wait_for_execution() for this index would
	// find neither a matching slot nor a high enough completed_index.
	if (lowest_value > execution_fence->completed_index) {
		execution_fence->completed_index = lowest_value;
	}

	CFRelease(lowest_fence);
	execution_fence->command_buffers[lowest_fence_index] = fence;

	execution_fence->commend_buffer_execution_indices[lowest_fence_index] = execution_fence->next_execution_index;
	++execution_fence->next_execution_index;
}
| 124 | + |
/*
 * Creates the Metal side of a Kore GPU device: obtains the system default
 * MTLDevice, attaches it to the Metal layer, stores retained references to
 * the device and its default library, and initialises the execution fence.
 *
 * `wishlist` is not read by this function.
 */
void kore_metal_device_create(kore_gpu_device *device, const kore_gpu_device_wishlist *wishlist) {
	id<MTLDevice> gpu = MTLCreateSystemDefaultDevice();

	getMetalLayer().device = gpu;

	// Both pointers are stored with __bridge_retained, so the struct holds a reference to each object.
	device->metal.device  = (__bridge_retained void *)gpu;
	device->metal.library = (__bridge_retained void *)[gpu newDefaultLibrary];

	create_execution_fence(device);
}
19 | 133 |
|
/*
 * Destruction hook for the Metal device. It currently performs no work.
 *
 * NOTE(review): the references stored by kore_metal_device_create (device,
 * library) are not released here - confirm whether that is intentional.
 */
void kore_metal_device_destroy(kore_gpu_device *device) {
	(void)device;
}
@@ -125,35 +239,27 @@ kore_gpu_texture_format kore_metal_device_framebuffer_format(kore_gpu_device *de |
125 | 239 | return KORE_GPU_TEXTURE_FORMAT_BGRA8_UNORM; |
126 | 240 | } |
127 | 241 |
|
128 | | -static void *last_running_command_buffer = NULL; |
129 | | - |
/*
 * Submits the command list's current command buffer for execution and gives
 * the list a fresh command buffer to record into.
 *
 * Any open compute or blit pass is ended first. The committed buffer is then
 * registered with the device's execution fence so it can be waited on later,
 * and the list receives a new buffer from its command queue.
 */
void kore_metal_device_execute_command_list(kore_gpu_device *device, kore_gpu_command_list *list) {
	kore_metal_command_list_end_compute_pass(list);
	kore_metal_command_list_end_blit_pass(list);

	// Keep the raw pointer: the reference the list held on this buffer is
	// handed to the execution fence below.
	// NOTE(review): assumes list->metal.command_buffer always holds a retained
	// reference, as it does when assigned at the end of this function - confirm
	// for the list's initial buffer.
	void *submitted = list->metal.command_buffer;

	id<MTLCommandBuffer> command_buffer = (__bridge id<MTLCommandBuffer>)submitted;
	[command_buffer commit];

	// The fence must track the buffer that was just committed. Registering the
	// list's next (uncommitted) buffer would make a later wait target work that
	// was never submitted, and the fence's CFRelease would drop a reference the
	// list still relies on.
	set_next_fence(device, submitted);

	id<MTLCommandQueue> command_queue = (__bridge id<MTLCommandQueue>)list->metal.command_queue;
	list->metal.command_buffer = (__bridge_retained void *)[command_queue commandBuffer];
}
143 | 254 |
|
/*
 * Blocks until the most recently registered execution has completed.
 *
 * Execution indices are handed out sequentially starting at 1, so the last
 * one issued is next_execution_index - 1. When nothing has been registered
 * yet that value is 0.
 */
void kore_metal_device_wait_until_idle(kore_gpu_device *device) {
	const uint64_t last_issued = device->metal.execution_fence.next_execution_index - 1;

	wait_for_execution(device, last_issued);
}
151 | 258 |
|
/*
 * Allocates a Metal buffer of `encoded_length` bytes for a descriptor set and
 * stores a retained reference to it in `buffer`.
 *
 * The buffer is created with write-combined CPU caching and shared storage.
 */
void kore_metal_device_create_descriptor_set_buffer(kore_gpu_device *device, uint64_t encoded_length, kore_gpu_buffer *buffer) {
	const MTLResourceOptions storage_options = MTLResourceCPUCacheModeWriteCombined | MTLResourceStorageModeShared;

	id<MTLDevice> gpu = (__bridge id<MTLDevice>)device->metal.device;

	buffer->metal.buffer = (__bridge_retained void *)[gpu newBufferWithLength:encoded_length options:storage_options];
}
|
0 commit comments