Skip to content

Commit 80f274e

Browse files
committed
[Metal] Set execution indices
1 parent 566dcbd commit 80f274e

2 files changed

Lines changed: 129 additions & 11 deletions

File tree

backends/gpu/metal/includes/kore3/metal/device_structs.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,21 @@ extern "C" {
77

88
#define KORE_METAL_FRAME_COUNT 2
99

10+
// Number of slots in the execution-fence table below.
#define KORE_METAL_EXECUTION_FENCE_COUNT 8

// Fixed-size table that pairs submitted command buffers with monotonically
// increasing execution indices, plus the bookkeeping needed to answer
// "has execution N finished yet?".
typedef struct kore_metal_execution_fence {
	// Per-slot command buffer handle, stored as an opaque pointer.
	// The code in device.m releases it with CFRelease when the slot is retired.
	void *command_buffers[KORE_METAL_EXECUTION_FENCE_COUNT];

	// Per-slot execution index; device.m treats 0 as "slot is free".
	// NOTE(review): "commend" looks like a typo for "command"; the name is kept
	// because device.m accesses the field under this spelling.
	uint64_t commend_buffer_execution_indices[KORE_METAL_EXECUTION_FENCE_COUNT];

	// Index that will be handed to the next registered command buffer.
	uint64_t next_execution_index;

	// Highest execution index that has been observed as finished so far.
	uint64_t completed_index;
} kore_metal_execution_fence;
19+
1020
typedef struct kore_metal_device {
1121
void *device;
1222
void *library;
23+
24+
kore_metal_execution_fence execution_fence;
1325
} kore_metal_device;
1426

1527
typedef struct kore_metal_query_set {

backends/gpu/metal/sources/device.m

Lines changed: 117 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,125 @@
1010

1111
#include <assert.h>
1212

13+
// Resets the device's execution-fence table: every slot is marked free
// (index 0), nothing counts as completed yet, and the first index that will
// be handed out is 1 so that 0 can keep meaning "free slot".
static void create_execution_fence(kore_gpu_device *device) {
	kore_metal_execution_fence *fence = &device->metal.execution_fence;

	fence->next_execution_index = 1;
	fence->completed_index = 0;

	uint32_t slot = KORE_METAL_EXECUTION_FENCE_COUNT;
	while (slot-- > 0) {
		fence->commend_buffer_execution_indices[slot] = 0;
	}
}
23+
24+
// Polls every occupied fence slot without blocking and retires the ones whose
// command buffer has finished executing.
//
// A buffer counts as finished when its status is either Completed or Error:
// in both cases the GPU is done with it. Previously only Completed was
// checked, so a failed buffer was never reaped by polling and never advanced
// completed_index.
//
// Retiring a slot releases the reference the table holds on the command
// buffer, clears the slot and raises completed_index if the slot's index is
// higher than the current value.
//
// Returns the (possibly updated) highest completed execution index.
static uint64_t find_completed_execution(kore_gpu_device *device) {
	kore_metal_execution_fence *execution_fence = &device->metal.execution_fence;

	for (uint32_t fence_index = 0; fence_index < KORE_METAL_EXECUTION_FENCE_COUNT; ++fence_index) {
		uint64_t execution_index = execution_fence->commend_buffer_execution_indices[fence_index];

		// index 0 marks a free slot
		if (execution_index == 0) {
			continue;
		}

		id<MTLCommandBuffer> command_buffer = (__bridge id<MTLCommandBuffer>)execution_fence->command_buffers[fence_index];
		MTLCommandBufferStatus status = [command_buffer status];

		if (status != MTLCommandBufferStatusCompleted && status != MTLCommandBufferStatusError) {
			// still pending on the CPU or GPU side
			continue;
		}

		if (execution_index > execution_fence->completed_index) {
			execution_fence->completed_index = execution_index;
		}

		// drop the table's reference and free the slot
		CFRelease(execution_fence->command_buffers[fence_index]);
		execution_fence->command_buffers[fence_index] = NULL;
		execution_fence->commend_buffer_execution_indices[fence_index] = 0;
	}

	return execution_fence->completed_index;
}
45+
46+
// Blocks until the execution with the given index has finished.
//
// Cheap checks come first: the cached completed_index, then a non-blocking
// poll of all slots. Only when both say "not yet" is the slot holding exactly
// this index looked up and waited on. That slot is then retired and
// completed_index is set to the index. If no slot holds the index the
// assertion at the end fires.
static void wait_for_execution(kore_gpu_device *device, uint64_t index) {
	kore_metal_execution_fence *fence = &device->metal.execution_fence;

	if (fence->completed_index >= index) {
		return;
	}

	if (find_completed_execution(device) >= index) {
		return;
	}

	// locate the slot that was registered under the requested index
	uint32_t slot = 0;
	while (slot < KORE_METAL_EXECUTION_FENCE_COUNT && fence->commend_buffer_execution_indices[slot] != index) {
		++slot;
	}

	bool fence_found = slot < KORE_METAL_EXECUTION_FENCE_COUNT;

	if (fence_found) {
		id<MTLCommandBuffer> pending_buffer = (__bridge id<MTLCommandBuffer>)fence->command_buffers[slot];
		[pending_buffer waitUntilCompleted];

		// retire the slot now that its buffer is done
		CFRelease(fence->command_buffers[slot]);
		fence->command_buffers[slot] = NULL;
		fence->commend_buffer_execution_indices[slot] = 0;

		fence->completed_index = index;
	}

	assert(fence_found);
}
85+
86+
// Registers a command buffer under the next execution index.
//
// device: device whose execution-fence table is updated.
// fence:  opaque command buffer pointer. The table releases it with CFRelease
//         when the slot is retired, so the caller hands over one retained
//         reference.
//
// A free slot (index 0) is used when available. Otherwise the slot with the
// lowest execution index is evicted: its command buffer is waited on and
// released, and the slot is reused for the new buffer.
static void set_next_fence(kore_gpu_device *device, void *fence) {
	kore_metal_execution_fence *execution_fence = &device->metal.execution_fence;

	for (uint32_t fence_index = 0; fence_index < KORE_METAL_EXECUTION_FENCE_COUNT; ++fence_index) {
		if (execution_fence->commend_buffer_execution_indices[fence_index] == 0) {
			execution_fence->commend_buffer_execution_indices[fence_index] = execution_fence->next_execution_index;
			++execution_fence->next_execution_index;

			execution_fence->command_buffers[fence_index] = fence;
			return;
		}
	}

	// No free slot: pick the oldest entry (every slot is occupied at this point).
	uint32_t lowest_fence_index = 0;

	for (uint32_t fence_index = 1; fence_index < KORE_METAL_EXECUTION_FENCE_COUNT; ++fence_index) {
		if (execution_fence->commend_buffer_execution_indices[fence_index] < execution_fence->commend_buffer_execution_indices[lowest_fence_index]) {
			lowest_fence_index = fence_index;
		}
	}

	uint64_t lowest_value = execution_fence->commend_buffer_execution_indices[lowest_fence_index];
	void *lowest_fence = execution_fence->command_buffers[lowest_fence_index];

	id<MTLCommandBuffer> command_buffer = (__bridge id<MTLCommandBuffer>)lowest_fence;
	[command_buffer waitUntilCompleted];

	CFRelease(lowest_fence);

	// The evicted execution has finished, so record that. Without this a later
	// wait_for_execution() on the evicted index could find neither a matching
	// slot nor a high enough completed_index and would trip its assertion.
	if (lowest_value > execution_fence->completed_index) {
		execution_fence->completed_index = lowest_value;
	}

	execution_fence->command_buffers[lowest_fence_index] = fence;

	execution_fence->commend_buffer_execution_indices[lowest_fence_index] = execution_fence->next_execution_index;
	++execution_fence->next_execution_index;
}
124+
13125
// Sets up the Metal backend for a device: resets the execution-fence table,
// picks the system default Metal device, attaches it to the Metal layer and
// stores retained handles to the device and its default library.
// The wishlist argument is not read here.
void kore_metal_device_create(kore_gpu_device *device, const kore_gpu_device_wishlist *wishlist) {
	// only touches device->metal.execution_fence, so it can run up front
	create_execution_fence(device);

	id<MTLDevice> system_device = MTLCreateSystemDefaultDevice();
	getMetalLayer().device = system_device;

	id<MTLLibrary> default_library = [system_device newDefaultLibrary];

	// retained bridge casts: the struct keeps its own reference to both objects
	device->metal.device = (__bridge_retained void *)system_device;
	device->metal.library = (__bridge_retained void *)default_library;
}
19133

20134
// Destroys the Metal backend state of a device. Currently a no-op.
// NOTE(review): kore_metal_device_create stores retained references in
// device->metal.device and device->metal.library, and the execution fence may
// hold command buffers; none of them are released here - confirm whether that
// is intentional.
void kore_metal_device_destroy(kore_gpu_device *device) {}
@@ -125,35 +239,27 @@ kore_gpu_texture_format kore_metal_device_framebuffer_format(kore_gpu_device *de
125239
return KORE_GPU_TEXTURE_FORMAT_BGRA8_UNORM;
126240
}
127241

128-
static void *last_running_command_buffer = NULL;
129-
130242
// Submits the command list's current command buffer to the GPU, registers it
// with the device's execution fence and gives the list a fresh command buffer
// for further recording.
//
// Any open compute or blit pass is ended first.
//
// Ownership: the retained reference the list held on the committed buffer is
// handed over to the execution fence, which releases it once the buffer has
// finished. This assumes list->metal.command_buffer always carries one
// retained reference, which matches how it is re-populated below.
void kore_metal_device_execute_command_list(kore_gpu_device *device, kore_gpu_command_list *list) {
	kore_metal_command_list_end_compute_pass(list);
	kore_metal_command_list_end_blit_pass(list);

	void *committed_command_buffer = list->metal.command_buffer;

	id<MTLCommandBuffer> command_buffer = (__bridge id<MTLCommandBuffer>)committed_command_buffer;
	[command_buffer commit];

	// Exactly one new command buffer is created for the list; the previous
	// code created a second one that was never used.
	id<MTLCommandQueue> command_queue = (__bridge id<MTLCommandQueue>)list->metal.command_queue;
	list->metal.command_buffer = (__bridge_retained void *)[command_queue commandBuffer];

	// The fence has to track the buffer that was just committed. Tracking the
	// new, still uncommitted buffer would make every wait on it hang and would
	// have both the list and the fence releasing the same reference.
	set_next_fence(device, committed_command_buffer);
}
143254

144255
// Blocks until the most recently registered execution has finished.
// next_execution_index is the index that will be handed out next, so the
// last one actually issued is one below it.
void kore_metal_device_wait_until_idle(kore_gpu_device *device) {
	const uint64_t last_issued_index = device->metal.execution_fence.next_execution_index - 1;
	wait_for_execution(device, last_issued_index);
}
151258

152259
// Allocates a Metal buffer of encoded_length bytes for a descriptor set and
// stores a retained handle to it in buffer->metal.buffer. The buffer uses
// shared storage with a write-combined CPU cache mode.
void kore_metal_device_create_descriptor_set_buffer(kore_gpu_device *device, uint64_t encoded_length, kore_gpu_buffer *buffer) {
	const MTLResourceOptions buffer_options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;

	id<MTLDevice> gpu = (__bridge id<MTLDevice>)device->metal.device;
	buffer->metal.buffer = (__bridge_retained void *)[gpu newBufferWithLength:encoded_length options:buffer_options];
}

0 commit comments

Comments
 (0)