Skip to content

Commit 5d6b644

Browse files
committed
perf: eliminate runtime Vulkan sync stalls and per-frame GPU allocations
1 parent 62b8e1d commit 5d6b644

7 files changed

Lines changed: 532 additions & 753 deletions

File tree

src/client/FarHorizonsClient.zig

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -449,17 +449,6 @@ pub const FarHorizonsClient = struct {
449449

450450
if (self.entity_renderer) |*er| {
451451
if (self.ecs_world) |*ecs_world| {
452-
// Retire old entity buffers before updateFromECS creates new ones
453-
if (er.vertex_buffer != null) {
454-
self.render_system.retireBuffer(er.vertex_buffer, er.vertex_buffer_memory);
455-
er.vertex_buffer = null;
456-
er.vertex_buffer_memory = null;
457-
}
458-
if (er.index_buffer != null) {
459-
self.render_system.retireBuffer(er.index_buffer, er.index_buffer_memory);
460-
er.index_buffer = null;
461-
er.index_buffer_memory = null;
462-
}
463452
er.updateFromECS(ecs_world, partial_tick) catch |err| {
464453
logger.err("Failed to update entity meshes: {}", .{err});
465454
};

src/client/entity/EntityRenderer.zig

Lines changed: 127 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ const stb_image = @import("stb_image");
1717
pub const EntityRenderer = struct {
1818
const Self = @This();
1919
const logger = Logger.scoped(Self);
20+
// 3 slots = MAX_FRAMES_IN_FLIGHT + 1, ensures GPU is done with a slot before CPU overwrites it
21+
const ENTITY_BUFFER_SLOTS = 3;
22+
const INITIAL_VERTEX_CAPACITY = 4096;
23+
const INITIAL_INDEX_CAPACITY = 8192;
2024

2125
allocator: std.mem.Allocator,
2226
gpu_device: *GpuDevice,
@@ -26,11 +30,18 @@ pub const EntityRenderer = struct {
2630
cow_model: CowModel,
2731
baby_cow_model: BabyCowModel,
2832

29-
// GPU buffers for entity rendering
30-
vertex_buffer: vk.VkBuffer = null,
31-
vertex_buffer_memory: vk.VkDeviceMemory = null,
32-
index_buffer: vk.VkBuffer = null,
33-
index_buffer_memory: vk.VkDeviceMemory = null,
33+
// Triple-buffered persistently-mapped GPU buffers (one per slot, rotated each frame)
34+
vertex_buffers: [ENTITY_BUFFER_SLOTS]vk.VkBuffer = .{null} ** ENTITY_BUFFER_SLOTS,
35+
vertex_buffer_memories: [ENTITY_BUFFER_SLOTS]vk.VkDeviceMemory = .{null} ** ENTITY_BUFFER_SLOTS,
36+
vertex_buffer_mapped: [ENTITY_BUFFER_SLOTS]?[*]Vertex = .{null} ** ENTITY_BUFFER_SLOTS,
37+
vertex_buffer_capacity: [ENTITY_BUFFER_SLOTS]u32 = .{0} ** ENTITY_BUFFER_SLOTS,
38+
39+
index_buffers: [ENTITY_BUFFER_SLOTS]vk.VkBuffer = .{null} ** ENTITY_BUFFER_SLOTS,
40+
index_buffer_memories: [ENTITY_BUFFER_SLOTS]vk.VkDeviceMemory = .{null} ** ENTITY_BUFFER_SLOTS,
41+
index_buffer_mapped: [ENTITY_BUFFER_SLOTS]?[*]u32 = .{null} ** ENTITY_BUFFER_SLOTS,
42+
index_buffer_capacity: [ENTITY_BUFFER_SLOTS]u32 = .{0} ** ENTITY_BUFFER_SLOTS,
43+
44+
current_slot: u32 = ENTITY_BUFFER_SLOTS - 1,
3445

3546
// Adult cow texture (legacy non-bindless)
3647
texture_image: vk.VkImage = null,
@@ -114,21 +125,29 @@ pub const EntityRenderer = struct {
114125
}
115126

116127
/// Destroy the vertex and index buffer of every slot that currently holds one,
/// then clear that slot's handle, memory, mapped pointer and capacity so the
/// slot reads as empty afterwards.
fn destroyBuffers(self: *Self) void {
    var slot: usize = 0;
    while (slot < ENTITY_BUFFER_SLOTS) : (slot += 1) {
        // Vertex side of this slot.
        if (self.vertex_buffers[slot] != null) {
            self.gpu_device.destroyMappedBufferRaw(.{
                .handle = self.vertex_buffers[slot],
                .memory = self.vertex_buffer_memories[slot],
                .mapped = if (self.vertex_buffer_mapped[slot]) |mapped_ptr| @ptrCast(mapped_ptr) else null,
            });
            self.vertex_buffer_capacity[slot] = 0;
            self.vertex_buffer_mapped[slot] = null;
            self.vertex_buffer_memories[slot] = null;
            self.vertex_buffers[slot] = null;
        }

        // Index side of this slot.
        if (self.index_buffers[slot] != null) {
            self.gpu_device.destroyMappedBufferRaw(.{
                .handle = self.index_buffers[slot],
                .memory = self.index_buffer_memories[slot],
                .mapped = if (self.index_buffer_mapped[slot]) |mapped_ptr| @ptrCast(mapped_ptr) else null,
            });
            self.index_buffer_capacity[slot] = 0;
            self.index_buffer_mapped[slot] = null;
            self.index_buffer_memories[slot] = null;
            self.index_buffers[slot] = null;
        }
    }
}
134153

@@ -236,6 +255,9 @@ pub const EntityRenderer = struct {
236255
/// Update entity meshes from ECS World and upload to GPU
237256
/// ECS version - uses Transform, Animation, HeadRotation, RenderData, Health components
238257
pub fn updateFromECS(self: *Self, world: *ecs.World, partial_tick: f32) !void {
258+
// Advance to next buffer slot (triple-buffered to avoid write-before-fence-wait race)
259+
self.current_slot = (self.current_slot + 1) % ENTITY_BUFFER_SLOTS;
260+
239261
var all_vertices: std.ArrayList(Vertex) = .empty;
240262
defer all_vertices.deinit(self.allocator);
241263
var all_indices: std.ArrayList(u32) = .empty;
@@ -392,71 +414,123 @@ pub const EntityRenderer = struct {
392414
return;
393415
}
394416

395-
// Old buffers are retired by the caller (FarHorizonsClient) before calling updateFromECS
417+
const vert_count: u32 = @intCast(all_vertices.items.len);
418+
const idx_count: u32 = @intCast(all_indices.items.len);
419+
const slot = self.current_slot;
396420

397-
const vertex_result = try self.gpu_device.createBufferWithDataRaw(
398-
Vertex,
399-
all_vertices.items,
400-
vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
401-
);
402-
self.vertex_buffer = vertex_result.handle;
403-
self.vertex_buffer_memory = vertex_result.memory;
421+
// Grow persistent buffers if needed
422+
try self.ensureBufferCapacity(slot, vert_count, idx_count);
404423

405-
const index_result = try self.gpu_device.createBufferWithDataRaw(
406-
u32,
407-
all_indices.items,
408-
vk.VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
409-
);
410-
self.index_buffer = index_result.handle;
411-
self.index_buffer_memory = index_result.memory;
424+
// Copy data into persistently-mapped buffers (HOST_COHERENT, no flush needed)
425+
const vert_dst = self.vertex_buffer_mapped[slot].?;
426+
@memcpy(vert_dst[0..vert_count], all_vertices.items);
427+
428+
const idx_dst = self.index_buffer_mapped[slot].?;
429+
@memcpy(idx_dst[0..idx_count], all_indices.items);
430+
431+
self.vertex_count = vert_count;
432+
self.index_count = idx_count;
433+
}
434+
435+
/// Ensure buffer slot has enough capacity, creating or growing if needed
///
/// For the vertex buffer and then the index buffer of `slot`: when the recorded
/// capacity (in elements) is smaller than the requested count, any existing
/// buffer is destroyed and a larger mapped buffer is created in its place.
/// The new capacity is at least the requested count, at least the initial
/// capacity constant, and at least twice the previous capacity.
///
/// Returns any error from `createMappedBufferRaw`. When that happens the
/// affected buffer of the slot is left null with capacity 0; if the index
/// buffer fails, the vertex buffer may already have been replaced.
fn ensureBufferCapacity(self: *Self, slot: u32, vertex_count: u32, index_count: u32) !void {
    if (self.vertex_buffer_capacity[slot] < vertex_count) {
        // Compute the target from the OLD capacity before the slot is reset below;
        // otherwise the doubling term would always be zero.
        const new_capacity = grownCapacity(
            self.vertex_buffer_capacity[slot],
            vertex_count,
            INITIAL_VERTEX_CAPACITY,
        );

        // Destroy old buffer if it exists
        if (self.vertex_buffers[slot] != null) {
            self.gpu_device.destroyMappedBufferRaw(.{
                .handle = self.vertex_buffers[slot],
                .memory = self.vertex_buffer_memories[slot],
                .mapped = if (self.vertex_buffer_mapped[slot]) |p| @ptrCast(p) else null,
            });
            self.vertex_buffers[slot] = null;
            self.vertex_buffer_memories[slot] = null;
            self.vertex_buffer_mapped[slot] = null;
            self.vertex_buffer_capacity[slot] = 0;
        }

        const size: u64 = @sizeOf(Vertex) * @as(u64, new_capacity);
        const result = try self.gpu_device.createMappedBufferRaw(size, vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);

        self.vertex_buffers[slot] = result.handle;
        self.vertex_buffer_memories[slot] = result.memory;
        self.vertex_buffer_mapped[slot] = @ptrCast(@alignCast(result.mapped.?));
        self.vertex_buffer_capacity[slot] = new_capacity;
    }

    if (self.index_buffer_capacity[slot] < index_count) {
        // Same ordering as above: read the old capacity before it is cleared.
        const new_capacity = grownCapacity(
            self.index_buffer_capacity[slot],
            index_count,
            INITIAL_INDEX_CAPACITY,
        );

        if (self.index_buffers[slot] != null) {
            self.gpu_device.destroyMappedBufferRaw(.{
                .handle = self.index_buffers[slot],
                .memory = self.index_buffer_memories[slot],
                .mapped = if (self.index_buffer_mapped[slot]) |p| @ptrCast(p) else null,
            });
            self.index_buffers[slot] = null;
            self.index_buffer_memories[slot] = null;
            self.index_buffer_mapped[slot] = null;
            self.index_buffer_capacity[slot] = 0;
        }

        const size: u64 = @sizeOf(u32) * @as(u64, new_capacity);
        const result = try self.gpu_device.createMappedBufferRaw(size, vk.VK_BUFFER_USAGE_INDEX_BUFFER_BIT);

        self.index_buffers[slot] = result.handle;
        self.index_buffer_memories[slot] = result.memory;
        self.index_buffer_mapped[slot] = @ptrCast(@alignCast(result.mapped.?));
        self.index_buffer_capacity[slot] = new_capacity;
    }
}

/// Choose the element capacity for a buffer that must hold `required` elements:
/// the largest of `required`, `minimum`, and twice `current`.
fn grownCapacity(current: u32, required: u32, minimum: u32) u32 {
    // `*|` saturates at the u32 maximum instead of overflowing on very large capacities.
    return @max(required, @max(minimum, current *| 2));
}
416484

417485
/// Record draw commands into command buffer (draws all entities with a single texture)
/// DEPRECATED: Use recordAdultDrawCommands/recordBabyDrawCommands for proper texture separation
///
/// Binds the vertex and index buffer of the current slot and issues one indexed
/// draw of `index_count` indices starting at index 0. Records nothing when
/// `index_count` is zero, when either buffer of the current slot is null, or
/// when one of the required Vulkan entry points is not loaded.
pub fn recordDrawCommands(self: *const Self, command_buffer: vk.VkCommandBuffer) void {
    if (self.index_count == 0) return;

    const slot = self.current_slot;
    const vertex_buffer = self.vertex_buffers[slot];
    const index_buffer = self.index_buffers[slot];
    // Both handles are required. The index buffer of a slot can be null while its
    // vertex buffer is not (index buffer creation happens second and may fail),
    // and a null handle must not be bound.
    if (vertex_buffer == null or index_buffer == null) return;

    const vkCmdBindVertexBuffers = vk.vkCmdBindVertexBuffers orelse return;
    const vkCmdBindIndexBuffer = vk.vkCmdBindIndexBuffer orelse return;
    const vkCmdDrawIndexed = vk.vkCmdDrawIndexed orelse return;

    const bound_buffers = [_]vk.VkBuffer{vertex_buffer};
    const offsets = [_]vk.VkDeviceSize{0};
    vkCmdBindVertexBuffers(command_buffer, 0, 1, &bound_buffers, &offsets);
    vkCmdBindIndexBuffer(command_buffer, index_buffer, 0, vk.VK_INDEX_TYPE_UINT32);
    vkCmdDrawIndexed(command_buffer, self.index_count, 1, 0, 0, 0);
}
432502

433503
/// Record draw commands for adult cows only (use with adult cow texture)
///
/// Binds the vertex and index buffer of the current slot and issues one indexed
/// draw of `adult_index_count` indices starting at index 0. Records nothing when
/// `adult_index_count` is zero, when either buffer of the current slot is null,
/// or when one of the required Vulkan entry points is not loaded.
pub fn recordAdultDrawCommands(self: *const Self, command_buffer: vk.VkCommandBuffer) void {
    if (self.adult_index_count == 0) return;

    const slot = self.current_slot;
    const vertex_buffer = self.vertex_buffers[slot];
    const index_buffer = self.index_buffers[slot];
    // Both handles are required. The index buffer of a slot can be null while its
    // vertex buffer is not (index buffer creation happens second and may fail),
    // and a null handle must not be bound.
    if (vertex_buffer == null or index_buffer == null) return;

    const vkCmdBindVertexBuffers = vk.vkCmdBindVertexBuffers orelse return;
    const vkCmdBindIndexBuffer = vk.vkCmdBindIndexBuffer orelse return;
    const vkCmdDrawIndexed = vk.vkCmdDrawIndexed orelse return;

    const bound_buffers = [_]vk.VkBuffer{vertex_buffer};
    const offsets = [_]vk.VkDeviceSize{0};
    vkCmdBindVertexBuffers(command_buffer, 0, 1, &bound_buffers, &offsets);
    vkCmdBindIndexBuffer(command_buffer, index_buffer, 0, vk.VK_INDEX_TYPE_UINT32);
    vkCmdDrawIndexed(command_buffer, self.adult_index_count, 1, 0, 0, 0);
}
447519

448520
/// Record draw commands for baby cows only (use with baby cow texture)
///
/// Binds the vertex and index buffer of the current slot and issues one indexed
/// draw of `baby_index_count` indices starting at `baby_index_start`. Records
/// nothing when `baby_index_count` is zero, when either buffer of the current
/// slot is null, or when one of the required Vulkan entry points is not loaded.
pub fn recordBabyDrawCommands(self: *const Self, command_buffer: vk.VkCommandBuffer) void {
    if (self.baby_index_count == 0) return;

    const slot = self.current_slot;
    const vertex_buffer = self.vertex_buffers[slot];
    const index_buffer = self.index_buffers[slot];
    // Both handles are required. The index buffer of a slot can be null while its
    // vertex buffer is not (index buffer creation happens second and may fail),
    // and a null handle must not be bound.
    if (vertex_buffer == null or index_buffer == null) return;

    const vkCmdBindVertexBuffers = vk.vkCmdBindVertexBuffers orelse return;
    const vkCmdBindIndexBuffer = vk.vkCmdBindIndexBuffer orelse return;
    const vkCmdDrawIndexed = vk.vkCmdDrawIndexed orelse return;

    const bound_buffers = [_]vk.VkBuffer{vertex_buffer};
    const offsets = [_]vk.VkDeviceSize{0};
    vkCmdBindVertexBuffers(command_buffer, 0, 1, &bound_buffers, &offsets);
    vkCmdBindIndexBuffer(command_buffer, index_buffer, 0, vk.VK_INDEX_TYPE_UINT32);
    // The baby range starts at `baby_index_start` within the shared index buffer.
    vkCmdDrawIndexed(command_buffer, self.baby_index_count, 1, self.baby_index_start, 0, 0);
}
462536

@@ -472,14 +546,14 @@ pub const EntityRenderer = struct {
472546
return self.baby_index_count > 0;
473547
}
474548

475-
/// Return the vertex buffer handle stored in the slot selected by `current_slot`,
/// for code that records its own draw commands.
/// NOTE(review): the buffer fields default to `null`, so `vk.VkBuffer` looks like an
/// optional handle type already; if so this return type is a nested optional and a
/// null handle would come back as a non-null outer value. Confirm against callers.
pub fn getVertexBuffer(self: *const Self) ?vk.VkBuffer {
    const active_slot = self.current_slot;
    return self.vertex_buffers[active_slot];
}
479553

480-
/// Return the index buffer handle stored in the slot selected by `current_slot`,
/// for code that records its own draw commands.
/// NOTE(review): the buffer fields default to `null`, so `vk.VkBuffer` looks like an
/// optional handle type already; if so this return type is a nested optional and a
/// null handle would come back as a non-null outer value. Confirm against callers.
pub fn getIndexBuffer(self: *const Self) ?vk.VkBuffer {
    const active_slot = self.current_slot;
    return self.index_buffers[active_slot];
}
484558

485559
/// Get index count for external rendering (total)

0 commit comments

Comments
 (0)