Skip to content

Commit 035d8ae

Browse files
authored
Only calculate the world-space mesh center for an object when we know that object is transparent. (#23711)
PR #22041 made us use the center of the mesh AABB as the pivot point for transparent sorting. This is fine in and of itself, but the way it was implemented involved adding a matrix multiplication to `collect_meshes_for_gpu_building` for all meshes, not just transparent meshes. It also added several fields to `RenderMeshInstance` and related instances for each mesh, even opaque ones. Since most meshes are opaque, and `collect_meshes_for_gpu_building` is a performance-critical system, this doesn't strike me as the right tradeoff.

This PR moves the calculation of the mesh center to `queue_material_meshes`, to take place only after a mesh has been deemed to be transparent. Not only does this make `collect_meshes_for_gpu_building` faster, but it also allows us to remove the various `center` fields, which stored redundant information.

Note that this comes with two tradeoffs:

1. The transparent sorting no longer takes a custom `Aabb` component on the mesh into account. I doubt anybody was relying on this behavior.
2. We do have to calculate the AABB for the mesh when importing it to the render world for the first time.

On `bevy_city --size 90 --no-cpu-culling`, this reduces the time spent in `collect_meshes_for_gpu_building` after the loading screen from mean 84.85 ms, median 73.4 ms to mean 70.62 ms, median 72.5 ms.

Before this PR:
<img width="2756" height="1800" alt="Screenshot 2026-04-07 173043" src="https://github.com/user-attachments/assets/3157b58c-b4f1-43db-8157-390e5c9c6ff0" />

After this PR:
<img width="2756" height="1800" alt="Screenshot 2026-04-07 172952" src="https://github.com/user-attachments/assets/fda7100b-7695-4226-99e6-71b4c168f980" />
1 parent 6e9522c commit 035d8ae

6 files changed

Lines changed: 134 additions & 66 deletions

File tree

crates/bevy_math/src/affine3.rs

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,28 @@
1-
use glam::{Affine3, Affine3A, Vec3Swizzles, Vec4};
1+
use glam::{vec3, Affine3, Affine3A, Mat3, Vec3Swizzles, Vec4, Vec4Swizzles};
22

33
/// Extension trait for [`Affine3`]
///
/// Converts between an [`Affine3`] and the packed [`Vec4`] layouts used for
/// GPU buffers.
44
pub trait Affine3Ext {
5+
/// Generates an [`Affine3`] from a transposed 3x4 matrix.
6+
///
7+
/// This is the inverse of [`Self::to_transpose`].
///
/// In the [`Affine3`] implementation, each element of `transposed` is read
/// as one row of the affine matrix: its `xyz` is that row of the 3x3 part
/// and its `w` is the matching translation component.
8+
fn from_transpose(transposed: [Vec4; 3]) -> Self;
59
/// Calculates the transpose of the affine 4x3 matrix to a 3x4 and formats it for packing into GPU buffers
610
fn to_transpose(self) -> [Vec4; 3];
711
/// Calculates the inverse transpose of the 3x3 matrix and formats it for packing into GPU buffers
// NOTE(review): how the matrix entries are split across the two `Vec4`s and
// the trailing `f32` is decided by the implementation, which is not visible
// here — confirm before relying on a particular layout.
812
fn inverse_transpose_3x3(self) -> ([Vec4; 2], f32);
913
}
1014

1115
impl Affine3Ext for Affine3 {
16+
fn from_transpose(transposed: [Vec4; 3]) -> Self {
17+
let transpose_3x3 = Mat3::from_cols(
18+
transposed[0].xyz(),
19+
transposed[1].xyz(),
20+
transposed[2].xyz(),
21+
);
22+
let translation = vec3(transposed[0].w, transposed[1].w, transposed[2].w);
23+
Affine3::from_mat3_translation(transpose_3x3.transpose(), translation)
24+
}
25+
1226
#[inline]
1327
fn to_transpose(self) -> [Vec4; 3] {
1428
let transpose_3x3 = self.matrix3.transpose();

crates/bevy_pbr/src/material.rs

Lines changed: 64 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ use bevy_material::{
2929
labels::{DrawFunctionLabel, InternedShaderLabel, ShaderLabel},
3030
MaterialProperties, OpaqueRendererMethod, RenderPhaseType,
3131
};
32+
use bevy_math::{Affine3, Affine3Ext as _};
3233
use bevy_mesh::{
3334
mark_3d_meshes_as_changed_if_their_assets_changed, Mesh3d, MeshVertexBufferLayoutRef,
3435
};
@@ -37,6 +38,7 @@ use bevy_platform::collections::{HashMap, HashSet};
3738
use bevy_platform::hash::FixedHasher;
3839
use bevy_reflect::std_traits::ReflectDefault;
3940
use bevy_reflect::Reflect;
41+
use bevy_render::batching::gpu_preprocessing::BatchedInstanceBuffers;
4042
use bevy_render::camera::{DirtySpecializationSystems, DirtySpecializations, PendingQueues};
4143
use bevy_render::erased_render_asset::{
4244
ErasedRenderAsset, ErasedRenderAssetPlugin, ErasedRenderAssets, PrepareAssetError,
@@ -1114,8 +1116,12 @@ pub fn queue_material_meshes(
11141116
render_materials: Res<ErasedRenderAssets<PreparedMaterial>>,
11151117
render_mesh_instances: Res<RenderMeshInstances>,
11161118
render_material_instances: Res<RenderMaterialInstances>,
1119+
mesh_assets: Res<RenderAssets<RenderMesh>>,
11171120
mesh_allocator: Res<MeshAllocator>,
11181121
gpu_preprocessing_support: Res<GpuPreprocessingSupport>,
1122+
maybe_batched_instance_buffers: Option<
1123+
Res<BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>,
1124+
>,
11191125
mut opaque_render_phases: ResMut<ViewBinnedRenderPhases<Opaque3d>>,
11201126
mut alpha_mask_render_phases: ResMut<ViewBinnedRenderPhases<AlphaMask3d>>,
11211127
mut transmissive_render_phases: ResMut<ViewSortedRenderPhases<Transmissive3d>>,
@@ -1227,7 +1233,18 @@ pub fn queue_material_meshes(
12271233
};
12281234
transmissive_phase.add(Transmissive3d {
12291235
sorting_info: TransparentSortingInfo3d::Sorted {
1230-
mesh_center: mesh_instance.center,
1236+
mesh_center: get_mesh_instance_world_from_local(
1237+
*visible_entity,
1238+
mesh_instance.current_uniform_index,
1239+
&render_mesh_instances,
1240+
maybe_batched_instance_buffers.as_deref(),
1241+
)
1242+
.transform_point3(
1243+
mesh_assets
1244+
.get(mesh_instance.mesh_asset_id())
1245+
.unwrap()
1246+
.aabb_center,
1247+
),
12311248
depth_bias: material.properties.depth_bias,
12321249
},
12331250
entity: (Entity::PLACEHOLDER, *visible_entity),
@@ -1316,7 +1333,18 @@ pub fn queue_material_meshes(
13161333
};
13171334
transparent_phase.add(Transparent3d {
13181335
sorting_info: TransparentSortingInfo3d::Sorted {
1319-
mesh_center: mesh_instance.center,
1336+
mesh_center: get_mesh_instance_world_from_local(
1337+
*visible_entity,
1338+
mesh_instance.current_uniform_index,
1339+
&render_mesh_instances,
1340+
maybe_batched_instance_buffers.as_deref(),
1341+
)
1342+
.transform_point3(
1343+
mesh_assets
1344+
.get(mesh_instance.mesh_asset_id())
1345+
.unwrap()
1346+
.aabb_center,
1347+
),
13201348
depth_bias: material.properties.depth_bias,
13211349
},
13221350
entity: (Entity::PLACEHOLDER, *visible_entity),
@@ -1784,3 +1812,37 @@ pub fn write_material_bind_group_buffers(
17841812
allocator.write_buffers(&render_device, &render_queue);
17851813
}
17861814
}
1815+
1816+
/// Returns the world-from-local transform for the given mesh instance.
1817+
pub fn get_mesh_instance_world_from_local(
1818+
entity: MainEntity,
1819+
current_uniform_index: InputUniformIndex,
1820+
render_mesh_instances: &RenderMeshInstances,
1821+
maybe_batched_instance_buffers: Option<&BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>,
1822+
) -> Affine3 {
1823+
// The way we fetch the world-from-local transform depends on whether we're
1824+
// doing CPU or GPU preprocessing. If we're doing CPU preprocessing, we have
1825+
// the world-from-local transform handy in `RenderMeshInstancesCpu`.
1826+
// Otherwise, if we're doing GPU preprocessing, we need to pull the
1827+
// transform out of the `MeshInputUniform` GPU buffer.
1828+
match *render_mesh_instances {
1829+
RenderMeshInstances::CpuBuilding(ref render_mesh_instances_cpu) => {
1830+
let Some(render_mesh_instance) = render_mesh_instances_cpu.get(&entity) else {
1831+
return Affine3::IDENTITY;
1832+
};
1833+
render_mesh_instance.transforms.world_from_local
1834+
}
1835+
RenderMeshInstances::GpuBuilding(_) => {
1836+
let Some(batched_instance_buffers) = maybe_batched_instance_buffers else {
1837+
return Affine3::IDENTITY;
1838+
};
1839+
let Some(mesh_input_uniform) = batched_instance_buffers
1840+
.current_input_buffer
1841+
.get(current_uniform_index.0)
1842+
else {
1843+
return Affine3::IDENTITY;
1844+
};
1845+
Affine3::from_transpose(mesh_input_uniform.world_from_local)
1846+
}
1847+
}
1848+
}

crates/bevy_pbr/src/render/mesh.rs

Lines changed: 8 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -835,11 +835,7 @@ pub struct RenderMeshInstanceSharedFlat {
835835
asset_id: MeshAssetIdFlat,
836836
material_bindings_index: MaterialBindingId,
837837
lightmap_slab_index: LightmapSlabIndexFlat,
838-
// FIXME: Remove this. This should be a field on `Mesh`.
839-
model_space_center: Vec3,
840838
tag: u32,
841-
// FIXME: Remove this. This should be recomputed on demand, not cached here.
842-
world_space_center: Vec3,
843839
current_uniform_index: u32,
844840
flags: RenderMeshInstanceFlags,
845841
}
@@ -858,7 +854,6 @@ impl_atomic_pod!(
858854
lightmap_slab_index_flat,
859855
set_lightmap_slab_index_flat
860856
),
861-
field(model_space_center: Vec3, model_space_center, set_model_space_center),
862857
field(tag: u32, tag, set_tag),
863858
field(flags: RenderMeshInstanceFlags, flags, set_flags),
864859
);
@@ -896,15 +891,12 @@ impl RenderMeshInstanceSharedFlatBlob {
896891
#[derive(Clone, Copy, Default, Pod, Zeroable)]
897892
#[repr(C)]
898893
pub struct RenderMeshInstanceGpuFlat {
899-
// FIXME: Remove this. This should be recomputed on demand, not cached here.
900-
world_space_center: Vec3,
901894
current_uniform_index: u32,
902895
}
903896

904897
impl_atomic_pod!(
905898
RenderMeshInstanceGpuFlat,
906899
RenderMeshInstanceGpuFlatBlob,
907-
field(world_space_center: Vec3, world_space_center, set_world_space_center),
908900
field(current_uniform_index: u32, current_uniform_index, set_current_uniform_index),
909901
);
910902

@@ -1105,7 +1097,6 @@ impl RenderMeshInstanceSharedFlat {
11051097
not_shadow_caster: bool,
11061098
no_automatic_batching: bool,
11071099
no_cpu_culling: bool,
1108-
aabb: Option<&Aabb>,
11091100
) -> Self {
11101101
Self::new(
11111102
previous_transform,
@@ -1115,7 +1106,6 @@ impl RenderMeshInstanceSharedFlat {
11151106
not_shadow_caster,
11161107
no_automatic_batching,
11171108
no_cpu_culling,
1118-
aabb,
11191109
)
11201110
}
11211111

@@ -1127,7 +1117,6 @@ impl RenderMeshInstanceSharedFlat {
11271117
material_bindings_index: MaterialBindingId,
11281118
not_shadow_caster: bool,
11291119
no_automatic_batching: bool,
1130-
aabb: Option<&Aabb>,
11311120
) -> Self {
11321121
Self::new(
11331122
previous_transform,
@@ -1137,7 +1126,6 @@ impl RenderMeshInstanceSharedFlat {
11371126
not_shadow_caster,
11381127
no_automatic_batching,
11391128
false,
1140-
aabb,
11411129
)
11421130
}
11431131

@@ -1149,7 +1137,6 @@ impl RenderMeshInstanceSharedFlat {
11491137
not_shadow_caster: bool,
11501138
no_automatic_batching: bool,
11511139
no_cpu_culling: bool,
1152-
aabb: Option<&Aabb>,
11531140
) -> Self {
11541141
let mut mesh_instance_flags = RenderMeshInstanceFlags::empty();
11551142
mesh_instance_flags.set(RenderMeshInstanceFlags::SHADOW_CASTER, !not_shadow_caster);
@@ -1166,14 +1153,11 @@ impl RenderMeshInstanceSharedFlat {
11661153
RenderMeshInstanceSharedFlat {
11671154
asset_id: mesh.id().into(),
11681155
material_bindings_index,
1169-
model_space_center: aabb.map_or(Vec3::ZERO, |aabb| aabb.center.into()),
11701156
tag: tag.map_or(0, |i| **i),
11711157
flags: mesh_instance_flags,
11721158
// Filled in later.
11731159
lightmap_slab_index: LightmapSlabIndexFlat::default(),
11741160
// Filled in later.
1175-
world_space_center: Vec3::ZERO,
1176-
// Filled in later.
11771161
current_uniform_index: 0,
11781162
}
11791163
}
@@ -1254,20 +1238,12 @@ impl RenderMeshInstancesCpu {
12541238
}
12551239

12561240
fn render_mesh_queue_data(&self, entity: MainEntity) -> Option<RenderMeshQueueData<'_>> {
1257-
self.get(&entity).map(|render_mesh_instance| {
1258-
let world_from_local = &render_mesh_instance.transforms.world_from_local;
1259-
let center = world_from_local
1260-
.matrix3
1261-
.mul_vec3(render_mesh_instance.shared.model_space_center())
1262-
+ world_from_local.translation;
1263-
1264-
RenderMeshQueueData {
1241+
self.get(&entity)
1242+
.map(|render_mesh_instance| RenderMeshQueueData {
12651243
shared: &render_mesh_instance.shared,
12661244
render_layers: render_mesh_instance.render_layers.clone(),
1267-
center,
12681245
current_uniform_index: InputUniformIndex::default(),
1269-
}
1270-
})
1246+
})
12711247
}
12721248

12731249
/// Inserts the given flags into the render mesh instance data for the given
@@ -1290,7 +1266,6 @@ impl RenderMeshInstancesGpu {
12901266
.map(|render_mesh_instance| RenderMeshQueueData {
12911267
shared: &render_mesh_instance.shared,
12921268
render_layers: render_mesh_instance.render_layers.clone(),
1293-
center: render_mesh_instance.gpu_specific.world_space_center(),
12941269
current_uniform_index: InputUniformIndex(
12951270
render_mesh_instance.gpu_specific.current_uniform_index(),
12961271
),
@@ -1528,16 +1503,9 @@ impl RenderMeshInstanceGpuBuilder {
15281503
morph_descriptor_index,
15291504
};
15301505

1531-
let world_from_local = &self.world_from_local;
1532-
let center = world_from_local
1533-
.matrix3
1534-
.mul_vec3(self.shared.model_space_center)
1535-
+ world_from_local.translation;
1536-
15371506
Some(RenderMeshInstanceGpuPrepared {
15381507
shared: self.shared,
15391508
mesh_input_uniform,
1540-
center,
15411509
render_layers: self.render_layers,
15421510
})
15431511
}
@@ -1549,8 +1517,6 @@ pub struct RenderMeshInstanceGpuPrepared {
15491517
shared: RenderMeshInstanceSharedFlat,
15501518
/// The data that will be uploaded to the GPU as a [`MeshInputUniform`].
15511519
mesh_input_uniform: MeshInputUniform,
1552-
/// The world-space center of the mesh instance, used for culling and sorting.
1553-
center: Vec3,
15541520
/// The render layers that this mesh instance belongs to.
15551521
render_layers: Option<RenderLayers>,
15561522
}
@@ -1590,9 +1556,6 @@ impl RenderMeshInstanceGpuPrepared {
15901556
// Write the instance.
15911557
let existing_instance = occupied_entry.get_mut();
15921558
self.shared.write_to_blob(&existing_instance.shared);
1593-
existing_instance
1594-
.gpu_specific
1595-
.set_world_space_center(self.center);
15961559
existing_instance
15971560
.gpu_specific
15981561
.set_current_uniform_index(current_uniform_index);
@@ -1610,7 +1573,6 @@ impl RenderMeshInstanceGpuPrepared {
16101573
});
16111574
self.shared.write_to_blob(&new_instance.shared);
16121575
RenderMeshInstanceGpuFlat {
1613-
world_space_center: self.center,
16141576
current_uniform_index,
16151577
}
16161578
.write_to_blob(&new_instance.gpu_specific);
@@ -1690,11 +1652,6 @@ pub struct RenderMeshQueueData<'a> {
16901652
pub shared: &'a RenderMeshInstanceSharedFlatBlob,
16911653
/// The render layers that this mesh instance belongs to.
16921654
pub render_layers: Option<RenderLayers>,
1693-
/// The representative position of the mesh instance in world-space.
1694-
///
1695-
/// This world-space center is used as a spatial proxy for view-dependent
1696-
/// operations such as distance computation and render-order sorting.
1697-
pub center: Vec3,
16981655
/// The index of the [`MeshInputUniform`] in the GPU buffer for this mesh
16991656
/// instance.
17001657
pub current_uniform_index: InputUniformIndex,
@@ -1815,7 +1772,6 @@ pub fn extract_meshes_for_cpu_building(
18151772
Has<NoAutomaticBatching>,
18161773
Option<&VisibilityRange>,
18171774
Option<&RenderLayers>,
1818-
Option<&Aabb>,
18191775
)>,
18201776
>,
18211777
) {
@@ -1836,7 +1792,6 @@ pub fn extract_meshes_for_cpu_building(
18361792
no_automatic_batching,
18371793
visibility_range,
18381794
render_layers,
1839-
aabb,
18401795
)| {
18411796
if !view_visibility.get() {
18421797
return;
@@ -1870,19 +1825,19 @@ pub fn extract_meshes_for_cpu_building(
18701825
material_bindings_index,
18711826
not_shadow_caster,
18721827
no_automatic_batching,
1873-
aabb,
18741828
);
18751829

18761830
let world_from_local = transform.affine();
1831+
let previous_world_from_local = previous_transform
1832+
.map(|previous_transform| previous_transform.0)
1833+
.unwrap_or(world_from_local);
1834+
18771835
queue.push((
18781836
entity,
18791837
RenderMeshInstanceCpu {
18801838
transforms: MeshTransforms {
18811839
world_from_local: world_from_local.into(),
1882-
previous_world_from_local: (previous_transform
1883-
.map(|t| t.0)
1884-
.unwrap_or(world_from_local))
1885-
.into(),
1840+
previous_world_from_local: previous_world_from_local.into(),
18861841
flags: mesh_flags.bits(),
18871842
},
18881843
shared: (&shared).into(),
@@ -2178,7 +2133,6 @@ fn extract_mesh_for_gpu_building(
21782133
not_shadow_caster,
21792134
no_automatic_batching,
21802135
no_cpu_culling,
2181-
aabb,
21822136
);
21832137

21842138
// Calculate the lightmap UV rect, if applicable.
@@ -2626,9 +2580,6 @@ pub fn collect_meshes_for_gpu_building(
26262580
// only fields that changed are POD fields.
26272581

26282582
prepared.shared.write_to_blob(&render_mesh_instance.shared);
2629-
render_mesh_instance
2630-
.gpu_specific
2631-
.set_world_space_center(prepared.center);
26322583

26332584
let current_uniform_index =
26342585
render_mesh_instance.gpu_specific.current_uniform_index();

0 commit comments

Comments
 (0)