Skip to content

Commit b9c385b

Browse files
authored
Solari: Improve ReSTIR DI performance (#23809)
Remove a couple of redundant loads from global memory to improve performance. Timing went from ~0.8 ms to ~0.5 ms when testing Solari's many lights on an RTX 5080.
1 parent 61127f6 commit b9c385b

1 file changed

Lines changed: 23 additions & 14 deletions

File tree

crates/bevy_solari/src/realtime/restir_di.wgsl

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ enable wgpu_ray_query;
1111
#import bevy_solari::brdf::{evaluate_diffuse_brdf, evaluate_specular_brdf}
1212
#import bevy_solari::gbuffer_utils::{gpixel_resolve, pixel_dissimilar, permute_pixel}
1313
#import bevy_solari::presample_light_tiles::unpack_resolved_light_sample
14-
#import bevy_solari::sampling::{LightSample, NULL_LIGHT_ID, calculate_resolved_light_contribution, resolve_and_calculate_light_contribution, resolve_light_sample, trace_light_visibility, balance_heuristic}
14+
#import bevy_solari::sampling::{LightSample, ResolvedLightSample, NULL_LIGHT_ID, calculate_resolved_light_contribution, resolve_and_calculate_light_contribution, resolve_light_sample, trace_light_visibility, balance_heuristic}
1515
#import bevy_solari::scene_bindings::{light_sources, previous_frame_light_id_translations, LIGHT_NOT_PRESENT_THIS_FRAME, RAY_T_MIN}
1616
#import bevy_solari::specular_gi::SPECULAR_GI_FOR_DI_ROUGHNESS_THRESHOLD
1717
#import bevy_solari::realtime_bindings::{view_output, light_tile_samples, light_tile_resolved_samples, di_reservoirs_a, di_reservoirs_b, gbuffer, depth_buffer, motion_vectors, previous_gbuffer, previous_depth_buffer, view, previous_view, constants, ResolvedLightSamplePacked}
@@ -70,8 +70,7 @@ fn spatial_and_shade(@builtin(global_invocation_id) global_id: vec3<u32>) {
7070
#endif
7171

7272
if reservoir_valid(combined_reservoir) {
73-
let resolved_light_sample = resolve_light_sample(combined_reservoir.sample, light_sources[combined_reservoir.sample.light_id >> 16u]);
74-
combined_reservoir.unbiased_contribution_weight *= trace_light_visibility(surface.world_position + (surface.world_normal * RAY_T_MIN), resolved_light_sample.world_position);
73+
combined_reservoir.unbiased_contribution_weight *= trace_light_visibility(surface.world_position + (surface.world_normal * RAY_T_MIN), merge_result.selected_light_world_position);
7574
}
7675

7776
// More stability, less accuracy (shadows extend further out than they should)
@@ -262,6 +261,7 @@ struct ReservoirMergeResult {
262261
merged_reservoir: Reservoir,
263262
selected_sample_radiance: vec3<f32>,
264263
wi: vec3<f32>,
264+
selected_light_world_position: vec4<f32>,
265265
}
266266

267267
fn merge_reservoirs(
@@ -275,13 +275,23 @@ fn merge_reservoirs(
275275
other_diffuse_brdf: vec3<f32>,
276276
rng: ptr<function, u32>,
277277
) -> ReservoirMergeResult {
278-
// Contributions for resampling
279-
let canonical_contribution_canonical_sample = reservoir_contribution(canonical_reservoir, canonical_world_position, canonical_world_normal, canonical_diffuse_brdf);
280-
let canonical_contribution_other_sample = reservoir_contribution(other_reservoir, canonical_world_position, canonical_world_normal, canonical_diffuse_brdf);
278+
// Resolve each light sample once, then evaluate at both positions
279+
var canonical_resolved: ResolvedLightSample;
280+
if reservoir_valid(canonical_reservoir) {
281+
canonical_resolved = resolve_light_sample(canonical_reservoir.sample, light_sources[canonical_reservoir.sample.light_id >> 16u]);
282+
}
283+
var other_resolved: ResolvedLightSample;
284+
if reservoir_valid(other_reservoir) {
285+
other_resolved = resolve_light_sample(other_reservoir.sample, light_sources[other_reservoir.sample.light_id >> 16u]);
286+
}
287+
288+
// Contributions for resampling (evaluate resolved samples at canonical position)
289+
let canonical_contribution_canonical_sample = resolved_reservoir_contribution(canonical_reservoir, canonical_resolved, canonical_world_position, canonical_world_normal, canonical_diffuse_brdf);
290+
let canonical_contribution_other_sample = resolved_reservoir_contribution(other_reservoir, other_resolved, canonical_world_position, canonical_world_normal, canonical_diffuse_brdf);
281291

282-
// Extra contributions for MIS
283-
let other_contribution_canonical_sample = reservoir_contribution(canonical_reservoir, other_world_position, other_world_normal, other_diffuse_brdf);
284-
let other_contribution_other_sample = reservoir_contribution(other_reservoir, other_world_position, other_world_normal, other_diffuse_brdf);
292+
// Extra contributions for MIS (evaluate resolved samples at other position)
293+
let other_contribution_canonical_sample = resolved_reservoir_contribution(canonical_reservoir, canonical_resolved, other_world_position, other_world_normal, other_diffuse_brdf);
294+
let other_contribution_other_sample = resolved_reservoir_contribution(other_reservoir, other_resolved, other_world_position, other_world_normal, other_diffuse_brdf);
285295

286296
// Resampling weight for canonical sample
287297
let canonical_sample_mis_weight = balance_heuristic(
@@ -308,14 +318,14 @@ fn merge_reservoirs(
308318
let inverse_target_function = select(0.0, 1.0 / canonical_contribution_other_sample.target_function, canonical_contribution_other_sample.target_function > 0.0);
309319
combined_reservoir.unbiased_contribution_weight = weight_sum * inverse_target_function;
310320

311-
return ReservoirMergeResult(combined_reservoir, canonical_contribution_other_sample.radiance, canonical_contribution_other_sample.wi);
321+
return ReservoirMergeResult(combined_reservoir, canonical_contribution_other_sample.radiance, canonical_contribution_other_sample.wi, other_resolved.world_position);
312322
} else {
313323
combined_reservoir.sample = canonical_reservoir.sample;
314324

315325
let inverse_target_function = select(0.0, 1.0 / canonical_contribution_canonical_sample.target_function, canonical_contribution_canonical_sample.target_function > 0.0);
316326
combined_reservoir.unbiased_contribution_weight = weight_sum * inverse_target_function;
317327

318-
return ReservoirMergeResult(combined_reservoir, canonical_contribution_canonical_sample.radiance, canonical_contribution_canonical_sample.wi);
328+
return ReservoirMergeResult(combined_reservoir, canonical_contribution_canonical_sample.radiance, canonical_contribution_canonical_sample.wi, canonical_resolved.world_position);
319329
}
320330
}
321331

@@ -325,10 +335,9 @@ struct ReservoirContribution {
325335
wi: vec3<f32>,
326336
}
327337

328-
// TODO: Have input take ResolvedLightSample instead of reservoir.light_sample
329-
fn reservoir_contribution(reservoir: Reservoir, world_position: vec3<f32>, world_normal: vec3<f32>, diffuse_brdf: vec3<f32>) -> ReservoirContribution {
338+
fn resolved_reservoir_contribution(reservoir: Reservoir, resolved: ResolvedLightSample, world_position: vec3<f32>, world_normal: vec3<f32>, diffuse_brdf: vec3<f32>) -> ReservoirContribution {
330339
if !reservoir_valid(reservoir) { return ReservoirContribution(vec3(0.0), 0.0, vec3(0.0)); }
331-
let light_contribution = resolve_and_calculate_light_contribution(reservoir.sample, world_position, world_normal);
340+
let light_contribution = calculate_resolved_light_contribution(resolved, world_position, world_normal);
332341
let target_function = luminance(light_contribution.radiance * diffuse_brdf * saturate(dot(light_contribution.wi, world_normal)));
333342
return ReservoirContribution(light_contribution.radiance, target_function, light_contribution.wi);
334343
}

0 commit comments

Comments
 (0)