Skip to content

Commit 3524a86

Browse files
committed
[path_tracing] fixed blurry surface, and implemented some missing paper aspects
1 parent 07f57fb commit 3524a86

17 files changed

Lines changed: 770 additions & 248 deletions

data/shaders/light_composition.hlsl

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2424
#include "fog.hlsl"
2525
//====================
2626

27+
// must match restir_albedo_demodulator in restir_reservoir.hlsl, the half res restir shading
28+
// divides the gi by this and the bilateral upsample plus the multiply below re-applies the
29+
// full res value, recovering fine albedo detail that the upsample would otherwise blur away
30+
float3 restir_albedo_demodulator(float3 albedo) // returns albedo with every channel floored at 0.04
31+
{
32+
return max(albedo, float3(0.04f, 0.04f, 0.04f)); // component-wise max, presumably keeps the divide in the half res restir shading away from zero, confirm against restir_reservoir.hlsl
33+
}
34+
2735
// edge-aware bilateral upsample of the half-res restir gi texture (tex6)
2836
// destination depth and normal come from the full-res g-buffer via Surface
2937
// source depth and normal are read at gi texel centers from the same g-buffer
@@ -141,16 +149,24 @@ void main_cs(uint3 thread_id : SV_DispatchThreadID)
141149
alpha = surface.alpha;
142150
distance_from_camera = surface.camera_to_pixel_length;
143151

144-
// restir_pt outputs pre-shaded gi (diffuse_brdf * cos * radiance * W),
145-
// so it bypasses the *albedo multiply below and is added directly
152+
// restir_pt outputs gi already demodulated by the half res primary albedo so the
153+
// bilateral upsample averages a smoother lighting only signal and we re-apply the
154+
// full res albedo here, this preserves fine material detail that would otherwise be
155+
// lost when the half res restir shading + upsample blurs the albedo into the gi
146156
// gi is at restir_pt_scale of render resolution, so use a joint bilateral
147157
// upsample (depth + normal aware) to avoid bleeding across edges
148158
// also multiply by surface.occlusion to recover contact shadows that
149159
// restir's spatial reuse and denoiser smear away at small scales
160+
// debug mode writes a heatmap into the gi slot, the remodulator is skipped there so
161+
// the viridis colors are not tinted by surface albedo
150162
if (is_restir_pt_enabled())
151163
{
152164
float depth_dst_lin = linearize_depth(surface.depth);
153165
light_gi = sample_gi_bilateral(surface.uv, depth_dst_lin, surface.normal);
166+
if (uint(buffer_frame.restir_pt_debug_mode) == 0u)
167+
{
168+
light_gi *= restir_albedo_demodulator(surface.albedo);
169+
}
154170
light_gi *= surface.occlusion;
155171
}
156172
}

data/shaders/light_image_based.hlsl

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,16 +99,23 @@ void main_cs(uint3 thread_id : SV_DispatchThreadID)
9999
float3 diffuse_ibl = diffuse_skysphere * diffuse_occlusion * diffuse_energy * surface.albedo.rgb;
100100
float3 specular_ibl = specular_skysphere * specular_energy * specular_occlusion;
101101

102-
// when ray traced reflections are enabled, they handle specular
102+
// ray traced reflections owns specular indirect across the full roughness range when enabled
103103
if (is_ray_traced_reflections_enabled())
104104
{
105-
specular_ibl *= 0.0f; // fully handled by ray traced reflections
105+
specular_ibl *= 0.0f;
106106
}
107107

108-
// when restir path tracing is enabled, nearly disable ibl diffuse as restir fully replaces it
108+
// restir path tracing owns diffuse indirect always, and the specular indirect lobe for
109+
// moderate to high roughness surfaces (paper-faithful full brdf at the primary), zeroing
110+
// specular_ibl in that range avoids double counting when rt reflections is off, near mirror
111+
// surfaces still receive specular_ibl as the fallback when rt reflections is unavailable
109112
if (is_restir_pt_enabled())
110113
{
111-
diffuse_ibl *= 0.0f; // restir fully handles indirect diffuse
114+
diffuse_ibl *= 0.0f;
115+
if (surface.roughness >= 0.2f)
116+
{
117+
specular_ibl *= 0.0f;
118+
}
112119
}
113120

114121
// transparents take full ibl, fresnel inside the split sum already governs the reflection split

data/shaders/restir_pt.hlsl

Lines changed: 150 additions & 45 deletions
Large diffs are not rendered by default.

data/shaders/restir_pt_debug.hlsl

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
3232
// 1 = confidence (alpha of reservoir.tex4.w, mapped 0..1)
3333
// 2 = reservoir M (0..RESTIR_M_CAP)
3434
// 3 = reservoir W (log scaled since W has high dynamic range)
35-
// 4 = reuse ratio (placeholder, shows confidence for now)
36-
// 5 = temporal rejection (placeholder, shows 1 - confidence)
35+
// 4 = reuse ratio M/m_cap, saturation against the runtime cvar cap, identifies where temporal accumulation is healthy vs freshly reset
36+
// 5 = path length, the actual chosen sample's bounce count (1..max_path_length), highlights where paths are short and indirect contribution is missing
3737
// 6 = variance (alpha of the denoised gi, log scaled, the svgf per pixel luminance variance)
3838

3939
// viridis colormap, approximated via a small polynomial fit, returns a perceptually uniform color from a [0,1] input
@@ -93,18 +93,22 @@ void main_cs(uint3 dispatch_id : SV_DispatchThreadID)
9393
visualization_t = saturate(log2(W + 1.0f) / 8.0f);
9494
break;
9595
}
96-
case 4: // reuse ratio placeholder, shows confidence (proxy for how much we have reused this reservoir)
96+
case 4: // reuse ratio M / runtime m_cap, hot pixels are saturated and benefit fully from temporal accumulation, cold pixels were reset by disocclusion / validation
9797
{
98-
uint age_conf = asuint(tex_reservoir_prev4[pixel].w);
99-
float confidence = saturate(f16tof32(age_conf >> 16u));
100-
visualization_t = confidence;
98+
float M = tex_reservoir_prev2[pixel].w;
99+
float m_cap = max(get_restir_m_cap(), 1.0f);
100+
visualization_t = saturate(M / m_cap);
101101
break;
102102
}
103-
case 5: // temporal rejection placeholder, shows 1 - confidence so freshly rejected pixels are hot
103+
case 5: // path length, decoded from the packed path_info word so we can see whether paths actually reach max bounces
104104
{
105-
uint age_conf = asuint(tex_reservoir_prev4[pixel].w);
106-
float confidence = saturate(f16tof32(age_conf >> 16u));
107-
visualization_t = saturate(1.0f - confidence);
105+
uint packed_info = asuint(tex_reservoir_prev2[pixel].y);
106+
uint path_length;
107+
uint rc_length;
108+
uint flags;
109+
unpack_path_info(packed_info, path_length, rc_length, flags);
110+
float max_path = max(float(get_restir_max_path_length()), 1.0f);
111+
visualization_t = saturate(float(path_length) / max_path);
108112
break;
109113
}
110114
case 6: // variance (alpha of the denoised gi, log scaled because per pixel variance spans several decades on disocclusion edges)

data/shaders/restir_pt_denoise_spatial.hlsl

Lines changed: 91 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,18 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2323
#include "common.hlsl"
2424
//==============================
2525

26+
// svgf style a-trous filter (schied et al. 2017 spatiotemporal variance guided filter)
27+
// inputs:
28+
// tex rgba = (color, variance) from the temporal accumulation pass
29+
// tex3 rgba = (luma_M1, luma_M2, n_eff, _) moments texture
30+
// the per pixel variance estimate in tex.a drives the luma weight phi as phi_luma = sqrt(var),
31+
// instead of the previous hand tuned global luma_phi, so noisy regions are filtered more
32+
// aggressively while clean regions preserve high frequency detail
33+
// step_width comes from the pass push constant and doubles each à-trous level (1, 2, 4)
34+
2635
static const float3 luminance_weights = float3(0.299f, 0.587f, 0.114f);
2736

37+
// 3x3 tap offsets, paired index for index with the gaussian weights in spatial_kernel below (a-trous wavelet kernel)
2838
static const int2 spatial_offsets[9] =
2939
{
3040
int2(-1, -1), int2(0, -1), int2(1, -1),
@@ -34,11 +44,29 @@ static const int2 spatial_offsets[9] =
3444

3545
static const float spatial_kernel[9] =
3646
{
37-
1.0f, 2.0f, 1.0f,
38-
2.0f, 4.0f, 2.0f,
39-
1.0f, 2.0f, 1.0f
47+
1.0f / 16.0f, 2.0f / 16.0f, 1.0f / 16.0f,
48+
2.0f / 16.0f, 4.0f / 16.0f, 2.0f / 16.0f,
49+
1.0f / 16.0f, 2.0f / 16.0f, 1.0f / 16.0f
4050
};
4151

52+
// pre blurred variance, schied 2017 §4.2 applies a small gaussian blur over the per pixel
53+
// variance estimate before driving the a-trous luma weight, this prevents single pixel
54+
// fireflies from over-tightening the filter on their immediate neighborhood
55+
float gaussian_filtered_variance(int2 pixel, uint2 resolution) // returns the spatial_kernel weighted 3x3 average of tex alpha around pixel
56+
{
57+
float variance_sum = 0.0f; // running sum of weighted alpha values
58+
float weight_sum = 0.0f; // running sum of the kernel weights actually applied
59+
[unroll]
60+
for (uint i = 0; i < 9; i++)
61+
{
62+
int2 sp = clamp(pixel + spatial_offsets[i], int2(0, 0), int2(resolution) - 1); // out of range taps are clamped to the nearest edge texel instead of being skipped
63+
float w = spatial_kernel[i];
64+
variance_sum += tex.Load(int3(sp, 0)).a * w; // alpha of tex carries the variance, see the inputs note at the top of the file
65+
weight_sum += w;
66+
}
67+
return variance_sum / max(weight_sum, 1e-6f); // no tap is ever skipped so weight_sum is the full kernel sum, the 1e-6 floor only guards the division
68+
}
69+
4270
[numthreads(THREAD_GROUP_COUNT_X, THREAD_GROUP_COUNT_Y, 1)]
4371
void main_cs(uint3 dispatch_id : SV_DispatchThreadID)
4472
{
@@ -60,26 +88,37 @@ void main_cs(uint3 dispatch_id : SV_DispatchThreadID)
6088

6189
float4 center_sample = tex.Load(int3(pixel, 0));
6290
float3 center_color = max(center_sample.rgb, 0.0f);
63-
float center_history = saturate(center_sample.a);
64-
// confidence is the high f16 of tex4.w, see reservoir packing layout
65-
uint center_age_conf = asuint(tex_reservoir_prev4[pixel].w);
66-
float reservoir_confidence = saturate(f16tof32(center_age_conf >> 16u));
67-
float center_confidence = saturate(center_history * 0.6f + reservoir_confidence * 0.4f);
91+
float center_variance = max(center_sample.a, 0.0f);
6892
float center_luma = dot(center_color, luminance_weights);
6993
float center_depth = linearize_depth(depth);
7094
float3 center_normal = get_normal(uv);
71-
float low_light_factor = saturate(1.0f - center_luma / 0.2f);
7295

73-
int step_width = max((int)pass_get_f3_value().x, 1);
74-
float depth_phi = max(center_depth * lerp(0.03f, 0.012f, center_confidence), 0.004f);
75-
float luma_phi = max((center_luma + 0.08f) * lerp(4.5f, 1.4f, center_confidence), 0.08f);
76-
float normal_power = lerp(16.0f, 48.0f, center_confidence);
77-
// tighten luma tolerance in dark regions so contact shadows don't get smeared
78-
luma_phi *= lerp(0.75f, 1.0f, 1.0f - low_light_factor);
96+
int step_width = max((int)pass_get_f3_value().x, 1);
97+
98+
// svgf phi parameters, schied 2017 typical values (their fig 4)
99+
// phi_luma = 4.0 (multiplied by sqrt(blurred variance) per pixel)
100+
// phi_depth = 1.0 (relative depth tolerance, scales with step_width and depth gradient)
101+
// phi_normal = 128 (cosine power)
102+
const float phi_luma = 4.0f;
103+
const float phi_depth = 1.0f;
104+
const float phi_normal = 128.0f;
79105

80-
float4 filtered_color = center_sample * spatial_kernel[4];
81-
float total_weight = spatial_kernel[4];
106+
float blurred_variance = gaussian_filtered_variance(int2(pixel), resolution);
107+
float luma_sigma = sqrt(max(blurred_variance, 1e-6f)) * phi_luma;
108+
luma_sigma = max(luma_sigma, 0.01f);
82109

110+
// per pixel screen space depth gradient, schied 2017 §4.5 estimates df/dz with finite
111+
// differences inside the kernel so the depth weight scales with the surface slope
112+
float depth_x = abs(linearize_depth(tex_depth.SampleLevel(GET_SAMPLER(sampler_point_clamp), uv + float2(1.0f / resolution.x, 0.0f), 0).r) - center_depth);
113+
float depth_y = abs(linearize_depth(tex_depth.SampleLevel(GET_SAMPLER(sampler_point_clamp), uv + float2(0.0f, 1.0f / resolution.y), 0).r) - center_depth);
114+
float depth_grad_max = max(max(depth_x, depth_y), 1e-3f);
115+
116+
float4 filtered_color = center_sample * spatial_kernel[4];
117+
float filtered_variance = center_variance * spatial_kernel[4] * spatial_kernel[4];
118+
float total_weight = spatial_kernel[4];
119+
float total_var_weight = spatial_kernel[4] * spatial_kernel[4];
120+
121+
[unroll]
83122
for (uint i = 0; i < 9; i++)
84123
{
85124
if (i == 4)
@@ -89,40 +128,53 @@ void main_cs(uint3 dispatch_id : SV_DispatchThreadID)
89128
if (sample_pixel.x < 0 || sample_pixel.x >= (int)resolution.x || sample_pixel.y < 0 || sample_pixel.y >= (int)resolution.y)
90129
continue;
91130

92-
float2 sample_uv = (sample_pixel + 0.5f) / resolution;
93-
float sample_depth_raw = tex_depth.SampleLevel(GET_SAMPLER(sampler_point_clamp), sample_uv, 0).r;
131+
float2 sample_uv = (sample_pixel + 0.5f) / resolution;
132+
float sample_depth_raw = tex_depth.SampleLevel(GET_SAMPLER(sampler_point_clamp), sample_uv, 0).r;
94133
if (sample_depth_raw <= 0.0f)
95134
continue;
96135

97136
float sample_depth = linearize_depth(sample_depth_raw);
98137
float3 sample_normal = get_normal(sample_uv);
99-
float4 sample_history = tex.Load(int3(sample_pixel, 0));
100-
uint sample_age_conf = asuint(tex_reservoir_prev4[sample_pixel].w);
101-
float sample_reservoir_confidence = saturate(f16tof32(sample_age_conf >> 16u));
102-
float3 sample_color = max(sample_history.rgb, 0.0f);
138+
float4 sample_data = tex.Load(int3(sample_pixel, 0));
139+
float3 sample_color = max(sample_data.rgb, 0.0f);
140+
float sample_var = max(sample_data.a, 0.0f);
103141
float sample_luma = dot(sample_color, luminance_weights);
104142

105-
float spatial_weight = spatial_kernel[i];
106-
float depth_weight = exp(-abs(sample_depth - center_depth) / depth_phi);
107-
float normal_weight = pow(saturate(dot(center_normal, sample_normal)), normal_power);
108-
float luma_weight = exp(-abs(sample_luma - center_luma) / luma_phi);
109-
float history_confidence = max(saturate(sample_history.a), sample_reservoir_confidence);
110-
float history_weight = lerp(0.45f, 1.0f, history_confidence);
143+
// distance in pixels along the gradient direction so the depth tolerance scales with
144+
// the kernel reach (schied 2017 eq. 9), step_width drives the a-trous spread
145+
float pixel_dist = length(float2(spatial_offsets[i] * step_width));
146+
float depth_delta = abs(sample_depth - center_depth) / max(phi_depth * depth_grad_max * pixel_dist, 1e-3f);
147+
float depth_weight = exp(-depth_delta);
148+
149+
float normal_weight = pow(saturate(dot(center_normal, sample_normal)), phi_normal);
150+
151+
float luma_delta = abs(sample_luma - center_luma) / luma_sigma;
152+
float luma_weight = exp(-luma_delta);
111153

112-
float weight = spatial_weight * depth_weight * normal_weight * luma_weight * history_weight;
154+
float spatial_w = spatial_kernel[i];
155+
float weight = spatial_w * depth_weight * normal_weight * luma_weight;
113156
if (weight <= 0.0f)
114157
continue;
115158

116-
filtered_color += sample_history * weight;
117-
total_weight += weight;
159+
filtered_color += sample_data * weight;
160+
total_weight += weight;
161+
// variance combines as sum(w_i^2 * var_i) / (sum(w_i))^2 (schied 2017 eq. 10)
162+
filtered_variance += sample_var * weight * weight;
163+
total_var_weight += weight * weight;
118164
}
119165

120-
float4 filtered_sample = total_weight > 0.0f ? filtered_color / total_weight : center_sample;
121-
float filter_strength = lerp(1.0f, 0.35f, center_confidence);
122-
// gentler filtering in dark regions so shadow detail survives
123-
filter_strength = saturate(filter_strength - low_light_factor * 0.2f);
124-
float4 output_sample = lerp(center_sample, filtered_sample, filter_strength);
125-
output_sample.a = saturate(lerp(center_history, filtered_sample.a, filter_strength * 0.75f));
166+
if (total_weight > 0.0f)
167+
{
168+
filtered_color /= total_weight;
169+
filtered_variance /= max(total_weight * total_weight, 1e-6f);
170+
}
171+
else
172+
{
173+
filtered_color = center_sample;
174+
filtered_variance = center_variance;
175+
}
126176

127-
tex_uav[pixel] = validate_output(output_sample);
177+
// store color in rgb, variance in alpha so the next a-trous level has a fresh variance
178+
// estimate to drive its luma weight
179+
tex_uav[pixel] = validate_output(float4(filtered_color.rgb, max(filtered_variance, 0.0f)));
128180
}

0 commit comments

Comments
 (0)