PanosK92
diff --git a/‎data/shaders/light_composition.hlsl‎
Lines changed: 18 additions & 2 deletions b/‎data/shaders/light_composition.hlsl‎
Lines changed: 18 additions & 2 deletions
diff --git a/‎data/shaders/light_image_based.hlsl‎
Lines changed: 11 additions & 4 deletions b/‎data/shaders/light_image_based.hlsl‎
Lines changed: 11 additions & 4 deletions
diff --git a/‎data/shaders/restir_pt.hlsl‎
Lines changed: 150 additions & 45 deletions b/‎data/shaders/restir_pt.hlsl‎
Lines changed: 150 additions & 45 deletions
diff --git a/‎data/shaders/restir_pt_debug.hlsl‎
Lines changed: 14 additions & 10 deletions b/‎data/shaders/restir_pt_debug.hlsl‎
Lines changed: 14 additions & 10 deletions
diff --git a/‎data/shaders/restir_pt_denoise_spatial.hlsl‎
Lines changed: 91 additions & 39 deletions b/‎data/shaders/restir_pt_denoise_spatial.hlsl‎
Lines changed: 91 additions & 39 deletions
@@ -24,6 +24,14 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "fog.hlsl"
 //====================
 
+// albedo floor shared by demodulation and remodulation of the restir gi, keep it identical to
+// restir_albedo_demodulator in restir_reservoir.hlsl, the half res restir shading divides the gi by
+// this value and the full res multiply after the bilateral upsample restores fine albedo detail
+float3 restir_albedo_demodulator(float3 albedo)
+{
+    return max(albedo, 0.04f); // scalar broadcasts per channel, floors each channel at 0.04
+}
+
 // edge-aware bilateral upsample of the half-res restir gi texture (tex6)
 // destination depth and normal come from the full-res g-buffer via Surface
 // source depth and normal are read at gi texel centers from the same g-buffer
@@ -141,16 +149,24 @@ void main_cs(uint3 thread_id : SV_DispatchThreadID)
         alpha                = surface.alpha;
         distance_from_camera = surface.camera_to_pixel_length;
 
-        // restir_pt outputs pre-shaded gi (diffuse_brdf * cos * radiance * W),
-        // so it bypasses the *albedo multiply below and is added directly
+        // restir_pt outputs gi already demodulated by the half res primary albedo so the
+        // bilateral upsample averages a smoother lighting only signal and we re-apply the
+        // full res albedo here, this preserves fine material detail that would otherwise be
+        // lost when the half res restir shading + upsample blurs the albedo into the gi
         // gi is at restir_pt_scale of render resolution, so use a join-bilateral
         // upsample (depth + normal aware) to avoid bleeding across edges
         // also multiply by surface.occlusion to recover contact shadows that
         // restir's spatial reuse and denoiser smear away at small scales
+        // debug mode writes a heatmap into the gi slot, the remodulator is skipped there so
+        // the viridis colors are not tinted by surface albedo
         if (is_restir_pt_enabled())
         {
             float depth_dst_lin = linearize_depth(surface.depth);
             light_gi = sample_gi_bilateral(surface.uv, depth_dst_lin, surface.normal);
+            if (uint(buffer_frame.restir_pt_debug_mode) == 0u)
+            {
+                light_gi *= restir_albedo_demodulator(surface.albedo);
+            }
             light_gi *= surface.occlusion;
         }
     }
 
@@ -99,16 +99,23 @@ void main_cs(uint3 thread_id : SV_DispatchThreadID)
     float3 diffuse_ibl       = diffuse_skysphere * diffuse_occlusion * diffuse_energy * surface.albedo.rgb;
     float3 specular_ibl      = specular_skysphere * specular_energy * specular_occlusion;
 
-    // when ray traced reflections are enabled, they handle specular
+    // ray traced reflections owns specular indirect across the full roughness range when enabled
     if (is_ray_traced_reflections_enabled())
     {
-        specular_ibl *= 0.0f; // fully handled by ray traced reflections
+        specular_ibl *= 0.0f;
     }
 
-    // when restir path tracing is enabled, nearly disable ibl diffuse as restir fully replaces it
+    // restir path tracing owns diffuse indirect always, and the specular indirect lobe for
+    // moderate to high roughness surfaces (paper-faithful full brdf at the primary), zeroing
+    // specular_ibl in that range avoids double counting when rt reflections is off, near mirror
+    // surfaces still receive specular_ibl as the fallback when rt reflections is unavailable
     if (is_restir_pt_enabled())
     {
-        diffuse_ibl *= 0.0f; // restir fully handles indirect diffuse
+        diffuse_ibl *= 0.0f;
+        if (surface.roughness >= 0.2f)
+        {
+            specular_ibl *= 0.0f;
+        }
     }
 
     // transparents take full ibl, fresnel inside the split sum already governs the reflection split
 
@@ -32,8 +32,8 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 //   1 = confidence (alpha of reservoir.tex4.w, mapped 0..1)
 //   2 = reservoir M (0..RESTIR_M_CAP)
 //   3 = reservoir W (log scaled since W has high dynamic range)
-//   4 = reuse ratio (placeholder, shows confidence for now)
-//   5 = temporal rejection (placeholder, shows 1 - confidence)
+//   4 = reuse ratio M/m_cap, saturation against the runtime cvar cap, identifies where temporal accumulation is healthy vs freshly reset
+//   5 = path length, the actual chosen sample's bounce count (1..max_path_length), highlights where paths are short and indirect contribution is missing
 //   6 = variance (alpha of the denoised gi, log scaled, the svgf per pixel luminance variance)
 
 // viridis colormap, approximated via a small polynomial fit, returns a perceptually uniform color from a [0,1] input
@@ -93,18 +93,22 @@ void main_cs(uint3 dispatch_id : SV_DispatchThreadID)
             visualization_t = saturate(log2(W + 1.0f) / 8.0f);
             break;
         }
-        case 4: // reuse ratio placeholder, shows confidence (proxy for how much we have reused this reservoir)
+        case 4: // reuse ratio M / runtime m_cap, hot pixels are saturated and benefit fully from temporal accumulation, cold pixels were reset by disocclusion / validation
         {
-            uint  age_conf   = asuint(tex_reservoir_prev4[pixel].w);
-            float confidence = saturate(f16tof32(age_conf >> 16u));
-            visualization_t  = confidence;
+            float M       = tex_reservoir_prev2[pixel].w;
+            float m_cap   = max(get_restir_m_cap(), 1.0f);
+            visualization_t = saturate(M / m_cap);
             break;
         }
-        case 5: // temporal rejection placeholder, shows 1 - confidence so freshly rejected pixels are hot
+        case 5: // path length, decoded from the packed path_info word so we can see whether paths actually reach max bounces
         {
-            uint  age_conf   = asuint(tex_reservoir_prev4[pixel].w);
-            float confidence = saturate(f16tof32(age_conf >> 16u));
-            visualization_t  = saturate(1.0f - confidence);
+            uint  packed_info = asuint(tex_reservoir_prev2[pixel].y);
+            uint  path_length;
+            uint  rc_length;
+            uint  flags;
+            unpack_path_info(packed_info, path_length, rc_length, flags);
+            float max_path  = max(float(get_restir_max_path_length()), 1.0f);
+            visualization_t = saturate(float(path_length) / max_path);
             break;
         }
         case 6: // variance (alpha of the denoised gi, log scaled because per pixel variance spans several decades on disocclusion edges)
 
@@ -23,8 +23,18 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "common.hlsl"
 //==============================
 
+// svgf style a-trous filter (schied et al. 2017 spatiotemporal variance guided filter)
+// inputs:
+//   tex   rgba = (color, variance) from the temporal accumulation pass
+//   tex3  rgba = (luma_M1, luma_M2, n_eff, _) moments texture
+// the per pixel variance estimate in tex.a drives the luma weight as luma_sigma = phi_luma * sqrt(blurred var),
+// instead of the previous hand tuned global luma_phi, so noisy regions are filtered more
+// aggressively while clean regions preserve high frequency detail
+// step_width comes from the pass push constant and doubles each a-trous level (1, 2, 4)
+
 static const float3 luminance_weights = float3(0.299f, 0.587f, 0.114f);
 
+// 3x3 tap offsets, spatial_kernel below holds the matching gaussian weights of the a-trous wavelet kernel
 static const int2 spatial_offsets[9] =
 {
     int2(-1, -1), int2(0, -1), int2(1, -1),
@@ -34,11 +44,29 @@ static const int2 spatial_offsets[9] =
 
 static const float spatial_kernel[9] =
 {
-    1.0f, 2.0f, 1.0f,
-    2.0f, 4.0f, 2.0f,
-    1.0f, 2.0f, 1.0f
+    1.0f / 16.0f, 2.0f / 16.0f, 1.0f / 16.0f,
+    2.0f / 16.0f, 4.0f / 16.0f, 2.0f / 16.0f,
+    1.0f / 16.0f, 2.0f / 16.0f, 1.0f / 16.0f
 };
 
+// pre blurred variance, schied 2017 §4.2 applies a small gaussian blur over the per pixel
+// variance estimate before driving the a-trous luma weight, this prevents single pixel
+// fireflies from over-tightening the filter on their immediate neighborhood
+float gaussian_filtered_variance(int2 pixel, uint2 resolution)
+{
+    float variance_sum = 0.0f;
+    float weight_sum   = 0.0f;
+    [unroll]
+    for (uint i = 0; i < 9; i++)
+    {
+        int2 sp = clamp(pixel + spatial_offsets[i], int2(0, 0), int2(resolution) - 1); // border taps reuse the nearest edge texel
+        float w = spatial_kernel[i];
+        variance_sum += max(tex.Load(int3(sp, 0)).a, 0.0f) * w; // floored at 0 like the a-trous taps in main_cs so a negative variance cannot cancel valid neighbors
+        weight_sum   += w;
+    }
+    return variance_sum / max(weight_sum, 1e-6f); // weighted mean of tex alpha over the 3x3 neighborhood
+}
+
 [numthreads(THREAD_GROUP_COUNT_X, THREAD_GROUP_COUNT_Y, 1)]
 void main_cs(uint3 dispatch_id : SV_DispatchThreadID)
 {
@@ -60,26 +88,37 @@ void main_cs(uint3 dispatch_id : SV_DispatchThreadID)
 
     float4 center_sample      = tex.Load(int3(pixel, 0));
     float3 center_color       = max(center_sample.rgb, 0.0f);
-    float  center_history     = saturate(center_sample.a);
-    // confidence is the high f16 of tex4.w, see reservoir packing layout
-    uint   center_age_conf    = asuint(tex_reservoir_prev4[pixel].w);
-    float  reservoir_confidence = saturate(f16tof32(center_age_conf >> 16u));
-    float  center_confidence  = saturate(center_history * 0.6f + reservoir_confidence * 0.4f);
+    float  center_variance    = max(center_sample.a, 0.0f);
     float  center_luma        = dot(center_color, luminance_weights);
     float  center_depth       = linearize_depth(depth);
     float3 center_normal      = get_normal(uv);
-    float  low_light_factor   = saturate(1.0f - center_luma / 0.2f);
 
-    int step_width      = max((int)pass_get_f3_value().x, 1);
-    float depth_phi     = max(center_depth * lerp(0.03f, 0.012f, center_confidence), 0.004f);
-    float luma_phi      = max((center_luma + 0.08f) * lerp(4.5f, 1.4f, center_confidence), 0.08f);
-    float normal_power  = lerp(16.0f, 48.0f, center_confidence);
-    // tighten luma tolerance in dark regions so contact shadows don't get smeared
-    luma_phi           *= lerp(0.75f, 1.0f, 1.0f - low_light_factor);
+    int    step_width = max((int)pass_get_f3_value().x, 1);
+
+    // svgf phi parameters, schied 2017 typical values (their fig 4)
+    //   phi_luma  = 4.0 (multiplied by sqrt(blurred variance) per pixel)
+    //   phi_depth = 1.0 (relative depth tolerance, scales with step_width and depth gradient)
+    //   phi_normal = 128 (cosine power)
+    const float phi_luma   = 4.0f;
+    const float phi_depth  = 1.0f;
+    const float phi_normal = 128.0f;
 
-    float4 filtered_color = center_sample * spatial_kernel[4];
-    float  total_weight   = spatial_kernel[4];
+    float blurred_variance = gaussian_filtered_variance(int2(pixel), resolution);
+    float luma_sigma       = sqrt(max(blurred_variance, 1e-6f)) * phi_luma;
+    luma_sigma             = max(luma_sigma, 0.01f);
 
+    // per pixel screen space depth gradient, schied 2017 §4.5, one pixel forward differences
+    // in x and y give |dz/dx| and |dz/dy| so the depth weight scales with the surface slope
+    float depth_x = abs(linearize_depth(tex_depth.SampleLevel(GET_SAMPLER(sampler_point_clamp), uv + float2(1.0f / resolution.x, 0.0f), 0).r) - center_depth);
+    float depth_y = abs(linearize_depth(tex_depth.SampleLevel(GET_SAMPLER(sampler_point_clamp), uv + float2(0.0f, 1.0f / resolution.y), 0).r) - center_depth);
+    float depth_grad_max = max(max(depth_x, depth_y), 1e-3f);
+
+    float4 filtered_color    = center_sample * spatial_kernel[4];
+    float  filtered_variance = center_variance * spatial_kernel[4] * spatial_kernel[4];
+    float  total_weight      = spatial_kernel[4];
+    float  total_var_weight  = spatial_kernel[4] * spatial_kernel[4];
+
+    [unroll]
     for (uint i = 0; i < 9; i++)
     {
         if (i == 4)
@@ -89,40 +128,53 @@ void main_cs(uint3 dispatch_id : SV_DispatchThreadID)
         if (sample_pixel.x < 0 || sample_pixel.x >= (int)resolution.x || sample_pixel.y < 0 || sample_pixel.y >= (int)resolution.y)
             continue;
 
-        float2 sample_uv   = (sample_pixel + 0.5f) / resolution;
-        float sample_depth_raw = tex_depth.SampleLevel(GET_SAMPLER(sampler_point_clamp), sample_uv, 0).r;
+        float2 sample_uv       = (sample_pixel + 0.5f) / resolution;
+        float  sample_depth_raw = tex_depth.SampleLevel(GET_SAMPLER(sampler_point_clamp), sample_uv, 0).r;
         if (sample_depth_raw <= 0.0f)
             continue;
 
         float  sample_depth   = linearize_depth(sample_depth_raw);
         float3 sample_normal  = get_normal(sample_uv);
-        float4 sample_history = tex.Load(int3(sample_pixel, 0));
-        uint   sample_age_conf = asuint(tex_reservoir_prev4[sample_pixel].w);
-        float  sample_reservoir_confidence = saturate(f16tof32(sample_age_conf >> 16u));
-        float3 sample_color   = max(sample_history.rgb, 0.0f);
+        float4 sample_data    = tex.Load(int3(sample_pixel, 0));
+        float3 sample_color   = max(sample_data.rgb, 0.0f);
+        float  sample_var     = max(sample_data.a, 0.0f);
         float  sample_luma    = dot(sample_color, luminance_weights);
 
-        float spatial_weight = spatial_kernel[i];
-        float depth_weight   = exp(-abs(sample_depth - center_depth) / depth_phi);
-        float normal_weight  = pow(saturate(dot(center_normal, sample_normal)), normal_power);
-        float luma_weight    = exp(-abs(sample_luma - center_luma) / luma_phi);
-        float history_confidence = max(saturate(sample_history.a), sample_reservoir_confidence);
-        float history_weight = lerp(0.45f, 1.0f, history_confidence);
+        // tap distance in pixels times the larger of the x/y depth deltas bounds the expected depth change, so
+        // the depth tolerance scales with the kernel reach (schied 2017 eq. 9), step_width drives the a-trous spread
+        float pixel_dist  = length(float2(spatial_offsets[i] * step_width));
+        float depth_delta = abs(sample_depth - center_depth) / max(phi_depth * depth_grad_max * pixel_dist, 1e-3f);
+        float depth_weight = exp(-depth_delta);
+
+        float normal_weight = pow(saturate(dot(center_normal, sample_normal)), phi_normal);
+
+        float luma_delta  = abs(sample_luma - center_luma) / luma_sigma;
+        float luma_weight = exp(-luma_delta);
 
-        float weight = spatial_weight * depth_weight * normal_weight * luma_weight * history_weight;
+        float spatial_w = spatial_kernel[i];
+        float weight    = spatial_w * depth_weight * normal_weight * luma_weight;
         if (weight <= 0.0f)
             continue;
 
-        filtered_color += sample_history * weight;
-        total_weight   += weight;
+        filtered_color    += sample_data * weight;
+        total_weight      += weight;
+        // variance combines as sum(w_i^2 * var_i) / (sum(w_i))^2 (schied 2017 eq. 10)
+        filtered_variance += sample_var * weight * weight;
+        total_var_weight  += weight * weight;
     }
 
-    float4 filtered_sample = total_weight > 0.0f ? filtered_color / total_weight : center_sample;
-    float filter_strength  = lerp(1.0f, 0.35f, center_confidence);
-    // gentler filtering in dark regions so shadow detail survives
-    filter_strength        = saturate(filter_strength - low_light_factor * 0.2f);
-    float4 output_sample   = lerp(center_sample, filtered_sample, filter_strength);
-    output_sample.a        = saturate(lerp(center_history, filtered_sample.a, filter_strength * 0.75f));
+    if (total_weight > 0.0f)
+    {
+        filtered_color    /= total_weight;
+        filtered_variance /= max(total_weight * total_weight, 1e-6f);
+    }
+    else
+    {
+        filtered_color    = center_sample;
+        filtered_variance = center_variance;
+    }
 
-    tex_uav[pixel] = validate_output(output_sample);
+    // store color in rgb, variance in alpha so the next a-trous level has a fresh variance
+    // estimate to drive its luma weight
+    tex_uav[pixel] = validate_output(float4(filtered_color.rgb, max(filtered_variance, 0.0f)));
 }