Skip to content

Commit 156b5ae

Browse files
committed
[path_tracing] implemented missing parts, fixed a lot of bugs, almost there
1 parent 90471bb commit 156b5ae

7 files changed

Lines changed: 739 additions & 118 deletions

data/shaders/restir_pt.hlsl

Lines changed: 168 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2525
//==============================
2626

2727
static const uint INITIAL_CANDIDATE_SAMPLES = 16; // number of brdf-sampled candidates streamed into the initial reservoir per pixel
28+
static const uint LIGHT_RIS_CANDIDATE_SAMPLES = 4; // number of light-sampled candidates streamed into the same reservoir per pixel
2829
static const float MIN_COS_AT_PRIMARY = 1e-3f; // minimum cosine between a sampled direction and the primary normal for a candidate to be accepted
2930
static const float RUSSIAN_ROULETTE_PROB = 0.85f; // presumably the russian roulette survival probability, usage is not visible here - confirm
3031
static const uint RUSSIAN_ROULETTE_START = 2; // presumably the bounce index at which russian roulette starts, usage is not visible here - confirm
@@ -338,12 +339,17 @@ float3 sample_sky(float3 dir)
338339
// direction is src's primary, so the stored radiance is view-dependent at rc w.r.t. the
339340
// reconnection shift, but the rc roughness gate bounds the resulting bias during reuse
340341
// max_bounces is the remaining path budget after this vertex (including this vertex's nee)
342+
// out_first_dir receives the first outgoing direction sampled at `start` so the caller can
343+
// populate rc_outgoing_dir for future variable-rc shift work; it is zeroed when no bounce
344+
// actually leaves start (max_bounces < 2 or first sample was rejected)
341345
float3 accumulate_subpath_radiance(
342346
PathPayload start,
343347
float3 start_view_dir,
344348
uint max_bounces,
345-
inout uint seed)
349+
inout uint seed,
350+
out float3 out_first_dir)
346351
{
352+
out_first_dir = float3(0, 0, 0);
347353
float3 total = float3(0, 0, 0);
348354
float3 throughput = float3(1, 1, 1);
349355

@@ -360,6 +366,7 @@ float3 accumulate_subpath_radiance(
360366
float3 view_dir = start_view_dir;
361367
float prev_brdf_pdf = 0.0f;
362368
float3 prev_normal = start.hit_normal;
369+
bool first_dir_set = false;
363370

364371
for (uint bounce = 1; bounce < max_bounces; bounce++)
365372
{
@@ -378,6 +385,13 @@ float3 accumulate_subpath_radiance(
378385
if (pdf < RESTIR_MIN_PDF || dot(nd, cur.hit_normal) <= 0.0f || any(isnan(nd)))
379386
break;
380387

388+
// capture the very first outgoing direction at the rc vertex
389+
if (!first_dir_set)
390+
{
391+
out_first_dir = nd;
392+
first_dir_set = true;
393+
}
394+
381395
float unused_pdf;
382396
float3 brdf = evaluate_brdf(cur.albedo, cur.roughness, cur.metallic, cur.hit_normal, view_dir, nd, unused_pdf);
383397
throughput *= brdf / pdf;
@@ -414,6 +428,106 @@ float3 accumulate_subpath_radiance(
414428
return total;
415429
}
416430

431+
// builds a path sample candidate by directly sampling an analytical light or the sun cone
432+
// the candidate's rc vertex is the sampled light point (or sky direction for the sun) and
433+
// rc_radiance carries the emitted radiance toward the primary, the source_pdf returned is
434+
// in solid-angle measure at the primary so it can be combined with brdf-sampled candidates
435+
// in a single mixture-ris pool. point/spot delta lights are skipped: their solid-angle pdf
436+
// is effectively infinite and they are already handled by the spatial-pass direct lighting
437+
PathSample sample_light_candidate(
    float3 primary_pos,
    float3 primary_normal,
    inout uint seed,
    out float source_pdf)
{
    // null candidate: zero radiance and zero pdf, returned unchanged on every rejection path
    PathSample candidate;
    candidate.seed_path       = seed;
    candidate.flags           = 0;
    candidate.path_length     = 1;
    candidate.rc_length       = 2;
    candidate.rc_prev_pos     = primary_pos;
    candidate.rc_pos          = float3(0, 0, 0);
    candidate.rc_normal       = float3(0, 1, 0);
    candidate.rc_outgoing_dir = float3(0, 1, 0);
    candidate.rc_radiance     = float3(0, 0, 0);
    source_pdf                = 0.0f;

    const uint num_lights = (uint)buffer_frame.restir_pt_light_count;
    if (num_lights == 0)
        return candidate;

    // uniform light selection, the index is clamped in case the random value reaches 1.0
    const uint picked_index = min((uint)(random_float(seed) * float(num_lights)), num_lights - 1);
    LightParameters light   = light_parameters[picked_index];
    if (light.intensity <= 0.0f)
        return candidate;

    // light type bits as tested by this function
    const uint light_bit_directional = 1u << 0;
    const uint light_bit_area        = 1u << 6;

    const bool   directional   = (light.flags & light_bit_directional) != 0;
    const bool   area          = (light.flags & light_bit_area) != 0;
    const float  selection_pdf = 1.0f / float(num_lights);
    const float3 emitted       = light.color.rgb * light.intensity;

    if (directional)
    {
        // uniform direction inside the cone of half angle SUN_CONE_HALF_ANGLE around the sun
        float3 toward_sun = -light.direction;
        float2 u          = random_float2(seed);
        float cos_cone    = cos(SUN_CONE_HALF_ANGLE);
        float cos_theta   = lerp(cos_cone, 1.0f, u.x);
        float azimuth     = 2.0f * PI * u.y;
        float sin_theta   = sqrt(max(1.0f - cos_theta * cos_theta, 0.0f));

        float3 tangent, bitangent;
        build_orthonormal_basis_fast(toward_sun, tangent, bitangent);
        float3 wi = normalize(tangent * (cos(azimuth) * sin_theta) + bitangent * (sin(azimuth) * sin_theta) + toward_sun * cos_theta);

        // reject directions at or below the grazing threshold of the primary surface
        if (dot(wi, primary_normal) <= MIN_COS_AT_PRIMARY)
            return candidate;

        // sky-type sample: rc_pos stores the direction rather than a position
        candidate.flags      |= PATH_FLAG_SKY;
        candidate.rc_pos      = wi;
        candidate.rc_normal   = -wi;
        candidate.rc_radiance = emitted;

        // solid-angle pdf of the uniform cone, guarded against a degenerate (zero-width) cone
        float cone_pdf = 1.0f / (2.0f * PI * max(1.0f - cos_cone, 1e-6f));
        source_pdf     = selection_pdf * cone_pdf;
        return candidate;
    }

    // anything that is not a non-degenerate area light yields the null candidate
    if (!area || !(light.area_width > 0.0f) || !(light.area_height > 0.0f))
        return candidate;

    // rectangle spanned by a basis built around the light direction, sampled uniformly
    float3 emitter_normal = light.direction;
    float3 axis_u, axis_v;
    build_orthonormal_basis_fast(emitter_normal, axis_u, axis_v);

    float2 u             = random_float2(seed);
    float3 offset_u      = axis_u * (u.x - 0.5f) * light.area_width;
    float3 offset_v      = axis_v * (u.y - 0.5f) * light.area_height;
    float3 emitter_point = light.position + offset_u + offset_v;

    float3 to_emitter = emitter_point - primary_pos;
    float  distance   = length(to_emitter);
    if (distance < 1e-3f)
        return candidate;

    float3 wi          = to_emitter / distance;
    float  cos_emitter = dot(-wi, emitter_normal);

    // the emitter must face the primary vertex
    if (cos_emitter <= 0.0f)
        return candidate;

    // and the direction must clear the grazing threshold at the primary surface
    if (dot(wi, primary_normal) <= MIN_COS_AT_PRIMARY)
        return candidate;

    candidate.rc_pos      = emitter_point;
    candidate.rc_normal   = emitter_normal;
    candidate.rc_radiance = emitted;
    candidate.flags      |= PATH_FLAG_HAS_RC;

    // convert the uniform area pdf to solid-angle measure at the primary vertex
    float rect_area       = light.area_width * light.area_height;
    float pdf_area        = 1.0f / rect_area;
    float pdf_solid_angle = pdf_area * distance * distance / max(cos_emitter, 1e-6f);
    source_pdf            = selection_pdf * pdf_solid_angle;
    return candidate;
}
530+
417531
// traces a full path from the primary vertex given the first indirect direction; captures
418532
// x2 as the reconnection vertex candidate and accumulates the suffix radiance from x2
419533
// the caller samples dir via sample_brdf so the source pdf matches the primary brdf lobe
@@ -422,6 +536,7 @@ PathSample trace_path_from_primary(
422536
float3 primary_normal,
423537
float primary_roughness,
424538
float3 dir,
539+
uint replay_seed,
425540
inout uint seed)
426541
{
427542
PathSample s;
@@ -430,7 +545,9 @@ PathSample trace_path_from_primary(
430545
s.rc_radiance = float3(0, 0, 0);
431546
s.rc_prev_pos = primary_pos;
432547
s.rc_outgoing_dir = float3(0, 1, 0);
433-
s.seed_path = seed;
548+
// store the seed value that was used to generate xi for sample_brdf, so the random replay
549+
// shift can re-derive the same xi at a destination pixel and trace a matching prefix
550+
s.seed_path = replay_seed;
434551
s.path_length = 0;
435552
s.rc_length = 0;
436553
s.flags = 0;
@@ -464,9 +581,16 @@ PathSample trace_path_from_primary(
464581
s.rc_normal = hit.geometric_normal;
465582
s.rc_length = 2;
466583

467-
float3 suffix = accumulate_subpath_radiance(hit, -dir, RESTIR_MAX_PATH_LENGTH - 1, seed);
468-
s.rc_radiance = soft_saturate_radiance(suffix, RESTIR_FIREFLY_LUMA);
469-
s.path_length = 2;
584+
// store the raw suffix radiance, firefly handling is deferred to the denoise temporal
585+
// variance clamp and the composition stage so per-frame stochastic clamping does not
586+
// flicker between frames or permanently darken bright bounces
587+
float3 first_outgoing_dir;
588+
float3 suffix = accumulate_subpath_radiance(hit, -dir, RESTIR_MAX_PATH_LENGTH - 1, seed, first_outgoing_dir);
589+
if (any(isnan(suffix)) || any(isinf(suffix)))
590+
suffix = float3(0, 0, 0);
591+
s.rc_radiance = max(suffix, 0.0f);
592+
s.path_length = 2;
593+
s.rc_outgoing_dir = first_outgoing_dir;
470594

471595
// reconnection validity: the rc vertex must be rough (stored radiance is view-dependent
472596
// at rc w.r.t. src's incoming, and roughness bounds the shift error) and distant enough
@@ -527,9 +651,12 @@ void ray_gen()
527651
// every iteration calls update_reservoir so M counts every trial (paper-form unbiased ris)
528652
for (uint i = 0; i < INITIAL_CANDIDATE_SAMPLES; i++)
529653
{
530-
float2 xi = random_float2(seed);
654+
// capture the seed just before consuming xi so the random replay shift can replay
655+
// the same primary-direction sample at a destination pixel
656+
uint replay_seed = seed;
657+
float2 xi = random_float2(seed);
531658
float source_pdf;
532-
float3 dir = sample_brdf(albedo, roughness, metallic, normal_ws, view_dir, xi, source_pdf);
659+
float3 dir = sample_brdf(albedo, roughness, metallic, normal_ws, view_dir, xi, source_pdf);
533660

534661
bool dir_valid = (source_pdf >= RESTIR_MIN_PDF) &&
535662
(dot(dir, normal_ws) >= MIN_COS_AT_PRIMARY) &&
@@ -540,7 +667,7 @@ void ray_gen()
540667

541668
if (dir_valid)
542669
{
543-
candidate = trace_path_from_primary(pos_ws, normal_ws, roughness, dir, seed);
670+
candidate = trace_path_from_primary(pos_ws, normal_ws, roughness, dir, replay_seed, seed);
544671
float target_pdf = target_pdf_self(candidate, pos_ws, normal_ws, view_dir, albedo, roughness, metallic);
545672
if (target_pdf > 0.0f)
546673
weight = target_pdf / source_pdf;
@@ -549,6 +676,27 @@ void ray_gen()
549676
update_reservoir(reservoir, candidate, weight, random_float(seed));
550677
}
551678

679+
// additional ris stream over direct light samples (sun cone + area lights); each candidate
680+
// is a single primary->light path with rc_radiance = emitted radiance and source_pdf in
681+
// solid-angle measure at the primary. mixing strategies in the same reservoir keeps the
682+
// estimator unbiased (each strategy independently satisfies E[weight] = integrand) and
683+
// dramatically improves convergence on indirect bounces hitting concentrated emitters
684+
for (uint li = 0; li < LIGHT_RIS_CANDIDATE_SAMPLES; li++)
685+
{
686+
float light_source_pdf;
687+
PathSample light_candidate = sample_light_candidate(pos_ws, normal_ws, seed, light_source_pdf);
688+
689+
float light_weight = 0.0f;
690+
if (light_source_pdf >= RESTIR_MIN_PDF)
691+
{
692+
float target_pdf = target_pdf_self(light_candidate, pos_ws, normal_ws, view_dir, albedo, roughness, metallic);
693+
if (target_pdf > 0.0f)
694+
light_weight = target_pdf / light_source_pdf;
695+
}
696+
697+
update_reservoir(reservoir, light_candidate, light_weight, random_float(seed));
698+
}
699+
552700
// finalize: m_i = 1/M for the initial pass, so weight_sum becomes (1/M) * sum(p_hat/p)
553701
// and W = weight_sum / p_hat_y matches the paper-form output used by downstream merges
554702
if (reservoir.M > 0.0f)
@@ -558,10 +706,21 @@ void ray_gen()
558706
reservoir.target_pdf = final_target;
559707
reservoir.W = (final_target > 0.0f) ? (reservoir.weight_sum / final_target) : 0.0f;
560708

709+
// post-ris visibility test: kill the chosen sample if it is occluded so an obviously dead
710+
// path does not propagate into temporal accumulation. m is preserved so the validity gate
711+
// still treats this pixel as having been sampled (no spurious confidence collapse)
712+
if (reservoir.W > 0.0f && !trace_shift_visibility(reservoir.sample, pos_ws, normal_ws))
713+
{
714+
reservoir.W = 0.0f;
715+
reservoir.weight_sum = 0.0f;
716+
reservoir.target_pdf = 0.0f;
717+
}
718+
561719
float w_clamp = get_w_clamp_for_sample(reservoir.sample);
562720
reservoir.W = min(reservoir.W, w_clamp);
563721

564-
float sample_count_quality = saturate(reservoir.M / float(INITIAL_CANDIDATE_SAMPLES));
722+
float total_candidates = float(INITIAL_CANDIDATE_SAMPLES + LIGHT_RIS_CANDIDATE_SAMPLES);
723+
float sample_count_quality = saturate(reservoir.M / max(total_candidates, 1.0f));
565724
reservoir.confidence = (final_target > 0.0f) ? sample_count_quality : 0.0f;
566725
reservoir.age = 0.0f;
567726

data/shaders/restir_pt_denoise_spatial.hlsl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ void main_cs(uint3 dispatch_id : SV_DispatchThreadID)
6161
float4 center_sample = tex.Load(int3(pixel, 0));
6262
float3 center_color = max(center_sample.rgb, 0.0f);
6363
float center_history = saturate(center_sample.a);
64-
float reservoir_confidence = saturate(tex_reservoir_prev4[pixel].y);
64+
// confidence is the high f16 of tex4.w, see reservoir packing layout
65+
uint center_age_conf = asuint(tex_reservoir_prev4[pixel].w);
66+
float reservoir_confidence = saturate(f16tof32(center_age_conf >> 16u));
6567
float center_confidence = saturate(center_history * 0.6f + reservoir_confidence * 0.4f);
6668
float center_luma = dot(center_color, luminance_weights);
6769
float center_depth = linearize_depth(depth);
@@ -95,7 +97,8 @@ void main_cs(uint3 dispatch_id : SV_DispatchThreadID)
9597
float sample_depth = linearize_depth(sample_depth_raw);
9698
float3 sample_normal = get_normal(sample_uv);
9799
float4 sample_history = tex.Load(int3(sample_pixel, 0));
98-
float sample_reservoir_confidence = saturate(tex_reservoir_prev4[sample_pixel].y);
100+
uint sample_age_conf = asuint(tex_reservoir_prev4[sample_pixel].w);
101+
float sample_reservoir_confidence = saturate(f16tof32(sample_age_conf >> 16u));
99102
float3 sample_color = max(sample_history.rgb, 0.0f);
100103
float sample_luma = dot(sample_color, luminance_weights);
101104

data/shaders/restir_pt_denoise_temporal.hlsl

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,83 @@ float3 clamp_history(float3 history, float3 mean, float3 sigma, float3 minimum_v
7575
return clamp(history, lower, upper);
7676
}
7777

78+
// edge-aware 4-tap reprojection, fetches the 4 nearest history samples around prev_uv and
79+
// reweights them by depth + normal compatibility before bilinear blending; this stops history
80+
// leaking across surface edges when motion lands prev_uv between two surfaces of different depth
81+
float4 sample_history_edge_aware(float2 prev_uv, float3 current_normal, float current_depth, float2 history_resolution)
{
    // prev_uv            : reprojected uv at which the history is fetched
    // current_normal     : normal of the current pixel, each tap's normal is compared against it
    // current_depth      : linearized depth of the current pixel, compared against linearize_depth of each tap
    // history_resolution : size of the history texture in pixels
    // returns the weighted average (all four channels) of the tex2 taps that pass the depth and
    // normal tests, or a single point sample of tex2 when every tap is rejected

    // continuous texel coordinate, the -0.5 shift puts integer values on texel centers
    float2 prev_pixel_f = prev_uv * history_resolution - 0.5f;
    float2 base_f       = floor(prev_pixel_f);
    float2 frac_f       = saturate(prev_pixel_f - base_f);

    // standard bilinear weights, ordered to match the offsets table below
    float4 bilinear_w = float4(
        (1.0f - frac_f.x) * (1.0f - frac_f.y),
        frac_f.x * (1.0f - frac_f.y),
        (1.0f - frac_f.x) * frac_f.y,
        frac_f.x * frac_f.y
    );

    int2 offsets[4] =
    {
        int2(0, 0), int2(1, 0), int2(0, 1), int2(1, 1)
    };

    float4 accumulated = 0.0f;
    float weight_sum   = 0.0f;

    for (uint i = 0; i < 4; i++)
    {
        // taps outside the texture are dropped, the remaining weights are renormalized at the end
        int2 tap_pixel = int2(base_f) + offsets[i];
        if (tap_pixel.x < 0 || tap_pixel.x >= (int)history_resolution.x ||
            tap_pixel.y < 0 || tap_pixel.y >= (int)history_resolution.y)
        {
            continue;
        }

        // taps with a non-positive raw depth are skipped
        float2 tap_uv       = (tap_pixel + 0.5f) / history_resolution;
        float tap_depth_raw = tex_depth.SampleLevel(GET_SAMPLER(sampler_point_clamp), tap_uv, 0).r;
        if (tap_depth_raw <= 0.0f)
        {
            continue;
        }

        // hard reject when the relative depth difference exceeds 10%
        float tap_depth   = linearize_depth(tap_depth_raw);
        float depth_delta = abs(tap_depth - current_depth) / max(current_depth, 1e-3f);
        if (depth_delta > 0.1f)
        {
            continue;
        }

        // hard reject when the normals diverge too far (cosine below 0.7)
        float3 tap_normal       = get_normal(tap_uv);
        float normal_similarity = saturate(dot(tap_normal, current_normal));
        if (normal_similarity < 0.7f)
        {
            continue;
        }

        // soft falloff on top of the hard gates so closer matches dominate the blend
        float depth_weight  = exp(-depth_delta / 0.05f);
        float normal_weight = pow(normal_similarity, 8.0f);
        float w             = bilinear_w[i] * depth_weight * normal_weight;
        if (w <= 0.0f)
        {
            continue;
        }

        accumulated += tex2.Load(int3(tap_pixel, 0)) * w;
        weight_sum  += w;
    }

    if (weight_sum > 0.0f)
    {
        return accumulated / weight_sum;
    }

    // all taps rejected, fall back to point sample at the rounded center
    int2 fallback_pixel = clamp(int2(round(prev_pixel_f)), int2(0, 0), int2(history_resolution) - 1);
    return tex2.Load(int3(fallback_pixel, 0));
}
154+
78155
bool is_history_valid(float2 current_uv, float2 prev_uv, float3 current_position, float3 current_normal, float current_depth, float2 history_resolution, out float confidence)
79156
{
80157
confidence = 0.0f;
@@ -143,7 +220,9 @@ void main_cs(uint3 dispatch_id : SV_DispatchThreadID)
143220
float3 current_position = get_position(uv);
144221
float3 current_normal = get_normal(uv);
145222
float current_linear_z = linearize_depth(depth);
146-
float current_confidence = saturate(tex_reservoir_prev4[pixel].y);
223+
// confidence is the high f16 of tex4.w, see reservoir packing layout
224+
uint age_conf = asuint(tex_reservoir_prev4[pixel].w);
225+
float current_confidence = saturate(f16tof32(age_conf >> 16u));
147226

148227
float3 mean_color;
149228
float3 sigma_color;
@@ -164,7 +243,7 @@ void main_cs(uint3 dispatch_id : SV_DispatchThreadID)
164243

165244
if (is_history_valid(uv, prev_uv, current_position, current_normal, current_linear_z, resolution, temporal_confidence))
166245
{
167-
float4 history_sample = tex2.SampleLevel(GET_SAMPLER(sampler_bilinear_clamp), prev_uv, 0);
246+
float4 history_sample = sample_history_edge_aware(prev_uv, current_normal, current_linear_z, float2(resolution));
168247
history_confidence = saturate(history_sample.a);
169248
history_color = clamp_history(max(history_sample.rgb, 0.0f), mean_color, sigma_color, minimum_color, maximum_color, current_confidence, history_confidence);
170249

0 commit comments

Comments
 (0)