@@ -264,7 +264,7 @@ float manual_lod(float uv_w)
264264float4 sample_c_af (float2 uv, float uv_w)
265265{
266266 // HW sampler will reject bad UVs, match that here.
267- uv = any (isnan (uv) | isinf (uv)) ? float2 (0 , 0 ) : uv;
267+ uv = any (isnan (uv) | isinf (uv)) ? float2 (0.0f , 0.0f ) : uv;
268268
269269 // Large floating point values risk NaN/Inf values.
270270 // Above this value floats lose decimal precision, so seems a reasonable limit for UVs.
@@ -277,119 +277,155 @@ float4 sample_c_af(float2 uv, float uv_w)
277277 float2 dX = ddx (uv) * sz;
278278 float2 dY = ddy (uv) * sz;
279279
280- // Calculate Ellipse Transform
281- bool d_zero = length (dX) == 0 || length (dY) == 0 ;
282- bool d_par = (dX.x * dY.y - dY.x * dX.y) == 0 ;
283- bool d_per = dot (dX, dY) == 0 ;
284- bool d_inf_nan = any (isinf (dX) | isinf (dY) | isnan (dX) | isnan (dY));
280+ // Check if sample is magnification and if so, skip anisotropic calculations.
281+ // Avoid performing square root by checking squared length.
282+ float squared_length_x = dot (dX, dX);
283+ float squared_length_y = dot (dY, dY);
285284
286- if (!(d_zero || d_par || d_per || d_inf_nan))
285+ [branch]
286+ if (max (squared_length_x, squared_length_y) < (0.75f * 0.75f ))
287287 {
288- float A = dX.y * dX.y + dY.y * dY.y;
289- float B = -2 * (dX.x * dX.y + dY.x * dY.y);
290- float C = dX.x * dX.x + dY.x * dY.x;
291- float f = (dX.x * dY.y - dY.x * dX.y);
292- float F = f * f;
293-
294- float p = A - C;
295- float q = A + C;
296- float t = sqrt (p * p + B * B);
297-
298- float2 new_dX = float2 (
299- sqrt (F * (t + p) / (t * (q + t))),
300- sqrt (F * (t - p) / (t * (q + t))) * sign (B)
301- );
302-
303- float2 new_dY = float2 (
304- sqrt (F * (t - p) / (t * (q - t))) * -sign (B),
305- sqrt (F * (t + p) / (t * (q - t)))
306- );
307-
308- d_inf_nan = any (isinf (new_dX) | isinf (new_dY) | isnan (new_dX) | isnan (new_dY));
309- if (!d_inf_nan)
310- {
311- dX = new_dX;
312- dY = new_dY;
313- }
314- }
315-
316- // Compute AF values
317- float squared_length_x = dX.x * dX.x + dX.y * dX.y;
318- float squared_length_y = dY.x * dY.x + dY.y * dY.y;
319- float determinant = abs (dX.x * dY.y - dX.y * dY.x);
320- bool is_major_x = squared_length_x > squared_length_y;
321- float squared_length_major = is_major_x ? squared_length_x : squared_length_y;
322- float length_major = sqrt (squared_length_major);
323-
324- float aniso_ratio;
325- float length_lod;
326- float2 aniso_line;
327- if (length_major <= 1.0f )
328- {
329- // A zero length_major would result in NaN Lod and break sampling.
330- // A small length_major would result in aniso_ratio getting clamped to 1.
331- // Perform isotropic filtering instead.
332- aniso_ratio = 1.0f ;
333- length_lod = length_major;
334- aniso_line = float2 (0 , 0 );
288+ #if PS_AUTOMATIC_LOD == 1
289+ return Texture.Sample (TextureSampler, uv);
290+ #else
291+ #if PS_MANUAL_LOD == 1
292+ float lod = manual_lod (uv_w);
293+ #else
294+ float lod = 0.0f ; // No Lod
295+ #endif
296+ return Texture.SampleLevel (TextureSampler, uv, lod);
297+ #endif
335298 }
336299 else
337300 {
338- float norm_major = 1.0f / length_major;
339-
340- float2 aniso_line_dir = float2 (
341- (is_major_x ? dX.x : dY.x) * norm_major,
342- (is_major_x ? dX.y : dY.y) * norm_major
343- );
344-
345- aniso_ratio = squared_length_major / determinant;
301+ // Calculate Ellipse Transform
302+ bool d_zero = squared_length_x < 0.001f || squared_length_y < 0.001f ;
303+ float f = (dX.x * dY.y - dX.y * dY.x);
304+ bool d_par = f == 0.0f < 0.001f ;
305+ bool d_per = dot (dX, dY) == 0.0f < 0.001f ;
306+ bool d_inf_nan = any (isinf (dX) | isinf (dY) | isnan (dX) | isnan (dY));
307+
308+ if (!(d_zero || d_par || d_per || d_inf_nan))
309+ {
310+ float2 dXYy = float2 (dX.y, dY.y);
311+ float2 dXYx = float2 (dX.x, dY.x);
312+ float A = dot (dXYy, dXYy);
313+ float B = -2.0f * dot (dXYx, dXYy);
314+ float C = dot (dXYx, dXYx);
315+ float F = f * f;
316+
317+ float p = A - C;
318+ float q = A + C;
319+ float t = sqrt (p * p + B * B);
320+
321+ float sqrt_num_plus = sqrt (F * (t + p));
322+ float sqrt_num_minus = sqrt (F * (t - p));
323+
324+ float inv_sqrt_denom_plus = rsqrt (t * (q + t));
325+ float inv_sqrt_denom_minus = rsqrt (t * (q - t));
326+
327+ float signB = sign (B);
328+
329+ float2 new_dX = float2 (
330+ sqrt_num_plus * inv_sqrt_denom_plus,
331+ sqrt_num_minus * inv_sqrt_denom_plus * signB
332+ );
333+
334+ float2 new_dY = float2 (
335+ sqrt_num_minus * inv_sqrt_denom_minus * -signB,
336+ sqrt_num_plus * inv_sqrt_denom_minus
337+ );
338+
339+ d_inf_nan = any (isinf (new_dX) | isinf (new_dY) | isnan (new_dX) | isnan (new_dY));
340+ if (!d_inf_nan)
341+ {
342+ dX = new_dX;
343+ dY = new_dY;
344+ squared_length_x = dot (dX, dX);
345+ squared_length_y = dot (dY, dY);
346+ f = dX.x * dY.y - dX.y * dY.x;
347+ }
348+ }
346349
347- // Calculate the minor length of the ellipse for Lod, while also clamping the ratio of anisotropy.
348- if (aniso_ratio > PS_ANISOTROPIC_FILTERING)
350+ // Compute AF values
351+ float determinant = abs (f);
352+ bool is_major_x = squared_length_x > squared_length_y;
353+ float squared_length_major = is_major_x ? squared_length_x : squared_length_y;
354+ float length_major = sqrt (squared_length_major);
355+
356+ float aniso_ratio;
357+ float length_lod;
358+ float2 aniso_line;
359+ if (length_major <= 1.0f )
349360 {
350- // ratio is clamped - Lod is based on ratio (preserves area)
351- aniso_ratio = PS_ANISOTROPIC_FILTERING;
352- length_lod = length_major / PS_ANISOTROPIC_FILTERING;
361+ // A zero length_major would result in NaN Lod and break sampling.
362+ // A small length_major would result in aniso_ratio getting clamped to 1.
363+ // Perform isotropic filtering instead.
364+ aniso_ratio = 1.0f ;
365+ length_lod = length_major;
366+ aniso_line = float2 (0.0f , 0.0f );
353367 }
354368 else
355369 {
356- // ratio not clamped - Lod is based on area
357- length_lod = determinant / length_major;
358- }
370+ float norm_major = 1.0f / length_major;
359371
360- // clamp to top Lod
361- if (length_lod < 1.0f )
362- aniso_ratio = max (1.0f , aniso_ratio * length_lod);
372+ float2 aniso_line_dir = float2 (
373+ (is_major_x ? dX.x : dY.x) * norm_major,
374+ (is_major_x ? dX.y : dY.y) * norm_major
375+ );
376+
377+ aniso_ratio = squared_length_major / determinant;
378+
379+ // Calculate the minor length of the ellipse for Lod, while also clamping the ratio of anisotropy.
380+ if (aniso_ratio > PS_ANISOTROPIC_FILTERING)
381+ {
382+ // ratio is clamped - Lod is based on ratio (preserves area)
383+ aniso_ratio = PS_ANISOTROPIC_FILTERING;
384+ length_lod = length_major / PS_ANISOTROPIC_FILTERING;
385+ }
386+ else
387+ {
388+ // ratio not clamped - Lod is based on area
389+ length_lod = determinant / length_major;
390+ }
391+
392+ // clamp to top Lod
393+ if (length_lod < 1.0f )
394+ aniso_ratio = max (1.0f , aniso_ratio * length_lod);
395+
396+ aniso_ratio = round (aniso_ratio);
397+ aniso_line = aniso_line_dir * 0.5f * length_major * (1.0f / sz);
398+ }
363399
364- aniso_ratio = round (aniso_ratio);
365- aniso_line = aniso_line_dir * 0.5f * length_major * (1.0f / sz);
366- }
367-
368400#if PS_AUTOMATIC_LOD == 1
369- float lod = log2 (length_lod);
401+ float lod = log2 (length_lod);
370402#elif PS_MANUAL_LOD == 1
371- float lod = manual_lod (uv_w);
403+ float lod = manual_lod (uv_w);
372404#else
373- float lod = 0 ; // No Lod
405+ float lod = 0.0f ; // No Lod
374406#endif
375-
376- float4 colour;
377- if (aniso_ratio == 1.0f )
378- colour = Texture.SampleLevel (TextureSampler, uv, lod);
379- else
380- {
381- float4 num = float4 (0 , 0 , 0 , 0 );
382- for (int i = 0 ; i < aniso_ratio; i++)
383- {
384- float2 d = -aniso_line + (0.5f + i) * (2.0f * aniso_line) / aniso_ratio;
385- float2 uv_sample = uv + d;
386- float4 sample_colour = Texture.SampleLevel (TextureSampler, uv_sample, lod);
387- num += sample_colour;
388- }
389407
390- colour = num / aniso_ratio;
408+ float4 colour;
409+ if (aniso_ratio == 1.0f )
410+ colour = Texture.SampleLevel (TextureSampler, uv, lod);
411+ else
412+ {
413+ float4 num = float4 (0.0f , 0.0f , 0.0f , 0.0f );
414+ float2 segment = (2.0f * aniso_line) / aniso_ratio;
415+
416+ int aniso_ratio_i = (int )aniso_ratio;
417+ for (int i = 0 ; i < aniso_ratio_i; i++)
418+ {
419+ float2 d = -aniso_line + (0.5f + i) * segment;
420+ float2 uv_sample = uv + d;
421+ float4 sample_colour = Texture.SampleLevel (TextureSampler, uv_sample, lod);
422+ num += sample_colour;
423+ }
424+
425+ colour = num / aniso_ratio;
426+ }
427+ return colour;
391428 }
392- return colour;
393429}
394430#endif
395431
0 commit comments