@@ -355,124 +355,123 @@ bool2 nan_or_inf(float2 xy)
355355float4 sample_c_af (float2 uv, float uv_w)
356356{
357357 // HW sampler will reject bad UVs, match that here.
358- uv = any (nan_or_inf (uv)) ? float2 (0 , 0 ) : uv;
358+ uv = any (nan_or_inf (uv)) ? float2 (0.0f , 0.0f ) : uv;
359359
360360 // Large floating point values risk NaN/Inf values.
361361 // Above this value floats lose decimal precision, so it seems a reasonable limit for UVs.
362362 uv = clamp (uv, -8388608.0f , 8388608.0f );
363363
364364 // Below taken from https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#7.18.11%20LOD%20Calculations
365+ // And https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_filter_anisotropic.txt
365366 // With guidance from https://pema.dev/2025/05/09/mipmaps-too-much-detail/
366367 float2 sz;
367368 Texture.GetDimensions (sz.x, sz.y);
368369 float2 dX = ddx (uv) * sz;
369370 float2 dY = ddy (uv) * sz;
370371
372+ float length_x = length (dX);
373+ float length_y = length (dY);
374+
371375 // Calculate Ellipse Transform
372- bool d_zero = length (dX) == 0 || length (dY) == 0 ;
373- bool d_par = (dX.x * dY.y - dY.x * dX.y) == 0 ;
374- bool d_per = dot (dX, dY) == 0 ;
376+ bool d_zero = length_x < 0.001f || length_y < 0.001f ;
377+ float f = (dX.x * dY.y - dX.y * dY.x);
378+ bool d_par = f < 0.001f ;
379+ bool d_per = dot (dX, dY) < 0.001f ;
375380 bool d_inf_nan = any (nan_or_inf (dX) | nan_or_inf (dY));
376381
377382 if (!(d_zero || d_par || d_per || d_inf_nan))
378383 {
379384 float A = dX.y * dX.y + dY.y * dY.y;
380385 float B = -2 * (dX.x * dX.y + dY.x * dY.y);
381386 float C = dX.x * dX.x + dY.x * dY.x;
382- float f = (dX.x * dY.y - dY.x * dX.y);
383387 float F = f * f;
384388
385389 float p = A - C;
386390 float q = A + C;
387391 float t = sqrt (p * p + B * B);
388392
393+ float sqrt_num_plus = sqrt (F * (t + p));
394+ float sqrt_num_minus = sqrt (F * (t - p));
395+
396+ float inv_sqrt_denom_plus = rsqrt (t * (q + t));
397+ float inv_sqrt_denom_minus = rsqrt (t * (q - t));
398+
399+ float signB = sign (B);
400+
389401 float2 new_dX = float2 (
390- sqrt (F * (t + p) / (t * (q + t))) ,
391- sqrt (F * (t - p) / (t * (q + t))) * sign (B)
402+ sqrt_num_plus * inv_sqrt_denom_plus ,
403+ sqrt_num_minus * inv_sqrt_denom_plus * signB
392404 );
393-
405+
394406 float2 new_dY = float2 (
395- sqrt (F * (t - p) / (t * (q - t))) * - sign (B) ,
396- sqrt (F * (t + p) / (t * (q - t)))
407+ sqrt_num_minus * inv_sqrt_denom_minus * -signB ,
408+ sqrt_num_plus * inv_sqrt_denom_minus
397409 );
398-
410+
399411 d_inf_nan = any (nan_or_inf (new_dX) | nan_or_inf (new_dY));
400412 if (!d_inf_nan)
401413 {
402414 dX = new_dX;
403415 dY = new_dY;
416+ length_x = length (dX);
417+ length_y = length (dY);
404418 }
405419 }
406420
407421 // Compute AF values
408- float squared_length_x = dX.x * dX.x + dX.y * dX.y;
409- float squared_length_y = dY.x * dY.x + dY.y * dY.y;
410- float determinant = abs (dX.x * dY.y - dX.y * dY.x);
411- bool is_major_x = squared_length_x > squared_length_y;
412- float squared_length_major = is_major_x ? squared_length_x : squared_length_y;
413- float length_major = sqrt (squared_length_major);
422+ bool is_major_x = length_x > length_y;
423+ float length_major = is_major_x ? length_x : length_y;
424+ float length_minor = is_major_x ? length_y : length_x;
414425
415426 float aniso_ratio;
416427 float length_lod;
417428 float2 aniso_line;
429+
418430 if (length_major <= 1.0f )
419431 {
420432 // A zero length_major would result in NaN Lod and break sampling.
421433 // A small length_major would result in aniso_ratio getting clamped to 1.
422434 // Perform isotropic filtering instead.
423435 aniso_ratio = 1.0f ;
424436 length_lod = length_major;
425- aniso_line = float2 (0 , 0 );
437+ aniso_line = float2 (0.0f , 0.0f );
426438 }
427439 else
428440 {
429- float norm_major = 1.0f / length_major;
430-
431- float2 aniso_line_dir = float2 (
432- (is_major_x ? dX.x : dY.x) * norm_major,
433- (is_major_x ? dX.y : dY.y) * norm_major
434- );
435-
436- aniso_ratio = squared_length_major / determinant;
441+ float2 aniso_line_dir = is_major_x ? dX : dY;
437442
438- // Calculate the minor length of the ellipse for Lod, while also clamping the ratio of anisotropy.
439- if (aniso_ratio > PS_ANISOTROPIC_FILTERING)
440- {
441- // ratio is clamped - Lod is based on ratio (preserves area)
442- aniso_ratio = PS_ANISOTROPIC_FILTERING;
443- length_lod = length_major / PS_ANISOTROPIC_FILTERING;
444- }
445- else
446- {
447- // ratio not clamped - Lod is based on area
448- length_lod = determinant / length_major;
449- }
443+ aniso_ratio = min (length_major / length_minor, PS_ANISOTROPIC_FILTERING);
444+ length_lod = length_major / aniso_ratio;
450445
451446 // clamp to top Lod
452447 if (length_lod < 1.0f )
453448 aniso_ratio = max (1.0f , aniso_ratio * length_lod);
454449
455450 aniso_ratio = round (aniso_ratio);
456- aniso_line = aniso_line_dir * 0.5f * length_major * (1.0f / sz);
451+
452+ aniso_line = aniso_line_dir * 0.5f * (1.0f / sz);
457453 }
458-
454+
459455#if PS_AUTOMATIC_LOD == 1
460456 float lod = log2 (length_lod);
461457#elif PS_MANUAL_LOD == 1
462458 float lod = manual_lod (uv_w);
463459#else
464- float lod = 0 ; // No Lod
460+ float lod = 0.0f ; // No Lod
465461#endif
466-
462+
467463 float4 colour;
468464 if (aniso_ratio == 1.0f )
469465 colour = Texture.SampleLevel (TextureSampler, uv, lod);
470466 else
471467 {
472- float4 num = float4 (0 , 0 , 0 , 0 );
473- for (int i = 0 ; i < aniso_ratio; i++)
474- {
475- float2 d = -aniso_line + (0.5f + i) * (2.0f * aniso_line) / aniso_ratio;
468+ float4 num = float4 (0.0f , 0.0f , 0.0f , 0.0f );
469+ float2 segment = (2.0f * aniso_line) / aniso_ratio;
470+
471+ int aniso_ratio_i = (int )aniso_ratio;
472+ for (int i = 0 ; i < aniso_ratio_i; i++)
473+ {
474+ float2 d = -aniso_line + (0.5f + i) * segment;
476475 float2 uv_sample = uv + d;
477476 float4 sample_colour = Texture.SampleLevel (TextureSampler, uv_sample, lod);
478477 num += sample_colour;
0 commit comments