@@ -361,124 +361,123 @@ bool2 nan_or_inf(float2 xy)
361361float4 sample_c_af (float2 uv, float uv_w)
362362{
363363 // HW sampler will reject bad UVs, match that here.
364- uv = any (nan_or_inf (uv)) ? float2 (0 , 0 ) : uv;
364+ uv = any (nan_or_inf (uv)) ? float2 (0.0f , 0.0f ) : uv;
365365
366366 // Large floating point values risk NaN/Inf values.
367367 // Above this value floats lose decimal precision, so it seems a reasonable limit for UVs.
368368 uv = clamp (uv, -8388608.0f , 8388608.0f );
369369
370370 // Below taken from https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#7.18.11%20LOD%20Calculations
371+ // And https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_filter_anisotropic.txt
371372 // With guidance from https://pema.dev/2025/05/09/mipmaps-too-much-detail/
372373 float2 sz;
373374 Texture.GetDimensions (sz.x, sz.y);
374375 float2 dX = ddx (uv) * sz;
375376 float2 dY = ddy (uv) * sz;
376377
378+ float length_x = length (dX);
379+ float length_y = length (dY);
380+
377381 // Calculate Ellipse Transform
378- bool d_zero = length (dX) == 0 || length (dY) == 0 ;
379- bool d_par = (dX.x * dY.y - dY.x * dX.y) == 0 ;
380- bool d_per = dot (dX, dY) == 0 ;
382+ bool d_zero = length_x < 0.001f || length_y < 0.001f ;
383+ float f = (dX.x * dY.y - dX.y * dY.x);
384+ bool d_par = f < 0.001f ;
385+ bool d_per = dot (dX, dY) < 0.001f ;
381386 bool d_inf_nan = any (nan_or_inf (dX) | nan_or_inf (dY));
382387
383388 if (!(d_zero || d_par || d_per || d_inf_nan))
384389 {
385390 float A = dX.y * dX.y + dY.y * dY.y;
386391 float B = -2 * (dX.x * dX.y + dY.x * dY.y);
387392 float C = dX.x * dX.x + dY.x * dY.x;
388- float f = (dX.x * dY.y - dY.x * dX.y);
389393 float F = f * f;
390394
391395 float p = A - C;
392396 float q = A + C;
393397 float t = sqrt (p * p + B * B);
394398
399+ float sqrt_num_plus = sqrt (F * (t + p));
400+ float sqrt_num_minus = sqrt (F * (t - p));
401+
402+ float inv_sqrt_denom_plus = rsqrt (t * (q + t));
403+ float inv_sqrt_denom_minus = rsqrt (t * (q - t));
404+
405+ float signB = sign (B);
406+
395407 float2 new_dX = float2 (
396- sqrt (F * (t + p) / (t * (q + t))) ,
397- sqrt (F * (t - p) / (t * (q + t))) * sign (B)
408+ sqrt_num_plus * inv_sqrt_denom_plus ,
409+ sqrt_num_minus * inv_sqrt_denom_plus * signB
398410 );
399-
411+
400412 float2 new_dY = float2 (
401- sqrt (F * (t - p) / (t * (q - t))) * - sign (B) ,
402- sqrt (F * (t + p) / (t * (q - t)))
413+ sqrt_num_minus * inv_sqrt_denom_minus * -signB ,
414+ sqrt_num_plus * inv_sqrt_denom_minus
403415 );
404-
416+
405417 d_inf_nan = any (nan_or_inf (new_dX) | nan_or_inf (new_dY));
406418 if (!d_inf_nan)
407419 {
408420 dX = new_dX;
409421 dY = new_dY;
422+ length_x = length (dX);
423+ length_y = length (dY);
410424 }
411425 }
412426
413427 // Compute AF values
414- float squared_length_x = dX.x * dX.x + dX.y * dX.y;
415- float squared_length_y = dY.x * dY.x + dY.y * dY.y;
416- float determinant = abs (dX.x * dY.y - dX.y * dY.x);
417- bool is_major_x = squared_length_x > squared_length_y;
418- float squared_length_major = is_major_x ? squared_length_x : squared_length_y;
419- float length_major = sqrt (squared_length_major);
428+ bool is_major_x = length_x > length_y;
429+ float length_major = is_major_x ? length_x : length_y;
430+ float length_minor = is_major_x ? length_y : length_x;
420431
421432 float aniso_ratio;
422433 float length_lod;
423434 float2 aniso_line;
435+
424436 if (length_major <= 1.0f )
425437 {
426438 // A zero length_major would result in NaN Lod and break sampling.
427439 // A small length_major would result in aniso_ratio getting clamped to 1.
428440 // Perform isotropic filtering instead.
429441 aniso_ratio = 1.0f ;
430442 length_lod = length_major;
431- aniso_line = float2 (0 , 0 );
443+ aniso_line = float2 (0.0f , 0.0f );
432444 }
433445 else
434446 {
435- float norm_major = 1.0f / length_major;
436-
437- float2 aniso_line_dir = float2 (
438- (is_major_x ? dX.x : dY.x) * norm_major,
439- (is_major_x ? dX.y : dY.y) * norm_major
440- );
441-
442- aniso_ratio = squared_length_major / determinant;
447+ float2 aniso_line_dir = is_major_x ? dX : dY;
443448
444- // Calculate the minor length of the ellipse for Lod, while also clamping the ratio of anisotropy.
445- if (aniso_ratio > PS_ANISOTROPIC_FILTERING)
446- {
447- // ratio is clamped - Lod is based on ratio (preserves area)
448- aniso_ratio = PS_ANISOTROPIC_FILTERING;
449- length_lod = length_major / PS_ANISOTROPIC_FILTERING;
450- }
451- else
452- {
453- // ratio not clamped - Lod is based on area
454- length_lod = determinant / length_major;
455- }
449+ aniso_ratio = min (length_major / length_minor, PS_ANISOTROPIC_FILTERING);
450+ length_lod = length_major / aniso_ratio;
456451
457452 // clamp to top Lod
458453 if (length_lod < 1.0f )
459454 aniso_ratio = max (1.0f , aniso_ratio * length_lod);
460455
461456 aniso_ratio = round (aniso_ratio);
462- aniso_line = aniso_line_dir * 0.5f * length_major * (1.0f / sz);
457+
458+ aniso_line = aniso_line_dir * 0.5f * (1.0f / sz);
463459 }
464-
460+
465461#if PS_AUTOMATIC_LOD == 1
466462 float lod = log2 (length_lod);
467463#elif PS_MANUAL_LOD == 1
468464 float lod = manual_lod (uv_w);
469465#else
470- float lod = 0 ; // No Lod
466+ float lod = 0.0f ; // No Lod
471467#endif
472-
468+
473469 float4 colour;
474470 if (aniso_ratio == 1.0f )
475471 colour = Texture.SampleLevel (TextureSampler, uv, lod);
476472 else
477473 {
478- float4 num = float4 (0 , 0 , 0 , 0 );
479- for (int i = 0 ; i < aniso_ratio; i++)
480- {
481- float2 d = -aniso_line + (0.5f + i) * (2.0f * aniso_line) / aniso_ratio;
474+ float4 num = float4 (0.0f , 0.0f , 0.0f , 0.0f );
475+ float2 segment = (2.0f * aniso_line) / aniso_ratio;
476+
477+ int aniso_ratio_i = (int )aniso_ratio;
478+ for (int i = 0 ; i < aniso_ratio_i; i++)
479+ {
480+ float2 d = -aniso_line + (0.5f + i) * segment;
482481 float2 uv_sample = uv + d;
483482 float4 sample_colour = Texture.SampleLevel (TextureSampler, uv_sample, lod);
484483 num += sample_colour;
0 commit comments