@@ -275,132 +275,148 @@ bool2 nan_or_inf(float2 xy)
// Samples Texture at uv using shader-side anisotropic filtering.
//
// The screen-space derivatives of uv (scaled to texels) describe the pixel's
// footprint in the texture. The footprint is converted to the axes of its
// ellipse, the ratio between the major and minor axis (clamped to
// PS_ANISOTROPIC_FILTERING) selects how many taps are taken along the major
// axis, and the taps are averaged.
//
// uv   - texture coordinate to sample; NaN/Inf components are replaced by 0
//        and the result is clamped to +/-8388608.
// uv_w - only forwarded to manual_lod() when PS_MANUAL_LOD == 1.
//
// Returns the filtered colour.
float4 sample_c_af(float2 uv, float uv_w)
{
	// HW sampler will reject bad UVs, match that here.
	uv = any(nan_or_inf(uv)) ? float2(0.0f, 0.0f) : uv;

	// Large floating point values risk NaN/Inf values.
	// Above this value floats lose decimal precision, so seems a reasonable limit for UVs.
	uv = clamp(uv, -8388608.0f, 8388608.0f);

	// Below taken from https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#7.18.11%20LOD%20Calculations
	// And https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_filter_anisotropic.txt
	// With guidance from https://pema.dev/2025/05/09/mipmaps-too-much-detail/
	float2 sz;
	Texture.GetDimensions(sz.x, sz.y);

	// Derivatives in texel units.
	float2 dX = ddx(uv) * sz;
	float2 dY = ddy(uv) * sz;

	// Check if sample is magnification and if so, skip anisotropic calculations.
	float length_x = length(dX);
	float length_y = length(dY);

	[branch]
	if (max(length_x, length_y) < 0.75f)
	{
#if PS_AUTOMATIC_LOD == 1
		// NOTE(review): Sample() takes implicit derivatives inside a per-pixel branch;
		// confirm this is acceptable for quads that straddle the threshold.
		return Texture.Sample(TextureSampler, uv);
#else
	#if PS_MANUAL_LOD == 1
		float lod = manual_lod(uv_w);
	#else
		float lod = 0.0f; // No Lod
	#endif
		return Texture.SampleLevel(TextureSampler, uv, lod);
#endif
	}
	else
	{
		// Calculate Ellipse Transform
		// The transform is skipped for degenerate footprints (zero-length, parallel,
		// already perpendicular, or non-finite derivatives).
		bool d_zero = length_x < 0.001f || length_y < 0.001f;
		float f = (dX.x * dY.y - dX.y * dY.x);
		// Compare magnitudes: the sign of the cross product only encodes the orientation
		// of the UV mapping, and the sign of the dot product only tells acute from obtuse.
		// Testing the signed values would wrongly reject every mirrored or obtuse footprint.
		bool d_par = abs(f) < 0.001f;
		bool d_per = abs(dot(dX, dY)) < 0.001f;
		bool d_inf_nan = any(nan_or_inf(dX) | nan_or_inf(dY));

		if (!(d_zero || d_par || d_per || d_inf_nan))
		{
			float A = dX.y * dX.y + dY.y * dY.y;
			float B = -2 * (dX.x * dX.y + dY.x * dY.y);
			float C = dX.x * dX.x + dY.x * dY.x;
			float F = f * f;

			float p = A - C;
			float q = A + C;
			float t = sqrt(p * p + B * B);

			// Shared numerators / reciprocal denominators of the four axis components.
			float sqrt_num_plus = sqrt(F * (t + p));
			float sqrt_num_minus = sqrt(F * (t - p));

			float inv_sqrt_denom_plus = rsqrt(t * (q + t));
			float inv_sqrt_denom_minus = rsqrt(t * (q - t));

			float signB = sign(B);

			float2 new_dX = float2(
				sqrt_num_plus * inv_sqrt_denom_plus,
				sqrt_num_minus * inv_sqrt_denom_plus * signB
			);

			float2 new_dY = float2(
				sqrt_num_minus * inv_sqrt_denom_minus * -signB,
				sqrt_num_plus * inv_sqrt_denom_minus
			);

			// Only adopt the transformed axes when they are finite; otherwise keep the raw derivatives.
			d_inf_nan = any(nan_or_inf(new_dX) | nan_or_inf(new_dY));
			if (!d_inf_nan)
			{
				dX = new_dX;
				dY = new_dY;
				length_x = length(dX);
				length_y = length(dY);
			}
		}

		// Compute AF values
		bool is_major_x = length_x > length_y;
		float length_major = is_major_x ? length_x : length_y;
		float length_minor = is_major_x ? length_y : length_x;

		float aniso_ratio;
		float length_lod;
		float2 aniso_line;
		if (length_major <= 1.0f)
		{
			// A zero length_major would result in NaN Lod and break sampling.
			// A small length_major would result in aniso_ratio getting clamped to 1.
			// Perform isotropic filtering instead.
			aniso_ratio = 1.0f;
			length_lod = length_major;
			aniso_line = float2(0.0f, 0.0f);
		}
		else
		{
			// Major axis, still in texel units (not normalised).
			float2 aniso_line_dir = is_major_x ? dX : dY;

			aniso_ratio = min(length_major / length_minor, PS_ANISOTROPIC_FILTERING);
			length_lod = length_major / aniso_ratio;

			// clamp to top Lod
			if (length_lod < 1.0f)
				aniso_ratio = max(1.0f, aniso_ratio * length_lod);

			aniso_ratio = round(aniso_ratio);

			// Half of the major axis, converted from texels back to UV units.
			aniso_line = aniso_line_dir * 0.5f * (1.0f / sz);
		}

#if PS_AUTOMATIC_LOD == 1
		float lod = log2(length_lod);
#elif PS_MANUAL_LOD == 1
		float lod = manual_lod(uv_w);
#else
		float lod = 0.0f; // No Lod
#endif

		float4 colour;
		if (aniso_ratio == 1.0f)
			colour = Texture.SampleLevel(TextureSampler, uv, lod);
		else
		{
			float4 num = float4(0.0f, 0.0f, 0.0f, 0.0f);

			// UV step between neighbouring taps; the taps are centred in equal segments
			// of the line running from -aniso_line to +aniso_line.
			float2 segment = (2.0f * aniso_line) / aniso_ratio;

			int aniso_ratio_i = (int)aniso_ratio;
			for (int i = 0; i < aniso_ratio_i; i++)
			{
				float2 d = -aniso_line + (0.5f + i) * segment;
				float2 uv_sample = uv + d;
				float4 sample_colour = Texture.SampleLevel(TextureSampler, uv_sample, lod);
				num += sample_colour;
			}

			colour = num / aniso_ratio;
		}
		return colour;
	}
}
405421#endif
406422
0 commit comments