Skip to content

Commit 0eb834b

Browse files
committed
GS/HW: Tweak AF shader
1 parent 5369d6b commit 0eb834b

5 files changed

Lines changed: 438 additions & 378 deletions

File tree

bin/resources/shaders/dx11/tfx.fx

Lines changed: 112 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -275,132 +275,148 @@ bool2 nan_or_inf(float2 xy)
275275
float4 sample_c_af(float2 uv, float uv_w)
276276
{
277277
// HW sampler will reject bad UVs, match that here.
278-
uv = any(nan_or_inf(uv)) ? float2(0, 0) : uv;
278+
uv = any(nan_or_inf(uv)) ? float2(0.0f, 0.0f) : uv;
279279

280280
// Large floating point values risk NaN/Inf values.
281281
// Above this value floats lose decimal precision, so this seems a reasonable limit for UVs.
282282
uv = clamp(uv, -8388608.0f, 8388608.0f);
283283

284284
// Below taken from https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#7.18.11%20LOD%20Calculations
285+
// And https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_filter_anisotropic.txt
285286
// With guidance from https://pema.dev/2025/05/09/mipmaps-too-much-detail/
286287
float2 sz;
287288
Texture.GetDimensions(sz.x, sz.y);
288289
float2 dX = ddx(uv) * sz;
289290
float2 dY = ddy(uv) * sz;
290291

291-
// Calculate Ellipse Transform
292-
bool d_zero = length(dX) == 0 || length(dY) == 0;
293-
bool d_par = (dX.x * dY.y - dY.x * dX.y) == 0;
294-
bool d_per = dot(dX, dY) == 0;
295-
bool d_inf_nan = any(nan_or_inf(dX) | nan_or_inf(dY));
292+
// Check if sample is magnification and if so, skip anisotropic calculations.
293+
float length_x = length(dX);
294+
float length_y = length(dY);
296295

297-
if (!(d_zero || d_par || d_per || d_inf_nan))
296+
[branch]
297+
if (max(length_x, length_y) < 0.75f)
298298
{
299-
float A = dX.y * dX.y + dY.y * dY.y;
300-
float B = -2 * (dX.x * dX.y + dY.x * dY.y);
301-
float C = dX.x * dX.x + dY.x * dY.x;
302-
float f = (dX.x * dY.y - dY.x * dX.y);
303-
float F = f * f;
304-
305-
float p = A - C;
306-
float q = A + C;
307-
float t = sqrt(p * p + B * B);
308-
309-
float2 new_dX = float2(
310-
sqrt(F * (t + p) / (t * (q + t))),
311-
sqrt(F * (t - p) / (t * (q + t))) * sign(B)
312-
);
313-
314-
float2 new_dY = float2(
315-
sqrt(F * (t - p) / (t * (q - t))) * -sign(B),
316-
sqrt(F * (t + p) / (t * (q - t)))
317-
);
318-
319-
d_inf_nan = any(nan_or_inf(new_dX) | nan_or_inf(new_dY));
320-
if (!d_inf_nan)
321-
{
322-
dX = new_dX;
323-
dY = new_dY;
324-
}
325-
}
326-
327-
// Compute AF values
328-
float squared_length_x = dX.x * dX.x + dX.y * dX.y;
329-
float squared_length_y = dY.x * dY.x + dY.y * dY.y;
330-
float determinant = abs(dX.x * dY.y - dX.y * dY.x);
331-
bool is_major_x = squared_length_x > squared_length_y;
332-
float squared_length_major = is_major_x ? squared_length_x : squared_length_y;
333-
float length_major = sqrt(squared_length_major);
334-
335-
float aniso_ratio;
336-
float length_lod;
337-
float2 aniso_line;
338-
if (length_major <= 1.0f)
339-
{
340-
// A zero length_major would result in NaN Lod and break sampling.
341-
// A small length_major would result in aniso_ratio getting clamped to 1.
342-
// Perform isotropic filtering instead.
343-
aniso_ratio = 1.0f;
344-
length_lod = length_major;
345-
aniso_line = float2(0, 0);
299+
#if PS_AUTOMATIC_LOD == 1
300+
return Texture.Sample(TextureSampler, uv);
301+
#else
302+
#if PS_MANUAL_LOD == 1
303+
float lod = manual_lod(uv_w);
304+
#else
305+
float lod = 0.0f; // No Lod
306+
#endif
307+
return Texture.SampleLevel(TextureSampler, uv, lod);
308+
#endif
346309
}
347310
else
348311
{
349-
float norm_major = 1.0f / length_major;
350-
351-
float2 aniso_line_dir = float2(
352-
(is_major_x ? dX.x : dY.x) * norm_major,
353-
(is_major_x ? dX.y : dY.y) * norm_major
354-
);
355-
356-
aniso_ratio = squared_length_major / determinant;
312+
// Calculate Ellipse Transform
313+
bool d_zero = length_x < 0.001f || length_y < 0.001f;
314+
float f = (dX.x * dY.y - dX.y * dY.x);
315+
bool d_par = f < 0.001f;
316+
bool d_per = dot(dX, dY) < 0.001f;
317+
bool d_inf_nan = any(nan_or_inf(dX) | nan_or_inf(dY));
318+
319+
if (!(d_zero || d_par || d_per || d_inf_nan))
320+
{
321+
float A = dX.y * dX.y + dY.y * dY.y;
322+
float B = -2 * (dX.x * dX.y + dY.x * dY.y);
323+
float C = dX.x * dX.x + dY.x * dY.x;
324+
float F = f * f;
325+
326+
float p = A - C;
327+
float q = A + C;
328+
float t = sqrt(p * p + B * B);
329+
330+
float sqrt_num_plus = sqrt(F * (t + p));
331+
float sqrt_num_minus = sqrt(F * (t - p));
332+
333+
float inv_sqrt_denom_plus = rsqrt(t * (q + t));
334+
float inv_sqrt_denom_minus = rsqrt(t * (q - t));
335+
336+
float signB = sign(B);
337+
338+
float2 new_dX = float2(
339+
sqrt_num_plus * inv_sqrt_denom_plus,
340+
sqrt_num_minus * inv_sqrt_denom_plus * signB
341+
);
342+
343+
float2 new_dY = float2(
344+
sqrt_num_minus * inv_sqrt_denom_minus * -signB,
345+
sqrt_num_plus * inv_sqrt_denom_minus
346+
);
347+
348+
d_inf_nan = any(nan_or_inf(new_dX) | nan_or_inf(new_dY));
349+
if (!d_inf_nan)
350+
{
351+
dX = new_dX;
352+
dY = new_dY;
353+
length_x = length(dX);
354+
length_y = length(dY);
355+
}
356+
}
357357

358-
// Calculate the minor length of the ellipse for Lod, while also clamping the ratio of anisotropy.
359-
if (aniso_ratio > PS_ANISOTROPIC_FILTERING)
358+
// Compute AF values
359+
bool is_major_x = length_x > length_y;
360+
float length_major = is_major_x ? length_x : length_y;
361+
float length_minor = is_major_x ? length_y : length_x;
362+
363+
float aniso_ratio;
364+
float length_lod;
365+
float2 aniso_line;
366+
if (length_major <= 1.0f)
360367
{
361-
// ratio is clamped - Lod is based on ratio (preserves area)
362-
aniso_ratio = PS_ANISOTROPIC_FILTERING;
363-
length_lod = length_major / PS_ANISOTROPIC_FILTERING;
368+
// A zero length_major would result in NaN Lod and break sampling.
369+
// A small length_major would result in aniso_ratio getting clamped to 1.
370+
// Perform isotropic filtering instead.
371+
aniso_ratio = 1.0f;
372+
length_lod = length_major;
373+
aniso_line = float2(0.0f, 0.0f);
364374
}
365375
else
366376
{
367-
// ratio not clamped - Lod is based on area
368-
length_lod = determinant / length_major;
369-
}
377+
float2 aniso_line_dir = is_major_x ? dX : dY;
370378

371-
// clamp to top Lod
372-
if (length_lod < 1.0f)
373-
aniso_ratio = max(1.0f, aniso_ratio * length_lod);
379+
aniso_ratio = min(length_major / length_minor, PS_ANISOTROPIC_FILTERING);
380+
length_lod = length_major / aniso_ratio;
381+
382+
// clamp to top Lod
383+
if (length_lod < 1.0f)
384+
aniso_ratio = max(1.0f, aniso_ratio * length_lod);
385+
386+
aniso_ratio = round(aniso_ratio);
387+
388+
aniso_line = aniso_line_dir * 0.5f * (1.0f / sz);
389+
}
374390

375-
aniso_ratio = round(aniso_ratio);
376-
aniso_line = aniso_line_dir * 0.5f * length_major * (1.0f / sz);
377-
}
378-
379391
#if PS_AUTOMATIC_LOD == 1
380-
float lod = log2(length_lod);
392+
float lod = log2(length_lod);
381393
#elif PS_MANUAL_LOD == 1
382-
float lod = manual_lod(uv_w);
394+
float lod = manual_lod(uv_w);
383395
#else
384-
float lod = 0; // No Lod
396+
float lod = 0.0f; // No Lod
385397
#endif
386-
387-
float4 colour;
388-
if (aniso_ratio == 1.0f)
389-
colour = Texture.SampleLevel(TextureSampler, uv, lod);
390-
else
391-
{
392-
float4 num = float4(0, 0, 0, 0);
393-
for (int i = 0; i < aniso_ratio; i++)
394-
{
395-
float2 d = -aniso_line + (0.5f + i) * (2.0f * aniso_line) / aniso_ratio;
396-
float2 uv_sample = uv + d;
397-
float4 sample_colour = Texture.SampleLevel(TextureSampler, uv_sample, lod);
398-
num += sample_colour;
399-
}
400398

401-
colour = num / aniso_ratio;
399+
float4 colour;
400+
if (aniso_ratio == 1.0f)
401+
colour = Texture.SampleLevel(TextureSampler, uv, lod);
402+
else
403+
{
404+
float4 num = float4(0.0f, 0.0f, 0.0f, 0.0f);
405+
float2 segment = (2.0f * aniso_line) / aniso_ratio;
406+
407+
int aniso_ratio_i = (int)aniso_ratio;
408+
for (int i = 0; i < aniso_ratio_i; i++)
409+
{
410+
float2 d = -aniso_line + (0.5f + i) * segment;
411+
float2 uv_sample = uv + d;
412+
float4 sample_colour = Texture.SampleLevel(TextureSampler, uv_sample, lod);
413+
num += sample_colour;
414+
}
415+
416+
colour = num / aniso_ratio;
417+
}
418+
return colour;
402419
}
403-
return colour;
404420
}
405421
#endif
406422

0 commit comments

Comments
 (0)