Skip to content

Commit 2c704fe

Browse files
committed
GS/HW: Tweak AF shader
1 parent 48ca476 commit 2c704fe

5 files changed

Lines changed: 170 additions & 176 deletions

File tree

bin/resources/shaders/dx11/tfx.fx

Lines changed: 45 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -361,124 +361,123 @@ bool2 nan_or_inf(float2 xy)
361361
float4 sample_c_af(float2 uv, float uv_w)
362362
{
363363
// HW sampler will reject bad UVs, match that here.
364-
uv = any(nan_or_inf(uv)) ? float2(0, 0) : uv;
364+
uv = any(nan_or_inf(uv)) ? float2(0.0f, 0.0f) : uv;
365365

366366
// Large floating point values risk NaN/Inf values.
367367
// Above this value floats lose decimal precision, so this seems a reasonable limit for UVs.
368368
uv = clamp(uv, -8388608.0f, 8388608.0f);
369369

370370
// Below taken from https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#7.18.11%20LOD%20Calculations
371+
// And https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_filter_anisotropic.txt
371372
// With guidance from https://pema.dev/2025/05/09/mipmaps-too-much-detail/
372373
float2 sz;
373374
Texture.GetDimensions(sz.x, sz.y);
374375
float2 dX = ddx(uv) * sz;
375376
float2 dY = ddy(uv) * sz;
376377

378+
float length_x = length(dX);
379+
float length_y = length(dY);
380+
377381
// Calculate Ellipse Transform
378-
bool d_zero = length(dX) == 0 || length(dY) == 0;
379-
bool d_par = (dX.x * dY.y - dY.x * dX.y) == 0;
380-
bool d_per = dot(dX, dY) == 0;
382+
bool d_zero = length_x < 0.001f || length_y < 0.001f;
383+
float f = (dX.x * dY.y - dX.y * dY.x);
384+
bool d_par = f < 0.001f;
385+
bool d_per = dot(dX, dY) < 0.001f;
381386
bool d_inf_nan = any(nan_or_inf(dX) | nan_or_inf(dY));
382387

383388
if (!(d_zero || d_par || d_per || d_inf_nan))
384389
{
385390
float A = dX.y * dX.y + dY.y * dY.y;
386391
float B = -2 * (dX.x * dX.y + dY.x * dY.y);
387392
float C = dX.x * dX.x + dY.x * dY.x;
388-
float f = (dX.x * dY.y - dY.x * dX.y);
389393
float F = f * f;
390394

391395
float p = A - C;
392396
float q = A + C;
393397
float t = sqrt(p * p + B * B);
394398

399+
float sqrt_num_plus = sqrt(F * (t + p));
400+
float sqrt_num_minus = sqrt(F * (t - p));
401+
402+
float inv_sqrt_denom_plus = rsqrt(t * (q + t));
403+
float inv_sqrt_denom_minus = rsqrt(t * (q - t));
404+
405+
float signB = sign(B);
406+
395407
float2 new_dX = float2(
396-
sqrt(F * (t + p) / (t * (q + t))),
397-
sqrt(F * (t - p) / (t * (q + t))) * sign(B)
408+
sqrt_num_plus * inv_sqrt_denom_plus,
409+
sqrt_num_minus * inv_sqrt_denom_plus * signB
398410
);
399-
411+
400412
float2 new_dY = float2(
401-
sqrt(F * (t - p) / (t * (q - t))) * -sign(B),
402-
sqrt(F * (t + p) / (t * (q - t)))
413+
sqrt_num_minus * inv_sqrt_denom_minus * -signB,
414+
sqrt_num_plus * inv_sqrt_denom_minus
403415
);
404-
416+
405417
d_inf_nan = any(nan_or_inf(new_dX) | nan_or_inf(new_dY));
406418
if (!d_inf_nan)
407419
{
408420
dX = new_dX;
409421
dY = new_dY;
422+
length_x = length(dX);
423+
length_y = length(dY);
410424
}
411425
}
412426

413427
// Compute AF values
414-
float squared_length_x = dX.x * dX.x + dX.y * dX.y;
415-
float squared_length_y = dY.x * dY.x + dY.y * dY.y;
416-
float determinant = abs(dX.x * dY.y - dX.y * dY.x);
417-
bool is_major_x = squared_length_x > squared_length_y;
418-
float squared_length_major = is_major_x ? squared_length_x : squared_length_y;
419-
float length_major = sqrt(squared_length_major);
428+
bool is_major_x = length_x > length_y;
429+
float length_major = is_major_x ? length_x : length_y;
430+
float length_minor = is_major_x ? length_y : length_x;
420431

421432
float aniso_ratio;
422433
float length_lod;
423434
float2 aniso_line;
435+
424436
if (length_major <= 1.0f)
425437
{
426438
// A zero length_major would result in NaN Lod and break sampling.
427439
// A small length_major would result in aniso_ratio getting clamped to 1.
428440
// Perform isotropic filtering instead.
429441
aniso_ratio = 1.0f;
430442
length_lod = length_major;
431-
aniso_line = float2(0, 0);
443+
aniso_line = float2(0.0f, 0.0f);
432444
}
433445
else
434446
{
435-
float norm_major = 1.0f / length_major;
436-
437-
float2 aniso_line_dir = float2(
438-
(is_major_x ? dX.x : dY.x) * norm_major,
439-
(is_major_x ? dX.y : dY.y) * norm_major
440-
);
441-
442-
aniso_ratio = squared_length_major / determinant;
447+
float2 aniso_line_dir = is_major_x ? dX : dY;
443448

444-
// Calculate the minor length of the ellipse for Lod, while also clamping the ratio of anisotropy.
445-
if (aniso_ratio > PS_ANISOTROPIC_FILTERING)
446-
{
447-
// ratio is clamped - Lod is based on ratio (preserves area)
448-
aniso_ratio = PS_ANISOTROPIC_FILTERING;
449-
length_lod = length_major / PS_ANISOTROPIC_FILTERING;
450-
}
451-
else
452-
{
453-
// ratio not clamped - Lod is based on area
454-
length_lod = determinant / length_major;
455-
}
449+
aniso_ratio = min(length_major / length_minor, PS_ANISOTROPIC_FILTERING);
450+
length_lod = length_major / aniso_ratio;
456451

457452
// clamp to top Lod
458453
if (length_lod < 1.0f)
459454
aniso_ratio = max(1.0f, aniso_ratio * length_lod);
460455

461456
aniso_ratio = round(aniso_ratio);
462-
aniso_line = aniso_line_dir * 0.5f * length_major * (1.0f / sz);
457+
458+
aniso_line = aniso_line_dir * 0.5f * (1.0f / sz);
463459
}
464-
460+
465461
#if PS_AUTOMATIC_LOD == 1
466462
float lod = log2(length_lod);
467463
#elif PS_MANUAL_LOD == 1
468464
float lod = manual_lod(uv_w);
469465
#else
470-
float lod = 0; // No Lod
466+
float lod = 0.0f; // No Lod
471467
#endif
472-
468+
473469
float4 colour;
474470
if (aniso_ratio == 1.0f)
475471
colour = Texture.SampleLevel(TextureSampler, uv, lod);
476472
else
477473
{
478-
float4 num = float4(0, 0, 0, 0);
479-
for (int i = 0; i < aniso_ratio; i++)
480-
{
481-
float2 d = -aniso_line + (0.5f + i) * (2.0f * aniso_line) / aniso_ratio;
474+
float4 num = float4(0.0f, 0.0f, 0.0f, 0.0f);
475+
float2 segment = (2.0f * aniso_line) / aniso_ratio;
476+
477+
int aniso_ratio_i = (int)aniso_ratio;
478+
for (int i = 0; i < aniso_ratio_i; i++)
479+
{
480+
float2 d = -aniso_line + (0.5f + i) * segment;
482481
float2 uv_sample = uv + d;
483482
float4 sample_colour = Texture.SampleLevel(TextureSampler, uv_sample, lod);
484483
num += sample_colour;

bin/resources/shaders/opengl/tfx_fs.glsl

Lines changed: 43 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -220,61 +220,74 @@ float manual_lod(float uv_w)
220220
vec4 sample_c_af(vec2 uv, float uv_w)
221221
{
222222
// HW sampler will reject bad UVs, match that here.
223-
uv = (any(isnan(uv)) || any(isinf(uv))) ? vec2(0, 0) : uv;
223+
uv = (any(isnan(uv)) || any(isinf(uv))) ? vec2(0.0f, 0.0f) : uv;
224224

225225
// Large floating point values risk NaN/Inf values.
226226
// Above this value floats lose decimal precision, so this seems a reasonable limit for UVs.
227227
uv = clamp(uv, -8388608.0f, 8388608.0f);
228228

229229
// Below taken from https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#7.18.11%20LOD%20Calculations
230+
// And https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_filter_anisotropic.txt
230231
// With guidance from https://pema.dev/2025/05/09/mipmaps-too-much-detail/
231232
vec2 sz = textureSize(TextureSampler, 0);
232233
vec2 dX = dFdx(uv) * sz;
233234
vec2 dY = dFdy(uv) * sz;
234235

236+
float length_x = length(dX);
237+
float length_y = length(dY);
238+
235239
// Calculate Ellipse Transform
236-
bool d_zero = length(dX) == 0 || length(dY) == 0;
237-
bool d_par = (dX.x * dY.y - dY.x * dX.y) == 0;
238-
bool d_per = dot(dX, dY) == 0;
240+
bool d_zero = length_x < 0.001f || length_y < 0.001f;
241+
float f = (dX.x * dY.y - dX.y * dY.x);
242+
bool d_par = f < 0.001f;
243+
bool d_per = dot(dX, dY) < 0.001f;
239244
bool d_inf_nan = any(isinf(dX)) || any(isinf(dY)) || any(isnan(dX)) || any(isnan(dY));
240245

241246
if (!(d_zero || d_par || d_per || d_inf_nan))
242247
{
243248
float A = dX.y * dX.y + dY.y * dY.y;
244249
float B = -2 * (dX.x * dX.y + dY.x * dY.y);
245250
float C = dX.x * dX.x + dY.x * dY.x;
246-
float f = (dX.x * dY.y - dY.x * dX.y);
247251
float F = f * f;
248252

249253
float p = A - C;
250254
float q = A + C;
251255
float t = sqrt(p * p + B * B);
252256

257+
float signB = sign(B);
258+
float denom_plus = t * (q + t);
259+
float denom_minus = t * (q - t);
260+
261+
float sqrtA = sqrt(F * (t + p));
262+
float sqrtB = sqrt(F * (t - p));
263+
264+
float inv_sqrt_denom_plus = inversesqrt(denom_plus);
265+
float inv_sqrt_denom_minus = inversesqrt(denom_minus);
266+
253267
vec2 new_dX = vec2(
254-
sqrt(F * (t + p) / (t * (q + t))),
255-
sqrt(F * (t - p) / (t * (q + t))) * sign(B)
268+
sqrtA * inv_sqrt_denom_plus,
269+
sqrtB * inv_sqrt_denom_plus * signB
256270
);
257-
271+
258272
vec2 new_dY = vec2(
259-
sqrt(F * (t - p) / (t * (q - t))) * -sign(B),
260-
sqrt(F * (t + p) / (t * (q - t)))
273+
sqrtB * inv_sqrt_denom_minus * -signB,
274+
sqrtA * inv_sqrt_denom_minus
261275
);
262-
276+
263277
d_inf_nan = any(isinf(new_dX)) || any(isinf(new_dY)) || any(isnan(new_dX)) || any(isnan(new_dY));
264278
if (!d_inf_nan)
265279
{
266280
dX = new_dX;
267281
dY = new_dY;
282+
length_x = length(dX);
283+
length_y = length(dY);
268284
}
269285
}
270286

271287
// Compute AF values
272-
float squared_length_x = dX.x * dX.x + dX.y * dX.y;
273-
float squared_length_y = dY.x * dY.x + dY.y * dY.y;
274-
float determinant = abs(dX.x * dY.y - dX.y * dY.x);
275-
bool is_major_x = squared_length_x > squared_length_y;
276-
float squared_length_major = is_major_x ? squared_length_x : squared_length_y;
277-
float length_major = sqrt(squared_length_major);
288+
bool is_major_x = length_x > length_y;
289+
float length_major = is_major_x ? length_x : length_y;
290+
float length_minor = is_major_x ? length_y : length_x;
278291

279292
float aniso_ratio;
280293
float length_lod;
@@ -286,57 +299,42 @@ vec4 sample_c_af(vec2 uv, float uv_w)
286299
// Perform isotropic filtering instead.
287300
aniso_ratio = 1.0f;
288301
length_lod = length_major;
289-
aniso_line = vec2(0, 0);
302+
aniso_line = vec2(0.0f, 0.0f);
290303
}
291304
else
292305
{
293-
float norm_major = 1.0f / length_major;
294-
295-
vec2 aniso_line_dir = vec2(
296-
(is_major_x ? dX.x : dY.x) * norm_major,
297-
(is_major_x ? dX.y : dY.y) * norm_major
298-
);
299-
300-
aniso_ratio = squared_length_major / determinant;
306+
vec2 aniso_line_dir = is_major_x ? dX : dY;
301307

302-
// Calculate the minor length of the ellipse for Lod, while also clamping the ratio of anisotropy.
303-
if (aniso_ratio > PS_ANISOTROPIC_FILTERING)
304-
{
305-
// ratio is clamped - Lod is based on ratio (preserves area)
306-
aniso_ratio = PS_ANISOTROPIC_FILTERING;
307-
length_lod = length_major / PS_ANISOTROPIC_FILTERING;
308-
}
309-
else
310-
{
311-
// ratio not clamped - Lod is based on area
312-
length_lod = determinant / length_major;
313-
}
308+
aniso_ratio = min(length_major / length_minor, PS_ANISOTROPIC_FILTERING);
309+
length_lod = length_major / aniso_ratio;
314310

315311
// clamp to top Lod
316312
if (length_lod < 1.0f)
317313
aniso_ratio = max(1.0f, aniso_ratio * length_lod);
318314

319315
aniso_ratio = round(aniso_ratio);
320-
aniso_line = aniso_line_dir * 0.5f * length_major * (1.0f / sz);
316+
317+
aniso_line = aniso_line_dir * 0.5f * (1.0f / sz);
321318
}
322-
319+
323320
#if PS_AUTOMATIC_LOD == 1
324321
float lod = log2(length_lod);
325322
#elif PS_MANUAL_LOD == 1
326323
float lod = manual_lod(uv_w);
327324
#else
328-
float lod = 0; // No Lod
325+
float lod = 0.0f; // No Lod
329326
#endif
330-
327+
331328
vec4 colour;
332329
if (aniso_ratio == 1.0f)
333330
colour = textureLod(TextureSampler, uv, lod);
334331
else
335332
{
336-
vec4 num = vec4(0, 0, 0, 0);
333+
vec4 num = vec4(0.0f, 0.0f, 0.0f, 0.0f);
334+
vec2 segment = (2.0f * aniso_line) / aniso_ratio;
337335
for (int i = 0; i < aniso_ratio; i++)
338-
{
339-
vec2 d = -aniso_line + (0.5f + i) * (2.0f * aniso_line) / aniso_ratio;
336+
{
337+
vec2 d = -aniso_line + (0.5f + i) * segment;
340338
vec2 uv_sample = uv + d;
341339
vec4 sample_colour = textureLod(TextureSampler, uv_sample, lod);
342340
num += sample_colour;

0 commit comments

Comments
 (0)