Skip to content

Commit 7ff8b00

Browse files
committed
GS/HW: Tweak AF shader
1 parent 3df128d commit 7ff8b00

5 files changed

Lines changed: 170 additions & 176 deletions

File tree

bin/resources/shaders/dx11/tfx.fx

Lines changed: 45 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -355,124 +355,123 @@ bool2 nan_or_inf(float2 xy)
355355
float4 sample_c_af(float2 uv, float uv_w)
356356
{
357357
// HW sampler will reject bad UVs, match that here.
358-
uv = any(nan_or_inf(uv)) ? float2(0, 0) : uv;
358+
uv = any(nan_or_inf(uv)) ? float2(0.0f, 0.0f) : uv;
359359

360360
// Large floating point values risk NaN/Inf values.
361361
// Above this value floats lose decimal precision, so it seems a reasonable limit for UVs.
362362
uv = clamp(uv, -8388608.0f, 8388608.0f);
363363

364364
// Below taken from https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#7.18.11%20LOD%20Calculations
365+
// And https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_filter_anisotropic.txt
365366
// With guidance from https://pema.dev/2025/05/09/mipmaps-too-much-detail/
366367
float2 sz;
367368
Texture.GetDimensions(sz.x, sz.y);
368369
float2 dX = ddx(uv) * sz;
369370
float2 dY = ddy(uv) * sz;
370371

372+
float length_x = length(dX);
373+
float length_y = length(dY);
374+
371375
// Calculate Ellipse Transform
372-
bool d_zero = length(dX) == 0 || length(dY) == 0;
373-
bool d_par = (dX.x * dY.y - dY.x * dX.y) == 0;
374-
bool d_per = dot(dX, dY) == 0;
376+
bool d_zero = length_x < 0.001f || length_y < 0.001f;
377+
float f = (dX.x * dY.y - dX.y * dY.x);
378+
bool d_par = f < 0.001f;
379+
bool d_per = dot(dX, dY) < 0.001f;
375380
bool d_inf_nan = any(nan_or_inf(dX) | nan_or_inf(dY));
376381

377382
if (!(d_zero || d_par || d_per || d_inf_nan))
378383
{
379384
float A = dX.y * dX.y + dY.y * dY.y;
380385
float B = -2 * (dX.x * dX.y + dY.x * dY.y);
381386
float C = dX.x * dX.x + dY.x * dY.x;
382-
float f = (dX.x * dY.y - dY.x * dX.y);
383387
float F = f * f;
384388

385389
float p = A - C;
386390
float q = A + C;
387391
float t = sqrt(p * p + B * B);
388392

393+
float sqrt_num_plus = sqrt(F * (t + p));
394+
float sqrt_num_minus = sqrt(F * (t - p));
395+
396+
float inv_sqrt_denom_plus = rsqrt(t * (q + t));
397+
float inv_sqrt_denom_minus = rsqrt(t * (q - t));
398+
399+
float signB = sign(B);
400+
389401
float2 new_dX = float2(
390-
sqrt(F * (t + p) / (t * (q + t))),
391-
sqrt(F * (t - p) / (t * (q + t))) * sign(B)
402+
sqrt_num_plus * inv_sqrt_denom_plus,
403+
sqrt_num_minus * inv_sqrt_denom_plus * signB
392404
);
393-
405+
394406
float2 new_dY = float2(
395-
sqrt(F * (t - p) / (t * (q - t))) * -sign(B),
396-
sqrt(F * (t + p) / (t * (q - t)))
407+
sqrt_num_minus * inv_sqrt_denom_minus * -signB,
408+
sqrt_num_plus * inv_sqrt_denom_minus
397409
);
398-
410+
399411
d_inf_nan = any(nan_or_inf(new_dX) | nan_or_inf(new_dY));
400412
if (!d_inf_nan)
401413
{
402414
dX = new_dX;
403415
dY = new_dY;
416+
length_x = length(dX);
417+
length_y = length(dY);
404418
}
405419
}
406420

407421
// Compute AF values
408-
float squared_length_x = dX.x * dX.x + dX.y * dX.y;
409-
float squared_length_y = dY.x * dY.x + dY.y * dY.y;
410-
float determinant = abs(dX.x * dY.y - dX.y * dY.x);
411-
bool is_major_x = squared_length_x > squared_length_y;
412-
float squared_length_major = is_major_x ? squared_length_x : squared_length_y;
413-
float length_major = sqrt(squared_length_major);
422+
bool is_major_x = length_x > length_y;
423+
float length_major = is_major_x ? length_x : length_y;
424+
float length_minor = is_major_x ? length_y : length_x;
414425

415426
float aniso_ratio;
416427
float length_lod;
417428
float2 aniso_line;
429+
418430
if (length_major <= 1.0f)
419431
{
420432
// A zero length_major would result in NaN Lod and break sampling.
421433
// A small length_major would result in aniso_ratio getting clamped to 1.
422434
// Perform isotropic filtering instead.
423435
aniso_ratio = 1.0f;
424436
length_lod = length_major;
425-
aniso_line = float2(0, 0);
437+
aniso_line = float2(0.0f, 0.0f);
426438
}
427439
else
428440
{
429-
float norm_major = 1.0f / length_major;
430-
431-
float2 aniso_line_dir = float2(
432-
(is_major_x ? dX.x : dY.x) * norm_major,
433-
(is_major_x ? dX.y : dY.y) * norm_major
434-
);
435-
436-
aniso_ratio = squared_length_major / determinant;
441+
float2 aniso_line_dir = is_major_x ? dX : dY;
437442

438-
// Calculate the minor length of the ellipse for Lod, while also clamping the ratio of anisotropy.
439-
if (aniso_ratio > PS_ANISOTROPIC_FILTERING)
440-
{
441-
// ratio is clamped - Lod is based on ratio (preserves area)
442-
aniso_ratio = PS_ANISOTROPIC_FILTERING;
443-
length_lod = length_major / PS_ANISOTROPIC_FILTERING;
444-
}
445-
else
446-
{
447-
// ratio not clamped - Lod is based on area
448-
length_lod = determinant / length_major;
449-
}
443+
aniso_ratio = min(length_major / length_minor, PS_ANISOTROPIC_FILTERING);
444+
length_lod = length_major / aniso_ratio;
450445

451446
// clamp to top Lod
452447
if (length_lod < 1.0f)
453448
aniso_ratio = max(1.0f, aniso_ratio * length_lod);
454449

455450
aniso_ratio = round(aniso_ratio);
456-
aniso_line = aniso_line_dir * 0.5f * length_major * (1.0f / sz);
451+
452+
aniso_line = aniso_line_dir * 0.5f * (1.0f / sz);
457453
}
458-
454+
459455
#if PS_AUTOMATIC_LOD == 1
460456
float lod = log2(length_lod);
461457
#elif PS_MANUAL_LOD == 1
462458
float lod = manual_lod(uv_w);
463459
#else
464-
float lod = 0; // No Lod
460+
float lod = 0.0f; // No Lod
465461
#endif
466-
462+
467463
float4 colour;
468464
if (aniso_ratio == 1.0f)
469465
colour = Texture.SampleLevel(TextureSampler, uv, lod);
470466
else
471467
{
472-
float4 num = float4(0, 0, 0, 0);
473-
for (int i = 0; i < aniso_ratio; i++)
474-
{
475-
float2 d = -aniso_line + (0.5f + i) * (2.0f * aniso_line) / aniso_ratio;
468+
float4 num = float4(0.0f, 0.0f, 0.0f, 0.0f);
469+
float2 segment = (2.0f * aniso_line) / aniso_ratio;
470+
471+
int aniso_ratio_i = (int)aniso_ratio;
472+
for (int i = 0; i < aniso_ratio_i; i++)
473+
{
474+
float2 d = -aniso_line + (0.5f + i) * segment;
476475
float2 uv_sample = uv + d;
477476
float4 sample_colour = Texture.SampleLevel(TextureSampler, uv_sample, lod);
478477
num += sample_colour;

bin/resources/shaders/opengl/tfx_fs.glsl

Lines changed: 43 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -213,61 +213,74 @@ float manual_lod(float uv_w)
213213
vec4 sample_c_af(vec2 uv, float uv_w)
214214
{
215215
// HW sampler will reject bad UVs, match that here.
216-
uv = (any(isnan(uv)) || any(isinf(uv))) ? vec2(0, 0) : uv;
216+
uv = (any(isnan(uv)) || any(isinf(uv))) ? vec2(0.0f, 0.0f) : uv;
217217

218218
// Large floating point values risk NaN/Inf values.
219219
// Above this value floats lose decimal precision, so it seems a reasonable limit for UVs.
220220
uv = clamp(uv, -8388608.0f, 8388608.0f);
221221

222222
// Below taken from https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#7.18.11%20LOD%20Calculations
223+
// And https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_filter_anisotropic.txt
223224
// With guidance from https://pema.dev/2025/05/09/mipmaps-too-much-detail/
224225
vec2 sz = textureSize(TextureSampler, 0);
225226
vec2 dX = dFdx(uv) * sz;
226227
vec2 dY = dFdy(uv) * sz;
227228

229+
float length_x = length(dX);
230+
float length_y = length(dY);
231+
228232
// Calculate Ellipse Transform
229-
bool d_zero = length(dX) == 0 || length(dY) == 0;
230-
bool d_par = (dX.x * dY.y - dY.x * dX.y) == 0;
231-
bool d_per = dot(dX, dY) == 0;
233+
bool d_zero = length_x < 0.001f || length_y < 0.001f;
234+
float f = (dX.x * dY.y - dX.y * dY.x);
235+
bool d_par = f < 0.001f;
236+
bool d_per = dot(dX, dY) < 0.001f;
232237
bool d_inf_nan = any(isinf(dX)) || any(isinf(dY)) || any(isnan(dX)) || any(isnan(dY));
233238

234239
if (!(d_zero || d_par || d_per || d_inf_nan))
235240
{
236241
float A = dX.y * dX.y + dY.y * dY.y;
237242
float B = -2 * (dX.x * dX.y + dY.x * dY.y);
238243
float C = dX.x * dX.x + dY.x * dY.x;
239-
float f = (dX.x * dY.y - dY.x * dX.y);
240244
float F = f * f;
241245

242246
float p = A - C;
243247
float q = A + C;
244248
float t = sqrt(p * p + B * B);
245249

250+
float signB = sign(B);
251+
float denom_plus = t * (q + t);
252+
float denom_minus = t * (q - t);
253+
254+
float sqrtA = sqrt(F * (t + p));
255+
float sqrtB = sqrt(F * (t - p));
256+
257+
float inv_sqrt_denom_plus = inversesqrt(denom_plus);
258+
float inv_sqrt_denom_minus = inversesqrt(denom_minus);
259+
246260
vec2 new_dX = vec2(
247-
sqrt(F * (t + p) / (t * (q + t))),
248-
sqrt(F * (t - p) / (t * (q + t))) * sign(B)
261+
sqrtA * inv_sqrt_denom_plus,
262+
sqrtB * inv_sqrt_denom_plus * signB
249263
);
250-
264+
251265
vec2 new_dY = vec2(
252-
sqrt(F * (t - p) / (t * (q - t))) * -sign(B),
253-
sqrt(F * (t + p) / (t * (q - t)))
266+
sqrtB * inv_sqrt_denom_minus * -signB,
267+
sqrtA * inv_sqrt_denom_minus
254268
);
255-
269+
256270
d_inf_nan = any(isinf(new_dX)) || any(isinf(new_dY)) || any(isnan(new_dX)) || any(isnan(new_dY));
257271
if (!d_inf_nan)
258272
{
259273
dX = new_dX;
260274
dY = new_dY;
275+
length_x = length(dX);
276+
length_y = length(dY);
261277
}
262278
}
263279

264280
// Compute AF values
265-
float squared_length_x = dX.x * dX.x + dX.y * dX.y;
266-
float squared_length_y = dY.x * dY.x + dY.y * dY.y;
267-
float determinant = abs(dX.x * dY.y - dX.y * dY.x);
268-
bool is_major_x = squared_length_x > squared_length_y;
269-
float squared_length_major = is_major_x ? squared_length_x : squared_length_y;
270-
float length_major = sqrt(squared_length_major);
281+
bool is_major_x = length_x > length_y;
282+
float length_major = is_major_x ? length_x : length_y;
283+
float length_minor = is_major_x ? length_y : length_x;
271284

272285
float aniso_ratio;
273286
float length_lod;
@@ -279,57 +292,42 @@ vec4 sample_c_af(vec2 uv, float uv_w)
279292
// Perform isotropic filtering instead.
280293
aniso_ratio = 1.0f;
281294
length_lod = length_major;
282-
aniso_line = vec2(0, 0);
295+
aniso_line = vec2(0.0f, 0.0f);
283296
}
284297
else
285298
{
286-
float norm_major = 1.0f / length_major;
287-
288-
vec2 aniso_line_dir = vec2(
289-
(is_major_x ? dX.x : dY.x) * norm_major,
290-
(is_major_x ? dX.y : dY.y) * norm_major
291-
);
292-
293-
aniso_ratio = squared_length_major / determinant;
299+
vec2 aniso_line_dir = is_major_x ? dX : dY;
294300

295-
// Calculate the minor length of the ellipse for Lod, while also clamping the ratio of anisotropy.
296-
if (aniso_ratio > PS_ANISOTROPIC_FILTERING)
297-
{
298-
// ratio is clamped - Lod is based on ratio (preserves area)
299-
aniso_ratio = PS_ANISOTROPIC_FILTERING;
300-
length_lod = length_major / PS_ANISOTROPIC_FILTERING;
301-
}
302-
else
303-
{
304-
// ratio not clamped - Lod is based on area
305-
length_lod = determinant / length_major;
306-
}
301+
aniso_ratio = min(length_major / length_minor, PS_ANISOTROPIC_FILTERING);
302+
length_lod = length_major / aniso_ratio;
307303

308304
// clamp to top Lod
309305
if (length_lod < 1.0f)
310306
aniso_ratio = max(1.0f, aniso_ratio * length_lod);
311307

312308
aniso_ratio = round(aniso_ratio);
313-
aniso_line = aniso_line_dir * 0.5f * length_major * (1.0f / sz);
309+
310+
aniso_line = aniso_line_dir * 0.5f * (1.0f / sz);
314311
}
315-
312+
316313
#if PS_AUTOMATIC_LOD == 1
317314
float lod = log2(length_lod);
318315
#elif PS_MANUAL_LOD == 1
319316
float lod = manual_lod(uv_w);
320317
#else
321-
float lod = 0; // No Lod
318+
float lod = 0.0f; // No Lod
322319
#endif
323-
320+
324321
vec4 colour;
325322
if (aniso_ratio == 1.0f)
326323
colour = textureLod(TextureSampler, uv, lod);
327324
else
328325
{
329-
vec4 num = vec4(0, 0, 0, 0);
326+
vec4 num = vec4(0.0f, 0.0f, 0.0f, 0.0f);
327+
vec2 segment = (2.0f * aniso_line) / aniso_ratio;
330328
for (int i = 0; i < aniso_ratio; i++)
331-
{
332-
vec2 d = -aniso_line + (0.5f + i) * (2.0f * aniso_line) / aniso_ratio;
329+
{
330+
vec2 d = -aniso_line + (0.5f + i) * segment;
333331
vec2 uv_sample = uv + d;
334332
vec4 sample_colour = textureLod(TextureSampler, uv_sample, lod);
335333
num += sample_colour;

0 commit comments

Comments
 (0)