Skip to content

Commit f3d5069

Browse files
committed
iOS: Port the optimized software anisotropic filter to the Vulkan shader
Add the sample_c_af anisotropic-filter function from PCSX2 PR PCSX2#14465 to the Vulkan tfx.glsl and wire it into sample_c, gated on PS_ANISOTROPIC_FILTERING > 1. This fixes a silent no-op on the Vulkan backend: GSRendererHW already sets PS_ANISOTROPIC_FILTERING from GSConfig.MaxAnisotropy for triangles and hardware sampler anisotropy is disabled, so the shader path is the active AF implementation — but the iOS Vulkan shader had no handler for the macro, so anisotropic filtering did nothing. With sample_c_af in place the setting takes effect, using the optimized early-isotropic-fallback / cheaper-math form from PR PCSX2#14465. Bumps SHADER_CACHE_VERSION 103 -> 104 so the Vulkan disk cache invalidates and the new shader compiles. Metal is unaffected (it loads a build-time metallib with no disk shader cache); the Metal AF path remains a future item since upstream never produced a validated MSL transcription of this shader.
1 parent cd703cf commit f3d5069

2 files changed

Lines changed: 140 additions & 2 deletions

File tree

app/src/main/assets/resources/shaders/vulkan/tfx.glsl

Lines changed: 139 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,142 @@ layout(set = 1, binding = 3) uniform texture2D PrimMinTexture;
365365

366366
#if NEEDS_TEX
367367

368+
// Software anisotropic filter (PCSX2 PR #14465 — early isotropic fallback, cheaper
369+
// math). Enabled when PS_ANISOTROPIC_FILTERING > 1; the renderer sets that from
370+
// GSConfig.MaxAnisotropy for triangles (see GSRendererHW), and hardware sampler
371+
// anisotropy is disabled, so this is the active AF path. AF always forces
372+
// automatic_lod, so the PS_MANUAL_LOD branch below is never compiled when AF is on.
373+
#if PS_ANISOTROPIC_FILTERING > 1
374+
// Software anisotropic texture fetch.
//
// uv   : texture coordinate to sample at; NaN/Inf components are replaced by 0
//        and the result is clamped to +/-8388608 before use.
// uv_w : only forwarded to manual_lod() when the PS_MANUAL_LOD branch is compiled.
//
// Returns a single textureLod() tap when the footprint is (close to) isotropic,
// otherwise the average of round(aniso_ratio) taps spread along the major axis of
// the pixel footprint, where aniso_ratio is capped at PS_ANISOTROPIC_FILTERING.
vec4 sample_c_af(vec2 uv, float uv_w)
{
	// HW sampler will reject bad UVs, match that here.
	uv = (any(isnan(uv)) || any(isinf(uv))) ? vec2(0.0f, 0.0f) : uv;

	// Large floating point values risk NaN/Inf values.
	// Above this value floats lose decimal precision, so seems a reasonable limit for UVs.
	uv = clamp(uv, -8388608.0f, 8388608.0f);

	// Below taken from https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#7.18.11%20LOD%20Calculations
	// And https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_filter_anisotropic.txt
	// With guidance from https://pema.dev/2025/05/09/mipmaps-too-much-detail/
	vec2 sz = textureSize(Texture, 0);
	vec2 dX = dFdx(uv) * sz; // screen-space derivatives, scaled to texels
	vec2 dY = dFdy(uv) * sz;

	float length_x = length(dX);
	float length_y = length(dY);

	// Calculate Ellipse Transform
	// Determinant of the texel-space Jacobian [dX dY]; its magnitude is ~0 when the
	// two derivative vectors are parallel. Computed once and reused for F below.
	float det = dX.x * dY.y - dY.x * dX.y;

	// The transform is skipped when the derivatives are zero, parallel,
	// perpendicular (already orthogonal) or contain Inf/NaN. The parallel and
	// perpendicular tests use magnitudes so that mirrored mappings (negative
	// determinant) and obtuse derivative pairs are not misclassified as degenerate.
	bool d_zero = length_x < 0.001f || length_y < 0.001f;
	bool d_par = abs(det) < 0.001f;
	bool d_per = abs(dot(dX, dY)) < 0.001f;
	bool d_inf_nan = any(isinf(dX)) || any(isinf(dY)) || any(isnan(dX)) || any(isnan(dY));

	if (!(d_zero || d_par || d_per || d_inf_nan))
	{
		// Implicit ellipse coefficients (A, B, C, F) per the D3D11.3 spec.
		float A = dX.y * dX.y + dY.y * dY.y;
		float B = -2.0f * (dX.x * dX.y + dY.x * dY.y);
		float C = dX.x * dX.x + dY.x * dY.x;
		float F = det * det;

		float p = A - C;
		float q = A + C;
		float t = sqrt(p * p + B * B);

		float signB = sign(B);
		float denom_plus = t * (q + t);
		float denom_minus = t * (q - t);

		float sqrtA = sqrt(F * (t + p));
		float sqrtB = sqrt(F * (t - p));

		float inv_sqrt_denom_plus = inversesqrt(denom_plus);
		float inv_sqrt_denom_minus = inversesqrt(denom_minus);

		vec2 new_dX = vec2(
			sqrtA * inv_sqrt_denom_plus,
			sqrtB * inv_sqrt_denom_plus * signB
		);

		vec2 new_dY = vec2(
			sqrtB * inv_sqrt_denom_minus * -signB,
			sqrtA * inv_sqrt_denom_minus
		);

		// A zero denominator (e.g. t == 0) yields Inf/NaN; keep the raw derivatives then.
		d_inf_nan = any(isinf(new_dX)) || any(isinf(new_dY)) || any(isnan(new_dX)) || any(isnan(new_dY));
		if (!d_inf_nan)
		{
			dX = new_dX;
			dY = new_dY;
			length_x = length(dX);
			length_y = length(dY);
		}
	}

	// Compute AF values
	bool is_major_x = length_x > length_y;
	float length_major = is_major_x ? length_x : length_y;
	float length_minor = is_major_x ? length_y : length_x;

	float aniso_ratio;
	float length_lod;
	vec2 aniso_line;
	if (length_major <= 1.0f)
	{
		// A zero length_major would result in NaN Lod and break sampling.
		// A small length_major would result in aniso_ratio getting clamped to 1.
		// Perform isotropic filtering instead.
		aniso_ratio = 1.0f;
		length_lod = length_major;
		aniso_line = vec2(0.0f, 0.0f);
	}
	else
	{
		vec2 aniso_line_dir = is_major_x ? dX : dY;

		aniso_ratio = min(length_major / length_minor, float(PS_ANISOTROPIC_FILTERING));
		length_lod = length_major / aniso_ratio;

		// clamp to top Lod
		if (length_lod < 1.0f)
			aniso_ratio = max(1.0f, aniso_ratio * length_lod);

		aniso_ratio = round(aniso_ratio);

		// Half of the major axis, converted from texels back to normalized UV units.
		aniso_line = aniso_line_dir * 0.5f * (1.0f / sz);
	}

#if PS_AUTOMATIC_LOD == 1
	float lod = log2(length_lod);
#elif PS_MANUAL_LOD == 1
	float lod = manual_lod(uv_w);
#else
	float lod = 0.0f; // No Lod
#endif

	vec4 colour;
	if (aniso_ratio == 1.0f)
		colour = textureLod(Texture, uv, lod);
	else
	{
		// Split the line [-aniso_line, +aniso_line] into aniso_ratio equal segments
		// and take one tap at the centre of each.
		vec4 num = vec4(0.0f, 0.0f, 0.0f, 0.0f);
		vec2 segment = (2.0f * aniso_line) / aniso_ratio;
		for (int i = 0; float(i) < aniso_ratio; i++)
		{
			vec2 d = -aniso_line + (0.5f + float(i)) * segment;
			vec2 uv_sample = uv + d;
			vec4 sample_colour = textureLod(Texture, uv_sample, lod);
			num += sample_colour;
		}

		colour = num / aniso_ratio;
	}
	return colour;
}
502+
#endif
503+
368504
vec4 sample_c(vec2 uv)
369505
{
370506
#if PS_TEX_IS_FB
@@ -388,7 +524,9 @@ vec4 sample_c(vec2 uv)
388524
#endif
389525
#endif
390526

391-
#if PS_AUTOMATIC_LOD == 1
527+
#if PS_ANISOTROPIC_FILTERING > 1
528+
return sample_c_af(uv, vsIn.t.w);
529+
#elif PS_AUTOMATIC_LOD == 1
392530
return texture(Texture, uv);
393531
#elif PS_MANUAL_LOD == 1
394532
// FIXME add LOD: K - ( LOG2(Q) * (1 << L))

app/src/main/cpp/pcsx2/ShaderCacheVersion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33

44
/// Version number for GS and other shaders. Increment whenever any of the contents of the
55
/// shaders change, to invalidate the cache.
6-
static constexpr u32 SHADER_CACHE_VERSION = 103; // Last changed in PR 14602
6+
static constexpr u32 SHADER_CACHE_VERSION = 104; // AF shader tweak (port of PCSX2 PR 14465)

0 commit comments

Comments
 (0)