Skip to content

Commit fa1a3df

Browse files
committed
GS/HW: Tweak AF shader
1 parent 0783ec1 commit fa1a3df

5 files changed

Lines changed: 517 additions & 379 deletions

File tree

bin/resources/shaders/dx11/tfx.fx

Lines changed: 132 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ float manual_lod(float uv_w)
264264
float4 sample_c_af(float2 uv, float uv_w)
265265
{
266266
// HW sampler will reject bad UVs, match that here.
267-
uv = any(isnan(uv) | isinf(uv)) ? float2(0, 0) : uv;
267+
uv = any(isnan(uv) | isinf(uv)) ? float2(0.0f, 0.0f) : uv;
268268

269269
// Large floating point values risk NaN/Inf values.
270270
// Above this value floats lose decimal precision, so it seems a reasonable limit for UVs.
@@ -277,119 +277,155 @@ float4 sample_c_af(float2 uv, float uv_w)
277277
float2 dX = ddx(uv) * sz;
278278
float2 dY = ddy(uv) * sz;
279279

280-
// Calculate Ellipse Transform
281-
bool d_zero = length(dX) == 0 || length(dY) == 0;
282-
bool d_par = (dX.x * dY.y - dY.x * dX.y) == 0;
283-
bool d_per = dot(dX, dY) == 0;
284-
bool d_inf_nan = any(isinf(dX) | isinf(dY) | isnan(dX) | isnan(dY));
280+
// Check if sample is magnification and if so, skip anisotropic calculations.
281+
// Avoid performing square root by checking squared length.
282+
float squared_length_x = dot(dX, dX);
283+
float squared_length_y = dot(dY, dY);
285284

286-
if (!(d_zero || d_par || d_per || d_inf_nan))
285+
[branch]
286+
if (max(squared_length_x, squared_length_y) < (0.75f * 0.75f))
287287
{
288-
float A = dX.y * dX.y + dY.y * dY.y;
289-
float B = -2 * (dX.x * dX.y + dY.x * dY.y);
290-
float C = dX.x * dX.x + dY.x * dY.x;
291-
float f = (dX.x * dY.y - dY.x * dX.y);
292-
float F = f * f;
293-
294-
float p = A - C;
295-
float q = A + C;
296-
float t = sqrt(p * p + B * B);
297-
298-
float2 new_dX = float2(
299-
sqrt(F * (t + p) / (t * (q + t))),
300-
sqrt(F * (t - p) / (t * (q + t))) * sign(B)
301-
);
302-
303-
float2 new_dY = float2(
304-
sqrt(F * (t - p) / (t * (q - t))) * -sign(B),
305-
sqrt(F * (t + p) / (t * (q - t)))
306-
);
307-
308-
d_inf_nan = any(isinf(new_dX) | isinf(new_dY) | isnan(new_dX) | isnan(new_dY));
309-
if (!d_inf_nan)
310-
{
311-
dX = new_dX;
312-
dY = new_dY;
313-
}
314-
}
315-
316-
// Compute AF values
317-
float squared_length_x = dX.x * dX.x + dX.y * dX.y;
318-
float squared_length_y = dY.x * dY.x + dY.y * dY.y;
319-
float determinant = abs(dX.x * dY.y - dX.y * dY.x);
320-
bool is_major_x = squared_length_x > squared_length_y;
321-
float squared_length_major = is_major_x ? squared_length_x : squared_length_y;
322-
float length_major = sqrt(squared_length_major);
323-
324-
float aniso_ratio;
325-
float length_lod;
326-
float2 aniso_line;
327-
if (length_major <= 1.0f)
328-
{
329-
// A zero length_major would result in NaN Lod and break sampling.
330-
// A small length_major would result in aniso_ratio getting clamped to 1.
331-
// Perform isotropic filtering instead.
332-
aniso_ratio = 1.0f;
333-
length_lod = length_major;
334-
aniso_line = float2(0, 0);
288+
#if PS_AUTOMATIC_LOD == 1
289+
return Texture.Sample(TextureSampler, uv);
290+
#else
291+
#if PS_MANUAL_LOD == 1
292+
float lod = manual_lod(uv_w);
293+
#else
294+
float lod = 0.0f; // No Lod
295+
#endif
296+
return Texture.SampleLevel(TextureSampler, uv, lod);
297+
#endif
335298
}
336299
else
337300
{
338-
float norm_major = 1.0f / length_major;
339-
340-
float2 aniso_line_dir = float2(
341-
(is_major_x ? dX.x : dY.x) * norm_major,
342-
(is_major_x ? dX.y : dY.y) * norm_major
343-
);
344-
345-
aniso_ratio = squared_length_major / determinant;
301+
// Calculate Ellipse Transform
302+
bool d_zero = squared_length_x < 0.001f || squared_length_y < 0.001f;
303+
float f = (dX.x * dY.y - dX.y * dY.x);
304+
bool d_par = f == 0.0f; // Zero determinant: dX and dY are parallel, skip the ellipse transform.
305+
bool d_per = dot(dX, dY) == 0.0f; // Zero dot product: dX and dY are perpendicular, skip the ellipse transform.
306+
bool d_inf_nan = any(isinf(dX) | isinf(dY) | isnan(dX) | isnan(dY));
307+
308+
if (!(d_zero || d_par || d_per || d_inf_nan))
309+
{
310+
float2 dXYy = float2(dX.y, dY.y);
311+
float2 dXYx = float2(dX.x, dY.x);
312+
float A = dot(dXYy, dXYy);
313+
float B = -2.0f * dot(dXYx, dXYy);
314+
float C = dot(dXYx, dXYx);
315+
float F = f * f;
316+
317+
float p = A - C;
318+
float q = A + C;
319+
float t = sqrt(p * p + B * B);
320+
321+
float sqrt_num_plus = sqrt(F * (t + p));
322+
float sqrt_num_minus = sqrt(F * (t - p));
323+
324+
float inv_sqrt_denom_plus = rsqrt(t * (q + t));
325+
float inv_sqrt_denom_minus = rsqrt(t * (q - t));
326+
327+
float signB = sign(B);
328+
329+
float2 new_dX = float2(
330+
sqrt_num_plus * inv_sqrt_denom_plus,
331+
sqrt_num_minus * inv_sqrt_denom_plus * signB
332+
);
333+
334+
float2 new_dY = float2(
335+
sqrt_num_minus * inv_sqrt_denom_minus * -signB,
336+
sqrt_num_plus * inv_sqrt_denom_minus
337+
);
338+
339+
d_inf_nan = any(isinf(new_dX) | isinf(new_dY) | isnan(new_dX) | isnan(new_dY));
340+
if (!d_inf_nan)
341+
{
342+
dX = new_dX;
343+
dY = new_dY;
344+
squared_length_x = dot(dX, dX);
345+
squared_length_y = dot(dY, dY);
346+
f = dX.x * dY.y - dX.y * dY.x;
347+
}
348+
}
346349

347-
// Calculate the minor length of the ellipse for Lod, while also clamping the ratio of anisotropy.
348-
if (aniso_ratio > PS_ANISOTROPIC_FILTERING)
350+
// Compute AF values
351+
float determinant = abs(f);
352+
bool is_major_x = squared_length_x > squared_length_y;
353+
float squared_length_major = is_major_x ? squared_length_x : squared_length_y;
354+
float length_major = sqrt(squared_length_major);
355+
356+
float aniso_ratio;
357+
float length_lod;
358+
float2 aniso_line;
359+
if (length_major <= 1.0f)
349360
{
350-
// ratio is clamped - Lod is based on ratio (preserves area)
351-
aniso_ratio = PS_ANISOTROPIC_FILTERING;
352-
length_lod = length_major / PS_ANISOTROPIC_FILTERING;
361+
// A zero length_major would result in NaN Lod and break sampling.
362+
// A small length_major would result in aniso_ratio getting clamped to 1.
363+
// Perform isotropic filtering instead.
364+
aniso_ratio = 1.0f;
365+
length_lod = length_major;
366+
aniso_line = float2(0.0f, 0.0f);
353367
}
354368
else
355369
{
356-
// ratio not clamped - Lod is based on area
357-
length_lod = determinant / length_major;
358-
}
370+
float norm_major = 1.0f / length_major;
359371

360-
// clamp to top Lod
361-
if (length_lod < 1.0f)
362-
aniso_ratio = max(1.0f, aniso_ratio * length_lod);
372+
float2 aniso_line_dir = float2(
373+
(is_major_x ? dX.x : dY.x) * norm_major,
374+
(is_major_x ? dX.y : dY.y) * norm_major
375+
);
376+
377+
aniso_ratio = squared_length_major / determinant;
378+
379+
// Calculate the minor length of the ellipse for Lod, while also clamping the ratio of anisotropy.
380+
if (aniso_ratio > PS_ANISOTROPIC_FILTERING)
381+
{
382+
// ratio is clamped - Lod is based on ratio (preserves area)
383+
aniso_ratio = PS_ANISOTROPIC_FILTERING;
384+
length_lod = length_major / PS_ANISOTROPIC_FILTERING;
385+
}
386+
else
387+
{
388+
// ratio not clamped - Lod is based on area
389+
length_lod = determinant / length_major;
390+
}
391+
392+
// clamp to top Lod
393+
if (length_lod < 1.0f)
394+
aniso_ratio = max(1.0f, aniso_ratio * length_lod);
395+
396+
aniso_ratio = round(aniso_ratio);
397+
aniso_line = aniso_line_dir * 0.5f * length_major * (1.0f / sz);
398+
}
363399

364-
aniso_ratio = round(aniso_ratio);
365-
aniso_line = aniso_line_dir * 0.5f * length_major * (1.0f / sz);
366-
}
367-
368400
#if PS_AUTOMATIC_LOD == 1
369-
float lod = log2(length_lod);
401+
float lod = log2(length_lod);
370402
#elif PS_MANUAL_LOD == 1
371-
float lod = manual_lod(uv_w);
403+
float lod = manual_lod(uv_w);
372404
#else
373-
float lod = 0; // No Lod
405+
float lod = 0.0f; // No Lod
374406
#endif
375-
376-
float4 colour;
377-
if (aniso_ratio == 1.0f)
378-
colour = Texture.SampleLevel(TextureSampler, uv, lod);
379-
else
380-
{
381-
float4 num = float4(0, 0, 0, 0);
382-
for (int i = 0; i < aniso_ratio; i++)
383-
{
384-
float2 d = -aniso_line + (0.5f + i) * (2.0f * aniso_line) / aniso_ratio;
385-
float2 uv_sample = uv + d;
386-
float4 sample_colour = Texture.SampleLevel(TextureSampler, uv_sample, lod);
387-
num += sample_colour;
388-
}
389407

390-
colour = num / aniso_ratio;
408+
float4 colour;
409+
if (aniso_ratio == 1.0f)
410+
colour = Texture.SampleLevel(TextureSampler, uv, lod);
411+
else
412+
{
413+
float4 num = float4(0.0f, 0.0f, 0.0f, 0.0f);
414+
float2 segment = (2.0f * aniso_line) / aniso_ratio;
415+
416+
int aniso_ratio_i = (int)aniso_ratio;
417+
for (int i = 0; i < aniso_ratio_i; i++)
418+
{
419+
float2 d = -aniso_line + (0.5f + i) * segment;
420+
float2 uv_sample = uv + d;
421+
float4 sample_colour = Texture.SampleLevel(TextureSampler, uv_sample, lod);
422+
num += sample_colour;
423+
}
424+
425+
colour = num / aniso_ratio;
426+
}
427+
return colour;
391428
}
392-
return colour;
393429
}
394430
#endif
395431

0 commit comments

Comments
 (0)