Skip to content

Commit 1a8b4ae

Browse files
committed
GS/HW: Add setting to do autoflush draws with a copy loop.
Only applies to certain autoflush draws.
1 parent 83da1b6 commit 1a8b4ae

14 files changed

Lines changed: 678 additions & 48 deletions

File tree

pcsx2-qt/Settings/GraphicsHardwareFixesSettingsTab.ui

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,11 @@
267267
<string>Enabled (All Primitives)</string>
268268
</property>
269269
</item>
270+
<item>
271+
<property name="text">
272+
<string>Batch Enabled (All Primitives)</string>
273+
</property>
274+
</item>
270275
</widget>
271276
</item>
272277
<item row="7" column="0" colspan="2">

pcsx2/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,7 @@ enum class GSHWAutoFlushLevel : u8
417417
Disabled,
418418
SpritesOnly,
419419
Enabled,
420+
BatchEnabled,
420421
};
421422

422423
enum class GSGPUTargetCLUTMode : u8

pcsx2/GS/GSState.cpp

Lines changed: 174 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2365,6 +2365,7 @@ void GSState::FlushDraw(GSFlushReason reason)
23652365

23662366
m_dirty_gs_regs = 0;
23672367
temp_draw_rect = GSVector4i::zero();
2368+
ResetAutoFlushList();
23682369
}
23692370

23702371
m_state_flush_reason = GSFlushReason::UNKNOWN;
@@ -2553,6 +2554,11 @@ void GSState::FlushPrim()
25532554
pxAssert((int)unused < GSUtil::GetVertexCount(PRIM->PRIM));
25542555
}
25552556

2557+
if (HasAutoFlushList())
2558+
{
2559+
UpdateAutoFlushList();
2560+
}
2561+
25562562
// If the PSM format of Z is invalid, but it is masked (no write) and ZTST is set to ALWAYS pass (no test, just allow)
25572563
// we can ignore the Z format, since it won't be used in the draw (Star Ocean 3 transitions)
25582564
#ifdef PCSX2_DEVBUILD
@@ -4745,6 +4751,126 @@ GSState::PRIM_OVERLAP GSState::PrimitiveOverlap(bool save_drawlist)
47454751
return GetPrimitiveOverlapDrawlist(save_drawlist);
47464752
}
47474753

4754+
template<u32 primclass, bool fst>
4755+
void GSState::ProcessAutoflushDrawlistImpl(float pos_scale, float tex_scale)
4756+
{
4757+
if (!m_drawlist.empty())
4758+
{
4759+
// Chop the barrier drawlist to fit within each autoflush draw.
4760+
std::vector<size_t> drawlist;
4761+
drawlist.reserve(m_drawlist.capacity());
4762+
for (size_t i = 0, j = 0; i < m_autoflush_list.size(); i++)
4763+
{
4764+
int prims = static_cast<int>(m_autoflush_list[i]);
4765+
while (prims > 0)
4766+
{
4767+
if (m_drawlist[j] > static_cast<size_t>(prims))
4768+
{
4769+
drawlist.push_back(prims);
4770+
m_drawlist[j] -= prims;
4771+
prims = 0;
4772+
}
4773+
else
4774+
{
4775+
drawlist.push_back(m_drawlist[j]);
4776+
prims -= m_drawlist[j];
4777+
m_drawlist[j] = 0;
4778+
j++;
4779+
}
4780+
}
4781+
}
4782+
m_drawlist = std::move(drawlist);
4783+
}
4784+
else
4785+
{
4786+
// If we don't need barrier, simply copy the autoflush list as the drawlist
4787+
// since it makes handling the cases with/without barriers simpler.
4788+
const size_t n_elems = m_autoflush_list.size();
4789+
m_drawlist.resize(n_elems);
4790+
std::memcpy(m_drawlist.data(), m_autoflush_list.data(), sizeof(m_autoflush_list[0]) * n_elems);
4791+
}
4792+
4793+
constexpr int n = GSUtil::GetClassVertexCount(primclass);
4794+
4795+
const GSVertex* RESTRICT verts = m_vertex->buff;
4796+
const u16* RESTRICT index = m_index->buff;
4797+
4798+
const auto ProcessBBox = [](GSVector4 bbox, float scale) {
4799+
bbox += GSVector4(-1.0f, -1.0f, 1.0f, 1.0f); // Expand 1 native pixel.
4800+
bbox *= scale; // Upscaling
4801+
bbox = bbox.floor().xyzw(bbox.ceil()); // Rounding.
4802+
return GSVector4i(bbox);
4803+
};
4804+
4805+
// Compute the texture bboxes.
4806+
for (size_t i = 0, idx = 0; i < m_autoflush_list.size(); i++)
4807+
{
4808+
GSVector4 bbox(FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX);
4809+
4810+
const size_t n_prims = m_autoflush_list[i];
4811+
for (size_t j = 0; j < n_prims; j++, idx += n)
4812+
{
4813+
for (size_t k = 0; k < n; k++)
4814+
{
4815+
const GSVertex& v = verts[index[idx + k]];
4816+
const float q = (primclass == GS_SPRITE_CLASS) ? verts[index[idx + 1]].RGBAQ.Q : v.RGBAQ.Q;
4817+
GSVector4 tex = GetTexCoordsImpl<fst>(v, q);
4818+
bbox = bbox.min(tex).xyzw(bbox.max(tex));
4819+
}
4820+
}
4821+
4822+
m_autoflush_bbox.push_back(ProcessBBox(bbox, tex_scale));
4823+
}
4824+
4825+
// Recompute the position bboxes if needed.
4826+
if (m_drawlist_bbox.size() > 0)
4827+
{
4828+
m_drawlist_bbox.clear();
4829+
4830+
for (size_t i = 0, idx = 0; i < m_drawlist.size(); i++)
4831+
{
4832+
GSVector4i bbox(INT_MAX, INT_MAX, INT_MIN, INT_MIN);
4833+
4834+
const size_t n_prims = m_drawlist[i];
4835+
for (size_t j = 0; j < n_prims; j++, idx += n)
4836+
{
4837+
for (size_t k = 0; k < n; k++)
4838+
{
4839+
bbox = bbox.runion(GetVertexXY(verts[index[idx + k]]));
4840+
}
4841+
}
4842+
4843+
const GSVector4i xyof = m_context->scissor.xyof.xyxy();
4844+
GSVector4 bbox_f = GSVector4(bbox - xyof) / 16.0f;
4845+
m_drawlist_bbox.push_back(ProcessBBox(bbox_f, pos_scale));
4846+
}
4847+
}
4848+
}
4849+
4850+
void GSState::ProcessAutoflushDrawlist(float pos_scale, float tex_scale)
4851+
{
4852+
pxAssertRel(PRIM->TME, "Autoflush drawlist only valid with texture mapping.");
4853+
4854+
switch (m_vt.m_primclass)
4855+
{
4856+
case GS_SPRITE_CLASS:
4857+
if (PRIM->FST)
4858+
ProcessAutoflushDrawlistImpl<GS_SPRITE_CLASS, true>(pos_scale, tex_scale);
4859+
else
4860+
ProcessAutoflushDrawlistImpl<GS_SPRITE_CLASS, false>(pos_scale, tex_scale);
4861+
break;
4862+
case GS_TRIANGLE_CLASS:
4863+
if (PRIM->FST)
4864+
ProcessAutoflushDrawlistImpl<GS_TRIANGLE_CLASS, true>(pos_scale, tex_scale);
4865+
else
4866+
ProcessAutoflushDrawlistImpl<GS_TRIANGLE_CLASS, false>(pos_scale, tex_scale);
4867+
break;
4868+
default:
4869+
pxFail("Autoflush drawlist only for triangles/sprites.");
4870+
break;
4871+
}
4872+
}
4873+
47484874
bool GSState::SpriteDrawWithoutGaps()
47494875
{
47504876
// Check that the height matches. Xenosaga 3 draws a letterbox around
@@ -5312,6 +5438,33 @@ void GSState::GetQuadRasterizedPoints(GSVector4& xy, bool keep_order)
53125438
GetQuadRasterizedPoints(xy, tex_ignore, keep_order);
53135439
}
53145440

5441+
/// Returns true when the current draw is eligible for the batched autoflush list:
/// the texture base pointer equals the frame buffer block (a recursive color draw)
/// and the texture/frame formats match once bit 0 of PSM is ignored
/// (i.e. aside from the 24/32 bit difference).
__forceinline bool GSState::CanUseAutoFlushList() const
{
	// The source texture must start at the same block as the render target.
	if (m_context->TEX0.TBP0 != m_context->FRAME.Block())
		return false;

	// Clearing bit 0 makes the 24 and 32 bit variants of a format compare equal.
	const int psm_mask = ~1;
	const auto tex_psm = m_context->TEX0.PSM & psm_mask;
	const auto frame_psm = m_context->FRAME.PSM & psm_mask;
	return tex_psm == frame_psm;
}
5448+
5449+
/// Discards all batched autoflush state: the start-of-sub-draw index marker is
/// rewound to zero and both the per-sub-draw primitive counts and their
/// bounding boxes are emptied.
__forceinline void GSState::ResetAutoFlushList()
{
	m_autoflush_tail = 0;
	m_autoflush_bbox.clear();
	m_autoflush_list.clear();
}
5455+
5456+
/// Closes the current autoflush sub-draw without flushing.
///
/// If any indices were queued since the last recorded sub-draw, their primitive
/// count is appended to m_autoflush_list and m_autoflush_tail is moved to the
/// current end of the index buffer. Does nothing when no indices are queued.
__forceinline void GSState::UpdateAutoFlushList()
{
	const u32 queued_indices = NumQueuedIndices();
	if (queued_indices == 0)
		return;

	const int verts_per_prim = GSUtil::GetVertexCount(PRIM->PRIM);
	m_autoflush_list.push_back(queued_indices / verts_per_prim);

	// Everything queued from here on belongs to the next sub-draw.
	m_autoflush_tail = m_index->tail;

	// Reset draw rect since it's used for autoflush overlap.
	temp_draw_rect = GSVector4i::zero();

	// Reset TEXFLUSH since this is equivalent to starting a new draw.
	m_texflush_flag = false;
}
5467+
53155468
__forceinline bool GSState::IsAutoFlushDraw(u32 prim, int& tex_layer)
53165469
{
53175470
if (!PRIM->TME || (GSConfig.UserHacks_AutoFlush == GSHWAutoFlushLevel::SpritesOnly && prim != GS_SPRITE))
@@ -5412,9 +5565,21 @@ template<u32 prim>
54125565
__forceinline void GSState::HandleAutoFlush()
54135566
{
54145567
// Kind of a cheat, making the assumption that 2 consecutive fan/strip triangles won't overlap each other (*should* be safe)
5415-
if ((m_index->tail & 1) && (prim == GS_TRIANGLESTRIP || prim == GS_TRIANGLEFAN) && !m_texflush_flag)
5568+
if ((NumQueuedIndices() & 1) && (prim == GS_TRIANGLESTRIP || prim == GS_TRIANGLEFAN) && !m_texflush_flag)
54165569
return;
54175570

5571+
const auto DoFlush = [&]() {
5572+
if (GSConfig.UserHacks_AutoFlush == GSHWAutoFlushLevel::BatchEnabled &&
5573+
GSIsHardwareRenderer() && CanUseAutoFlushList())
5574+
{
5575+
UpdateAutoFlushList();
5576+
}
5577+
else
5578+
{
5579+
Flush(GSFlushReason::AUTOFLUSH);
5580+
}
5581+
};
5582+
54185583
// To briefly explain what's going on here: we are checking for draws that read from a texture while also rendering to it, i.e. the source and the destination are the same memory.
54195584
// Because one page of the texture is buffered in the PS2's texture cache, reads still return the old data even if some of those pixels have been overwritten.
54205585
// So we need to calculate if a page boundary is being crossed for the format it is in and if the same part of the texture being written and read inside the draw.
@@ -5614,7 +5779,7 @@ __forceinline void GSState::HandleAutoFlush()
56145779
return;
56155780
else if (m_texflush_flag)
56165781
{
5617-
Flush(GSFlushReason::AUTOFLUSH);
5782+
DoFlush();
56185783
return;
56195784
}
56205785

@@ -5634,8 +5799,9 @@ __forceinline void GSState::HandleAutoFlush()
56345799
const GSVector4i scissor = m_context->scissor.in;
56355800
GSVector4i old_draw_rect = GSVector4i::zero();
56365801
int current_draw_end = m_index->tail;
5802+
const int current_draw_start = static_cast<int>(m_autoflush_tail);
56375803

5638-
while (current_draw_end >= n)
5804+
while (current_draw_end >= current_draw_start + n)
56395805
{
56405806
for (int i = current_draw_end - 1; i >= current_draw_end - n; i--)
56415807
{
@@ -5685,7 +5851,7 @@ __forceinline void GSState::HandleAutoFlush()
56855851
old_draw_rect = tex_rect.rintersect(old_draw_rect);
56865852
if (!old_draw_rect.rintersect(scissor).rempty())
56875853
{
5688-
Flush(GSFlushReason::AUTOFLUSH);
5854+
DoFlush();
56895855
return;
56905856
}
56915857

@@ -5712,10 +5878,10 @@ __forceinline void GSState::HandleAutoFlush()
57125878
area_out = GSVector4i(area_out.x / frame_psm.pgs.x, area_out.y / frame_psm.pgs.y, area_out.z / frame_psm.pgs.x, area_out.w / frame_psm.pgs.y);
57135879

57145880
if (!area_out.rintersect(tex_rect).rempty())
5715-
Flush(GSFlushReason::AUTOFLUSH);
5881+
DoFlush();
57165882
}
57175883
else // Formats are too different so just flush it.
5718-
Flush(GSFlushReason::AUTOFLUSH);
5884+
DoFlush();
57195885
}
57205886
}
57215887
}
@@ -5851,10 +6017,7 @@ __forceinline void GSState::VertexKick(u32 skip)
58516017
return;
58526018
}
58536019

5854-
if (CheckOverlapVerts(n))
5855-
Flush(CONTEXTCHANGE);
5856-
5857-
if (auto_flush && skip == 0 && m_index->tail > 0 && ((m_vertex->tail + 1) - m_vertex->head) >= n)
6020+
if (auto_flush && skip == 0 && NumQueuedIndices() > 0 && ((m_vertex->tail + 1) - m_vertex->head) >= n)
58586021
{
58596022
HandleAutoFlush<prim>();
58606023
}
@@ -6097,7 +6260,7 @@ __forceinline void GSState::VertexKick(u32 skip)
60976260
// Update rectangle for the current draw. We can use the re-integer coordinates from min/max here.
60986261
const GSVector4i draw_min = pmin.zwzw();
60996262
const GSVector4i draw_max = pmax;
6100-
if (m_index->tail != n)
6263+
if (NumQueuedIndices() > n)
61016264
temp_draw_rect = temp_draw_rect.min_i32(draw_min).blend32<12>(temp_draw_rect.max_i32(draw_max));
61026265
else
61036266
temp_draw_rect = draw_min.blend32<12>(draw_max);

pcsx2/GS/GSState.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,13 @@ class GSState : public GSAlignedClass<32>
162162
GSIndexBuff* m_index;
163163

164164
GSVertexBuff m_draw_vertex = {};
165+
166+
u32 m_autoflush_tail = 0;
167+
168+
/// Number of indices queued after m_autoflush_tail, i.e. the distance from
/// that marker to the current end of the index buffer.
__forceinline u32 NumQueuedIndices() const
{
	const auto index_end = m_index->tail;
	return index_end - m_autoflush_tail;
}
165172

166173
struct
167174
{
@@ -186,6 +193,10 @@ class GSState : public GSAlignedClass<32>
186193
void UpdateVertexKick();
187194

188195
void GrowVertexBuffer();
196+
bool CanUseAutoFlushList() const;
197+
/// True once the autoflush tail marker has moved off zero.
__forceinline bool HasAutoFlushList() const { return m_autoflush_tail != 0; }
198+
void ResetAutoFlushList();
199+
void UpdateAutoFlushList();
189200
bool IsAutoFlushDraw(u32 prim, int& tex_layer);
190201
template<u32 prim> void HandleAutoFlush();
191202
bool EarlyDetectShuffle(u32 prim);
@@ -395,6 +406,9 @@ class GSState : public GSAlignedClass<32>
395406
std::vector<size_t> m_drawlist;
396407
std::vector<GSVector4i> m_drawlist_bbox;
397408

409+
std::vector<size_t> m_autoflush_list;
410+
std::vector<GSVector4i> m_autoflush_bbox;
411+
398412
struct GSPCRTCRegs
399413
{
400414
struct PCRTCDisplay
@@ -537,6 +551,9 @@ class GSState : public GSAlignedClass<32>
537551
PRIM_OVERLAP GetPrimitiveOverlapDrawlistImpl(bool save_drawlist = false, bool save_bbox = false, float bbox_scale = 1.0f);
538552
PRIM_OVERLAP GetPrimitiveOverlapDrawlist(bool save_drawlist = false, bool save_bbox = false, float bbox_scale = 1.0f);
539553
PRIM_OVERLAP PrimitiveOverlap(bool save_drawlist = false);
554+
template<u32 primclass, bool fst>
555+
void ProcessAutoflushDrawlistImpl(float pos_scale, float tex_scale);
556+
void ProcessAutoflushDrawlist(float pos_scale, float tex_scale);
540557
bool SpriteDrawWithoutGaps();
541558
void CalculatePrimitiveCoversWithoutGaps();
542559
GIFRegTEX0 GetTex0Layer(u32 lod);

pcsx2/GS/Renderers/Common/GSDevice.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,9 @@ struct alignas(16) GSHWDrawConfig
822822
u32 indices_per_prim; ///< Number of indices that make up one primitive
823823
const std::vector<size_t>* drawlist; ///< For reducing barriers on sprites
824824
const std::vector<GSVector4i>* drawlist_bbox; ///< For RT copy when barriers not available.
825+
const std::vector<size_t>* autoflush_list; ///< For batched autoflush drawing.
826+
const std::vector<GSVector4i>* autoflush_bbox; ///< For batched autoflush drawing.
827+
bool autoflush; ///< Do a batched autoflush draw.
825828
GSVector4i scissor; ///< Scissor rect
826829
GSVector4i drawarea; ///< Area in the framebuffer which will be modified.
827830
Topology topology; ///< Draw topology

0 commit comments

Comments
 (0)