@@ -2365,6 +2365,7 @@ void GSState::FlushDraw(GSFlushReason reason)
23652365
23662366 m_dirty_gs_regs = 0 ;
23672367 temp_draw_rect = GSVector4i::zero ();
2368+ ResetAutoFlushList ();
23682369 }
23692370
23702371 m_state_flush_reason = GSFlushReason::UNKNOWN;
@@ -2553,6 +2554,11 @@ void GSState::FlushPrim()
25532554 pxAssert ((int )unused < GSUtil::GetVertexCount (PRIM->PRIM ));
25542555 }
25552556
2557+ if (HasAutoFlushList ())
2558+ {
2559+ UpdateAutoFlushList ();
2560+ }
2561+
25562562 // If the PSM format of Z is invalid, but it is masked (no write) and ZTST is set to ALWAYS pass (no test, just allow)
25572563 // we can ignore the Z format, since it won't be used in the draw (Star Ocean 3 transitions)
25582564#ifdef PCSX2_DEVBUILD
@@ -4745,6 +4751,126 @@ GSState::PRIM_OVERLAP GSState::PrimitiveOverlap(bool save_drawlist)
47454751 return GetPrimitiveOverlapDrawlist (save_drawlist);
47464752}
47474753
4754+ template <u32 primclass, bool fst>
4755+ void GSState::ProcessAutoflushDrawlistImpl (float pos_scale, float tex_scale)
4756+ {
4757+ if (!m_drawlist.empty ())
4758+ {
4759+ // Chop the barrier drawlist to fit within each autoflush draw.
4760+ std::vector<size_t > drawlist;
4761+ drawlist.reserve (m_drawlist.capacity ());
4762+ for (size_t i = 0 , j = 0 ; i < m_autoflush_list.size (); i++)
4763+ {
4764+ int prims = static_cast <int >(m_autoflush_list[i]);
4765+ while (prims > 0 )
4766+ {
4767+ if (m_drawlist[j] > static_cast <size_t >(prims))
4768+ {
4769+ drawlist.push_back (prims);
4770+ m_drawlist[j] -= prims;
4771+ prims = 0 ;
4772+ }
4773+ else
4774+ {
4775+ drawlist.push_back (m_drawlist[j]);
4776+ prims -= m_drawlist[j];
4777+ m_drawlist[j] = 0 ;
4778+ j++;
4779+ }
4780+ }
4781+ }
4782+ m_drawlist = std::move (drawlist);
4783+ }
4784+ else
4785+ {
4786+ // If we don't need barrier, simply copy the autoflush list as the drawlist
4787+ // since it makes handling the cases with/without barriers simpler.
4788+ const size_t n_elems = m_autoflush_list.size ();
4789+ m_drawlist.resize (n_elems);
4790+ std::memcpy (m_drawlist.data (), m_autoflush_list.data (), sizeof (m_autoflush_list[0 ]) * n_elems);
4791+ }
4792+
4793+ constexpr int n = GSUtil::GetClassVertexCount (primclass);
4794+
4795+ const GSVertex* RESTRICT verts = m_vertex->buff ;
4796+ const u16 * RESTRICT index = m_index->buff ;
4797+
4798+ const auto ProcessBBox = [](GSVector4 bbox, float scale) {
4799+ bbox += GSVector4 (-1 .0f , -1 .0f , 1 .0f , 1 .0f ); // Expand 1 native pixel.
4800+ bbox *= scale; // Upscaling
4801+ bbox = bbox.floor ().xyzw (bbox.ceil ()); // Rounding.
4802+ return GSVector4i (bbox);
4803+ };
4804+
4805+ // Compute the texture bboxes.
4806+ for (size_t i = 0 , idx = 0 ; i < m_autoflush_list.size (); i++)
4807+ {
4808+ GSVector4 bbox (FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX);
4809+
4810+ const size_t n_prims = m_autoflush_list[i];
4811+ for (size_t j = 0 ; j < n_prims; j++, idx += n)
4812+ {
4813+ for (size_t k = 0 ; k < n; k++)
4814+ {
4815+ const GSVertex& v = verts[index[idx + k]];
4816+ const float q = (primclass == GS_SPRITE_CLASS) ? verts[index[idx + 1 ]].RGBAQ .Q : v.RGBAQ .Q ;
4817+ GSVector4 tex = GetTexCoordsImpl<fst>(v, q);
4818+ bbox = bbox.min (tex).xyzw (bbox.max (tex));
4819+ }
4820+ }
4821+
4822+ m_autoflush_bbox.push_back (ProcessBBox (bbox, tex_scale));
4823+ }
4824+
4825+ // Recompute the position bboxes if needed.
4826+ if (m_drawlist_bbox.size () > 0 )
4827+ {
4828+ m_drawlist_bbox.clear ();
4829+
4830+ for (size_t i = 0 , idx = 0 ; i < m_drawlist.size (); i++)
4831+ {
4832+ GSVector4i bbox (INT_MAX, INT_MAX, INT_MIN, INT_MIN);
4833+
4834+ const size_t n_prims = m_drawlist[i];
4835+ for (size_t j = 0 ; j < n_prims; j++, idx += n)
4836+ {
4837+ for (size_t k = 0 ; k < n; k++)
4838+ {
4839+ bbox = bbox.runion (GetVertexXY (verts[index[idx + k]]));
4840+ }
4841+ }
4842+
4843+ const GSVector4i xyof = m_context->scissor .xyof .xyxy ();
4844+ GSVector4 bbox_f = GSVector4 (bbox - xyof) / 16 .0f ;
4845+ m_drawlist_bbox.push_back (ProcessBBox (bbox_f, pos_scale));
4846+ }
4847+ }
4848+ }
4849+
4850+ void GSState::ProcessAutoflushDrawlist (float pos_scale, float tex_scale)
4851+ {
4852+ pxAssertRel (PRIM->TME , " Autoflush drawlist only valid with texture mapping." );
4853+
4854+ switch (m_vt.m_primclass )
4855+ {
4856+ case GS_SPRITE_CLASS:
4857+ if (PRIM->FST )
4858+ ProcessAutoflushDrawlistImpl<GS_SPRITE_CLASS, true >(pos_scale, tex_scale);
4859+ else
4860+ ProcessAutoflushDrawlistImpl<GS_SPRITE_CLASS, false >(pos_scale, tex_scale);
4861+ break ;
4862+ case GS_TRIANGLE_CLASS:
4863+ if (PRIM->FST )
4864+ ProcessAutoflushDrawlistImpl<GS_TRIANGLE_CLASS, true >(pos_scale, tex_scale);
4865+ else
4866+ ProcessAutoflushDrawlistImpl<GS_TRIANGLE_CLASS, false >(pos_scale, tex_scale);
4867+ break ;
4868+ default :
4869+ pxFail (" Autoflush drawlist only for triangles/sprites." );
4870+ break ;
4871+ }
4872+ }
4873+
47484874bool GSState::SpriteDrawWithoutGaps ()
47494875{
47504876 // Check that the height matches. Xenosaga 3 draws a letterbox around
@@ -5312,6 +5438,33 @@ void GSState::GetQuadRasterizedPoints(GSVector4& xy, bool keep_order)
53125438 GetQuadRasterizedPoints (xy, tex_ignore, keep_order);
53135439}
53145440
5441+ __forceinline bool GSState::CanUseAutoFlushList () const
5442+ {
5443+ // Can combine if recursive color draw and source/RT are basically the same
5444+ // format (aside from 24/32 bit difference).
5445+ return m_context->TEX0 .TBP0 == m_context->FRAME .Block () &&
5446+ (m_context->TEX0 .PSM & ~1 ) == (m_context->FRAME .PSM & ~1 );
5447+ }
5448+
5449+ __forceinline void GSState::ResetAutoFlushList ()
5450+ {
5451+ m_autoflush_list.clear ();
5452+ m_autoflush_bbox.clear ();
5453+ m_autoflush_tail = 0 ;
5454+ }
5455+
5456+ __forceinline void GSState::UpdateAutoFlushList ()
5457+ {
5458+ if (NumQueuedIndices () > 0 )
5459+ {
5460+ const int n = GSUtil::GetVertexCount (PRIM->PRIM );
5461+ m_autoflush_list.push_back (NumQueuedIndices () / n);
5462+ m_autoflush_tail = m_index->tail ;
5463+ temp_draw_rect = GSVector4i::zero (); // Reset draw rect since it's used for autoflush overlap.
5464+ m_texflush_flag = false ; // Reset TEXFLUSH since this is equivalent to starting a new draw.
5465+ }
5466+ }
5467+
53155468__forceinline bool GSState::IsAutoFlushDraw (u32 prim, int & tex_layer)
53165469{
53175470 if (!PRIM->TME || (GSConfig.UserHacks_AutoFlush == GSHWAutoFlushLevel::SpritesOnly && prim != GS_SPRITE))
@@ -5412,9 +5565,21 @@ template<u32 prim>
54125565__forceinline void GSState::HandleAutoFlush ()
54135566{
54145567 // Kind of a cheat, making the assumption that 2 consecutive fan/strip triangles won't overlap each other (*should* be safe)
5415- if ((m_index-> tail & 1 ) && (prim == GS_TRIANGLESTRIP || prim == GS_TRIANGLEFAN) && !m_texflush_flag)
5568+ if ((NumQueuedIndices () & 1 ) && (prim == GS_TRIANGLESTRIP || prim == GS_TRIANGLEFAN) && !m_texflush_flag)
54165569 return ;
54175570
5571+ const auto DoFlush = [&]() {
5572+ if (GSConfig.UserHacks_AutoFlush == GSHWAutoFlushLevel::BatchEnabled &&
5573+ GSIsHardwareRenderer () && CanUseAutoFlushList ())
5574+ {
5575+ UpdateAutoFlushList ();
5576+ }
5577+ else
5578+ {
5579+ Flush (GSFlushReason::AUTOFLUSH);
5580+ }
5581+ };
5582+
54185583 // To briefly explain what's going on here, what we are checking for is draws over a texture when the source and destination are themselves.
54195584 // Because one page of the texture gets buffered in the Texture Cache (the PS2's one) if any of those pixels are overwritten, you still read the old data.
54205585 // So we need to calculate if a page boundary is being crossed for the format it is in and if the same part of the texture being written and read inside the draw.
@@ -5614,7 +5779,7 @@ __forceinline void GSState::HandleAutoFlush()
56145779 return ;
56155780 else if (m_texflush_flag)
56165781 {
5617- Flush (GSFlushReason::AUTOFLUSH );
5782+ DoFlush ( );
56185783 return ;
56195784 }
56205785
@@ -5634,8 +5799,9 @@ __forceinline void GSState::HandleAutoFlush()
56345799 const GSVector4i scissor = m_context->scissor .in ;
56355800 GSVector4i old_draw_rect = GSVector4i::zero ();
56365801 int current_draw_end = m_index->tail ;
5802+ const int current_draw_start = static_cast <int >(m_autoflush_tail);
56375803
5638- while (current_draw_end >= n)
5804+ while (current_draw_end >= current_draw_start + n)
56395805 {
56405806 for (int i = current_draw_end - 1 ; i >= current_draw_end - n; i--)
56415807 {
@@ -5685,7 +5851,7 @@ __forceinline void GSState::HandleAutoFlush()
56855851 old_draw_rect = tex_rect.rintersect (old_draw_rect);
56865852 if (!old_draw_rect.rintersect (scissor).rempty ())
56875853 {
5688- Flush (GSFlushReason::AUTOFLUSH );
5854+ DoFlush ( );
56895855 return ;
56905856 }
56915857
@@ -5712,10 +5878,10 @@ __forceinline void GSState::HandleAutoFlush()
57125878 area_out = GSVector4i (area_out.x / frame_psm.pgs .x , area_out.y / frame_psm.pgs .y , area_out.z / frame_psm.pgs .x , area_out.w / frame_psm.pgs .y );
57135879
57145880 if (!area_out.rintersect (tex_rect).rempty ())
5715- Flush (GSFlushReason::AUTOFLUSH );
5881+ DoFlush ( );
57165882 }
57175883 else // Formats are too different so just flush it.
5718- Flush (GSFlushReason::AUTOFLUSH );
5884+ DoFlush ( );
57195885 }
57205886 }
57215887 }
@@ -5851,10 +6017,7 @@ __forceinline void GSState::VertexKick(u32 skip)
58516017 return ;
58526018 }
58536019
5854- if (CheckOverlapVerts (n))
5855- Flush (CONTEXTCHANGE);
5856-
5857- if (auto_flush && skip == 0 && m_index->tail > 0 && ((m_vertex->tail + 1 ) - m_vertex->head ) >= n)
6020+ if (auto_flush && skip == 0 && NumQueuedIndices () > 0 && ((m_vertex->tail + 1 ) - m_vertex->head ) >= n)
58586021 {
58596022 HandleAutoFlush<prim>();
58606023 }
@@ -6097,7 +6260,7 @@ __forceinline void GSState::VertexKick(u32 skip)
60976260 // Update rectangle for the current draw. We can use the re-integer coordinates from min/max here.
60986261 const GSVector4i draw_min = pmin.zwzw ();
60996262 const GSVector4i draw_max = pmax;
6100- if (m_index-> tail != n)
6263+ if (NumQueuedIndices () > n)
61016264 temp_draw_rect = temp_draw_rect.min_i32 (draw_min).blend32 <12 >(temp_draw_rect.max_i32 (draw_max));
61026265 else
61036266 temp_draw_rect = draw_min.blend32 <12 >(draw_max);
0 commit comments