Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
385 changes: 385 additions & 0 deletions src/core/gte-instructions.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,385 @@
/***************************************************************************
* Copyright (C) 2026 PCSX-Redux authors *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. *
***************************************************************************/

// GTE instruction implementations and public dispatch methods.
//
// Each instruction is implemented as a template parameterized on sf (shift
// factor) and lm (limit mode). The public methods decode these bits from
// the instruction encoding and dispatch to the right instantiation.
//
// MVMVA is further templatized on mx, v, and cv for full compile-time
// elimination of the matrix/vector selection branches.

#include "core/gte.h"
#include "core/gte-internal.h"
#include "core/pgxp_debug.h"
#include "core/pgxp_gte.h"

using namespace PCSX::GTEImpl;

// ============================================================================
// Template instruction implementations
// ============================================================================

// RTPS core: perspective transform for vertex v.
// When last=true, computes the depth queue interpolation at the end.
template <bool sf, bool lm, int v>
static void rtps(bool last) {
// MAC1..3: translation component (pre-shifted left by 12) plus one rotation
// matrix row multiplied with vertex `v`, passed through A1/A2/A3.
// A3 additionally fills rawMac3 through its second argument; that value,
// not mac3(), is what feeds IR3 and the Z push below.
mac1() = A1<sf>(int44(trX() << 12) +
r11() * vertexX<v>() + r12() * vertexY<v>() + r13() * vertexZ<v>());
mac2() = A2<sf>(int44(trY() << 12) +
r21() * vertexX<v>() + r22() * vertexY<v>() + r23() * vertexZ<v>());
int64_t rawMac3;
mac3() = A3<sf>(int44(trZ() << 12) +
r31() * vertexX<v>() + r32() * vertexY<v>() + r33() * vertexZ<v>(), rawMac3);

// IR1/IR2 are limited from the MAC registers; IR3 uses the sf-aware limiter
// on rawMac3 instead.
ir1() = limB1<lm>(mac1());
ir2() = limB2<lm>(mac2());
ir3() = limB3sf<sf, lm>(rawMac3);

// NOTE(review): sz3() is read right after this push, so pushZ presumably
// stores the limited value as the newest Z entry -- confirm in pushZ.
pushZ(limD<true>(rawMac3));

// Projection factor shared by the X and Y screen coordinates.
int32_t hOverSz3 = gteDivide(gteH(), sz3());

// Move the two newest screen-XY entries down one slot; sx2()/sy2() below
// write the new entry.
sxy0() = sxy1();
sxy1() = sxy2();

// Only the X coordinate is scaled (by 0.75) when the Widescreen option is on.
double widescreenFactor = PCSX::g_emulator->config().Widescreen ? 0.75 : 1.0;
// ir1()*hOverSz3 can exceed int32_t (hOverSz3 is up to 0x1FFFF), so widen ir first
sx2() = limG1(F(gteOFX() + (int64_t)ir1() * hOverSz3 * widescreenFactor) >> 16);
sy2() = limG2(F(gteOFY() + (int64_t)ir2() * hOverSz3) >> 16);

// Hand PGXP the same sums before the >> 16 (through the limG1ia/limG2ia
// variants), a depth value of at least gteH() / 2, and the packed SXY2 word.
PGXP_pushSXYZ2s(limG1ia(gteOFX() + (int64_t)ir1() * hOverSz3 * widescreenFactor),
limG2ia(gteOFY() + (int64_t)ir2() * hOverSz3),
std::max((int)sz3(), gteH() / 2), sxy2());
Comment thread
coderabbitai[bot] marked this conversation as resolved.

if (last) {
    // Depth-queue step, executed only for the final vertex of the instruction:
    // MAC0 = F(DQB + DQA * (H / SZ3)), IR0 = limH(raw MAC0).
    int64_t rawMac0;
    // hOverSz3 can reach 0x1FFFF, so the product with DQA must be formed in
    // 64 bits; a 32-bit multiply would wrap before F() can range-check the
    // sum, corrupting MAC0, its overflow flags and IR0.
    mac0() = F(gteDQB() + (int64_t)gteDQA() * hOverSz3, rawMac0);
    ir0() = limH(rawMac0);
Comment on lines +73 to +75
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, let's examine the specific lines mentioned in the review
cat -n src/core/gte-instructions.cc | sed -n '70,80p'

Repository: grumpycoders/pcsx-redux

Length of output: 430


🏁 Script executed:

# Check lines 357-368
cat -n src/core/gte-instructions.cc | sed -n '355,370p'

Repository: grumpycoders/pcsx-redux

Length of output: 663


🏁 Script executed:

# Let's also check lines around 272-276 for NCLIP
cat -n src/core/gte-instructions.cc | sed -n '270,280p'

Repository: grumpycoders/pcsx-redux

Length of output: 569


🏁 Script executed:

# Search for the function definitions to understand their return types
rg -A 2 "int64_t|int32_t|uint32_t" src/core/gte-instructions.cc | head -100

Repository: grumpycoders/pcsx-redux

Length of output: 3279


🏁 Script executed:

# Search for PGXP_NLCIP_valid or similar
rg "PGXP_N" src/core/

Repository: grumpycoders/pcsx-redux

Length of output: 466


Widen the MAC0 math before passing it to F().

Line 74 and Lines 360-367 still do the multiply/accumulate in 32-bit first. gteDQA() * hOverSz3 and the ZSF * SZn sums can overflow before F(..., rawMac0) sees them, which gives you wrapped MAC0, wrong MAC0 flags, and wrong OTZ on large-depth cases. This contradicts the existing pattern in the same file (lines 65-66) where hOverSz3 multiplications are already widened to int64_t with an explicit comment noting the overflow risk.

Suggested fix
     if (last) {
         int64_t rawMac0;
-        mac0() = F(gteDQB() + gteDQA() * hOverSz3, rawMac0);
+        mac0() = F(gteDQB() + (int64_t)gteDQA() * hOverSz3, rawMac0);
         ir0() = limH(rawMac0);
     }
 }
@@
 void PCSX::GTE::AVSZ3(uint32_t code) {
     gteFlag() = 0;
     int64_t rawMac0;
-    mac0() = F(gteZSF3() * sz1() + gteZSF3() * sz2() + gteZSF3() * sz3(), rawMac0);
+    mac0() = F((int64_t)gteZSF3() * sz1() + (int64_t)gteZSF3() * sz2() +
+                   (int64_t)gteZSF3() * sz3(),
+               rawMac0);
     otz() = limD<true>(rawMac0);
 }
@@
 void PCSX::GTE::AVSZ4(uint32_t code) {
     gteFlag() = 0;
     int64_t rawMac0;
-    mac0() = F(gteZSF4() * sz0() + gteZSF4() * sz1() + gteZSF4() * sz2() + gteZSF4() * sz3(), rawMac0);
+    mac0() = F((int64_t)gteZSF4() * sz0() + (int64_t)gteZSF4() * sz1() +
+                   (int64_t)gteZSF4() * sz2() + (int64_t)gteZSF4() * sz3(),
+               rawMac0);
     otz() = limD<true>(rawMac0);
 }

Also applies to: 357-368

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/core/gte-instructions.cc` around lines 73 - 75, The multiply/accumulate
for MAC0 is currently done in 32-bit and can overflow before F() is called;
change the computation so the product and sum are performed in 64-bit (e.g., use
an int64_t accumulator) before calling F(), then pass that widened value to F()
and to limH(), updating the mac0() assignment site that uses gteDQB() + gteDQA()
* hOverSz3 as well as the similar ZSF * SZn summation sites around the block
handling OTZ (the other occurrence in the same function). Concretely,
create/retain rawMac0 as an int64_t, cast gteDQA(), hOverSz3, ZSF, SZn, etc. to
int64_t before multiplication/addition, compute the full 64-bit sum, then call
F(<widened sum>, rawMac0) and pass rawMac0 to limH().

}
}

// OP: outer product using rotation matrix diagonal
template <bool sf, bool lm>
void PCSX::GTE::op(uint32_t op) {
    gteFlag() = 0;

    // Snapshot every operand first: IR1..3 are both inputs and outputs here.
    const auto diag1 = r11();
    const auto diag2 = r22();
    const auto diag3 = r33();
    const auto x = ir1();
    const auto y = ir2();
    const auto z = ir3();

    // Cross-product style combination of the matrix diagonal with IR1..3.
    mac1() = A1<sf>(diag2 * z - diag3 * y);
    mac2() = A2<sf>(diag3 * x - diag1 * z);
    mac3() = A3<sf>(diag1 * y - diag2 * x);

    // Limit the three results back into IR1..3.
    ir1() = limB1<lm>(mac1());
    ir2() = limB2<lm>(mac2());
    ir3() = limB3<lm>(mac3());
}

Check warning on line 89 in src/core/gte-instructions.cc

View check run for this annotation

CodeScene Delta Analysis / CodeScene Code Health Review (main)

❌ New issue: Code Duplication

The module contains 11 functions with similar structure: PCSX::GTE::NCCS,PCSX::GTE::NCCT,PCSX::GTE::NCDS,PCSX::GTE::NCDT and 7 more functions. Avoid duplicated, aka copy-pasted, code inside the module. More duplication lowers the code health.

// DPCS body: runs depthCue on the RGB register's three channels, each shifted
// left by 16, then pushes the resulting colour.
template <bool sf, bool lm>
void PCSX::GTE::dpcs(uint32_t op) {
    gteFlag() = 0;

    const auto red = rgbR() << 16;
    const auto green = rgbG() << 16;
    const auto blue = rgbB() << 16;
    depthCue<sf, lm>(red, green, blue);

    pushColor();
}

// INTPL body: same depthCue step as DPCS, but the inputs are IR1..3 shifted
// left by 12 instead of the RGB channels.
template <bool sf, bool lm>
void PCSX::GTE::intpl(uint32_t op) {
    gteFlag() = 0;

    const auto first = ir1() << 12;
    const auto second = ir2() << 12;
    const auto third = ir3() << 12;
    depthCue<sf, lm>(first, second, third);

    pushColor();
}

// MVMVA: fully templatized wrapper for dispatch table
template <bool sf, bool lm, int mx, int v, int cv>
static void mvmvaImpl() {
    // Every GTE instruction starts from a cleared flag register.
    gteFlag() = 0;

    // All five selectors are template arguments, so the multiply is fully
    // specialised for this combination.
    matrixVectorMultiply<sf, lm, mx, v, cv>();
}

// NCDS core: used by NCDS (v=0) and NCDT (v=0,1,2)
template <bool sf, bool lm, int v>
static void ncdsCore() {
    // Pipeline for one vertex; the flag register is NOT reset here, callers
    // (NCDS / NCDT) clear it once before the first vertex.
    lightTransform<sf, lm, v>();  // stage 1: light step for vertex v
    colorMatrix<sf, lm>();        // stage 2: colour matrix step
    depthCueColor<sf, lm>();      // stage 3: depth-cue colour step
    pushColor();                  // stage 4: push the result
}

// CDP body: the NCDS pipeline without the leading lightTransform stage.
template <bool sf, bool lm>
void PCSX::GTE::cdp(uint32_t op) {
    gteFlag() = 0;

    colorMatrix<sf, lm>();
    depthCueColor<sf, lm>();

    pushColor();
}

// NCCS core: used by NCCS (v=0) and NCCT (v=0,1,2)
template <bool sf, bool lm, int v>
static void nccsCore() {
    // Pipeline for one vertex; the flag register is NOT reset here, callers
    // (NCCS / NCCT) clear it once before the first vertex.
    lightTransform<sf, lm, v>();  // stage 1: light step for vertex v
    colorMatrix<sf, lm>();        // stage 2: colour matrix step
    colorApply<sf, lm>();         // stage 3: colour apply step
    pushColor();                  // stage 4: push the result
}

// CC body: the NCCS pipeline without the leading lightTransform stage.
template <bool sf, bool lm>
void PCSX::GTE::cc(uint32_t op) {
    gteFlag() = 0;

    colorMatrix<sf, lm>();
    colorApply<sf, lm>();

    pushColor();
}

// NCS core: used by NCS (v=0) and NCT (v=0,1,2)
template <bool sf, bool lm, int v>
static void ncsCore() {
    // Pipeline for one vertex; the flag register is NOT reset here, callers
    // (NCS / NCT) clear it once before the first vertex.
    lightTransform<sf, lm, v>();  // stage 1: light step for vertex v
    colorMatrix<sf, lm>();        // stage 2: colour matrix step
    pushColor();                  // stage 3: push the result
}

// SQR body: squares IR1..3 into MAC1..3 (through A1..A3) and limits the
// results back into IR1..3.
template <bool sf, bool lm>
void PCSX::GTE::sqr(uint32_t op) {
    gteFlag() = 0;

    const auto x = ir1();
    const auto y = ir2();
    const auto z = ir3();

    mac1() = A1<sf>(x * x);
    mac2() = A2<sf>(y * y);
    mac3() = A3<sf>(z * z);

    ir1() = limB1<lm>(mac1());
    ir2() = limB2<lm>(mac2());
    ir3() = limB3<lm>(mac3());
}

// DCPL body: a single depthCueColor step followed by a colour push.
template <bool sf, bool lm>
void PCSX::GTE::dcpl(uint32_t op) {
    gteFlag() = 0;

    depthCueColor<sf, lm>();

    pushColor();
}

// DPCT body: three rounds of "depthCue on RGB0, then pushColor".
// NOTE(review): every round reads the RGB0 slot, so pushColor() presumably
// advances the colour entries between rounds -- confirm in pushColor.
template <bool sf, bool lm>
void PCSX::GTE::dpct(uint32_t op) {
    gteFlag() = 0;

    int remaining = 3;
    while (remaining-- > 0) {
        const auto red = rgb0R() << 16;
        const auto green = rgb0G() << 16;
        const auto blue = rgb0B() << 16;
        depthCue<sf, lm>(red, green, blue);
        pushColor();
    }
}

// GPF body: MACn = An(IR0 * IRn), IRn = limBn(MACn), then a colour push.
template <bool sf, bool lm>
void PCSX::GTE::gpf(uint32_t op) {
    gteFlag() = 0;

    // IR0 is the common factor of all three products.
    const auto factor = ir0();
    const auto x = ir1();
    const auto y = ir2();
    const auto z = ir3();

    mac1() = A1<sf>(factor * x);
    mac2() = A2<sf>(factor * y);
    mac3() = A3<sf>(factor * z);

    ir1() = limB1<lm>(mac1());
    ir2() = limB2<lm>(mac2());
    ir3() = limB3<lm>(mac3());

    pushColor();
}

// GPL body: like GPF, but the previous MAC1..3 values are added to the
// IR0 * IRn products before they go through A1..A3.
template <bool sf, bool lm>
void PCSX::GTE::gpl(uint32_t op) {
    gteFlag() = 0;

    // Read the old accumulators into 64-bit temporaries up front. With sf set
    // they are scaled by 1 << 12, which would not fit in 32 bits.
    int64_t base1 = mac1();
    int64_t base2 = mac2();
    int64_t base3 = mac3();
    if constexpr (sf) {
        base1 <<= 12;
        base2 <<= 12;
        base3 <<= 12;
    }

    mac1() = A1<sf>(base1 + ir0() * ir1());
    mac2() = A2<sf>(base2 + ir0() * ir2());
    mac3() = A3<sf>(base3 + ir0() * ir3());

    ir1() = limB1<lm>(mac1());
    ir2() = limB2<lm>(mac2());
    ir3() = limB3<lm>(mac3());

    pushColor();
}

// ============================================================================
// MVMVA dispatch table (256 entries: sf * lm * mx * v * cv)
// ============================================================================

namespace {

// Adapter handed to makeMvmvaTable as a template-template argument: for one
// <sf, lm, mx, v, cv> combination it exposes a static fn() that forwards to
// the matching mvmvaImpl instantiation.
template <bool sf, bool lm, int mx, int v, int cv>
struct MvmvaEntry {
static void fn() { mvmvaImpl<sf, lm, mx, v, cv>(); }
};

// Signature of every table slot: no arguments, no result.
using MvmvaFn = void (*)();

// Compile-time table of 256 entries, indexed by PCSX::GTE::MVMVA().
// NOTE(review): the index-to-template-argument mapping lives in
// makeMvmvaTable (not visible here); it has to agree with the bit packing
// MVMVA() uses to build its index -- confirm when touching either side.
constexpr auto mvmvaTable =
PCSX::GTEImpl::makeMvmvaTable<MvmvaFn, MvmvaEntry>(std::make_index_sequence<256>{});

} // anonymous namespace

// ============================================================================
// Public dispatch methods
// ============================================================================

// Dispatch helper for the public GTE entry points.
//
// Must be expanded where a `code` variable (the raw instruction word) is in
// scope. It masks `code` to its low 25 bits, asks sfLmIndex() for a selector
// and calls the matching <sf, lm> instantiation of `method`, passing the
// masked opcode followed by any extra macro arguments. Selectors outside
// 0..3 fall through the switch without calling anything.
//
// `, ##__VA_ARGS__` relies on the comma-swallowing compiler extension so the
// macro can be used without extra arguments. The macro is #undef'd at the end
// of the file.
#define GTE_DISPATCH_SF_LM(method, ...) \
do { \
uint32_t _op = code & 0x1ffffff; \
switch (sfLmIndex(_op)) { \
case 0: method<false, false>(_op, ##__VA_ARGS__); break; \
case 1: method<false, true>(_op, ##__VA_ARGS__); break; \
case 2: method<true, false>(_op, ##__VA_ARGS__); break; \
case 3: method<true, true>(_op, ##__VA_ARGS__); break; \
} \
} while (0)

// RTPS: transforms vertex 0 only, so the single rtps call is also the "last"
// one and performs the depth-queue step.
void PCSX::GTE::RTPS(uint32_t code) {
    const uint32_t opcode = code & 0x1ffffff;
    gteFlag() = 0;

    switch (sfLmIndex(opcode)) {
        case 0:
            rtps<false, false, 0>(true);
            break;
        case 1:
            rtps<false, true, 0>(true);
            break;
        case 2:
            rtps<true, false, 0>(true);
            break;
        case 3:
            rtps<true, true, 0>(true);
            break;
    }
}

// RTPT: transforms vertices 0, 1 and 2 in order; only the third call is
// flagged as "last", so the depth-queue step runs once, at the end.
void PCSX::GTE::RTPT(uint32_t code) {
    const uint32_t opcode = code & 0x1ffffff;
    gteFlag() = 0;

    switch (sfLmIndex(opcode)) {
        case 0:
            rtps<false, false, 0>(false);
            rtps<false, false, 1>(false);
            rtps<false, false, 2>(true);
            break;
        case 1:
            rtps<false, true, 0>(false);
            rtps<false, true, 1>(false);
            rtps<false, true, 2>(true);
            break;
        case 2:
            rtps<true, false, 0>(false);
            rtps<true, false, 1>(false);
            rtps<true, false, 2>(true);
            break;
        case 3:
            rtps<true, true, 0>(false);
            rtps<true, true, 1>(false);
            rtps<true, true, 2>(true);
            break;
    }
}

// NCLIP: stores F(...) of a six-term sum of screen-coordinate products in
// MAC0. `code` is not inspected; the instruction has no sf/lm variants here.
void PCSX::GTE::NCLIP(uint32_t code) {
gteFlag() = 0;
// When PGXP reports usable data for the three screen-XY entries, its own
// NCLIP result is used instead of the integer computation.
if (PGXP_NLCIP_valid(sxy0(), sxy1(), sxy2()))
mac0() = F(PGXP_NCLIP());
else
// Only the first product carries a cast, and that is sufficient for the sum:
// + and - associate left to right, so the running total is int64_t from the
// first term on and every later product is widened before it is added.
// The later products themselves are still formed in `int`.
// NOTE(review): that is safe only if the sx/sy accessors yield 16-bit values
// (so each product fits in 32 bits) -- confirm in gte-internal.h.
mac0() = F((int64_t)sx0() * sy1() + sx1() * sy2() + sx2() * sy0() -
sx0() * sy2() - sx1() * sy0() - sx2() * sy1());
Comment on lines +275 to +276
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

cd src/core && head -300 gte-instructions.cc | tail -50

Repository: grumpycoders/pcsx-redux

Length of output: 1997


🏁 Script executed:

wc -l src/core/gte-instructions.cc

Repository: grumpycoders/pcsx-redux

Length of output: 99


🏁 Script executed:

sed -n '270,280p' src/core/gte-instructions.cc

Repository: grumpycoders/pcsx-redux

Length of output: 492


🏁 Script executed:

# Search for the class definition and relevant function signatures
rg -A 5 "sx0\(\)" src/core/gte-instructions.cc | head -30

Repository: grumpycoders/pcsx-redux

Length of output: 409


🏁 Script executed:

# Find the F() function definition
rg "^\s*\w+\s+F\(" src/core/gte-instructions.cc -A 3 | head -20

Repository: grumpycoders/pcsx-redux

Length of output: 49


🏁 Script executed:

# Get broader context around the problematic line
sed -n '260,290p' src/core/gte-instructions.cc

Repository: grumpycoders/pcsx-redux

Length of output: 1412


🏁 Script executed:

# Find declarations of sx0, sy1, sy2, etc.
rg "sx0\(\)|sy0\(\)|sx1\(\)|sy1\(\)|sx2\(\)|sy2\(\)" src/core/gte-instructions.cc --context 2 | head -40

Repository: grumpycoders/pcsx-redux

Length of output: 667


🏁 Script executed:

# Search for class definition and member function declarations
rg "class\s+GTE" src/core -A 30 | head -50

Repository: grumpycoders/pcsx-redux

Length of output: 2227


🏁 Script executed:

# Look for the header file definition
fd -e h "gte" src/core/

Repository: grumpycoders/pcsx-redux

Length of output: 125


🏁 Script executed:

# Check gte.h for function signatures
rg "sx0|sy0|sx1|sy1|sx2|sy2" src/core/gte.h -B 2 -A 1 | head -50

Repository: grumpycoders/pcsx-redux

Length of output: 49


🏁 Script executed:

# Search for sx0 and sy0 definitions in gte files
rg "sx[0-2]\(\)|sy[0-2]\(\)" src/core/gte.h -A 2 -B 2

Repository: grumpycoders/pcsx-redux

Length of output: 49


🏁 Script executed:

# Try broader search in gte-internal.h
rg "sx0|sy0|sx1|sy1|sx2|sy2" src/core/gte-internal.h | head -30

Repository: grumpycoders/pcsx-redux

Length of output: 390


🏁 Script executed:

# Look for the actual register storage and accessor methods
rg "int16_t.*sx|int32_t.*sx" src/core/gte-internal.h | head -20

Repository: grumpycoders/pcsx-redux

Length of output: 387


🏁 Script executed:

# Search for these as members/properties
ast-grep --pattern 'int16_t $_' src/core/gte-internal.h

Repository: grumpycoders/pcsx-redux

Length of output: 49


🏁 Script executed:

# Let's look at the gte-internal.h header
head -100 src/core/gte-internal.h

Repository: grumpycoders/pcsx-redux

Length of output: 4465


🏁 Script executed:

# Find the F() function definition
rg "F\(" src/core/gte-internal.h | head -20

Repository: grumpycoders/pcsx-redux

Length of output: 143


🏁 Script executed:

# Look for F function more carefully
rg "^\s*\w+\s+F\(" src/core/gte-internal.h -A 3

Repository: grumpycoders/pcsx-redux

Length of output: 49


🏁 Script executed:

# Search in gte-instructions.cc for F definition
rg "^\s*\w+\s+F\(" src/core/gte-instructions.cc -A 3

Repository: grumpycoders/pcsx-redux

Length of output: 49


🏁 Script executed:

# Broader search for F definition
rg "^.*F\s*\(" src/core/ -A 2 | head -40

Repository: grumpycoders/pcsx-redux

Length of output: 2126


🏁 Script executed:

# Look at what happens around F() usage and mac0 assignment
sed -n '1,100p' src/core/gte-internal.h | tail -60

Repository: grumpycoders/pcsx-redux

Length of output: 2520


🏁 Script executed:

# Get F() function definition with more context
rg "inline int64_t F" src/core/gte-internal.h -A 10 | head -50

Repository: grumpycoders/pcsx-redux

Length of output: 805


🏁 Script executed:

# Let's see where F() is defined - check line numbers
rg -n "inline int64_t F" src/core/gte-internal.h

Repository: grumpycoders/pcsx-redux

Length of output: 151


🏁 Script executed:

# Now get those specific lines with context
sed -n '200,300p' src/core/gte-internal.h

Repository: grumpycoders/pcsx-redux

Length of output: 3835


🏁 Script executed:

# Let me verify the max/min range analysis with concrete calculations
python3 << 'EOF'
import sys

# int16_t range: -32768 to 32767
# int32_t range: -2147483648 to 2147483647

max_int16 = 32767
min_int16 = -32768
max_int32 = 2147483647
min_int32 = -2147483648

# Maximum product of two int16_t
max_product = max_int16 * max_int16
min_product = min_int16 * min_int16

print(f"Max int16 product: {max_product}")
print(f"Min int16 product: {min_product}")
print(f"Fits in int32: {max_product <= max_int32 and min_product <= max_int32}")

# Now with the six-term determinant
# Worst case positive: three products added, three subtracted
# Best case for overflow: all six terms are max magnitude but cause cancellation
# Worst case: all three "+" terms are max, all three "-" terms are max

worst_positive = 3 * max_product
worst_negative = 3 * min_product

print(f"\nWorst case positive sum (3 max products): {worst_positive}")
print(f"Exceeds int32 max? {worst_positive > max_int32}")
print(f"Worst case negative sum (3 min products): {worst_negative}")
print(f"Below int32 min? {worst_negative < min_int32}")

EOF

Repository: grumpycoders/pcsx-redux

Length of output: 300


🏁 Script executed:

# Let me also verify what the actual computation order is
# Looking at the exact expression again:
# (int64_t)sx0() * sy1() + sx1() * sy2() + sx2() * sy0() - sx0() * sy2() - sx1() * sy0() - sx2() * sy1()

# The key issue is that in C++, int16_t * int16_t promotes to int (32-bit)
# Let me check the exact behavior by looking at how compiler would handle this

python3 << 'EOF'
# C++ integer promotion rules:
# - If both operands are int16_t, they are promoted to int (usually 32-bit signed)
# - int * int = int (32-bit)
# - (int64_t) int16_t * int16_t: cast applies to first operand, so int64_t * int16_t = int64_t
# - But int16_t * int16_t = int (without the cast), and if only first cast, we get:
#   (int64_t)(sx0()) * sy1() = int64_t (because cast has higher precedence than *)
#   Then: int64_t + int + int - int - int - int
#   The int additions/subtractions happen at int level before being promoted to int64_t

# This is the bug: intermediate int32_t operations can overflow

print("C++ arithmetic evaluation:")
print("(int64_t)sx0() * sy1() + sx1() * sy2() + ...")
print()
print("Evaluation order (with operator precedence and type promotion):")
print("1. (int64_t)sx0() -> int64_t cast")
print("2. (int64_t)sx0() * sy1() -> int64_t * int16_t -> int64_t")
print("3. sx1() * sy2() -> int16_t * int16_t -> int (32-bit signed)")
print("4. sx2() * sy0() -> int16_t * int16_t -> int (32-bit signed)")
print("5. (int64_t)sx0() * sy1() + sx1() * sy2() -> int64_t + int -> int64_t")
print("6. ... remaining int terms added/subtracted at int level first, then promoted")
print()
print("The bug: steps 3, 4, etc. compute products as 32-bit int, which can overflow!")
EOF

Repository: grumpycoders/pcsx-redux

Length of output: 644


Add int64_t casts to all multiplications in the NCLIP determinant expression.

The six-term area calculation overflows in 32-bit arithmetic before reaching F(). Each uncast multiplication computes as int16_t * int16_t → int32_t. Individual products fit in 32-bit (max ~1.07 billion), but three terms summed exceed INT32_MAX (2.15 billion), causing intermediate overflow with wrapped values passed to F(). This results in incorrect MAC0 values and erroneous overflow flags.

Suggested fix
-        mac0() = F((int64_t)sx0() * sy1() + sx1() * sy2() + sx2() * sy0() -
-                    sx0() * sy2() - sx1() * sy0() - sx2() * sy1());
+        mac0() = F((int64_t)sx0() * sy1() + (int64_t)sx1() * sy2() + (int64_t)sx2() * sy0() -
+                    (int64_t)sx0() * sy2() - (int64_t)sx1() * sy0() - (int64_t)sx2() * sy1());
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/core/gte-instructions.cc` around lines 275 - 276, The NCLIP determinant
expression computing mac0() overflows because the six products are evaluated in
32-bit; update the expression that sets mac0() (using sx0(), sx1(), sx2(),
sy0(), sy1(), sy2()) so every multiplication is performed in 64-bit by casting
operands or the product to int64_t (i.e., ensure all six terms like sx0()*sy1(),
sx1()*sy2(), etc. are cast to int64_t before summing) and then pass the final
int64_t result into F().

}

// OP: selects the <sf, lm> instantiation of op() from the instruction word.
void PCSX::GTE::OP(uint32_t code) {
    GTE_DISPATCH_SF_LM(op);
}
// DPCS: selects the <sf, lm> instantiation of dpcs() from the instruction word.
void PCSX::GTE::DPCS(uint32_t code) {
    GTE_DISPATCH_SF_LM(dpcs);
}
// INTPL: selects the <sf, lm> instantiation of intpl() from the instruction word.
void PCSX::GTE::INTPL(uint32_t code) {
    GTE_DISPATCH_SF_LM(intpl);
}

// MVMVA: packs the five selector fields of the instruction word into an
// 8-bit index (sf:1 | lm:1 | mx:2 | v:2 | cv:2, most significant first) and
// calls the matching mvmvaTable entry. The flag register is cleared by the
// entry itself (see mvmvaImpl), not here.
void PCSX::GTE::MVMVA(uint32_t code) {
    const uint32_t opcode = code & 0x1ffffff;

    // Extracts the bit field that starts at `shift` and is selected by `mask`.
    const auto field = [opcode](unsigned shift, unsigned mask) -> unsigned {
        return (opcode >> shift) & mask;
    };

    const unsigned shiftFactor = field(19, 1);
    const unsigned limitMode = field(10, 1);
    const unsigned matrix = field(17, 3);
    const unsigned vector = field(15, 3);
    const unsigned translation = field(13, 3);

    const unsigned tableIndex =
        (shiftFactor << 7) | (limitMode << 6) | (matrix << 4) | (vector << 2) | translation;
    mvmvaTable[tableIndex]();
}

// NCDS: runs the ncdsCore pipeline once, for vertex 0.
void PCSX::GTE::NCDS(uint32_t code) {
    const uint32_t opcode = code & 0x1ffffff;
    gteFlag() = 0;

    switch (sfLmIndex(opcode)) {
        case 0:
            ncdsCore<false, false, 0>();
            break;
        case 1:
            ncdsCore<false, true, 0>();
            break;
        case 2:
            ncdsCore<true, false, 0>();
            break;
        case 3:
            ncdsCore<true, true, 0>();
            break;
    }
}

// CDP: selects the <sf, lm> instantiation of cdp() from the instruction word.
void PCSX::GTE::CDP(uint32_t code) {
    GTE_DISPATCH_SF_LM(cdp);
}

// NCDT: runs the ncdsCore pipeline for vertices 0, 1 and 2, in that order.
// The flag register is cleared once, before the first vertex.
void PCSX::GTE::NCDT(uint32_t code) {
    const uint32_t opcode = code & 0x1ffffff;
    gteFlag() = 0;

    switch (sfLmIndex(opcode)) {
        case 0:
            ncdsCore<false, false, 0>();
            ncdsCore<false, false, 1>();
            ncdsCore<false, false, 2>();
            break;
        case 1:
            ncdsCore<false, true, 0>();
            ncdsCore<false, true, 1>();
            ncdsCore<false, true, 2>();
            break;
        case 2:
            ncdsCore<true, false, 0>();
            ncdsCore<true, false, 1>();
            ncdsCore<true, false, 2>();
            break;
        case 3:
            ncdsCore<true, true, 0>();
            ncdsCore<true, true, 1>();
            ncdsCore<true, true, 2>();
            break;
    }
}

// NCCS: runs the nccsCore pipeline once, for vertex 0.
void PCSX::GTE::NCCS(uint32_t code) {
    const uint32_t opcode = code & 0x1ffffff;
    gteFlag() = 0;

    switch (sfLmIndex(opcode)) {
        case 0:
            nccsCore<false, false, 0>();
            break;
        case 1:
            nccsCore<false, true, 0>();
            break;
        case 2:
            nccsCore<true, false, 0>();
            break;
        case 3:
            nccsCore<true, true, 0>();
            break;
    }
}

// CC: selects the <sf, lm> instantiation of cc() from the instruction word.
void PCSX::GTE::CC(uint32_t code) {
    GTE_DISPATCH_SF_LM(cc);
}

// NCS: runs the ncsCore pipeline once, for vertex 0.
void PCSX::GTE::NCS(uint32_t code) {
    const uint32_t opcode = code & 0x1ffffff;
    gteFlag() = 0;

    switch (sfLmIndex(opcode)) {
        case 0:
            ncsCore<false, false, 0>();
            break;
        case 1:
            ncsCore<false, true, 0>();
            break;
        case 2:
            ncsCore<true, false, 0>();
            break;
        case 3:
            ncsCore<true, true, 0>();
            break;
    }
}

// NCT: runs the ncsCore pipeline for vertices 0, 1 and 2, in that order.
// The flag register is cleared once, before the first vertex.
void PCSX::GTE::NCT(uint32_t code) {
    const uint32_t opcode = code & 0x1ffffff;
    gteFlag() = 0;

    switch (sfLmIndex(opcode)) {
        case 0:
            ncsCore<false, false, 0>();
            ncsCore<false, false, 1>();
            ncsCore<false, false, 2>();
            break;
        case 1:
            ncsCore<false, true, 0>();
            ncsCore<false, true, 1>();
            ncsCore<false, true, 2>();
            break;
        case 2:
            ncsCore<true, false, 0>();
            ncsCore<true, false, 1>();
            ncsCore<true, false, 2>();
            break;
        case 3:
            ncsCore<true, true, 0>();
            ncsCore<true, true, 1>();
            ncsCore<true, true, 2>();
            break;
    }
}

// SQR: selects the <sf, lm> instantiation of sqr() from the instruction word.
void PCSX::GTE::SQR(uint32_t code) {
    GTE_DISPATCH_SF_LM(sqr);
}
// DCPL: selects the <sf, lm> instantiation of dcpl() from the instruction word.
void PCSX::GTE::DCPL(uint32_t code) {
    GTE_DISPATCH_SF_LM(dcpl);
}
// DPCT: selects the <sf, lm> instantiation of dpct() from the instruction word.
void PCSX::GTE::DPCT(uint32_t code) {
    GTE_DISPATCH_SF_LM(dpct);
}

// AVSZ3: MAC0 = F(ZSF3 * SZ1 + ZSF3 * SZ2 + ZSF3 * SZ3), OTZ = limD of the
// raw (pre-truncation) MAC0 value. `code` is not inspected.
void PCSX::GTE::AVSZ3(uint32_t code) {
    gteFlag() = 0;

    // Accumulate in 64 bits. With narrower operands each product is formed in
    // `int`, and the sum of three of them can leave the 32-bit range before
    // F() gets to range-check it -- yielding a wrapped MAC0, wrong MAC0
    // overflow flags and a wrong OTZ for large depths.
    const int64_t zsf3 = gteZSF3();
    int64_t rawMac0;
    mac0() = F(zsf3 * sz1() + zsf3 * sz2() + zsf3 * sz3(), rawMac0);
    otz() = limD<true>(rawMac0);
}

// AVSZ4: MAC0 = F(ZSF4 * SZ0 + ZSF4 * SZ1 + ZSF4 * SZ2 + ZSF4 * SZ3),
// OTZ = limD of the raw (pre-truncation) MAC0 value. `code` is not inspected.
void PCSX::GTE::AVSZ4(uint32_t code) {
    gteFlag() = 0;

    // Accumulate in 64 bits. With narrower operands each product is formed in
    // `int`, and the sum of four of them can leave the 32-bit range before
    // F() gets to range-check it -- yielding a wrapped MAC0, wrong MAC0
    // overflow flags and a wrong OTZ for large depths.
    const int64_t zsf4 = gteZSF4();
    int64_t rawMac0;
    mac0() = F(zsf4 * sz0() + zsf4 * sz1() + zsf4 * sz2() + zsf4 * sz3(), rawMac0);
    otz() = limD<true>(rawMac0);
}

// GPF: selects the <sf, lm> instantiation of gpf() from the instruction word.
void PCSX::GTE::GPF(uint32_t code) {
    GTE_DISPATCH_SF_LM(gpf);
}
// GPL: selects the <sf, lm> instantiation of gpl() from the instruction word.
void PCSX::GTE::GPL(uint32_t code) {
    GTE_DISPATCH_SF_LM(gpl);
}

// NCCT: runs the nccsCore pipeline for vertices 0, 1 and 2, in that order.
// The flag register is cleared once, before the first vertex.
void PCSX::GTE::NCCT(uint32_t code) {
    const uint32_t opcode = code & 0x1ffffff;
    gteFlag() = 0;

    switch (sfLmIndex(opcode)) {
        case 0:
            nccsCore<false, false, 0>();
            nccsCore<false, false, 1>();
            nccsCore<false, false, 2>();
            break;
        case 1:
            nccsCore<false, true, 0>();
            nccsCore<false, true, 1>();
            nccsCore<false, true, 2>();
            break;
        case 2:
            nccsCore<true, false, 0>();
            nccsCore<true, false, 1>();
            nccsCore<true, false, 2>();
            break;
        case 3:
            nccsCore<true, true, 0>();
            nccsCore<true, true, 1>();
            nccsCore<true, true, 2>();
            break;
    }
}

#undef GTE_DISPATCH_SF_LM
Loading
Loading