-
-
Notifications
You must be signed in to change notification settings - Fork 138
Gte rewrite #2007
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Gte rewrite #2007
Changes from all commits
8ab8150
e8339de
df8c39f
3bdd5fc
e71eeb0
dd8656b
b2daebf
57dad29
47feb1f
2143c32
8bbdac7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,385 @@ | ||
| /*************************************************************************** | ||
| * Copyright (C) 2026 PCSX-Redux authors * | ||
| * * | ||
| * This program is free software; you can redistribute it and/or modify * | ||
| * it under the terms of the GNU General Public License as published by * | ||
| * the Free Software Foundation; either version 2 of the License, or * | ||
| * (at your option) any later version. * | ||
| * * | ||
| * This program is distributed in the hope that it will be useful, * | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of * | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * | ||
| * GNU General Public License for more details. * | ||
| * * | ||
| * You should have received a copy of the GNU General Public License * | ||
| * along with this program; if not, write to the * | ||
| * Free Software Foundation, Inc., * | ||
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * | ||
| ***************************************************************************/ | ||
|
|
||
| // GTE instruction implementations and public dispatch methods. | ||
| // | ||
| // Each instruction is implemented as a template parameterized on sf (shift | ||
| // factor) and lm (limit mode). The public methods decode these bits from | ||
| // the instruction encoding and dispatch to the right instantiation. | ||
| // | ||
| // MVMVA is further templatized on mx, v, and cv for full compile-time | ||
| // elimination of the matrix/vector selection branches. | ||
|
|
||
| #include "core/gte.h" | ||
| #include "core/gte-internal.h" | ||
| #include "core/pgxp_debug.h" | ||
| #include "core/pgxp_gte.h" | ||
|
|
||
| using namespace PCSX::GTEImpl; | ||
|
|
||
| // ============================================================================ | ||
| // Template instruction implementations | ||
| // ============================================================================ | ||
|
|
||
| // RTPS core: perspective transform for vertex v. | ||
| // When last=true, computes the depth queue interpolation at the end. | ||
| template <bool sf, bool lm, int v> | ||
| static void rtps(bool last) { | ||
| mac1() = A1<sf>(int44(trX() << 12) + | ||
| r11() * vertexX<v>() + r12() * vertexY<v>() + r13() * vertexZ<v>()); | ||
| mac2() = A2<sf>(int44(trY() << 12) + | ||
| r21() * vertexX<v>() + r22() * vertexY<v>() + r23() * vertexZ<v>()); | ||
| int64_t rawMac3; | ||
| mac3() = A3<sf>(int44(trZ() << 12) + | ||
| r31() * vertexX<v>() + r32() * vertexY<v>() + r33() * vertexZ<v>(), rawMac3); | ||
|
|
||
| ir1() = limB1<lm>(mac1()); | ||
| ir2() = limB2<lm>(mac2()); | ||
| ir3() = limB3sf<sf, lm>(rawMac3); | ||
|
|
||
| pushZ(limD<true>(rawMac3)); | ||
|
|
||
| int32_t hOverSz3 = gteDivide(gteH(), sz3()); | ||
|
|
||
| sxy0() = sxy1(); | ||
| sxy1() = sxy2(); | ||
|
|
||
| double widescreenFactor = PCSX::g_emulator->config().Widescreen ? 0.75 : 1.0; | ||
| // ir1()*hOverSz3 can exceed int32_t (hOverSz3 is up to 0x1FFFF), so widen ir first | ||
| sx2() = limG1(F(gteOFX() + (int64_t)ir1() * hOverSz3 * widescreenFactor) >> 16); | ||
| sy2() = limG2(F(gteOFY() + (int64_t)ir2() * hOverSz3) >> 16); | ||
|
|
||
| PGXP_pushSXYZ2s(limG1ia(gteOFX() + (int64_t)ir1() * hOverSz3 * widescreenFactor), | ||
| limG2ia(gteOFY() + (int64_t)ir2() * hOverSz3), | ||
| std::max((int)sz3(), gteH() / 2), sxy2()); | ||
|
|
||
| if (last) { | ||
| int64_t rawMac0; | ||
| mac0() = F(gteDQB() + gteDQA() * hOverSz3, rawMac0); | ||
| ir0() = limH(rawMac0); | ||
|
Comment on lines
+73
to
+75
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, let's examine the specific lines mentioned in the review
cat -n src/core/gte-instructions.cc | sed -n '70,80p'Repository: grumpycoders/pcsx-redux Length of output: 430 🏁 Script executed: # Check lines 357-368
cat -n src/core/gte-instructions.cc | sed -n '355,370p'Repository: grumpycoders/pcsx-redux Length of output: 663 🏁 Script executed: # Let's also check lines around 272-276 for NCLIP
cat -n src/core/gte-instructions.cc | sed -n '270,280p'Repository: grumpycoders/pcsx-redux Length of output: 569 🏁 Script executed: # Search for the function definitions to understand their return types
rg -A 2 "int64_t|int32_t|uint32_t" src/core/gte-instructions.cc | head -100Repository: grumpycoders/pcsx-redux Length of output: 3279 🏁 Script executed: # Search for PGXP_NLCIP_valid or similar
rg "PGXP_N" src/core/Repository: grumpycoders/pcsx-redux Length of output: 466 Widen the MAC0 math before passing it to F(). Line 74 and lines 360-367 still do the multiply/accumulate in 32-bit first. Suggested fix: if (last) {
int64_t rawMac0;
- mac0() = F(gteDQB() + gteDQA() * hOverSz3, rawMac0);
+ mac0() = F(gteDQB() + (int64_t)gteDQA() * hOverSz3, rawMac0);
ir0() = limH(rawMac0);
}
}
@@
void PCSX::GTE::AVSZ3(uint32_t code) {
gteFlag() = 0;
int64_t rawMac0;
- mac0() = F(gteZSF3() * sz1() + gteZSF3() * sz2() + gteZSF3() * sz3(), rawMac0);
+ mac0() = F((int64_t)gteZSF3() * sz1() + (int64_t)gteZSF3() * sz2() +
+ (int64_t)gteZSF3() * sz3(),
+ rawMac0);
otz() = limD<true>(rawMac0);
}
@@
void PCSX::GTE::AVSZ4(uint32_t code) {
gteFlag() = 0;
int64_t rawMac0;
- mac0() = F(gteZSF4() * sz0() + gteZSF4() * sz1() + gteZSF4() * sz2() + gteZSF4() * sz3(), rawMac0);
+ mac0() = F((int64_t)gteZSF4() * sz0() + (int64_t)gteZSF4() * sz1() +
+ (int64_t)gteZSF4() * sz2() + (int64_t)gteZSF4() * sz3(),
+ rawMac0);
otz() = limD<true>(rawMac0);
}Also applies to: 357-368 🤖 Prompt for AI Agents |
||
| } | ||
| } | ||
|
|
||
| // OP: cross product of the rotation matrix diagonal (r11, r22, r33) with the | ||
| // (ir1, ir2, ir3) vector. Results land in mac1..mac3 and are then passed | ||
| // through limB1..limB3 back into ir1..ir3. | ||
| template <bool sf, bool lm> | ||
| void PCSX::GTE::op(uint32_t op) { | ||
|     gteFlag() = 0; | ||
|     // Read every operand up front; the ir registers are only rewritten at the end. | ||
|     const auto diag1 = r11(); | ||
|     const auto diag2 = r22(); | ||
|     const auto diag3 = r33(); | ||
|     const auto vecX = ir1(); | ||
|     const auto vecY = ir2(); | ||
|     const auto vecZ = ir3(); | ||
|     mac1() = A1<sf>(diag2 * vecZ - diag3 * vecY); | ||
|     mac2() = A2<sf>(diag3 * vecX - diag1 * vecZ); | ||
|     mac3() = A3<sf>(diag1 * vecY - diag2 * vecX); | ||
|     ir1() = limB1<lm>(mac1()); | ||
|     ir2() = limB2<lm>(mac2()); | ||
|     ir3() = limB3<lm>(mac3()); | ||
| } | ||
|
Check warning on line 89 in src/core/gte-instructions.cc
|
||
|
|
||
| // DPCS: runs depthCue on the current RGB channels, each shifted left by 16, | ||
| // then calls pushColor(). | ||
| template <bool sf, bool lm> | ||
| void PCSX::GTE::dpcs(uint32_t op) { | ||
|     gteFlag() = 0; | ||
|     const auto red = rgbR() << 16; | ||
|     const auto green = rgbG() << 16; | ||
|     const auto blue = rgbB() << 16; | ||
|     depthCue<sf, lm>(red, green, blue); | ||
|     pushColor(); | ||
| } | ||
|
|
||
| // INTPL: runs depthCue on the ir1..ir3 vector, each component shifted left | ||
| // by 12, then calls pushColor(). | ||
| template <bool sf, bool lm> | ||
| void PCSX::GTE::intpl(uint32_t op) { | ||
|     gteFlag() = 0; | ||
|     const auto first = ir1() << 12; | ||
|     const auto second = ir2() << 12; | ||
|     const auto third = ir3() << 12; | ||
|     depthCue<sf, lm>(first, second, third); | ||
|     pushColor(); | ||
| } | ||
|
|
||
| // MVMVA: dispatch-table target; resets gteFlag() to 0, then runs matrixVectorMultiply with sf, lm, mx, v and cv all fixed at compile time. | ||
| template <bool sf, bool lm, int mx, int v, int cv> | ||
| static void mvmvaImpl() { | ||
| gteFlag() = 0; | ||
| matrixVectorMultiply<sf, lm, mx, v, cv>(); | ||
| } | ||
|
|
||
| // NCDS core for vertex v, shared by NCDS (v=0) and NCDT (v=0,1,2): lightTransform, colorMatrix, depthCueColor, then pushColor. It does not reset gteFlag(); the public NCDS/NCDT methods do that once before calling it. | ||
| template <bool sf, bool lm, int v> | ||
| static void ncdsCore() { | ||
| lightTransform<sf, lm, v>(); | ||
| colorMatrix<sf, lm>(); | ||
| depthCueColor<sf, lm>(); | ||
| pushColor(); | ||
| } | ||
|
|
||
// CDP: resets gteFlag(), then runs colorMatrix, depthCueColor and pushColor. This is the same
// sequence as ncdsCore without its leading lightTransform step. The op argument is not read here.
| template <bool sf, bool lm> | ||
| void PCSX::GTE::cdp(uint32_t op) { | ||
| gteFlag() = 0; | ||
| colorMatrix<sf, lm>(); | ||
| depthCueColor<sf, lm>(); | ||
| pushColor(); | ||
| } | ||
|
|
||
| // NCCS core for vertex v, shared by NCCS (v=0) and NCCT (v=0,1,2): lightTransform, colorMatrix, colorApply, then pushColor. It does not reset gteFlag(); the public NCCS/NCCT methods do that once before calling it. | ||
| template <bool sf, bool lm, int v> | ||
| static void nccsCore() { | ||
| lightTransform<sf, lm, v>(); | ||
| colorMatrix<sf, lm>(); | ||
| colorApply<sf, lm>(); | ||
| pushColor(); | ||
| } | ||
|
|
||
// CC: resets gteFlag(), then runs colorMatrix, colorApply and pushColor. This is the same
// sequence as nccsCore without its leading lightTransform step. The op argument is not read here.
| template <bool sf, bool lm> | ||
| void PCSX::GTE::cc(uint32_t op) { | ||
| gteFlag() = 0; | ||
| colorMatrix<sf, lm>(); | ||
| colorApply<sf, lm>(); | ||
| pushColor(); | ||
| } | ||
|
|
||
| // NCS core for vertex v, shared by NCS (v=0) and NCT (v=0,1,2): lightTransform, colorMatrix, then pushColor. It does not reset gteFlag(); the public NCS/NCT methods do that once before calling it. | ||
| template <bool sf, bool lm, int v> | ||
| static void ncsCore() { | ||
| lightTransform<sf, lm, v>(); | ||
| colorMatrix<sf, lm>(); | ||
| pushColor(); | ||
| } | ||
|
|
||
| // SQR: squares each of ir1..ir3 into mac1..mac3, then writes the limited | ||
| // results back to ir1..ir3. | ||
| template <bool sf, bool lm> | ||
| void PCSX::GTE::sqr(uint32_t op) { | ||
|     gteFlag() = 0; | ||
|     // Capture the inputs first; ir1..ir3 are only overwritten after all MACs are set. | ||
|     const auto inX = ir1(); | ||
|     const auto inY = ir2(); | ||
|     const auto inZ = ir3(); | ||
|     mac1() = A1<sf>(inX * inX); | ||
|     mac2() = A2<sf>(inY * inY); | ||
|     mac3() = A3<sf>(inZ * inZ); | ||
|     ir1() = limB1<lm>(mac1()); | ||
|     ir2() = limB2<lm>(mac2()); | ||
|     ir3() = limB3<lm>(mac3()); | ||
| } | ||
|
|
||
// DCPL: resets gteFlag(), then runs depthCueColor followed by pushColor. The op argument is not read here.
| template <bool sf, bool lm> | ||
| void PCSX::GTE::dcpl(uint32_t op) { | ||
| gteFlag() = 0; | ||
| depthCueColor<sf, lm>(); | ||
| pushColor(); | ||
| } | ||
|
|
||
| // DPCT: three rounds of "depthCue on rgb0 (channels shifted left by 16), then | ||
| // pushColor()". The rgb0 accessors are re-read on every round. | ||
| template <bool sf, bool lm> | ||
| void PCSX::GTE::dpct(uint32_t op) { | ||
|     gteFlag() = 0; | ||
|     int remaining = 3; | ||
|     while (remaining-- > 0) { | ||
|         depthCue<sf, lm>(rgb0R() << 16, rgb0G() << 16, rgb0B() << 16); | ||
|         pushColor(); | ||
|     } | ||
| } | ||
|
|
||
| // GPF: multiplies ir1..ir3 by ir0 into mac1..mac3, writes the limited | ||
| // results back to ir1..ir3, then calls pushColor(). | ||
| template <bool sf, bool lm> | ||
| void PCSX::GTE::gpf(uint32_t op) { | ||
|     gteFlag() = 0; | ||
|     const auto factor = ir0(); | ||
|     const auto inX = ir1(); | ||
|     const auto inY = ir2(); | ||
|     const auto inZ = ir3(); | ||
|     mac1() = A1<sf>(factor * inX); | ||
|     mac2() = A2<sf>(factor * inY); | ||
|     mac3() = A3<sf>(factor * inZ); | ||
|     ir1() = limB1<lm>(mac1()); | ||
|     ir2() = limB2<lm>(mac2()); | ||
|     ir3() = limB3<lm>(mac3()); | ||
|     pushColor(); | ||
| } | ||
|
|
||
| // GPL: adds ir0 * ir1..ir3 onto the existing mac1..mac3 (pre-shifted left | ||
| // by 12 when sf is set), writes the limited results to ir1..ir3, then | ||
| // calls pushColor(). | ||
| template <bool sf, bool lm> | ||
| void PCSX::GTE::gpl(uint32_t op) { | ||
|     gteFlag() = 0; | ||
|     // Widen to 64 bits before shifting: <<12 on int32_t overflows. | ||
|     const auto widen = [](int64_t value) -> int64_t { | ||
|         if constexpr (sf) { | ||
|             return value << 12; | ||
|         } else { | ||
|             return value; | ||
|         } | ||
|     }; | ||
|     const int64_t base1 = widen(mac1()); | ||
|     const int64_t base2 = widen(mac2()); | ||
|     const int64_t base3 = widen(mac3()); | ||
|     const auto factor = ir0(); | ||
|     mac1() = A1<sf>(base1 + factor * ir1()); | ||
|     mac2() = A2<sf>(base2 + factor * ir2()); | ||
|     mac3() = A3<sf>(base3 + factor * ir3()); | ||
|     ir1() = limB1<lm>(mac1()); | ||
|     ir2() = limB2<lm>(mac2()); | ||
|     ir3() = limB3<lm>(mac3()); | ||
|     pushColor(); | ||
| } | ||
|
|
||
| // ============================================================================ | ||
| // MVMVA dispatch table (256 entries: sf * lm * mx * v * cv) | ||
| // ============================================================================ | ||
|
|
||
| namespace { | ||
|
|
||
// Adapter exposing mvmvaImpl<sf, lm, mx, v, cv> as a static member function `fn`, so the
// class template can be handed to makeMvmvaTable as a template template argument.
| template <bool sf, bool lm, int mx, int v, int cv> | ||
| struct MvmvaEntry { | ||
| static void fn() { mvmvaImpl<sf, lm, mx, v, cv>(); } | ||
| }; | ||
|
|
||
// Signature shared by every table entry: no arguments, no result.
| using MvmvaFn = void (*)(); | ||
|
|
||
// Compile-time table built by makeMvmvaTable over indices 0..255. MVMVA() indexes it with
// (sf << 7) | (lm << 6) | (mx << 4) | (v << 2) | cv.
// NOTE(review): assumes makeMvmvaTable decodes each index with that same bit layout; confirm in gte-internal.h.
| constexpr auto mvmvaTable = | ||
| PCSX::GTEImpl::makeMvmvaTable<MvmvaFn, MvmvaEntry>(std::make_index_sequence<256>{}); | ||
|
|
||
| } // anonymous namespace | ||
|
|
||
| // ============================================================================ | ||
| // Public dispatch methods | ||
| // ============================================================================ | ||
|
|
||
// Expands to a 4-way switch on sfLmIndex(code & 0x1ffffff) that calls method<sf, lm>(_op, extra args...).
// Relies on a variable named `code` being in scope at the expansion site.
// Index 0..3 selects <false,false>, <false,true>, <true,false>, <true,true>; any other index calls nothing.
// `##__VA_ARGS__` is the GNU comma-swallowing form, so the macro also works with no extra arguments.
| #define GTE_DISPATCH_SF_LM(method, ...) \ | ||
| do { \ | ||
| uint32_t _op = code & 0x1ffffff; \ | ||
| switch (sfLmIndex(_op)) { \ | ||
| case 0: method<false, false>(_op, ##__VA_ARGS__); break; \ | ||
| case 1: method<false, true>(_op, ##__VA_ARGS__); break; \ | ||
| case 2: method<true, false>(_op, ##__VA_ARGS__); break; \ | ||
| case 3: method<true, true>(_op, ##__VA_ARGS__); break; \ | ||
| } \ | ||
| } while (0) | ||
|
|
||
| // RTPS: resets gteFlag(), then runs rtps for vertex 0 with last=true, using | ||
| // the <sf, lm> instantiation selected by sfLmIndex. | ||
| void PCSX::GTE::RTPS(uint32_t code) { | ||
|     uint32_t opBits = code & 0x1ffffff; | ||
|     gteFlag() = 0; | ||
|     const auto variant = sfLmIndex(opBits); | ||
|     if (variant == 0) { | ||
|         rtps<false, false, 0>(true); | ||
|     } else if (variant == 1) { | ||
|         rtps<false, true, 0>(true); | ||
|     } else if (variant == 2) { | ||
|         rtps<true, false, 0>(true); | ||
|     } else if (variant == 3) { | ||
|         rtps<true, true, 0>(true); | ||
|     } | ||
| } | ||
|
|
||
| // RTPT: resets gteFlag(), then runs rtps on vertices 0, 1 and 2 in order. | ||
| // Only the final call passes last=true. | ||
| void PCSX::GTE::RTPT(uint32_t code) { | ||
|     uint32_t opBits = code & 0x1ffffff; | ||
|     gteFlag() = 0; | ||
|     const auto variant = sfLmIndex(opBits); | ||
|     if (variant == 0) { | ||
|         rtps<false, false, 0>(false); | ||
|         rtps<false, false, 1>(false); | ||
|         rtps<false, false, 2>(true); | ||
|     } else if (variant == 1) { | ||
|         rtps<false, true, 0>(false); | ||
|         rtps<false, true, 1>(false); | ||
|         rtps<false, true, 2>(true); | ||
|     } else if (variant == 2) { | ||
|         rtps<true, false, 0>(false); | ||
|         rtps<true, false, 1>(false); | ||
|         rtps<true, false, 2>(true); | ||
|     } else if (variant == 3) { | ||
|         rtps<true, true, 0>(false); | ||
|         rtps<true, true, 1>(false); | ||
|         rtps<true, true, 2>(true); | ||
|     } | ||
| } | ||
|
|
||
| void PCSX::GTE::NCLIP(uint32_t code) { | ||
| gteFlag() = 0; | ||
| if (PGXP_NLCIP_valid(sxy0(), sxy1(), sxy2())) | ||
| mac0() = F(PGXP_NCLIP()); | ||
| else | ||
| mac0() = F((int64_t)sx0() * sy1() + sx1() * sy2() + sx2() * sy0() - | ||
| sx0() * sy2() - sx1() * sy0() - sx2() * sy1()); | ||
|
Comment on lines
+275
to
+276
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: cd src/core && head -300 gte-instructions.cc | tail -50Repository: grumpycoders/pcsx-redux Length of output: 1997 🏁 Script executed: wc -l src/core/gte-instructions.ccRepository: grumpycoders/pcsx-redux Length of output: 99 🏁 Script executed: sed -n '270,280p' src/core/gte-instructions.ccRepository: grumpycoders/pcsx-redux Length of output: 492 🏁 Script executed: # Search for the class definition and relevant function signatures
rg -A 5 "sx0\(\)" src/core/gte-instructions.cc | head -30Repository: grumpycoders/pcsx-redux Length of output: 409 🏁 Script executed: # Find the F() function definition
rg "^\s*\w+\s+F\(" src/core/gte-instructions.cc -A 3 | head -20Repository: grumpycoders/pcsx-redux Length of output: 49 🏁 Script executed: # Get broader context around the problematic line
sed -n '260,290p' src/core/gte-instructions.ccRepository: grumpycoders/pcsx-redux Length of output: 1412 🏁 Script executed: # Find declarations of sx0, sy1, sy2, etc.
rg "sx0\(\)|sy0\(\)|sx1\(\)|sy1\(\)|sx2\(\)|sy2\(\)" src/core/gte-instructions.cc --context 2 | head -40Repository: grumpycoders/pcsx-redux Length of output: 667 🏁 Script executed: # Search for class definition and member function declarations
rg "class\s+GTE" src/core -A 30 | head -50Repository: grumpycoders/pcsx-redux Length of output: 2227 🏁 Script executed: # Look for the header file definition
fd -e h "gte" src/core/Repository: grumpycoders/pcsx-redux Length of output: 125 🏁 Script executed: # Check gte.h for function signatures
rg "sx0|sy0|sx1|sy1|sx2|sy2" src/core/gte.h -B 2 -A 1 | head -50Repository: grumpycoders/pcsx-redux Length of output: 49 🏁 Script executed: # Search for sx0 and sy0 definitions in gte files
rg "sx[0-2]\(\)|sy[0-2]\(\)" src/core/gte.h -A 2 -B 2Repository: grumpycoders/pcsx-redux Length of output: 49 🏁 Script executed: # Try broader search in gte-internal.h
rg "sx0|sy0|sx1|sy1|sx2|sy2" src/core/gte-internal.h | head -30Repository: grumpycoders/pcsx-redux Length of output: 390 🏁 Script executed: # Look for the actual register storage and accessor methods
rg "int16_t.*sx|int32_t.*sx" src/core/gte-internal.h | head -20Repository: grumpycoders/pcsx-redux Length of output: 387 🏁 Script executed: # Search for these as members/properties
ast-grep --pattern 'int16_t $_' src/core/gte-internal.hRepository: grumpycoders/pcsx-redux Length of output: 49 🏁 Script executed: # Let's look at the gte-internal.h header
head -100 src/core/gte-internal.hRepository: grumpycoders/pcsx-redux Length of output: 4465 🏁 Script executed: # Find the F() function definition
rg "F\(" src/core/gte-internal.h | head -20Repository: grumpycoders/pcsx-redux Length of output: 143 🏁 Script executed: # Look for F function more carefully
rg "^\s*\w+\s+F\(" src/core/gte-internal.h -A 3Repository: grumpycoders/pcsx-redux Length of output: 49 🏁 Script executed: # Search in gte-instructions.cc for F definition
rg "^\s*\w+\s+F\(" src/core/gte-instructions.cc -A 3Repository: grumpycoders/pcsx-redux Length of output: 49 🏁 Script executed: # Broader search for F definition
rg "^.*F\s*\(" src/core/ -A 2 | head -40Repository: grumpycoders/pcsx-redux Length of output: 2126 🏁 Script executed: # Look at what happens around F() usage and mac0 assignment
sed -n '1,100p' src/core/gte-internal.h | tail -60Repository: grumpycoders/pcsx-redux Length of output: 2520 🏁 Script executed: # Get F() function definition with more context
rg "inline int64_t F" src/core/gte-internal.h -A 10 | head -50Repository: grumpycoders/pcsx-redux Length of output: 805 🏁 Script executed: # Let's see where F() is defined - check line numbers
rg -n "inline int64_t F" src/core/gte-internal.hRepository: grumpycoders/pcsx-redux Length of output: 151 🏁 Script executed: # Now get those specific lines with context
sed -n '200,300p' src/core/gte-internal.hRepository: grumpycoders/pcsx-redux Length of output: 3835 🏁 Script executed: # Let me verify the max/min range analysis with concrete calculations
python3 << 'EOF'
import sys
# int16_t range: -32768 to 32767
# int32_t range: -2147483648 to 2147483647
max_int16 = 32767
min_int16 = -32768
max_int32 = 2147483647
min_int32 = -2147483648
# Maximum product of two int16_t
max_product = max_int16 * max_int16
min_product = min_int16 * min_int16
print(f"Max int16 product: {max_product}")
print(f"Min int16 product: {min_product}")
print(f"Fits in int32: {max_product <= max_int32 and min_product <= max_int32}")
# Now with the six-term determinant
# Worst case positive: three products added, three subtracted
# Best case for overflow: all six terms are max magnitude but cause cancellation
# Worst case: all three "+" terms are max, all three "-" terms are max
worst_positive = 3 * max_product
worst_negative = 3 * min_product
print(f"\nWorst case positive sum (3 max products): {worst_positive}")
print(f"Exceeds int32 max? {worst_positive > max_int32}")
print(f"Worst case negative sum (3 min products): {worst_negative}")
print(f"Below int32 min? {worst_negative < min_int32}")
EOFRepository: grumpycoders/pcsx-redux Length of output: 300 🏁 Script executed: # Let me also verify what the actual computation order is
# Looking at the exact expression again:
# (int64_t)sx0() * sy1() + sx1() * sy2() + sx2() * sy0() - sx0() * sy2() - sx1() * sy0() - sx2() * sy1()
# The key issue is that in C++, int16_t * int16_t promotes to int (32-bit)
# Let me check the exact behavior by looking at how compiler would handle this
python3 << 'EOF'
# C++ integer promotion rules:
# - If both operands are int16_t, they are promoted to int (usually 32-bit signed)
# - int * int = int (32-bit)
# - (int64_t) int16_t * int16_t: cast applies to first operand, so int64_t * int16_t = int64_t
# - But int16_t * int16_t = int (without the cast), and if only first cast, we get:
# (int64_t)(sx0()) * sy1() = int64_t (because cast has higher precedence than *)
# Then: int64_t + int + int - int - int - int
# The int additions/subtractions happen at int level before being promoted to int64_t
# This is the bug: intermediate int32_t operations can overflow
print("C++ arithmetic evaluation:")
print("(int64_t)sx0() * sy1() + sx1() * sy2() + ...")
print()
print("Evaluation order (with operator precedence and type promotion):")
print("1. (int64_t)sx0() -> int64_t cast")
print("2. (int64_t)sx0() * sy1() -> int64_t * int16_t -> int64_t")
print("3. sx1() * sy2() -> int16_t * int16_t -> int (32-bit signed)")
print("4. sx2() * sy0() -> int16_t * int16_t -> int (32-bit signed)")
print("5. (int64_t)sx0() * sy1() + sx1() * sy2() -> int64_t + int -> int64_t")
print("6. ... remaining int terms added/subtracted at int level first, then promoted")
print()
print("The bug: steps 3, 4, etc. compute products as 32-bit int, which can overflow!")
EOFRepository: grumpycoders/pcsx-redux Length of output: 644 Add int64_t casts to all multiplications in the NCLIP determinant expression. The six-term area calculation overflows in 32-bit arithmetic before reaching F(). Suggested fix: - mac0() = F((int64_t)sx0() * sy1() + sx1() * sy2() + sx2() * sy0() -
- sx0() * sy2() - sx1() * sy0() - sx2() * sy1());
+ mac0() = F((int64_t)sx0() * sy1() + (int64_t)sx1() * sy2() + (int64_t)sx2() * sy0() -
+ (int64_t)sx0() * sy2() - (int64_t)sx1() * sy0() - (int64_t)sx2() * sy1());🤖 Prompt for AI Agents |
||
| } | ||
|
|
||
// OP, DPCS, INTPL: forward to the op / dpcs / intpl member templates through GTE_DISPATCH_SF_LM,
// which selects the <sf, lm> instantiation from the masked opcode.
| void PCSX::GTE::OP(uint32_t code) { GTE_DISPATCH_SF_LM(op); } | ||
| void PCSX::GTE::DPCS(uint32_t code) { GTE_DISPATCH_SF_LM(dpcs); } | ||
| void PCSX::GTE::INTPL(uint32_t code) { GTE_DISPATCH_SF_LM(intpl); } | ||
|
|
||
| // MVMVA: packs the sf, lm, mx, v and cv opcode fields into an 8-bit index | ||
| // and calls the matching mvmvaTable entry. | ||
| void PCSX::GTE::MVMVA(uint32_t code) { | ||
|     const uint32_t bits = code & 0x1ffffff; | ||
|     // Index layout, most significant first: sf(1) lm(1) mx(2) v(2) cv(2). | ||
|     unsigned slot = (bits >> 19) & 1;            // sf | ||
|     slot = (slot << 1) | ((bits >> 10) & 1);     // lm | ||
|     slot = (slot << 2) | ((bits >> 17) & 3);     // mx | ||
|     slot = (slot << 2) | ((bits >> 15) & 3);     // v | ||
|     slot = (slot << 2) | ((bits >> 13) & 3);     // cv | ||
|     mvmvaTable[slot](); | ||
| } | ||
|
|
||
| // NCDS: resets gteFlag(), then runs ncdsCore for vertex 0 with the | ||
| // <sf, lm> instantiation selected by sfLmIndex. | ||
| void PCSX::GTE::NCDS(uint32_t code) { | ||
|     uint32_t opBits = code & 0x1ffffff; | ||
|     gteFlag() = 0; | ||
|     const auto variant = sfLmIndex(opBits); | ||
|     if (variant == 0) { | ||
|         ncdsCore<false, false, 0>(); | ||
|     } else if (variant == 1) { | ||
|         ncdsCore<false, true, 0>(); | ||
|     } else if (variant == 2) { | ||
|         ncdsCore<true, false, 0>(); | ||
|     } else if (variant == 3) { | ||
|         ncdsCore<true, true, 0>(); | ||
|     } | ||
| } | ||
|
|
||
// CDP: forwards to the cdp member template through GTE_DISPATCH_SF_LM, which selects <sf, lm> from the masked opcode.
| void PCSX::GTE::CDP(uint32_t code) { GTE_DISPATCH_SF_LM(cdp); } | ||
|
|
||
| // NCDT: resets gteFlag() once, then runs ncdsCore on vertices 0, 1 and 2 | ||
| // in order. | ||
| void PCSX::GTE::NCDT(uint32_t code) { | ||
|     uint32_t opBits = code & 0x1ffffff; | ||
|     gteFlag() = 0; | ||
|     const auto variant = sfLmIndex(opBits); | ||
|     if (variant == 0) { | ||
|         ncdsCore<false, false, 0>(); | ||
|         ncdsCore<false, false, 1>(); | ||
|         ncdsCore<false, false, 2>(); | ||
|     } else if (variant == 1) { | ||
|         ncdsCore<false, true, 0>(); | ||
|         ncdsCore<false, true, 1>(); | ||
|         ncdsCore<false, true, 2>(); | ||
|     } else if (variant == 2) { | ||
|         ncdsCore<true, false, 0>(); | ||
|         ncdsCore<true, false, 1>(); | ||
|         ncdsCore<true, false, 2>(); | ||
|     } else if (variant == 3) { | ||
|         ncdsCore<true, true, 0>(); | ||
|         ncdsCore<true, true, 1>(); | ||
|         ncdsCore<true, true, 2>(); | ||
|     } | ||
| } | ||
|
|
||
| // NCCS: resets gteFlag(), then runs nccsCore for vertex 0 with the | ||
| // <sf, lm> instantiation selected by sfLmIndex. | ||
| void PCSX::GTE::NCCS(uint32_t code) { | ||
|     uint32_t opBits = code & 0x1ffffff; | ||
|     gteFlag() = 0; | ||
|     const auto variant = sfLmIndex(opBits); | ||
|     if (variant == 0) { | ||
|         nccsCore<false, false, 0>(); | ||
|     } else if (variant == 1) { | ||
|         nccsCore<false, true, 0>(); | ||
|     } else if (variant == 2) { | ||
|         nccsCore<true, false, 0>(); | ||
|     } else if (variant == 3) { | ||
|         nccsCore<true, true, 0>(); | ||
|     } | ||
| } | ||
|
|
||
// CC: forwards to the cc member template through GTE_DISPATCH_SF_LM, which selects <sf, lm> from the masked opcode.
| void PCSX::GTE::CC(uint32_t code) { GTE_DISPATCH_SF_LM(cc); } | ||
|
|
||
| // NCS: resets gteFlag(), then runs ncsCore for vertex 0 with the | ||
| // <sf, lm> instantiation selected by sfLmIndex. | ||
| void PCSX::GTE::NCS(uint32_t code) { | ||
|     uint32_t opBits = code & 0x1ffffff; | ||
|     gteFlag() = 0; | ||
|     const auto variant = sfLmIndex(opBits); | ||
|     if (variant == 0) { | ||
|         ncsCore<false, false, 0>(); | ||
|     } else if (variant == 1) { | ||
|         ncsCore<false, true, 0>(); | ||
|     } else if (variant == 2) { | ||
|         ncsCore<true, false, 0>(); | ||
|     } else if (variant == 3) { | ||
|         ncsCore<true, true, 0>(); | ||
|     } | ||
| } | ||
|
|
||
| // NCT: resets gteFlag() once, then runs ncsCore on vertices 0, 1 and 2 | ||
| // in order. | ||
| void PCSX::GTE::NCT(uint32_t code) { | ||
|     uint32_t opBits = code & 0x1ffffff; | ||
|     gteFlag() = 0; | ||
|     const auto variant = sfLmIndex(opBits); | ||
|     if (variant == 0) { | ||
|         ncsCore<false, false, 0>(); | ||
|         ncsCore<false, false, 1>(); | ||
|         ncsCore<false, false, 2>(); | ||
|     } else if (variant == 1) { | ||
|         ncsCore<false, true, 0>(); | ||
|         ncsCore<false, true, 1>(); | ||
|         ncsCore<false, true, 2>(); | ||
|     } else if (variant == 2) { | ||
|         ncsCore<true, false, 0>(); | ||
|         ncsCore<true, false, 1>(); | ||
|         ncsCore<true, false, 2>(); | ||
|     } else if (variant == 3) { | ||
|         ncsCore<true, true, 0>(); | ||
|         ncsCore<true, true, 1>(); | ||
|         ncsCore<true, true, 2>(); | ||
|     } | ||
| } | ||
|
|
||
// SQR, DCPL, DPCT: forward to the sqr / dcpl / dpct member templates through GTE_DISPATCH_SF_LM,
// which selects the <sf, lm> instantiation from the masked opcode.
| void PCSX::GTE::SQR(uint32_t code) { GTE_DISPATCH_SF_LM(sqr); } | ||
| void PCSX::GTE::DCPL(uint32_t code) { GTE_DISPATCH_SF_LM(dcpl); } | ||
| void PCSX::GTE::DPCT(uint32_t code) { GTE_DISPATCH_SF_LM(dpct); } | ||
|
|
||
| // AVSZ3: mac0 = F(ZSF3 * sz1 + ZSF3 * sz2 + ZSF3 * sz3); otz is the raw | ||
| // (pre-F) sum passed through limD<true>. The code argument is not read. | ||
| void PCSX::GTE::AVSZ3(uint32_t code) { | ||
|     gteFlag() = 0; | ||
|     // Widen the scale factor first so each product and the running sum are | ||
|     // computed in 64 bits. Summing the products as 32-bit ints can overflow | ||
|     // before F() ever sees the value. | ||
|     const int64_t scale = gteZSF3(); | ||
|     int64_t rawMac0; | ||
|     mac0() = F(scale * sz1() + scale * sz2() + scale * sz3(), rawMac0); | ||
|     otz() = limD<true>(rawMac0); | ||
| } | ||
|
|
||
| // AVSZ4: mac0 = F(ZSF4 * sz0 + ZSF4 * sz1 + ZSF4 * sz2 + ZSF4 * sz3); otz is | ||
| // the raw (pre-F) sum passed through limD<true>. The code argument is not read. | ||
| void PCSX::GTE::AVSZ4(uint32_t code) { | ||
|     gteFlag() = 0; | ||
|     // Widen the scale factor first so each product and the running sum are | ||
|     // computed in 64 bits. Summing the products as 32-bit ints can overflow | ||
|     // before F() ever sees the value. | ||
|     const int64_t scale = gteZSF4(); | ||
|     int64_t rawMac0; | ||
|     mac0() = F(scale * sz0() + scale * sz1() + scale * sz2() + scale * sz3(), rawMac0); | ||
|     otz() = limD<true>(rawMac0); | ||
| } | ||
|
|
||
// GPF, GPL: forward to the gpf / gpl member templates through GTE_DISPATCH_SF_LM,
// which selects the <sf, lm> instantiation from the masked opcode.
| void PCSX::GTE::GPF(uint32_t code) { GTE_DISPATCH_SF_LM(gpf); } | ||
| void PCSX::GTE::GPL(uint32_t code) { GTE_DISPATCH_SF_LM(gpl); } | ||
|
|
||
| // NCCT: resets gteFlag() once, then runs nccsCore on vertices 0, 1 and 2 | ||
| // in order. | ||
| void PCSX::GTE::NCCT(uint32_t code) { | ||
|     uint32_t opBits = code & 0x1ffffff; | ||
|     gteFlag() = 0; | ||
|     const auto variant = sfLmIndex(opBits); | ||
|     if (variant == 0) { | ||
|         nccsCore<false, false, 0>(); | ||
|         nccsCore<false, false, 1>(); | ||
|         nccsCore<false, false, 2>(); | ||
|     } else if (variant == 1) { | ||
|         nccsCore<false, true, 0>(); | ||
|         nccsCore<false, true, 1>(); | ||
|         nccsCore<false, true, 2>(); | ||
|     } else if (variant == 2) { | ||
|         nccsCore<true, false, 0>(); | ||
|         nccsCore<true, false, 1>(); | ||
|         nccsCore<true, false, 2>(); | ||
|     } else if (variant == 3) { | ||
|         nccsCore<true, true, 0>(); | ||
|         nccsCore<true, true, 1>(); | ||
|         nccsCore<true, true, 2>(); | ||
|     } | ||
| } | ||
|
|
||
| #undef GTE_DISPATCH_SF_LM | ||
Uh oh!
There was an error while loading. Please reload this page.