diff --git a/src/core/gte-instructions.cc b/src/core/gte-instructions.cc new file mode 100644 index 000000000..c90515371 --- /dev/null +++ b/src/core/gte-instructions.cc @@ -0,0 +1,385 @@ +/*************************************************************************** + * Copyright (C) 2026 PCSX-Redux authors * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * + ***************************************************************************/ + +// GTE instruction implementations and public dispatch methods. +// +// Each instruction is implemented as a template parameterized on sf (shift +// factor) and lm (limit mode). The public methods decode these bits from +// the instruction encoding and dispatch to the right instantiation. +// +// MVMVA is further templatized on mx, v, and cv for full compile-time +// elimination of the matrix/vector selection branches. + +#include "core/gte.h" +#include "core/gte-internal.h" +#include "core/pgxp_debug.h" +#include "core/pgxp_gte.h" + +using namespace PCSX::GTEImpl; + +// ============================================================================ +// Template instruction implementations +// ============================================================================ + +// RTPS core: perspective transform for vertex v. 
+// When last=true, computes the depth queue interpolation at the end. +template +static void rtps(bool last) { + mac1() = A1(int44(trX() << 12) + + r11() * vertexX() + r12() * vertexY() + r13() * vertexZ()); + mac2() = A2(int44(trY() << 12) + + r21() * vertexX() + r22() * vertexY() + r23() * vertexZ()); + int64_t rawMac3; + mac3() = A3(int44(trZ() << 12) + + r31() * vertexX() + r32() * vertexY() + r33() * vertexZ(), rawMac3); + + ir1() = limB1(mac1()); + ir2() = limB2(mac2()); + ir3() = limB3sf(rawMac3); + + pushZ(limD(rawMac3)); + + int32_t hOverSz3 = gteDivide(gteH(), sz3()); + + sxy0() = sxy1(); + sxy1() = sxy2(); + + double widescreenFactor = PCSX::g_emulator->config().Widescreen ? 0.75 : 1.0; + // ir1()*hOverSz3 can exceed int32_t (hOverSz3 is up to 0x1FFFF), so widen ir first + sx2() = limG1(F(gteOFX() + (int64_t)ir1() * hOverSz3 * widescreenFactor) >> 16); + sy2() = limG2(F(gteOFY() + (int64_t)ir2() * hOverSz3) >> 16); + + PGXP_pushSXYZ2s(limG1ia(gteOFX() + (int64_t)ir1() * hOverSz3 * widescreenFactor), + limG2ia(gteOFY() + (int64_t)ir2() * hOverSz3), + std::max((int)sz3(), gteH() / 2), sxy2()); + + if (last) { + int64_t rawMac0; + mac0() = F(gteDQB() + gteDQA() * hOverSz3, rawMac0); + ir0() = limH(rawMac0); + } +} + +// OP: outer product using rotation matrix diagonal +template +void PCSX::GTE::op(uint32_t op) { + gteFlag() = 0; + mac1() = A1(r22() * ir3() - r33() * ir2()); + mac2() = A2(r33() * ir1() - r11() * ir3()); + mac3() = A3(r11() * ir2() - r22() * ir1()); + ir1() = limB1(mac1()); + ir2() = limB2(mac2()); + ir3() = limB3(mac3()); +} + +template +void PCSX::GTE::dpcs(uint32_t op) { + gteFlag() = 0; + depthCue(rgbR() << 16, rgbG() << 16, rgbB() << 16); + pushColor(); +} + +template +void PCSX::GTE::intpl(uint32_t op) { + gteFlag() = 0; + depthCue(ir1() << 12, ir2() << 12, ir3() << 12); + pushColor(); +} + +// MVMVA: fully templatized wrapper for dispatch table +template +static void mvmvaImpl() { + gteFlag() = 0; + matrixVectorMultiply(); +} + +// 
NCDS core: used by NCDS (v=0) and NCDT (v=0,1,2) +template +static void ncdsCore() { + lightTransform(); + colorMatrix(); + depthCueColor(); + pushColor(); +} + +template +void PCSX::GTE::cdp(uint32_t op) { + gteFlag() = 0; + colorMatrix(); + depthCueColor(); + pushColor(); +} + +// NCCS core: used by NCCS (v=0) and NCCT (v=0,1,2) +template +static void nccsCore() { + lightTransform(); + colorMatrix(); + colorApply(); + pushColor(); +} + +template +void PCSX::GTE::cc(uint32_t op) { + gteFlag() = 0; + colorMatrix(); + colorApply(); + pushColor(); +} + +// NCS core: used by NCS (v=0) and NCT (v=0,1,2) +template +static void ncsCore() { + lightTransform(); + colorMatrix(); + pushColor(); +} + +template +void PCSX::GTE::sqr(uint32_t op) { + gteFlag() = 0; + mac1() = A1(ir1() * ir1()); + mac2() = A2(ir2() * ir2()); + mac3() = A3(ir3() * ir3()); + ir1() = limB1(mac1()); + ir2() = limB2(mac2()); + ir3() = limB3(mac3()); +} + +template +void PCSX::GTE::dcpl(uint32_t op) { + gteFlag() = 0; + depthCueColor(); + pushColor(); +} + +template +void PCSX::GTE::dpct(uint32_t op) { + gteFlag() = 0; + for (int v = 0; v < 3; v++) { + depthCue(rgb0R() << 16, rgb0G() << 16, rgb0B() << 16); + pushColor(); + } +} + +template +void PCSX::GTE::gpf(uint32_t op) { + gteFlag() = 0; + mac1() = A1(ir0() * ir1()); + mac2() = A2(ir0() * ir2()); + mac3() = A3(ir0() * ir3()); + ir1() = limB1(mac1()); + ir2() = limB2(mac2()); + ir3() = limB3(mac3()); + pushColor(); +} + +template +void PCSX::GTE::gpl(uint32_t op) { + gteFlag() = 0; + int64_t shiftedMac1, shiftedMac2, shiftedMac3; + if constexpr (sf) { + shiftedMac1 = (int64_t)mac1() << 12; // <<12 on int32_t overflows + shiftedMac2 = (int64_t)mac2() << 12; + shiftedMac3 = (int64_t)mac3() << 12; + } else { + shiftedMac1 = mac1(); + shiftedMac2 = mac2(); + shiftedMac3 = mac3(); + } + mac1() = A1(shiftedMac1 + ir0() * ir1()); + mac2() = A2(shiftedMac2 + ir0() * ir2()); + mac3() = A3(shiftedMac3 + ir0() * ir3()); + ir1() = limB1(mac1()); + ir2() = 
limB2(mac2()); + ir3() = limB3(mac3()); + pushColor(); +} + +// ============================================================================ +// MVMVA dispatch table (256 entries: sf * lm * mx * v * cv) +// ============================================================================ + +namespace { + +template +struct MvmvaEntry { + static void fn() { mvmvaImpl(); } +}; + +using MvmvaFn = void (*)(); + +constexpr auto mvmvaTable = + PCSX::GTEImpl::makeMvmvaTable(std::make_index_sequence<256>{}); + +} // anonymous namespace + +// ============================================================================ +// Public dispatch methods +// ============================================================================ + +#define GTE_DISPATCH_SF_LM(method, ...) \ + do { \ + uint32_t _op = code & 0x1ffffff; \ + switch (sfLmIndex(_op)) { \ + case 0: method(_op, ##__VA_ARGS__); break; \ + case 1: method(_op, ##__VA_ARGS__); break; \ + case 2: method(_op, ##__VA_ARGS__); break; \ + case 3: method(_op, ##__VA_ARGS__); break; \ + } \ + } while (0) + +void PCSX::GTE::RTPS(uint32_t code) { + uint32_t _op = code & 0x1ffffff; + gteFlag() = 0; + switch (sfLmIndex(_op)) { + case 0: rtps(true); break; + case 1: rtps(true); break; + case 2: rtps(true); break; + case 3: rtps(true); break; + } +} + +void PCSX::GTE::RTPT(uint32_t code) { + uint32_t _op = code & 0x1ffffff; + gteFlag() = 0; + switch (sfLmIndex(_op)) { + case 0: rtps(false); rtps(false); rtps(true); break; + case 1: rtps(false); rtps(false); rtps(true); break; + case 2: rtps(false); rtps(false); rtps(true); break; + case 3: rtps(false); rtps(false); rtps(true); break; + } +} + +void PCSX::GTE::NCLIP(uint32_t code) { + gteFlag() = 0; + if (PGXP_NLCIP_valid(sxy0(), sxy1(), sxy2())) + mac0() = F(PGXP_NCLIP()); + else + mac0() = F((int64_t)sx0() * sy1() + sx1() * sy2() + sx2() * sy0() - + sx0() * sy2() - sx1() * sy0() - sx2() * sy1()); +} + +void PCSX::GTE::OP(uint32_t code) { GTE_DISPATCH_SF_LM(op); } +void 
PCSX::GTE::DPCS(uint32_t code) { GTE_DISPATCH_SF_LM(dpcs); } +void PCSX::GTE::INTPL(uint32_t code) { GTE_DISPATCH_SF_LM(intpl); } + +void PCSX::GTE::MVMVA(uint32_t code) { + uint32_t _op = code & 0x1ffffff; + unsigned sf = (_op >> 19) & 1; + unsigned lm = (_op >> 10) & 1; + unsigned mx = (_op >> 17) & 3; + unsigned v = (_op >> 15) & 3; + unsigned cv = (_op >> 13) & 3; + unsigned idx = (sf << 7) | (lm << 6) | (mx << 4) | (v << 2) | cv; + mvmvaTable[idx](); +} + +void PCSX::GTE::NCDS(uint32_t code) { + uint32_t _op = code & 0x1ffffff; + gteFlag() = 0; + switch (sfLmIndex(_op)) { + case 0: ncdsCore(); break; + case 1: ncdsCore(); break; + case 2: ncdsCore(); break; + case 3: ncdsCore(); break; + } +} + +void PCSX::GTE::CDP(uint32_t code) { GTE_DISPATCH_SF_LM(cdp); } + +void PCSX::GTE::NCDT(uint32_t code) { + uint32_t _op = code & 0x1ffffff; + gteFlag() = 0; + switch (sfLmIndex(_op)) { + case 0: ncdsCore(); ncdsCore(); ncdsCore(); break; + case 1: ncdsCore(); ncdsCore(); ncdsCore(); break; + case 2: ncdsCore(); ncdsCore(); ncdsCore(); break; + case 3: ncdsCore(); ncdsCore(); ncdsCore(); break; + } +} + +void PCSX::GTE::NCCS(uint32_t code) { + uint32_t _op = code & 0x1ffffff; + gteFlag() = 0; + switch (sfLmIndex(_op)) { + case 0: nccsCore(); break; + case 1: nccsCore(); break; + case 2: nccsCore(); break; + case 3: nccsCore(); break; + } +} + +void PCSX::GTE::CC(uint32_t code) { GTE_DISPATCH_SF_LM(cc); } + +void PCSX::GTE::NCS(uint32_t code) { + uint32_t _op = code & 0x1ffffff; + gteFlag() = 0; + switch (sfLmIndex(_op)) { + case 0: ncsCore(); break; + case 1: ncsCore(); break; + case 2: ncsCore(); break; + case 3: ncsCore(); break; + } +} + +void PCSX::GTE::NCT(uint32_t code) { + uint32_t _op = code & 0x1ffffff; + gteFlag() = 0; + switch (sfLmIndex(_op)) { + case 0: ncsCore(); ncsCore(); ncsCore(); break; + case 1: ncsCore(); ncsCore(); ncsCore(); break; + case 2: ncsCore(); ncsCore(); ncsCore(); break; + case 3: ncsCore(); ncsCore(); ncsCore(); break; + } +} + +void 
PCSX::GTE::SQR(uint32_t code) { GTE_DISPATCH_SF_LM(sqr); } +void PCSX::GTE::DCPL(uint32_t code) { GTE_DISPATCH_SF_LM(dcpl); } +void PCSX::GTE::DPCT(uint32_t code) { GTE_DISPATCH_SF_LM(dpct); } + +void PCSX::GTE::AVSZ3(uint32_t code) { + gteFlag() = 0; + int64_t rawMac0; + mac0() = F(gteZSF3() * sz1() + gteZSF3() * sz2() + gteZSF3() * sz3(), rawMac0); + otz() = limD(rawMac0); +} + +void PCSX::GTE::AVSZ4(uint32_t code) { + gteFlag() = 0; + int64_t rawMac0; + mac0() = F(gteZSF4() * sz0() + gteZSF4() * sz1() + gteZSF4() * sz2() + gteZSF4() * sz3(), rawMac0); + otz() = limD(rawMac0); +} + +void PCSX::GTE::GPF(uint32_t code) { GTE_DISPATCH_SF_LM(gpf); } +void PCSX::GTE::GPL(uint32_t code) { GTE_DISPATCH_SF_LM(gpl); } + +void PCSX::GTE::NCCT(uint32_t code) { + uint32_t _op = code & 0x1ffffff; + gteFlag() = 0; + switch (sfLmIndex(_op)) { + case 0: nccsCore(); nccsCore(); nccsCore(); break; + case 1: nccsCore(); nccsCore(); nccsCore(); break; + case 2: nccsCore(); nccsCore(); nccsCore(); break; + case 3: nccsCore(); nccsCore(); nccsCore(); break; + } +} + +#undef GTE_DISPATCH_SF_LM diff --git a/src/core/gte-internal.h b/src/core/gte-internal.h new file mode 100644 index 000000000..71a27c407 --- /dev/null +++ b/src/core/gte-internal.h @@ -0,0 +1,480 @@ +/*************************************************************************** + * Copyright (C) 2026 PCSX-Redux authors * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * + ***************************************************************************/ + +// GTE internal implementation header. +// +// Shared by gte-transfer.cc and gte-instructions.cc. Not part of the public +// interface. Contains register accessors, arithmetic helpers, limiter functions, +// and pipeline stage templates - everything that the GTE instruction +// implementations need but callers of the GTE class do not. + +#pragma once + +#include +#include +#include + +#include "core/gte.h" +#include "core/psxemulator.h" +#include "core/r3000a.h" +#include "support/table-generator.h" + +namespace PCSX { +namespace GTEImpl { + +// ============================================================================ +// 44-bit accumulator with per-addition overflow tracking +// ============================================================================ + +class int44 { + public: + int44(int64_t value) + : m_value(value), + m_posOverflow(value > INT64_C(0x7ffffffffff)), + m_negOverflow(value < INT64_C(-0x80000000000)) {} + + int44(int64_t value, bool posOverflow, bool negOverflow) + : m_value(value), m_posOverflow(posOverflow), m_negOverflow(negOverflow) {} + + int44 operator+(int64_t rhs) const { + int64_t result = ((m_value + rhs) << 20) >> 20; + return int44(result, m_posOverflow || (result < 0 && m_value >= 0 && rhs >= 0), + m_negOverflow || (result >= 0 && m_value < 0 && rhs < 0)); + } + + bool positiveOverflow() const { return m_posOverflow; } + bool negativeOverflow() const { return m_negOverflow; } + int64_t value() const { return m_value; } + + private: + int64_t m_value; + bool m_posOverflow; + bool m_negOverflow; +}; + +// ============================================================================ +// FLAG register bit definitions +// 
============================================================================ + +namespace Flag { +constexpr uint32_t GTE_ERROR = 1u << 31; +constexpr uint32_t MAC1_POS = GTE_ERROR | (1u << 30); +constexpr uint32_t MAC1_NEG = GTE_ERROR | (1u << 27); +constexpr uint32_t MAC2_POS = GTE_ERROR | (1u << 29); +constexpr uint32_t MAC2_NEG = GTE_ERROR | (1u << 26); +constexpr uint32_t MAC3_POS = GTE_ERROR | (1u << 28); +constexpr uint32_t MAC3_NEG = GTE_ERROR | (1u << 25); +constexpr uint32_t IR1_SAT = GTE_ERROR | (1u << 24); +constexpr uint32_t IR2_SAT = GTE_ERROR | (1u << 23); +constexpr uint32_t IR3_SAT = 1u << 22; +constexpr uint32_t COLOR_R_SAT = 1u << 21; +constexpr uint32_t COLOR_G_SAT = 1u << 20; +constexpr uint32_t COLOR_B_SAT = 1u << 19; +constexpr uint32_t SZ_SAT = GTE_ERROR | (1u << 18); +constexpr uint32_t DIV_OVER = GTE_ERROR | (1u << 17); +constexpr uint32_t MAC0_POS = GTE_ERROR | (1u << 16); +constexpr uint32_t MAC0_NEG = GTE_ERROR | (1u << 15); +constexpr uint32_t SX_SAT = GTE_ERROR | (1u << 14); +constexpr uint32_t SY_SAT = GTE_ERROR | (1u << 13); +constexpr uint32_t IR0_SAT = 1u << 12; +constexpr uint32_t ERROR_BITS = 0x7f87e000u; +} // namespace Flag + +// ============================================================================ +// Register access +// ============================================================================ + +inline PAIR* dataRegs() { return g_emulator->m_cpu->m_regs.CP2D.p; } +inline PAIR* ctrlRegs() { return g_emulator->m_cpu->m_regs.CP2C.p; } + +// Vertex vectors: compile-time v selection +template +inline int16_t vertexX() { + if constexpr (v < 3) return dataRegs()[v * 2].sw.l; + else return dataRegs()[9].sw.l; +} +template +inline int16_t vertexY() { + if constexpr (v < 3) return dataRegs()[v * 2].sw.h; + else return dataRegs()[10].sw.l; +} +template +inline int16_t vertexZ() { + if constexpr (v < 3) return dataRegs()[v * 2 + 1].sw.l; + else return dataRegs()[11].sw.l; +} + +// RGBC +inline uint8_t& rgbR() { return 
dataRegs()[6].b.l; } +inline uint8_t& rgbG() { return dataRegs()[6].b.h; } +inline uint8_t& rgbB() { return dataRegs()[6].b.h2; } +inline uint8_t& rgbCode() { return dataRegs()[6].b.h3; } + +inline uint16_t& otz() { return dataRegs()[7].w.l; } + +inline int16_t& ir0() { return dataRegs()[8].sw.l; } +inline int16_t& ir1() { return dataRegs()[9].sw.l; } +inline int16_t& ir2() { return dataRegs()[10].sw.l; } +inline int16_t& ir3() { return dataRegs()[11].sw.l; } + +inline uint32_t& sxy0() { return dataRegs()[12].d; } +inline int16_t& sx0() { return dataRegs()[12].sw.l; } +inline int16_t& sy0() { return dataRegs()[12].sw.h; } +inline uint32_t& sxy1() { return dataRegs()[13].d; } +inline int16_t& sx1() { return dataRegs()[13].sw.l; } +inline int16_t& sy1() { return dataRegs()[13].sw.h; } +inline uint32_t& sxy2() { return dataRegs()[14].d; } +inline int16_t& sx2() { return dataRegs()[14].sw.l; } +inline int16_t& sy2() { return dataRegs()[14].sw.h; } + +inline uint16_t& sz0() { return dataRegs()[16].w.l; } +inline uint16_t& sz1() { return dataRegs()[17].w.l; } +inline uint16_t& sz2() { return dataRegs()[18].w.l; } +inline uint16_t& sz3() { return dataRegs()[19].w.l; } + +inline uint32_t& rgb0() { return dataRegs()[20].d; } +inline uint8_t& rgb0R() { return dataRegs()[20].b.l; } +inline uint8_t& rgb0G() { return dataRegs()[20].b.h; } +inline uint8_t& rgb0B() { return dataRegs()[20].b.h2; } +inline uint32_t& rgb1() { return dataRegs()[21].d; } +inline uint32_t& rgb2() { return dataRegs()[22].d; } +inline uint8_t& rgb2R() { return dataRegs()[22].b.l; } +inline uint8_t& rgb2G() { return dataRegs()[22].b.h; } +inline uint8_t& rgb2B() { return dataRegs()[22].b.h2; } +inline uint8_t& rgb2Cd() { return dataRegs()[22].b.h3; } + +inline int32_t& mac0() { return dataRegs()[24].sd; } +inline int32_t& mac1() { return dataRegs()[25].sd; } +inline int32_t& mac2() { return dataRegs()[26].sd; } +inline int32_t& mac3() { return dataRegs()[27].sd; } + +// Control registers - rotation matrix 
+inline int16_t r11() { return ctrlRegs()[0].sw.l; } +inline int16_t r12() { return ctrlRegs()[0].sw.h; } +inline int16_t r13() { return ctrlRegs()[1].sw.l; } +inline int16_t r21() { return ctrlRegs()[1].sw.h; } +inline int16_t r22() { return ctrlRegs()[2].sw.l; } +inline int16_t r23() { return ctrlRegs()[2].sw.h; } +inline int16_t r31() { return ctrlRegs()[3].sw.l; } +inline int16_t r32() { return ctrlRegs()[3].sw.h; } +inline int16_t r33() { return ctrlRegs()[4].sw.l; } + +// Control registers used in 64-bit arithmetic return int64_t to avoid casts at every use site. +// The underlying storage is 32-bit or 16-bit; the widening happens here, once. +inline int64_t trX() { return ctrlRegs()[5].sd; } +inline int64_t trY() { return ctrlRegs()[6].sd; } +inline int64_t trZ() { return ctrlRegs()[7].sd; } +inline int64_t rbk() { return ctrlRegs()[13].sd; } +inline int64_t gbk() { return ctrlRegs()[14].sd; } +inline int64_t bbk() { return ctrlRegs()[15].sd; } +inline int64_t rfc() { return ctrlRegs()[21].sd; } +inline int64_t gfc() { return ctrlRegs()[22].sd; } +inline int64_t bfc() { return ctrlRegs()[23].sd; } +inline int64_t gteOFX() { return ctrlRegs()[24].sd; } +inline int64_t gteOFY() { return ctrlRegs()[25].sd; } +inline int16_t gteH() { return ctrlRegs()[26].sw.l; } // stays 16-bit for gteDivide signature +inline int64_t gteDQA() { return ctrlRegs()[27].sw.l; } +inline int64_t gteDQB() { return ctrlRegs()[28].sd; } +inline int64_t gteZSF3() { return ctrlRegs()[29].sw.l; } +inline int64_t gteZSF4() { return ctrlRegs()[30].sw.l; } +inline uint32_t& gteFlag() { return ctrlRegs()[31].d; } + +// Matrix element access - compile-time (mx, row, col) +template +inline int32_t matrixElement() { + if constexpr (mx < 3) { + constexpr int linear = row * 3 + col; + constexpr int regIdx = mx * 8 + linear / 2; + if constexpr (linear & 1) return ctrlRegs()[regIdx].sw.h; + else return ctrlRegs()[regIdx].sw.l; + } else { + // Garbage matrix: {-R<<4, R<<4, IR0, R13, R13, R13, R22, 
R22, R22} + constexpr int linear = row * 3 + col; + if constexpr (linear == 0) { return (-static_cast(dataRegs()[6].b.l)) << 4; } + else if constexpr (linear == 1) { return static_cast(dataRegs()[6].b.l) << 4; } + else if constexpr (linear == 2) { return ir0(); } + else if constexpr (linear <= 5) { return ctrlRegs()[1].sw.l; } // R13 + else { return ctrlRegs()[2].sw.l; } // R22 + } +} + +// Control vector component - compile-time (cv, component) +template +inline int64_t controlVector() { + if constexpr (cv == 3) return 0; + else return ctrlRegs()[cv * 8 + 5 + component].sd; +} + +// ============================================================================ +// Division +// ============================================================================ + +// UNR reciprocal table generator for GTE division. +// Formula from hardware: unrTable[i] = max(0, ((0x40000 / (i + 0x100)) + 1) / 2 - 0x101) +struct UNRGenerator { + static consteval uint8_t calculateValue(size_t i) { + int val = ((0x40000 / (int)(i + 0x100)) + 1) / 2 - 0x101; + return static_cast(val < 0 ? 
0 : val); + } +}; + +inline constexpr auto unrTable = generateTable<257, UNRGenerator>(); + +inline uint32_t gteDivide(uint16_t numerator, uint16_t denominator) { + if (numerator >= denominator * 2) { + gteFlag() |= Flag::DIV_OVER; + return 0x1ffff; + } + + int shift = GTE::countLeadingZeros16(denominator); + int r1 = (denominator << shift) & 0x7fff; + int r2 = unrTable[((r1 + 0x40) >> 7)] + 0x101; + int r3 = ((0x80 - (r2 * (r1 + 0x8000))) >> 8) & 0x1ffff; + uint32_t reciprocal = ((r2 * r3) + 0x80) >> 8; + uint32_t result = ((static_cast(reciprocal) * (numerator << shift)) + 0x8000) >> 16; + return std::min(0x1ffff, result); +} + +// ============================================================================ +// Limiter functions +// ============================================================================ + +inline int32_t lim(int32_t value, int32_t max, int32_t min, uint32_t flag) { + if (value > max) { gteFlag() |= flag; return max; } + if (value < min) { gteFlag() |= flag; return min; } + return value; +} + +template +inline int64_t gteShift(int64_t a) { + if constexpr (sf) return a >> 12; + else return a; +} + +template +inline int32_t bounds(int44 value, uint32_t posFlag, uint32_t negFlag) { + if (value.positiveOverflow()) gteFlag() |= posFlag; + if (value.negativeOverflow()) gteFlag() |= negFlag; + return static_cast(gteShift(value.value())); +} + +template +inline int32_t A1(int44 a) { return bounds(a, Flag::MAC1_POS, Flag::MAC1_NEG); } + +template +inline int32_t A2(int44 a) { return bounds(a, Flag::MAC2_POS, Flag::MAC2_NEG); } + +template +inline int32_t A3(int44 a, int64_t& rawOut) { + rawOut = a.value(); + return bounds(a, Flag::MAC3_POS, Flag::MAC3_NEG); +} + +template +inline int32_t A3(int44 a) { return bounds(a, Flag::MAC3_POS, Flag::MAC3_NEG); } + +inline int64_t F(int64_t a, int64_t& rawOut) { + rawOut = a; + if (a > INT64_C(0x7fffffff)) gteFlag() |= Flag::MAC0_POS; + if (a < INT64_C(-0x80000000)) gteFlag() |= Flag::MAC0_NEG; + return a; +} + 
+inline int64_t F(int64_t a) { + if (a > INT64_C(0x7fffffff)) gteFlag() |= Flag::MAC0_POS; + if (a < INT64_C(-0x80000000)) gteFlag() |= Flag::MAC0_NEG; + return a; +} + +template inline int32_t limB1(int32_t a) { return lim(a, 0x7fff, lm ? 0 : -0x8000, Flag::IR1_SAT); } +template inline int32_t limB2(int32_t a) { return lim(a, 0x7fff, lm ? 0 : -0x8000, Flag::IR2_SAT); } +template inline int32_t limB3(int32_t a) { return lim(a, 0x7fff, lm ? 0 : -0x8000, Flag::IR3_SAT); } + +template +inline int32_t limB3sf(int64_t rawMac3) { + int32_t valueSf = static_cast(gteShift(rawMac3)); + int32_t value12 = static_cast(rawMac3 >> 12); + constexpr int32_t min = lm ? 0 : -0x8000; + if (value12 < -0x8000 || value12 > 0x7fff) gteFlag() |= Flag::IR3_SAT; + return std::clamp(valueSf, min, 0x7fff); +} + +inline int32_t limC1(int32_t a) { return lim(a, 0xff, 0, Flag::COLOR_R_SAT); } +inline int32_t limC2(int32_t a) { return lim(a, 0xff, 0, Flag::COLOR_G_SAT); } +inline int32_t limC3(int32_t a) { return lim(a, 0xff, 0, Flag::COLOR_B_SAT); } + +template +inline int32_t limD(int64_t a) { return lim(static_cast(gteShift(a)), 0xffff, 0, Flag::SZ_SAT); } + +inline int32_t limG1(int64_t a) { + if (a > 0x3ff) { gteFlag() |= Flag::SX_SAT; return 0x3ff; } + if (a < -0x400) { gteFlag() |= Flag::SX_SAT; return -0x400; } + return static_cast(a); +} + +inline int32_t limG2(int64_t a) { + if (a > 0x3ff) { gteFlag() |= Flag::SY_SAT; return 0x3ff; } + if (a < -0x400) { gteFlag() |= Flag::SY_SAT; return -0x400; } + return static_cast(a); +} + +inline int32_t limG1ia(int64_t a) { return static_cast(std::clamp(a, -0x4000000, 0x3ffffff)); } +inline int32_t limG2ia(int64_t a) { return static_cast(std::clamp(a, -0x4000000, 0x3ffffff)); } + +inline int32_t limH(int64_t rawMac0) { + int64_t valueSf = rawMac0 >> 12; + int32_t value12 = static_cast(rawMac0 >> 12); + if (valueSf < 0 || valueSf > 0x1000) gteFlag() |= Flag::IR0_SAT; + return std::clamp(value12, 0, 0x1000); +} + +// 
============================================================================ +// FIFO operations +// ============================================================================ + +inline void pushZ(uint16_t z) { + sz0() = sz1(); sz1() = sz2(); sz2() = sz3(); sz3() = z; +} + +inline void pushColor() { + rgb0() = rgb1(); rgb1() = rgb2(); + rgb2Cd() = rgbCode(); + rgb2R() = limC1(mac1() >> 4); + rgb2G() = limC2(mac2() >> 4); + rgb2B() = limC3(mac3() >> 4); +} + +// ============================================================================ +// Pipeline stage: matrix-vector multiply (fully templatized) +// ============================================================================ + +template +inline void matrixVectorMultiply(int64_t& rawMac3) { + if constexpr (cv == 2) { + // FC bug path: columns 1-2 first, then column 0 for FLAG only + mac1() = A1(int44(matrixElement() * vertexY()) + + matrixElement() * vertexZ()); + mac2() = A2(int44(matrixElement() * vertexY()) + + matrixElement() * vertexZ()); + mac3() = A3(int44(matrixElement() * vertexY()) + + matrixElement() * vertexZ(), rawMac3); + // Column 0: FLAG side effects only, results discarded + limB1(A1(int44(controlVector() << 12) + + matrixElement() * vertexX())); + limB2(A2(int44(controlVector() << 12) + + matrixElement() * vertexX())); + limB3(A3(int44(controlVector() << 12) + + matrixElement() * vertexX())); + } else { + mac1() = A1(int44(controlVector() << 12) + + matrixElement() * vertexX() + + matrixElement() * vertexY() + + matrixElement() * vertexZ()); + mac2() = A2(int44(controlVector() << 12) + + matrixElement() * vertexX() + + matrixElement() * vertexY() + + matrixElement() * vertexZ()); + mac3() = A3(int44(controlVector() << 12) + + matrixElement() * vertexX() + + matrixElement() * vertexY() + + matrixElement() * vertexZ(), rawMac3); + } + ir1() = limB1(mac1()); + ir2() = limB2(mac2()); + ir3() = limB3(mac3()); +} + +template +inline void matrixVectorMultiply() { + int64_t unused; + 
matrixVectorMultiply(unused); +} + +// ============================================================================ +// Pipeline stage: light transform - L * V(v) -> MAC/IR +// ============================================================================ + +template +inline void lightTransform() { + matrixVectorMultiply(); +} + +// ============================================================================ +// Pipeline stage: color matrix - BK + C * IR -> MAC/IR +// ============================================================================ + +template +inline void colorMatrix() { + matrixVectorMultiply(); +} + +// ============================================================================ +// Pipeline stage: depth cue interpolation +// ============================================================================ + +template +inline void depthCue(int64_t inR, int64_t inG, int64_t inB) { + mac1() = A1(inR + ir0() * limB1(A1((rfc() << 12) - inR))); + mac2() = A2(inG + ir0() * limB2(A2((gfc() << 12) - inG))); + int64_t rawMac3; + mac3() = A3(inB + ir0() * limB3(A3((bfc() << 12) - inB)), rawMac3); + ir1() = limB1(mac1()); + ir2() = limB2(mac2()); + ir3() = limB3(mac3()); +} + +template +inline void depthCueColor() { + depthCue((int64_t)(rgbR() << 4) * ir1(), + (int64_t)(rgbG() << 4) * ir2(), + (int64_t)(rgbB() << 4) * ir3()); +} + +// ============================================================================ +// Pipeline stage: color apply - RGBC * IR -> MAC/IR +// ============================================================================ + +template +inline void colorApply() { + mac1() = A1((int64_t)(rgbR() << 4) * ir1()); + mac2() = A2((int64_t)(rgbG() << 4) * ir2()); + mac3() = A3((int64_t)(rgbB() << 4) * ir3()); + ir1() = limB1(mac1()); + ir2() = limB2(mac2()); + ir3() = limB3(mac3()); +} + +// ============================================================================ +// Dispatch helpers +// 
============================================================================ + +inline unsigned sfLmIndex(uint32_t op) { + return ((op >> 18) & 2) | ((op >> 10) & 1); +} + +// Generate a 256-entry dispatch table for MVMVA (sf * lm * mx * v * cv). +// Index layout: [sf:1][lm:1][mx:2][v:2][cv:2] +template class Impl, size_t... Is> +constexpr auto makeMvmvaTable(std::index_sequence) { + return std::array{ + Impl> 7), bool((Is >> 6) & 1), int((Is >> 4) & 3), int((Is >> 2) & 3), int(Is & 3)>::fn...}; +} + +} // namespace GTEImpl +} // namespace PCSX diff --git a/src/core/gte-transfer.cc b/src/core/gte-transfer.cc new file mode 100644 index 000000000..6768c5ed9 --- /dev/null +++ b/src/core/gte-transfer.cc @@ -0,0 +1,126 @@ +/*************************************************************************** + * Copyright (C) 2026 PCSX-Redux authors * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * + ***************************************************************************/ + +// GTE COP2 data transfer operations: MFC2, MTC2, CFC2, CTC2, LWC2, SWC2. 
+ +#include "core/gte.h" +#include "core/gte-internal.h" +#include "core/psxmem.h" + +using namespace PCSX::GTEImpl; + +uint32_t PCSX::GTE::MFC2(uint32_t code) { + return MFC2(static_cast(_Rd_)); +} + +uint32_t PCSX::GTE::MFC2(int reg) { + auto* d = dataRegs(); + switch (reg) { + case 1: case 3: case 5: + case 8: case 9: case 10: case 11: + d[reg].d = static_cast(d[reg].sw.l); + break; + case 7: case 16: case 17: case 18: case 19: + d[reg].d = static_cast(d[reg].w.l); + break; + case 15: + d[reg].d = sxy2(); + break; + case 28: case 29: + d[reg].d = lim(ir1() >> 7, 0x1f, 0, 0) | + (lim(ir2() >> 7, 0x1f, 0, 0) << 5) | + (lim(ir3() >> 7, 0x1f, 0, 0) << 10); + break; + } + return d[reg].d; +} + +uint32_t PCSX::GTE::CFC2(uint32_t code) { + return ctrlRegs()[_Rd_].d; +} + +void PCSX::GTE::MTC2(uint32_t value, int reg) { + auto* d = dataRegs(); + switch (reg) { + case 15: + sxy0() = sxy1(); + sxy1() = sxy2(); + sxy2() = value; + break; + case 28: + ir1() = (value & 0x1f) << 7; + ir2() = (value & 0x3e0) << 2; + ir3() = (value & 0x7c00) >> 3; + break; + case 30: + d[31].d = countLeadingBits(value); + break; + case 31: + return; + } + d[reg].d = value; +} + +void PCSX::GTE::MTC2(uint32_t code) { + MTC2(g_emulator->m_cpu->m_regs.GPR.r[_Rt_], _Rd_); +} + +void PCSX::GTE::CTC2(uint32_t value, int reg) { + switch (reg) { + case 4: case 12: case 20: + case 26: case 27: case 29: case 30: + value = static_cast(static_cast(value)); + break; + case 31: + value = value & 0x7ffff000; + if (value & Flag::ERROR_BITS) value |= Flag::GTE_ERROR; + break; + } + ctrlRegs()[reg].d = value; +} + +void PCSX::GTE::CTC2(uint32_t code) { + CTC2(g_emulator->m_cpu->m_regs.GPR.r[_Rt_], _Rd_); +} + +void PCSX::GTE::LWC2(uint32_t code) { + uint32_t addr = g_emulator->m_cpu->m_regs.GPR.r[_Rs_] + _Imm_; + if (addr & 3) { + g_emulator->m_cpu->m_regs.pc -= 4; + g_system->log(LogClass::CPU, _("Unaligned address 0x%08x in LWC2 from 0x%08x\n"), addr, + g_emulator->m_cpu->m_regs.pc); + 
g_emulator->m_cpu->m_regs.CP0.n.BadVAddr = addr; + g_emulator->m_cpu->exception(R3000Acpu::Exception::LoadAddressError, g_emulator->m_cpu->m_inDelaySlot); + return; + } + MTC2(g_emulator->m_mem->read32(addr), _Rt_); +} + +void PCSX::GTE::SWC2(uint32_t code) { + uint32_t addr = g_emulator->m_cpu->m_regs.GPR.r[_Rs_] + _Imm_; + if (addr & 3) { + g_emulator->m_cpu->m_regs.pc -= 4; + g_system->log(LogClass::CPU, _("Unaligned address 0x%08x in SWC2 from 0x%08x\n"), addr, + g_emulator->m_cpu->m_regs.pc); + g_emulator->m_cpu->m_regs.CP0.n.BadVAddr = addr; + g_emulator->m_cpu->exception(R3000Acpu::Exception::StoreAddressError, g_emulator->m_cpu->m_inDelaySlot); + return; + } + g_emulator->m_mem->write32(addr, MFC2(static_cast(_Rt_))); +} diff --git a/src/core/gte.cc b/src/core/gte.cc deleted file mode 100644 index cf9799466..000000000 --- a/src/core/gte.cc +++ /dev/null @@ -1,1026 +0,0 @@ -/* - * PlayStation Geometry Transformation Engine emulator - * - * Copyright 2003-2013 smf - * - */ - -#include "core/gte.h" - -#include - -#include "core/pgxp_debug.h" -#include "core/pgxp_gte.h" -#include "core/psxmem.h" - -#undef GTE_SF -#undef GTE_MX -#undef GTE_V -#undef GTE_CV -#undef GTE_LM -#undef GTE_FUNCT - -#undef VX0 -#undef VY0 -#undef VZ0 -#undef VX1 -#undef VY1 -#undef VZ1 -#undef VX2 -#undef VY2 -#undef VZ2 -#undef R -#undef G -#undef B -#undef CODE -#undef OTZ -#undef IR0 -#undef IR1 -#undef IR2 -#undef IR3 -#undef SXY0 -#undef SX0 -#undef SY0 -#undef SXY1 -#undef SX1 -#undef SY1 -#undef SXY2 -#undef SX2 -#undef SY2 -#undef SXYP -#undef SXP -#undef SYP -#undef SZ0 -#undef SZ1 -#undef SZ2 -#undef SZ3 -#undef RGB0 -#undef R0 -#undef G0 -#undef B0 -#undef CD0 -#undef RGB1 -#undef R1 -#undef G1 -#undef B1 -#undef CD1 -#undef RGB2 -#undef R2 -#undef G2 -#undef B2 -#undef CD2 -#undef RES1 -#undef MAC0 -#undef MAC1 -#undef MAC2 -#undef MAC3 -#undef IRGB -#undef ORGB -#undef LZCS -#undef LZCR - -#undef R11 -#undef R12 -#undef R13 -#undef R21 -#undef R22 -#undef R23 -#undef R31 
-#undef R32 -#undef R33 -#undef TRX -#undef TRY -#undef TRZ -#undef L11 -#undef L12 -#undef L13 -#undef L21 -#undef L22 -#undef L23 -#undef L31 -#undef L32 -#undef L33 -#undef RBK -#undef GBK -#undef BBK -#undef LR1 -#undef LR2 -#undef LR3 -#undef LG1 -#undef LG2 -#undef LG3 -#undef LB1 -#undef LB2 -#undef LB3 -#undef RFC -#undef GFC -#undef BFC -#undef OFX -#undef OFY -#undef H -#undef DQA -#undef DQB -#undef ZSF3 -#undef ZSF4 -#undef FLAG - -#undef VX -#undef VY -#undef VZ -#undef MX11 -#undef MX12 -#undef MX13 -#undef MX21 -#undef MX22 -#undef MX23 -#undef MX31 -#undef MX32 -#undef MX33 -#undef CV1 -#undef CV2 -#undef CV3 - -#define GTE_SF(op) ((op >> 19) & 1) -#define GTE_MX(op) ((op >> 17) & 3) -#define GTE_V(op) ((op >> 15) & 3) -#define GTE_CV(op) ((op >> 13) & 3) -#define GTE_LM(op) ((op >> 10) & 1) -#define GTE_FUNCT(op) (op & 63) - -#define VX0 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[0].sw.l) -#define VY0 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[0].sw.h) -#define VZ0 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[1].sw.l) -#define VX1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[2].w.l) -#define VY1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[2].w.h) -#define VZ1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[3].w.l) -#define VX2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[4].w.l) -#define VY2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[4].w.h) -#define VZ2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[5].w.l) -#define R (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[6].b.l) -#define G (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[6].b.h) -#define B (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[6].b.h2) -#define CODE (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[6].b.h3) -#define OTZ (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[7].w.l) -#define IR0 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[8].sw.l) -#define IR1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[9].sw.l) -#define IR2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[10].sw.l) -#define IR3 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[11].sw.l) -#define SXY0 
(PCSX::g_emulator->m_cpu->m_regs.CP2D.p[12].d) -#define SX0 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[12].sw.l) -#define SY0 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[12].sw.h) -#define SXY1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[13].d) -#define SX1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[13].sw.l) -#define SY1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[13].sw.h) -#define SXY2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[14].d) -#define SX2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[14].sw.l) -#define SY2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[14].sw.h) -#define SXYP (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[15].d) -#define SXP (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[15].sw.l) -#define SYP (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[15].sw.h) -#define SZ0 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[16].w.l) -#define SZ1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[17].w.l) -#define SZ2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[18].w.l) -#define SZ3 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[19].w.l) -#define RGB0 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[20].d) -#define R0 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[20].b.l) -#define G0 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[20].b.h) -#define B0 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[20].b.h2) -#define CD0 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[20].b.h3) -#define RGB1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[21].d) -#define R1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[21].b.l) -#define G1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[21].b.h) -#define B1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[21].b.h2) -#define CD1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[21].b.h3) -#define RGB2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[22].d) -#define R2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[22].b.l) -#define G2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[22].b.h) -#define B2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[22].b.h2) -#define CD2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[22].b.h3) -#define RES1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[23].d) -#define MAC0 
(PCSX::g_emulator->m_cpu->m_regs.CP2D.p[24].sd) -#define MAC1 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[25].sd) -#define MAC2 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[26].sd) -#define MAC3 (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[27].sd) -#define IRGB (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[28].d) -#define ORGB (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[29].d) -#define LZCS (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[30].d) -#define LZCR (PCSX::g_emulator->m_cpu->m_regs.CP2D.p[31].d) - -#define R11 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[0].sw.l) -#define R12 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[0].sw.h) -#define R13 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[1].sw.l) -#define R21 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[1].sw.h) -#define R22 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[2].sw.l) -#define R23 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[2].sw.h) -#define R31 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[3].sw.l) -#define R32 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[3].sw.h) -#define R33 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[4].sw.l) -#define TRX (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[5].sd) -#define TRY (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[6].sd) -#define TRZ (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[7].sd) -#define L11 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[8].sw.l) -#define L12 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[8].sw.h) -#define L13 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[9].sw.l) -#define L21 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[9].sw.h) -#define L22 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[10].sw.l) -#define L23 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[10].sw.h) -#define L31 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[11].sw.l) -#define L32 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[11].sw.h) -#define L33 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[12].sw.l) -#define RBK (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[13].sd) -#define GBK (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[14].sd) -#define BBK (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[15].sd) -#define LR1 
(PCSX::g_emulator->m_cpu->m_regs.CP2C.p[16].sw.l) -#define LR2 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[16].sw.h) -#define LR3 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[17].sw.l) -#define LG1 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[17].sw.h) -#define LG2 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[18].sw.l) -#define LG3 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[18].sw.h) -#define LB1 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[19].sw.l) -#define LB2 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[19].sw.h) -#define LB3 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[20].sw.l) -#define RFC (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[21].sd) -#define GFC (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[22].sd) -#define BFC (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[23].sd) -#define OFX (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[24].sd) -#define OFY (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[25].sd) -#define H (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[26].sw.l) -#define DQA (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[27].sw.l) -#define DQB (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[28].sd) -#define ZSF3 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[29].sw.l) -#define ZSF4 (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[30].sw.l) -#define FLAG (PCSX::g_emulator->m_cpu->m_regs.CP2C.p[31].d) - -#define VX(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2D.p[n << 1].sw.l : IR1) -#define VY(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2D.p[n << 1].sw.h : IR2) -#define VZ(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2D.p[(n << 1) + 1].sw.l : IR3) -#define MX11(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2C.p[(n << 3)].sw.l : -R << 4) -#define MX12(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2C.p[(n << 3)].sw.h : R << 4) -#define MX13(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2C.p[(n << 3) + 1].sw.l : IR0) -#define MX21(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2C.p[(n << 3) + 1].sw.h : R13) -#define MX22(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2C.p[(n << 3) + 2].sw.l : R13) -#define MX23(n) (n < 3 ? 
PCSX::g_emulator->m_cpu->m_regs.CP2C.p[(n << 3) + 2].sw.h : R13) -#define MX31(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2C.p[(n << 3) + 3].sw.l : R22) -#define MX32(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2C.p[(n << 3) + 3].sw.h : R22) -#define MX33(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2C.p[(n << 3) + 4].sw.l : R22) -#define CV1(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2C.p[(n << 3) + 5].sd : 0) -#define CV2(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2C.p[(n << 3) + 6].sd : 0) -#define CV3(n) (n < 3 ? PCSX::g_emulator->m_cpu->m_regs.CP2C.p[(n << 3) + 7].sd : 0) - -static int32_t LIM(int32_t value, int32_t max, int32_t min, uint32_t flag) { - if (value > max) { - FLAG |= flag; - return max; - } else if (value < min) { - FLAG |= flag; - return min; - } - - return value; -} - -uint32_t PCSX::GTE::MFC2_internal(int reg) { - switch (reg) { - case 1: - case 3: - case 5: - case 8: - case 9: - case 10: - case 11: - PCSX::g_emulator->m_cpu->m_regs.CP2D.p[reg].d = (int32_t)PCSX::g_emulator->m_cpu->m_regs.CP2D.p[reg].sw.l; - break; - - case 7: - case 16: - case 17: - case 18: - case 19: - PCSX::g_emulator->m_cpu->m_regs.CP2D.p[reg].d = (uint32_t)PCSX::g_emulator->m_cpu->m_regs.CP2D.p[reg].w.l; - break; - - case 15: - PCSX::g_emulator->m_cpu->m_regs.CP2D.p[reg].d = SXY2; - break; - - case 28: - case 29: - PCSX::g_emulator->m_cpu->m_regs.CP2D.p[reg].d = - LIM(IR1 >> 7, 0x1f, 0, 0) | (LIM(IR2 >> 7, 0x1f, 0, 0) << 5) | (LIM(IR3 >> 7, 0x1f, 0, 0) << 10); - break; - } - - return PCSX::g_emulator->m_cpu->m_regs.CP2D.p[reg].d; -} - -void PCSX::GTE::MTC2_internal(uint32_t value, int reg) { - switch (reg) { - case 15: - SXY0 = SXY1; - SXY1 = SXY2; - SXY2 = value; - break; - - case 28: - IR1 = (value & 0x1f) << 7; - IR2 = (value & 0x3e0) << 2; - IR3 = (value & 0x7c00) >> 3; - break; - - case 30: - LZCR = countLeadingBits(value); - break; - - case 31: - return; - } - - PCSX::g_emulator->m_cpu->m_regs.CP2D.p[reg].d = value; -} - -void 
PCSX::GTE::CTC2_internal(uint32_t value, int reg) { - switch (reg) { - case 4: - case 12: - case 20: - case 26: - case 27: - case 29: - case 30: - value = (int32_t)(int16_t)value; - break; - - case 31: - value = value & 0x7ffff000; - if ((value & 0x7f87e000) != 0) value |= 0x80000000; - break; - } - - PCSX::g_emulator->m_cpu->m_regs.CP2C.p[reg].d = value; -} - -// Push a Z value to the Z-coordinate FIFO -void PCSX::GTE::pushZ(uint16_t z) { - SZ0 = SZ1; - SZ1 = SZ2; - SZ2 = SZ3; - SZ3 = z; -} - -// Arithmetic shift right by (sf * 12) -static inline int64_t gte_shift(int64_t a, int sf) { return sf == 0 ? a : a >> 12; } -// Shift left by (sf * 12) for GPL -static inline int64_t gte_shift_GPL(int64_t a, int sf) { return sf == 0 ? a : a << 12; } - -int32_t PCSX::GTE::BOUNDS(int44 value, int max_flag, int min_flag) { - if (value.positiveOverflow()) FLAG |= max_flag; - if (value.negativeOverflow()) FLAG |= min_flag; - - return gte_shift(value.value(), s_sf); -} - -static uint32_t gte_divide(uint16_t numerator, uint16_t denominator) { - if (numerator >= denominator * 2) { // Division overflow - FLAG |= (1 << 31) | (1 << 17); - return 0x1ffff; - } - - static uint8_t table[] = { - 0xff, 0xfd, 0xfb, 0xf9, 0xf7, 0xf5, 0xf3, 0xf1, 0xef, 0xee, 0xec, 0xea, 0xe8, 0xe6, 0xe4, 0xe3, 0xe1, 0xdf, - 0xdd, 0xdc, 0xda, 0xd8, 0xd6, 0xd5, 0xd3, 0xd1, 0xd0, 0xce, 0xcd, 0xcb, 0xc9, 0xc8, 0xc6, 0xc5, 0xc3, 0xc1, - 0xc0, 0xbe, 0xbd, 0xbb, 0xba, 0xb8, 0xb7, 0xb5, 0xb4, 0xb2, 0xb1, 0xb0, 0xae, 0xad, 0xab, 0xaa, 0xa9, 0xa7, - 0xa6, 0xa4, 0xa3, 0xa2, 0xa0, 0x9f, 0x9e, 0x9c, 0x9b, 0x9a, 0x99, 0x97, 0x96, 0x95, 0x94, 0x92, 0x91, 0x90, - 0x8f, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x7f, 0x7e, 0x7d, 0x7c, 0x7b, - 0x7a, 0x79, 0x78, 0x77, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70, 0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, - 0x67, 0x66, 0x65, 0x64, 0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58, 0x57, - 0x56, 0x55, 0x54, 0x53, 0x53, 0x52, 
0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x48, - 0x47, 0x46, 0x45, 0x44, 0x43, 0x43, 0x42, 0x41, 0x40, 0x3f, 0x3f, 0x3e, 0x3d, 0x3c, 0x3c, 0x3b, 0x3a, 0x39, - 0x39, 0x38, 0x37, 0x36, 0x36, 0x35, 0x34, 0x33, 0x33, 0x32, 0x31, 0x31, 0x30, 0x2f, 0x2e, 0x2e, 0x2d, 0x2c, - 0x2c, 0x2b, 0x2a, 0x2a, 0x29, 0x28, 0x28, 0x27, 0x26, 0x26, 0x25, 0x24, 0x24, 0x23, 0x22, 0x22, 0x21, 0x20, - 0x20, 0x1f, 0x1e, 0x1e, 0x1d, 0x1d, 0x1c, 0x1b, 0x1b, 0x1a, 0x19, 0x19, 0x18, 0x18, 0x17, 0x16, 0x16, 0x15, - 0x15, 0x14, 0x14, 0x13, 0x12, 0x12, 0x11, 0x11, 0x10, 0x0f, 0x0f, 0x0e, 0x0e, 0x0d, 0x0d, 0x0c, 0x0c, 0x0b, - 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08, 0x07, 0x07, 0x06, 0x06, 0x05, 0x05, 0x04, 0x04, 0x03, 0x03, 0x02, 0x02, - 0x01, 0x01, 0x00, 0x00, 0x00}; - - int shift = PCSX::GTE::countLeadingZeros16(denominator); - - int r1 = (denominator << shift) & 0x7fff; - int r2 = table[((r1 + 0x40) >> 7)] + 0x101; - int r3 = ((0x80 - (r2 * (r1 + 0x8000))) >> 8) & 0x1ffff; - uint32_t reciprocal = ((r2 * r3) + 0x80) >> 8; - - const uint32_t res = ((((uint64_t)reciprocal * (numerator << shift)) + 0x8000) >> 16); - - // Some divisions like 0xF015/0x780B result in 0x20000, but are saturated to 0x1ffff without setting FLAG - return std::min(0x1ffff, res); -} - -// Setting bits 12 & 19-22 in FLAG does not set bit 31 - -int32_t PCSX::GTE::A1(int44 a) { return BOUNDS(a, (1 << 31) | (1 << 30), (1 << 31) | (1 << 27)); } -int32_t PCSX::GTE::A2(int44 a) { return BOUNDS(a, (1 << 31) | (1 << 29), (1 << 31) | (1 << 26)); } -int32_t PCSX::GTE::A3(int44 a) { - s_mac3 = a.value(); - return BOUNDS(a, (1 << 31) | (1 << 28), (1 << 31) | (1 << 25)); -} -static int32_t Lm_B1(int32_t a, int lm) { return LIM(a, 0x7fff, -0x8000 * !lm, (1 << 31) | (1 << 24)); } -static int32_t Lm_B2(int32_t a, int lm) { return LIM(a, 0x7fff, -0x8000 * !lm, (1 << 31) | (1 << 23)); } -static int32_t Lm_B3(int32_t a, int lm) { return LIM(a, 0x7fff, -0x8000 * !lm, (1 << 22)); } - -static int32_t Lm_B3_sf(int64_t value, int 
sf, int lm) { - int32_t value_sf = gte_shift(value, sf); - int32_t value_12 = gte_shift(value, 1); - constexpr int32_t max = 0x7fff; - int32_t min = 0; - if (lm == 0) min = -0x8000; - - if (value_12 < -0x8000 || value_12 > 0x7fff) FLAG |= (1 << 22); - return std::clamp(value_sf, min, max); -} - -static int32_t Lm_C1(int32_t a) { return LIM(a, 0x00ff, 0x0000, (1 << 21)); } -static int32_t Lm_C2(int32_t a) { return LIM(a, 0x00ff, 0x0000, (1 << 20)); } -static int32_t Lm_C3(int32_t a) { return LIM(a, 0x00ff, 0x0000, (1 << 19)); } -static int32_t Lm_D(int64_t a, int sf) { return LIM(gte_shift(a, sf), 0xffff, 0x0000, (1 << 31) | (1 << 18)); } - -int64_t PCSX::GTE::F(int64_t a) { - s_mac0 = a; - - if (a > S64(0x7fffffff)) FLAG |= (1 << 31) | (1 << 16); - - if (a < S64(-0x80000000)) FLAG |= (1 << 31) | (1 << 15); - - return a; -} - -static int32_t Lm_G1(int64_t a) { - if (a > 0x3ff) { - FLAG |= (1 << 31) | (1 << 14); - return 0x3ff; - } - if (a < -0x400) { - FLAG |= (1 << 31) | (1 << 14); - return -0x400; - } - - return a; -} - -static int32_t Lm_G2(int64_t a) { - if (a > 0x3ff) { - FLAG |= (1 << 31) | (1 << 13); - return 0x3ff; - } - - if (a < -0x400) { - FLAG |= (1 << 31) | (1 << 13); - return -0x400; - } - - return a; -} - -static int32_t Lm_G1_ia(int64_t a) { return std::clamp(a, -0x4000000, 0x3ffffff); } -static int32_t Lm_G2_ia(int64_t a) { return std::clamp(a, -0x4000000, 0x3ffffff); } - -static int32_t Lm_H(int64_t value, int sf) { - int64_t value_sf = gte_shift(value, sf); - int32_t value_12 = gte_shift(value, 1); - constexpr int32_t max = 0x1000; - constexpr int32_t min = 0x0000; - - if (value_sf < min || value_sf > max) FLAG |= (1 << 12); - return std::clamp(value_12, min, max); -} - -void PCSX::GTE::RTPS(uint32_t op) { - GTE_LOG("%08x GTE: RTPS|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - MAC1 = A1(int44((int64_t)TRX << 12) + (R11 * VX0) + (R12 * VY0) + (R13 * VZ0)); - MAC2 = A2(int44((int64_t)TRY << 12) + (R21 * 
VX0) + (R22 * VY0) + (R23 * VZ0)); - MAC3 = A3(int44((int64_t)TRZ << 12) + (R31 * VX0) + (R32 * VY0) + (R33 * VZ0)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3_sf(s_mac3, s_sf, lm); - pushZ(Lm_D(s_mac3, 1)); - - const int32_t h_over_sz3 = gte_divide(H, SZ3); - SXY0 = SXY1; - SXY1 = SXY2; - SX2 = - Lm_G1(F((int64_t)OFX + ((int64_t)IR1 * h_over_sz3) * (PCSX::g_emulator->config().Widescreen ? 0.75 : 1)) >> 16); - - SY2 = Lm_G2(F((int64_t)OFY + ((int64_t)IR2 * h_over_sz3)) >> 16); - - PGXP_pushSXYZ2s( - Lm_G1_ia((int64_t)OFX + (int64_t)(IR1 * h_over_sz3) * (PCSX::g_emulator->config().Widescreen ? 0.75 : 1)), - Lm_G2_ia((int64_t)OFY + (int64_t)(IR2 * h_over_sz3)), std::max((int)SZ3, H / 2), SXY2); - - // PGXP_RTPS(0, SXY2); - - MAC0 = F((int64_t)DQB + ((int64_t)DQA * h_over_sz3)); - IR0 = Lm_H(s_mac0, 1); -} - -void PCSX::GTE::NCLIP(uint32_t op) { - GTE_LOG("%08x GTE: NCLIP|", op); - FLAG = 0; - - if (PGXP_NLCIP_valid(SXY0, SXY1, SXY2)) - MAC0 = F(PGXP_NCLIP()); - else - MAC0 = F((int64_t)(SX0 * SY1) + (SX1 * SY2) + (SX2 * SY0) - (SX0 * SY2) - (SX1 * SY0) - (SX2 * SY1)); -} - -void PCSX::GTE::OP(uint32_t op) { - GTE_LOG("%08x GTE: OP|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - MAC1 = A1((int64_t)(R22 * IR3) - (R33 * IR2)); - MAC2 = A2((int64_t)(R33 * IR1) - (R11 * IR3)); - MAC3 = A3((int64_t)(R11 * IR2) - (R22 * IR1)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); -} - -void PCSX::GTE::DPCS(uint32_t op) { - GTE_LOG("%08x GTE: DPCS|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - MAC1 = A1((R << 16) + (IR0 * Lm_B1(A1(((int64_t)RFC << 12) - (R << 16)), 0))); - MAC2 = A2((G << 16) + (IR0 * Lm_B2(A2(((int64_t)GFC << 12) - (G << 16)), 0))); - MAC3 = A3((B << 16) + (IR0 * Lm_B3(A3(((int64_t)BFC << 12) - (B << 16)), 0))); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - 
R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); -} - -void PCSX::GTE::INTPL(uint32_t op) { - GTE_LOG("%08x GTE: INTPL|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - MAC1 = A1((IR1 << 12) + (IR0 * Lm_B1(A1(((int64_t)RFC << 12) - (IR1 << 12)), 0))); - MAC2 = A2((IR2 << 12) + (IR0 * Lm_B2(A2(((int64_t)GFC << 12) - (IR2 << 12)), 0))); - MAC3 = A3((IR3 << 12) + (IR0 * Lm_B3(A3(((int64_t)BFC << 12) - (IR3 << 12)), 0))); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); -} - -void PCSX::GTE::MVMVA(uint32_t op) { - GTE_LOG("%08x GTE: MVMVA|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - const int mx = GTE_MX(gteop(op)); - const int v = GTE_V(gteop(op)); - const int cv = GTE_CV(gteop(op)); - - switch (cv) { - case 2: - MAC1 = A1((int64_t)(MX12(mx) * VY(v)) + (MX13(mx) * VZ(v))); - MAC2 = A2((int64_t)(MX22(mx) * VY(v)) + (MX23(mx) * VZ(v))); - MAC3 = A3((int64_t)(MX32(mx) * VY(v)) + (MX33(mx) * VZ(v))); - Lm_B1(A1(((int64_t)CV1(cv) << 12) + (MX11(mx) * VX(v))), 0); - Lm_B2(A2(((int64_t)CV2(cv) << 12) + (MX21(mx) * VX(v))), 0); - Lm_B3(A3(((int64_t)CV3(cv) << 12) + (MX31(mx) * VX(v))), 0); - break; - - default: - MAC1 = A1(int44((int64_t)CV1(cv) << 12) + (MX11(mx) * VX(v)) + (MX12(mx) * VY(v)) + (MX13(mx) * VZ(v))); - MAC2 = A2(int44((int64_t)CV2(cv) << 12) + (MX21(mx) * VX(v)) + (MX22(mx) * VY(v)) + (MX23(mx) * VZ(v))); - MAC3 = A3(int44((int64_t)CV3(cv) << 12) + (MX31(mx) * VX(v)) + (MX32(mx) * VY(v)) + (MX33(mx) * VZ(v))); - break; - } - - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); -} - -void PCSX::GTE::NCDS(uint32_t op) { - GTE_LOG("%08x GTE: NCDS|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - MAC1 = A1((int64_t)(L11 * VX0) + (L12 * VY0) + (L13 * 
VZ0)); - MAC2 = A2((int64_t)(L21 * VX0) + (L22 * VY0) + (L23 * VZ0)); - MAC3 = A3((int64_t)(L31 * VX0) + (L32 * VY0) + (L33 * VZ0)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - MAC1 = A1(int44((int64_t)RBK << 12) + (LR1 * IR1) + (LR2 * IR2) + (LR3 * IR3)); - MAC2 = A2(int44((int64_t)GBK << 12) + (LG1 * IR1) + (LG2 * IR2) + (LG3 * IR3)); - MAC3 = A3(int44((int64_t)BBK << 12) + (LB1 * IR1) + (LB2 * IR2) + (LB3 * IR3)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - MAC1 = A1(((R << 4) * IR1) + (IR0 * Lm_B1(A1(((int64_t)RFC << 12) - ((R << 4) * IR1)), 0))); - MAC2 = A2(((G << 4) * IR2) + (IR0 * Lm_B2(A2(((int64_t)GFC << 12) - ((G << 4) * IR2)), 0))); - MAC3 = A3(((B << 4) * IR3) + (IR0 * Lm_B3(A3(((int64_t)BFC << 12) - ((B << 4) * IR3)), 0))); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); -} - -void PCSX::GTE::CDP(uint32_t op) { - GTE_LOG("%08x GTE: CDP|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - MAC1 = A1(int44((int64_t)RBK << 12) + (LR1 * IR1) + (LR2 * IR2) + (LR3 * IR3)); - MAC2 = A2(int44((int64_t)GBK << 12) + (LG1 * IR1) + (LG2 * IR2) + (LG3 * IR3)); - MAC3 = A3(int44((int64_t)BBK << 12) + (LB1 * IR1) + (LB2 * IR2) + (LB3 * IR3)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - MAC1 = A1(((R << 4) * IR1) + (IR0 * Lm_B1(A1(((int64_t)RFC << 12) - ((R << 4) * IR1)), 0))); - MAC2 = A2(((G << 4) * IR2) + (IR0 * Lm_B2(A2(((int64_t)GFC << 12) - ((G << 4) * IR2)), 0))); - MAC3 = A3(((B << 4) * IR3) + (IR0 * Lm_B3(A3(((int64_t)BFC << 12) - ((B << 4) * IR3)), 0))); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); -} - -void PCSX::GTE::NCDT(uint32_t 
op) { - GTE_LOG("%08x GTE: NCDT|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - for (int v = 0; v < 3; v++) { - MAC1 = A1((int64_t)(L11 * VX(v)) + (L12 * VY(v)) + (L13 * VZ(v))); - MAC2 = A2((int64_t)(L21 * VX(v)) + (L22 * VY(v)) + (L23 * VZ(v))); - MAC3 = A3((int64_t)(L31 * VX(v)) + (L32 * VY(v)) + (L33 * VZ(v))); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - MAC1 = A1(int44((int64_t)RBK << 12) + (LR1 * IR1) + (LR2 * IR2) + (LR3 * IR3)); - MAC2 = A2(int44((int64_t)GBK << 12) + (LG1 * IR1) + (LG2 * IR2) + (LG3 * IR3)); - MAC3 = A3(int44((int64_t)BBK << 12) + (LB1 * IR1) + (LB2 * IR2) + (LB3 * IR3)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - MAC1 = A1(((R << 4) * IR1) + (IR0 * Lm_B1(A1(((int64_t)RFC << 12) - ((R << 4) * IR1)), 0))); - MAC2 = A2(((G << 4) * IR2) + (IR0 * Lm_B2(A2(((int64_t)GFC << 12) - ((G << 4) * IR2)), 0))); - MAC3 = A3(((B << 4) * IR3) + (IR0 * Lm_B3(A3(((int64_t)BFC << 12) - ((B << 4) * IR3)), 0))); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); - } -} - -void PCSX::GTE::NCCS(uint32_t op) { - GTE_LOG("%08x GTE: NCCS|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - MAC1 = A1((int64_t)(L11 * VX0) + (L12 * VY0) + (L13 * VZ0)); - MAC2 = A2((int64_t)(L21 * VX0) + (L22 * VY0) + (L23 * VZ0)); - MAC3 = A3((int64_t)(L31 * VX0) + (L32 * VY0) + (L33 * VZ0)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - MAC1 = A1(int44((int64_t)RBK << 12) + (LR1 * IR1) + (LR2 * IR2) + (LR3 * IR3)); - MAC2 = A2(int44((int64_t)GBK << 12) + (LG1 * IR1) + (LG2 * IR2) + (LG3 * IR3)); - MAC3 = A3(int44((int64_t)BBK << 12) + (LB1 * IR1) + (LB2 * IR2) + (LB3 * IR3)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - MAC1 = A1((R << 4) 
* IR1); - MAC2 = A2((G << 4) * IR2); - MAC3 = A3((B << 4) * IR3); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); -} - -void PCSX::GTE::CC(uint32_t op) { - GTE_LOG("%08x GTE: CC|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - GTE_LOG("%08x GTE: CC|", op); - MAC1 = A1(int44(((int64_t)RBK) << 12) + (LR1 * IR1) + (LR2 * IR2) + (LR3 * IR3)); - MAC2 = A2(int44(((int64_t)GBK) << 12) + (LG1 * IR1) + (LG2 * IR2) + (LG3 * IR3)); - MAC3 = A3(int44(((int64_t)BBK) << 12) + (LB1 * IR1) + (LB2 * IR2) + (LB3 * IR3)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - MAC1 = A1((R << 4) * IR1); - MAC2 = A2((G << 4) * IR2); - MAC3 = A3((B << 4) * IR3); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); -} - -void PCSX::GTE::NCS(uint32_t op) { - GTE_LOG("%08x GTE: NCS|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - MAC1 = A1((int64_t)(L11 * VX0) + (L12 * VY0) + (L13 * VZ0)); - MAC2 = A2((int64_t)(L21 * VX0) + (L22 * VY0) + (L23 * VZ0)); - MAC3 = A3((int64_t)(L31 * VX0) + (L32 * VY0) + (L33 * VZ0)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - MAC1 = A1(int44((int64_t)RBK << 12) + (LR1 * IR1) + (LR2 * IR2) + (LR3 * IR3)); - MAC2 = A2(int44((int64_t)GBK << 12) + (LG1 * IR1) + (LG2 * IR2) + (LG3 * IR3)); - MAC3 = A3(int44((int64_t)BBK << 12) + (LB1 * IR1) + (LB2 * IR2) + (LB3 * IR3)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); -} - -void PCSX::GTE::NCT(uint32_t op) { - GTE_LOG("%08x GTE: NCT|", op); - - const int 
lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - for (int v = 0; v < 3; v++) { - MAC1 = A1((int64_t)(L11 * VX(v)) + (L12 * VY(v)) + (L13 * VZ(v))); - MAC2 = A2((int64_t)(L21 * VX(v)) + (L22 * VY(v)) + (L23 * VZ(v))); - MAC3 = A3((int64_t)(L31 * VX(v)) + (L32 * VY(v)) + (L33 * VZ(v))); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - MAC1 = A1(int44((int64_t)RBK << 12) + (LR1 * IR1) + (LR2 * IR2) + (LR3 * IR3)); - MAC2 = A2(int44((int64_t)GBK << 12) + (LG1 * IR1) + (LG2 * IR2) + (LG3 * IR3)); - MAC3 = A3(int44((int64_t)BBK << 12) + (LB1 * IR1) + (LB2 * IR2) + (LB3 * IR3)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); - } -} - -void PCSX::GTE::SQR(uint32_t op) { - GTE_LOG("%08x GTE: SQR|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - MAC1 = A1(IR1 * IR1); - MAC2 = A2(IR2 * IR2); - MAC3 = A3(IR3 * IR3); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); -} - -void PCSX::GTE::DCPL(uint32_t op) { - GTE_LOG("%08x GTE: DCPL|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - MAC1 = A1(((R << 4) * IR1) + (IR0 * Lm_B1(A1(((int64_t)RFC << 12) - ((R << 4) * IR1)), 0))); - MAC2 = A2(((G << 4) * IR2) + (IR0 * Lm_B2(A2(((int64_t)GFC << 12) - ((G << 4) * IR2)), 0))); - MAC3 = A3(((B << 4) * IR3) + (IR0 * Lm_B3(A3(((int64_t)BFC << 12) - ((B << 4) * IR3)), 0))); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); -} - -void PCSX::GTE::DPCT(uint32_t op) { - GTE_LOG("%08x GTE: DPCT|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - for (int v = 0; v < 3; v++) { - MAC1 = A1((R0 << 16) + (IR0 * 
Lm_B1(A1(((int64_t)RFC << 12) - (R0 << 16)), 0))); - MAC2 = A2((G0 << 16) + (IR0 * Lm_B2(A2(((int64_t)GFC << 12) - (G0 << 16)), 0))); - MAC3 = A3((B0 << 16) + (IR0 * Lm_B3(A3(((int64_t)BFC << 12) - (B0 << 16)), 0))); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); - } -} - -void PCSX::GTE::AVSZ3(uint32_t op) { - GTE_LOG("%08x GTE: AVSZ3|", op); - FLAG = 0; - - MAC0 = F((int64_t)(ZSF3 * SZ1) + (ZSF3 * SZ2) + (ZSF3 * SZ3)); - OTZ = Lm_D(s_mac0, 1); -} - -void PCSX::GTE::AVSZ4(uint32_t op) { - GTE_LOG("%08x GTE: AVSZ4|", op); - FLAG = 0; - - MAC0 = F((int64_t)(ZSF4 * SZ0) + (ZSF4 * SZ1) + (ZSF4 * SZ2) + (ZSF4 * SZ3)); - OTZ = Lm_D(s_mac0, 1); -} - -void PCSX::GTE::RTPT(uint32_t op) { - GTE_LOG("%08x GTE: RTPT|", op); - - int32_t h_over_sz3; - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - for (int v = 0; v < 3; v++) { - MAC1 = A1(int44((int64_t)TRX << 12) + (R11 * VX(v)) + (R12 * VY(v)) + (R13 * VZ(v))); - MAC2 = A2(int44((int64_t)TRY << 12) + (R21 * VX(v)) + (R22 * VY(v)) + (R23 * VZ(v))); - MAC3 = A3(int44((int64_t)TRZ << 12) + (R31 * VX(v)) + (R32 * VY(v)) + (R33 * VZ(v))); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3_sf(s_mac3, s_sf, lm); - pushZ(Lm_D(s_mac3, 1)); - - h_over_sz3 = gte_divide(H, SZ3); - SXY0 = SXY1; - SXY1 = SXY2; - SX2 = Lm_G1( - F((int64_t)OFX + ((int64_t)IR1 * h_over_sz3) * (PCSX::g_emulator->config().Widescreen ? 0.75 : 1)) >> 16); - SY2 = Lm_G2(F((int64_t)OFY + ((int64_t)IR2 * h_over_sz3)) >> 16); - - PGXP_pushSXYZ2s( - Lm_G1_ia((int64_t)OFX + (int64_t)(IR1 * h_over_sz3) * (PCSX::g_emulator->config().Widescreen ? 
0.75 : 1)), - Lm_G2_ia((int64_t)OFY + (int64_t)(IR2 * h_over_sz3)), std::max((int)SZ3, H / 2), SXY2); - - // PGXP_RTPS(v, SXY2); - } - - MAC0 = F((int64_t)DQB + ((int64_t)DQA * h_over_sz3)); - IR0 = Lm_H(s_mac0, 1); -} - -void PCSX::GTE::GPL(uint32_t op) { - GTE_LOG("%08x GTE: GPL|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - MAC1 = A1(gte_shift_GPL(MAC1, s_sf) + (IR0 * IR1)); - MAC2 = A2(gte_shift_GPL(MAC2, s_sf) + (IR0 * IR2)); - MAC3 = A3(gte_shift_GPL(MAC3, s_sf) + (IR0 * IR3)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); -} - -void PCSX::GTE::GPF(uint32_t op) { - GTE_LOG("%08x GTE: GPF|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - MAC1 = A1(IR0 * IR1); - MAC2 = A2(IR0 * IR2); - MAC3 = A3(IR0 * IR3); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); -} - -void PCSX::GTE::NCCT(uint32_t op) { - GTE_LOG("%08x GTE: NCCT|", op); - - const int lm = GTE_LM(gteop(op)); - s_sf = GTE_SF(gteop(op)); - FLAG = 0; - - for (int v = 0; v < 3; v++) { - MAC1 = A1((int64_t)(L11 * VX(v)) + (L12 * VY(v)) + (L13 * VZ(v))); - MAC2 = A2((int64_t)(L21 * VX(v)) + (L22 * VY(v)) + (L23 * VZ(v))); - MAC3 = A3((int64_t)(L31 * VX(v)) + (L32 * VY(v)) + (L33 * VZ(v))); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - MAC1 = A1(int44((int64_t)RBK << 12) + (LR1 * IR1) + (LR2 * IR2) + (LR3 * IR3)); - MAC2 = A2(int44((int64_t)GBK << 12) + (LG1 * IR1) + (LG2 * IR2) + (LG3 * IR3)); - MAC3 = A3(int44((int64_t)BBK << 12) + (LB1 * IR1) + (LB2 * IR2) + (LB3 * IR3)); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - MAC1 = A1((R << 4) * IR1); - MAC2 = A2((G << 4) * IR2); - 
MAC3 = A3((B << 4) * IR3); - IR1 = Lm_B1(MAC1, lm); - IR2 = Lm_B2(MAC2, lm); - IR3 = Lm_B3(MAC3, lm); - RGB0 = RGB1; - RGB1 = RGB2; - CD2 = CODE; - R2 = Lm_C1(MAC1 >> 4); - G2 = Lm_C2(MAC2 >> 4); - B2 = Lm_C3(MAC3 >> 4); - } -} diff --git a/src/core/gte.h b/src/core/gte.h index 41f26707a..e5b6d1612 100644 --- a/src/core/gte.h +++ b/src/core/gte.h @@ -1,5 +1,5 @@ /*************************************************************************** - * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team * + * Copyright (C) 2026 PCSX-Redux authors * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -18,64 +18,32 @@ ***************************************************************************/ #pragma once + #include +#include #include "core/psxemulator.h" #include "core/r3000a.h" -// WTF termios +// termios defines NCCS which collides with our method name #undef NCCS -#define gteoB (PCSX::g_emulator->m_cpu->m_regs.GPR.r[_Rs_] + _Imm_) -#define gteop(instruction) ((instruction) & 0x1ffffff) - namespace PCSX { class GTE { public: - uint32_t MFC2(uint32_t code) { - // CPU[Rt] = GTE_D[Rd] - return MFC2_internal(_Rd_); - } - - uint32_t MFC2(int reg) { return MFC2_internal(reg); } - - uint32_t CFC2(uint32_t code) { - // CPU[Rt] = GTE_C[Rd] - return PCSX::g_emulator->m_cpu->m_regs.CP2C.p[_Rd_].d; - } - - void CTC2(uint32_t value, int reg) { CTC2_internal(value, reg); } - - void MTC2(uint32_t value, int reg) { MTC2_internal(value, reg); } - - void MTC2(uint32_t code) { MTC2_internal(PCSX::g_emulator->m_cpu->m_regs.GPR.r[_Rt_], _Rd_); } - void CTC2(uint32_t code) { CTC2_internal(PCSX::g_emulator->m_cpu->m_regs.GPR.r[_Rt_], _Rd_); } - void LWC2(uint32_t code) { - if (gteoB & 3) { - PCSX::g_emulator->m_cpu->m_regs.pc -= 4; - PCSX::g_system->log(PCSX::LogClass::CPU, _("Unaligned address 0x%08x in LWC2 from 0x%08x\n"), gteoB, - PCSX::g_emulator->m_cpu->m_regs.pc); - 
PCSX::g_emulator->m_cpu->m_regs.CP0.n.BadVAddr = gteoB; - PCSX::g_emulator->m_cpu->exception(PCSX::R3000Acpu::Exception::LoadAddressError, - PCSX::g_emulator->m_cpu->m_inDelaySlot); - return; - } - MTC2_internal(PCSX::g_emulator->m_mem->read32(gteoB), _Rt_); - } - void SWC2(uint32_t code) { - if (gteoB & 3) { - PCSX::g_emulator->m_cpu->m_regs.pc -= 4; - PCSX::g_system->log(PCSX::LogClass::CPU, _("Unaligned address 0x%08x in SWC2 from 0x%08x\n"), gteoB, - PCSX::g_emulator->m_cpu->m_regs.pc); - PCSX::g_emulator->m_cpu->m_regs.CP0.n.BadVAddr = gteoB; - PCSX::g_emulator->m_cpu->exception(PCSX::R3000Acpu::Exception::StoreAddressError, - PCSX::g_emulator->m_cpu->m_inDelaySlot); - return; - } - PCSX::g_emulator->m_mem->write32(gteoB, MFC2_internal(_Rt_)); - } - + // COP2 data transfer operations + uint32_t MFC2(uint32_t code); + uint32_t MFC2(int reg); + uint32_t CFC2(uint32_t code); + void MTC2(uint32_t value, int reg); + void MTC2(uint32_t code); + void CTC2(uint32_t value, int reg); + void CTC2(uint32_t code); + void LWC2(uint32_t code); + void SWC2(uint32_t code); + + // GTE function instructions (COP2 imm25) void RTPS(uint32_t code); void NCLIP(uint32_t code); void OP(uint32_t code); @@ -99,61 +67,31 @@ class GTE { void GPL(uint32_t code); void NCCT(uint32_t code); - // If MSB is set, return the number of leading ones, else return the number of leading zeroes - // For an input of 0, 32 is returned + // Count leading redundant sign bits. For positive: leading zeros. For negative: leading ones. + // Returns 32 for input of 0 or 0xffffffff. static uint32_t countLeadingBits(uint32_t value) { - if (value & 0x80000000) { - value = ~value; - } + if (value & 0x80000000) value = ~value; return std::countl_zero(value); } - // Count leading zeroes of a 16-bit value. For an input of 0, 16 is returned + // Count leading zeros of a 16-bit value. Returns 16 for input of 0. 
static uint32_t countLeadingZeros16(uint16_t value) { - // Use a 32-bit CLZ as it's what's most commonly available and Clang/GCC fail to optimize 16-bit CLZ - const auto count = std::countl_zero((uint32_t)value); - return count - 16; + return std::countl_zero(static_cast(value)) - 16; } private: - class int44 { - public: - int44(int64_t value) - : m_value(value), m_positive_overflow(value > 0x7ffffffffff), m_negative_overflow(value < -0x80000000000) {} - - int44(int64_t value, bool positive_overflow, bool negative_overflow) - : m_value(value), m_positive_overflow(positive_overflow), m_negative_overflow(negative_overflow) {} - - int44 operator+(int64_t rhs) { - int64_t value = ((m_value + rhs) << 20) >> 20; - return int44(value, m_positive_overflow || (value < 0 && m_value >= 0 && rhs >= 0), - m_negative_overflow || (value >= 0 && m_value < 0 && rhs < 0)); - } - - bool positiveOverflow() { return m_positive_overflow; } - bool negativeOverflow() { return m_negative_overflow; } - int64_t value() { return m_value; } - - private: - int64_t m_value; - bool m_positive_overflow; - bool m_negative_overflow; - }; - - int s_sf; - int64_t s_mac0; - int64_t s_mac3; - - int32_t BOUNDS(int44 value, int max_flag, int min_flag); - int32_t A1(int44 a); - int32_t A2(int44 a); - int32_t A3(int44 a); - int64_t F(int64_t a); - - uint32_t MFC2_internal(int reg); - void MTC2_internal(uint32_t value, int reg); - void CTC2_internal(uint32_t value, int reg); - void pushZ(uint16_t z); + // Template instruction implementations, parameterized on sf (shift factor) and lm (limit mode). + // Defined in gte-instructions.cc. The public methods dispatch to these based on the encoding. 
+ template void op(uint32_t op); + template void dpcs(uint32_t op); + template void intpl(uint32_t op); + template void cdp(uint32_t op); + template void cc(uint32_t op); + template void sqr(uint32_t op); + template void dcpl(uint32_t op); + template void dpct(uint32_t op); + template void gpf(uint32_t op); + template void gpl(uint32_t op); }; } // namespace PCSX diff --git a/src/mips/common/hardware/cop2.h b/src/mips/common/hardware/cop2.h new file mode 100644 index 000000000..e3f430068 --- /dev/null +++ b/src/mips/common/hardware/cop2.h @@ -0,0 +1,310 @@ +/* + +MIT License + +Copyright (c) 2026 PCSX-Redux authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +#pragma once + +// COP2 (GTE) instruction encoder and register access helpers. 
+// +// GTE command encoding (25-bit immediate for cop2 instruction): +// +// 24-20 19 18-17 16-15 14-13 12-11 10 9-6 5-0 +// [fake ][sf][ mx ][ v ][ cv ][ pad ][lm][pad][cmd] +// +// sf: shift flag (0 = no shift, 1 = shift right 12) +// mx: matrix select (0=RT, 1=LL, 2=LC, 3=garbage) +// v: vector select (0=V0, 1=V1, 2=V2, 3=IR) +// cv: control vector select (0=TR, 1=BK, 2=FC/bugged, 3=zero) +// lm: limit flag (0=clamp -0x8000..0x7fff, 1=clamp 0..0x7fff) +// cmd: function code (6 bits) +// +// The upper bits (20-24) contain a "fake" opcode number that Sony's +// documentation uses for instruction naming. Hardware ignores these +// bits for dispatch - only the 6-bit function code matters. + +#include + +// ========================================================================== +// Bitfield encoding +// ========================================================================== + +#define COP2_SF_SHIFT 19 +#define COP2_MX_SHIFT 17 +#define COP2_V_SHIFT 15 +#define COP2_CV_SHIFT 13 +#define COP2_LM_SHIFT 10 + +// Shift factor +#define COP2_SF0 0 // No shift +#define COP2_SF1 1 // Shift right 12 + +// Matrix select +#define COP2_MX_RT 0 // Rotation matrix +#define COP2_MX_LL 1 // Light matrix +#define COP2_MX_LC 2 // Light color matrix +#define COP2_MX_BAD 3 // Garbage matrix (undocumented) + +// Vector select +#define COP2_V_V0 0 +#define COP2_V_V1 1 +#define COP2_V_V2 2 +#define COP2_V_IR 3 // IR1/IR2/IR3 + +// Control vector select +#define COP2_CV_TR 0 // Translation vector +#define COP2_CV_BK 1 // Background color +#define COP2_CV_FC 2 // Far color (bugged) +#define COP2_CV_NONE 3 // Zero / no translation + +// Limit mode +#define COP2_LM_SIGNED 0 // Clamp IR to [-0x8000, 0x7FFF] +#define COP2_LM_UNSIGNED 1 // Clamp IR to [0, 0x7FFF] + +// Function codes (bits 5-0) +#define COP2_FN_RTPS 0x01 +#define COP2_FN_NCLIP 0x06 +#define COP2_FN_OP 0x0c +#define COP2_FN_DPCS 0x10 +#define COP2_FN_INTPL 0x11 +#define COP2_FN_MVMVA 0x12 +#define COP2_FN_NCDS 0x13 
+#define COP2_FN_CDP 0x14 +#define COP2_FN_NCDT 0x16 +#define COP2_FN_NCCS 0x1b +#define COP2_FN_CC 0x1c +#define COP2_FN_NCS 0x1e +#define COP2_FN_NCT 0x20 +#define COP2_FN_SQR 0x28 +#define COP2_FN_DCPL 0x29 +#define COP2_FN_DPCT 0x2a +#define COP2_FN_AVSZ3 0x2d +#define COP2_FN_AVSZ4 0x2e +#define COP2_FN_RTPT 0x30 +#define COP2_FN_GPF 0x3d +#define COP2_FN_GPL 0x3e +#define COP2_FN_NCCT 0x3f + +// ========================================================================== +// Generic encoder: build a cop2 opcode from individual fields +// ========================================================================== + +// Generic encoder: build a cop2 opcode from individual fields. +// The fake field (bits 24-20) is Sony's instruction number. Hardware +// ignores it, but conventional encodings include it. +#define COP2_OP(fake, sf, mx, v, cv, lm, fn) \ + (((fake) << 20) | ((sf) << COP2_SF_SHIFT) | ((mx) << COP2_MX_SHIFT) | \ + ((v) << COP2_V_SHIFT) | ((cv) << COP2_CV_SHIFT) | \ + ((lm) << COP2_LM_SHIFT) | (fn)) + +// ========================================================================== +// Named instruction encoders +// ========================================================================== +// Each macro embeds the conventional fake field value from Sony's docs. +// The sf and lm parameters are user-selectable. Other fields (mx, v, cv) +// are fixed per instruction - only MVMVA exposes them. 
+ +// Perspective transform (single / triple) +#define COP2_RTPS(sf, lm) COP2_OP( 1, sf, 0, 0, 0, lm, COP2_FN_RTPS) +#define COP2_RTPT(sf, lm) COP2_OP( 2, sf, 0, 0, 0, lm, COP2_FN_RTPT) + +// Normal clipping +#define COP2_NCLIP COP2_OP(20, 0, 0, 0, 0, 0, COP2_FN_NCLIP) + +// Cross product (rotation diagonal x IR) +#define COP2_OP_CP(sf, lm) COP2_OP(23, sf, 0, 0, 0, lm, COP2_FN_OP) + +// Depth cue +#define COP2_DPCS(sf, lm) COP2_OP( 7, sf, 0, 0, 0, lm, COP2_FN_DPCS) +#define COP2_DPCT(sf, lm) COP2_OP(15, sf, 0, 0, 0, lm, COP2_FN_DPCT) +#define COP2_DCPL(sf, lm) COP2_OP( 6, sf, 0, 0, 0, lm, COP2_FN_DCPL) +#define COP2_INTPL(sf, lm) COP2_OP( 9, sf, 0, 0, 0, lm, COP2_FN_INTPL) + +// Matrix-vector multiply and add (fully parameterized) +#define COP2_MVMVA(sf, mx, v, cv, lm) \ + COP2_OP(4, sf, mx, v, cv, lm, COP2_FN_MVMVA) + +// Lighting: normal color (single / triple) +#define COP2_NCS(sf, lm) COP2_OP(12, sf, 0, 0, 0, lm, COP2_FN_NCS) +#define COP2_NCT(sf, lm) COP2_OP(13, sf, 0, 0, 0, lm, COP2_FN_NCT) +#define COP2_NCCS(sf, lm) COP2_OP(16, sf, 0, 0, 0, lm, COP2_FN_NCCS) +#define COP2_NCCT(sf, lm) COP2_OP(17, sf, 0, 0, 0, lm, COP2_FN_NCCT) +#define COP2_NCDS(sf, lm) COP2_OP(14, sf, 0, 0, 0, lm, COP2_FN_NCDS) +#define COP2_NCDT(sf, lm) COP2_OP(15, sf, 0, 0, 0, lm, COP2_FN_NCDT) + +// Color +#define COP2_CC(sf, lm) COP2_OP(19, sf, 0, 0, 0, lm, COP2_FN_CC) +#define COP2_CDP(sf, lm) COP2_OP(18, sf, 0, 0, 0, lm, COP2_FN_CDP) + +// Square +#define COP2_SQR(sf, lm) COP2_OP(10, sf, 0, 0, 0, lm, COP2_FN_SQR) + +// Average Z +#define COP2_AVSZ3 COP2_OP(21, 1, 0, 0, 0, 0, COP2_FN_AVSZ3) +#define COP2_AVSZ4 COP2_OP(22, 1, 0, 0, 0, 0, COP2_FN_AVSZ4) + +// General purpose interpolation +#define COP2_GPF(sf, lm) COP2_OP(25, sf, 0, 0, 0, lm, COP2_FN_GPF) +#define COP2_GPL(sf, lm) COP2_OP(26, sf, 0, 0, 0, lm, COP2_FN_GPL) + +// ========================================================================== +// Execution macro +// 
========================================================================== + +#define cop2_cmd(op) __asm__ volatile("cop2 %0" : : "i"(op)) + +// ========================================================================== +// Register access +// ========================================================================== + +// GTE data registers (MTC2/MFC2, $0-$31) +#define cop2_put(reg, val) do { \ + uint32_t _v = (val); \ + __asm__ volatile("mtc2 %0, $" #reg \ + "\n\tnop\n\tnop" \ + : : "r"(_v)); \ +} while (0) + +#define cop2_get(reg, dest) do { \ + __asm__ volatile("mfc2 %0, $" #reg \ + "\n\tnop\n\tnop" \ + : "=r"(dest)); \ +} while (0) + +// GTE control registers (CTC2/CFC2, $0-$31) +#define cop2_putc(reg, val) do { \ + uint32_t _v = (val); \ + __asm__ volatile("ctc2 %0, $" #reg \ + "\n\tnop\n\tnop" \ + : : "r"(_v)); \ +} while (0) + +#define cop2_getc(reg, dest) do { \ + __asm__ volatile("cfc2 %0, $" #reg \ + "\n\tnop\n\tnop" \ + : "=r"(dest)); \ +} while (0) + +// ========================================================================== +// Data register indices +// ========================================================================== + +#define COP2_VXY0 0 // VX0 (low16), VY0 (high16) +#define COP2_VZ0 1 +#define COP2_VXY1 2 +#define COP2_VZ1 3 +#define COP2_VXY2 4 +#define COP2_VZ2 5 +#define COP2_RGBC 6 // R (low8), G, B, CODE (high8) +#define COP2_OTZ 7 // 16-bit unsigned, zero-extended on read +#define COP2_IR0 8 // 16-bit signed, sign-extended on read +#define COP2_IR1 9 +#define COP2_IR2 10 +#define COP2_IR3 11 +#define COP2_SXY0 12 +#define COP2_SXY1 13 +#define COP2_SXY2 14 +#define COP2_SXYP 15 // Write pushes SXY FIFO, read returns SXY2 +#define COP2_SZ0 16 // 16-bit unsigned, zero-extended on read +#define COP2_SZ1 17 +#define COP2_SZ2 18 +#define COP2_SZ3 19 +#define COP2_RGB0 20 // Color FIFO entry 0 (oldest) +#define COP2_RGB1 21 +#define COP2_RGB2 22 // Color FIFO entry 2 (newest, written by instructions) +#define COP2_RES1 23 // Reserved 
(but read/write works) +#define COP2_MAC0 24 // 32-bit signed +#define COP2_MAC1 25 +#define COP2_MAC2 26 +#define COP2_MAC3 27 +#define COP2_IRGB 28 // Write expands 5-bit fields to IR1-3. Read packs IR1-3. +#define COP2_ORGB 29 // Read-only: packs IR1-3 with saturation +#define COP2_LZCS 30 // Write triggers LZCR computation +#define COP2_LZCR 31 // Read-only: leading bit count result + +// ========================================================================== +// Control register indices +// ========================================================================== + +#define COP2_R11R12 0 +#define COP2_R13R21 1 +#define COP2_R22R23 2 +#define COP2_R31R32 3 +#define COP2_R33 4 // 16-bit, sign-extended on read/write +#define COP2_TRX 5 // 32-bit +#define COP2_TRY 6 +#define COP2_TRZ 7 +#define COP2_L11L12 8 +#define COP2_L13L21 9 +#define COP2_L22L23 10 +#define COP2_L31L32 11 +#define COP2_L33 12 // 16-bit, sign-extended +#define COP2_RBK 13 // 32-bit +#define COP2_GBK 14 +#define COP2_BBK 15 +#define COP2_LR1LR2 16 +#define COP2_LR3LG1 17 +#define COP2_LG2LG3 18 +#define COP2_LB1LB2 19 +#define COP2_LB3 20 // 16-bit, sign-extended +#define COP2_RFC 21 // 32-bit +#define COP2_GFC 22 +#define COP2_BFC 23 +#define COP2_OFX 24 // 32-bit (16.16 fixed) +#define COP2_OFY 25 +#define COP2_H 26 // 16-bit unsigned (but sign-extends on CFC2 read) +#define COP2_DQA 27 // 16-bit, sign-extended +#define COP2_DQB 28 // 32-bit +#define COP2_ZSF3 29 // 16-bit, sign-extended +#define COP2_ZSF4 30 // 16-bit, sign-extended +#define COP2_FLAG 31 // FLAG register (write mask 0x7FFFF000, bit 31 recomputed) + +// ========================================================================== +// FLAG register bit definitions +// ========================================================================== + +#define COP2_FLAG_MAC1_OVER_POS (1u << 30) // MAC1 result > +0x7FFFFFFFFFF +#define COP2_FLAG_MAC2_OVER_POS (1u << 29) +#define COP2_FLAG_MAC3_OVER_POS (1u << 28) +#define 
COP2_FLAG_MAC1_OVER_NEG (1u << 27) // MAC1 result < -0x80000000000 +#define COP2_FLAG_MAC2_OVER_NEG (1u << 26) +#define COP2_FLAG_MAC3_OVER_NEG (1u << 25) +#define COP2_FLAG_IR1_SAT (1u << 24) // IR1 saturated (sets summary) +#define COP2_FLAG_IR2_SAT (1u << 23) // IR2 saturated (sets summary) +#define COP2_FLAG_IR3_SAT (1u << 22) // IR3 saturated (NO summary) +#define COP2_FLAG_COLOR_R_SAT (1u << 21) // Color R saturated to [0,255] (NO summary) +#define COP2_FLAG_COLOR_G_SAT (1u << 20) // Color G saturated (NO summary) +#define COP2_FLAG_COLOR_B_SAT (1u << 19) // Color B saturated (NO summary) +#define COP2_FLAG_SZ3_OTZ_SAT (1u << 18) // SZ3/OTZ saturated to [0,0xFFFF] (sets summary) +#define COP2_FLAG_DIV_OVERFLOW (1u << 17) // Division overflow H >= 2*SZ3 (sets summary) +#define COP2_FLAG_MAC0_OVER_POS (1u << 16) // MAC0 > 0x7FFFFFFF (sets summary) +#define COP2_FLAG_MAC0_OVER_NEG (1u << 15) // MAC0 < -0x80000000 (sets summary) +#define COP2_FLAG_SX2_SAT (1u << 14) // SX2 saturated to [-0x400,0x3FF] (sets summary) +#define COP2_FLAG_SY2_SAT (1u << 13) // SY2 saturated (sets summary) +#define COP2_FLAG_IR0_SAT (1u << 12) // IR0 saturated to [0,0x1000] (NO summary) +#define COP2_FLAG_ERROR (1u << 31) // Error summary (OR of bits that set summary) + +// Bits that set the error summary (bit 31): +// 30-23 (MAC overflow, IR1/IR2 sat) and 18-13 (SZ3, div, MAC0, SX2, SY2) +// Bits that do NOT set summary: 22 (IR3), 21-19 (color RGB), 12 (IR0) diff --git a/src/mips/tests/Makefile b/src/mips/tests/Makefile index 83fd8656a..c62ecf502 100644 --- a/src/mips/tests/Makefile +++ b/src/mips/tests/Makefile @@ -3,6 +3,7 @@ all: $(MAKE) -C cpu all $(MAKE) -C cop0 all $(MAKE) -C dma all + $(MAKE) -C gte all $(MAKE) -C libc all $(MAKE) -C memcpy all $(MAKE) -C memset all @@ -14,6 +15,7 @@ clean: $(MAKE) -C cpu clean $(MAKE) -C cop0 clean $(MAKE) -C dma clean + $(MAKE) -C gte clean $(MAKE) -C libc clean $(MAKE) -C memcpy clean $(MAKE) -C memset clean diff --git 
a/src/mips/tests/gte/Makefile b/src/mips/tests/gte/Makefile new file mode 100644 index 000000000..79c970cf1 --- /dev/null +++ b/src/mips/tests/gte/Makefile @@ -0,0 +1,50 @@ +TARGET = gte +USE_FUNCTION_SECTIONS = false +TYPE = ps-exe + +SRCS = \ +../uC-sdk-glue/BoardConsole.c \ +../uC-sdk-glue/BoardInit.c \ +../uC-sdk-glue/init.c \ +\ +../../../../third_party/uC-sdk/libc/src/cxx-glue.c \ +../../../../third_party/uC-sdk/libc/src/errno.c \ +../../../../third_party/uC-sdk/libc/src/initfini.c \ +../../../../third_party/uC-sdk/libc/src/malloc.c \ +../../../../third_party/uC-sdk/libc/src/qsort.c \ +../../../../third_party/uC-sdk/libc/src/rand.c \ +../../../../third_party/uC-sdk/libc/src/reent.c \ +../../../../third_party/uC-sdk/libc/src/stdio.c \ +../../../../third_party/uC-sdk/libc/src/string.c \ +../../../../third_party/uC-sdk/libc/src/strto.c \ +../../../../third_party/uC-sdk/libc/src/unistd.c \ +../../../../third_party/uC-sdk/libc/src/xprintf.c \ +../../../../third_party/uC-sdk/libc/src/xscanf.c \ +../../../../third_party/uC-sdk/libc/src/yscanf.c \ +../../../../third_party/uC-sdk/os/src/devfs.c \ +../../../../third_party/uC-sdk/os/src/filesystem.c \ +../../../../third_party/uC-sdk/os/src/fio.c \ +../../../../third_party/uC-sdk/os/src/hash-djb2.c \ +../../../../third_party/uC-sdk/os/src/init.c \ +../../../../third_party/uC-sdk/os/src/osdebug.c \ +../../../../third_party/uC-sdk/os/src/romfs.c \ +../../../../third_party/uC-sdk/os/src/sbrk.c \ + + +CPPFLAGS = -DNOFLOATINGPOINT +CPPFLAGS += -I. 
+CPPFLAGS += -I../../../../third_party/uC-sdk/libc/include +CPPFLAGS += -I../../../../third_party/uC-sdk/os/include +CPPFLAGS += -I../../../../third_party/libcester/include +CPPFLAGS += -I../../openbios/uC-sdk-glue + +ifeq ($(PCSX_TESTS),true) +CPPFLAGS += -DPCSX_TESTS=1 +endif + +SRCS += \ +../../common/syscalls/printf.s \ +../../common/crt0/uC-sdk-crt0.s \ +gte.c \ + +include ../../common.mk diff --git a/src/mips/tests/gte/gte-avsz.c b/src/mips/tests/gte/gte-avsz.c new file mode 100644 index 000000000..269517c14 --- /dev/null +++ b/src/mips/tests/gte/gte-avsz.c @@ -0,0 +1,82 @@ +// AVSZ3 / AVSZ4: Average Z value computation + +CESTER_TEST(avsz3_basic, gte_tests, + cop2_put(17, 100); + cop2_put(18, 200); + cop2_put(19, 300); + cop2_putc(29, 0x555); // ZSF3 ~ 4096/3 + gte_clear_flag(); + cop2_cmd(COP2_AVSZ3); + int32_t mac0; + uint32_t otz; + cop2_get(24, mac0); + cop2_get(7, otz); + cester_assert_int_eq(819000, mac0); + cester_assert_uint_eq(199, otz); +) + +CESTER_TEST(avsz4_basic, gte_tests, + cop2_put(16, 100); + cop2_put(17, 200); + cop2_put(18, 300); + cop2_put(19, 400); + cop2_putc(30, 0x400); // ZSF4 = 4096/4 + gte_clear_flag(); + cop2_cmd(COP2_AVSZ4); + int32_t mac0; + uint32_t otz; + cop2_get(24, mac0); + cop2_get(7, otz); + cester_assert_int_eq(1024000, mac0); + cester_assert_uint_eq(250, otz); +) + +// Verify AVSZ3 uses SZ1+SZ2+SZ3, not SZ0+SZ1+SZ2 +CESTER_TEST(avsz3_uses_sz123, gte_tests, + cop2_put(16, 1000); // SZ0 - should be ignored + cop2_put(17, 2000); // SZ1 + cop2_put(18, 3000); // SZ2 + cop2_put(19, 4000); // SZ3 + cop2_putc(29, 0x1000); // ZSF3 = 1.0 in 4.12 + gte_clear_flag(); + cop2_cmd(COP2_AVSZ3); + int32_t mac0; + cop2_get(24, mac0); + // SZ1+SZ2+SZ3 = 9000, * 4096 = 36864000 + cester_assert_int_eq(36864000, mac0); +) + +// OTZ saturation: result > 0xffff +CESTER_TEST(avsz3_otz_saturate, gte_tests, + cop2_put(17, 0xffff); + cop2_put(18, 0xffff); + cop2_put(19, 0xffff); + cop2_putc(29, 0x1000); + gte_clear_flag(); + cop2_cmd(COP2_AVSZ3); 
+ uint32_t otz, flag; + cop2_get(7, otz); + flag = gte_read_flag(); + cester_assert_uint_eq(0xffff, otz); + // FLAG.18 (OTZ saturation) should be set + uint32_t flag18 = (flag >> 18) & 1; + cester_assert_uint_eq(1, flag18); +) + +// Negative ZSF producing negative MAC0 +CESTER_TEST(avsz3_negative_zsf, gte_tests, + cop2_put(17, 100); + cop2_put(18, 200); + cop2_put(19, 300); + cop2_putc(29, 0xf000); // ZSF3 = negative (sign-extended) + gte_clear_flag(); + cop2_cmd(COP2_AVSZ3); + int32_t mac0; + uint32_t otz, flag; + cop2_get(24, mac0); + cop2_get(7, otz); + flag = gte_read_flag(); + ramsyscall_printf("AVSZ3 neg ZSF: MAC0=%d OTZ=%u FLAG=0x%08x\n", mac0, otz, flag); + // Negative result should saturate OTZ to 0 + cester_assert_uint_eq(0, otz); +) diff --git a/src/mips/tests/gte/gte-depthcue.c b/src/mips/tests/gte/gte-depthcue.c new file mode 100644 index 000000000..86629ed5d --- /dev/null +++ b/src/mips/tests/gte/gte-depthcue.c @@ -0,0 +1,215 @@ +// Depth cue instructions: DPCS, DPCT, DCPL, INTPL + +// DPCS: depth cue single - interpolates RGBC toward far color using IR0 +CESTER_TEST(dpcs_basic, gte_tests, + gte_set_far_color(0x1000, 0x1000, 0x1000); // FC = (4096, 4096, 4096) + cop2_put(6, 0x00808080); // RGBC: R=0x80, G=0x80, B=0x80 + cop2_put(8, 0x0800); // IR0 = 0.5 + gte_clear_flag(); + cop2_cmd(COP2_DPCS(1, 0)); + int32_t mac1, mac2, mac3; + uint32_t rgb2; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cop2_get(22, rgb2); + ramsyscall_printf("DPCS: MAC=(%d,%d,%d) RGB2=0x%08x\n", mac1, mac2, mac3, rgb2); + cester_assert_int_eq(3072, mac1); + cester_assert_int_eq(3072, mac2); + cester_assert_int_eq(3072, mac3); + cester_assert_uint_eq(0x00c0c0c0, rgb2); + // Formula: MAC = R<<16 + IR0*(FC<<12 - R<<16) >> shift + // R<<16 = 0x80<<16 = 0x800000 + // FC<<12 = 0x1000<<12 = 0x1000000 + // diff = 0x1000000 - 0x800000 = 0x800000 + // IR0 * diff = 0x800 * 0x800000 ... 
this is large +) + +// DPCS with IR0=0: no interpolation, output = input color +CESTER_TEST(dpcs_ir0_zero, gte_tests, + gte_set_far_color(0xff00, 0xff00, 0xff00); + cop2_put(6, 0x00406080); // R=0x80, G=0x60, B=0x40 + cop2_put(8, 0); // IR0 = 0 + gte_clear_flag(); + cop2_cmd(COP2_DPCS(1, 0)); + uint32_t rgb2; + cop2_get(22, rgb2); + uint8_t r = rgb2 & 0xff; + uint8_t g = (rgb2 >> 8) & 0xff; + uint8_t b = (rgb2 >> 16) & 0xff; + // With IR0=0, interpolation weight is 0, so output = input + cester_assert_uint_eq(0x80, r); + cester_assert_uint_eq(0x60, g); + cester_assert_uint_eq(0x40, b); +) + +// DPCS with IR0=0x1000: full interpolation toward far color +CESTER_TEST(dpcs_ir0_max, gte_tests, + gte_set_far_color(0x1000, 0x800, 0x400); // FC scaled + cop2_put(6, 0x00000000); // RGBC: all zero + cop2_put(8, 0x1000); // IR0 = 1.0 + gte_clear_flag(); + cop2_cmd(COP2_DPCS(1, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + ramsyscall_printf("DPCS max: MAC=(%d,%d,%d)\n", mac1, mac2, mac3); + cester_assert_int_eq(4096, mac1); + cester_assert_int_eq(2048, mac2); + cester_assert_int_eq(1024, mac3); + // With R=0, MAC = 0 + IR0 * (FC<<12 - 0) = 1.0 * FC<<12 >> 12 = FC +) + +// DPCS color FIFO push and CODE preservation +CESTER_TEST(dpcs_code_preserved, gte_tests, + gte_set_far_color(0, 0, 0); + cop2_put(6, 0xab102030); // CODE=0xAB, R=0x30, G=0x20, B=0x10 + cop2_put(8, 0); + gte_clear_flag(); + cop2_cmd(COP2_DPCS(1, 0)); + uint32_t rgb2; + cop2_get(22, rgb2); + cester_assert_uint_eq(0xab, (rgb2 >> 24) & 0xff); // CODE preserved +) + +// DPCT: depth cue triple - reads from color FIFO front (RGB0), not RGBC +CESTER_TEST(dpct_reads_fifo, gte_tests, + gte_set_far_color(0, 0, 0); + // Set up color FIFO with known values + cop2_put(20, 0x00102030); // RGB0: R=0x30, G=0x20, B=0x10 + cop2_put(21, 0x00405060); // RGB1 + cop2_put(22, 0x00708090); // RGB2 + cop2_put(6, 0xff000000); // RGBC: CODE=0xff, colors=0 (should NOT be used as 
input) + cop2_put(8, 0); // IR0=0: output = input + gte_clear_flag(); + cop2_cmd(COP2_DPCT(1, 0)); + // After 3 iterations, the FIFO has been processed + uint32_t rgb0, rgb1, rgb2; + cop2_get(20, rgb0); + cop2_get(21, rgb1); + cop2_get(22, rgb2); + ramsyscall_printf("DPCT: RGB0=0x%08x RGB1=0x%08x RGB2=0x%08x\n", rgb0, rgb1, rgb2); + // Each iteration: reads R0/G0/B0 (front of FIFO), pushes result + // With IR0=0, each iteration's output = its input color + // Iteration 1: reads RGB0(0x102030), pushes -> FIFO shifts + // Iteration 2: reads new RGB0 (was RGB1: 0x405060), pushes + // Iteration 3: reads new RGB0 (was RGB2: 0x708090), pushes + // Result FIFO should contain the 3 processed colors + // CODE comes from RGBC (0xff) + cester_assert_uint_eq(0xff102030, rgb0); + cester_assert_uint_eq(0xff405060, rgb1); + cester_assert_uint_eq(0xff708090, rgb2); +) + +// DCPL: depth cue with pre-computed light +CESTER_TEST(dcpl_basic, gte_tests, + gte_set_far_color(0x1000, 0x1000, 0x1000); + cop2_put(6, 0x00808080); // RGBC + // Pre-computed light in IR1-3 + cop2_put(9, 0x1000); // IR1 = 1.0 + cop2_put(10, 0x0800); // IR2 = 0.5 + cop2_put(11, 0x0400); // IR3 = 0.25 + cop2_put(8, 0); // IR0 = 0 (no depth cue) + gte_clear_flag(); + cop2_cmd(COP2_DCPL(1, 0)); + int32_t mac1, mac2, mac3; + uint32_t rgb2; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cop2_get(22, rgb2); + ramsyscall_printf("DCPL: MAC=(%d,%d,%d) RGB2=0x%08x\n", mac1, mac2, mac3, rgb2); + cester_assert_int_eq(2048, mac1); + cester_assert_int_eq(1024, mac2); + cester_assert_int_eq(512, mac3); + cester_assert_uint_eq(0x00204080, rgb2); + // With IR0=0: MAC = (R<<4)*IR, no depth cue interpolation + // MAC1 = (0x80 << 4) * 0x1000 = 0x800 * 0x1000 = 0x800000 + // After >>12: 0x800 = 2048 -> IR1, /16 = 128 -> R2 +) + +// DCPL with depth cue interpolation +CESTER_TEST(dcpl_with_depth, gte_tests, + gte_set_far_color(0x1000, 0x1000, 0x1000); + cop2_put(6, 0x00808080); + cop2_put(9, 0x1000); + cop2_put(10, 
0x1000); + cop2_put(11, 0x1000); + cop2_put(8, 0x0800); // IR0 = 0.5 + gte_clear_flag(); + cop2_cmd(COP2_DCPL(1, 0)); + int32_t mac1, mac2, mac3; + uint32_t flag; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + flag = gte_read_flag(); + ramsyscall_printf("DCPL depth: MAC=(%d,%d,%d) FLAG=0x%08x\n", mac1, mac2, mac3, flag); + cester_assert_int_eq(3072, mac1); + cester_assert_int_eq(3072, mac2); + cester_assert_int_eq(3072, mac3); + cester_assert_uint_eq(0x00000000, flag); +) + +// INTPL: interpolation (depth cue on IR vector directly) +CESTER_TEST(intpl_basic, gte_tests, + gte_set_far_color(0x1000, 0x2000, 0x3000); + cop2_put(9, 0x100); // IR1 + cop2_put(10, 0x200); // IR2 + cop2_put(11, 0x300); // IR3 + cop2_put(8, 0); // IR0 = 0: no interpolation + gte_clear_flag(); + cop2_cmd(COP2_INTPL(1, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // With IR0=0: MAC = IR << 12 >> shift = IR (with sf=1) + cester_assert_int_eq(0x100, mac1); + cester_assert_int_eq(0x200, mac2); + cester_assert_int_eq(0x300, mac3); +) + +CESTER_TEST(intpl_half, gte_tests, + gte_set_far_color(0x1000, 0x1000, 0x1000); + cop2_put(9, 0); + cop2_put(10, 0); + cop2_put(11, 0); + cop2_put(8, 0x0800); // IR0 = 0.5 + gte_clear_flag(); + cop2_cmd(COP2_INTPL(1, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + ramsyscall_printf("INTPL half: MAC=(%d,%d,%d)\n", mac1, mac2, mac3); + cester_assert_int_eq(2048, mac1); + cester_assert_int_eq(2048, mac2); + cester_assert_int_eq(2048, mac3); + // IR=0, FC=0x1000, IR0=0.5 + // MAC = 0 + 0.5*(FC - 0) = 0.5 * 0x1000 = 0x800 +) + +// INTPL pushes color FIFO +CESTER_TEST(intpl_color_push, gte_tests, + gte_set_far_color(0, 0, 0); + cop2_put(9, 0x0ff0); // MAC1=0x0ff0, /16 = 255 + cop2_put(10, 0x0800); // MAC2=0x0800, /16 = 128 + cop2_put(11, 0x0010); // MAC3=0x0010, /16 = 1 + cop2_put(8, 0); + cop2_put(6, 0xcc000000); // CODE=0xCC + 
gte_clear_flag(); + cop2_cmd(COP2_INTPL(1, 0)); + uint32_t rgb2; + cop2_get(22, rgb2); + uint8_t cd = (rgb2 >> 24) & 0xff; + uint8_t r = rgb2 & 0xff; + uint8_t g = (rgb2 >> 8) & 0xff; + uint8_t b = (rgb2 >> 16) & 0xff; + ramsyscall_printf("INTPL color: R=%u G=%u B=%u CD=0x%02x raw=0x%08x\n", r, g, b, cd, rgb2); + cester_assert_uint_eq(255, r); + cester_assert_uint_eq(128, g); + cester_assert_uint_eq(1, b); + cester_assert_uint_eq(0xcc, cd); +) diff --git a/src/mips/tests/gte/gte-edgecase.c b/src/mips/tests/gte/gte-edgecase.c new file mode 100644 index 000000000..9c06bdb79 --- /dev/null +++ b/src/mips/tests/gte/gte-edgecase.c @@ -0,0 +1,560 @@ +// Edge cases and degenerate inputs: division, overflow boundaries, +// zero matrices, negative Z, FLAG verification per instruction. + +// ========================================================================== +// Division edge cases (tested via RTPS) +// ========================================================================== + +// Division by zero: SZ3=0 +CESTER_TEST(edge_div_by_zero, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + gte_set_screen(0, 0, 200); + cop2_put(0, (0 << 16) | 100); + cop2_put(1, 0); // VZ0=0 -> SZ3=0 + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sz3, sxy2, flag; + cop2_get(19, sz3); + cop2_get(14, sxy2); + flag = gte_read_flag(); + ramsyscall_printf("div/0: SZ3=%u SXY2=0x%08x FLAG=0x%08x\n", sz3, sxy2, flag); + // SZ3=0, H=200 -> H >= SZ3*2 -> division overflow (FLAG.17) + uint32_t f17 = (flag >> 17) & 1; + cester_assert_uint_eq(1, f17); +) + +// H=0: zero numerator +CESTER_TEST(edge_div_h_zero, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + gte_set_screen(0, 0, 0); // H=0 + cop2_put(0, (0 << 16) | 100); + cop2_put(1, 1000); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sxy2, flag; + cop2_get(14, sxy2); + flag = gte_read_flag(); + int16_t sx = (int16_t)(sxy2 & 0xffff); + ramsyscall_printf("H=0: SX=%d 
FLAG=0x%08x\n", sx, flag); + // H=0, SZ3=1000 -> H < SZ3*2 -> no overflow, quotient = 0 + // SX = OFX/65536 + IR1 * 0 = 0 + cester_assert_int_eq(0, sx); + uint32_t f17 = (flag >> 17) & 1; + cester_assert_uint_eq(0, f17); +) + +// Division overflow boundary: H=SZ3*2-1 (just under, no overflow) +CESTER_TEST(edge_div_boundary_under, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + gte_set_screen(0, 0, 199); // H=199 + cop2_put(0, (0 << 16) | 100); + cop2_put(1, 100); // SZ3=100 -> H < 200 -> no overflow + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t flag; + flag = gte_read_flag(); + uint32_t f17 = (flag >> 17) & 1; + ramsyscall_printf("div boundary under: H=199 SZ3=100 FLAG.17=%u\n", f17); + cester_assert_uint_eq(0, f17); +) + +// Division overflow boundary: H=SZ3*2 (exactly at overflow) +CESTER_TEST(edge_div_boundary_at, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + gte_set_screen(0, 0, 200); // H=200 + cop2_put(0, (0 << 16) | 100); + cop2_put(1, 100); // SZ3=100 -> H >= 200 -> overflow + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t flag; + flag = gte_read_flag(); + uint32_t f17 = (flag >> 17) & 1; + ramsyscall_printf("div boundary at: H=200 SZ3=100 FLAG.17=%u\n", f17); + cester_assert_uint_eq(1, f17); +) + +// Division overflow boundary: H=SZ3*2+1 (just over, definitely overflow) +CESTER_TEST(edge_div_boundary_over, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + gte_set_screen(0, 0, 201); // H=201 + cop2_put(0, (0 << 16) | 100); + cop2_put(1, 100); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t flag; + flag = gte_read_flag(); + uint32_t f17 = (flag >> 17) & 1; + cester_assert_uint_eq(1, f17); +) + +// ========================================================================== +// IR saturation boundaries +// ========================================================================== + +// IR at exactly 0x7FFF (max positive, no saturation) 
+CESTER_TEST(edge_ir_max_no_sat, gte_tests, + cop2_put(8, 0x1000); + cop2_put(9, 0x7fff); + cop2_put(10, 0x7fff); + cop2_put(11, 0x7fff); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 0)); + uint32_t ir1; + cop2_get(9, ir1); + uint32_t flag = gte_read_flag(); + // 0x1000 * 0x7FFF >> 12 = 0x7FFF -> no saturation + cester_assert_uint_eq(0x7fff, ir1); + // FLAG.24 (IR1 sat) should NOT be set + uint32_t f24 = (flag >> 24) & 1; + cester_assert_uint_eq(0, f24); +) + +// IR just over 0x7FFF (triggers saturation) +CESTER_TEST(edge_ir_over_max, gte_tests, + cop2_put(8, 0x1001); // IR0 = 0x1001 (slightly > 1.0) + cop2_put(9, 0x7fff); + cop2_put(10, 0x100); + cop2_put(11, 0x100); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 0)); + uint32_t ir1; + cop2_get(9, ir1); + uint32_t flag = gte_read_flag(); + ramsyscall_printf("IR over max: IR1=0x%04x FLAG=0x%08x\n", ir1 & 0xffff, flag); + // 0x1001 * 0x7FFF = 0x8006FFF, >> 12 = 0x8006 (> 0x7FFF) -> saturates to 0x7FFF + cester_assert_uint_eq(0x7fff, ir1); + uint32_t f24 = (flag >> 24) & 1; + cester_assert_uint_eq(1, f24); // FLAG.24 (IR1 sat) set +) + +// ========================================================================== +// MAC0 overflow boundaries +// ========================================================================== + +// NCLIP with opposing corners of the 16-bit range: the two products cancel, so MAC0 = 0 and nothing overflows +CESTER_TEST(edge_mac0_positive_overflow, gte_tests, + // Maximize cross product: opposing corners of 16-bit range + cop2_put(12, (0x7fff << 16) | 0x7fff); // (32767, 32767) + cop2_put(13, (0x8000 << 16) | 0x8000); // (-32768, -32768) + cop2_put(14, 0x00000000); // (0, 0) + gte_clear_flag(); + cop2_cmd(COP2_NCLIP); + int32_t mac0; + uint32_t flag; + cop2_get(24, mac0); + flag = gte_read_flag(); + // SX0*(SY1-SY2) + SX1*(SY2-SY0) + SX2*(SY0-SY1) + // = 32767*(-32768) + (-32768)*(0-32767) + 0 + // = -1073709056 + (-32768)*(-32767) + // = -1073709056 + 1073709056 = 0... 
hmm, that's zero + // Actually: 32767*(-32768-0) + (-32768)*(0-32767) + 0*(32767-(-32768)) + // = 32767*(-32768) + (-32768)*(-32767) + // = -1073709056 + 1073709056 = 0 + // An asymmetric triangle is needed for a real overflow (see edge_mac0_negative_overflow) + ramsyscall_printf("MAC0 overflow test: MAC0=%d FLAG=0x%08x (F16=%u F15=%u)\n", + mac0, flag, (flag >> 16) & 1, (flag >> 15) & 1); + // Cancels to zero - no actual overflow despite the test name + cester_assert_int_eq(0, mac0); + cester_assert_uint_eq(0x00000000, flag); +) + +// NCLIP whose true result exceeds the signed 32-bit range, so MAC0 reads back negative +CESTER_TEST(edge_mac0_negative_overflow, gte_tests, + // (32767, 32767), (-32768, 32767), (32767, -32768) + cop2_put(12, (0x7fff << 16) | 0x7fff); + cop2_put(13, (0x7fff << 16) | 0x8000); + cop2_put(14, (0x8000 << 16) | 0x7fff); + gte_clear_flag(); + cop2_cmd(COP2_NCLIP); + int32_t mac0; + uint32_t flag; + cop2_get(24, mac0); + flag = gte_read_flag(); + ramsyscall_printf("MAC0 neg overflow: MAC0=%d FLAG=0x%08x\n", mac0, flag); + // True result: 32767*65535 + (-32768)*(-65535) + 32767*0 = 4294836225 (0xFFFE0001) + // which does not fit in int32 and reads back as -131071 (so FLAG.16, not FLAG.15, is checked) + cester_assert_int_eq(-131071, mac0); + // FLAG.16 set from intermediate positive overflow in NCLIP's chained additions + uint32_t f16 = (flag >> 16) & 1; + cester_assert_uint_eq(1, f16); +) + +// ========================================================================== +// Color saturation boundaries +// ========================================================================== + +// Color output at exactly 255 (no saturation) +CESTER_TEST(edge_color_at_255, gte_tests, + cop2_put(8, 0x1000); + cop2_put(9, 0x0ff0); // MAC1 = 0x0ff0, /16 = 255 + cop2_put(10, 0x0ff0); + cop2_put(11, 0x0ff0); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 0)); + uint32_t rgb2, flag; + cop2_get(22, rgb2); + flag = gte_read_flag(); + uint32_t r_255 = rgb2 & 0xff; + cester_assert_uint_eq(255, r_255); + uint32_t f21_255 = (flag >> 21) & 1; + cester_assert_uint_eq(0, f21_255); // No color saturation flag +) + +// 
Color output at 256 (saturates to 255, FLAG set) +CESTER_TEST(edge_color_at_256, gte_tests, + cop2_put(8, 0x1000); + cop2_put(9, 0x1000); // MAC1 = 0x1000, /16 = 256 -> saturates + cop2_put(10, 0x100); + cop2_put(11, 0x100); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 0)); + uint32_t rgb2, flag; + cop2_get(22, rgb2); + flag = gte_read_flag(); + uint32_t r_256 = rgb2 & 0xff; + cester_assert_uint_eq(255, r_256); // saturated to 255 + uint32_t f21_256 = (flag >> 21) & 1; + cester_assert_uint_eq(1, f21_256); // R saturation flag set +) + +// Negative color (saturates to 0, FLAG set) +CESTER_TEST(edge_color_negative, gte_tests, + cop2_put(8, 0x1000); + cop2_put(9, 0xffff8000); // IR1 = -32768 -> negative MAC1 -> color=0 + cop2_put(10, 0x100); + cop2_put(11, 0x100); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 0)); + uint32_t rgb2, flag; + cop2_get(22, rgb2); + flag = gte_read_flag(); + uint32_t r_neg = rgb2 & 0xff; + cester_assert_uint_eq(0, r_neg); // clamped to 0 + uint32_t f21_neg = (flag >> 21) & 1; + cester_assert_uint_eq(1, f21_neg); // Color R saturation flag +) + +// ========================================================================== +// Screen coordinate saturation +// ========================================================================== + +// SX at exactly 0x3FF (max, no saturation) +CESTER_TEST(edge_sx_at_max, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + cop2_putc(24, 0x3ff << 16); // OFX = 0x3FF in 16.16 + cop2_putc(25, 0); + cop2_putc(26, 0); // H=0 -> quotient=0 -> SX = OFX only + cop2_putc(27, 0); + cop2_putc(28, 0); + cop2_put(0, 0); + cop2_put(1, 1000); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sxy2, flag; + cop2_get(14, sxy2); + flag = gte_read_flag(); + int16_t sx = (int16_t)(sxy2 & 0xffff); + cester_assert_int_eq(0x3ff, sx); + uint32_t f14 = (flag >> 14) & 1; + cester_assert_uint_eq(0, f14); // no saturation +) + +// SX at 0x400 (saturates 
to 0x3FF) +CESTER_TEST(edge_sx_over_max, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + cop2_putc(24, 0x400 << 16); // OFX = 0x400 + cop2_putc(25, 0); + cop2_putc(26, 0); + cop2_putc(27, 0); + cop2_putc(28, 0); + cop2_put(0, 0); + cop2_put(1, 1000); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sxy2, flag; + cop2_get(14, sxy2); + flag = gte_read_flag(); + int16_t sx = (int16_t)(sxy2 & 0xffff); + cester_assert_int_eq(0x3ff, sx); // saturated + uint32_t f14 = (flag >> 14) & 1; + cester_assert_uint_eq(1, f14); +) + +// SY at -0x400 (min, no saturation) +CESTER_TEST(edge_sy_at_min, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + cop2_putc(24, 0); + cop2_putc(25, (uint32_t)(-0x400) << 16); // OFY = -0x400 + cop2_putc(26, 0); + cop2_putc(27, 0); + cop2_putc(28, 0); + cop2_put(0, 0); + cop2_put(1, 1000); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sxy2, flag; + cop2_get(14, sxy2); + flag = gte_read_flag(); + int16_t sy = (int16_t)(sxy2 >> 16); + cester_assert_int_eq(-0x400, sy); + uint32_t f13 = (flag >> 13) & 1; + cester_assert_uint_eq(0, f13); +) + +// ========================================================================== +// Degenerate matrix states +// ========================================================================== + +// Zero rotation matrix: everything should become translation only +CESTER_TEST(edge_zero_matrix, gte_tests, + cop2_putc(0, 0); + cop2_putc(1, 0); + cop2_putc(2, 0); + cop2_putc(3, 0); + cop2_putc(4, 0); + gte_set_translation(100, 200, 300); + cop2_put(0, (0x7fff << 16) | 0x7fff); // large vertex + cop2_put(1, 0x7fff); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(1, COP2_MX_RT, COP2_V_V0, COP2_CV_TR, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // Zero matrix * anything = 0, plus translation + cester_assert_int_eq(100, mac1); + cester_assert_int_eq(200, mac2); + cester_assert_int_eq(300, mac3); +) + 
+// Max magnitude matrix elements +CESTER_TEST(edge_max_matrix, gte_tests, + cop2_putc(0, 0x7fff7fff); // R11=R12=0x7FFF + cop2_putc(1, 0x7fff7fff); + cop2_putc(2, 0x7fff7fff); + cop2_putc(3, 0x7fff7fff); + cop2_putc(4, 0x7fff); + gte_set_translation(0, 0, 0); + cop2_put(0, (0x7fff << 16) | 0x7fff); + cop2_put(1, 0x7fff); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(1, COP2_MX_RT, COP2_V_V0, COP2_CV_NONE, 0)); + int32_t mac1; + uint32_t flag; + cop2_get(25, mac1); + flag = gte_read_flag(); + ramsyscall_printf("max matrix: MAC1=%d FLAG=0x%08x\n", mac1, flag); + // 3 * 0x7FFF * 0x7FFF = 3 * 1073676289 = 3221028867 + // >> 12 = 786384, fits in 32-bit MAC and is far below the 44-bit accumulator limit; FLAG 0x81c00000 = bits 31|24|23|22: FLAG.24 is IR1 saturation (786384 > 0x7FFF), 23/22 presumably IR2/IR3 and 31 the summary bit - confirm + cester_assert_int_eq(786384, mac1); + cester_assert_uint_eq(0x81c00000, flag); +) + +// Negative Z in RTPS (behind camera) +CESTER_TEST(edge_negative_z, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, -1000); // TRZ = -1000 + gte_set_screen(160 << 16, 120 << 16, 200); + cop2_put(0, (0 << 16) | 100); + cop2_put(1, 0); // VZ=0, MAC3 = TRZ = -1000 + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sz3, flag; + int32_t mac3; + cop2_get(19, sz3); + cop2_get(27, mac3); + flag = gte_read_flag(); + ramsyscall_printf("neg Z: MAC3=%d SZ3=%u FLAG=0x%08x\n", mac3, sz3, flag); + // MAC3 = -1000, SZ3 should saturate to 0 (Lm_D clamps to [0, 0xFFFF]) + cester_assert_int_eq(-1000, mac3); + cester_assert_uint_eq(0, sz3); // saturated + uint32_t f18 = (flag >> 18) & 1; + cester_assert_uint_eq(1, f18); // OTZ/SZ3 saturation +) + +// SQR of -0x8000 (minimum 16-bit signed) +CESTER_TEST(edge_sqr_min_negative, gte_tests, + cop2_put(9, 0xffff8000); // IR1 = -32768 + cop2_put(10, 0); + cop2_put(11, 0); + gte_clear_flag(); + cop2_cmd(COP2_SQR(0, 0)); + int32_t mac1; + uint32_t flag; + cop2_get(25, mac1); + flag = gte_read_flag(); + // (-32768)^2 = 1073741824 = 0x40000000 (fits in 32-bit signed) + ramsyscall_printf("SQR(-32768): MAC1=%d FLAG=0x%08x\n", mac1, flag); + 
cester_assert_int_eq(1073741824, mac1); +) + +// GPL with negative MAC base +CESTER_TEST(edge_gpl_negative_base, gte_tests, + cop2_put(25, -10000); // MAC1 = -10000 + cop2_put(26, -20000); + cop2_put(27, -30000); + cop2_put(8, 0x1000); // IR0 = 1.0 + cop2_put(9, 100); + cop2_put(10, 200); + cop2_put(11, 300); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPL(1, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // GPL sf=1: MAC = ((old_MAC << 12) + IR0*IR) >> 12 + // = ((-10000 << 12) + 4096*100) >> 12 + // = (-40960000 + 409600) >> 12 + // = -40550400 >> 12 = -9900 + cester_assert_int_eq(-9900, mac1); + cester_assert_int_eq(-19800, mac2); + cester_assert_int_eq(-29700, mac3); +) + +// ========================================================================== +// FLAG cleared at instruction start +// ========================================================================== + +// Verify FLAG is reset to 0 at the start of each GTE instruction, +// not accumulating from previous instructions +CESTER_TEST(edge_flag_cleared_each_instruction, gte_tests, + // First: trigger IR1 saturation via GPF + cop2_put(8, 0x1001); + cop2_put(9, 0x7fff); + cop2_put(10, 0x100); + cop2_put(11, 0x100); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 0)); + uint32_t flag1 = gte_read_flag(); + uint32_t f24_1 = (flag1 >> 24) & 1; + cester_assert_uint_eq(1, f24_1); // IR1 saturated + + // Now: run a clean GPF that should NOT trigger any flags + cop2_put(8, 0x1000); + cop2_put(9, 0x100); + cop2_put(10, 0x100); + cop2_put(11, 0x100); + cop2_put(6, 0x00808080); + // Do NOT call gte_clear_flag() - the instruction should clear it itself + cop2_cmd(COP2_GPF(1, 0)); + uint32_t flag2 = gte_read_flag(); + // FLAG should be 0 - the instruction clears it at start + cester_assert_uint_eq(0, flag2); +) + +// ========================================================================== +// IR0 saturation boundary +// 
========================================================================== + +// IR0 at exactly 0x1000 (max, no saturation) +CESTER_TEST(edge_ir0_at_max, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + cop2_putc(24, 0); + cop2_putc(25, 0); + cop2_putc(26, 200); + cop2_putc(27, 0); // DQA = 0 + cop2_putc(28, 0x1000000); // DQB = 0x1000000 -> MAC0=DQB, IR0=DQB>>12=0x1000 + cop2_put(0, 0); + cop2_put(1, 1000); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t ir0, flag; + cop2_get(8, ir0); + flag = gte_read_flag(); + ramsyscall_printf("IR0 max: IR0=0x%04x FLAG=0x%08x\n", ir0 & 0xffff, flag); + // IR0 should be exactly 0x1000 (value is only logged; the assertion below covers FLAG.12 alone) + uint32_t f12 = (flag >> 12) & 1; + cester_assert_uint_eq(0, f12); // no saturation +) + +// ========================================================================== +// OTZ saturation boundary +// ========================================================================== + +// OTZ at exactly 0xFFFF (max, triggers saturation) +CESTER_TEST(edge_otz_at_max, gte_tests, + // Need MAC0 >> 12 = 0xFFFF -> MAC0 = 0xFFFF << 12 = 0xFFFF000 + // ZSF3 * (SZ1+SZ2+SZ3) = 0xFFFF000 + // Use ZSF3 = 0x1000, SZ_sum = 0xFFFF -> each SZ = 0x5555 + cop2_put(17, 0x5555); + cop2_put(18, 0x5555); + cop2_put(19, 0x5555); + cop2_putc(29, 0x1000); + gte_clear_flag(); + cop2_cmd(COP2_AVSZ3); + uint32_t otz, flag; + cop2_get(7, otz); + flag = gte_read_flag(); + ramsyscall_printf("OTZ max: OTZ=%u FLAG=0x%08x\n", otz, flag); + // 0x5555*3 = 0xFFFF, * 0x1000 = 0xFFFF000, >> 12 = 0xFFFF + cester_assert_uint_eq(0xffff, otz); // FLAG is only logged - NOTE(review): confirm whether 0xFFFF itself counts as saturation +) + +// ========================================================================== +// Depth cue inner clamp (FC - input can go negative) +// ========================================================================== + +// DPCS where FC << input color (FC-input negative, inner lm=0 clamp) +CESTER_TEST(edge_depthcue_fc_less_than_input, gte_tests, + gte_set_far_color(0, 0, 0); // FC = 0 (dark fog) + cop2_put(6, 
0x00ffffff); // RGBC: R=G=B=0xFF (bright) + cop2_put(8, 0x0800); // IR0 = 0.5 + gte_clear_flag(); + cop2_cmd(COP2_DPCS(1, 0)); + int32_t mac1; + uint32_t rgb2, flag; + cop2_get(25, mac1); + cop2_get(22, rgb2); + flag = gte_read_flag(); + ramsyscall_printf("DPCS FC<input: MAC1=%d RGB2=0x%08x FLAG=0x%08x\n", mac1, rgb2, flag); + // NOTE(review): the printf above and this comment were rebuilt after the text between '<' and '>' was lost - confirm against upstream + // (FC - input) is negative -> should produce intermediate result + cester_assert_int_eq(2040, mac1); + cester_assert_uint_eq(0x007f7f7f, rgb2); + cester_assert_uint_eq(0x00000000, flag); +) + +// ========================================================================== +// INTPL where FC < IR (interpolation goes backward) +// ========================================================================== + +CESTER_TEST(edge_intpl_fc_less_than_ir, gte_tests, + gte_set_far_color(0, 0, 0); // FC = 0 + cop2_put(9, 0x1000); // IR = 0x1000 (> FC) + cop2_put(10, 0x1000); + cop2_put(11, 0x1000); + cop2_put(8, 0x0800); // IR0 = 0.5 + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_INTPL(1, 0)); + int32_t mac1; + uint32_t flag; + cop2_get(25, mac1); + flag = gte_read_flag(); + ramsyscall_printf("INTPL FC<IR: MAC1=%d FLAG=0x%08x\n", mac1, flag); + // NOTE(review): the printf above and the next comment line were rebuilt after the text between '<' and '>' was lost - confirm against upstream + // ((FC << 12) - (IR << 12)) >> 12 = -0x1000 -> clamped to -0x1000 (in range) + // MAC = 0x1000<<12 + 0x800 * (-0x1000) = 0x1000000 + (-0x800000) + // >> 12 = (0x800000) >> 12 = 0x800 = 2048 + cester_assert_int_eq(2048, mac1); + cester_assert_uint_eq(0x00000000, flag); +) diff --git a/src/mips/tests/gte/gte-encoding.c b/src/mips/tests/gte/gte-encoding.c new file mode 100644 index 000000000..64d9962c3 --- /dev/null +++ b/src/mips/tests/gte/gte-encoding.c @@ -0,0 +1,369 @@ +// GTE instruction encoding tests: systematic sweep of bitfield parameters. +// +// Helper macros for unrolled MVMVA sweeps. Defined at file scope so they +// survive cester's double-include of __BASE_FILE__. 
+ +#define MVMVA_T(mx, v, cv) do { \ + if ((v) == 3) { cop2_put(9, 0x100); cop2_put(10, 0x200); cop2_put(11, 0x300); } \ + gte_clear_flag(); \ + cop2_cmd(COP2_MVMVA(1, mx, v, cv, 0)); \ +} while (0) + +#define MVMVA_MX3_V(v) do { \ + if ((v) == 3) { cop2_put(9, 0x400); cop2_put(10, 0x500); cop2_put(11, 0x600); } \ + gte_clear_flag(); \ + cop2_cmd(COP2_MVMVA(1, 3, v, 3, 0)); \ + int32_t _m1, _m2, _m3; \ + cop2_get(25, _m1); cop2_get(26, _m2); cop2_get(27, _m3); \ + ramsyscall_printf("MVMVA mx=3 v=%d: MAC=(%d,%d,%d)\n", v, _m1, _m2, _m3); \ +} while (0) + +#define MVMVA_CV2_MX(mx) do { \ + cop2_put(9, 0x100); cop2_put(10, 0x200); cop2_put(11, 0x300); \ + gte_clear_flag(); \ + cop2_cmd(COP2_MVMVA(1, mx, 0, 2, 0)); \ + int32_t _m1, _m2, _m3; uint32_t _fl; \ + cop2_get(25, _m1); cop2_get(26, _m2); cop2_get(27, _m3); _fl = gte_read_flag(); \ + ramsyscall_printf("MVMVA mx=%d cv=2: MAC=(%d,%d,%d) FLAG=0x%08x\n", mx, _m1, _m2, _m3, _fl); \ +} while (0) +// +// The GTE command word is a 25-bit immediate with fields: +// [fake:5][sf:1][mx:2][v:2][cv:2][pad:2][lm:1][pad:4][fn:6] +// +// These tests verify: +// 1. The "fake" field (bits 24-20) is ignored by hardware +// 2. sf=0 vs sf=1 behavior (covered for GPF, SQR and OP) +// 3. lm=0 vs lm=1 behavior (covered for SQR) +// 4. All 64 MVMVA mx/v/cv combinations execute without crashing +// 5. 
Unused bitfield values don't crash + +// ========================================================================== +// Fake field is ignored by hardware +// ========================================================================== + +// Run RTPS with fake=0 (non-standard) and verify same result as fake=1 +CESTER_TEST(enc_fake_field_ignored_rtps, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 1000); + gte_set_screen(160 << 16, 120 << 16, 200); + cop2_put(0, 0); + cop2_put(1, 0); + + // Standard encoding: fake=1, sf=1 + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sxy2_std; + cop2_get(14, sxy2_std); + + // Non-standard: fake=0, same sf/fn + cop2_put(0, 0); + cop2_put(1, 0); + gte_clear_flag(); + cop2_cmd(COP2_OP(0, 1, 0, 0, 0, 0, COP2_FN_RTPS)); + uint32_t sxy2_alt; + cop2_get(14, sxy2_alt); + + cester_assert_uint_eq(sxy2_std, sxy2_alt); +) + +// Run GPF with fake=31 (max) vs standard fake=25 +CESTER_TEST(enc_fake_field_ignored_gpf, gte_tests, + cop2_put(8, 0x1000); + cop2_put(9, 100); + cop2_put(10, 200); + cop2_put(11, 300); + cop2_put(6, 0x00808080); + + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 0)); + int32_t mac1_std; + cop2_get(25, mac1_std); + + cop2_put(8, 0x1000); + cop2_put(9, 100); + cop2_put(10, 200); + cop2_put(11, 300); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_OP(31, 1, 0, 0, 0, 0, COP2_FN_GPF)); + int32_t mac1_alt; + cop2_get(25, mac1_alt); + + cester_assert_int_eq(mac1_std, mac1_alt); +) + +// ========================================================================== +// sf=0 vs sf=1 (covered here: GPF, SQR, OP) +// ========================================================================== + +// GPF: sf changes shift behavior +CESTER_TEST(enc_gpf_sf_difference, gte_tests, + cop2_put(8, 0x1000); + cop2_put(9, 0x1000); + cop2_put(10, 0x1000); + cop2_put(11, 0x1000); + cop2_put(6, 0x00808080); + + // sf=1: MAC = (IR0*IR) >> 12 = (0x1000*0x1000)>>12 = 0x1000 + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 0)); 
+ int32_t mac1_sf1; + cop2_get(25, mac1_sf1); + + cop2_put(8, 0x1000); + cop2_put(9, 0x1000); + cop2_put(10, 0x1000); + cop2_put(11, 0x1000); + cop2_put(6, 0x00808080); + + // sf=0: MAC = IR0*IR = 0x1000*0x1000 = 0x1000000 + gte_clear_flag(); + cop2_cmd(COP2_GPF(0, 0)); + int32_t mac1_sf0; + cop2_get(25, mac1_sf0); + + cester_assert_int_eq(0x1000, mac1_sf1); + cester_assert_int_eq(0x1000000, mac1_sf0); +) + +// SQR: sf changes shift +CESTER_TEST(enc_sqr_sf_difference, gte_tests, + cop2_put(9, 0x100); + cop2_put(10, 0x100); + cop2_put(11, 0x100); + + gte_clear_flag(); + cop2_cmd(COP2_SQR(1, 0)); + int32_t mac1_sf1; + cop2_get(25, mac1_sf1); + + cop2_put(9, 0x100); + cop2_put(10, 0x100); + cop2_put(11, 0x100); + gte_clear_flag(); + cop2_cmd(COP2_SQR(0, 0)); + int32_t mac1_sf0; + cop2_get(25, mac1_sf0); + + // sf=1: (0x100*0x100)>>12 = 0x10000>>12 = 0x10 + // sf=0: 0x100*0x100 = 0x10000 + cester_assert_int_eq(0x10, mac1_sf1); + cester_assert_int_eq(0x10000, mac1_sf0); +) + +// OP: sf changes shift +CESTER_TEST(enc_op_sf_difference, gte_tests, + cop2_putc(0, 0x00001000); + cop2_putc(2, 0x00002000); + cop2_putc(4, 0x1000); + cop2_put(9, 100); + cop2_put(10, 0); + cop2_put(11, 0); + + gte_clear_flag(); + cop2_cmd(COP2_OP_CP(1, 0)); + int32_t mac2_sf1; + cop2_get(26, mac2_sf1); + + cop2_put(9, 100); + cop2_put(10, 0); + cop2_put(11, 0); + gte_clear_flag(); + cop2_cmd(COP2_OP_CP(0, 0)); + int32_t mac2_sf0; + cop2_get(26, mac2_sf0); + + // sf=1: MAC2 = (R33*IR1 - R11*IR3)>>12 = (0x1000*100 - 0x1000*0)>>12 = 100 + // sf=0: MAC2 = R33*IR1 - R11*IR3 = 0x1000*100 = 409600 + cester_assert_int_eq(100, mac2_sf1); + cester_assert_int_eq(409600, mac2_sf0); +) + +// ========================================================================== +// lm=0 vs lm=1 (covered here: SQR) +// ========================================================================== + +// SQR: lm=1 clamps IR to [0, 0x7fff]; a square is never negative, so both lm settings give the same IR here +CESTER_TEST(enc_sqr_lm_difference, gte_tests, + cop2_put(9, 0x2000); // 2.0 + 
cop2_put(10, 0x2000); + cop2_put(11, 0x2000); + + // sf=1, lm=0: 2.0^2 = 4.0 = 0x4000 (in range for signed) + gte_clear_flag(); + cop2_cmd(COP2_SQR(1, 0)); + uint32_t ir1_lm0; + cop2_get(9, ir1_lm0); + + cop2_put(9, 0x2000); + cop2_put(10, 0x2000); + cop2_put(11, 0x2000); + + // sf=1, lm=1: same result since 0x4000 > 0 (lm=1 only clamps negative to 0) + gte_clear_flag(); + cop2_cmd(COP2_SQR(1, 1)); + uint32_t ir1_lm1; + cop2_get(9, ir1_lm1); + + // Both should be 0x4000 since result is positive + cester_assert_uint_eq(0x4000, ir1_lm0); + cester_assert_uint_eq(0x4000, ir1_lm1); +) + +// ========================================================================== +// MVMVA: all mx/v/cv combinations (4 x 4 x 4 = 64 combos) +// ========================================================================== + +// Sweep all 64 MVMVA parameter combinations and verify no crash. +// MVMVA_T only issues the command; MAC results are not read back or logged in this sweep. +CESTER_TEST(enc_mvmva_full_sweep, gte_tests, + // Set up all matrices and vectors with known non-zero values + // RT matrix + cop2_putc(0, 0x08001000); + cop2_putc(1, 0x02000400); + cop2_putc(2, 0x08001000); + cop2_putc(3, 0x02000400); + cop2_putc(4, 0x1000); + // LL matrix + cop2_putc(8, 0x04000800); + cop2_putc(9, 0x01000200); + cop2_putc(10, 0x04000800); + cop2_putc(11, 0x01000200); + cop2_putc(12, 0x0800); + // LC matrix + cop2_putc(16, 0x02000400); + cop2_putc(17, 0x00800100); + cop2_putc(18, 0x02000400); + cop2_putc(19, 0x00800100); + cop2_putc(20, 0x0400); + // Vectors + cop2_put(0, (0x200 << 16) | 0x100); // V0 + cop2_put(1, 0x300); + cop2_put(2, (0x500 << 16) | 0x400); // V1 + cop2_put(3, 0x600); + cop2_put(4, (0x800 << 16) | 0x700); // V2 + cop2_put(5, 0x900); + cop2_put(9, 0x100); // IR1 + cop2_put(10, 0x200); // IR2 + cop2_put(11, 0x300); // IR3 + cop2_put(8, 0x0800); // IR0 + // Control vectors + gte_set_translation(100, 200, 300); + cop2_putc(13, 400); + cop2_putc(14, 500); + cop2_putc(15, 600); + gte_set_far_color(700, 800, 900); + + // All 64 
MVMVA combos unrolled (cop2_cmd requires compile-time constants). + MVMVA_T(0,0,0); MVMVA_T(0,0,1); MVMVA_T(0,0,2); MVMVA_T(0,0,3); + MVMVA_T(0,1,0); MVMVA_T(0,1,1); MVMVA_T(0,1,2); MVMVA_T(0,1,3); + MVMVA_T(0,2,0); MVMVA_T(0,2,1); MVMVA_T(0,2,2); MVMVA_T(0,2,3); + MVMVA_T(0,3,0); MVMVA_T(0,3,1); MVMVA_T(0,3,2); MVMVA_T(0,3,3); + MVMVA_T(1,0,0); MVMVA_T(1,0,1); MVMVA_T(1,0,2); MVMVA_T(1,0,3); + MVMVA_T(1,1,0); MVMVA_T(1,1,1); MVMVA_T(1,1,2); MVMVA_T(1,1,3); + MVMVA_T(1,2,0); MVMVA_T(1,2,1); MVMVA_T(1,2,2); MVMVA_T(1,2,3); + MVMVA_T(1,3,0); MVMVA_T(1,3,1); MVMVA_T(1,3,2); MVMVA_T(1,3,3); + MVMVA_T(2,0,0); MVMVA_T(2,0,1); MVMVA_T(2,0,2); MVMVA_T(2,0,3); + MVMVA_T(2,1,0); MVMVA_T(2,1,1); MVMVA_T(2,1,2); MVMVA_T(2,1,3); + MVMVA_T(2,2,0); MVMVA_T(2,2,1); MVMVA_T(2,2,2); MVMVA_T(2,2,3); + MVMVA_T(2,3,0); MVMVA_T(2,3,1); MVMVA_T(2,3,2); MVMVA_T(2,3,3); + MVMVA_T(3,0,0); MVMVA_T(3,0,1); MVMVA_T(3,0,2); MVMVA_T(3,0,3); + MVMVA_T(3,1,0); MVMVA_T(3,1,1); MVMVA_T(3,1,2); MVMVA_T(3,1,3); + MVMVA_T(3,2,0); MVMVA_T(3,2,1); MVMVA_T(3,2,2); MVMVA_T(3,2,3); + MVMVA_T(3,3,0); MVMVA_T(3,3,1); MVMVA_T(3,3,2); MVMVA_T(3,3,3); + cester_assert_int_eq(1, 1); // if we got here, none crashed +) + +// ========================================================================== +// MVMVA mx=3 (garbage matrix) with every vector source v=0..3 (cv fixed at 3) +// ========================================================================== + +CESTER_TEST(enc_mvmva_mx3_all_vectors, gte_tests, + cop2_putc(0, 0x20001000); + cop2_putc(1, 0x40003000); + cop2_putc(2, 0x60005000); + cop2_putc(3, 0x80007000); + cop2_putc(4, 0x1000); + cop2_put(8, 0x0800); + cop2_put(0, (0x100 << 16) | 0x100); + cop2_put(1, 0x100); + cop2_put(2, (0x200 << 16) | 0x200); + cop2_put(3, 0x200); + cop2_put(4, (0x300 << 16) | 0x300); + cop2_put(5, 0x300); + cop2_put(9, 0x400); + cop2_put(10, 0x500); + cop2_put(11, 0x600); + + MVMVA_MX3_V(0); MVMVA_MX3_V(1); MVMVA_MX3_V(2); MVMVA_MX3_V(3); + cester_assert_int_eq(1, 1); // reaching this line means none of the four commands hung; MACs are only logged +) + +// 
========================================================================== +// MVMVA cv=2 (FC bug) with matrices mx=0..2 (vector V0 only; MAC and FLAG are logged) +// ========================================================================== + +CESTER_TEST(enc_mvmva_cv2_all_matrices, gte_tests, + gte_set_identity_rotation(); + gte_set_simple_light(); + gte_set_white_light_color(); + gte_set_far_color(0x1000, 0x2000, 0x3000); + cop2_put(0, (0x200 << 16) | 0x100); + cop2_put(1, 0x300); + cop2_put(9, 0x100); + cop2_put(10, 0x200); + cop2_put(11, 0x300); + + MVMVA_CV2_MX(0); MVMVA_CV2_MX(1); MVMVA_CV2_MX(2); + cester_assert_int_eq(1, 1); // reaching this line means none of the three commands hung +) + +// ========================================================================== +// Instructions that ignore sf/lm should produce identical results +// ========================================================================== + +// NCLIP ignores sf and lm +CESTER_TEST(enc_nclip_ignores_sf_lm, gte_tests, + cop2_put(12, 0x00000000); + cop2_put(13, 0x00000064); + cop2_put(14, 0x00640000); + + gte_clear_flag(); + cop2_cmd(COP2_OP(20, 0, 0, 0, 0, 0, COP2_FN_NCLIP)); // standard + int32_t mac0_std; + cop2_get(24, mac0_std); + + cop2_put(12, 0x00000000); + cop2_put(13, 0x00000064); + cop2_put(14, 0x00640000); + gte_clear_flag(); + cop2_cmd(COP2_OP(0, 1, 3, 3, 3, 1, COP2_FN_NCLIP)); // fake=0, every other field at its maximum + int32_t mac0_alt; + cop2_get(24, mac0_alt); + + cester_assert_int_eq(mac0_std, mac0_alt); +) + +// AVSZ3 ignores sf and lm (uses fixed >>12) +CESTER_TEST(enc_avsz3_ignores_sf_lm, gte_tests, + cop2_put(17, 100); + cop2_put(18, 200); + cop2_put(19, 300); + cop2_putc(29, 0x555); + + gte_clear_flag(); + cop2_cmd(COP2_AVSZ3); + int32_t mac0_std; + cop2_get(24, mac0_std); + + cop2_put(17, 100); + cop2_put(18, 200); + cop2_put(19, 300); + cop2_putc(29, 0x555); + gte_clear_flag(); + cop2_cmd(COP2_OP(0, 0, 3, 3, 3, 1, COP2_FN_AVSZ3)); // fake=0, sf=0, remaining fields at their maximum + int32_t mac0_alt; + cop2_get(24, mac0_alt); + + cester_assert_int_eq(mac0_std, mac0_alt); +) diff --git a/src/mips/tests/gte/gte-gpf-gpl.c 
b/src/mips/tests/gte/gte-gpf-gpl.c new file mode 100644 index 000000000..f262a9074 --- /dev/null +++ b/src/mips/tests/gte/gte-gpf-gpl.c @@ -0,0 +1,148 @@ +// GPF: general purpose interpolation (IR0 * IR -> MAC/IR, push color) +// GPL: general purpose interpolation with base (MAC + IR0 * IR -> MAC/IR, push color) + +CESTER_TEST(gpf_shifted_unity, gte_tests, + cop2_put(8, 0x1000); // IR0 = 1.0 + cop2_put(9, 100); + cop2_put(10, 200); + cop2_put(11, 300); + cop2_put(6, 0x00204060); // RGBC + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cester_assert_int_eq(100, mac1); + cester_assert_int_eq(200, mac2); + cester_assert_int_eq(300, mac3); +) + +CESTER_TEST(gpf_shifted_half, gte_tests, + cop2_put(8, 0x0800); // IR0 = 0.5 + cop2_put(9, 1000); + cop2_put(10, 2000); + cop2_put(11, 4000); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // IR0*IR >> 12 = 0x800*IR >> 12 = IR/2 + cester_assert_int_eq(500, mac1); + cester_assert_int_eq(1000, mac2); + cester_assert_int_eq(2000, mac3); +) + +// GPF pushes color FIFO +CESTER_TEST(gpf_color_fifo_push, gte_tests, + cop2_put(8, 0x1000); // IR0 = 1.0 + cop2_put(9, 0x0800); // IR1 -> MAC1=0x800, /16=128 + cop2_put(10, 0x0400); // IR2 -> MAC2=0x400, /16=64 + cop2_put(11, 0x0200); // IR3 -> MAC3=0x200, /16=32 + cop2_put(6, 0xaa000000); // RGBC: CODE=0xaa + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 0)); + uint32_t rgb2; + cop2_get(22, rgb2); + uint8_t r = rgb2 & 0xff; + uint8_t g = (rgb2 >> 8) & 0xff; + uint8_t b = (rgb2 >> 16) & 0xff; + uint8_t cd = (rgb2 >> 24) & 0xff; + ramsyscall_printf("GPF color: R=%u G=%u B=%u CD=0x%02x\n", r, g, b, cd); + cester_assert_uint_eq(0xaa, cd); // CODE byte preserved + // R = MAC1/16 = 0x800/16 = 128 + cester_assert_uint_eq(128, r); + cester_assert_uint_eq(64, g); + 
cester_assert_uint_eq(32, b); +) + +// GPF unshifted (sf=0) +CESTER_TEST(gpf_unshifted, gte_tests, + cop2_put(8, 2); // IR0 = 2 + cop2_put(9, 100); + cop2_put(10, 200); + cop2_put(11, 300); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPF(0, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // sf=0: no shift, MAC = IR0*IR + cester_assert_int_eq(200, mac1); + cester_assert_int_eq(400, mac2); + cester_assert_int_eq(600, mac3); +) + +// GPL shifted with base +CESTER_TEST(gpl_shifted, gte_tests, + cop2_put(25, 1000); // MAC1 base + cop2_put(26, 2000); // MAC2 base + cop2_put(27, 3000); // MAC3 base + cop2_put(8, 0x1000); // IR0 = 1.0 + cop2_put(9, 100); + cop2_put(10, 200); + cop2_put(11, 300); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPL(1, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // GPL sf=1: MAC_new = (MAC_old << 12 + IR0 * IR) >> 12 + // = ((1000<<12) + 4096*100) >> 12 = (4096000+409600)>>12 = 1100 + cester_assert_int_eq(1100, mac1); + cester_assert_int_eq(2200, mac2); + cester_assert_int_eq(3300, mac3); +) + +// GPL unshifted (sf=0): MAC base used as-is, no shift +CESTER_TEST(gpl_unshifted, gte_tests, + cop2_put(25, 100); + cop2_put(26, 200); + cop2_put(27, 300); + cop2_put(8, 3); // IR0 = 3 + cop2_put(9, 10); + cop2_put(10, 20); + cop2_put(11, 30); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPL(0, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // sf=0: MAC_new = MAC_old + IR0*IR = 100+30=130, 200+60=260, 300+90=390 + cester_assert_int_eq(130, mac1); + cester_assert_int_eq(260, mac2); + cester_assert_int_eq(390, mac3); +) + +// GPL pushes color FIFO +CESTER_TEST(gpl_color_fifo, gte_tests, + cop2_put(25, 0); + cop2_put(26, 0); + cop2_put(27, 0); + cop2_put(8, 0x1000); + cop2_put(9, 0x0ff0); // /16 = 255 + cop2_put(10, 0x0800); // /16 = 128 + 
cop2_put(11, 0x0010); // /16 = 1 + cop2_put(6, 0x55000000); // CODE=0x55 + gte_clear_flag(); + cop2_cmd(COP2_GPL(1, 0)); + uint32_t rgb2; + cop2_get(22, rgb2); + uint8_t r = rgb2 & 0xff; + uint8_t g = (rgb2 >> 8) & 0xff; + uint8_t b = (rgb2 >> 16) & 0xff; + uint8_t cd = (rgb2 >> 24) & 0xff; + cester_assert_uint_eq(0x55, cd); + cester_assert_uint_eq(255, r); + cester_assert_uint_eq(128, g); + cester_assert_uint_eq(1, b); +) diff --git a/src/mips/tests/gte/gte-lighting.c b/src/mips/tests/gte/gte-lighting.c new file mode 100644 index 000000000..a23448f77 --- /dev/null +++ b/src/mips/tests/gte/gte-lighting.c @@ -0,0 +1,326 @@ +// Lighting instructions: NCS, NCT, NCCS, NCCT, NCDS, NCDT, CC, CDP + +// NCS: normal color single (2-stage: normal->light, light->color) +CESTER_TEST(ncs_z_normal_white_light, gte_tests, + gte_set_simple_light(); // L33=0x1000 + gte_set_white_light_color(); // LC identity + gte_set_zero_bk(); + // Normal pointing at light: (0, 0, 0x1000) + cop2_put(0, 0x00000000); + cop2_put(1, 0x1000); + cop2_put(6, 0x00808080); // RGBC (not used by NCS but CODE is) + gte_clear_flag(); + cop2_cmd(COP2_NCS(1, 1)); + int32_t mac1, mac2, mac3; + uint32_t rgb2; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cop2_get(22, rgb2); + ramsyscall_printf("NCS z-normal: MAC=(%d,%d,%d) RGB2=0x%08x\n", mac1, mac2, mac3, rgb2); + // Stage 1: L * normal = (0,0,0x1000).(0,0,0x1000) = only IR3 = 0x1000 + // Stage 2: LC * (0,0,0x1000) + BK = (0,0,0x1000) since LC is identity, BK=0 + // Color FIFO: MAC/16 = 0x1000/16 = 256 -> saturates to 255 + cester_assert_int_eq(0, mac1); + cester_assert_int_eq(0, mac2); + cester_assert_int_eq(4096, mac3); + cester_assert_uint_eq(0x00ff0000, rgb2); +) + +// NCS with background color +CESTER_TEST(ncs_with_background, gte_tests, + gte_set_simple_light(); + gte_set_white_light_color(); + cop2_putc(13, 0x800); // RBK = 0x800 + cop2_putc(14, 0x400); // GBK = 0x400 + cop2_putc(15, 0x200); // BBK = 0x200 + cop2_put(0, 0x00000000); 
+ cop2_put(1, 0x1000); + cop2_put(6, 0x00000000); + gte_clear_flag(); + cop2_cmd(COP2_NCS(1, 1)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // Stage 1: IR = (0, 0, 0x1000) + // Stage 2: MAC = BK + LC*(0,0,0x1000) = (0x800+0, 0x400+0, 0x200+0x1000) + cester_assert_int_eq(0x800, mac1); + cester_assert_int_eq(0x400, mac2); + cester_assert_int_eq(0x1200, mac3); +) + +// NCT: normal color triple +CESTER_TEST(nct_three_normals, gte_tests, + gte_set_simple_light(); + gte_set_white_light_color(); + gte_set_zero_bk(); + // V0 = (0, 0, 0x1000) - facing light + cop2_put(0, 0x00000000); + cop2_put(1, 0x1000); + // V1 = (0x1000, 0, 0) - perpendicular + cop2_put(2, (0 << 16) | 0x1000); + cop2_put(3, 0); + // V2 = (0, 0x1000, 0) - perpendicular + cop2_put(4, (0x1000 << 16) | 0); + cop2_put(5, 0); + cop2_put(6, 0x00000000); + gte_clear_flag(); + cop2_cmd(COP2_NCT(1, 1)); + uint32_t rgb0, rgb1, rgb2; + cop2_get(20, rgb0); + cop2_get(21, rgb1); + cop2_get(22, rgb2); + ramsyscall_printf("NCT: RGB0=0x%08x RGB1=0x%08x RGB2=0x%08x\n", rgb0, rgb1, rgb2); + // V0 facing light: should have color + // V1, V2 perpendicular: should be dark (light only in Z) + cester_assert_uint_eq(0x00ff0000, rgb0); + cester_assert_uint_eq(0x00000000, rgb1); + cester_assert_uint_eq(0x00000000, rgb2); +) + +// NCCS: normal color color single (adds vertex color multiplication) +CESTER_TEST(nccs_basic, gte_tests, + gte_set_simple_light(); + gte_set_white_light_color(); + gte_set_zero_bk(); + cop2_put(0, 0x00000000); + cop2_put(1, 0x1000); + cop2_put(6, 0x00808080); // R=0x80, G=0x80, B=0x80 + gte_clear_flag(); + cop2_cmd(COP2_NCCS(1, 1)); + int32_t mac1, mac2, mac3; + uint32_t rgb2; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cop2_get(22, rgb2); + ramsyscall_printf("NCCS: MAC=(%d,%d,%d) RGB2=0x%08x\n", mac1, mac2, mac3, rgb2); + // Stage 1: IR = (0, 0, 0x1000) + // Stage 2: MAC = LC*(0,0,0x1000) = (0, 0, 0x1000) + // Stage 3: MAC = 
(R<<4)*IR = (0x80<<4)*0 for R,G; (0x80<<4)*0x1000 for B... wait + // Actually after stage 2, IR1=0, IR2=0, IR3=0x1000 + // Stage 3: MAC1 = (R<<4)*IR1 = 0x800*0 = 0 + // Only B channel gets lit since only IR3 is non-zero + cester_assert_int_eq(0, mac1); + cester_assert_int_eq(0, mac2); + cester_assert_int_eq(2048, mac3); + cester_assert_uint_eq(0x00800000, rgb2); +) + +// NCCT: normal color color triple +CESTER_TEST(ncct_basic, gte_tests, + gte_set_simple_light(); + gte_set_white_light_color(); + gte_set_zero_bk(); + cop2_put(0, 0x00000000); + cop2_put(1, 0x1000); + cop2_put(2, 0x00000000); + cop2_put(3, 0x1000); + cop2_put(4, 0x00000000); + cop2_put(5, 0x1000); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_NCCT(1, 1)); + uint32_t rgb0, rgb1, rgb2; + cop2_get(20, rgb0); + cop2_get(21, rgb1); + cop2_get(22, rgb2); + ramsyscall_printf("NCCT: RGB0=0x%08x RGB1=0x%08x RGB2=0x%08x\n", rgb0, rgb1, rgb2); + // All three normals identical -> all three results should match + cester_assert_uint_eq(0x00800000, rgb0); + cester_assert_uint_eq(0x00800000, rgb1); + cester_assert_uint_eq(0x00800000, rgb2); +) + +// NCDS: normal color depth single (full 3-stage pipeline + depth cue) +CESTER_TEST(ncds_no_depth, gte_tests, + gte_set_simple_light(); + gte_set_white_light_color(); + gte_set_zero_bk(); + gte_set_far_color(0, 0, 0); + cop2_put(0, 0x00000000); + cop2_put(1, 0x1000); + cop2_put(6, 0x00808080); + cop2_put(8, 0); // IR0 = 0 (no depth cue) + gte_clear_flag(); + cop2_cmd(COP2_NCDS(1, 1)); + int32_t mac1, mac2, mac3; + uint32_t rgb2; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cop2_get(22, rgb2); + ramsyscall_printf("NCDS no depth: MAC=(%d,%d,%d) RGB2=0x%08x\n", mac1, mac2, mac3, rgb2); + cester_assert_int_eq(0, mac1); + cester_assert_int_eq(0, mac2); + cester_assert_int_eq(2048, mac3); + cester_assert_uint_eq(0x00800000, rgb2); +) + +// NCDS with depth cue +CESTER_TEST(ncds_with_depth, gte_tests, + gte_set_simple_light(); + 
gte_set_white_light_color(); + gte_set_zero_bk(); + gte_set_far_color(0x1000, 0x1000, 0x1000); + cop2_put(0, 0x00000000); + cop2_put(1, 0x1000); + cop2_put(6, 0x00808080); + cop2_put(8, 0x0800); // IR0 = 0.5 + gte_clear_flag(); + cop2_cmd(COP2_NCDS(1, 1)); + int32_t mac1, mac2, mac3; + uint32_t rgb2, flag; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cop2_get(22, rgb2); + flag = gte_read_flag(); + ramsyscall_printf("NCDS depth: MAC=(%d,%d,%d) RGB2=0x%08x FLAG=0x%08x\n", + mac1, mac2, mac3, rgb2, flag); + cester_assert_int_eq(2048, mac1); + cester_assert_int_eq(2048, mac2); + cester_assert_int_eq(3072, mac3); + cester_assert_uint_eq(0x00c08080, rgb2); + cester_assert_uint_eq(0x00000000, flag); +) + +// NCDT: normal color depth triple +CESTER_TEST(ncdt_basic, gte_tests, + gte_set_simple_light(); + gte_set_white_light_color(); + gte_set_zero_bk(); + gte_set_far_color(0, 0, 0); + cop2_put(0, 0x00000000); + cop2_put(1, 0x1000); + cop2_put(2, 0x00000000); + cop2_put(3, 0x0800); + cop2_put(4, 0x00000000); + cop2_put(5, 0x0400); + cop2_put(6, 0x00808080); + cop2_put(8, 0); + gte_clear_flag(); + cop2_cmd(COP2_NCDT(1, 1)); + uint32_t rgb0, rgb1, rgb2; + cop2_get(20, rgb0); + cop2_get(21, rgb1); + cop2_get(22, rgb2); + ramsyscall_printf("NCDT: RGB0=0x%08x RGB1=0x%08x RGB2=0x%08x\n", rgb0, rgb1, rgb2); + // V0 has strongest light (normal = 0x1000), V2 weakest (0x400) + cester_assert_uint_eq(0x00800000, rgb0); + cester_assert_uint_eq(0x00400000, rgb1); + cester_assert_uint_eq(0x00200000, rgb2); +) + +// CC: color color (light-to-color + vertex color multiply) +CESTER_TEST(cc_basic, gte_tests, + gte_set_white_light_color(); + gte_set_zero_bk(); + // Pre-computed light intensity in IR1-3 + cop2_put(9, 0x1000); + cop2_put(10, 0x0800); + cop2_put(11, 0x0400); + cop2_put(6, 0x00808080); // RGBC + gte_clear_flag(); + cop2_cmd(COP2_CC(1, 1)); + int32_t mac1, mac2, mac3; + uint32_t rgb2; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + 
cop2_get(22, rgb2); + ramsyscall_printf("CC: MAC=(%d,%d,%d) RGB2=0x%08x\n", mac1, mac2, mac3, rgb2); + // Stage 1 (light to color): with white LC identity and zero BK, + // MAC = LC*IR = IR (identity) + // Stage 2 (color mult): MAC = (R<<4)*IR1 = 0x800*0x1000 = 0x800000 + // After >>12 = 0x800, /16 = 128 + cester_assert_int_eq(2048, mac1); + cester_assert_int_eq(1024, mac2); + cester_assert_int_eq(512, mac3); + cester_assert_uint_eq(0x00204080, rgb2); +) + +// CDP: color depth cue with pre-computed light +CESTER_TEST(cdp_basic, gte_tests, + gte_set_white_light_color(); + gte_set_zero_bk(); + gte_set_far_color(0x1000, 0x1000, 0x1000); + cop2_put(9, 0x1000); + cop2_put(10, 0x1000); + cop2_put(11, 0x1000); + cop2_put(6, 0x00808080); + cop2_put(8, 0); // IR0=0: no depth cue + gte_clear_flag(); + cop2_cmd(COP2_CDP(1, 1)); + int32_t mac1, mac2, mac3; + uint32_t rgb2; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cop2_get(22, rgb2); + ramsyscall_printf("CDP: MAC=(%d,%d,%d) RGB2=0x%08x\n", mac1, mac2, mac3, rgb2); + cester_assert_int_eq(2048, mac1); + cester_assert_int_eq(2048, mac2); + cester_assert_int_eq(2048, mac3); + cester_assert_uint_eq(0x00808080, rgb2); +) + +// CDP with depth cue +CESTER_TEST(cdp_with_depth, gte_tests, + gte_set_white_light_color(); + gte_set_zero_bk(); + gte_set_far_color(0x1000, 0x1000, 0x1000); + cop2_put(9, 0x1000); + cop2_put(10, 0x1000); + cop2_put(11, 0x1000); + cop2_put(6, 0x00808080); + cop2_put(8, 0x0800); // IR0=0.5 + gte_clear_flag(); + cop2_cmd(COP2_CDP(1, 1)); + int32_t mac1, mac2, mac3; + uint32_t rgb2, flag; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cop2_get(22, rgb2); + flag = gte_read_flag(); + ramsyscall_printf("CDP depth: MAC=(%d,%d,%d) RGB2=0x%08x FLAG=0x%08x\n", + mac1, mac2, mac3, rgb2, flag); + cester_assert_int_eq(3072, mac1); + cester_assert_int_eq(3072, mac2); + cester_assert_int_eq(3072, mac3); + cester_assert_uint_eq(0x00c0c0c0, rgb2); + cester_assert_uint_eq(0x00000000, 
flag); +) + +// Full lighting pipeline: light matrix with non-trivial light direction +CESTER_TEST(ncs_full_light_matrix, gte_tests, + // Light from (0.707, 0, 0.707) direction - 45 degrees + // In 4.12 fixed: 0.707 ~ 0x0B50 + cop2_putc(8, 0x00000b50); // L11=0x0B50, L12=0 + cop2_putc(9, 0x00000000); // L13=0, L21=0 + cop2_putc(10, 0x00000000); // L22=0, L23=0 + cop2_putc(11, 0x00000000); // L31=0, L32=0 + cop2_putc(12, 0x0b50); // L33=0x0B50 + gte_set_white_light_color(); + gte_set_zero_bk(); + // Normal = (0x1000, 0, 0) - facing X + cop2_put(0, (0 << 16) | 0x1000); + cop2_put(1, 0); + cop2_put(6, 0x00000000); + gte_clear_flag(); + cop2_cmd(COP2_NCS(1, 1)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + ramsyscall_printf("NCS 45deg: MAC=(%d,%d,%d)\n", mac1, mac2, mac3); + // Stage 1: L * normal = (L11*VX, 0, L31*VX) = (0x0B50*0x1000, 0, 0) + // >> 12 = (0x0B50, 0, 0), so IR = (0x0B50, 0, 0) + // Stage 2: LC * IR = (0x0B50, 0, 0) since LC is identity, BK=0 + // MAC1 = 0x0B50, MAC2 = 0, MAC3 = 0 + cester_assert_int_eq(0x0b50, mac1); + cester_assert_int_eq(0, mac2); + cester_assert_int_eq(0, mac3); +) diff --git a/src/mips/tests/gte/gte-mvmva.c b/src/mips/tests/gte/gte-mvmva.c new file mode 100644 index 000000000..a7714667a --- /dev/null +++ b/src/mips/tests/gte/gte-mvmva.c @@ -0,0 +1,203 @@ +// MVMVA: parameterized matrix-vector multiply and add + +// mx=RT, v=V0, cv=TR (standard transform) +CESTER_TEST(mvmva_rt_v0_tr, gte_tests, + // 90-degree Z rotation + cop2_putc(0, 0xf0000000); // R11=0, R12=-0x1000 + cop2_putc(1, 0x10000000); // R13=0, R21=0x1000 + cop2_putc(2, 0x00000000); + cop2_putc(3, 0x00000000); + cop2_putc(4, 0x1000); + gte_set_translation(10, 20, 30); + cop2_put(0, (200 << 16) | 100); + cop2_put(1, 300); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(1, 0, 0, 0, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cester_assert_int_eq(-190, mac1); + 
cester_assert_int_eq(120, mac2); + cester_assert_int_eq(330, mac3); +) + +// mx=RT, v=V1, cv=Zero +CESTER_TEST(mvmva_rt_v1_zero, gte_tests, + gte_set_identity_rotation(); + cop2_put(2, (40 << 16) | 30); // V1 = (30, 40) + cop2_put(3, 50); // V1.Z = 50 + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(1, 0, 1, 3, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cester_assert_int_eq(30, mac1); + cester_assert_int_eq(40, mac2); + cester_assert_int_eq(50, mac3); +) + +// mx=RT, v=V2, cv=BK +CESTER_TEST(mvmva_rt_v2_bk, gte_tests, + gte_set_identity_rotation(); + cop2_putc(13, 1000); // RBK + cop2_putc(14, 2000); // GBK + cop2_putc(15, 3000); // BBK + cop2_put(4, (200 << 16) | 100); // V2 + cop2_put(5, 300); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(1, 0, 2, 1, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cester_assert_int_eq(1100, mac1); + cester_assert_int_eq(2200, mac2); + cester_assert_int_eq(3300, mac3); +) + +// mx=RT, v=IR, cv=Zero +CESTER_TEST(mvmva_rt_ir_zero, gte_tests, + gte_set_identity_rotation(); + cop2_put(9, 500); + cop2_put(10, 600); + cop2_put(11, 700); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(1, 0, 3, 3, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cester_assert_int_eq(500, mac1); + cester_assert_int_eq(600, mac2); + cester_assert_int_eq(700, mac3); +) + +// mx=LL (light matrix), v=V0, cv=Zero +CESTER_TEST(mvmva_ll_v0_zero, gte_tests, + gte_set_simple_light(); // L33=0x1000, rest zero + cop2_put(0, (200 << 16) | 100); + cop2_put(1, 0x1000); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(1, 1, 0, 3, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // Only L33 is non-zero, so MAC3 = L33*VZ0 >> 12 = 0x1000 * 0x1000 >> 12 = 0x1000 + cester_assert_int_eq(0, mac1); + cester_assert_int_eq(0, mac2); + cester_assert_int_eq(0x1000, mac3); +) + +// mx=LC (light 
color), v=IR, cv=BK +CESTER_TEST(mvmva_lc_ir_bk, gte_tests, + gte_set_white_light_color(); + cop2_putc(13, 100); // RBK + cop2_putc(14, 200); // GBK + cop2_putc(15, 300); // BBK + cop2_put(9, 0x1000); + cop2_put(10, 0x1000); + cop2_put(11, 0x1000); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(1, 2, 3, 1, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // White LC identity: MAC = (BK<<12 + LR1*IR1) >> 12 = BK + IR + // BK = (100, 200, 300), IR = (0x1000, 0x1000, 0x1000) = (4096, 4096, 4096) + // MAC1 = 100 + 4096 = 4196, etc. + cester_assert_int_eq(4196, mac1); + cester_assert_int_eq(4296, mac2); + cester_assert_int_eq(4396, mac3); +) + +// cv=2 (far color) bug +CESTER_TEST(mvmva_cv2_fc_bug, gte_tests, + gte_set_identity_rotation(); + gte_set_far_color(0x1000, 0x2000, 0x3000); + cop2_put(0, (0x200 << 16) | 0x100); + cop2_put(1, 0x300); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(1, 0, 0, 2, 0)); + int32_t mac1, mac2, mac3; + uint32_t flag; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + flag = gte_read_flag(); + // Buggy: result is partial - only the last two columns (Rx2*VY + Rx3*VZ) + // With identity: R22=R33=0x1000, R12=R13=R23=R32=0 + // MAC1 = (R12*VY + R13*VZ) >> 12 = 0 + // MAC2 = (R22*VY + R23*VZ) >> 12 = 0x200 (the VY term survives; 
only the VX term is dropped) + // MAC3 = (R32*VY + R33*VZ) >> 12 = 0x300 + ramsyscall_printf("MVMVA cv=2: MAC=(%d,%d,%d) FLAG=0x%08x\n", mac1, mac2, mac3, flag); + cester_assert_int_eq(0, mac1); + cester_assert_int_eq(512, mac2); + cester_assert_int_eq(768, mac3); + cester_assert_uint_eq(0x00000000, flag); +) + +// mx=3 (garbage matrix) +CESTER_TEST(mvmva_mx3_garbage, gte_tests, + cop2_putc(0, 0x20001000); // R11=0x1000, R12=0x2000 + cop2_putc(1, 0x40003000); // R13=0x3000, R21=0x4000 + cop2_putc(2, 0x60005000); // R22=0x5000, R23=0x6000 + cop2_putc(3, 0x80007000); // R31=0x7000, R32=-0x8000 + cop2_putc(4, 0x1000); + cop2_put(8, 0x0800); // IR0 + cop2_put(0, (0x100 << 16) | 0x100); + cop2_put(1, 0x100); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(1, 3, 0, 3, 0)); + int32_t mac1, mac2, mac3; + uint32_t flag; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + flag = gte_read_flag(); + ramsyscall_printf("MVMVA mx=3: MAC=(%d,%d,%d) FLAG=0x%08x\n", mac1, mac2, mac3, flag); + cester_assert_int_eq(128, mac1); + cester_assert_int_eq(2304, mac2); + cester_assert_int_eq(3840, mac3); + cester_assert_uint_eq(0x00000000, flag); +) + +// MVMVA with lm=1 +CESTER_TEST(mvmva_lm1, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(-500, -600, -700); + cop2_put(0, (100 << 16) | 100); + cop2_put(1, 100); + gte_clear_flag(); + // sf=1, mx=RT, v=V0, cv=TR, lm=1 + cop2_cmd(COP2_MVMVA(1, 0, 0, 0, 1)); + int32_t mac1; + uint32_t ir1; + cop2_get(25, mac1); + cop2_get(9, ir1); + // MAC1 = 100 + (-500) = -400 + cester_assert_int_eq(-400, mac1); + // IR1 with lm=1: clamped to [0, 0x7fff], so -400 -> 0 + cester_assert_uint_eq(0, ir1); +) + +// MVMVA sf=0 (no shift) +CESTER_TEST(mvmva_sf0, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + cop2_put(0, (10 << 16) | 10); + cop2_put(1, 10); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(0, 0, 0, 3, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // sf=0: no >>12 shift. 
MAC = R * V = 0x1000 * 10 = 40960 + cester_assert_int_eq(40960, mac1); + cester_assert_int_eq(40960, mac2); + cester_assert_int_eq(40960, mac3); +) diff --git a/src/mips/tests/gte/gte-nclip.c b/src/mips/tests/gte/gte-nclip.c new file mode 100644 index 000000000..b6f6e4f7b --- /dev/null +++ b/src/mips/tests/gte/gte-nclip.c @@ -0,0 +1,84 @@ +// NCLIP: normal clipping (screen-space triangle winding / area) +// MAC0 = SX0*(SY1-SY2) + SX1*(SY2-SY0) + SX2*(SY0-SY1) + +CESTER_TEST(nclip_ccw, gte_tests, + cop2_put(12, 0x00000000); // (0,0) + cop2_put(13, 0x00000064); // (100,0) + cop2_put(14, 0x00640000); // (0,100) + gte_clear_flag(); + cop2_cmd(COP2_NCLIP); + int32_t mac0; + cop2_get(24, mac0); + cester_assert_int_eq(10000, mac0); + cester_assert_uint_eq(0, gte_read_flag()); +) + +CESTER_TEST(nclip_cw, gte_tests, + cop2_put(12, 0x00000000); + cop2_put(13, 0x00640000); // (0,100) + cop2_put(14, 0x00000064); // (100,0) + gte_clear_flag(); + cop2_cmd(COP2_NCLIP); + int32_t mac0; + cop2_get(24, mac0); + cester_assert_int_eq(-10000, mac0); +) + +CESTER_TEST(nclip_collinear, gte_tests, + cop2_put(12, 0x00000000); + cop2_put(13, 0x00320032); // (50,50) + cop2_put(14, 0x00640064); // (100,100) + gte_clear_flag(); + cop2_cmd(COP2_NCLIP); + int32_t mac0; + cop2_get(24, mac0); + cester_assert_int_eq(0, mac0); +) + +// NCLIP with large screen coords near saturation limits +CESTER_TEST(nclip_large_coords, gte_tests, + // SXY values near the screen coord limits (-0x400..0x3FF) + cop2_put(12, (0xfc00 << 16) | 0x03ff); // (0x3FF, -0x400) + cop2_put(13, (0x03ff << 16) | 0xfc00); // (-0x400, 0x3FF) + cop2_put(14, 0x00000000); // (0, 0) + gte_clear_flag(); + cop2_cmd(COP2_NCLIP); + int32_t mac0; + uint32_t flag; + cop2_get(24, mac0); + flag = gte_read_flag(); + // (0x3FF * 0x3FF) + (-0x400 * 0) + (0 * (-0x400)) + // - (0x3FF * 0) - (-0x400 * (-0x400)) - (0 * 0x3FF) + // = 0x3FF*0x3FF - 0x400*0x400 = 1046529 - 1048576 = -2047 + // Actually: SX0=0x3FF, SY0=-0x400, SX1=-0x400, SY1=0x3FF, 
SX2=0, SY2=0 + // MAC0 = SX0*(SY1-SY2) + SX1*(SY2-SY0) + SX2*(SY0-SY1) + // = 0x3FF*(0x3FF-0) + (-0x400)*(0-(-0x400)) + 0*((-0x400)-0x3FF) + // = 0x3FF*0x3FF + (-0x400)*0x400 + // = 1046529 - 1048576 = -2047 + ramsyscall_printf("NCLIP large: MAC0=%d FLAG=0x%08x\n", mac0, flag); + cester_assert_int_eq(-2047, mac0); + cester_assert_uint_eq(0, flag); +) + +// NCLIP MAC0 overflow: maximum possible cross product +CESTER_TEST(nclip_overflow, gte_tests, + // Use values that produce MAC0 > 0x7FFFFFFF + // Max SX/SY after saturation is -0x400..0x3FF (11-bit signed) + // Max cross product: 0x3FF*0x3FF*2 + 0x400*0x400*2 ~ 4 million, no overflow + // Need unsaturated values: SXY registers are 16-bit signed + cop2_put(12, (0x7fff << 16) | 0x7fff); // (32767, 32767) + cop2_put(13, (0x8000 << 16) | 0x8000); // (-32768, -32768) + cop2_put(14, (0x7fff << 16) | 0x8000); // (-32768, 32767) + gte_clear_flag(); + cop2_cmd(COP2_NCLIP); + int32_t mac0; + uint32_t flag; + cop2_get(24, mac0); + flag = gte_read_flag(); + ramsyscall_printf("NCLIP overflow: MAC0=%d FLAG=0x%08x\n", mac0, flag); + // Check if FLAG.16 or FLAG.15 (MAC0 overflow) is set + ramsyscall_printf(" FLAG.16=%u FLAG.15=%u\n", (flag >> 16) & 1, (flag >> 15) & 1); + cester_assert_int_eq(131071, mac0); + uint32_t f15 = (flag >> 15) & 1; + cester_assert_uint_eq(1, f15); +) diff --git a/src/mips/tests/gte/gte-op.c b/src/mips/tests/gte/gte-op.c new file mode 100644 index 000000000..d861b0f34 --- /dev/null +++ b/src/mips/tests/gte/gte-op.c @@ -0,0 +1,82 @@ +// OP: outer product / cross product +// Uses rotation matrix diagonal (R11, R22, R33) as D vector +// Result = D x IR + +CESTER_TEST(op_identity_diagonal, gte_tests, + gte_set_identity_rotation(); + cop2_put(9, 1000); + cop2_put(10, 2000); + cop2_put(11, 3000); + gte_clear_flag(); + cop2_cmd(COP2_OP_CP(1, 0)); + int32_t ir1, ir2, ir3; + cop2_get(9, ir1); + cop2_get(10, ir2); + cop2_get(11, ir3); + // D=(1,1,1), IR=(1000,2000,3000) + // cross = (1*3000-1*2000, 1*1000-1*3000, 
1*2000-1*1000) = (1000,-2000,1000) + cester_assert_int_eq(1000, ir1); + cester_assert_int_eq(-2000, ir2); + cester_assert_int_eq(1000, ir3); +) + +CESTER_TEST(op_unshifted, gte_tests, + gte_set_identity_rotation(); + cop2_put(9, 10); + cop2_put(10, 20); + cop2_put(11, 30); + gte_clear_flag(); + cop2_cmd(COP2_OP_CP(0, 0)); // sf=0 + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // sf=0: no shift. D=(0x1000,0x1000,0x1000), IR=(10,20,30) + // MAC1 = R22*IR3 - R33*IR2 = 0x1000*30 - 0x1000*20 = 4096*(30-20) = 40960 + cester_assert_int_eq(40960, mac1); + cester_assert_int_eq(-81920, mac2); + cester_assert_int_eq(40960, mac3); +) + +// OP with asymmetric diagonal +CESTER_TEST(op_asymmetric, gte_tests, + cop2_putc(0, 0x00000800); // R11=0x800 (0.5) + cop2_putc(1, 0x00000000); + cop2_putc(2, 0x00001000); // R22=0x1000 (1.0) + cop2_putc(3, 0x00000000); + cop2_putc(4, 0x2000); // R33=0x2000 (2.0) + cop2_put(9, 100); + cop2_put(10, 200); + cop2_put(11, 300); + gte_clear_flag(); + cop2_cmd(COP2_OP_CP(1, 0)); + int32_t ir1, ir2, ir3; + cop2_get(9, ir1); + cop2_get(10, ir2); + cop2_get(11, ir3); + // D=(0.5, 1.0, 2.0), IR=(100,200,300) + // cross.x = D.y*IR.z - D.z*IR.y = 1.0*300 - 2.0*200 = 300 - 400 = -100 + // cross.y = D.z*IR.x - D.x*IR.z = 2.0*100 - 0.5*300 = 200 - 150 = 50 + // cross.z = D.x*IR.y - D.y*IR.x = 0.5*200 - 1.0*100 = 100 - 100 = 0 + cester_assert_int_eq(-100, ir1); + cester_assert_int_eq(50, ir2); + cester_assert_int_eq(0, ir3); +) + +// OP with overflow - large values that exceed 44-bit accumulator +CESTER_TEST(op_overflow_flag, gte_tests, + cop2_putc(0, 0x00007fff); // R11=0x7fff + cop2_putc(2, 0x00007fff); // R22=0x7fff + cop2_putc(4, 0x7fff); // R33=0x7fff + cop2_put(9, 0x7fff); + cop2_put(10, 0x7fff); + cop2_put(11, 0x7fff); + gte_clear_flag(); + cop2_cmd(COP2_OP_CP(0, 0)); // sf=0, no shift -> large products + uint32_t flag; + flag = gte_read_flag(); + ramsyscall_printf("OP overflow: FLAG=0x%08x\n", flag); + 
// With sf=0: MAC = 0x7fff*0x7fff - 0x7fff*0x7fff = 0 for all + // Actually this produces zero cross product since all components are equal + cester_assert_uint_eq(0x00000000, flag); +) diff --git a/src/mips/tests/gte/gte-precision.c b/src/mips/tests/gte/gte-precision.c new file mode 100644 index 000000000..c5001f8fe --- /dev/null +++ b/src/mips/tests/gte/gte-precision.c @@ -0,0 +1,364 @@ +// Precision tests: 44-bit MAC overflow detection, division table +// spot-checks, RTPS IR3/FLAG.22 sf=0 anomaly. +// These target the exact behaviors that cause subtle game glitches +// when emulated imprecisely. + +// ========================================================================== +// 44-bit MAC overflow detection (FLAG bits 25-30) +// ========================================================================== +// The GTE accumulator is 44 bits wide. Overflow is detected per-addition +// in the chain, not on the final result. Two overflows that cancel out +// will still both be flagged. + +// MAC1 positive overflow (FLAG.30): product exceeds +0x7FFFFFFFFFF +CESTER_TEST(prec_mac1_positive_overflow, gte_tests, + // MVMVA with large matrix and large vector, sf=0 (no shift) + // R11=0x7FFF, V0.X=0x7FFF -> R11*VX = 0x3FFF0001 + // With TR=0x7FFFFFFF and sf=0: TRX<<12 + R11*VX + R12*VY + R13*VZ + // TRX<<12 = 0x7FFFFFFF000 (43 bits) + 0x3FFF0001 = overflows 44-bit + cop2_putc(0, 0x00007fff); // R11=0x7FFF, R12=0 + cop2_putc(1, 0x00000000); + cop2_putc(2, 0x00000000); + cop2_putc(3, 0x00000000); + cop2_putc(4, 0); + cop2_putc(5, 0x7fffffff); // TRX = max positive 32-bit + cop2_putc(6, 0); + cop2_putc(7, 0); + cop2_put(0, (0 << 16) | 0x7fff); // VX=0x7FFF, VY=0 + cop2_put(1, 0); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(0, COP2_MX_RT, COP2_V_V0, COP2_CV_TR, 0)); + uint32_t flag = gte_read_flag(); + uint32_t f30 = (flag >> 30) & 1; + ramsyscall_printf("MAC1 pos overflow: FLAG=0x%08x F30=%u\n", flag, f30); + cester_assert_uint_eq(1, f30); +) + +// MAC1 negative overflow 
(FLAG.27) +CESTER_TEST(prec_mac1_negative_overflow, gte_tests, + cop2_putc(0, 0x00007fff); // R11=0x7FFF + cop2_putc(1, 0x00000000); + cop2_putc(2, 0x00000000); + cop2_putc(3, 0x00000000); + cop2_putc(4, 0); + cop2_putc(5, 0x80000000); // TRX = min negative 32-bit + cop2_putc(6, 0); + cop2_putc(7, 0); + cop2_put(0, (0 << 16) | 0x8000); // VX=-0x8000 (negative) + cop2_put(1, 0); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(0, COP2_MX_RT, COP2_V_V0, COP2_CV_TR, 0)); + uint32_t flag = gte_read_flag(); + uint32_t f27 = (flag >> 27) & 1; + ramsyscall_printf("MAC1 neg overflow: FLAG=0x%08x F27=%u\n", flag, f27); + cester_assert_uint_eq(1, f27); +) + +// MAC2 overflow (FLAG.29 positive, FLAG.26 negative) +CESTER_TEST(prec_mac2_overflow, gte_tests, + cop2_putc(0, 0x00000000); + cop2_putc(1, 0x7fff0000); // R21=0x7FFF (high16 of R13R21), R13=0 + cop2_putc(2, 0x00000000); + cop2_putc(3, 0x00000000); + cop2_putc(4, 0); + cop2_putc(5, 0); + cop2_putc(6, 0x7fffffff); // TRY = max + cop2_putc(7, 0); + cop2_put(0, (0 << 16) | 0x7fff); + cop2_put(1, 0); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(0, COP2_MX_RT, COP2_V_V0, COP2_CV_TR, 0)); + uint32_t flag = gte_read_flag(); + uint32_t f29 = (flag >> 29) & 1; + ramsyscall_printf("MAC2 pos overflow: FLAG=0x%08x F29=%u\n", flag, f29); + cester_assert_uint_eq(1, f29); +) + +// MAC3 overflow (FLAG.28 positive, FLAG.25 negative) +CESTER_TEST(prec_mac3_overflow, gte_tests, + cop2_putc(0, 0x00000000); + cop2_putc(1, 0x00000000); + cop2_putc(2, 0x00000000); + cop2_putc(3, 0x00007fff); // R31=0x7FFF (low16 of R31R32), R32=0 + cop2_putc(4, 0); + cop2_putc(5, 0); + cop2_putc(6, 0); + cop2_putc(7, 0x7fffffff); // TRZ = max + cop2_put(0, (0 << 16) | 0x7fff); + cop2_put(1, 0); + gte_clear_flag(); + cop2_cmd(COP2_MVMVA(0, COP2_MX_RT, COP2_V_V0, COP2_CV_TR, 0)); + uint32_t flag = gte_read_flag(); + uint32_t f28 = (flag >> 28) & 1; + ramsyscall_printf("MAC3 pos overflow: FLAG=0x%08x F28=%u\n", flag, f28); + cester_assert_uint_eq(1, f28); +) + +// Two overflows 
that cancel: both positive and negative overflow +// should be flagged even if the final result is in range +CESTER_TEST(prec_mac_double_overflow, gte_tests, + // Use OP (cross product) sf=0 with values that cause intermediate + // overflow in both directions during the subtract + // MAC1 = R22*IR3 - R33*IR2 + // Make R22*IR3 overflow positive, then R33*IR2 brings it back + cop2_putc(0, 0x00000000); + cop2_putc(2, 0x00007fff); // R22=0x7FFF + cop2_putc(4, 0x7fff); // R33=0x7FFF + cop2_put(9, 0); + cop2_put(10, 0x7fff); // IR2 + cop2_put(11, 0x7fff); // IR3 + gte_clear_flag(); + cop2_cmd(COP2_OP_CP(0, 0)); // sf=0 + int32_t mac1; + uint32_t flag; + cop2_get(25, mac1); + flag = gte_read_flag(); + ramsyscall_printf("double overflow: MAC1=%d FLAG=0x%08x\n", mac1, flag); + // R22*IR3 = 0x7FFF*0x7FFF = 0x3FFF0001 (fits in 44-bit) + // Then subtract R33*IR2 = 0x7FFF*0x7FFF = 0x3FFF0001 + // Result = 0, but check if intermediate overflow flagged + cester_assert_int_eq(0, mac1); + cester_assert_uint_eq(0, flag); +) + +// ========================================================================== +// Division table spot-checks +// ========================================================================== +// The UNR table has 257 entries. Test specific H/SZ3 pairs that exercise +// known table entries and verify exact quotients. + +// Helper: run RTPS with given H and SZ3 (via VZ), return quotient via SX +// Uses VX=0x1000, OFX=0 so SX = VX * (H/SZ3) = 0x1000 * quotient >> 16 +// Actually simpler: set IR1=0x1000 before RTPS, read MAC0 for DQA path, +// or just check SX directly. 
+ +// H/SZ3 = 1/1: quotient should be near 0x10000 (1.0 in 0.16 fixed) +CESTER_TEST(prec_div_1_over_1, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + cop2_putc(24, 0); // OFX=0 + cop2_putc(25, 0); + cop2_putc(26, 1); // H=1 + cop2_putc(27, 0); + cop2_putc(28, 0); + cop2_put(0, (0 << 16) | 0x1000); // VX=0x1000, VY=0 + cop2_put(1, 1); // VZ=1 -> SZ3=1 + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sxy2, flag; + cop2_get(14, sxy2); + flag = gte_read_flag(); + int16_t sx = (int16_t)(sxy2 & 0xffff); + ramsyscall_printf("div 1/1: SX=%d FLAG=0x%08x\n", sx, flag); + // H=1, SZ3=1 -> H >= SZ3*2? 1 >= 2? No -> no overflow + // quotient = (H*0x20000/SZ3 + 1) / 2 = 0x10000 (1.0), below the 0x1FFFF cap. + // SX = IR1 * quotient >> 16 = 0x1000 * 0x10000 >> 16 = 0x1000 + // Then saturated to 0x3FF + uint32_t f17 = (flag >> 17) & 1; + cester_assert_uint_eq(0, f17); // no division overflow +) + +// H/SZ3 = 100/1000: quotient = 0.1 in fixed point +CESTER_TEST(prec_div_100_over_1000, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + cop2_putc(24, 0); + cop2_putc(25, 0); + cop2_putc(26, 100); // H=100 + cop2_putc(27, 0); + cop2_putc(28, 0); + cop2_put(0, (0 << 16) | 1000); // VX=1000 + cop2_put(1, 1000); // VZ=1000 + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sxy2; + cop2_get(14, sxy2); + int16_t sx = (int16_t)(sxy2 & 0xffff); + ramsyscall_printf("div 100/1000: SX=%d\n", sx); + // SX = 1000 * (100/1000) = 100 (roughly, depends on table rounding) + cester_assert_int_eq(100, sx); +) + +// The documented corner case: H=0xF015, SZ3=0x780B -> 0x20000 saturates to 0x1FFFF +CESTER_TEST(prec_div_corner_f015_780b, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + cop2_putc(24, 0); + cop2_putc(25, 0); + cop2_putc(26, 0xf015); // H + cop2_putc(27, 0); + cop2_putc(28, 0); + cop2_put(0, (0 << 16) | 1); // VX=1 (minimal to see quotient effect) + cop2_put(1, 0x780b); // VZ = 0x780B + gte_clear_flag(); + 
cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sxy2, flag; + cop2_get(14, sxy2); + flag = gte_read_flag(); + int16_t sx = (int16_t)(sxy2 & 0xffff); + ramsyscall_printf("div F015/780B: SX=%d FLAG=0x%08x\n", sx, flag); + // This should NOT set FLAG.17 (division overflow) + uint32_t f17 = (flag >> 17) & 1; + cester_assert_uint_eq(0, f17); +) + +// Large H exactly at the overflow threshold: H=0xFFFE, SZ3=0x7FFF (H == SZ3*2) +CESTER_TEST(prec_div_large_h, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + cop2_putc(24, 0); + cop2_putc(25, 0); + cop2_putc(26, 0xfffe); // H near max + cop2_putc(27, 0); + cop2_putc(28, 0); + cop2_put(0, (0 << 16) | 1); + cop2_put(1, 0x7fff); // SZ3=0x7FFF -> H >= SZ3*2? 0xFFFE >= 0xFFFE -> yes, overflow + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t flag; + flag = gte_read_flag(); + uint32_t f17 = (flag >> 17) & 1; + ramsyscall_printf("div large H: FLAG=0x%08x F17=%u\n", flag, f17); + cester_assert_uint_eq(1, f17); // H >= SZ3*2 is true (equal counts) +) + +// SZ3=1 with H=1 (quotient = 1.0, smallest non-zero operands) +CESTER_TEST(prec_div_sz3_one, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + cop2_putc(24, 0); + cop2_putc(25, 0); + cop2_putc(26, 1); // H=1 + cop2_putc(27, 0); + cop2_putc(28, 0); + cop2_put(0, (0 << 16) | 1); + cop2_put(1, 1); // SZ3=1 + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sxy2, flag; + int32_t ir1; + cop2_get(14, sxy2); + cop2_get(9, ir1); + flag = gte_read_flag(); + int16_t sx = (int16_t)(sxy2 & 0xffff); + ramsyscall_printf("div SZ3=1: SX=%d IR1=%d FLAG=0x%08x\n", sx, ir1, flag); + // H/SZ3 = 1/1 -> quotient = 0x10000 (1.0), below the 0x1FFFF cap + // SX = IR1 * 0x10000 >> 16 = 1 * 0x10000 >> 16 = 1 + cester_assert_int_eq(1, sx); +) + +// ========================================================================== +// RTPS IR3/FLAG.22 anomaly with sf=0 +// ========================================================================== +// psx-spx: "When using RTP with sf=0, the IR3 saturation 
flag (FLAG.22) +// gets set only if MAC3 SAR 12 exceeds -8000h..+7FFFh, although IR3 is +// saturated when MAC3 exceeds -8000h..+7FFFh." +// +// Need MAC3 that is out of [-0x8000, 0x7FFF] range (so IR3 saturates) +// but MAC3 >> 12 is in range (so FLAG.22 should NOT be set). + +CESTER_TEST(prec_rtps_sf0_ir3_flag_anomaly, gte_tests, + gte_set_identity_rotation(); + // Pick inputs so that MAC3 is just over 0x7FFF but MAC3>>12 is in range + // TRZ is left at 0, so the whole Z term comes from the vertex (sf=0, no shift) + // With sf=0 the formula is: MAC3 = TRZ*0x1000 + R3x*V + // Derivation of the value used below: + // sf=0: A3 returns the raw 44-bit value without >>12 + // MAC3 = TRZ<<12 + R31*VX + R32*VY + R33*VZ (no shift applied) + // With identity: MAC3 = TRZ<<12 + VZ*0x1000 + // We want MAC3 > 0x7FFF (IR3 saturates) but MAC3>>12 in [-0x8000,0x7FFF] + // MAC3 = 0x8000 -> MAC3>>12 = 8 (in range) -> FLAG.22 NOT set but IR3 saturated + cop2_putc(5, 0); + cop2_putc(6, 0); + cop2_putc(7, 0); // TRZ = 0 + cop2_putc(24, 0); + cop2_putc(25, 0); + cop2_putc(26, 200); + cop2_putc(27, 0); + cop2_putc(28, 0); + // VZ = 8 -> MAC3 = 0 + 0x1000*8 = 0x8000 (just over 0x7FFF) + cop2_put(0, 0x00000000); + cop2_put(1, 8); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(0, 0)); // sf=0 + int32_t mac3; + uint32_t ir3, flag; + cop2_get(27, mac3); + cop2_get(11, ir3); + flag = gte_read_flag(); + uint32_t f22 = (flag >> 22) & 1; + ramsyscall_printf("sf=0 anomaly: MAC3=%d IR3=0x%04x FLAG=0x%08x F22=%u\n", + mac3, ir3 & 0xffff, flag, f22); + // MAC3 = 0x8000 -> outside [-0x8000, 0x7FFF] for IR3 (+0x8000 is one past the upper bound) + // 0x8000 = 32768 which is > 0x7FFF. IR3 should saturate to 0x7FFF. + // MAC3 >> 12 = 0x8000 >> 12 = 8 -> in range -> FLAG.22 should NOT be set. + // This is the anomaly: IR3 saturated but FLAG.22 not set. 
+ cester_assert_int_eq(32768, mac3); + cester_assert_uint_eq(0x7fff, ir3); + cester_assert_uint_eq(0, f22); + uint32_t f17 = (flag >> 17) & 1; + cester_assert_uint_eq(1, f17); +) + +// Stronger test: MAC3 = 0x10000 -> well above 0x7FFF, but >>12 = 16 (in range) +CESTER_TEST(prec_rtps_sf0_ir3_flag_strong, gte_tests, + gte_set_identity_rotation(); + cop2_putc(5, 0); + cop2_putc(6, 0); + cop2_putc(7, 0); + cop2_putc(24, 0); + cop2_putc(25, 0); + cop2_putc(26, 200); + cop2_putc(27, 0); + cop2_putc(28, 0); + // VZ = 16 -> MAC3 = 0x1000 * 16 = 0x10000 (65536, way above 0x7FFF) + cop2_put(0, 0x00000000); + cop2_put(1, 16); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(0, 0)); + int32_t mac3; + uint32_t ir3, flag; + cop2_get(27, mac3); + cop2_get(11, ir3); + flag = gte_read_flag(); + uint32_t f22 = (flag >> 22) & 1; + ramsyscall_printf("sf=0 strong: MAC3=%d IR3=0x%04x FLAG=0x%08x F22=%u\n", + mac3, ir3 & 0xffff, flag, f22); + // MAC3 = 0x10000 -> IR3 saturated to 0x7FFF + cester_assert_uint_eq(0x7fff, ir3); + // MAC3 >> 12 = 0x10000 >> 12 = 16 -> in range -> FLAG.22 NOT set + cester_assert_uint_eq(0, f22); +) + +// Counter-test: MAC3 >> 12 exceeds range -> FLAG.22 SHOULD be set +CESTER_TEST(prec_rtps_sf0_ir3_flag_set, gte_tests, + gte_set_identity_rotation(); + cop2_putc(5, 0); + cop2_putc(6, 0); + cop2_putc(7, 8); // provisional TRZ = 8 (MAC3 = 8<<12 + VZ*0x1000); overwritten with 0x7FFF below + cop2_putc(24, 0); + cop2_putc(25, 0); + cop2_putc(26, 200); + cop2_putc(27, 0); + cop2_putc(28, 0); + // Rejected attempt: TRZ = 8, VZ = 0x7FF0 -> MAC3 = 8*4096 + 0x7FF0*0x1000 = 0x8000 + 0x7FF0000 = 0x7FF8000 + // MAC3 >> 12 = 0x7FF8 <= 0x7FFF -> still in range, so FLAG.22 would stay clear + // Values actually used: TRZ = 0x7FFF -> MAC3 = 0x7FFF<<12 = 0x7FFF000 + // MAC3>>12 = 0x7FFF -> at boundary. 
With VZ=1: MAC3 = 0x7FFF000 + 0x1000 = 0x8000000 + // MAC3>>12 = 0x8000 -> OUT of range -> FLAG.22 should be set + cop2_putc(7, 0x7fff); + cop2_put(0, 0x00000000); + cop2_put(1, 1); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(0, 0)); + int32_t mac3; + uint32_t ir3, flag; + cop2_get(27, mac3); + cop2_get(11, ir3); + flag = gte_read_flag(); + uint32_t f22 = (flag >> 22) & 1; + ramsyscall_printf("sf=0 flag set: MAC3=%d IR3=0x%04x FLAG=0x%08x F22=%u\n", + mac3, ir3 & 0xffff, flag, f22); + // MAC3>>12 = 0x8000 -> exceeds 0x7FFF -> FLAG.22 SHOULD be set + cester_assert_uint_eq(1, f22); +) diff --git a/src/mips/tests/gte/gte-regio.c b/src/mips/tests/gte/gte-regio.c new file mode 100644 index 000000000..e5df15026 --- /dev/null +++ b/src/mips/tests/gte/gte-regio.c @@ -0,0 +1,374 @@ +// GTE register I/O tests: data/control register read/write, sign extension, +// SXY FIFO, IRGB/ORGB, LZCS/LZCR, FLAG register, CTC2 sign extension. + +// ========================================================================== +// Data register roundtrip and sign/zero extension +// ========================================================================== + +CESTER_TEST(regio_mac0_roundtrip, gte_tests, + cop2_put(24, 0x12345678); + uint32_t out; + cop2_get(24, out); + cester_assert_uint_eq(0x12345678, out); +) + +CESTER_TEST(regio_mac1_roundtrip, gte_tests, + cop2_put(25, 0xdeadbeef); + uint32_t out; + cop2_get(25, out); + cester_assert_uint_eq(0xdeadbeef, out); +) + +CESTER_TEST(regio_ir0_sign_extend, gte_tests, + cop2_put(8, 0x0000ffff); + uint32_t out; + cop2_get(8, out); + cester_assert_uint_eq(0xffffffff, out); +) + +CESTER_TEST(regio_ir1_sign_extend, gte_tests, + cop2_put(9, 0x00008000); + uint32_t out; + cop2_get(9, out); + cester_assert_uint_eq(0xffff8000, out); +) + +CESTER_TEST(regio_ir2_positive, gte_tests, + cop2_put(10, 0x00001234); + uint32_t out; + cop2_get(10, out); + cester_assert_uint_eq(0x00001234, out); +) + +CESTER_TEST(regio_ir3_positive, gte_tests, + cop2_put(11, 
0x00007fff); + uint32_t out; + cop2_get(11, out); + cester_assert_uint_eq(0x00007fff, out); +) + +CESTER_TEST(regio_vz0_sign_extend, gte_tests, + cop2_put(1, 0x0000ff00); + uint32_t out; + cop2_get(1, out); + cester_assert_uint_eq(0xffffff00, out); +) + +CESTER_TEST(regio_vxy0_packed, gte_tests, + cop2_put(0, 0x00640032); + uint32_t out; + cop2_get(0, out); + cester_assert_uint_eq(0x00640032, out); +) + +CESTER_TEST(regio_otz_zero_extend, gte_tests, + cop2_put(7, 0xffffffff); + uint32_t out; + cop2_get(7, out); + cester_assert_uint_eq(0x0000ffff, out); +) + +CESTER_TEST(regio_sz_zero_extend, gte_tests, + cop2_put(16, 0xdeadbeef); + uint32_t out; + cop2_get(16, out); + cester_assert_uint_eq(0x0000beef, out); +) + +CESTER_TEST(regio_rgbc_roundtrip, gte_tests, + cop2_put(6, 0xaa554080); + uint32_t out; + cop2_get(6, out); + cester_assert_uint_eq(0xaa554080, out); +) + +CESTER_TEST(regio_res1_readwrite, gte_tests, + cop2_put(23, 0xdeadbeef); + uint32_t out; + cop2_get(23, out); + cester_assert_uint_eq(0xdeadbeef, out); +) + +// ========================================================================== +// SXY FIFO +// ========================================================================== + +CESTER_TEST(regio_sxy_fifo_push, gte_tests, + cop2_put(12, 0x00010002); + cop2_put(13, 0x00030004); + cop2_put(14, 0x00050006); + cop2_put(15, 0x00070008); + uint32_t sxy0, sxy1, sxy2; + cop2_get(12, sxy0); + cop2_get(13, sxy1); + cop2_get(14, sxy2); + cester_assert_uint_eq(0x00030004, sxy0); + cester_assert_uint_eq(0x00050006, sxy1); + cester_assert_uint_eq(0x00070008, sxy2); +) + +CESTER_TEST(regio_sxyp_read_returns_sxy2, gte_tests, + cop2_put(14, 0xaabbccdd); + uint32_t sxyp; + cop2_get(15, sxyp); + cester_assert_uint_eq(0xaabbccdd, sxyp); +) + +CESTER_TEST(regio_sxy_fifo_triple_push, gte_tests, + cop2_put(15, 0x11111111); + cop2_put(15, 0x22222222); + cop2_put(15, 0x33333333); + uint32_t sxy0, sxy1, sxy2; + cop2_get(12, sxy0); + cop2_get(13, sxy1); + cop2_get(14, sxy2); + 
cester_assert_uint_eq(0x11111111, sxy0); + cester_assert_uint_eq(0x22222222, sxy1); + cester_assert_uint_eq(0x33333333, sxy2); +) + +// ========================================================================== +// IRGB / ORGB +// ========================================================================== + +CESTER_TEST(regio_irgb_expand, gte_tests, + cop2_put(28, 0x7fff); + __asm__ volatile("nop; nop; nop; nop"); + uint32_t ir1, ir2, ir3; + cop2_get(9, ir1); + cop2_get(10, ir2); + cop2_get(11, ir3); + cester_assert_uint_eq(0x00000f80, ir1); + cester_assert_uint_eq(0x00000f80, ir2); + cester_assert_uint_eq(0x00000f80, ir3); +) + +CESTER_TEST(regio_irgb_individual, gte_tests, + cop2_put(28, 0x000a); // R=10, G=0, B=0 + __asm__ volatile("nop; nop; nop; nop"); + uint32_t ir1, ir2, ir3; + cop2_get(9, ir1); + cop2_get(10, ir2); + cop2_get(11, ir3); + cester_assert_uint_eq(0x00000500, ir1); // 10 << 7 + cester_assert_uint_eq(0x00000000, ir2); + cester_assert_uint_eq(0x00000000, ir3); +) + +CESTER_TEST(regio_orgb_pack, gte_tests, + cop2_put(9, 0x0f80); + cop2_put(10, 0x0f80); + cop2_put(11, 0x0f80); + uint32_t orgb; + cop2_get(29, orgb); + cester_assert_uint_eq(0x7fff, orgb); +) + +// ORGB saturates, not truncates (psx-spx correct, Sony SDK wrong) +CESTER_TEST(regio_orgb_saturate_negative, gte_tests, + cop2_put(9, 0xffff8000); // IR1 = -32768 (negative) + cop2_put(10, 0x00002000); // IR2 = 8192 (large positive) + cop2_put(11, 0x00000380); // IR3 = 896 (normal) + uint32_t orgb; + cop2_get(29, orgb); + uint32_t r = orgb & 0x1f; + uint32_t g = (orgb >> 5) & 0x1f; + uint32_t b = (orgb >> 10) & 0x1f; + cester_assert_uint_eq(0, r); // negative saturated to 0 + cester_assert_uint_eq(31, g); // large saturated to 0x1f + cester_assert_uint_eq(7, b); // 896 >> 7 = 7 +) + +CESTER_TEST(regio_orgb_saturate_large, gte_tests, + cop2_put(9, 0x1000); + cop2_put(10, 0x1000); + cop2_put(11, 0x1000); + uint32_t orgb; + cop2_get(29, orgb); + // 0x1000>>7 = 0x20 = 32, saturated to 31 + 
cester_assert_uint_eq(0x7fff, orgb); +) + +// ========================================================================== +// LZCS / LZCR +// ========================================================================== + +CESTER_TEST(regio_lzcr_zero, gte_tests, + cop2_put(30, 0x00000000); + uint32_t lzcr; + cop2_get(31, lzcr); + cester_assert_uint_eq(32, lzcr); +) + +CESTER_TEST(regio_lzcr_all_ones, gte_tests, + cop2_put(30, 0xffffffff); + uint32_t lzcr; + cop2_get(31, lzcr); + cester_assert_uint_eq(32, lzcr); +) + +CESTER_TEST(regio_lzcr_one, gte_tests, + cop2_put(30, 0x00000001); + uint32_t lzcr; + cop2_get(31, lzcr); + cester_assert_uint_eq(31, lzcr); +) + +CESTER_TEST(regio_lzcr_msb_set, gte_tests, + cop2_put(30, 0x80000000); + uint32_t lzcr; + cop2_get(31, lzcr); + cester_assert_uint_eq(1, lzcr); +) + +CESTER_TEST(regio_lzcr_positive_mid, gte_tests, + cop2_put(30, 0x00010000); + uint32_t lzcr; + cop2_get(31, lzcr); + cester_assert_uint_eq(15, lzcr); +) + +CESTER_TEST(regio_lzcr_negative_mid, gte_tests, + cop2_put(30, 0xfffe0000); + uint32_t lzcr; + cop2_get(31, lzcr); + cester_assert_uint_eq(15, lzcr); +) + +// ========================================================================== +// FLAG register +// ========================================================================== + +CESTER_TEST(regio_flag_write_mask, gte_tests, + cop2_putc(31, 0xffffffff); + uint32_t flag = gte_read_flag(); + cester_assert_uint_eq(0xfffff000, flag); +) + +CESTER_TEST(regio_flag_low_bits_masked, gte_tests, + cop2_putc(31, 0x00000fff); + uint32_t flag = gte_read_flag(); + cester_assert_uint_eq(0, flag); +) + +CESTER_TEST(regio_flag_bit12_no_summary, gte_tests, + cop2_putc(31, (1 << 12)); + uint32_t flag = gte_read_flag(); + cester_assert_uint_eq((1 << 12), flag); +) + +CESTER_TEST(regio_flag_bits19_22_no_summary, gte_tests, + uint32_t flag; + int ok = 1; + int i; + for (i = 19; i <= 22; i++) { + cop2_putc(31, (1u << i)); + flag = gte_read_flag(); + if (flag != (1u << i)) ok = 0; + 
} + cester_assert_int_eq(1, ok); +) + +CESTER_TEST(regio_flag_bits13_18_set_summary, gte_tests, + uint32_t flag; + int ok = 1; + int i; + for (i = 13; i <= 18; i++) { + cop2_putc(31, (1u << i)); + flag = gte_read_flag(); + if (flag != ((1u << i) | (1u << 31))) ok = 0; + } + cester_assert_int_eq(1, ok); +) + +CESTER_TEST(regio_flag_bits23_30_set_summary, gte_tests, + uint32_t flag; + int ok = 1; + int i; + for (i = 23; i <= 30; i++) { + cop2_putc(31, (1u << i)); + flag = gte_read_flag(); + if (flag != ((1u << i) | (1u << 31))) ok = 0; + } + cester_assert_int_eq(1, ok); +) + +// ========================================================================== +// Control register sign extension +// ========================================================================== + +CESTER_TEST(regio_ctrl_r33_sign_extend, gte_tests, + cop2_putc(4, 0x00008000); + uint32_t out; + cop2_getc(4, out); + cester_assert_uint_eq(0xffff8000, out); +) + +CESTER_TEST(regio_ctrl_zsf3_sign_extend, gte_tests, + cop2_putc(29, 0x0000ffff); + uint32_t out; + cop2_getc(29, out); + cester_assert_uint_eq(0xffffffff, out); +) + +// H register sign-extension bug (psx-spx documented, Sony omitted) +CESTER_TEST(regio_h_sign_extension_bug, gte_tests, + cop2_putc(26, 0x8000); + uint32_t h; + cop2_getc(26, h); + cester_assert_uint_eq(0xffff8000, h); +) + +CESTER_TEST(regio_h_positive, gte_tests, + cop2_putc(26, 0x7fff); + uint32_t h; + cop2_getc(26, h); + cester_assert_uint_eq(0x00007fff, h); +) + +// All single-16bit control regs sign-extend +CESTER_TEST(regio_ctc2_sign_extend_all, gte_tests, + uint32_t out; + int ok = 1; + // R33(4), L33(12), LB3(20), H(26), DQA(27), ZSF3(29), ZSF4(30) + cop2_putc(4, 0x8000); cop2_getc(4, out); if (out != 0xffff8000) ok = 0; + cop2_putc(12, 0x8000); cop2_getc(12, out); if (out != 0xffff8000) ok = 0; + cop2_putc(20, 0x8000); cop2_getc(20, out); if (out != 0xffff8000) ok = 0; + cop2_putc(26, 0x8000); cop2_getc(26, out); if (out != 0xffff8000) ok = 0; + cop2_putc(27, 0x8000); 
cop2_getc(27, out); if (out != 0xffff8000) ok = 0; + cop2_putc(29, 0x8000); cop2_getc(29, out); if (out != 0xffff8000) ok = 0; + cop2_putc(30, 0x8000); cop2_getc(30, out); if (out != 0xffff8000) ok = 0; + cester_assert_int_eq(1, ok); +) + +// lm flag clamp behavior +CESTER_TEST(regio_lm_clamp, gte_tests, + // GPF sf=1 lm=0: IR clamp -0x8000..0x7fff + cop2_put(8, 0x1000); + cop2_put(9, 0xffff8000); + cop2_put(10, 0x100); + cop2_put(11, 0x7fff); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 0)); + int32_t mac1_lm0; + uint32_t ir1_lm0; + cop2_get(25, mac1_lm0); + cop2_get(9, ir1_lm0); + // GPF sf=1 lm=1 + cop2_put(8, 0x1000); + cop2_put(9, 0xffff8000); + cop2_put(10, 0x100); + cop2_put(11, 0x7fff); + cop2_put(6, 0x00808080); + gte_clear_flag(); + cop2_cmd(COP2_GPF(1, 1)); + int32_t mac1_lm1; + uint32_t ir1_lm1; + cop2_get(25, mac1_lm1); + cop2_get(9, ir1_lm1); + cester_assert_int_eq(-32768, mac1_lm0); + cester_assert_int_eq(-32768, mac1_lm1); + cester_assert_uint_eq(0xffff8000, ir1_lm0); // lm=0: stays -32768 + cester_assert_uint_eq(0x00000000, ir1_lm1); // lm=1: clamped to 0 +) diff --git a/src/mips/tests/gte/gte-rtps.c b/src/mips/tests/gte/gte-rtps.c new file mode 100644 index 000000000..338bfa3a5 --- /dev/null +++ b/src/mips/tests/gte/gte-rtps.c @@ -0,0 +1,224 @@ +// RTPS/RTPT: perspective transformation (single and triple) +// Also covers division table behavior and screen coordinate saturation. 
+ +CESTER_TEST(rtps_identity_center, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 1000); + gte_set_screen(160 << 16, 120 << 16, 200); + cop2_put(0, 0x00000000); // V0 = (0, 0) + cop2_put(1, 0); // VZ0 = 0 + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sz3, sxy2; + cop2_get(19, sz3); + cop2_get(14, sxy2); + cester_assert_uint_eq(1000, sz3); + cester_assert_int_eq(160, (int16_t)(sxy2 & 0xffff)); + cester_assert_int_eq(120, (int16_t)(sxy2 >> 16)); +) + +CESTER_TEST(rtps_offset_vertex, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + gte_set_screen(160 << 16, 120 << 16, 200); + cop2_put(0, (50 << 16) | (100 & 0xffff)); + cop2_put(1, 500); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sz3; + cop2_get(19, sz3); + cester_assert_uint_eq(500, sz3); + // SX = 160 + 100*200/500 = 160 + 40 = 200 ideally; the test expects 199 (division rounding) + // SY = 120 + 50*200/500 = 120 + 20 = 140 ideally; the test expects 139 + uint32_t sxy2; + cop2_get(14, sxy2); + int16_t sx = (int16_t)(sxy2 & 0xffff); + int16_t sy = (int16_t)(sxy2 >> 16); + ramsyscall_printf("RTPS offset: SX=%d SY=%d\n", sx, sy); + cester_assert_int_eq(199, sx); + cester_assert_int_eq(139, sy); + cester_assert_uint_eq(500, sz3); // repeats the SZ3 check made right after the read +) + +// RTPS MAC output +CESTER_TEST(rtps_mac_output, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(100, 200, 300); + gte_set_screen(0, 0, 200); + cop2_put(0, (50 << 16) | 10); // V0 = (10, 50) + cop2_put(1, 500); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // Identity rotation: MAC = V + TR + cester_assert_int_eq(110, mac1); + cester_assert_int_eq(250, mac2); + cester_assert_int_eq(800, mac3); +) + +// RTPS with a very small Z (VZ0=1, not 0) -> division overflow +CESTER_TEST(rtps_division_overflow, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + gte_set_screen(0, 0, 200); + cop2_put(0, (0 << 16) | 100); + cop2_put(1, 1); // VZ0 = 1, very small Z + 
gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t flag; + flag = gte_read_flag(); + // H=200, SZ3=1 -> H >= SZ3*2 (200 >= 2) -> division overflow FLAG.17 + ramsyscall_printf("RTPS div overflow: FLAG=0x%08x (bit17=%u)\n", flag, (flag >> 17) & 1); + uint32_t flag17 = (flag >> 17) & 1; + cester_assert_uint_eq(1, flag17); +) + +// RTPS screen coordinate saturation +CESTER_TEST(rtps_screen_saturation, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + gte_set_screen(0, 0, 200); + // Large X, small Z -> SX will exceed -0x400..0x3FF range + cop2_put(0, (0 << 16) | 0x7fff); // VX0 = 32767 + cop2_put(1, 100); // VZ0 = 100 + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + uint32_t sxy2, flag; + cop2_get(14, sxy2); + flag = gte_read_flag(); + int16_t sx = (int16_t)(sxy2 & 0xffff); + ramsyscall_printf("RTPS sat: SX=%d FLAG=0x%08x (bit14=%u)\n", sx, flag, (flag >> 14) & 1); + // SX should be saturated to 0x3FF + cester_assert_int_eq(0x3ff, sx); + uint32_t flag14 = (flag >> 14) & 1; + cester_assert_uint_eq(1, flag14); // FLAG.14 = SX2 saturated +) + +// RTPS depth cue output (MAC0/IR0) +CESTER_TEST(rtps_depth_cue, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + cop2_putc(24, 0); + cop2_putc(25, 0); + cop2_putc(26, 200); + cop2_putc(27, 0xfffff880); // DQA = -1920 (negative) + cop2_putc(28, 0x01000000); // DQB = 16777216 + cop2_put(0, 0x00000000); + cop2_put(1, 1000); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(1, 0)); + int32_t mac0; + uint32_t ir0; + cop2_get(24, mac0); + cop2_get(8, ir0); + ramsyscall_printf("RTPS depth: MAC0=%d IR0=0x%04x\n", mac0, ir0 & 0xffff); + // IR0 should be clamped to [0, 0x1000] + cester_assert_int_eq(-8388224, mac0); + cester_assert_uint_eq(0, ir0); +) + +// RTPS with sf=0 +CESTER_TEST(rtps_sf0, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0x1000); + gte_set_screen(0, 0, 200); + cop2_put(0, 0x00000000); + cop2_put(1, 0); + gte_clear_flag(); + cop2_cmd(COP2_RTPS(0, 
0)); + int32_t mac3; + uint32_t ir3, sz3, flag; + cop2_get(27, mac3); + cop2_get(11, ir3); + cop2_get(19, sz3); + flag = gte_read_flag(); + ramsyscall_printf("RTPS sf=0: MAC3=%d IR3=0x%04x SZ3=%u FLAG=0x%08x\n", + mac3, ir3 & 0xffff, sz3, flag); + // sf=0: MAC3 = TRZ<<12 + rotation = 0x1000<<12 = 0x1000000 (no >>12 shift) + // IR3 uses Lm_B3_sf which checks MAC3>>12 for FLAG but clamps the unshifted value + cester_assert_int_eq(16777216, mac3); + cester_assert_uint_eq(0x7fff, ir3); + cester_assert_uint_eq(4096, sz3); + cester_assert_uint_eq(0, flag); +) + +// RTPT: triple perspective transform +CESTER_TEST(rtpt_three_vertices, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + gte_set_screen(160 << 16, 120 << 16, 200); + // V0 = (0, 0, 1000) + cop2_put(0, 0x00000000); + cop2_put(1, 1000); + // V1 = (100, 0, 1000) + cop2_put(2, (0 << 16) | 100); + cop2_put(3, 1000); + // V2 = (0, 100, 1000) + cop2_put(4, (100 << 16) | 0); + cop2_put(5, 1000); + gte_clear_flag(); + cop2_cmd(COP2_RTPT(1, 0)); + uint32_t sxy0, sxy1, sxy2; + cop2_get(12, sxy0); + cop2_get(13, sxy1); + cop2_get(14, sxy2); + // V0 at origin -> (160, 120) + cester_assert_int_eq(160, (int16_t)(sxy0 & 0xffff)); + cester_assert_int_eq(120, (int16_t)(sxy0 >> 16)); + // V1 at (100,0,1000) -> SX ~ 180 + int16_t sx1 = (int16_t)(sxy1 & 0xffff); + int16_t sy1 = (int16_t)(sxy1 >> 16); + ramsyscall_printf("RTPT: V1=(%d,%d) V2=(%d,%d)\n", sx1, sy1, + (int16_t)(sxy2 & 0xffff), (int16_t)(sxy2 >> 16)); + cester_assert_int_eq(120, sy1); // Y unchanged +) + +// RTPT: FLAG accumulates across all three vertices +CESTER_TEST(rtpt_flag_accumulates, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + gte_set_screen(0, 0, 200); + // V0: normal + cop2_put(0, 0x00000000); + cop2_put(1, 1000); + // V1: will cause SX saturation (large X, small Z) + cop2_put(2, (0 << 16) | 0x7fff); + cop2_put(3, 100); + // V2: normal + cop2_put(4, 0x00000000); + cop2_put(5, 1000); + gte_clear_flag(); + 
cop2_cmd(COP2_RTPT(1, 0)); + uint32_t flag; + flag = gte_read_flag(); + // FLAG should have SX2 saturation from V1, even though V2 was fine + ramsyscall_printf("RTPT flag accum: FLAG=0x%08x\n", flag); + // Division overflow from V1 (H=200, SZ3=100, 200 >= 200) + uint32_t flag17 = (flag >> 17) & 1; + cester_assert_uint_eq(1, flag17); +) + +// RTPT pushes SZ FIFO correctly +CESTER_TEST(rtpt_sz_fifo, gte_tests, + gte_set_identity_rotation(); + gte_set_translation(0, 0, 0); + gte_set_screen(160 << 16, 120 << 16, 200); + cop2_put(0, 0x00000000); + cop2_put(1, 100); + cop2_put(2, 0x00000000); + cop2_put(3, 200); + cop2_put(4, 0x00000000); + cop2_put(5, 300); + gte_clear_flag(); + cop2_cmd(COP2_RTPT(1, 0)); + uint32_t sz1, sz2, sz3; + cop2_get(17, sz1); + cop2_get(18, sz2); + cop2_get(19, sz3); + cester_assert_uint_eq(100, sz1); + cester_assert_uint_eq(200, sz2); + cester_assert_uint_eq(300, sz3); +) diff --git a/src/mips/tests/gte/gte-sqr.c b/src/mips/tests/gte/gte-sqr.c new file mode 100644 index 000000000..615fa9814 --- /dev/null +++ b/src/mips/tests/gte/gte-sqr.c @@ -0,0 +1,88 @@ +// SQR: square of IR vector + +CESTER_TEST(sqr_shifted, gte_tests, + cop2_put(9, 0x1000); // 1.0 + cop2_put(10, 0x0800); // 0.5 + cop2_put(11, 0x2000); // 2.0 + gte_clear_flag(); + cop2_cmd(COP2_SQR(1, 0)); + uint32_t ir1, ir2, ir3; + cop2_get(9, ir1); + cop2_get(10, ir2); + cop2_get(11, ir3); + cester_assert_uint_eq(0x1000, ir1); // 1.0^2 = 1.0 + cester_assert_uint_eq(0x0400, ir2); // 0.5^2 = 0.25 + cester_assert_uint_eq(0x4000, ir3); // 2.0^2 = 4.0 (no saturation, lm=0) +) + +CESTER_TEST(sqr_unshifted, gte_tests, + cop2_put(9, 4); + cop2_put(10, 5); + cop2_put(11, 6); + gte_clear_flag(); + cop2_cmd(COP2_SQR(0, 0)); + uint32_t ir1, ir2, ir3; + cop2_get(9, ir1); + cop2_get(10, ir2); + cop2_get(11, ir3); + cester_assert_uint_eq(16, ir1); + cester_assert_uint_eq(25, ir2); + cester_assert_uint_eq(36, ir3); +) + +// SQR sets MAC1-3 as well +CESTER_TEST(sqr_mac_output, gte_tests, + cop2_put(9, 
100); + cop2_put(10, 200); + cop2_put(11, 300); + gte_clear_flag(); + cop2_cmd(COP2_SQR(0, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + cester_assert_int_eq(10000, mac1); + cester_assert_int_eq(40000, mac2); + cester_assert_int_eq(90000, mac3); +) + +// SQR with IR saturation (shifted, result > 0x7fff with lm=0) +CESTER_TEST(sqr_saturation_shifted, gte_tests, + cop2_put(9, 0x4000); // 4.0 in 4.12; 4^2 = 16.0 -> 0x10000 after >>12, saturates + cop2_put(10, 0x5a82); // ~5.657 (sqrt(32)); square ~= 32.0 -> ~0x20000 after >>12, saturates + cop2_put(11, 0x7fff); // max positive; 0x7fff^2 >>12 = huge, saturates + gte_clear_flag(); + cop2_cmd(COP2_SQR(1, 0)); + uint32_t ir1, ir2, ir3; + uint32_t flag; + cop2_get(9, ir1); + cop2_get(10, ir2); + cop2_get(11, ir3); + flag = gte_read_flag(); + ramsyscall_printf("SQR sat: IR1=0x%04x IR2=0x%04x IR3=0x%04x FLAG=0x%08x\n", + ir1 & 0xffff, ir2 & 0xffff, ir3 & 0xffff, flag); + cester_assert_uint_eq(0x7fff, ir1 & 0xffff); + cester_assert_uint_eq(0x7fff, ir2 & 0xffff); + cester_assert_uint_eq(0x7fff, ir3 & 0xffff); + cester_assert_uint_eq(0x81c00000, flag); // bits 24/23/22 (IR1/IR2/IR3 saturated per psx-spx) + bit 31 summary +) + +// SQR with negative input (result should still be positive: square) +CESTER_TEST(sqr_negative_input, gte_tests, + cop2_put(9, 0xfffffff6); // -10 (sign-extended) + cop2_put(10, 0xffffffce); // -50 + cop2_put(11, 0xffffff9c); // -100 + gte_clear_flag(); + cop2_cmd(COP2_SQR(0, 0)); + int32_t mac1, mac2, mac3; + cop2_get(25, mac1); + cop2_get(26, mac2); + cop2_get(27, mac3); + // Squares of negative numbers are positive + // But GTE multiplies IR*IR where IR is 16-bit signed + // -10 * -10 = 100, -50 * -50 = 2500, -100 * -100 = 10000 + ramsyscall_printf("SQR neg: MAC1=%d MAC2=%d MAC3=%d\n", mac1, mac2, mac3); + cester_assert_int_eq(100, mac1); + cester_assert_int_eq(2500, mac2); + cester_assert_int_eq(10000, mac3); +) diff --git a/src/mips/tests/gte/gte.c b/src/mips/tests/gte/gte.c new file mode 100644 index 000000000..43fd73913 --- /dev/null +++ 
b/src/mips/tests/gte/gte.c @@ -0,0 +1,138 @@ +/* + +MIT License + +Copyright (c) 2026 PCSX-Redux authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +// GTE (Geometry Transformation Engine) hardware validation test suite. +// All test expectations verified against SCPH-5501 silicon. +// +// Sub-test files are included into this single compilation unit +// because libcester requires a single TU via __BASE_FILE__ re-include. 
+ +#include "common/hardware/cop2.h" +#include "common/syscalls/syscalls.h" + +// clang-format off + +// ========================================================================== +// Helper functions (guarded against cester double-include) +// ========================================================================== + +#ifndef GTE_HELPERS_DEFINED +#define GTE_HELPERS_DEFINED + +static inline void gte_enable(void) { + uint32_t sr; + __asm__ volatile("mfc0 %0, $12" : "=r"(sr)); + sr |= 0x40000000; + __asm__ volatile("mtc0 %0, $12; nop; nop" : : "r"(sr)); +} + +static inline void gte_clear_flag(void) { + cop2_putc(31, 0); +} + +static inline uint32_t gte_read_flag(void) { + uint32_t flag; + cop2_getc(31, flag); + return flag; +} + +static inline void gte_set_identity_rotation(void) { + cop2_putc(0, 0x00001000); + cop2_putc(1, 0x00000000); + cop2_putc(2, 0x00001000); + cop2_putc(3, 0x00000000); + cop2_putc(4, 0x1000); +} + +static inline void gte_set_simple_light(void) { + cop2_putc(8, 0x00000000); + cop2_putc(9, 0x00000000); + cop2_putc(10, 0x00000000); + cop2_putc(11, 0x00000000); + cop2_putc(12, 0x1000); +} + +static inline void gte_set_white_light_color(void) { + cop2_putc(16, 0x00001000); + cop2_putc(17, 0x00000000); + cop2_putc(18, 0x00001000); + cop2_putc(19, 0x00000000); + cop2_putc(20, 0x1000); +} + +static inline void gte_set_zero_bk(void) { + cop2_putc(13, 0); + cop2_putc(14, 0); + cop2_putc(15, 0); +} + +static inline void gte_set_far_color(int32_t r, int32_t g, int32_t b) { + cop2_putc(21, r); + cop2_putc(22, g); + cop2_putc(23, b); +} + +static inline void gte_set_translation(int32_t x, int32_t y, int32_t z) { + cop2_putc(5, x); + cop2_putc(6, y); + cop2_putc(7, z); +} + +static inline void gte_set_screen(int32_t ofx, int32_t ofy, uint16_t h) { + cop2_putc(24, ofx); + cop2_putc(25, ofy); + cop2_putc(26, h); + cop2_putc(27, 0); + cop2_putc(28, 0); +} + +#endif // GTE_HELPERS_DEFINED + +#undef unix +#define CESTER_NO_SIGNAL +#define CESTER_NO_TIME 
+#define EXIT_SUCCESS 0 +#define EXIT_FAILURE 1 +#include "exotic/cester.h" + +CESTER_BEFORE_ALL(gte_tests, + gte_enable(); +) + +// Include sub-test files +#include "gte-regio.c" +#include "gte-nclip.c" +#include "gte-avsz.c" +#include "gte-sqr.c" +#include "gte-op.c" +#include "gte-gpf-gpl.c" +#include "gte-rtps.c" +#include "gte-mvmva.c" +#include "gte-depthcue.c" +#include "gte-lighting.c" +#include "gte-edgecase.c" +#include "gte-precision.c" +#include "gte-encoding.c" diff --git a/tests/pcsxrunner/gte.cc b/tests/pcsxrunner/gte.cc new file mode 100644 index 000000000..57d78286d --- /dev/null +++ b/tests/pcsxrunner/gte.cc @@ -0,0 +1,35 @@ +/*************************************************************************** + * Copyright (C) 2026 PCSX-Redux authors * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
* + ***************************************************************************/ + +#include "gtest/gtest.h" +#include "main/main.h" + +TEST(GTE, Interpreter) { + MainInvoker invoker("-no-ui", "-run", "-bios", "src/mips/openbios/openbios.bin", "-testmode", "-interpreter", + "-luacov", "-loadexe", "src/mips/tests/gte/gte.ps-exe"); + int ret = invoker.invoke(); + EXPECT_EQ(ret, 0); +} + +TEST(GTE, Dynarec) { + MainInvoker invoker("-no-ui", "-run", "-bios", "src/mips/openbios/openbios.bin", "-testmode", "-dynarec", + "-luacov", "-loadexe", "src/mips/tests/gte/gte.ps-exe"); + int ret = invoker.invoke(); + EXPECT_EQ(ret, 0); +} diff --git a/vsprojects/core/core.vcxproj b/vsprojects/core/core.vcxproj index c8c3617a3..430513445 100644 --- a/vsprojects/core/core.vcxproj +++ b/vsprojects/core/core.vcxproj @@ -139,12 +139,13 @@ + + - @@ -192,6 +193,7 @@ + diff --git a/vsprojects/core/core.vcxproj.filters b/vsprojects/core/core.vcxproj.filters index ea375fdb1..6fd384b85 100644 --- a/vsprojects/core/core.vcxproj.filters +++ b/vsprojects/core/core.vcxproj.filters @@ -22,9 +22,6 @@ Source Files - - Source Files - Source Files @@ -145,7 +142,15 @@ Source Files - + + Source Files + + + Source Files + + + Source Files + @@ -298,7 +303,12 @@ Header Files - + + Header Files + + + Header Files + diff --git a/vsprojects/gui/gui.vcxproj.filters b/vsprojects/gui/gui.vcxproj.filters index 563743a78..ec2ad6ed2 100644 --- a/vsprojects/gui/gui.vcxproj.filters +++ b/vsprojects/gui/gui.vcxproj.filters @@ -109,8 +109,12 @@ Source Files\widgets - - + + Source Files + + + Source Files + @@ -212,8 +216,12 @@ Header Files\widgets - - + + Header Files + + + Header Files + diff --git a/vsprojects/tests/pcsxrunner/pcsxrunner.vcxproj b/vsprojects/tests/pcsxrunner/pcsxrunner.vcxproj index dd4fdef08..eee7ad5e8 100644 --- a/vsprojects/tests/pcsxrunner/pcsxrunner.vcxproj +++ b/vsprojects/tests/pcsxrunner/pcsxrunner.vcxproj @@ -255,6 +255,7 @@ + diff --git 
a/vsprojects/tests/pcsxrunner/pcsxrunner.vcxproj.filters b/vsprojects/tests/pcsxrunner/pcsxrunner.vcxproj.filters index 10d27629b..2cca3fd97 100644 --- a/vsprojects/tests/pcsxrunner/pcsxrunner.vcxproj.filters +++ b/vsprojects/tests/pcsxrunner/pcsxrunner.vcxproj.filters @@ -36,6 +36,9 @@ Source Files + + Source Files + Source Files