Skip to content

Commit b531aaa

Browse files
committed
SPU optimizer
1 parent 1c08439 commit b531aaa

18 files changed

Lines changed: 2729 additions & 368 deletions

rpcs3/Emu/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -182,6 +182,7 @@ target_sources(rpcs3_emu PRIVATE
182182
Cell/SPUCommonRecompiler.cpp
183183
Cell/SPULLVMRecompiler.cpp
184184
Cell/SPUThread.cpp
185+
Cell/spu_optimizer.cpp
185186
Cell/lv2/lv2.cpp
186187
Cell/lv2/sys_bdemu.cpp
187188
Cell/lv2/sys_btsetting.cpp

rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp

Lines changed: 26 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
8181
u8 output[20];
8282

8383
sha1_starts(&ctx);
84-
sha1_update(&ctx, reinterpret_cast<const u8*>(func.data.data()), func.data.size() * 4);
84+
sha1_update(&ctx, reinterpret_cast<const u8*>(func.get_data().data()), func.get_data().size() * 4);
8585
sha1_finish(&ctx, output);
8686

8787
be_t<u64> hash_start;
@@ -167,14 +167,14 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
167167
// Start compilation
168168
m_pos = func.lower_bound;
169169
m_base = func.entry_point;
170-
m_size = ::size32(func.data) * 4;
170+
m_size = ::size32(func.get_data()) * 4;
171171
const u32 start = m_pos;
172172
const u32 end = start + m_size;
173173

174174
// Create block labels
175-
for (u32 i = 0; i < func.data.size(); i++)
175+
for (u32 i = 0; i < func.get_data().size(); i++)
176176
{
177-
if (func.data[i] && m_block_info[i + start / 4])
177+
if (func.get_data()[i] && m_block_info[i + start / 4])
178178
{
179179
instr_labels[i * 4 + start] = c->newLabel();
180180
}
@@ -208,7 +208,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
208208
for (u32 addr = starta, m = 1; addr < enda && m; addr += 4, m <<= 1)
209209
{
210210
// Filter out if out of range, or is a hole
211-
if (addr >= start && addr < end && func.data[(addr - start) / 4])
211+
if (addr >= start && addr < end && func.get_data()[(addr - start) / 4])
212212
{
213213
result |= m;
214214
}
@@ -223,7 +223,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
223223
// Skip holes at the beginning (giga only)
224224
for (u32 j = start; j < end; j += 4)
225225
{
226-
if (!func.data[(j - start) / 4])
226+
if (!func.get_data()[(j - start) / 4])
227227
{
228228
starta += 4;
229229
}
@@ -258,7 +258,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
258258
}
259259
else if (m_size == 8)
260260
{
261-
c->mov(x86::rax, static_cast<u64>(func.data[1]) << 32 | func.data[0]);
261+
c->mov(x86::rax, static_cast<u64>(func.get_data()[1]) << 32 | func.get_data()[0]);
262262
c->cmp(x86::rax, x86::qword_ptr(*ls, *pc0));
263263
c->jnz(label_diff);
264264

@@ -269,7 +269,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
269269
}
270270
else if (m_size == 4)
271271
{
272-
c->cmp(x86::dword_ptr(*ls, *pc0), func.data[0]);
272+
c->cmp(x86::dword_ptr(*ls, *pc0), func.get_data()[0]);
273273
c->jnz(label_diff);
274274

275275
if (utils::has_avx())
@@ -348,7 +348,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
348348

349349
for (u32 i = j; i < j + 64; i += 4)
350350
{
351-
words.push_back(i >= start && i < end ? func.data[(i - start) / 4] : 0);
351+
words.push_back(i >= start && i < end ? func.get_data()[(i - start) / 4] : 0);
352352
}
353353

354354
code_off += 64;
@@ -388,7 +388,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
388388

389389
for (u32 i = starta; i < enda; i += 4)
390390
{
391-
words.push_back(i >= start && i < end ? func.data[(i - start) / 4] : 0);
391+
words.push_back(i >= start && i < end ? func.get_data()[(i - start) / 4] : 0);
392392
}
393393
}
394394
else if (sizea == 2 && (end - start) <= 32)
@@ -405,7 +405,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
405405

406406
for (u32 i = starta; i < starta + 32; i += 4)
407407
{
408-
words.push_back(i >= start ? func.data[(i - start) / 4] : i + 32 < end ? func.data[(i + 32 - start) / 4] : 0);
408+
words.push_back(i >= start ? func.get_data()[(i - start) / 4] : i + 32 < end ? func.get_data()[(i + 32 - start) / 4] : 0);
409409
}
410410
}
411411
else
@@ -468,7 +468,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
468468

469469
for (u32 i = j; i < j + 32; i += 4)
470470
{
471-
words.push_back(i >= start && i < end ? func.data[(i - start) / 4] : 0);
471+
words.push_back(i >= start && i < end ? func.get_data()[(i - start) / 4] : 0);
472472
}
473473

474474
code_off += 32;
@@ -510,7 +510,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
510510

511511
for (u32 i = starta; i < enda; i += 4)
512512
{
513-
words.push_back(i >= start && i < end ? func.data[(i - start) / 4] : 0);
513+
words.push_back(i >= start && i < end ? func.get_data()[(i - start) / 4] : 0);
514514
}
515515
}
516516
else if (sizea == 2 && (end - start) <= 32)
@@ -527,7 +527,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
527527

528528
for (u32 i = starta; i < starta + 32; i += 4)
529529
{
530-
words.push_back(i >= start ? func.data[(i - start) / 4] : i + 32 < end ? func.data[(i + 32 - start) / 4] : 0);
530+
words.push_back(i >= start ? func.get_data()[(i - start) / 4] : i + 32 < end ? func.get_data()[(i + 32 - start) / 4] : 0);
531531
}
532532
}
533533
else
@@ -602,7 +602,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
602602

603603
for (u32 i = j; i < j + 32; i += 4)
604604
{
605-
words.push_back(i >= start && i < end ? func.data[(i - start) / 4] : 0);
605+
words.push_back(i >= start && i < end ? func.get_data()[(i - start) / 4] : 0);
606606
}
607607

608608
code_off += 32;
@@ -672,10 +672,10 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
672672
}
673673

674674
// Determine which value will be duplicated at hole positions
675-
const u32 w3 = ::at32(func.data, (j - start + ~static_cast<u32>(std::countl_zero(cmask)) % 4 * 4) / 4);
676-
words.push_back(cmask & 1 ? func.data[(j - start + 0) / 4] : w3);
677-
words.push_back(cmask & 2 ? func.data[(j - start + 4) / 4] : w3);
678-
words.push_back(cmask & 4 ? func.data[(j - start + 8) / 4] : w3);
675+
const u32 w3 = ::at32(func.get_data(), (j - start + ~static_cast<u32>(std::countl_zero(cmask)) % 4 * 4) / 4);
676+
words.push_back(cmask & 1 ? func.get_data()[(j - start + 0) / 4] : w3);
677+
words.push_back(cmask & 2 ? func.get_data()[(j - start + 4) / 4] : w3);
678+
words.push_back(cmask & 4 ? func.get_data()[(j - start + 8) / 4] : w3);
679679
words.push_back(w3);
680680

681681
// PSHUFD immediate table for all possible hole mask values, holes repeat highest valid word
@@ -768,10 +768,10 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
768768
m_pos = -1;
769769
}
770770

771-
for (u32 i = 0; i < func.data.size(); i++)
771+
for (u32 i = 0; i < func.get_data().size(); i++)
772772
{
773773
const u32 pos = start + i * 4;
774-
const u32 op = std::bit_cast<be_t<u32>>(func.data[i]);
774+
const u32 op = std::bit_cast<be_t<u32>>(func.get_data()[i]);
775775

776776
if (!op)
777777
{
@@ -904,7 +904,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
904904
const bool added = !add_loc->compiled && add_loc->compiled.compare_and_swap_test(nullptr, fn);
905905

906906
// Rebuild trampoline if necessary
907-
if (!m_spurt->rebuild_ubertrampoline(func.data[0]))
907+
if (!m_spurt->rebuild_ubertrampoline(func.get_data()[0]))
908908
{
909909
return nullptr;
910910
}
@@ -4876,3 +4876,7 @@ void spu_recompiler::FMS(spu_opcode_t op)
48764876
c->subps(va, SPU_OFF_128(gpr, op.rc));
48774877
c->movaps(SPU_OFF_128(gpr, op.rt4), va);
48784878
}
4879+
4880+
void spu_recompiler::RPCS3_OPTIMIZER(spu_opcode_t op)
4881+
{
4882+
}

rpcs3/Emu/Cell/SPUASMJITRecompiler.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -297,4 +297,5 @@ class spu_recompiler : public spu_recompiler_base
297297
void FNMS(spu_opcode_t op);
298298
void FMA(spu_opcode_t op);
299299
void FMS(spu_opcode_t op);
300+
void RPCS3_OPTIMIZER(spu_opcode_t op);
300301
};

rpcs3/Emu/Cell/SPUAnalyser.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -232,6 +232,8 @@ struct spu_itype
232232
CLGTHI,
233233
CLGT,
234234
CLGTI, // compare_tag last
235+
236+
RPCS3_OPTIMIZER,
235237
};
236238

237239
using enum type;
@@ -257,7 +259,7 @@ struct spu_itype
257259
// Test for 4-op instruction
258260
friend constexpr bool operator &(type value, quadrop_tag)
259261
{
260-
return value >= MPYA && value <= FMS;
262+
return (value >= MPYA && value <= FMS) || value == RPCS3_OPTIMIZER;
261263
}
262264

263265
// Test for xfloat instruction
@@ -519,6 +521,7 @@ struct spu_iflag
519521
FCMEQ,
520522
FCGT,
521523
FCMGT,
524+
RPCS3_OPTIMIZER,
522525
};
523526

524527
// Enable address-of operator for spu_decoder<>
@@ -732,6 +735,7 @@ struct spu_iname
732735
NAME(FMA);
733736
NAME(FNMS);
734737
NAME(FMS);
738+
NAME(RPCS3_OPTIMIZER);
735739
};
736740

737741
#undef NAME

0 commit comments

Comments
 (0)