From ce9a48b987a995198dab18727f7d8f55874cf8d2 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Tue, 3 Jan 2023 00:51:27 +0100 Subject: [PATCH] PPCRec: Rework CR bit handling CR bits are now resident in registers instead of being baked into the instruction definitions. Same for XER SO, and LWARX reservation EA and value. Reworked LWARX/STWCX, CRxx ops, compare and branch instructions. As well as RC bit handling. Not all CR-related instructions are reimplemented yet. Introduced atomic_cmp_store operation to allow implementing STWCX in architecture agnostic IML Removed legacy CR-based compare and jump operations --- .../Interpreter/PPCInterpreterALU.hpp | 20 +- .../Interpreter/PPCInterpreterInternal.h | 7 +- .../Interpreter/PPCInterpreterLoadStore.hpp | 3 +- .../Interpreter/PPCInterpreterMain.cpp | 16 +- src/Cafe/HW/Espresso/PPCState.h | 4 +- .../Recompiler/BackendX64/BackendX64.cpp | 450 +++-------- .../Recompiler/BackendX64/BackendX64.h | 7 +- .../Recompiler/BackendX64/BackendX64FPU.cpp | 200 ++--- .../Recompiler/BackendX64/x86Emitter.h | 215 +++++ .../Espresso/Recompiler/IML/IMLAnalyzer.cpp | 113 +-- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 105 ++- .../Recompiler/IML/IMLInstruction.cpp | 45 +- .../Espresso/Recompiler/IML/IMLInstruction.h | 152 ++-- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 154 ++-- .../HW/Espresso/Recompiler/IML/IMLSegment.h | 2 +- .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 13 +- .../HW/Espresso/Recompiler/PPCRecompiler.h | 2 +- .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 6 - .../Recompiler/PPCRecompilerImlGen.cpp | 760 ++++++++---------- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 71 +- .../Recompiler/PPCRecompilerIntermediate.cpp | 2 +- 21 files changed, 1115 insertions(+), 1232 deletions(-) diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp index fe9316f0..769344f8 100644 --- 
a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp @@ -3,12 +3,12 @@ static void PPCInterpreter_setXerOV(PPCInterpreter_t* hCPU, bool hasOverflow) { if (hasOverflow) { - hCPU->spr.XER |= XER_SO; - hCPU->spr.XER |= XER_OV; + hCPU->xer_so = 1; + hCPU->xer_ov = 1; } else { - hCPU->spr.XER &= ~XER_OV; + hCPU->xer_ov = 0; } } @@ -246,7 +246,7 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode) uint32 a = hCPU->gpr[rA]; uint32 b = hCPU->gpr[rB]; hCPU->gpr[rD] = ~a + b + 1; - // update xer + // update carry if (ppc_carry_3(~a, b, 1)) hCPU->xer_ca = 1; else @@ -848,8 +848,7 @@ static void PPCInterpreter_CMP(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if ((hCPU->spr.XER & XER_SO) != 0) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } @@ -871,8 +870,7 @@ static void PPCInterpreter_CMPL(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if ((hCPU->spr.XER & XER_SO) != 0) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } @@ -895,8 +893,7 @@ static void PPCInterpreter_CMPI(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if (hCPU->spr.XER & XER_SO) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } @@ -919,8 +916,7 @@ static void PPCInterpreter_CMPLI(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if (hCPU->spr.XER & XER_SO) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h 
b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h index bc8458d9..bac253c4 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h @@ -50,9 +50,9 @@ #define CR_BIT_EQ 2 #define CR_BIT_SO 3 -#define XER_SO (1<<31) // summary overflow bit -#define XER_OV (1<<30) // overflow bit #define XER_BIT_CA (29) // carry bit index. To accelerate frequent access, this bit is stored as a separate uint8 +#define XER_BIT_SO (31) // summary overflow, counterpart to CR SO +#define XER_BIT_OV (30) // FPSCR #define FPSCR_VXSNAN (1<<24) @@ -118,7 +118,8 @@ static inline void ppc_update_cr0(PPCInterpreter_t* hCPU, uint32 r) { - hCPU->cr[CR_BIT_SO] = (hCPU->spr.XER&XER_SO) ? 1 : 0; + cemu_assert_debug(hCPU->xer_so <= 1); + hCPU->cr[CR_BIT_SO] = hCPU->xer_so; hCPU->cr[CR_BIT_LT] = ((r != 0) ? 1 : 0) & ((r & 0x80000000) ? 1 : 0); hCPU->cr[CR_BIT_EQ] = (r == 0); hCPU->cr[CR_BIT_GT] = hCPU->cr[CR_BIT_EQ] ^ hCPU->cr[CR_BIT_LT] ^ 1; // this works because EQ and LT can never be set at the same time. So the only case where GT becomes 1 is when LT=0 and EQ=0 diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp index 694e05e6..26467458 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp @@ -85,7 +85,8 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode) ppc_setCRBit(hCPU, CR_BIT_GT, 0); ppc_setCRBit(hCPU, CR_BIT_EQ, 1); } - ppc_setCRBit(hCPU, CR_BIT_SO, (hCPU->spr.XER&XER_SO) != 0 ? 
1 : 0); + cemu_assert_debug(hCPU->xer_so <= 1); + ppc_setCRBit(hCPU, CR_BIT_SO, hCPU->xer_so); // remove reservation hCPU->reservedMemAddr = 0; hCPU->reservedMemValue = 0; diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp index ace1601f..08d6765a 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp @@ -63,16 +63,24 @@ void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue) uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU) { uint32 xerValue = hCPU->spr.XER; - xerValue &= ~(1<<XER_BIT_CA); - if( hCPU->xer_ca ) - xerValue |= (1<<XER_BIT_CA); + xerValue &= ~(1 << XER_BIT_CA); + xerValue &= ~(1 << XER_BIT_SO); + xerValue &= ~(1 << XER_BIT_OV); + if (hCPU->xer_ca) + xerValue |= (1 << XER_BIT_CA); + if (hCPU->xer_so) + xerValue |= (1 << XER_BIT_SO); + if (hCPU->xer_ov) + xerValue |= (1 << XER_BIT_OV); return xerValue; } void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v) { hCPU->spr.XER = v; - hCPU->xer_ca = (v>>XER_BIT_CA)&1; + hCPU->xer_ca = (v >> XER_BIT_CA) & 1; + hCPU->xer_so = (v >> XER_BIT_SO) & 1; + hCPU->xer_ov = (v >> XER_BIT_OV) & 1; } uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU) diff --git a/src/Cafe/HW/Espresso/PPCState.h b/src/Cafe/HW/Espresso/PPCState.h index ea7edfa2..8f27ee93 100644 --- a/src/Cafe/HW/Espresso/PPCState.h +++ b/src/Cafe/HW/Espresso/PPCState.h @@ -49,6 +49,8 @@ struct PPCInterpreter_t uint32 fpscr; uint8 cr[32]; // 0 -> bit not set, 1 -> bit set (upper 7 bits of each byte must always be zero) (cr0 starts at index 0, cr1 at index 4 ..) 
uint8 xer_ca; // carry from xer + uint8 xer_so; + uint8 xer_ov; uint8 LSQE; uint8 PSE; // thread remaining cycles @@ -67,7 +69,7 @@ struct PPCInterpreter_t uint32 reservedMemValue; // temporary storage for recompiler FPR_t temporaryFPR[8]; - uint32 temporaryGPR[4]; // deprecated, refactor away backend dependency on this + uint32 temporaryGPR[4]; // deprecated, refactor backend dependency on this away uint32 temporaryGPR_reg[4]; // values below this are not used by Cafe OS usermode struct diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index c8d81fac..ef60c0af 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -28,6 +28,10 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) return (x86Assembler64::GPR8_REX)regId; } +static x86Assembler64::GPR64 _reg64_from_reg32(x86Assembler64::GPR32 regId) +{ + return (x86Assembler64::GPR64)regId; +} X86Cond _x86Cond(IMLCondition imlCond) { @@ -41,6 +45,10 @@ X86Cond _x86Cond(IMLCondition imlCond) return X86_CONDITION_NBE; case IMLCondition::UNSIGNED_LT: return X86_CONDITION_B; + case IMLCondition::SIGNED_GT: + return X86_CONDITION_NLE; + case IMLCondition::SIGNED_LT: + return X86_CONDITION_L; default: break; } @@ -88,18 +96,6 @@ void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, si } } -void PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) -{ - sint32 crRegister = imlInstruction->crRegister; - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // check for sign instead of _BELOW (CF) which is not set by TEST - if( 
(imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Set CR SO if XER SO bit is set -} - void* ATTR_MS_ABI PPCRecompiler_virtualHLE(PPCInterpreter_t* hCPU, uint32 hleFuncId) { void* prevRSPTemp = hCPU->rspTemp; @@ -424,7 +420,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p } else if( imlInstruction->op_storeLoad.copyWidth == 8 ) { - // todo: Optimize by using only MOVZX/MOVSX if( indexed ) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); if( signExtend ) @@ -434,22 +429,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p if( indexed && realRegisterMem != realRegisterData ) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } - else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_LOAD_LWARX_MARKER ) - { - if( imlInstruction->op_storeLoad.immS32 != 0 ) - assert_dbg(); // not supported - if( indexed ) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), realRegisterMem); // remember EA for reservation - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - if( switchEndian ) - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - 
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), realRegisterData); // remember value for reservation - // LWARX instruction costs extra cycles (this speeds up busy loops) - x64Gen_sub_mem32reg64_imm32(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20); - } else return false; return true; @@ -529,106 +508,62 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, if (indexed) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) - { - if (imlInstruction->op_storeLoad.immS32 != 0) - assert_dbg(); // todo - // reset cr0 LT, GT and EQ - sint32 crRegister = 0; - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ), 0); - // calculate effective address - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - if (swapEndian) - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - // realRegisterMem now holds EA - x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext, realRegisterMem, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr)); - sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - // EA matches reservation - // backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten) - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, 
(uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); - // backup REG_RESV_MEMBASE - x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]), REG_RESV_MEMBASE); - // add mem register to REG_RESV_MEMBASE - x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem); - // load reserved value in EAX - x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue)); - // bswap EAX - x64Gen_bswap_reg64Lower32bit(x64GenContext, X86_REG_EAX); - - x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext, REG_RESV_MEMBASE, 0, REG_RESV_TEMP); - - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); - - // reset reservation - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), 0); - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), 0); - - // restore EAX - x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); - // restore REG_RESV_MEMBASE - x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_MEMBASE, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2])); - - // copy XER SO to CR0 SO - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO)); - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->emitter->GetWriteIndex()); - } else return false; return true; } +bool PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, 
IMLInstruction* imlInstruction) +{ + auto regBoolOut = _reg32_from_reg8(_reg8(imlInstruction->op_atomic_compare_store.regBoolOut)); + auto regEA = _reg32(imlInstruction->op_atomic_compare_store.regEA); + auto regVal = _reg32(imlInstruction->op_atomic_compare_store.regWriteValue); + auto regCmp = _reg32(imlInstruction->op_atomic_compare_store.regCompareValue); + + // make sure none of the regs are in EAX + if (regEA == X86_REG_EAX || + regBoolOut == X86_REG_EAX || + regVal == X86_REG_EAX || + regCmp == X86_REG_EAX) + { + printf("x86: atomic_cmp_store cannot emit due to EAX already being in use\n"); + return false; + } + + x64GenContext->emitter->XCHG_qq(REG_RESV_TEMP, X86_REG_RAX); + x64GenContext->emitter->MOV_dd(X86_REG_EAX, regCmp); + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regBoolOut), _reg32_from_reg8(regBoolOut)); // zero bytes unaffected by SETcc + x64GenContext->emitter->LockPrefix(); + x64GenContext->emitter->CMPXCHG_dd_l(REG_RESV_MEMBASE, 0, _reg64_from_reg32(regEA), 1, regVal); + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regBoolOut); + x64GenContext->emitter->XCHG_qq(REG_RESV_TEMP, X86_REG_RAX); + return true; +} + bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) { // registerResult = registerA - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - if(imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - if (imlInstruction->crMode == PPCREC_CR_MODE_LOGICAL) - { - // since MOV doesn't set eflags we need another test instruction - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); - // set cr bits - 
PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - else - { - assert_dbg(); - } - } - else - { + if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - } } else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP) { if (imlInstruction->op_r_r.registerA != imlInstruction->op_r_r.registerResult) - assert_dbg(); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); // if movbe is available we can move and swap in a single instruction? x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.registerResult); } else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.registerResult, reg32ToReg16(imlInstruction->op_r_r.registerA)); } else if( imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->operation == PPCREC_IML_OP_OR ) { // registerResult |= registerA @@ -647,7 +582,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else if( imlInstruction->operation == PPCREC_IML_OP_NOT ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register content if different registers if( 
imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); @@ -655,7 +589,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else if (imlInstruction->operation == PPCREC_IML_OP_NEG) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register content if different registers if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); @@ -663,9 +596,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else if( imlInstruction->operation == PPCREC_IML_OP_CNTLZW ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // count leading zeros - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) if(g_CPUFeatures.x86.lzcnt) { @@ -686,47 +617,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - // registerA CMP registerB (arithmetic compare) - if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) - { - return false; // a NO-OP instruction - } - if( imlInstruction->crRegister >= 8 ) - { - return false; - } - // create compare instruction - x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - // set cr bits - sint32 crRegister = imlInstruction->crRegister; - if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) - { - if( 
(imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Also set summary overflow if xer bit is set - } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Also set summary overflow if xer bit is set - } - else - assert_dbg(); - } else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA); @@ -758,98 +650,50 @@ bool 
PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, { if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) { - // registerResult = immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_AND ) { - // registerResult &= immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_OR ) { - // registerResult |= immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_XOR ) { - // registerResult ^= immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // registerResult <<<= immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( (imlInstruction->op_r_immS32.immS32&0x80) ) assert_dbg(); // should not happen x64Gen_rol_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint8)imlInstruction->op_r_immS32.immS32); } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - // registerResult CMP immS32 (arithmetic compare) - if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) - { - 
debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): No-Op CMP found\n"); - return true; // a NO-OP instruction - } - if( imlInstruction->crRegister >= 8 ) - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported CMP with crRegister = 8\n"); - return false; - } - // create compare instruction - x64Gen_cmp_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32); - // set cr bits - uint32 crRegister = imlInstruction->crRegister; - if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, 
offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - } - else - assert_dbg(); - // todo: Also set summary overflow if xer bit is set? - } else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) { - uint32 destRegister = imlInstruction->op_r_immS32.registerIndex; - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); - for(sint32 f=0; f<32; f++) - { - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); - } + __debugbreak(); + //uint32 destRegister = imlInstruction->op_r_immS32.registerIndex; + //x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); + //for(sint32 f=0; f<32; f++) + //{ + // x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0); + // x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); + //} } else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF) { - uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex; - uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); - for (sint32 f = 0; f < 32; f++) - { - if(((crBitMask >> f) & 1) == 0) - continue; - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0); - x64Gen_test_reg64Low32_imm32(x64GenContext, srcRegister, 0x80000000>>f); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f)); - } + __debugbreak(); + //uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex; + //uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); + //for (sint32 f = 0; f < 32; f++) + //{ + // if(((crBitMask >> f) & 1) == 0) + // continue; + // x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0); + // 
x64Gen_test_reg64Low32_imm32(x64GenContext, srcRegister, 0x80000000>>f); + // x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f)); + //} } else { @@ -861,30 +705,29 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - { - // registerResult = immS32 (conditional) - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - assert_dbg(); - } + cemu_assert_unimplemented(); + //if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + //{ + // // registerResult = immS32 (conditional) + // if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) + // { + // assert_dbg(); + // } - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)imlInstruction->op_conditional_r_s32.immS32); - uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex; - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); - if (imlInstruction->op_conditional_r_s32.bitMustBeSet) - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - else - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - return true; - } + // x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)imlInstruction->op_conditional_r_s32.immS32); + // uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex; + // x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) 
+ crBitIndex * sizeof(uint8), 0); + // if (imlInstruction->op_conditional_r_s32.bitMustBeSet) + // x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); + // else + // x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); + // return true; + //} return false; } bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - if (imlInstruction->operation == PPCREC_IML_OP_ADD) { // registerResult = registerOperand1 + registerOperand2 @@ -908,7 +751,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) { - // registerResult = registerOperand1 - registerOperand2 sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -940,7 +782,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegA = imlInstruction->op_r_r_r.registerA; sint32 rRegB = imlInstruction->op_r_r_r.registerB; @@ -1140,7 +981,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { - cemu_assert_debug(imlInstruction->crRegister == 
PPC_REC_INVALID_REGISTER); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1175,7 +1015,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1310,15 +1149,12 @@ bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - sint32 regResult = imlInstruction->op_r_r_s32.registerResult; sint32 regOperand = imlInstruction->op_r_r_s32.registerA; uint32 immS32 = imlInstruction->op_r_r_s32.immS32; if( imlInstruction->operation == PPCREC_IML_OP_ADD ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; @@ -1328,7 +1164,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction } else if (imlInstruction->operation == PPCREC_IML_OP_SUB) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if (regResult != regOperand) x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32); @@ -1337,7 +1172,6 @@ bool 
PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_XOR) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if (regResult != regOperand) x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); if (imlInstruction->operation == PPCREC_IML_OP_AND) @@ -1355,8 +1189,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction uint32 me = (vImm>>8)&0xFF; uint32 sh = (vImm>>16)&0xFF; uint32 mask = ppc_mask(mb, me); - // save cr - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy rS to temporary register x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r_s32.registerA); // rotate destination register @@ -1434,50 +1266,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFu return true; } -bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction) -{ - if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE ) - { - // jump always - cemu_assert_debug(imlSegment->nextSegmentBranchTaken); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - x64Gen_jmp_imm32(x64GenContext, 0); - } - else - { - cemu_assert_debug(imlSegment->nextSegmentBranchTaken); - // generate jump update marker - if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 ) - { - // temporary cr is used, which means we use the currently active eflags - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - sint32 condition = imlInstruction->op_conditionalJump.condition; - if( condition == 
PPCREC_JUMP_CONDITION_E ) - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - else if( condition == PPCREC_JUMP_CONDITION_NE ) - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - else - assert_dbg(); - } - else - { - uint8 crBitIndex = imlInstruction->op_conditionalJump.crRegisterIndex*4 + imlInstruction->op_conditionalJump.crBitIndex; - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); - cemu_assert_debug(imlSegment->GetBranchTaken()); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, (void*)imlSegment->GetBranchTaken()); - if( imlInstruction->op_conditionalJump.bitMustBeSet ) - { - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); - } - else - { - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_CARRY, 0); - } - } - } - return true; -} - bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { // some tests (all performed on a i7-4790K) @@ -1492,49 +1280,6 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction return true; } -/* -* PPC condition register operation -*/ -bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) -{ - if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR) - { - // clear cr bit - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD, 0); - return true; - } - else if (imlInstruction->operation == PPCREC_IML_OP_CR_SET) - { - // set cr bit - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD, 1); - return true; - } - else if(imlInstruction->operation == PPCREC_IML_OP_CR_OR || imlInstruction->operation 
== PPCREC_IML_OP_CR_ORC || - imlInstruction->operation == PPCREC_IML_OP_CR_AND || imlInstruction->operation == PPCREC_IML_OP_CR_ANDC ) - { - x64Emit_movZX_reg64_mem8(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crB); - if (imlInstruction->operation == PPCREC_IML_OP_CR_ORC || imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) - { - return false; // untested - x64Gen_int3(x64GenContext); - x64Gen_xor_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); // complement - } - if(imlInstruction->operation == PPCREC_IML_OP_CR_OR || imlInstruction->operation == PPCREC_IML_OP_CR_ORC) - x64Gen_or_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crA); - else - x64Gen_and_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crA); - - x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD); - - return true; - } - else - { - assert_dbg(); - } - return false; -} - void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; @@ -1567,6 +1312,22 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, { x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); } + else if (name == PPCREC_NAME_XER_SO) + { + x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so)); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, 
offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) + { + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue)); + } else assert_dbg(); } @@ -1603,6 +1364,22 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, { x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); } + else if (name == PPCREC_NAME_XER_SO) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) + { + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), imlInstruction->op_r_name.registerIndex); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), imlInstruction->op_r_name.registerIndex); + } else assert_dbg(); } @@ -1713,13 +1490,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo codeGenerationFailed = true; } } - else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP ) - { - if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, segIt, imlInstruction) == false ) - { - 
codeGenerationFailed = true; - } - } else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) { PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); @@ -1759,12 +1529,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo codeGenerationFailed = true; } } - else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) + else if (imlInstruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - if( PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) - { + if (!PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction)) codeGenerationFailed = true; - } } else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) { @@ -1822,6 +1590,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo { PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_COMPARE) + { + PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } else { debug_printf("PPCRecompiler_generateX64Code(): Unsupported iml type 0x%x\n", imlInstruction->type); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 1683c5b9..066078cb 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -62,11 +62,6 @@ enum X86_CONDITION_NONE, // no condition, jump always }; -#define PPCREC_CR_TEMPORARY (8) // never stored -#define PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC (0) // for signed arithmetic operations (ADD, CMPI) -#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI) -#define 
PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI) - #define PPC_X64_GPR_USABLE_REGISTERS (16-4) #define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register @@ -86,6 +81,8 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); + // ASM gen void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v); void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 14d05d5a..47312487 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -5,6 +5,31 @@ #include "asm/x64util.h" // for recompiler_fres / frsqrte +uint32 _regF64(IMLReg r) +{ + return (uint32)r; +} + +static x86Assembler64::GPR32 _reg32(sint8 physRegId) +{ + return (x86Assembler64::GPR32)physRegId; +} + +static x86Assembler64::GPR8_REX _reg8(sint8 physRegId) +{ + return (x86Assembler64::GPR8_REX)physRegId; +} + +static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId) +{ + return (x86Assembler64::GPR32)regId; +} + +static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) +{ + return (x86Assembler64::GPR8_REX)regId; +} + void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, 
ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; @@ -690,18 +715,10 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction { if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } // VPUNPCKHQDQ if (imlInstruction->op_fpr_r_r.registerResult == imlInstruction->op_fpr_r_r.registerOperand) { @@ -725,170 +742,73 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); } 
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, 2); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // use unpckhpd here? x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, 3); _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_divsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR) { - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - assert_dbg(); - } x64Gen_divpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - 
assert_dbg(); - } x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_addpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, imlInstruction->op_fpr_r_r.registerOperand); x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); // move to FPR register x64Gen_movq_xmmReg_reg64(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_TEMP); } - else if(imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM ) - { - if( 
imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM) - x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_TOP) - { - // temporarily switch top/bottom of both operands and compare - if (imlInstruction->op_fpr_r_r.registerResult == imlInstruction->op_fpr_r_r.registerOperand) - { - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - } - else - { - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerOperand); - x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerOperand); - } - } - else - x64Gen_comisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - // todo: handle FPSCR updates - // update cr - sint32 crRegister = imlInstruction->crRegister; - // if the parity bit is set (NaN) we need to manually set CR LT, GT and EQ to 0 (comisd/ucomisd sets the respective flags to 1 in case of NaN) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_PARITY, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_SO)); // unordered - sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_PARITY, 0); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 
X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // same as X64_CONDITION_CARRY - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ), 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); - } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } // move register to XMM15 x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); @@ -901,7 +821,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // move register to XMM15 x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); @@ -914,7 +833,6 @@ void 
PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) { @@ -925,7 +843,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) { @@ -936,7 +853,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // calculate bottom half of result x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); if(imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR) @@ -968,10 +884,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti { if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM) { - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - assert_dbg(); - } if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA) { x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); @@ -988,8 +900,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM) { - // registerResult(fp0) = registerOperandA(fp0) + registerOperandB(fp0) - 
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // todo: Use AVX 3-operand VADDSD if available if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA) { @@ -1008,7 +918,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR) { // registerResult = registerOperandA - registerOperandB - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA ) { x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); @@ -1031,7 +940,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA ) { x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); @@ -1059,8 +967,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc { if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // todo: Investigate if there are other optimizations possible if the operand registers overlap // generic case // 1) move frA bottom to frTemp bottom and top @@ -1074,7 +980,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM1 ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // todo: Investigate if there are other optimizations possible if the operand registers overlap // 1) 
move frA bottom to frTemp bottom and top x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandA); @@ -1094,7 +999,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); @@ -1110,7 +1014,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // select bottom x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex(); @@ -1145,32 +1048,22 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc assert_dbg(); } -/* - * Single FPR operation - */ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // toggle sign bit x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } else if( imlInstruction->operation == 
PPCREC_IML_OP_FPR_ABS_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // mask out sign bit x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // set sign bit x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // convert to 32bit single x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); // convert back to 64bit double @@ -1178,7 +1071,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // convert to 32bit singles x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); // convert back to 64bit doubles @@ -1186,7 +1078,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // convert bottom to 64bit double x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); // copy to top half @@ -1197,3 +1088,44 @@ void 
PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, cemu_assert_unimplemented(); } } + +void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + auto regR = _reg8(imlInstruction->op_fpr_compare.regR); + auto regA = _regF64(imlInstruction->op_fpr_compare.regA); + auto regB = _regF64(imlInstruction->op_fpr_compare.regB); + + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); + x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, regA, regB); + + if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_GT) + { + // GT case can be covered with a single SETnbe which checks CF==0 && ZF==0 (unordered sets both) + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_NBE, regR); + return; + } + else if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_U) + { + // unordered case can be checked via PF + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PE, regR); + return; + } + + // remember unordered state + auto regTmp = _reg32_from_reg8(_reg32(REG_RESV_TEMP)); + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PO, regTmp); // by reversing the parity we can avoid having to XOR the value for masking the LT/EQ conditions + + X86Cond x86Cond; + switch (imlInstruction->op_fpr_compare.cond) + { + case IMLCondition::UNORDERED_LT: + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_B, regR); + break; + case IMLCondition::UNORDERED_EQ: + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regR); + break; + default: + cemu_assert_unimplemented(); + } + x64GenContext->emitter->AND_bb(_reg8_from_reg32(regR), _reg8_from_reg32(regTmp)); // if unordered (PF=1) then force LT/GT/EQ to zero +} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h index 
6b05a514..eae3835d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h @@ -84,6 +84,7 @@ public: using GPR64 = X86Reg; using GPR32 = X86Reg; using GPR8_REX = X86Reg; + void LockPrefix() { _emitU8(0xF0); }; void ADD_bb(GPR8_REX dst, GPR8_REX src) { if ((src >= 4) || (dst >= 4)) @@ -3194,6 +3195,124 @@ public: if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } + void XCHG_bb(GPR8_REX dst, GPR8_REX src) + { + if ((dst >= 4) || (src >= 4)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x86); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + } + void XCHG_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x86); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XCHG_dd(GPR32 dst, GPR32 src) + { + if (((dst & 8) != 0) || ((src & 8) != 0)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x87); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + } + void XCHG_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + _emitU8(0x87); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + } + void XCHG_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x87); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XCHG_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x87); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } void MOV_bb(GPR8_REX dst, GPR8_REX src) { if ((src >= 4) || (dst >= 4)) @@ -4032,6 +4151,102 @@ public: if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } + void CMPXCHG_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMPXCHG_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMPXCHG_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + 
sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMPXCHG_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void BSWAP_d(GPR32 dst) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x0f); + _emitU8(0xc8 | ((dst) & 7)); + } + void BSWAP_q(GPR64 dst) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x0f); + _emitU8(0xc8 | ((dst) & 7)); + } void BT_du8(GPR32 dst, u8 imm) { if (((dst & 8) != 0)) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index cd40de7f..d24fec87 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -18,7 +18,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) // loops using BDNZ are assumed to always be finite for(const IMLInstruction& instIt : imlSegment->imlList) { - if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_SUB && instIt.crRegister == 8) + if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_SUB) { return true; } @@ -92,59 +92,60 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) void IMLAnalyzer_GetCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking) { - crTracking->readCRBits = 0; - crTracking->writtenCRBits = 0; - if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP) - { - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - { - uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex); - crTracking->readCRBits = (crBitFlag); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex); - crTracking->readCRBits = crBitFlag; - } - 
else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR) - { - crTracking->readCRBits = 0xFFFFFFFF; - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF) - { - crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CR) - { - if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR || - imlInstruction->operation == PPCREC_IML_OP_CR_SET) - { - uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - crTracking->writtenCRBits = crBitFlag; - } - else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR || - imlInstruction->operation == PPCREC_IML_OP_CR_ORC || - imlInstruction->operation == PPCREC_IML_OP_CR_AND || - imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) - { - uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - crTracking->writtenCRBits = crBitFlag; - crBitFlag = 1 << (imlInstruction->op_cr.crA); - crTracking->readCRBits = crBitFlag; - crBitFlag = 1 << (imlInstruction->op_cr.crB); - crTracking->readCRBits |= crBitFlag; - } - else - assert_dbg(); - } - else if (IMLAnalyzer_CanTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7) - { - crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4)); - } - else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) - { - // overwrites CR0 - crTracking->writtenCRBits |= (0xF << 0); - } + __debugbreak(); + //crTracking->readCRBits = 0; + //crTracking->writtenCRBits = 0; + //if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP) + //{ + // if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + // { + // uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + 
imlInstruction->op_conditionalJump.crBitIndex); + // crTracking->readCRBits = (crBitFlag); + // } + //} + //else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + //{ + // uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex); + // crTracking->readCRBits = crBitFlag; + //} + //else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR) + //{ + // crTracking->readCRBits = 0xFFFFFFFF; + //} + //else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF) + //{ + // crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); + //} + //else if (imlInstruction->type == PPCREC_IML_TYPE_CR) + //{ + // if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR || + // imlInstruction->operation == PPCREC_IML_OP_CR_SET) + // { + // uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); + // crTracking->writtenCRBits = crBitFlag; + // } + // else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR || + // imlInstruction->operation == PPCREC_IML_OP_CR_ORC || + // imlInstruction->operation == PPCREC_IML_OP_CR_AND || + // imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) + // { + // uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); + // crTracking->writtenCRBits = crBitFlag; + // crBitFlag = 1 << (imlInstruction->op_cr.crA); + // crTracking->readCRBits = crBitFlag; + // crBitFlag = 1 << (imlInstruction->op_cr.crB); + // crTracking->readCRBits |= crBitFlag; + // } + // else + // assert_dbg(); + //} + //else if (IMLAnalyzer_CanTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7) + //{ + // crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4)); + //} + //else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && 
imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) + //{ + // // overwrites CR0 + // crTracking->writtenCRBits |= (0xF << 0); + //} } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 4dafaf18..2fbf2b6f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -206,6 +206,18 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool { strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0); } + else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST) + strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR); + else if (inst.op_r_name.name == PPCREC_NAME_XER_CA) + strOutput.add("xer.ca"); + else if (inst.op_r_name.name == PPCREC_NAME_XER_SO) + strOutput.add("xer.so"); + else if (inst.op_r_name.name == PPCREC_NAME_XER_OV) + strOutput.add("xer.ov"); + else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA) + strOutput.add("cpuReservation.ea"); + else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL) + strOutput.add("cpuReservation.value"); else strOutput.add("ukn"); strOutput.add(")"); @@ -217,11 +229,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.add(" "); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.registerResult); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.registerA, true); - - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } } else if (inst.type == PPCREC_IML_TYPE_R_R_R) { @@ -231,10 +238,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerResult); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerA); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerB, true); - if 
(inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } } else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY) { @@ -274,9 +277,13 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); IMLDebug_AppendRegisterParam(strOutput, inst.op_conditionalJump2.registerBool, true); - if(!inst.op_conditionalJump2.mustBeTrue) + if (!inst.op_conditionalJump2.mustBeTrue) strOutput.add("(inverted)"); } + else if (inst.type == PPCREC_IML_TYPE_JUMP) + { + strOutput.add("JUMP"); + } else if (inst.type == PPCREC_IML_TYPE_R_R_S32) { strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); @@ -286,11 +293,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.registerResult); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.registerA); IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true); - - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } } else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY) { @@ -311,55 +313,42 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.registerIndex); IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true); - - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } } else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - { - if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) - strOutput.add("LD_"); - else - strOutput.add("ST_"); + { + if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) + strOutput.add("LD_"); + else + 
strOutput.add("ST_"); - if (inst.op_storeLoad.flags2.signExtend) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); + + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); + + if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) + strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2); + else + strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); + } + else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + { + strOutput.add("ATOMIC_ST_U32"); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); - - if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2); - else - strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); - } - else if (inst.type == PPCREC_IML_TYPE_CJUMP) - { - if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_E) - strOutput.add("JE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NE) - strOutput.add("JNE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_G) - strOutput.add("JG"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_GE) - strOutput.add("JGE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_L) - strOutput.add("JL"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_LE) - strOutput.add("JLE"); - else if (inst.op_conditionalJump.condition == 
PPCREC_JUMP_CONDITION_NONE) - strOutput.add("JALW"); // jump always - else - cemu_assert_unimplemented(); - strOutput.addFmt(" (cr{})", inst.crRegister); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true); } else if (inst.type == PPCREC_IML_TYPE_NO_OP) { @@ -487,10 +476,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool else strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32); strOutput.add(" (conditional)"); - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> and update CR{}", inst.crRegister); - } } else { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 52e19e8c..b7e2294c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -154,7 +154,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const // carry is always written registersUsed->writtenNamedReg2 = op_r_r_r_carry.regCarry; } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // no effect on registers } @@ -222,9 +222,12 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) registersUsed->readNamedReg3 = op_storeLoad.registerMem2; } - else if (type == PPCREC_IML_TYPE_CR) + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - // only affects cr register + registersUsed->readNamedReg1 = op_atomic_compare_store.regEA; + registersUsed->readNamedReg2 = op_atomic_compare_store.regCompareValue; + 
registersUsed->readNamedReg3 = op_atomic_compare_store.regWriteValue; + registersUsed->writtenNamedReg1 = op_atomic_compare_store.regBoolOut; } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { @@ -467,6 +470,12 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else cemu_assert_unimplemented(); } + else if (type == PPCREC_IML_TYPE_FPR_COMPARE) + { + registersUsed->writtenNamedReg1 = op_fpr_compare.regR; + registersUsed->readFPR1 = op_fpr_compare.regA; + registersUsed->readFPR2 = op_fpr_compare.regB; + } else { cemu_assert_unimplemented(); @@ -560,7 +569,7 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl { op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, translationTable); } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP) + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP) { // no effect on registers } @@ -613,9 +622,12 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); } - else if (type == PPCREC_IML_TYPE_CR) + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - // only affects cr register + op_atomic_compare_store.regEA = replaceRegisterMultiple(op_atomic_compare_store.regEA, translationTable); + op_atomic_compare_store.regCompareValue = replaceRegisterMultiple(op_atomic_compare_store.regCompareValue, translationTable); + op_atomic_compare_store.regWriteValue = replaceRegisterMultiple(op_atomic_compare_store.regWriteValue, translationTable); + op_atomic_compare_store.regBoolOut = replaceRegisterMultiple(op_atomic_compare_store.regBoolOut, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { @@ -689,6 +701,10 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl else if 
(type == PPCREC_IML_TYPE_FPR_R) { } + else if (type == PPCREC_IML_TYPE_FPR_COMPARE) + { + op_fpr_compare.regR = replaceRegisterMultiple(op_fpr_compare.regR, translationTable); + } else { cemu_assert_unimplemented(); @@ -725,7 +741,7 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { // not affected } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // not affected } @@ -753,9 +769,9 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { // not affected } - else if (type == PPCREC_IML_TYPE_CR) + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - // only affects cr register + ; } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { @@ -803,6 +819,11 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { op_fpr_r.registerResult = replaceRegisterMultiple(op_fpr_r.registerResult, fprRegisterSearched, fprRegisterReplaced); } + else if (type == PPCREC_IML_TYPE_FPR_COMPARE) + { + op_fpr_compare.regA = replaceRegisterMultiple(op_fpr_compare.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_compare.regB = replaceRegisterMultiple(op_fpr_compare.regB, fprRegisterSearched, fprRegisterReplaced); + } else { cemu_assert_unimplemented(); @@ -839,7 +860,7 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe { // not affected } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // not affected } @@ -867,9 +888,9 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe { // not affected } - else if (type == PPCREC_IML_TYPE_CR) + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - // only affects cr register + ; } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h 
b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 9491136e..08955b39 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -29,13 +29,6 @@ enum PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20 PPCREC_IML_OP_MFCR, // copy cr to gpr PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask) - // condition register - PPCREC_IML_OP_CR_CLEAR, // clear cr bit - PPCREC_IML_OP_CR_SET, // set cr bit - PPCREC_IML_OP_CR_OR, // OR cr bits - PPCREC_IML_OP_CR_ORC, // OR cr bits, complement second input operand bit first - PPCREC_IML_OP_CR_AND, // AND cr bits - PPCREC_IML_OP_CR_ANDC, // AND cr bits, complement second input operand bit first // FPU PPCREC_IML_OP_FPR_ADD_BOTTOM, PPCREC_IML_OP_FPR_ADD_PAIR, @@ -54,9 +47,9 @@ enum PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half - PPCREC_IML_OP_FPR_FCMPO_BOTTOM, - PPCREC_IML_OP_FPR_FCMPU_BOTTOM, - PPCREC_IML_OP_FPR_FCMPU_TOP, + PPCREC_IML_OP_FPR_FCMPO_BOTTOM, // deprecated + PPCREC_IML_OP_FPR_FCMPU_BOTTOM, // deprecated + PPCREC_IML_OP_FPR_FCMPU_TOP, // deprecated PPCREC_IML_OP_FPR_NEGATE_BOTTOM, PPCREC_IML_OP_FPR_NEGATE_PAIR, PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0) @@ -111,21 +104,6 @@ enum PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak }; -enum // deprecated condition codes -{ - PPCREC_JUMP_CONDITION_NONE, - PPCREC_JUMP_CONDITION_E, // equal / zero - PPCREC_JUMP_CONDITION_NE, // not equal / not zero - PPCREC_JUMP_CONDITION_LE, // less or equal - PPCREC_JUMP_CONDITION_L, // less - PPCREC_JUMP_CONDITION_GE, // greater or equal - PPCREC_JUMP_CONDITION_G, // greater - // special case: - PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW, // needs special handling - 
PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW, // not summaryoverflow - -}; - enum class IMLCondition : uint8 { EQ, @@ -137,14 +115,17 @@ enum class IMLCondition : uint8 SIGNED_OVERFLOW, SIGNED_NOVERFLOW, -}; -enum -{ - PPCREC_CR_MODE_COMPARE_SIGNED, - PPCREC_CR_MODE_COMPARE_UNSIGNED, // alias logic compare + // floating point conditions + UNORDERED_GT, // a > b, false if either is NaN + UNORDERED_LT, // a < b, false if either is NaN + UNORDERED_EQ, // a == b, false if either is NaN + UNORDERED_U, // unordered (true if either operand is NaN) - PPCREC_CR_MODE_LOGICAL, + ORDERED_GT, + ORDERED_LT, + ORDERED_EQ, + ORDERED_U }; enum @@ -164,18 +145,20 @@ enum PPCREC_IML_TYPE_NAME_R, // name* = r* PPCREC_IML_TYPE_R_S32, // r* (op) imm PPCREC_IML_TYPE_MACRO, - PPCREC_IML_TYPE_CJUMP, // conditional jump PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0 - PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands) - // new style of handling conditions and branches: + // conditions and branches PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r* PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm - PPCREC_IML_TYPE_JUMP, // replaces CJUMP. Jump always, no condition - PPCREC_IML_TYPE_CONDITIONAL_JUMP, // replaces CJUMP. 
Jump condition is based on boolean register + PPCREC_IML_TYPE_JUMP, // jump always + PPCREC_IML_TYPE_CONDITIONAL_JUMP, // jump conditionally based on boolean value in register - // conditional + // atomic + PPCREC_IML_TYPE_ATOMIC_CMP_STORE, + + // conditional (legacy) PPCREC_IML_TYPE_CONDITIONAL_R_S32, + // FPR PPCREC_IML_TYPE_FPR_R_NAME, // name = f* PPCREC_IML_TYPE_FPR_NAME_R, // f* = name @@ -187,6 +170,8 @@ enum PPCREC_IML_TYPE_FPR_R_R_R, PPCREC_IML_TYPE_FPR_R_R_R_R, PPCREC_IML_TYPE_FPR_R, + + PPCREC_IML_TYPE_FPR_COMPARE, // r* = r* CMP[cond] r* }; enum @@ -197,15 +182,18 @@ enum PPCREC_NAME_SPR0 = 3000, PPCREC_NAME_FPR0 = 4000, PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7 - PPCREC_NAME_XER_CA = 6000, // carry bit + PPCREC_NAME_XER_CA = 6000, // carry bit from XER + PPCREC_NAME_XER_OV = 6001, // overflow bit from XER + PPCREC_NAME_XER_SO = 6002, // summary overflow bit from XER + PPCREC_NAME_CR = 7000, // CR register bits (31 to 0) + PPCREC_NAME_CR_LAST = PPCREC_NAME_CR+31, + PPCREC_NAME_CPU_MEMRES_EA = 8000, + PPCREC_NAME_CPU_MEMRES_VAL = 8001 }; -// special cases for LOAD/STORE -#define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value) -#define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid) - -#define PPC_REC_INVALID_REGISTER 0xFF +#define PPC_REC_INVALID_REGISTER 0xFF // deprecated. Use IMLREG_INVALID instead +// deprecated, use Espresso namespace #define PPCREC_CR_BIT_LT 0 #define PPCREC_CR_BIT_GT 1 #define PPCREC_CR_BIT_EQ 2 @@ -337,13 +325,12 @@ struct IMLUsedRegisters using IMLReg = uint8; +inline constexpr IMLReg IMLREG_INVALID = (IMLReg)-1; + struct IMLInstruction { uint8 type; uint8 operation; - uint8 crRegister; // set to 0xFF if not set, not all IML instruction types support cr. 
- uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior - uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated union { struct @@ -352,13 +339,11 @@ struct IMLInstruction }padding; struct { - // R (op) A [update cr* in mode *] uint8 registerResult; uint8 registerA; }op_r_r; struct { - // R = A (op) B [update cr* in mode *] uint8 registerResult; uint8 registerA; uint8 registerB; @@ -385,13 +370,11 @@ struct IMLInstruction }op_r_r_s32_carry; struct { - // R/F = NAME or NAME = R/F uint8 registerIndex; uint32 name; - }op_r_name; + }op_r_name; // alias op_name_r struct { - // R (op) s32 [update cr* in mode *] uint8 registerIndex; sint32 immS32; }op_r_immS32; @@ -402,13 +385,6 @@ struct IMLInstruction uint16 paramU16; }op_macro; struct - { - uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup) - uint8 crRegisterIndex; - uint8 crBitIndex; - bool bitMustBeSet; - }op_conditionalJump; // legacy jump - struct { uint8 registerData; uint8 registerMem; @@ -450,6 +426,13 @@ struct IMLInstruction uint8 registerResult; }op_fpr_r; struct + { + IMLReg regR; // stores the boolean result of the comparison + IMLReg regA; + IMLReg regB; + IMLCondition cond; + }op_fpr_compare; + struct { uint8 crD; // crBitIndex (result) uint8 crA; // crBitIndex @@ -474,6 +457,13 @@ struct IMLInstruction uint8 registerBool; bool mustBeTrue; }op_conditionalJump2; + struct + { + IMLReg regEA; + IMLReg regCompareValue; + IMLReg regWriteValue; + IMLReg regBoolOut; // boolean 0/1 + }op_atomic_compare_store; // conditional operations (emitted if supported by target platform) struct { @@ -495,7 +485,6 @@ struct IMLInstruction type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_LEAVE || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE || type == PPCREC_IML_TYPE_MACRO && operation 
== PPCREC_IML_MACRO_MFTB || - type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) @@ -508,8 +497,6 @@ struct IMLInstruction { type = PPCREC_IML_TYPE_NO_OP; operation = 0; - crRegister = PPC_REC_INVALID_REGISTER; - crMode = 0; } void make_debugbreak(uint32 currentPPCAddress = 0) @@ -530,7 +517,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; this->operation = 0; - this->crRegister = PPC_REC_INVALID_REGISTER; } @@ -539,8 +525,6 @@ struct IMLInstruction // operation with two register operands (e.g. "t0 = t1") this->type = PPCREC_IML_TYPE_R_R; this->operation = operation; - this->crRegister = crRegister; - this->crMode = crMode; this->op_r_r.registerResult = registerResult; this->op_r_r.registerA = registerA; } @@ -550,8 +534,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_R_S32; this->operation = operation; - this->crRegister = crRegister; - this->crMode = crMode; this->op_r_immS32.registerIndex = registerIndex; this->op_r_immS32.immS32 = immS32; } @@ -561,8 +543,6 @@ struct IMLInstruction // operation with three register operands (e.g. "t0 = t1 + t4") this->type = PPCREC_IML_TYPE_R_R_R; this->operation = operation; - this->crRegister = crRegister; - this->crMode = crMode; this->op_r_r_r.registerResult = registerResult; this->op_r_r_r.registerA = registerA; this->op_r_r_r.registerB = registerB; @@ -572,8 +552,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_R_R_R_CARRY; this->operation = operation; - this->crRegister = 0xFF; - this->crMode = 0xFF; this->op_r_r_r_carry.regR = registerResult; this->op_r_r_r_carry.regA = registerA; this->op_r_r_r_carry.regB = registerB; @@ -585,8 +563,6 @@ struct IMLInstruction // operation with two register operands and one signed immediate (e.g. 
"t0 = t1 + 1234") this->type = PPCREC_IML_TYPE_R_R_S32; this->operation = operation; - this->crRegister = crRegister; - this->crMode = crMode; this->op_r_r_s32.registerResult = registerResult; this->op_r_r_s32.registerA = registerA; this->op_r_r_s32.immS32 = immS32; @@ -596,8 +572,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_R_R_S32_CARRY; this->operation = operation; - this->crRegister = 0xFF; - this->crMode = 0xFF; this->op_r_r_s32_carry.regR = registerResult; this->op_r_r_s32_carry.regA = registerA; this->op_r_r_s32_carry.immS32 = immS32; @@ -608,8 +582,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_COMPARE; this->operation = -999; - this->crRegister = PPC_REC_INVALID_REGISTER; - this->crMode = 0; this->op_compare.registerResult = registerResult; this->op_compare.registerOperandA = registerA; this->op_compare.registerOperandB = registerB; @@ -620,8 +592,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_COMPARE_S32; this->operation = -999; - this->crRegister = PPC_REC_INVALID_REGISTER; - this->crMode = 0; this->op_compare_s32.registerResult = registerResult; this->op_compare_s32.registerOperandA = registerA; this->op_compare_s32.immS32 = immS32; @@ -632,8 +602,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP; this->operation = -999; - this->crRegister = PPC_REC_INVALID_REGISTER; - this->crMode = 0; this->op_conditionalJump2.registerBool = registerBool; this->op_conditionalJump2.mustBeTrue = mustBeTrue; } @@ -642,8 +610,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_JUMP; this->operation = -999; - this->crRegister = PPC_REC_INVALID_REGISTER; - this->crMode = 0; } // load from memory @@ -651,7 +617,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_LOAD; this->operation = 0; - this->crRegister = PPC_REC_INVALID_REGISTER; this->op_storeLoad.registerData = registerDestination; this->op_storeLoad.registerMem = registerMemory; this->op_storeLoad.immS32 = immS32; @@ -665,7 +630,6 @@ struct 
IMLInstruction { this->type = PPCREC_IML_TYPE_STORE; this->operation = 0; - this->crRegister = PPC_REC_INVALID_REGISTER; this->op_storeLoad.registerData = registerSource; this->op_storeLoad.registerMem = registerMemory; this->op_storeLoad.immS32 = immS32; @@ -674,6 +638,26 @@ struct IMLInstruction this->op_storeLoad.flags2.signExtend = false; } + void make_atomic_cmp_store(IMLReg regEA, IMLReg regCompareValue, IMLReg regWriteValue, IMLReg regSuccessOutput) + { + this->type = PPCREC_IML_TYPE_ATOMIC_CMP_STORE; + this->operation = 0; + this->op_atomic_compare_store.regEA = regEA; + this->op_atomic_compare_store.regCompareValue = regCompareValue; + this->op_atomic_compare_store.regWriteValue = regWriteValue; + this->op_atomic_compare_store.regBoolOut = regSuccessOutput; + } + + void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond) + { + this->type = PPCREC_IML_TYPE_FPR_COMPARE; + this->operation = -999; + this->op_fpr_compare.regR = regR; + this->op_fpr_compare.regA = regA; + this->op_fpr_compare.regB = regB; + this->op_fpr_compare.cond = cond; + } + void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; //void ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index f67b49e1..a1569d33 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -511,6 +511,8 @@ uint32 _PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, I */ uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { + __debugbreak(); // deprecated + if (imlSegment->nextSegmentIsUncertain) { return 0; @@ -535,81 +537,83 @@ uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, IM void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext) { - for 
(IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - for(IMLInstruction& instIt : segIt->imlList) - { - if (instIt.type == PPCREC_IML_TYPE_CJUMP) - { - if (instIt.op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - { - uint32 crBitFlag = 1 << (instIt.op_conditionalJump.crRegisterIndex * 4 + instIt.op_conditionalJump.crBitIndex); - segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written - segIt->crBitsRead |= (crBitFlag); - } - } - else if (instIt.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - uint32 crBitFlag = 1 << (instIt.op_conditional_r_s32.crRegisterIndex * 4 + instIt.op_conditional_r_s32.crBitIndex); - segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written - segIt->crBitsRead |= (crBitFlag); - } - else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MFCR) - { - segIt->crBitsRead |= 0xFFFFFFFF; - } - else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MTCRF) - { - segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)instIt.op_r_immS32.immS32); - } - else if( instIt.type == PPCREC_IML_TYPE_CR ) - { - if (instIt.operation == PPCREC_IML_OP_CR_CLEAR || - instIt.operation == PPCREC_IML_OP_CR_SET) - { - uint32 crBitFlag = 1 << (instIt.op_cr.crD); - segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); - } - else if (instIt.operation == PPCREC_IML_OP_CR_OR || - instIt.operation == PPCREC_IML_OP_CR_ORC || - instIt.operation == PPCREC_IML_OP_CR_AND || - instIt.operation == PPCREC_IML_OP_CR_ANDC) - { - uint32 crBitFlag = 1 << (instIt.op_cr.crD); - segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); - crBitFlag = 1 << (instIt.op_cr.crA); - segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); - crBitFlag = 1 << (instIt.op_cr.crB); - segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); - } - else - cemu_assert_unimplemented(); - } - else if 
(IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) - { - segIt->crBitsWritten |= (0xF<<(instIt.crRegister*4)); - } - else if( (instIt.type == PPCREC_IML_TYPE_STORE || instIt.type == PPCREC_IML_TYPE_STORE_INDEXED) && instIt.op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER ) - { - // overwrites CR0 - segIt->crBitsWritten |= (0xF<<0); - } - } - } - // flag instructions that write to CR where we can ignore individual CR bits - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - for (IMLInstruction& instIt : segIt->imlList) - { - if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) - { - uint32 crBitFlags = 0xF<<((uint32)instIt.crRegister*4); - uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt); - uint32 crIgnoreMask = crOverwriteMask & ~segIt->crBitsRead; - instIt.crIgnoreMask = crIgnoreMask; - } - } - } + __debugbreak(); // deprecated + + //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + //{ + // for(IMLInstruction& instIt : segIt->imlList) + // { + // if (instIt.type == PPCREC_IML_TYPE_CJUMP) + // { + // if (instIt.op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + // { + // uint32 crBitFlag = 1 << (instIt.op_conditionalJump.crRegisterIndex * 4 + instIt.op_conditionalJump.crBitIndex); + // segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written + // segIt->crBitsRead |= (crBitFlag); + // } + // } + // else if (instIt.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + // { + // uint32 crBitFlag = 1 << (instIt.op_conditional_r_s32.crRegisterIndex * 4 + instIt.op_conditional_r_s32.crBitIndex); + // segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written + // segIt->crBitsRead |= (crBitFlag); + // } + // else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MFCR) + // { + // segIt->crBitsRead |= 0xFFFFFFFF; 
+ // } + // else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MTCRF) + // { + // segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)instIt.op_r_immS32.immS32); + // } + // else if( instIt.type == PPCREC_IML_TYPE_CR ) + // { + // if (instIt.operation == PPCREC_IML_OP_CR_CLEAR || + // instIt.operation == PPCREC_IML_OP_CR_SET) + // { + // uint32 crBitFlag = 1 << (instIt.op_cr.crD); + // segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); + // } + // else if (instIt.operation == PPCREC_IML_OP_CR_OR || + // instIt.operation == PPCREC_IML_OP_CR_ORC || + // instIt.operation == PPCREC_IML_OP_CR_AND || + // instIt.operation == PPCREC_IML_OP_CR_ANDC) + // { + // uint32 crBitFlag = 1 << (instIt.op_cr.crD); + // segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); + // crBitFlag = 1 << (instIt.op_cr.crA); + // segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); + // crBitFlag = 1 << (instIt.op_cr.crB); + // segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); + // } + // else + // cemu_assert_unimplemented(); + // } + // else if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) + // { + // segIt->crBitsWritten |= (0xF<<(instIt.crRegister*4)); + // } + // else if( (instIt.type == PPCREC_IML_TYPE_STORE || instIt.type == PPCREC_IML_TYPE_STORE_INDEXED) && instIt.op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER ) + // { + // // overwrites CR0 + // segIt->crBitsWritten |= (0xF<<0); + // } + // } + //} + //// flag instructions that write to CR where we can ignore individual CR bits + //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + //{ + // for (IMLInstruction& instIt : segIt->imlList) + // { + // if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) + // { + // uint32 crBitFlags = 0xF<<((uint32)instIt.crRegister*4); + // uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt); + // uint32 crIgnoreMask = 
crOverwriteMask & ~segIt->crBitsRead; + // instIt.crIgnoreMask = crIgnoreMask; + // } + // } + //} } //bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index add7098e..8ef0669e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -1,7 +1,7 @@ #pragma once #include "IMLInstruction.h" -#define IML_RA_VIRT_REG_COUNT_MAX 40 // should match PPC_REC_MAX_VIRTUAL_GPR -> todo: Make this dynamic +#define IML_RA_VIRT_REG_COUNT_MAX (40 + 32) // should match PPC_REC_MAX_VIRTUAL_GPR -> todo: Make this dynamic struct IMLSegmentPoint { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index f74cd225..dd445b2c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -186,6 +186,14 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } + //if (ppcRecFunc->ppcAddress == 0x30DF5F8) + //{ + // debug_printf("----------------------------------------\n"); + // IMLDebug_Dump(&ppcImlGenContext); + // __debugbreak(); + //} + + //if (ppcRecFunc->ppcAddress == 0x11223344) //{ // //debug_printf("----------------------------------------\n"); @@ -302,9 +310,8 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam); - // remove redundant name load and store instructions - PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); - PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); + //PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); + //PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); return true; } diff 
--git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 5a4484da..c80fad8d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -6,7 +6,7 @@ #define PPC_REC_ALIGN_TO_4MB(__v) (((__v)+4*1024*1024-1)&~(4*1024*1024-1)) -#define PPC_REC_MAX_VIRTUAL_GPR (40) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2) +#define PPC_REC_MAX_VIRTUAL_GPR (40 + 32) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2) struct ppcRecRange_t { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 8377671a..d1475ffe 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -23,13 +23,7 @@ uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); // IML instruction generation -void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction); - void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); - - -// IML instruction generation (new style, can generate new instructions but also overwrite existing ones) - void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER); // IML generation - FPU diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 435a5a7e..2a1f2c71 100644 --- 
a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -17,8 +17,7 @@ struct PPCBasicBlockInfo uint32 startAddress; uint32 lastAddress; // inclusive bool isEnterable{ false }; - //uint32 enterableAddress{}; -> covered by startAddress - bool hasContinuedFlow{ true }; // non-branch path goes to next segment (lastAddress+4), assumed by default + bool hasContinuedFlow{ true }; // non-branch path goes to next segment, assumed by default bool hasBranchTarget{ false }; uint32 branchTarget{}; @@ -52,7 +51,6 @@ IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext { IMLInstruction& inst = ppcImlGenContext->currentOutputSegment->imlList.emplace_back(); memset(&inst, 0x00, sizeof(IMLInstruction)); - inst.crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default return &inst; } @@ -82,7 +80,6 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte memset(imlInstruction, 0, sizeof(IMLInstruction)); imlInstruction->type = PPCREC_IML_TYPE_CONDITIONAL_R_S32; imlInstruction->operation = operation; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // r_s32 operation imlInstruction->op_conditional_r_s32.registerIndex = registerIndex; imlInstruction->op_conditional_r_s32.immS32 = immS32; @@ -92,48 +89,6 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte imlInstruction->op_conditional_r_s32.bitMustBeSet = bitMustBeSet; } - -// jump based on segment branches -void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction) -{ - // jump - if (imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE; - 
imlInstruction->op_conditionalJump.crRegisterIndex = 0; - imlInstruction->op_conditionalJump.crBitIndex = 0; - imlInstruction->op_conditionalJump.bitMustBeSet = false; -} - -void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) -{ - // conditional jump - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.condition = jumpCondition; - imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; - imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; - imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; -} - -void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 crD, uint8 crA, uint8 crB) -{ - // multiple variations: - // operation involving only one cr bit (like clear crD bit) - // operation involving three cr bits (like crD = crA or crB) - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CR; - imlInstruction->operation = operation; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->crMode = 0; - imlInstruction->op_cr.crD = crD; - imlInstruction->op_cr.crA = crA; - imlInstruction->op_cr.crB = crB; -} - void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { ppcImlGenContext->emitInst().make_r_memory(registerDestination, registerMemory, immS32, copyWidth, signExtend, switchEndian); @@ -145,7 +100,6 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContex 
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_LOAD_INDEXED; imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->op_storeLoad.registerData = registerDestination; imlInstruction->op_storeLoad.registerMem = registerMemory1; imlInstruction->op_storeLoad.registerMem2 = registerMemory2; @@ -165,7 +119,6 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_STORE_INDEXED; imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->op_storeLoad.registerData = registerDestination; imlInstruction->op_storeLoad.registerMem = registerMemory1; imlInstruction->op_storeLoad.registerMem2 = registerMemory2; @@ -303,6 +256,13 @@ uint32 PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext_t* ppcImlGen return PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + temporaryIndex); } +IMLReg _GetCRReg(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit) +{ + cemu_assert_debug(crReg < 8); + cemu_assert_debug(crBit < 4); + return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + (crReg * 4) + (crBit)); +} + /* * Loads a PPC fpr into any of the available IML FPU registers * If loadNew is false, it will check first if the fpr is already loaded into any IML register @@ -408,7 +368,18 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin // for handling RC bit of many instructions void PPCImlGen_UpdateCR0Logical(ppcImlGenContext_t* ppcImlGenContext, uint32 registerR) { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerR, registerR, 0, PPCREC_CR_MODE_LOGICAL); + IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, 0, 
Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + // todo - SO bit? + + ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegLT, IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegGT, IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegEQ, IMLCondition::EQ); + + //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, crBitRegSO, 0); // todo - copy from XER + + //ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerR, registerR, 0, PPCREC_CR_MODE_LOGICAL); } void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -494,71 +465,82 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - sint32 rD, rA, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0); - return true; + printf("MFCR: Not implemented\n"); + return false; + + //sint32 rD, rA, rB; + //PPC_OPC_TEMPL_X(opcode, rD, rA, rB); + //uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0); + //return true; } bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - uint32 rS; - uint32 crMask; - PPC_OPC_TEMPL_XFX(opcode, rS, crMask); - uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask); + printf("MTCRF: Not implemented\n"); + return false; + + //uint32 rS; + 
//uint32 crMask; + //PPC_OPC_TEMPL_XFX(opcode, rS, crMask); + //uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask); + //return true; +} + +void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isUnsigned) +{ + uint32 cr; + int rA, rB; + PPC_OPC_TEMPL_X(opcode, cr, rA, rB); + cr >>= 2; + + IMLReg gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO); + + IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegLT, isUnsigned ? IMLCondition::UNSIGNED_LT : IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegGT, isUnsigned ? 
IMLCondition::UNSIGNED_GT : IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegEQ, IMLCondition::EQ); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, crBitRegSO, regXerSO); +} + +bool PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isUnsigned) +{ + uint32 cr; + int rA; + uint32 imm; + if (isUnsigned) + { + PPC_OPC_TEMPL_D_UImm(opcode, cr, rA, imm); + } + else + { + PPC_OPC_TEMPL_D_SImm(opcode, cr, rA, imm); + } + cr >>= 2; + + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO); + + IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegLT, isUnsigned ? IMLCondition::UNSIGNED_LT : IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegGT, isUnsigned ? 
IMLCondition::UNSIGNED_GT : IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegEQ, IMLCondition::EQ); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, crBitRegSO, regXerSO); + return true; } -void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 cr; - int rA, rB; - PPC_OPC_TEMPL_X(opcode, cr, rA, rB); - cr >>= 2; - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_COMPARE_SIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_SIGNED); -} - -void PPCRecompilerImlGen_CMPL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 cr; - int rA, rB; - PPC_OPC_TEMPL_X(opcode, cr, rA, rB); - cr >>= 2; - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); -} - -void PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 cr; - int rA; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, cr, rA, imm); - cr >>= 2; - sint32 b = imm; - // load gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, cr, PPCREC_CR_MODE_COMPARE_SIGNED); -} - -void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 cr; - int rA; - uint32 imm; - PPC_OPC_TEMPL_D_UImm(opcode, cr, rA, imm); - cr >>= 2; - uint32 b = imm; - // load gpr into register - uint32 gprRegister = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); -} - bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { uint32 li; @@ -575,8 +557,8 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) return true; } // is jump destination within recompiled function? - if( ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest) ) - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, nullptr); + if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) + ppcImlGenContext->emitInst().make_jump_new(); else ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); return true; @@ -589,6 +571,9 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) uint32 BO, BI, BD; PPC_OPC_TEMPL_B(opcode, BO, BI, BD); + // decodeOp_BC(uint32 opcode, uint32& BD, BOField& BO, uint32& BI, bool& AA, bool& LK) + Espresso::BOField boField(BO); + uint32 crRegister = BI/4; uint32 crBit = BI%4; uint32 jumpCondition = 0; @@ -597,6 +582,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0 bool ignoreCondition = (BO&16)!=0; + IMLReg regCRBit; + if (!ignoreCondition) + regCRBit = _GetCRReg(ppcImlGenContext, crRegister, crBit); + uint32 jumpAddressDest = BD; if( (opcode&PPC_OPC_AA) == 0 ) { @@ -605,35 +594,14 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( opcode&PPC_OPC_LK ) { + if (useDecrementer) + return false; // conditional function calls are not supported if( ignoreCondition == false ) { - // generate jump condition - 
if( conditionMustBeTrue ) - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - else - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; IMLSegment* blSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); + ppcImlGenContext->emitInst().make_conditional_jump_new(regCRBit, conditionMustBeTrue); blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); return true; } @@ -644,8 +612,8 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { if( ignoreCondition == false ) return false; // not supported for the moment - uint32 ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); - uint32 tmpBoolReg = PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext, 1); + IMLReg ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); + IMLReg tmpBoolReg = PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext, 1); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SUB, ctrRegister, ctrRegister, 1); ppcImlGenContext->emitInst().make_compare_s32(ctrRegister, 0, tmpBoolReg, decrementerMustBeZero ? 
IMLCondition::EQ : IMLCondition::NEQ); ppcImlGenContext->emitInst().make_conditional_jump_new(tmpBoolReg, true); @@ -661,34 +629,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) } else { - // generate jump condition - if( conditionMustBeTrue ) - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - else - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } - if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) { // near jump - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); + ppcImlGenContext->emitInst().make_conditional_jump_new(regCRBit, conditionMustBeTrue); } else { @@ -713,6 +657,10 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 crRegister = BI/4; uint32 crBit = BI%4; + IMLReg regCRBit; + if (!BO.conditionIgnore()) + regCRBit = _GetCRReg(ppcImlGenContext, crRegister, crBit); + uint32 branchDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + sprReg); if (LK) { @@ -738,39 +686,9 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // no decrementer but CR check cemu_assert_debug(ppcImlGenContext->currentBasicBlock->hasContinuedFlow); cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); - // generate jump condition - uint32 jumpCondition = 0; - if (!BO.conditionInverted()) - { - // CR bit must be set - if (crBit == 0) - jumpCondition = 
PPCREC_JUMP_CONDITION_L; - else if (crBit == 1) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if (crBit == 2) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if (crBit == 3) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } - else - { - if (crBit == 0) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if (crBit == 1) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if (crBit == 2) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if (crBit == 3) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - - // write the dynamic branch instruction to a new segment that is set as a branch target for the current segment PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); - - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, !BO.conditionInverted()); - - + ppcImlGenContext->emitInst().make_conditional_jump_new(regCRBit, !BO.conditionInverted()); bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_B_TO_REG, branchDestReg, 0, 0); } else @@ -1706,31 +1624,12 @@ bool PPCRecompilerImlGen_LBZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return true; } -bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - // load memory rA and rB into register - uint32 gprRegisterA = rA != 0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = 
PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load word - if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, PPC_REC_LOAD_LWARX_MARKER, false, true); - else - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterB, 0, PPC_REC_LOAD_LWARX_MARKER, false, true); - return true; -} - void PPCRecompilerImlGen_LMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - //uint32 ea = (rA ? hCPU->gpr[rA] : 0) + imm; + cemu_assert_debug(rA != 0); sint32 index = 0; while( rD <= 31 ) { @@ -1935,22 +1834,6 @@ bool PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext_t* ppcImlGenConte return true; } -bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rS, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // prepare registers - uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // store word - if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, PPC_REC_STORE_STWCX_MARKER, false, true); - else - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, destinationRegister, gprRegisterB, 0, PPC_REC_STORE_STWCX_MARKER, true); - return true; -} - bool PPCRecompilerImlGen_STWBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rA, rS, rB; @@ -1972,6 +1855,7 @@ void PPCRecompilerImlGen_STMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; 
PPC_OPC_TEMPL_D_SImm(opcode, rS, rA, imm); + cemu_assert_debug(rA != 0); sint32 index = 0; while( rS <= 31 ) { @@ -2063,6 +1947,86 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return true; } +bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + sint32 rA, rD, rB; + PPC_OPC_TEMPL_X(opcode, rD, rA, rB); + + IMLReg regA = rA != 0 ? PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : IMLREG_INVALID; + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA); + IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL); + // calculate EA + if (regA != IMLREG_INVALID) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB); + else + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB); + // load word + ppcImlGenContext->emitInst().make_r_memory(regD, regMemResEA, 0, 32, false, true); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResVal, regD); + return true; +} + +bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + sint32 rA, rS, rB; + PPC_OPC_TEMPL_X(opcode, rS, rA, rB); + IMLReg regA = rA != 0 ? 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : IMLREG_INVALID; + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); + IMLReg regData = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + IMLReg regTmpDataBE = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); + IMLReg regTmpCompareBE = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3); + // calculate EA + IMLReg regCalcEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + if (regA != IMLREG_INVALID) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regCalcEA, regA, regB); + else + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCalcEA, regB); + // get CR bit regs and set LT, GT and SO immediately + IMLReg regCrLT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_LT); + IMLReg regCrGT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_GT); + IMLReg regCrEQ = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_EQ); + IMLReg regCrSO = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_SO); + IMLReg regXerSO = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_SO); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrLT, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrGT, 0); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCrSO, regXerSO); + // get regs for reservation address and value + IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA); + IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL); + // compare calculated EA with reservation + IMLReg regTmpBool = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + ppcImlGenContext->emitInst().make_compare(regCalcEA, regMemResEA, regTmpBool, IMLCondition::EQ); + 
ppcImlGenContext->emitInst().make_conditional_jump_new(regTmpBool, true); + + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + [&](ppcImlGenContext_t& genCtx) + { + /* branch taken, EA matching */ + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, regTmpDataBE, regData); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, regTmpCompareBE, regMemResVal); + ppcImlGenContext->emitInst().make_atomic_cmp_store(regMemResEA, regTmpCompareBE, regTmpDataBE, regCrEQ); + }, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken, EA mismatching */ + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrEQ, 0); + } + ); + + // reset reservation + // I found contradictory information of whether the reservation is cleared in all cases, so unit testing would be required + // Most sources state that it is cleared on successful store. They don't explicitly mention what happens on failure + // In contrast, "The PowerPC 600 series, part 7: Atomic memory access and cache coherency" states that it is always cleared + // There may also be differences between individual PPC generations + // In disassembly I have never seen more than one STWCX after each LWARX, which hints at reservation always being cleared or at least the compiler assuming this + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResEA, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResVal, 0); + + return true; +} + bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rA, rB; @@ -2339,41 +2303,23 @@ bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); + IMLReg gprDestReg; if( rS == rB ) { // xor register with itself - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + gprDestReg = 
PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } else { // rA = rS ^ rA - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) - { - // make sure we don't overwrite rS or rA - if( gprSource1Reg == gprDestReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - else - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); - } - else - { - // rA = rS - if( gprDestReg != gprSource1Reg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - // rA ^= rB - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); - } + IMLReg gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg, gprSource2Reg); } + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); return true; } @@ -2427,15 +2373,9 @@ void PPCRecompilerImlGen_ORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_UImm(opcode, rS, 
rA, imm); - // ORI does not set cr0 flags - //hCPU->gpr[rA] = hCPU->gpr[rS] | imm; - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_OR, gprDestReg, (sint32)imm); + IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, gprDestReg, gprSourceReg, (sint32)imm); } void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2443,15 +2383,9 @@ void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - // ORI does not set cr0 flags - //hCPU->gpr[rA] = hCPU->gpr[rS] | imm; - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_OR, gprDestReg, (sint32)imm); + IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, gprDestReg, gprSourceReg, (sint32)imm); } void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2459,15 +2393,9 
@@ void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - //hCPU->gpr[rA] = hCPU->gpr[rS] ^ imm; - // XORI does not set cr0 flags - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm); + IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, gprDestReg, gprSourceReg, (sint32)imm); } void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2475,22 +2403,19 @@ void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - //hCPU->gpr[rA] = hCPU->gpr[rS] ^ imm; - // XORIS does not set cr0 flags - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm); + IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + 
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, gprDestReg, gprSourceReg, (sint32)imm); } bool PPCRecompilerImlGen_CROR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_OR, crD, crA, crB); + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regCrB); return true; } @@ -2498,7 +2423,12 @@ bool PPCRecompilerImlGen_CRORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_ORC, crD, crA, crB); + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regTmp); return true; } @@ -2506,7 +2436,10 @@ bool PPCRecompilerImlGen_CRAND(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_AND, crD, crA, crB); + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regCrB); return true; } @@ -2514,7 +2447,12 @@ bool PPCRecompilerImlGen_CRANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_ANDC, crD, crA, crB); + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regTmp); return true; } @@ -2522,17 +2460,15 @@ bool PPCRecompilerImlGen_CRXOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - if (crA == crB) + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + if (regCrA == regCrB) { - // both operands equal, clear bit in crD - // PPC's assert() uses this to pass a parameter to OSPanic - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_CLEAR, crD, 0, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 0); return true; } - else - { - return false; - } + 
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regCrR, regCrA, regCrB); return true; } @@ -2540,16 +2476,17 @@ bool PPCRecompilerImlGen_CREQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - if (crA == crB) + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + if (regCrA == regCrB) { - // both operands equal, set bit in crD - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_SET, crD, 0, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 1); return true; } - else - { - return false; - } + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regCrR, regCrA, regTmp); return true; } @@ -2682,15 +2619,18 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) switch (PPC_getBits(opcode, 25, 5)) { case 0: - PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 1: - PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 2: - PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; default: @@ -2843,14 +2783,16 @@ bool 
PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) PPCRecompilerImlGen_MULLI(ppcImlGenContext, opcode); break; case 8: // SUBFIC - if( !PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode) ) + if (!PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode)) unsupportedInstructionFound = true; break; case 10: // CMPLI - PPCRecompilerImlGen_CMPLI(ppcImlGenContext, opcode); + if (!PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode, true)) + unsupportedInstructionFound = true; break; case 11: // CMPI - PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode); + if (!PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode, false)) + unsupportedInstructionFound = true; break; case 12: // ADDIC if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, false) == false) @@ -2964,7 +2906,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) switch (PPC_getBits(opcode, 30, 10)) { case 0: - PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode); + PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode, false); break; case 4: PPCRecompilerImlGen_TW(ppcImlGenContext, opcode); @@ -3009,7 +2951,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; break; case 32: - PPCRecompilerImlGen_CMPL(ppcImlGenContext, opcode); + PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode, true); // CMPL break; case 40: if (PPCRecompilerImlGen_SUBF(ppcImlGenContext, opcode) == false) @@ -3764,15 +3706,7 @@ void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext } // check last instruction of segment IMLInstruction* imlInstruction = segIt->GetLastInstruction(); - if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - { - cemu_assert_debug(segIt->GetBranchTaken()); - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - { - cemu_assert_debug(segIt->GetBranchNotTaken()); - } - } - else if (imlInstruction->type == 
PPCREC_IML_TYPE_MACRO) + if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) { auto macroType = imlInstruction->operation; switch (macroType) @@ -3854,7 +3788,6 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction PPCRecompiler_pushBackIMLInstructions(seg, 0, 1); seg->imlList[0].type = PPCREC_IML_TYPE_MACRO; - seg->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; seg->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; seg->imlList[0].op_macro.param = ppcInstructionCount; } @@ -3937,20 +3870,13 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction cemu_assert_debug(seg->GetBranchTaken()); cemu_assert_debug(seg->GetBranchNotTaken()); } - if (inst->type == PPCREC_IML_TYPE_CJUMP) + if (inst->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { - if (inst->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + if (!seg->GetBranchTaken() || !seg->GetBranchNotTaken()) { - if (!seg->GetBranchTaken() || !seg->GetBranchNotTaken()) - { - debug_printf("---------------- SegmentDump (Missing branch for CJUMP in segment 0x%x):\n", (int)segIndex); - IMLDebug_Dump(&ppcImlGenContext); - cemu_assert_error(); - } - } - else - { - // proper error checking for branch-always (or branch-never if invert bit is set) + debug_printf("---------------- SegmentDump (Missing branch for conditional jump in segment 0x%x):\n", (int)segIndex); + IMLDebug_Dump(&ppcImlGenContext); + cemu_assert_error(); } } } @@ -3968,90 +3894,90 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction void IMLOptimizer_replaceWithConditionalMov(ppcImlGenContext_t& ppcImlGenContext) { // optimization pass - replace segments with conditional MOVs if possible - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - if (segIt->nextSegmentBranchNotTaken == nullptr || segIt->nextSegmentBranchTaken == nullptr) - continue; // not a branching segment - IMLInstruction* lastInstruction = segIt->GetLastInstruction(); - if 
(lastInstruction->type != PPCREC_IML_TYPE_CJUMP || lastInstruction->op_conditionalJump.crRegisterIndex != 0) - continue; - IMLSegment* conditionalSegment = segIt->nextSegmentBranchNotTaken; - IMLSegment* finalSegment = segIt->nextSegmentBranchTaken; - if (segIt->nextSegmentBranchTaken != segIt->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) - continue; - if (segIt->nextSegmentBranchNotTaken->imlList.size() > 4) - continue; - if (conditionalSegment->list_prevSegments.size() != 1) - continue; // the reduced segment must not be the target of any other branch - if (conditionalSegment->isEnterable) - continue; - // check if the segment contains only iml instructions that can be turned into conditional moves (Value assignment, register assignment) - bool canReduceSegment = true; - for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) - { - IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; - if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - continue; - // todo: Register to register copy - canReduceSegment = false; - break; - } + //for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + //{ + // if (segIt->nextSegmentBranchNotTaken == nullptr || segIt->nextSegmentBranchTaken == nullptr) + // continue; // not a branching segment + // IMLInstruction* lastInstruction = segIt->GetLastInstruction(); + // if (lastInstruction->type != PPCREC_IML_TYPE_CJUMP || lastInstruction->op_conditionalJump.crRegisterIndex != 0) + // continue; + // IMLSegment* conditionalSegment = segIt->nextSegmentBranchNotTaken; + // IMLSegment* finalSegment = segIt->nextSegmentBranchTaken; + // if (segIt->nextSegmentBranchTaken != segIt->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) + // continue; + // if (segIt->nextSegmentBranchNotTaken->imlList.size() > 4) + // continue; + // if (conditionalSegment->list_prevSegments.size() != 1) + // continue; // the reduced segment must not be the target of any 
other branch + // if (conditionalSegment->isEnterable) + // continue; + // // check if the segment contains only iml instructions that can be turned into conditional moves (Value assignment, register assignment) + // bool canReduceSegment = true; + // for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) + // { + // IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; + // if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + // continue; + // // todo: Register to register copy + // canReduceSegment = false; + // break; + // } - if (canReduceSegment == false) - continue; + // if (canReduceSegment == false) + // continue; - // remove the branch instruction - uint8 branchCond_crRegisterIndex = lastInstruction->op_conditionalJump.crRegisterIndex; - uint8 branchCond_crBitIndex = lastInstruction->op_conditionalJump.crBitIndex; - bool branchCond_bitMustBeSet = lastInstruction->op_conditionalJump.bitMustBeSet; - lastInstruction->make_no_op(); + // // remove the branch instruction + // uint8 branchCond_crRegisterIndex = lastInstruction->op_conditionalJump.crRegisterIndex; + // uint8 branchCond_crBitIndex = lastInstruction->op_conditionalJump.crBitIndex; + // bool branchCond_bitMustBeSet = lastInstruction->op_conditionalJump.bitMustBeSet; + // lastInstruction->make_no_op(); - // append conditional moves based on branch condition - for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) - { - IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; - if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(segIt), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); - else - assert_dbg(); 
- } - // update segment links - // source segment: imlSegment, conditional/removed segment: conditionalSegment, final segment: finalSegment - IMLSegment_RemoveLink(segIt, conditionalSegment); - IMLSegment_RemoveLink(segIt, finalSegment); - IMLSegment_RemoveLink(conditionalSegment, finalSegment); - IMLSegment_SetLinkBranchNotTaken(segIt, finalSegment); - // remove all instructions from conditional segment - conditionalSegment->imlList.clear(); + // // append conditional moves based on branch condition + // for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) + // { + // IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; + // if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + // PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(segIt), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); + // else + // assert_dbg(); + // } + // // update segment links + // // source segment: imlSegment, conditional/removed segment: conditionalSegment, final segment: finalSegment + // IMLSegment_RemoveLink(segIt, conditionalSegment); + // IMLSegment_RemoveLink(segIt, finalSegment); + // IMLSegment_RemoveLink(conditionalSegment, finalSegment); + // IMLSegment_SetLinkBranchNotTaken(segIt, finalSegment); + // // remove all instructions from conditional segment + // conditionalSegment->imlList.clear(); - // if possible, merge imlSegment with finalSegment - if (finalSegment->isEnterable == false && finalSegment->list_prevSegments.size() == 1) - { - // todo: Clean this up and move into separate function PPCRecompilerIML_mergeSegments() - IMLSegment_RemoveLink(segIt, finalSegment); - if (finalSegment->nextSegmentBranchNotTaken) - { - IMLSegment* tempSegment = finalSegment->nextSegmentBranchNotTaken; - 
IMLSegment_RemoveLink(finalSegment, tempSegment); - IMLSegment_SetLinkBranchNotTaken(segIt, tempSegment); - } - if (finalSegment->nextSegmentBranchTaken) - { - IMLSegment* tempSegment = finalSegment->nextSegmentBranchTaken; - IMLSegment_RemoveLink(finalSegment, tempSegment); - IMLSegment_SetLinkBranchTaken(segIt, tempSegment); - } - // copy IML instructions - cemu_assert_debug(segIt != finalSegment); - for (sint32 f = 0; f < finalSegment->imlList.size(); f++) - { - memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList.data() + f, sizeof(IMLInstruction)); - } - finalSegment->imlList.clear(); - } + // // if possible, merge imlSegment with finalSegment + // if (finalSegment->isEnterable == false && finalSegment->list_prevSegments.size() == 1) + // { + // // todo: Clean this up and move into separate function PPCRecompilerIML_mergeSegments() + // IMLSegment_RemoveLink(segIt, finalSegment); + // if (finalSegment->nextSegmentBranchNotTaken) + // { + // IMLSegment* tempSegment = finalSegment->nextSegmentBranchNotTaken; + // IMLSegment_RemoveLink(finalSegment, tempSegment); + // IMLSegment_SetLinkBranchNotTaken(segIt, tempSegment); + // } + // if (finalSegment->nextSegmentBranchTaken) + // { + // IMLSegment* tempSegment = finalSegment->nextSegmentBranchTaken; + // IMLSegment_RemoveLink(finalSegment, tempSegment); + // IMLSegment_SetLinkBranchTaken(segIt, tempSegment); + // } + // // copy IML instructions + // cemu_assert_debug(segIt != finalSegment); + // for (sint32 f = 0; f < finalSegment->imlList.size(); f++) + // { + // memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList.data() + f, sizeof(IMLInstruction)); + // } + // finalSegment->imlList.clear(); + // } - // todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the segment is not an entry point or has no other jump sources) - } + // // todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the 
segment is not an entry point or has no other jump sources) + //} } bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index 95cfd176..b8986db4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -1,14 +1,16 @@ +#include "Cafe/HW/Espresso/EspressoISA.h" #include "../Interpreter/PPCInterpreterInternal.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" #include "Cafe/GameProfile/GameProfile.h" +IMLReg _GetCRReg(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit); + void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) { // load from memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; imlInstruction->op_storeLoad.registerData = registerDestination; imlInstruction->op_storeLoad.registerMem = registerMemory; @@ -23,7 +25,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenCo // load from memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; imlInstruction->op_storeLoad.registerData = registerDestination; imlInstruction->op_storeLoad.registerMem = registerMemory1; @@ -39,7 +40,6 @@ void 
PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t* // store to memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; imlInstruction->op_storeLoad.registerData = registerSource; imlInstruction->op_storeLoad.registerMem = registerMemory; @@ -54,7 +54,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenCo // store to memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; imlInstruction->op_storeLoad.registerData = registerSource; imlInstruction->op_storeLoad.registerMem = registerMemory1; @@ -73,7 +72,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext_t* ppcI imlInstruction->operation = operation; imlInstruction->op_fpr_r_r.registerResult = registerResult; imlInstruction->op_fpr_r_r.registerOperand = registerOperand; - imlInstruction->crRegister = crRegister; imlInstruction->op_fpr_r_r.flags = 0; } @@ -86,7 +84,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* pp imlInstruction->op_fpr_r_r_r.registerResult = registerResult; imlInstruction->op_fpr_r_r_r.registerOperandA = registerOperand1; imlInstruction->op_fpr_r_r_r.registerOperandB = registerOperand2; - imlInstruction->crRegister = crRegister; imlInstruction->op_fpr_r_r_r.flags = 0; } @@ -100,7 +97,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t* imlInstruction->op_fpr_r_r_r_r.registerOperandA = registerOperandA; imlInstruction->op_fpr_r_r_r_r.registerOperandB = registerOperandB; imlInstruction->op_fpr_r_r_r_r.registerOperandC = registerOperandC; - imlInstruction->crRegister = 
crRegister; imlInstruction->op_fpr_r_r_r_r.flags = 0; } @@ -112,7 +108,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcIml imlInstruction->type = PPCREC_IML_TYPE_FPR_R; imlInstruction->operation = operation; imlInstruction->op_fpr_r.registerResult = registerResult; - imlInstruction->crRegister = crRegister; } /* @@ -916,12 +911,33 @@ bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 op bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - sint32 crfD, frA, frB; - PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); - crfD >>= 2; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); + printf("FCMPO: Not implemented\n"); + return false; + + //sint32 crfD, frA, frB; + //PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); + //crfD >>= 2; + //IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); + //IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); + + //IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + //IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + //IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + //IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT); + //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT); + //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, 
IMLCondition::UNORDERED_EQ); + //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U); + + // todo - set fpscr + + //sint32 crfD, frA, frB; + //PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); + //crfD >>= 2; + //uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + //uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + //PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); return true; } @@ -930,9 +946,21 @@ bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 crfD, frA, frB; PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); crfD >>= 2; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPU_BOTTOM, fprRegisterA, fprRegisterB, crfD); + IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); + IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); + + IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT); + ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT); + ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, 
IMLCondition::UNORDERED_EQ); + ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U); + + // todo: set fpscr + return true; } @@ -1837,6 +1865,9 @@ bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32 bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + printf("PS_CMPO0: Not implemented\n"); + return false; + sint32 crfD, frA, frB; uint32 c=0; frB = (opcode>>11)&0x1F; @@ -1851,6 +1882,9 @@ bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 o bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + printf("PS_CMPU0: Not implemented\n"); + return false; + sint32 crfD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; @@ -1863,6 +1897,9 @@ bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 o bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + printf("PS_CMPU1: Not implemented\n"); + return false; + sint32 crfD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index 7b4b94fb..61be66aa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -81,7 +81,7 @@ void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenCont entrySegment->enterPPCAddress = imlSegment->enterPPCAddress; // create jump instruction PPCRecompiler_pushBackIMLInstructions(entrySegment, 0, 1); - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, entrySegment->imlList.data() + 0); + entrySegment->imlList.data()[0].make_jump_new(); IMLSegment_SetLinkBranchTaken(entrySegment, imlSegment); // remove enterable flag from original segment imlSegment->isEnterable = false;