diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp index fe9316f0..769344f8 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp @@ -3,12 +3,12 @@ static void PPCInterpreter_setXerOV(PPCInterpreter_t* hCPU, bool hasOverflow) { if (hasOverflow) { - hCPU->spr.XER |= XER_SO; - hCPU->spr.XER |= XER_OV; + hCPU->xer_so = 1; + hCPU->xer_ov = 1; } else { - hCPU->spr.XER &= ~XER_OV; + hCPU->xer_ov = 0; } } @@ -246,7 +246,7 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode) uint32 a = hCPU->gpr[rA]; uint32 b = hCPU->gpr[rB]; hCPU->gpr[rD] = ~a + b + 1; - // update xer + // update carry if (ppc_carry_3(~a, b, 1)) hCPU->xer_ca = 1; else @@ -848,8 +848,7 @@ static void PPCInterpreter_CMP(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if ((hCPU->spr.XER & XER_SO) != 0) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } @@ -871,8 +870,7 @@ static void PPCInterpreter_CMPL(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if ((hCPU->spr.XER & XER_SO) != 0) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } @@ -895,8 +893,7 @@ static void PPCInterpreter_CMPI(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if (hCPU->spr.XER & XER_SO) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } @@ -919,8 +916,7 @@ static void PPCInterpreter_CMPLI(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if (hCPU->spr.XER & XER_SO) - hCPU->cr[cr * 4 + CR_BIT_SO] = 
1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h index bc8458d9..bac253c4 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h @@ -50,9 +50,9 @@ #define CR_BIT_EQ 2 #define CR_BIT_SO 3 -#define XER_SO (1<<31) // summary overflow bit -#define XER_OV (1<<30) // overflow bit #define XER_BIT_CA (29) // carry bit index. To accelerate frequent access, this bit is stored as a separate uint8 +#define XER_BIT_SO (31) // summary overflow, counterpart to CR SO +#define XER_BIT_OV (30) // FPSCR #define FPSCR_VXSNAN (1<<24) @@ -118,7 +118,8 @@ static inline void ppc_update_cr0(PPCInterpreter_t* hCPU, uint32 r) { - hCPU->cr[CR_BIT_SO] = (hCPU->spr.XER&XER_SO) ? 1 : 0; + cemu_assert_debug(hCPU->xer_so <= 1); + hCPU->cr[CR_BIT_SO] = hCPU->xer_so; hCPU->cr[CR_BIT_LT] = ((r != 0) ? 1 : 0) & ((r & 0x80000000) ? 1 : 0); hCPU->cr[CR_BIT_EQ] = (r == 0); hCPU->cr[CR_BIT_GT] = hCPU->cr[CR_BIT_EQ] ^ hCPU->cr[CR_BIT_LT] ^ 1; // this works because EQ and LT can never be set at the same time. So the only case where GT becomes 1 is when LT=0 and EQ=0 diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp index 694e05e6..26467458 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp @@ -85,7 +85,8 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode) ppc_setCRBit(hCPU, CR_BIT_GT, 0); ppc_setCRBit(hCPU, CR_BIT_EQ, 1); } - ppc_setCRBit(hCPU, CR_BIT_SO, (hCPU->spr.XER&XER_SO) != 0 ? 
1 : 0); + cemu_assert_debug(hCPU->xer_so <= 1); + ppc_setCRBit(hCPU, CR_BIT_SO, hCPU->xer_so); // remove reservation hCPU->reservedMemAddr = 0; hCPU->reservedMemValue = 0; diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp index ace1601f..08d6765a 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp @@ -63,16 +63,24 @@ void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue) uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU) { uint32 xerValue = hCPU->spr.XER; - xerValue &= ~(1<<XER_BIT_CA); - if( hCPU->xer_ca ) - xerValue |= (1<<XER_BIT_CA); + xerValue &= ~(1 << XER_BIT_CA); + xerValue &= ~(1 << XER_BIT_SO); + xerValue &= ~(1 << XER_BIT_OV); + if (hCPU->xer_ca) + xerValue |= (1 << XER_BIT_CA); + if (hCPU->xer_so) + xerValue |= (1 << XER_BIT_SO); + if (hCPU->xer_ov) + xerValue |= (1 << XER_BIT_OV); return xerValue; } void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v) { hCPU->spr.XER = v; - hCPU->xer_ca = (v>>XER_BIT_CA)&1; + hCPU->xer_ca = (v >> XER_BIT_CA) & 1; + hCPU->xer_so = (v >> XER_BIT_SO) & 1; + hCPU->xer_ov = (v >> XER_BIT_OV) & 1; } uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU) diff --git a/src/Cafe/HW/Espresso/PPCState.h b/src/Cafe/HW/Espresso/PPCState.h index ea7edfa2..8f27ee93 100644 --- a/src/Cafe/HW/Espresso/PPCState.h +++ b/src/Cafe/HW/Espresso/PPCState.h @@ -49,6 +49,8 @@ struct PPCInterpreter_t uint32 fpscr; uint8 cr[32]; // 0 -> bit not set, 1 -> bit set (upper 7 bits of each byte must always be zero) (cr0 starts at index 0, cr1 at index 4 ..) 
uint8 xer_ca; // carry from xer + uint8 xer_so; + uint8 xer_ov; uint8 LSQE; uint8 PSE; // thread remaining cycles @@ -67,7 +69,7 @@ struct PPCInterpreter_t uint32 reservedMemValue; // temporary storage for recompiler FPR_t temporaryFPR[8]; - uint32 temporaryGPR[4]; // deprecated, refactor away backend dependency on this + uint32 temporaryGPR[4]; // deprecated, refactor backend dependency on this away uint32 temporaryGPR_reg[4]; // values below this are not used by Cafe OS usermode struct diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index c8d81fac..ef60c0af 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -28,6 +28,10 @@ static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) return (x86Assembler64::GPR8_REX)regId; } +static x86Assembler64::GPR64 _reg64_from_reg32(x86Assembler64::GPR32 regId) +{ + return (x86Assembler64::GPR64)regId; +} X86Cond _x86Cond(IMLCondition imlCond) { @@ -41,6 +45,10 @@ X86Cond _x86Cond(IMLCondition imlCond) return X86_CONDITION_NBE; case IMLCondition::UNSIGNED_LT: return X86_CONDITION_B; + case IMLCondition::SIGNED_GT: + return X86_CONDITION_NLE; + case IMLCondition::SIGNED_LT: + return X86_CONDITION_L; default: break; } @@ -88,18 +96,6 @@ void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, si } } -void PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) -{ - sint32 crRegister = imlInstruction->crRegister; - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // check for sign instead of _BELOW (CF) which is not set by TEST - if( 
(imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Set CR SO if XER SO bit is set -} - void* ATTR_MS_ABI PPCRecompiler_virtualHLE(PPCInterpreter_t* hCPU, uint32 hleFuncId) { void* prevRSPTemp = hCPU->rspTemp; @@ -424,7 +420,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p } else if( imlInstruction->op_storeLoad.copyWidth == 8 ) { - // todo: Optimize by using only MOVZX/MOVSX if( indexed ) x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); if( signExtend ) @@ -434,22 +429,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p if( indexed && realRegisterMem != realRegisterData ) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } - else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_LOAD_LWARX_MARKER ) - { - if( imlInstruction->op_storeLoad.immS32 != 0 ) - assert_dbg(); // not supported - if( indexed ) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), realRegisterMem); // remember EA for reservation - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - if( switchEndian ) - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - 
x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), realRegisterData); // remember value for reservation - // LWARX instruction costs extra cycles (this speeds up busy loops) - x64Gen_sub_mem32reg64_imm32(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20); - } else return false; return true; @@ -529,106 +508,62 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, if (indexed) x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) - { - if (imlInstruction->op_storeLoad.immS32 != 0) - assert_dbg(); // todo - // reset cr0 LT, GT and EQ - sint32 crRegister = 0; - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ), 0); - // calculate effective address - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - if (swapEndian) - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - // realRegisterMem now holds EA - x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext, realRegisterMem, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr)); - sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - // EA matches reservation - // backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten) - x64Emit_mov_mem32_reg32(x64GenContext, X86_REG_RSP, 
(uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); - // backup REG_RESV_MEMBASE - x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]), REG_RESV_MEMBASE); - // add mem register to REG_RESV_MEMBASE - x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem); - // load reserved value in EAX - x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue)); - // bswap EAX - x64Gen_bswap_reg64Lower32bit(x64GenContext, X86_REG_EAX); - - x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext, REG_RESV_MEMBASE, 0, REG_RESV_TEMP); - - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); - - // reset reservation - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), 0); - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), 0); - - // restore EAX - x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); - // restore REG_RESV_MEMBASE - x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_MEMBASE, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2])); - - // copy XER SO to CR0 SO - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO)); - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->emitter->GetWriteIndex()); - } else return false; return true; } +bool PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, 
IMLInstruction* imlInstruction) +{ + auto regBoolOut = _reg32_from_reg8(_reg8(imlInstruction->op_atomic_compare_store.regBoolOut)); + auto regEA = _reg32(imlInstruction->op_atomic_compare_store.regEA); + auto regVal = _reg32(imlInstruction->op_atomic_compare_store.regWriteValue); + auto regCmp = _reg32(imlInstruction->op_atomic_compare_store.regCompareValue); + + // make sure non of the regs are in EAX + if (regEA == X86_REG_EAX || + regBoolOut == X86_REG_EAX || + regVal == X86_REG_EAX || + regCmp == X86_REG_EAX) + { + printf("x86: atomic_cmp_store cannot emit due to EAX already being in use\n"); + return false; + } + + x64GenContext->emitter->XCHG_qq(REG_RESV_TEMP, X86_REG_RAX); + x64GenContext->emitter->MOV_dd(X86_REG_EAX, regCmp); + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regBoolOut), _reg32_from_reg8(regBoolOut)); // zero bytes unaffected by SETcc + x64GenContext->emitter->LockPrefix(); + x64GenContext->emitter->CMPXCHG_dd_l(REG_RESV_MEMBASE, 0, _reg64_from_reg32(regEA), 1, regVal); + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regBoolOut); + x64GenContext->emitter->XCHG_qq(REG_RESV_TEMP, X86_REG_RAX); + return true; +} + bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) { // registerResult = registerA - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - if(imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - if (imlInstruction->crMode == PPCREC_CR_MODE_LOGICAL) - { - // since MOV doesn't set eflags we need another test instruction - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerResult); - // set cr bits - 
PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - else - { - assert_dbg(); - } - } - else - { + if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - } } else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP) { if (imlInstruction->op_r_r.registerA != imlInstruction->op_r_r.registerResult) - assert_dbg(); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); // if movbe is available we can move and swap in a single instruction? x64Gen_bswap_reg64Lower32bit(x64GenContext, imlInstruction->op_r_r.registerResult); } else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); } else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.registerResult, reg32ToReg16(imlInstruction->op_r_r.registerA)); } else if( imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->operation == PPCREC_IML_OP_OR ) { // registerResult |= registerA @@ -647,7 +582,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else if( imlInstruction->operation == PPCREC_IML_OP_NOT ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register content if different registers if( 
imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); @@ -655,7 +589,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else if (imlInstruction->operation == PPCREC_IML_OP_NEG) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register content if different registers if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA) x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); @@ -663,9 +596,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp } else if( imlInstruction->operation == PPCREC_IML_OP_CNTLZW ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // count leading zeros - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) if(g_CPUFeatures.x86.lzcnt) { @@ -686,47 +617,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - // registerA CMP registerB (arithmetic compare) - if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) - { - return false; // a NO-OP instruction - } - if( imlInstruction->crRegister >= 8 ) - { - return false; - } - // create compare instruction - x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - // set cr bits - sint32 crRegister = imlInstruction->crRegister; - if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) - { - if( 
(imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Also set summary overflow if xer bit is set - } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Also set summary overflow if xer bit is set - } - else - assert_dbg(); - } else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) { x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA); @@ -758,98 +650,50 @@ bool 
PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, { if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) { - // registerResult = immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_AND ) { - // registerResult &= immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_OR ) { - // registerResult |= immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_XOR ) { - // registerResult ^= immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // registerResult <<<= immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( (imlInstruction->op_r_immS32.immS32&0x80) ) assert_dbg(); // should not happen x64Gen_rol_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint8)imlInstruction->op_r_immS32.immS32); } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - // registerResult CMP immS32 (arithmetic compare) - if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) - { - 
debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): No-Op CMP found\n"); - return true; // a NO-OP instruction - } - if( imlInstruction->crRegister >= 8 ) - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported CMP with crRegister = 8\n"); - return false; - } - // create compare instruction - x64Gen_cmp_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32); - // set cr bits - uint32 crRegister = imlInstruction->crRegister; - if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_ESP, 
offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - } - else - assert_dbg(); - // todo: Also set summary overflow if xer bit is set? - } else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) { - uint32 destRegister = imlInstruction->op_r_immS32.registerIndex; - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); - for(sint32 f=0; f<32; f++) - { - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); - } + __debugbreak(); + //uint32 destRegister = imlInstruction->op_r_immS32.registerIndex; + //x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); + //for(sint32 f=0; f<32; f++) + //{ + // x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0); + // x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); + //} } else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF) { - uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex; - uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); - for (sint32 f = 0; f < 32; f++) - { - if(((crBitMask >> f) & 1) == 0) - continue; - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0); - x64Gen_test_reg64Low32_imm32(x64GenContext, srcRegister, 0x80000000>>f); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f)); - } + __debugbreak(); + //uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex; + //uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); + //for (sint32 f = 0; f < 32; f++) + //{ + // if(((crBitMask >> f) & 1) == 0) + // continue; + // x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0); + // 
x64Gen_test_reg64Low32_imm32(x64GenContext, srcRegister, 0x80000000>>f); + // x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f)); + //} } else { @@ -861,30 +705,29 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - { - // registerResult = immS32 (conditional) - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - assert_dbg(); - } + cemu_assert_unimplemented(); + //if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + //{ + // // registerResult = immS32 (conditional) + // if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) + // { + // assert_dbg(); + // } - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)imlInstruction->op_conditional_r_s32.immS32); - uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex; - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); - if (imlInstruction->op_conditional_r_s32.bitMustBeSet) - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - else - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); - return true; - } + // x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)imlInstruction->op_conditional_r_s32.immS32); + // uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex; + // x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) 
+ crBitIndex * sizeof(uint8), 0); + // if (imlInstruction->op_conditional_r_s32.bitMustBeSet) + // x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); + // else + // x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP); + // return true; + //} return false; } bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - if (imlInstruction->operation == PPCREC_IML_OP_ADD) { // registerResult = registerOperand1 + registerOperand2 @@ -908,7 +751,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) { - // registerResult = registerOperand1 - registerOperand2 sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -940,7 +782,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegA = imlInstruction->op_r_r_r.registerA; sint32 rRegB = imlInstruction->op_r_r_r.registerB; @@ -1140,7 +981,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { - cemu_assert_debug(imlInstruction->crRegister == 
PPC_REC_INVALID_REGISTER); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1175,7 +1015,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; @@ -1310,15 +1149,12 @@ bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - sint32 regResult = imlInstruction->op_r_r_s32.registerResult; sint32 regOperand = imlInstruction->op_r_r_s32.registerA; uint32 immS32 = imlInstruction->op_r_r_s32.immS32; if( imlInstruction->operation == PPCREC_IML_OP_ADD ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; @@ -1328,7 +1164,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction } else if (imlInstruction->operation == PPCREC_IML_OP_SUB) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if (regResult != regOperand) x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32); @@ -1337,7 +1172,6 @@ bool 
PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_XOR) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if (regResult != regOperand) x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); if (imlInstruction->operation == PPCREC_IML_OP_AND) @@ -1355,8 +1189,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction uint32 me = (vImm>>8)&0xFF; uint32 sh = (vImm>>16)&0xFF; uint32 mask = ppc_mask(mb, me); - // save cr - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy rS to temporary register x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r_s32.registerA); // rotate destination register @@ -1434,50 +1266,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFu return true; } -bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction) -{ - if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE ) - { - // jump always - cemu_assert_debug(imlSegment->nextSegmentBranchTaken); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - x64Gen_jmp_imm32(x64GenContext, 0); - } - else - { - cemu_assert_debug(imlSegment->nextSegmentBranchTaken); - // generate jump update marker - if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 ) - { - // temporary cr is used, which means we use the currently active eflags - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); - sint32 condition = imlInstruction->op_conditionalJump.condition; - if( condition == 
PPCREC_JUMP_CONDITION_E ) - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - else if( condition == PPCREC_JUMP_CONDITION_NE ) - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - else - assert_dbg(); - } - else - { - uint8 crBitIndex = imlInstruction->op_conditionalJump.crRegisterIndex*4 + imlInstruction->op_conditionalJump.crBitIndex; - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); - cemu_assert_debug(imlSegment->GetBranchTaken()); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, (void*)imlSegment->GetBranchTaken()); - if( imlInstruction->op_conditionalJump.bitMustBeSet ) - { - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); - } - else - { - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_CARRY, 0); - } - } - } - return true; -} - bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { // some tests (all performed on a i7-4790K) @@ -1492,49 +1280,6 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction return true; } -/* -* PPC condition register operation -*/ -bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) -{ - if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR) - { - // clear cr bit - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD, 0); - return true; - } - else if (imlInstruction->operation == PPCREC_IML_OP_CR_SET) - { - // set cr bit - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD, 1); - return true; - } - else if(imlInstruction->operation == PPCREC_IML_OP_CR_OR || imlInstruction->operation 
== PPCREC_IML_OP_CR_ORC || - imlInstruction->operation == PPCREC_IML_OP_CR_AND || imlInstruction->operation == PPCREC_IML_OP_CR_ANDC ) - { - x64Emit_movZX_reg64_mem8(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crB); - if (imlInstruction->operation == PPCREC_IML_OP_CR_ORC || imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) - { - return false; // untested - x64Gen_int3(x64GenContext); - x64Gen_xor_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); // complement - } - if(imlInstruction->operation == PPCREC_IML_OP_CR_OR || imlInstruction->operation == PPCREC_IML_OP_CR_ORC) - x64Gen_or_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crA); - else - x64Gen_and_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crA); - - x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD); - - return true; - } - else - { - assert_dbg(); - } - return false; -} - void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; @@ -1567,6 +1312,22 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, { x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); } + else if (name == PPCREC_NAME_XER_SO) + { + x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so)); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, 
offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) + { + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue)); + } else assert_dbg(); } @@ -1603,6 +1364,22 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, { x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); } + else if (name == PPCREC_NAME_XER_SO) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) + { + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemAddr), imlInstruction->op_r_name.registerIndex); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, reservedMemValue), imlInstruction->op_r_name.registerIndex); + } else assert_dbg(); } @@ -1713,13 +1490,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo codeGenerationFailed = true; } } - else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP ) - { - if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, segIt, imlInstruction) == false ) - { - 
codeGenerationFailed = true; - } - } else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) { PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); @@ -1759,12 +1529,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo codeGenerationFailed = true; } } - else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) + else if (imlInstruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - if( PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) - { + if (!PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction)) codeGenerationFailed = true; - } } else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) { @@ -1822,6 +1590,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo { PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_COMPARE) + { + PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } else { debug_printf("PPCRecompiler_generateX64Code(): Unsupported iml type 0x%x\n", imlInstruction->type); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 1683c5b9..066078cb 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -62,11 +62,6 @@ enum X86_CONDITION_NONE, // no condition, jump always }; -#define PPCREC_CR_TEMPORARY (8) // never stored -#define PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC (0) // for signed arithmetic operations (ADD, CMPI) -#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI) -#define 
PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI) - #define PPC_X64_GPR_USABLE_REGISTERS (16-4) #define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register @@ -86,6 +81,8 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); + // ASM gen void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v); void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index 14d05d5a..47312487 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -5,6 +5,31 @@ #include "asm/x64util.h" // for recompiler_fres / frsqrte +uint32 _regF64(IMLReg r) +{ + return (uint32)r; +} + +static x86Assembler64::GPR32 _reg32(sint8 physRegId) +{ + return (x86Assembler64::GPR32)physRegId; +} + +static x86Assembler64::GPR8_REX _reg8(sint8 physRegId) +{ + return (x86Assembler64::GPR8_REX)physRegId; +} + +static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId) +{ + return (x86Assembler64::GPR32)regId; +} + +static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) +{ + return (x86Assembler64::GPR8_REX)regId; +} + void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, 
ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { uint32 name = imlInstruction->op_r_name.name; @@ -690,18 +715,10 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction { if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } // VPUNPCKHQDQ if (imlInstruction->op_fpr_r_r.registerResult == imlInstruction->op_fpr_r_r.registerOperand) { @@ -725,170 +742,73 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); } 
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, 2); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // use unpckhpd here? x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, 3); _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_divsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR) { - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - assert_dbg(); - } x64Gen_divpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - 
assert_dbg(); - } x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_addpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, imlInstruction->op_fpr_r_r.registerOperand); x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); // move to FPR register x64Gen_movq_xmmReg_reg64(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_TEMP); } - else if(imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM ) - { - if( 
imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM) - x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_TOP) - { - // temporarily switch top/bottom of both operands and compare - if (imlInstruction->op_fpr_r_r.registerResult == imlInstruction->op_fpr_r_r.registerOperand) - { - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - } - else - { - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerOperand); - x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerOperand); - } - } - else - x64Gen_comisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - // todo: handle FPSCR updates - // update cr - sint32 crRegister = imlInstruction->crRegister; - // if the parity bit is set (NaN) we need to manually set CR LT, GT and EQ to 0 (comisd/ucomisd sets the respective flags to 1 in case of NaN) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_PARITY, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_SO)); // unordered - sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_PARITY, 0); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 
X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // same as X64_CONDITION_CARRY - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ), 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); - } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP ) { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } // move register to XMM15 x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); @@ -901,7 +821,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // move register to XMM15 x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); @@ -914,7 +833,6 @@ void 
PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) { @@ -925,7 +843,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // copy register if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) { @@ -936,7 +853,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // calculate bottom half of result x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); if(imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR) @@ -968,10 +884,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti { if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM) { - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - assert_dbg(); - } if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA) { x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); @@ -988,8 +900,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM) { - // registerResult(fp0) = registerOperandA(fp0) + registerOperandB(fp0) - 
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // todo: Use AVX 3-operand VADDSD if available if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA) { @@ -1008,7 +918,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR) { // registerResult = registerOperandA - registerOperandB - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA ) { x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); @@ -1031,7 +940,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA ) { x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); @@ -1059,8 +967,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc { if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // todo: Investigate if there are other optimizations possible if the operand registers overlap // generic case // 1) move frA bottom to frTemp bottom and top @@ -1074,7 +980,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM1 ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // todo: Investigate if there are other optimizations possible if the operand registers overlap // 1) 
move frA bottom to frTemp bottom and top x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandA); @@ -1094,7 +999,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); @@ -1110,7 +1014,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // select bottom x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex(); @@ -1145,32 +1048,22 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc assert_dbg(); } -/* - * Single FPR operation - */ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // toggle sign bit x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } else if( imlInstruction->operation == 
PPCREC_IML_OP_FPR_ABS_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // mask out sign bit x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // set sign bit x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // convert to 32bit single x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); // convert back to 64bit double @@ -1178,7 +1071,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR ) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // convert to 32bit singles x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); // convert back to 64bit doubles @@ -1186,7 +1078,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, } else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64) { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // convert bottom to 64bit double x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); // copy to top half @@ -1197,3 +1088,44 @@ void 
PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, cemu_assert_unimplemented(); } } + +void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + auto regR = _reg8(imlInstruction->op_fpr_compare.regR); + auto regA = _regF64(imlInstruction->op_fpr_compare.regA); + auto regB = _regF64(imlInstruction->op_fpr_compare.regB); + + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); + x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, regA, regB); + + if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_GT) + { + // GT case can be covered with a single SETnbe which checks CF==0 && ZF==0 (unordered sets both) + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_NBE, regR); + return; + } + else if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_U) + { + // unordered case can be checked via PF + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PE, regR); + return; + } + + // remember unordered state + auto regTmp = _reg32_from_reg8(_reg32(REG_RESV_TEMP)); + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PO, regTmp); // by reversing the parity we can avoid having to XOR the value for masking the LT/EQ conditions + + X86Cond x86Cond; + switch (imlInstruction->op_fpr_compare.cond) + { + case IMLCondition::UNORDERED_LT: + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_B, regR); + break; + case IMLCondition::UNORDERED_EQ: + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regR); + break; + default: + cemu_assert_unimplemented(); + } + x64GenContext->emitter->AND_bb(_reg8_from_reg32(regR), _reg8_from_reg32(regTmp)); // if unordered (PF=1) then force LT/GT/EQ to zero +} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h index 
6b05a514..eae3835d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h @@ -84,6 +84,7 @@ public: using GPR64 = X86Reg; using GPR32 = X86Reg; using GPR8_REX = X86Reg; + void LockPrefix() { _emitU8(0xF0); }; // emits the x86 LOCK prefix byte (0xF0) void ADD_bb(GPR8_REX dst, GPR8_REX src) { if ((src >= 4) || (dst >= 4)) @@ -3194,6 +3195,124 @@ public: if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } + void XCHG_bb(GPR8_REX dst, GPR8_REX src) + { + if ((dst >= 4) || (src >= 4)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x86); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + } + void XCHG_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // XCHG r/m8, r8 (86 /r): swaps dst with the byte at [memReg + index + offset]; index is only encoded when scaler != 0 and the SIB scale field is always 0 (factor 1) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = (GPR64)(memReg & 7); // RSP/R12 base needs a SIB byte; keep only the low 3 bits so REX.X stays clear and SIB index 100b means "no index" (copying R12 would encode [r12 + r12]) + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // REX.B (bit 0) extends ModRM.rm = memReg; bit 2 is REX.R and belongs to the reg operand + } + _emitU8(0x86); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XCHG_dd(GPR32 dst, GPR32 src) + { + if (((dst & 8) != 0) || ((src & 8) != 0)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x87); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + } + void XCHG_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + _emitU8(0x87); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + } + void XCHG_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // XCHG r/m32, r32 (87 /r) with memory operand [memReg + index + offset] + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = (GPR64)(memReg & 7); // low 3 bits only: keeps REX.X clear so SIB index 100b means "no index" even for an R12 base + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // REX.R from dst (ModRM.reg), REX.B from memReg (ModRM.rm) + } + _emitU8(0x87); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XCHG_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // XCHG r/m64, r64 (REX.W 87 /r) with memory operand [memReg + index + offset] + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = (GPR64)(memReg & 7); // low 3 bits only: keeps REX.X clear so SIB index 100b means "no index" even for an R12 base + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // REX.W plus REX.R from dst (ModRM.reg) and REX.B from memReg (ModRM.rm) + } + _emitU8(0x87); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } void MOV_bb(GPR8_REX dst, GPR8_REX src) { if ((src >= 4) || (dst >= 4)) @@ -4032,6 +4151,102 @@ public: if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } + void CMPXCHG_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMPXCHG_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMPXCHG_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) // CMPXCHG r/m32, r32 (0F B1 /r) with memory destination [memReg + index + offset] + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = (GPR64)(memReg & 7); // low 3 bits only: keeps REX.X clear so SIB index 100b means "no index" even for an R12 base + 
sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // REX.R from src (ModRM.reg), REX.B from memReg (ModRM.rm) + } + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMPXCHG_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) // CMPXCHG r/m64, r64 (REX.W 0F B1 /r) with memory destination [memReg + index + offset] + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = (GPR64)(memReg & 7); // low 3 bits only: keeps REX.X clear so SIB index 100b means "no index" even for an R12 base + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // REX.W plus REX.R from src (ModRM.reg) and REX.B from memReg (ModRM.rm) + } + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void BSWAP_d(GPR32 dst) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x0f); + _emitU8(0xc8 | ((dst) & 7)); + } + void BSWAP_q(GPR64 dst) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x0f); + _emitU8(0xc8 | ((dst) & 7)); + } void BT_du8(GPR32 dst, u8 imm) { if (((dst & 8) != 0)) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index cd40de7f..d24fec87 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -18,7 +18,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) // loops using BDNZ are assumed to always be finite for(const IMLInstruction& instIt : imlSegment->imlList) { - if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_SUB && instIt.crRegister == 8) + if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_SUB) { return true; } @@ -92,59 +92,60 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) void IMLAnalyzer_GetCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_t* crTracking) { - crTracking->readCRBits = 0; - crTracking->writtenCRBits = 0; - if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP) - { - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - { - uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex); - crTracking->readCRBits = (crBitFlag); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex); - crTracking->readCRBits = crBitFlag; - } - 
else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR) - { - crTracking->readCRBits = 0xFFFFFFFF; - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF) - { - crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CR) - { - if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR || - imlInstruction->operation == PPCREC_IML_OP_CR_SET) - { - uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - crTracking->writtenCRBits = crBitFlag; - } - else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR || - imlInstruction->operation == PPCREC_IML_OP_CR_ORC || - imlInstruction->operation == PPCREC_IML_OP_CR_AND || - imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) - { - uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - crTracking->writtenCRBits = crBitFlag; - crBitFlag = 1 << (imlInstruction->op_cr.crA); - crTracking->readCRBits = crBitFlag; - crBitFlag = 1 << (imlInstruction->op_cr.crB); - crTracking->readCRBits |= crBitFlag; - } - else - assert_dbg(); - } - else if (IMLAnalyzer_CanTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7) - { - crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4)); - } - else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) - { - // overwrites CR0 - crTracking->writtenCRBits |= (0xF << 0); - } + __debugbreak(); + //crTracking->readCRBits = 0; + //crTracking->writtenCRBits = 0; + //if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP) + //{ + // if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + // { + // uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + 
imlInstruction->op_conditionalJump.crBitIndex); + // crTracking->readCRBits = (crBitFlag); + // } + //} + //else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + //{ + // uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex); + // crTracking->readCRBits = crBitFlag; + //} + //else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR) + //{ + // crTracking->readCRBits = 0xFFFFFFFF; + //} + //else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF) + //{ + // crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); + //} + //else if (imlInstruction->type == PPCREC_IML_TYPE_CR) + //{ + // if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR || + // imlInstruction->operation == PPCREC_IML_OP_CR_SET) + // { + // uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); + // crTracking->writtenCRBits = crBitFlag; + // } + // else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR || + // imlInstruction->operation == PPCREC_IML_OP_CR_ORC || + // imlInstruction->operation == PPCREC_IML_OP_CR_AND || + // imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) + // { + // uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); + // crTracking->writtenCRBits = crBitFlag; + // crBitFlag = 1 << (imlInstruction->op_cr.crA); + // crTracking->readCRBits = crBitFlag; + // crBitFlag = 1 << (imlInstruction->op_cr.crB); + // crTracking->readCRBits |= crBitFlag; + // } + // else + // assert_dbg(); + //} + //else if (IMLAnalyzer_CanTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7) + //{ + // crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4)); + //} + //else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && 
imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) + //{ + // // overwrites CR0 + // crTracking->writtenCRBits |= (0xF << 0); + //} } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 4dafaf18..2fbf2b6f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -206,6 +206,18 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool { strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0); } + else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST) + strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR); + else if (inst.op_r_name.name == PPCREC_NAME_XER_CA) + strOutput.add("xer.ca"); + else if (inst.op_r_name.name == PPCREC_NAME_XER_SO) + strOutput.add("xer.so"); + else if (inst.op_r_name.name == PPCREC_NAME_XER_OV) + strOutput.add("xer.ov"); + else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA) + strOutput.add("cpuReservation.ea"); + else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL) + strOutput.add("cpuReservation.value"); else strOutput.add("ukn"); strOutput.add(")"); @@ -217,11 +229,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.add(" "); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.registerResult); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.registerA, true); - - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } } else if (inst.type == PPCREC_IML_TYPE_R_R_R) { @@ -231,10 +238,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerResult); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerA); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.registerB, true); - if 
(inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } } else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY) { @@ -274,9 +277,13 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); IMLDebug_AppendRegisterParam(strOutput, inst.op_conditionalJump2.registerBool, true); - if(!inst.op_conditionalJump2.mustBeTrue) + if (!inst.op_conditionalJump2.mustBeTrue) strOutput.add("(inverted)"); } + else if (inst.type == PPCREC_IML_TYPE_JUMP) + { + strOutput.add("JUMP"); + } else if (inst.type == PPCREC_IML_TYPE_R_R_S32) { strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); @@ -286,11 +293,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.registerResult); IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.registerA); IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true); - - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } } else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY) { @@ -311,55 +313,42 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.registerIndex); IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true); - - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", inst.crRegister); - } } else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - { - if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) - strOutput.add("LD_"); - else - strOutput.add("ST_"); + { + if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) + strOutput.add("LD_"); + else + 
strOutput.add("ST_"); - if (inst.op_storeLoad.flags2.signExtend) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); + + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); + + if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) + strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2); + else + strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); + } + else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + { + strOutput.add("ATOMIC_ST_U32"); while ((sint32)strOutput.getLen() < lineOffsetParameters) strOutput.add(" "); - IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); - - if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) - strOutput.addFmt("[t{}+t{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.registerMem2); - else - strOutput.addFmt("[t{}+{}]", inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); - } - else if (inst.type == PPCREC_IML_TYPE_CJUMP) - { - if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_E) - strOutput.add("JE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NE) - strOutput.add("JNE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_G) - strOutput.add("JG"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_GE) - strOutput.add("JGE"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_L) - strOutput.add("JL"); - else if (inst.op_conditionalJump.condition == PPCREC_JUMP_CONDITION_LE) - strOutput.add("JLE"); - else if (inst.op_conditionalJump.condition == 
PPCREC_JUMP_CONDITION_NONE) - strOutput.add("JALW"); // jump always - else - cemu_assert_unimplemented(); - strOutput.addFmt(" (cr{})", inst.crRegister); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true); } else if (inst.type == PPCREC_IML_TYPE_NO_OP) { @@ -487,10 +476,6 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool else strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32); strOutput.add(" (conditional)"); - if (inst.crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> and update CR{}", inst.crRegister); - } } else { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 52e19e8c..b7e2294c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -154,7 +154,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const // carry is always written registersUsed->writtenNamedReg2 = op_r_r_r_carry.regCarry; } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // no effect on registers } @@ -222,9 +222,12 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) registersUsed->readNamedReg3 = op_storeLoad.registerMem2; } - else if (type == PPCREC_IML_TYPE_CR) + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - // only affects cr register + registersUsed->readNamedReg1 = op_atomic_compare_store.regEA; + registersUsed->readNamedReg2 = op_atomic_compare_store.regCompareValue; + 
registersUsed->readNamedReg3 = op_atomic_compare_store.regWriteValue; + registersUsed->writtenNamedReg1 = op_atomic_compare_store.regBoolOut; } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { @@ -467,6 +470,12 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else cemu_assert_unimplemented(); } + else if (type == PPCREC_IML_TYPE_FPR_COMPARE) + { + registersUsed->writtenNamedReg1 = op_fpr_compare.regR; + registersUsed->readFPR1 = op_fpr_compare.regA; + registersUsed->readFPR2 = op_fpr_compare.regB; + } else { cemu_assert_unimplemented(); @@ -560,7 +569,7 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl { op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, translationTable); } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP) + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP) { // no effect on registers } @@ -613,9 +622,12 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); } - else if (type == PPCREC_IML_TYPE_CR) + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - // only affects cr register + op_atomic_compare_store.regEA = replaceRegisterMultiple(op_atomic_compare_store.regEA, translationTable); + op_atomic_compare_store.regCompareValue = replaceRegisterMultiple(op_atomic_compare_store.regCompareValue, translationTable); + op_atomic_compare_store.regWriteValue = replaceRegisterMultiple(op_atomic_compare_store.regWriteValue, translationTable); + op_atomic_compare_store.regBoolOut = replaceRegisterMultiple(op_atomic_compare_store.regBoolOut, translationTable); } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { @@ -689,6 +701,10 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& transl else if 
(type == PPCREC_IML_TYPE_FPR_R) { } + else if (type == PPCREC_IML_TYPE_FPR_COMPARE) + { + op_fpr_compare.regR = replaceRegisterMultiple(op_fpr_compare.regR, translationTable); + } else { cemu_assert_unimplemented(); @@ -725,7 +741,7 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { // not affected } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // not affected } @@ -753,9 +769,9 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { // not affected } - else if (type == PPCREC_IML_TYPE_CR) + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - // only affects cr register + ; } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { @@ -803,6 +819,11 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { op_fpr_r.registerResult = replaceRegisterMultiple(op_fpr_r.registerResult, fprRegisterSearched, fprRegisterReplaced); } + else if (type == PPCREC_IML_TYPE_FPR_COMPARE) + { + op_fpr_compare.regA = replaceRegisterMultiple(op_fpr_compare.regA, fprRegisterSearched, fprRegisterReplaced); + op_fpr_compare.regB = replaceRegisterMultiple(op_fpr_compare.regB, fprRegisterSearched, fprRegisterReplaced); + } else { cemu_assert_unimplemented(); @@ -839,7 +860,7 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe { // not affected } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // not affected } @@ -867,9 +888,9 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe { // not affected } - else if (type == PPCREC_IML_TYPE_CR) + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - // only affects cr register + ; } else if (type == PPCREC_IML_TYPE_FPR_R_NAME) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h 
b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 9491136e..08955b39 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -29,13 +29,6 @@ enum PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20 PPCREC_IML_OP_MFCR, // copy cr to gpr PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask) - // condition register - PPCREC_IML_OP_CR_CLEAR, // clear cr bit - PPCREC_IML_OP_CR_SET, // set cr bit - PPCREC_IML_OP_CR_OR, // OR cr bits - PPCREC_IML_OP_CR_ORC, // OR cr bits, complement second input operand bit first - PPCREC_IML_OP_CR_AND, // AND cr bits - PPCREC_IML_OP_CR_ANDC, // AND cr bits, complement second input operand bit first // FPU PPCREC_IML_OP_FPR_ADD_BOTTOM, PPCREC_IML_OP_FPR_ADD_PAIR, @@ -54,9 +47,9 @@ enum PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half - PPCREC_IML_OP_FPR_FCMPO_BOTTOM, - PPCREC_IML_OP_FPR_FCMPU_BOTTOM, - PPCREC_IML_OP_FPR_FCMPU_TOP, + PPCREC_IML_OP_FPR_FCMPO_BOTTOM, // deprecated + PPCREC_IML_OP_FPR_FCMPU_BOTTOM, // deprecated + PPCREC_IML_OP_FPR_FCMPU_TOP, // deprecated PPCREC_IML_OP_FPR_NEGATE_BOTTOM, PPCREC_IML_OP_FPR_NEGATE_PAIR, PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0) @@ -111,21 +104,6 @@ enum PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak }; -enum // deprecated condition codes -{ - PPCREC_JUMP_CONDITION_NONE, - PPCREC_JUMP_CONDITION_E, // equal / zero - PPCREC_JUMP_CONDITION_NE, // not equal / not zero - PPCREC_JUMP_CONDITION_LE, // less or equal - PPCREC_JUMP_CONDITION_L, // less - PPCREC_JUMP_CONDITION_GE, // greater or equal - PPCREC_JUMP_CONDITION_G, // greater - // special case: - PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW, // needs special handling - 
PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW, // not summaryoverflow - -}; - enum class IMLCondition : uint8 { EQ, @@ -137,14 +115,17 @@ enum class IMLCondition : uint8 SIGNED_OVERFLOW, SIGNED_NOVERFLOW, -}; -enum -{ - PPCREC_CR_MODE_COMPARE_SIGNED, - PPCREC_CR_MODE_COMPARE_UNSIGNED, // alias logic compare + // floating point conditions + UNORDERED_GT, // a > b, false if either is NaN + UNORDERED_LT, // a < b, false if either is NaN + UNORDERED_EQ, // a == b, false if either is NaN + UNORDERED_U, // unordered (true if either operand is NaN) - PPCREC_CR_MODE_LOGICAL, + ORDERED_GT, + ORDERED_LT, + ORDERED_EQ, + ORDERED_U }; enum @@ -164,18 +145,20 @@ enum PPCREC_IML_TYPE_NAME_R, // name* = r* PPCREC_IML_TYPE_R_S32, // r* (op) imm PPCREC_IML_TYPE_MACRO, - PPCREC_IML_TYPE_CJUMP, // conditional jump PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0 - PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands) - // new style of handling conditions and branches: + // conditions and branches PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r* PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm - PPCREC_IML_TYPE_JUMP, // replaces CJUMP. Jump always, no condition - PPCREC_IML_TYPE_CONDITIONAL_JUMP, // replaces CJUMP. 
Jump condition is based on boolean register + PPCREC_IML_TYPE_JUMP, // jump always + PPCREC_IML_TYPE_CONDITIONAL_JUMP, // jump conditionally based on boolean value in register - // conditional + // atomic + PPCREC_IML_TYPE_ATOMIC_CMP_STORE, + + // conditional (legacy) PPCREC_IML_TYPE_CONDITIONAL_R_S32, + // FPR PPCREC_IML_TYPE_FPR_R_NAME, // name = f* PPCREC_IML_TYPE_FPR_NAME_R, // f* = name @@ -187,6 +170,8 @@ enum PPCREC_IML_TYPE_FPR_R_R_R, PPCREC_IML_TYPE_FPR_R_R_R_R, PPCREC_IML_TYPE_FPR_R, + + PPCREC_IML_TYPE_FPR_COMPARE, // r* = f* CMP[cond] f* (boolean result goes to a GPR, both operands are FPRs) }; enum @@ -197,15 +182,18 @@ enum PPCREC_NAME_SPR0 = 3000, PPCREC_NAME_FPR0 = 4000, PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7 - PPCREC_NAME_XER_CA = 6000, // carry bit + PPCREC_NAME_XER_CA = 6000, // carry bit from XER + PPCREC_NAME_XER_OV = 6001, // overflow bit from XER + PPCREC_NAME_XER_SO = 6002, // summary overflow bit from XER + PPCREC_NAME_CR = 7000, // CR register bits (31 to 0) + PPCREC_NAME_CR_LAST = PPCREC_NAME_CR+31, + PPCREC_NAME_CPU_MEMRES_EA = 8000, + PPCREC_NAME_CPU_MEMRES_VAL = 8001 }; -// special cases for LOAD/STORE -#define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value) -#define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid) - -#define PPC_REC_INVALID_REGISTER 0xFF +#define PPC_REC_INVALID_REGISTER 0xFF // deprecated. Use IMLREG_INVALID instead +// deprecated, use Espresso namespace #define PPCREC_CR_BIT_LT 0 #define PPCREC_CR_BIT_GT 1 #define PPCREC_CR_BIT_EQ 2 @@ -337,13 +325,12 @@ struct IMLUsedRegisters using IMLReg = uint8; +inline constexpr IMLReg IMLREG_INVALID = (IMLReg)-1; + struct IMLInstruction { uint8 type; uint8 operation; - uint8 crRegister; // set to 0xFF if not set, not all IML instruction types support cr. 
- uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior - uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated union { struct @@ -352,13 +339,11 @@ struct IMLInstruction }padding; struct { - // R (op) A [update cr* in mode *] uint8 registerResult; uint8 registerA; }op_r_r; struct { - // R = A (op) B [update cr* in mode *] uint8 registerResult; uint8 registerA; uint8 registerB; @@ -385,13 +370,11 @@ struct IMLInstruction }op_r_r_s32_carry; struct { - // R/F = NAME or NAME = R/F uint8 registerIndex; uint32 name; - }op_r_name; + }op_r_name; // alias op_name_r struct { - // R (op) s32 [update cr* in mode *] uint8 registerIndex; sint32 immS32; }op_r_immS32; @@ -402,13 +385,6 @@ struct IMLInstruction uint16 paramU16; }op_macro; struct - { - uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup) - uint8 crRegisterIndex; - uint8 crBitIndex; - bool bitMustBeSet; - }op_conditionalJump; // legacy jump - struct { uint8 registerData; uint8 registerMem; @@ -450,6 +426,13 @@ struct IMLInstruction uint8 registerResult; }op_fpr_r; struct + { + IMLReg regR; // stores the boolean result of the comparison + IMLReg regA; + IMLReg regB; + IMLCondition cond; + }op_fpr_compare; + struct { uint8 crD; // crBitIndex (result) uint8 crA; // crBitIndex @@ -474,6 +457,13 @@ struct IMLInstruction uint8 registerBool; bool mustBeTrue; }op_conditionalJump2; + struct + { + IMLReg regEA; + IMLReg regCompareValue; + IMLReg regWriteValue; + IMLReg regBoolOut; // boolean 0/1 + }op_atomic_compare_store; // conditional operations (emitted if supported by target platform) struct { @@ -495,7 +485,6 @@ struct IMLInstruction type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_LEAVE || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE || type == PPCREC_IML_TYPE_MACRO && operation 
== PPCREC_IML_MACRO_MFTB || - type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) @@ -508,8 +497,6 @@ struct IMLInstruction { type = PPCREC_IML_TYPE_NO_OP; operation = 0; - crRegister = PPC_REC_INVALID_REGISTER; - crMode = 0; } void make_debugbreak(uint32 currentPPCAddress = 0) @@ -530,7 +517,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; this->operation = 0; - this->crRegister = PPC_REC_INVALID_REGISTER; } @@ -539,8 +525,6 @@ struct IMLInstruction // operation with two register operands (e.g. "t0 = t1") this->type = PPCREC_IML_TYPE_R_R; this->operation = operation; - this->crRegister = crRegister; - this->crMode = crMode; this->op_r_r.registerResult = registerResult; this->op_r_r.registerA = registerA; } @@ -550,8 +534,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_R_S32; this->operation = operation; - this->crRegister = crRegister; - this->crMode = crMode; this->op_r_immS32.registerIndex = registerIndex; this->op_r_immS32.immS32 = immS32; } @@ -561,8 +543,6 @@ struct IMLInstruction // operation with three register operands (e.g. "t0 = t1 + t4") this->type = PPCREC_IML_TYPE_R_R_R; this->operation = operation; - this->crRegister = crRegister; - this->crMode = crMode; this->op_r_r_r.registerResult = registerResult; this->op_r_r_r.registerA = registerA; this->op_r_r_r.registerB = registerB; @@ -572,8 +552,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_R_R_R_CARRY; this->operation = operation; - this->crRegister = 0xFF; - this->crMode = 0xFF; this->op_r_r_r_carry.regR = registerResult; this->op_r_r_r_carry.regA = registerA; this->op_r_r_r_carry.regB = registerB; @@ -585,8 +563,6 @@ struct IMLInstruction // operation with two register operands and one signed immediate (e.g. 
"t0 = t1 + 1234") this->type = PPCREC_IML_TYPE_R_R_S32; this->operation = operation; - this->crRegister = crRegister; - this->crMode = crMode; this->op_r_r_s32.registerResult = registerResult; this->op_r_r_s32.registerA = registerA; this->op_r_r_s32.immS32 = immS32; @@ -596,8 +572,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_R_R_S32_CARRY; this->operation = operation; - this->crRegister = 0xFF; - this->crMode = 0xFF; this->op_r_r_s32_carry.regR = registerResult; this->op_r_r_s32_carry.regA = registerA; this->op_r_r_s32_carry.immS32 = immS32; @@ -608,8 +582,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_COMPARE; this->operation = -999; - this->crRegister = PPC_REC_INVALID_REGISTER; - this->crMode = 0; this->op_compare.registerResult = registerResult; this->op_compare.registerOperandA = registerA; this->op_compare.registerOperandB = registerB; @@ -620,8 +592,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_COMPARE_S32; this->operation = -999; - this->crRegister = PPC_REC_INVALID_REGISTER; - this->crMode = 0; this->op_compare_s32.registerResult = registerResult; this->op_compare_s32.registerOperandA = registerA; this->op_compare_s32.immS32 = immS32; @@ -632,8 +602,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP; this->operation = -999; - this->crRegister = PPC_REC_INVALID_REGISTER; - this->crMode = 0; this->op_conditionalJump2.registerBool = registerBool; this->op_conditionalJump2.mustBeTrue = mustBeTrue; } @@ -642,8 +610,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_JUMP; this->operation = -999; - this->crRegister = PPC_REC_INVALID_REGISTER; - this->crMode = 0; } // load from memory @@ -651,7 +617,6 @@ struct IMLInstruction { this->type = PPCREC_IML_TYPE_LOAD; this->operation = 0; - this->crRegister = PPC_REC_INVALID_REGISTER; this->op_storeLoad.registerData = registerDestination; this->op_storeLoad.registerMem = registerMemory; this->op_storeLoad.immS32 = immS32; @@ -665,7 +630,6 @@ struct 
IMLInstruction { this->type = PPCREC_IML_TYPE_STORE; this->operation = 0; - this->crRegister = PPC_REC_INVALID_REGISTER; this->op_storeLoad.registerData = registerSource; this->op_storeLoad.registerMem = registerMemory; this->op_storeLoad.immS32 = immS32; @@ -674,6 +638,26 @@ struct IMLInstruction this->op_storeLoad.flags2.signExtend = false; } + void make_atomic_cmp_store(IMLReg regEA, IMLReg regCompareValue, IMLReg regWriteValue, IMLReg regSuccessOutput) + { + this->type = PPCREC_IML_TYPE_ATOMIC_CMP_STORE; + this->operation = 0; + this->op_atomic_compare_store.regEA = regEA; + this->op_atomic_compare_store.regCompareValue = regCompareValue; + this->op_atomic_compare_store.regWriteValue = regWriteValue; + this->op_atomic_compare_store.regBoolOut = regSuccessOutput; + } + + void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond) + { + this->type = PPCREC_IML_TYPE_FPR_COMPARE; + this->operation = -999; + this->op_fpr_compare.regR = regR; + this->op_fpr_compare.regA = regA; + this->op_fpr_compare.regB = regB; + this->op_fpr_compare.cond = cond; + } + void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; //void ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index f67b49e1..a1569d33 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -511,6 +511,8 @@ uint32 _PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, I */ uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { + __debugbreak(); // deprecated + if (imlSegment->nextSegmentIsUncertain) { return 0; @@ -535,81 +537,83 @@ uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, IM void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext) { - for 
(IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - for(IMLInstruction& instIt : segIt->imlList) - { - if (instIt.type == PPCREC_IML_TYPE_CJUMP) - { - if (instIt.op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - { - uint32 crBitFlag = 1 << (instIt.op_conditionalJump.crRegisterIndex * 4 + instIt.op_conditionalJump.crBitIndex); - segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written - segIt->crBitsRead |= (crBitFlag); - } - } - else if (instIt.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - uint32 crBitFlag = 1 << (instIt.op_conditional_r_s32.crRegisterIndex * 4 + instIt.op_conditional_r_s32.crBitIndex); - segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written - segIt->crBitsRead |= (crBitFlag); - } - else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MFCR) - { - segIt->crBitsRead |= 0xFFFFFFFF; - } - else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MTCRF) - { - segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)instIt.op_r_immS32.immS32); - } - else if( instIt.type == PPCREC_IML_TYPE_CR ) - { - if (instIt.operation == PPCREC_IML_OP_CR_CLEAR || - instIt.operation == PPCREC_IML_OP_CR_SET) - { - uint32 crBitFlag = 1 << (instIt.op_cr.crD); - segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); - } - else if (instIt.operation == PPCREC_IML_OP_CR_OR || - instIt.operation == PPCREC_IML_OP_CR_ORC || - instIt.operation == PPCREC_IML_OP_CR_AND || - instIt.operation == PPCREC_IML_OP_CR_ANDC) - { - uint32 crBitFlag = 1 << (instIt.op_cr.crD); - segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); - crBitFlag = 1 << (instIt.op_cr.crA); - segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); - crBitFlag = 1 << (instIt.op_cr.crB); - segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); - } - else - cemu_assert_unimplemented(); - } - else if 
(IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) - { - segIt->crBitsWritten |= (0xF<<(instIt.crRegister*4)); - } - else if( (instIt.type == PPCREC_IML_TYPE_STORE || instIt.type == PPCREC_IML_TYPE_STORE_INDEXED) && instIt.op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER ) - { - // overwrites CR0 - segIt->crBitsWritten |= (0xF<<0); - } - } - } - // flag instructions that write to CR where we can ignore individual CR bits - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - for (IMLInstruction& instIt : segIt->imlList) - { - if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) - { - uint32 crBitFlags = 0xF<<((uint32)instIt.crRegister*4); - uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt); - uint32 crIgnoreMask = crOverwriteMask & ~segIt->crBitsRead; - instIt.crIgnoreMask = crIgnoreMask; - } - } - } + __debugbreak(); // deprecated + + //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + //{ + // for(IMLInstruction& instIt : segIt->imlList) + // { + // if (instIt.type == PPCREC_IML_TYPE_CJUMP) + // { + // if (instIt.op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + // { + // uint32 crBitFlag = 1 << (instIt.op_conditionalJump.crRegisterIndex * 4 + instIt.op_conditionalJump.crBitIndex); + // segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written + // segIt->crBitsRead |= (crBitFlag); + // } + // } + // else if (instIt.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) + // { + // uint32 crBitFlag = 1 << (instIt.op_conditional_r_s32.crRegisterIndex * 4 + instIt.op_conditional_r_s32.crBitIndex); + // segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written + // segIt->crBitsRead |= (crBitFlag); + // } + // else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MFCR) + // { + // segIt->crBitsRead |= 0xFFFFFFFF; 
+ // } + // else if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_MTCRF) + // { + // segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)instIt.op_r_immS32.immS32); + // } + // else if( instIt.type == PPCREC_IML_TYPE_CR ) + // { + // if (instIt.operation == PPCREC_IML_OP_CR_CLEAR || + // instIt.operation == PPCREC_IML_OP_CR_SET) + // { + // uint32 crBitFlag = 1 << (instIt.op_cr.crD); + // segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); + // } + // else if (instIt.operation == PPCREC_IML_OP_CR_OR || + // instIt.operation == PPCREC_IML_OP_CR_ORC || + // instIt.operation == PPCREC_IML_OP_CR_AND || + // instIt.operation == PPCREC_IML_OP_CR_ANDC) + // { + // uint32 crBitFlag = 1 << (instIt.op_cr.crD); + // segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); + // crBitFlag = 1 << (instIt.op_cr.crA); + // segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); + // crBitFlag = 1 << (instIt.op_cr.crB); + // segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); + // } + // else + // cemu_assert_unimplemented(); + // } + // else if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) + // { + // segIt->crBitsWritten |= (0xF<<(instIt.crRegister*4)); + // } + // else if( (instIt.type == PPCREC_IML_TYPE_STORE || instIt.type == PPCREC_IML_TYPE_STORE_INDEXED) && instIt.op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER ) + // { + // // overwrites CR0 + // segIt->crBitsWritten |= (0xF<<0); + // } + // } + //} + //// flag instructions that write to CR where we can ignore individual CR bits + //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + //{ + // for (IMLInstruction& instIt : segIt->imlList) + // { + // if (IMLAnalyzer_CanTypeWriteCR(&instIt) && instIt.crRegister >= 0 && instIt.crRegister <= 7) + // { + // uint32 crBitFlags = 0xF<<((uint32)instIt.crRegister*4); + // uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt); + // uint32 crIgnoreMask = 
crOverwriteMask & ~segIt->crBitsRead; + // instIt.crIgnoreMask = crIgnoreMask; + // } + // } + //} } //bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index add7098e..8ef0669e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -1,7 +1,7 @@ #pragma once #include "IMLInstruction.h" -#define IML_RA_VIRT_REG_COUNT_MAX 40 // should match PPC_REC_MAX_VIRTUAL_GPR -> todo: Make this dynamic +#define IML_RA_VIRT_REG_COUNT_MAX (40 + 32) // should match PPC_REC_MAX_VIRTUAL_GPR -> todo: Make this dynamic struct IMLSegmentPoint { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index f74cd225..dd445b2c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -186,6 +186,14 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } + //if (ppcRecFunc->ppcAddress == 0x30DF5F8) + //{ + // debug_printf("----------------------------------------\n"); + // IMLDebug_Dump(&ppcImlGenContext); + // __debugbreak(); + //} + + //if (ppcRecFunc->ppcAddress == 0x11223344) //{ // //debug_printf("----------------------------------------\n"); @@ -302,9 +310,8 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam); - // remove redundant name load and store instructions - PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); - PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); + //PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); + //PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); return true; } diff 
--git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 5a4484da..c80fad8d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -6,7 +6,7 @@ #define PPC_REC_ALIGN_TO_4MB(__v) (((__v)+4*1024*1024-1)&~(4*1024*1024-1)) -#define PPC_REC_MAX_VIRTUAL_GPR (40) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2) +#define PPC_REC_MAX_VIRTUAL_GPR (40 + 32) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2) struct ppcRecRange_t { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 8377671a..d1475ffe 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -23,13 +23,7 @@ uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); // IML instruction generation -void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction); - void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); - - -// IML instruction generation (new style, can generate new instructions but also overwrite existing ones) - void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER); // IML generation - FPU diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 435a5a7e..2a1f2c71 100644 --- 
a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -17,8 +17,7 @@ struct PPCBasicBlockInfo uint32 startAddress; uint32 lastAddress; // inclusive bool isEnterable{ false }; - //uint32 enterableAddress{}; -> covered by startAddress - bool hasContinuedFlow{ true }; // non-branch path goes to next segment (lastAddress+4), assumed by default + bool hasContinuedFlow{ true }; // non-branch path goes to next segment, assumed by default bool hasBranchTarget{ false }; uint32 branchTarget{}; @@ -52,7 +51,6 @@ IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext { IMLInstruction& inst = ppcImlGenContext->currentOutputSegment->imlList.emplace_back(); memset(&inst, 0x00, sizeof(IMLInstruction)); - inst.crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default return &inst; } @@ -82,7 +80,6 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte memset(imlInstruction, 0, sizeof(IMLInstruction)); imlInstruction->type = PPCREC_IML_TYPE_CONDITIONAL_R_S32; imlInstruction->operation = operation; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // r_s32 operation imlInstruction->op_conditional_r_s32.registerIndex = registerIndex; imlInstruction->op_conditional_r_s32.immS32 = immS32; @@ -92,48 +89,6 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte imlInstruction->op_conditional_r_s32.bitMustBeSet = bitMustBeSet; } - -// jump based on segment branches -void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction) -{ - // jump - if (imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE; - 
imlInstruction->op_conditionalJump.crRegisterIndex = 0; - imlInstruction->op_conditionalJump.crBitIndex = 0; - imlInstruction->op_conditionalJump.bitMustBeSet = false; -} - -void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) -{ - // conditional jump - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.condition = jumpCondition; - imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; - imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; - imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; -} - -void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 crD, uint8 crA, uint8 crB) -{ - // multiple variations: - // operation involving only one cr bit (like clear crD bit) - // operation involving three cr bits (like crD = crA or crB) - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CR; - imlInstruction->operation = operation; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->crMode = 0; - imlInstruction->op_cr.crD = crD; - imlInstruction->op_cr.crA = crA; - imlInstruction->op_cr.crB = crB; -} - void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { ppcImlGenContext->emitInst().make_r_memory(registerDestination, registerMemory, immS32, copyWidth, signExtend, switchEndian); @@ -145,7 +100,6 @@ void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContex 
IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_LOAD_INDEXED; imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->op_storeLoad.registerData = registerDestination; imlInstruction->op_storeLoad.registerMem = registerMemory1; imlInstruction->op_storeLoad.registerMem2 = registerMemory2; @@ -165,7 +119,6 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_STORE_INDEXED; imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->op_storeLoad.registerData = registerDestination; imlInstruction->op_storeLoad.registerMem = registerMemory1; imlInstruction->op_storeLoad.registerMem2 = registerMemory2; @@ -303,6 +256,13 @@ uint32 PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext_t* ppcImlGen return PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + temporaryIndex); } +IMLReg _GetCRReg(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit) +{ + cemu_assert_debug(crReg < 8); + cemu_assert_debug(crBit < 4); + return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + (crReg * 4) + (crBit)); +} + /* * Loads a PPC fpr into any of the available IML FPU registers * If loadNew is false, it will check first if the fpr is already loaded into any IML register @@ -408,7 +368,18 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin // for handling RC bit of many instructions void PPCImlGen_UpdateCR0Logical(ppcImlGenContext_t* ppcImlGenContext, uint32 registerR) { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerR, registerR, 0, PPCREC_CR_MODE_LOGICAL); + IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, 0, 
Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + // todo - SO bit? + + ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegLT, IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegGT, IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_compare_s32(registerR, 0, crBitRegEQ, IMLCondition::EQ); + + //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, crBitRegSO, 0); // todo - copy from XER + + //ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerR, registerR, 0, PPCREC_CR_MODE_LOGICAL); } void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -494,71 +465,82 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - sint32 rD, rA, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0); - return true; + printf("MFCR: Not implemented\n"); + return false; + + //sint32 rD, rA, rB; + //PPC_OPC_TEMPL_X(opcode, rD, rA, rB); + //uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0); + //return true; } bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - uint32 rS; - uint32 crMask; - PPC_OPC_TEMPL_XFX(opcode, rS, crMask); - uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask); + printf("MTCRF: Not implemented\n"); + return false; + + //uint32 rS; + 
//uint32 crMask; + //PPC_OPC_TEMPL_XFX(opcode, rS, crMask); + //uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask); + //return true; +} + +void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isUnsigned) +{ + uint32 cr; + int rA, rB; + PPC_OPC_TEMPL_X(opcode, cr, rA, rB); + cr >>= 2; + + IMLReg gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO); + + IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegLT, isUnsigned ? IMLCondition::UNSIGNED_LT : IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegGT, isUnsigned ? 
IMLCondition::UNSIGNED_GT : IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegEQ, IMLCondition::EQ); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, crBitRegSO, regXerSO); +} + +bool PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isUnsigned) +{ + uint32 cr; + int rA; + uint32 imm; + if (isUnsigned) + { + PPC_OPC_TEMPL_D_UImm(opcode, cr, rA, imm); + } + else + { + PPC_OPC_TEMPL_D_SImm(opcode, cr, rA, imm); + } + cr >>= 2; + + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO); + + IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegLT, isUnsigned ? IMLCondition::UNSIGNED_LT : IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegGT, isUnsigned ? 
IMLCondition::UNSIGNED_GT : IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegEQ, IMLCondition::EQ); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, crBitRegSO, regXerSO); + return true; } -void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 cr; - int rA, rB; - PPC_OPC_TEMPL_X(opcode, cr, rA, rB); - cr >>= 2; - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_COMPARE_SIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_SIGNED); -} - -void PPCRecompilerImlGen_CMPL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 cr; - int rA, rB; - PPC_OPC_TEMPL_X(opcode, cr, rA, rB); - cr >>= 2; - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); -} - -void PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 cr; - int rA; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, cr, rA, imm); - cr >>= 2; - sint32 b = imm; - // load gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, cr, PPCREC_CR_MODE_COMPARE_SIGNED); -} - -void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 cr; - int rA; - uint32 imm; - PPC_OPC_TEMPL_D_UImm(opcode, cr, rA, imm); - cr >>= 2; - uint32 b = imm; - // load gpr into register - uint32 gprRegister = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); -} - bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { uint32 li; @@ -575,8 +557,8 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) return true; } // is jump destination within recompiled function? - if( ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest) ) - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, nullptr); + if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) + ppcImlGenContext->emitInst().make_jump_new(); else ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); return true; @@ -589,6 +571,9 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) uint32 BO, BI, BD; PPC_OPC_TEMPL_B(opcode, BO, BI, BD); + // decodeOp_BC(uint32 opcode, uint32& BD, BOField& BO, uint32& BI, bool& AA, bool& LK) + Espresso::BOField boField(BO); + uint32 crRegister = BI/4; uint32 crBit = BI%4; uint32 jumpCondition = 0; @@ -597,6 +582,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0 bool ignoreCondition = (BO&16)!=0; + IMLReg regCRBit; + if (!ignoreCondition) + regCRBit = _GetCRReg(ppcImlGenContext, crRegister, crBit); + uint32 jumpAddressDest = BD; if( (opcode&PPC_OPC_AA) == 0 ) { @@ -605,35 +594,14 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( opcode&PPC_OPC_LK ) { + if (useDecrementer) + return false; // conditional function calls are not supported if( ignoreCondition == false ) { - // generate jump condition - 
if( conditionMustBeTrue ) - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - else - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; IMLSegment* blSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); + ppcImlGenContext->emitInst().make_conditional_jump_new(regCRBit, conditionMustBeTrue); blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); return true; } @@ -644,8 +612,8 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { if( ignoreCondition == false ) return false; // not supported for the moment - uint32 ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); - uint32 tmpBoolReg = PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext, 1); + IMLReg ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); + IMLReg tmpBoolReg = PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext, 1); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SUB, ctrRegister, ctrRegister, 1); ppcImlGenContext->emitInst().make_compare_s32(ctrRegister, 0, tmpBoolReg, decrementerMustBeZero ? 
IMLCondition::EQ : IMLCondition::NEQ); ppcImlGenContext->emitInst().make_conditional_jump_new(tmpBoolReg, true); @@ -661,34 +629,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) } else { - // generate jump condition - if( conditionMustBeTrue ) - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - else - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } - if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) { // near jump - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); + ppcImlGenContext->emitInst().make_conditional_jump_new(regCRBit, conditionMustBeTrue); } else { @@ -713,6 +657,10 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 crRegister = BI/4; uint32 crBit = BI%4; + IMLReg regCRBit; + if (!BO.conditionIgnore()) + regCRBit = _GetCRReg(ppcImlGenContext, crRegister, crBit); + uint32 branchDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + sprReg); if (LK) { @@ -738,39 +686,9 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // no decrementer but CR check cemu_assert_debug(ppcImlGenContext->currentBasicBlock->hasContinuedFlow); cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); - // generate jump condition - uint32 jumpCondition = 0; - if (!BO.conditionInverted()) - { - // CR bit must be set - if (crBit == 0) - jumpCondition = 
PPCREC_JUMP_CONDITION_L; - else if (crBit == 1) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if (crBit == 2) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if (crBit == 3) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } - else - { - if (crBit == 0) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if (crBit == 1) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if (crBit == 2) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if (crBit == 3) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - - // write the dynamic branch instruction to a new segment that is set as a branch target for the current segment PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); - - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, !BO.conditionInverted()); - - + ppcImlGenContext->emitInst().make_conditional_jump_new(regCRBit, !BO.conditionInverted()); bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_B_TO_REG, branchDestReg, 0, 0); } else @@ -1706,31 +1624,12 @@ bool PPCRecompilerImlGen_LBZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return true; } -bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - // load memory rA and rB into register - uint32 gprRegisterA = rA != 0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = 
PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load word - if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, PPC_REC_LOAD_LWARX_MARKER, false, true); - else - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterB, 0, PPC_REC_LOAD_LWARX_MARKER, false, true); - return true; -} - void PPCRecompilerImlGen_LMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - //uint32 ea = (rA ? hCPU->gpr[rA] : 0) + imm; + cemu_assert_debug(rA != 0); sint32 index = 0; while( rD <= 31 ) { @@ -1935,22 +1834,6 @@ bool PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext_t* ppcImlGenConte return true; } -bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rS, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // prepare registers - uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // store word - if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, PPC_REC_STORE_STWCX_MARKER, false, true); - else - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, destinationRegister, gprRegisterB, 0, PPC_REC_STORE_STWCX_MARKER, true); - return true; -} - bool PPCRecompilerImlGen_STWBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rA, rS, rB; @@ -1972,6 +1855,7 @@ void PPCRecompilerImlGen_STMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; 
PPC_OPC_TEMPL_D_SImm(opcode, rS, rA, imm); + cemu_assert_debug(rA != 0); sint32 index = 0; while( rS <= 31 ) { @@ -2063,6 +1947,86 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return true; } +bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + sint32 rA, rD, rB; + PPC_OPC_TEMPL_X(opcode, rD, rA, rB); + + IMLReg regA = rA != 0 ? PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : IMLREG_INVALID; + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA); + IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL); + // calculate EA + if (regA != IMLREG_INVALID) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB); + else + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB); + // load word + ppcImlGenContext->emitInst().make_r_memory(regD, regMemResEA, 0, 32, false, true); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResVal, regD); + return true; +} + +bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + sint32 rA, rS, rB; + PPC_OPC_TEMPL_X(opcode, rS, rA, rB); + IMLReg regA = rA != 0 ? 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : IMLREG_INVALID; + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); + IMLReg regData = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + IMLReg regTmpDataBE = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); + IMLReg regTmpCompareBE = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3); + // calculate EA + IMLReg regCalcEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + if (regA != IMLREG_INVALID) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regCalcEA, regA, regB); + else + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCalcEA, regB); + // get CR bit regs and set LT, GT and SO immediately + IMLReg regCrLT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_LT); + IMLReg regCrGT = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_GT); + IMLReg regCrEQ = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_EQ); + IMLReg regCrSO = _GetCRReg(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_SO); + IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO); // source is XER.SO; loading CR0.SO here would make the copy below a self-assignment + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrLT, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrGT, 0); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCrSO, regXerSO); + // get regs for reservation address and value + IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA); + IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL); + // compare calculated EA with reservation + IMLReg regTmpBool = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + ppcImlGenContext->emitInst().make_compare(regCalcEA, regMemResEA, regTmpBool, IMLCondition::EQ); + 
ppcImlGenContext->emitInst().make_conditional_jump_new(regTmpBool, true); + + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + [&](ppcImlGenContext_t& genCtx) + { + /* branch taken, EA matching */ + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, regTmpDataBE, regData); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, regTmpCompareBE, regMemResVal); + ppcImlGenContext->emitInst().make_atomic_cmp_store(regMemResEA, regTmpCompareBE, regTmpDataBE, regCrEQ); + }, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken, EA mismatching */ + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrEQ, 0); + } + ); + + // reset reservation + // I found contradictory information of whether the reservation is cleared in all cases, so unit testing would be required + // Most sources state that it is cleared on successful store. They don't explicitly mention what happens on failure + // In contrast, "The PowerPC 600 series, part 7: Atomic memory access and cache coherency" states that it is always cleared + // There may also be differences between individual PPC generations + // In disassembly I have never seen more than one STWCX after each LWARX, which hints at reservation always being cleared or at least the compiler assuming this + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResEA, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResVal, 0); + + return true; +} + bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rA, rB; @@ -2339,41 +2303,23 @@ bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); + IMLReg gprDestReg; if( rS == rB ) { // xor register with itself - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + gprDestReg = 
PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); } else { // rA = rS ^ rA - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) - { - // make sure we don't overwrite rS or rA - if( gprSource1Reg == gprDestReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - else - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); - } - else - { - // rA = rS - if( gprDestReg != gprSource1Reg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - // rA ^= rB - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); - } + IMLReg gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg, gprSource2Reg); } + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, gprDestReg); return true; } @@ -2427,15 +2373,9 @@ void PPCRecompilerImlGen_ORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_UImm(opcode, rS, 
rA, imm); - // ORI does not set cr0 flags - //hCPU->gpr[rA] = hCPU->gpr[rS] | imm; - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_OR, gprDestReg, (sint32)imm); + IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, gprDestReg, gprSourceReg, (sint32)imm); } void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2443,15 +2383,9 @@ void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - // ORI does not set cr0 flags - //hCPU->gpr[rA] = hCPU->gpr[rS] | imm; - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_OR, gprDestReg, (sint32)imm); + IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, gprDestReg, gprSourceReg, (sint32)imm); } void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2459,15 +2393,9 
@@ void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - //hCPU->gpr[rA] = hCPU->gpr[rS] ^ imm; - // XORI does not set cr0 flags - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm); + IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, gprDestReg, gprSourceReg, (sint32)imm); } void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2475,22 +2403,19 @@ void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - //hCPU->gpr[rA] = hCPU->gpr[rS] ^ imm; - // XORIS does not set cr0 flags - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm); + IMLReg gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + 
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, gprDestReg, gprSourceReg, (sint32)imm); } bool PPCRecompilerImlGen_CROR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_OR, crD, crA, crB); + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regCrB); return true; } @@ -2498,7 +2423,12 @@ bool PPCRecompilerImlGen_CRORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_ORC, crD, crA, crB); + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regTmp); return true; } @@ -2506,7 +2436,10 @@ bool PPCRecompilerImlGen_CRAND(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_AND, crD, crA, crB); + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regCrB); return true; } @@ -2514,7 +2447,12 @@ bool PPCRecompilerImlGen_CRANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_ANDC, crD, crA, crB); + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regTmp); return true; } @@ -2522,17 +2460,15 @@ bool PPCRecompilerImlGen_CRXOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - if (crA == crB) + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + if (regCrA == regCrB) { - // both operands equal, clear bit in crD - // PPC's assert() uses this to pass a parameter to OSPanic - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_CLEAR, crD, 0, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 0); return true; } - else - { - return false; - } + 
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regCrR, regCrA, regCrB); return true; } @@ -2540,16 +2476,17 @@ bool PPCRecompilerImlGen_CREQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - if (crA == crB) + IMLReg regCrA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crA); + IMLReg regCrB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + crB); + IMLReg regCrR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_CR + crD); + if (regCrA == regCrB) { - // both operands equal, set bit in crD - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_SET, crD, 0, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 1); return true; } - else - { - return false; - } + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regCrR, regCrA, regTmp); return true; } @@ -2682,15 +2619,18 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) switch (PPC_getBits(opcode, 25, 5)) { case 0: - PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 1: - PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 2: - PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; default: @@ -2843,14 +2783,16 @@ bool 
PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) PPCRecompilerImlGen_MULLI(ppcImlGenContext, opcode); break; case 8: // SUBFIC - if( !PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode) ) + if (!PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode)) unsupportedInstructionFound = true; break; case 10: // CMPLI - PPCRecompilerImlGen_CMPLI(ppcImlGenContext, opcode); + if (!PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode, true)) + unsupportedInstructionFound = true; break; case 11: // CMPI - PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode); + if (!PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode, false)) + unsupportedInstructionFound = true; break; case 12: // ADDIC if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, false) == false) @@ -2964,7 +2906,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) switch (PPC_getBits(opcode, 30, 10)) { case 0: - PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode); + PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode, false); break; case 4: PPCRecompilerImlGen_TW(ppcImlGenContext, opcode); @@ -3009,7 +2951,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; break; case 32: - PPCRecompilerImlGen_CMPL(ppcImlGenContext, opcode); + PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode, true); // CMPL break; case 40: if (PPCRecompilerImlGen_SUBF(ppcImlGenContext, opcode) == false) @@ -3764,15 +3706,7 @@ void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext } // check last instruction of segment IMLInstruction* imlInstruction = segIt->GetLastInstruction(); - if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - { - cemu_assert_debug(segIt->GetBranchTaken()); - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - { - cemu_assert_debug(segIt->GetBranchNotTaken()); - } - } - else if (imlInstruction->type == 
PPCREC_IML_TYPE_MACRO) + if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) { auto macroType = imlInstruction->operation; switch (macroType) @@ -3854,7 +3788,6 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction PPCRecompiler_pushBackIMLInstructions(seg, 0, 1); seg->imlList[0].type = PPCREC_IML_TYPE_MACRO; - seg->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; seg->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; seg->imlList[0].op_macro.param = ppcInstructionCount; } @@ -3937,20 +3870,13 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction cemu_assert_debug(seg->GetBranchTaken()); cemu_assert_debug(seg->GetBranchNotTaken()); } - if (inst->type == PPCREC_IML_TYPE_CJUMP) + if (inst->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { - if (inst->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + if (!seg->GetBranchTaken() || !seg->GetBranchNotTaken()) { - if (!seg->GetBranchTaken() || !seg->GetBranchNotTaken()) - { - debug_printf("---------------- SegmentDump (Missing branch for CJUMP in segment 0x%x):\n", (int)segIndex); - IMLDebug_Dump(&ppcImlGenContext); - cemu_assert_error(); - } - } - else - { - // proper error checking for branch-always (or branch-never if invert bit is set) + debug_printf("---------------- SegmentDump (Missing branch for conditional jump in segment 0x%x):\n", (int)segIndex); + IMLDebug_Dump(&ppcImlGenContext); + cemu_assert_error(); } } } @@ -3968,90 +3894,90 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction void IMLOptimizer_replaceWithConditionalMov(ppcImlGenContext_t& ppcImlGenContext) { // optimization pass - replace segments with conditional MOVs if possible - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - if (segIt->nextSegmentBranchNotTaken == nullptr || segIt->nextSegmentBranchTaken == nullptr) - continue; // not a branching segment - IMLInstruction* lastInstruction = segIt->GetLastInstruction(); - if 
(lastInstruction->type != PPCREC_IML_TYPE_CJUMP || lastInstruction->op_conditionalJump.crRegisterIndex != 0) - continue; - IMLSegment* conditionalSegment = segIt->nextSegmentBranchNotTaken; - IMLSegment* finalSegment = segIt->nextSegmentBranchTaken; - if (segIt->nextSegmentBranchTaken != segIt->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) - continue; - if (segIt->nextSegmentBranchNotTaken->imlList.size() > 4) - continue; - if (conditionalSegment->list_prevSegments.size() != 1) - continue; // the reduced segment must not be the target of any other branch - if (conditionalSegment->isEnterable) - continue; - // check if the segment contains only iml instructions that can be turned into conditional moves (Value assignment, register assignment) - bool canReduceSegment = true; - for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) - { - IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; - if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - continue; - // todo: Register to register copy - canReduceSegment = false; - break; - } + //for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + //{ + // if (segIt->nextSegmentBranchNotTaken == nullptr || segIt->nextSegmentBranchTaken == nullptr) + // continue; // not a branching segment + // IMLInstruction* lastInstruction = segIt->GetLastInstruction(); + // if (lastInstruction->type != PPCREC_IML_TYPE_CJUMP || lastInstruction->op_conditionalJump.crRegisterIndex != 0) + // continue; + // IMLSegment* conditionalSegment = segIt->nextSegmentBranchNotTaken; + // IMLSegment* finalSegment = segIt->nextSegmentBranchTaken; + // if (segIt->nextSegmentBranchTaken != segIt->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) + // continue; + // if (segIt->nextSegmentBranchNotTaken->imlList.size() > 4) + // continue; + // if (conditionalSegment->list_prevSegments.size() != 1) + // continue; // the reduced segment must not be the target of any 
other branch + // if (conditionalSegment->isEnterable) + // continue; + // // check if the segment contains only iml instructions that can be turned into conditional moves (Value assignment, register assignment) + // bool canReduceSegment = true; + // for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) + // { + // IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; + // if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + // continue; + // // todo: Register to register copy + // canReduceSegment = false; + // break; + // } - if (canReduceSegment == false) - continue; + // if (canReduceSegment == false) + // continue; - // remove the branch instruction - uint8 branchCond_crRegisterIndex = lastInstruction->op_conditionalJump.crRegisterIndex; - uint8 branchCond_crBitIndex = lastInstruction->op_conditionalJump.crBitIndex; - bool branchCond_bitMustBeSet = lastInstruction->op_conditionalJump.bitMustBeSet; - lastInstruction->make_no_op(); + // // remove the branch instruction + // uint8 branchCond_crRegisterIndex = lastInstruction->op_conditionalJump.crRegisterIndex; + // uint8 branchCond_crBitIndex = lastInstruction->op_conditionalJump.crBitIndex; + // bool branchCond_bitMustBeSet = lastInstruction->op_conditionalJump.bitMustBeSet; + // lastInstruction->make_no_op(); - // append conditional moves based on branch condition - for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) - { - IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; - if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(segIt), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); - else - assert_dbg(); 
- } - // update segment links - // source segment: imlSegment, conditional/removed segment: conditionalSegment, final segment: finalSegment - IMLSegment_RemoveLink(segIt, conditionalSegment); - IMLSegment_RemoveLink(segIt, finalSegment); - IMLSegment_RemoveLink(conditionalSegment, finalSegment); - IMLSegment_SetLinkBranchNotTaken(segIt, finalSegment); - // remove all instructions from conditional segment - conditionalSegment->imlList.clear(); + // // append conditional moves based on branch condition + // for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++) + // { + // IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f; + // if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + // PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(segIt), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); + // else + // assert_dbg(); + // } + // // update segment links + // // source segment: imlSegment, conditional/removed segment: conditionalSegment, final segment: finalSegment + // IMLSegment_RemoveLink(segIt, conditionalSegment); + // IMLSegment_RemoveLink(segIt, finalSegment); + // IMLSegment_RemoveLink(conditionalSegment, finalSegment); + // IMLSegment_SetLinkBranchNotTaken(segIt, finalSegment); + // // remove all instructions from conditional segment + // conditionalSegment->imlList.clear(); - // if possible, merge imlSegment with finalSegment - if (finalSegment->isEnterable == false && finalSegment->list_prevSegments.size() == 1) - { - // todo: Clean this up and move into separate function PPCRecompilerIML_mergeSegments() - IMLSegment_RemoveLink(segIt, finalSegment); - if (finalSegment->nextSegmentBranchNotTaken) - { - IMLSegment* tempSegment = finalSegment->nextSegmentBranchNotTaken; - 
IMLSegment_RemoveLink(finalSegment, tempSegment); - IMLSegment_SetLinkBranchNotTaken(segIt, tempSegment); - } - if (finalSegment->nextSegmentBranchTaken) - { - IMLSegment* tempSegment = finalSegment->nextSegmentBranchTaken; - IMLSegment_RemoveLink(finalSegment, tempSegment); - IMLSegment_SetLinkBranchTaken(segIt, tempSegment); - } - // copy IML instructions - cemu_assert_debug(segIt != finalSegment); - for (sint32 f = 0; f < finalSegment->imlList.size(); f++) - { - memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList.data() + f, sizeof(IMLInstruction)); - } - finalSegment->imlList.clear(); - } + // // if possible, merge imlSegment with finalSegment + // if (finalSegment->isEnterable == false && finalSegment->list_prevSegments.size() == 1) + // { + // // todo: Clean this up and move into separate function PPCRecompilerIML_mergeSegments() + // IMLSegment_RemoveLink(segIt, finalSegment); + // if (finalSegment->nextSegmentBranchNotTaken) + // { + // IMLSegment* tempSegment = finalSegment->nextSegmentBranchNotTaken; + // IMLSegment_RemoveLink(finalSegment, tempSegment); + // IMLSegment_SetLinkBranchNotTaken(segIt, tempSegment); + // } + // if (finalSegment->nextSegmentBranchTaken) + // { + // IMLSegment* tempSegment = finalSegment->nextSegmentBranchTaken; + // IMLSegment_RemoveLink(finalSegment, tempSegment); + // IMLSegment_SetLinkBranchTaken(segIt, tempSegment); + // } + // // copy IML instructions + // cemu_assert_debug(segIt != finalSegment); + // for (sint32 f = 0; f < finalSegment->imlList.size(); f++) + // { + // memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList.data() + f, sizeof(IMLInstruction)); + // } + // finalSegment->imlList.clear(); + // } - // todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the segment is not an entry point or has no other jump sources) - } + // // todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the 
segment is not an entry point or has no other jump sources) + //} } bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index 95cfd176..b8986db4 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -1,14 +1,16 @@ +#include "Cafe/HW/Espresso/EspressoISA.h" #include "../Interpreter/PPCInterpreterInternal.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" #include "Cafe/GameProfile/GameProfile.h" +IMLReg _GetCRReg(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit); + void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) { // load from memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; imlInstruction->op_storeLoad.registerData = registerDestination; imlInstruction->op_storeLoad.registerMem = registerMemory; @@ -23,7 +25,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenCo // load from memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; imlInstruction->op_storeLoad.registerData = registerDestination; imlInstruction->op_storeLoad.registerMem = registerMemory1; @@ -39,7 +40,6 @@ void 
PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t* // store to memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; imlInstruction->op_storeLoad.registerData = registerSource; imlInstruction->op_storeLoad.registerMem = registerMemory; @@ -54,7 +54,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenCo // store to memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->operation = 0; imlInstruction->op_storeLoad.registerData = registerSource; imlInstruction->op_storeLoad.registerMem = registerMemory1; @@ -73,7 +72,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext_t* ppcI imlInstruction->operation = operation; imlInstruction->op_fpr_r_r.registerResult = registerResult; imlInstruction->op_fpr_r_r.registerOperand = registerOperand; - imlInstruction->crRegister = crRegister; imlInstruction->op_fpr_r_r.flags = 0; } @@ -86,7 +84,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* pp imlInstruction->op_fpr_r_r_r.registerResult = registerResult; imlInstruction->op_fpr_r_r_r.registerOperandA = registerOperand1; imlInstruction->op_fpr_r_r_r.registerOperandB = registerOperand2; - imlInstruction->crRegister = crRegister; imlInstruction->op_fpr_r_r_r.flags = 0; } @@ -100,7 +97,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t* imlInstruction->op_fpr_r_r_r_r.registerOperandA = registerOperandA; imlInstruction->op_fpr_r_r_r_r.registerOperandB = registerOperandB; imlInstruction->op_fpr_r_r_r_r.registerOperandC = registerOperandC; - imlInstruction->crRegister = 
crRegister; imlInstruction->op_fpr_r_r_r_r.flags = 0; } @@ -112,7 +108,6 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcIml imlInstruction->type = PPCREC_IML_TYPE_FPR_R; imlInstruction->operation = operation; imlInstruction->op_fpr_r.registerResult = registerResult; - imlInstruction->crRegister = crRegister; } /* @@ -916,12 +911,33 @@ bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 op bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - sint32 crfD, frA, frB; - PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); - crfD >>= 2; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); + printf("FCMPO: Not implemented\n"); + return false; + + //sint32 crfD, frA, frB; + //PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); + //crfD >>= 2; + //IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); + //IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); + + //IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + //IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + //IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + //IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT); + //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT); + //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, 
IMLCondition::UNORDERED_EQ); + //ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U); + + // todo - set fpscr + + //sint32 crfD, frA, frB; + //PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); + //crfD >>= 2; + //uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); + //uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); + //PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); return true; } @@ -930,9 +946,21 @@ bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 crfD, frA, frB; PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); crfD >>= 2; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPU_BOTTOM, fprRegisterA, fprRegisterB, crfD); + IMLReg regFprA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); + IMLReg regFprB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); + + IMLReg crBitRegLT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetCRReg(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegLT, IMLCondition::UNORDERED_LT); + ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegGT, IMLCondition::UNORDERED_GT); + ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegEQ, 
IMLCondition::UNORDERED_EQ); + ppcImlGenContext->emitInst().make_fpr_compare(regFprA, regFprB, crBitRegSO, IMLCondition::UNORDERED_U); + + // todo: set fpscr + return true; } @@ -1837,6 +1865,9 @@ bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32 bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + printf("PS_CMPO0: Not implemented\n"); + return false; + sint32 crfD, frA, frB; uint32 c=0; frB = (opcode>>11)&0x1F; @@ -1851,6 +1882,9 @@ bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 o bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + printf("PS_CMPU0: Not implemented\n"); + return false; + sint32 crfD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; @@ -1863,6 +1897,9 @@ bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 o bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + printf("PS_CMPU1: Not implemented\n"); + return false; + sint32 crfD, frA, frB; frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index 7b4b94fb..61be66aa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -81,7 +81,7 @@ void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenCont entrySegment->enterPPCAddress = imlSegment->enterPPCAddress; // create jump instruction PPCRecompiler_pushBackIMLInstructions(entrySegment, 0, 1); - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, entrySegment->imlList.data() + 0); + entrySegment->imlList.data()[0].make_jump_new(); IMLSegment_SetLinkBranchTaken(entrySegment, imlSegment); // remove enterable flag from original segment imlSegment->isEnterable = false;