From a51a8bb7d54d188f10df31285f9549a6fc05340a Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 17 Dec 2022 21:06:27 +0100 Subject: [PATCH] PPCRec: New compare and cond jump instrs, update RA Storing the condition result in a register instead of imitating PPC CR lets us simplify the backend a lot. Only implemented as PoC for BDZ/BDNZ so far. --- .../Recompiler/BackendX64/BackendX64.cpp | 251 +++++++++++------- .../Recompiler/BackendX64/BackendX64.h | 87 +++--- .../Recompiler/BackendX64/BackendX64FPU.cpp | 104 ++++---- .../Recompiler/BackendX64/BackendX64Gen.cpp | 32 +-- src/Cafe/HW/Espresso/Recompiler/IML/IML.h | 2 +- .../Espresso/Recompiler/IML/IMLAnalyzer.cpp | 6 +- .../HW/Espresso/Recompiler/IML/IMLDebug.cpp | 56 +++- .../Recompiler/IML/IMLInstruction.cpp | 48 +++- .../Espresso/Recompiler/IML/IMLInstruction.h | 115 ++++++-- .../Espresso/Recompiler/IML/IMLOptimizer.cpp | 4 +- .../Recompiler/IML/IMLRegisterAllocator.cpp | 62 +++-- .../HW/Espresso/Recompiler/IML/IMLSegment.cpp | 6 + .../HW/Espresso/Recompiler/IML/IMLSegment.h | 1 + .../HW/Espresso/Recompiler/PPCRecompiler.cpp | 14 + .../HW/Espresso/Recompiler/PPCRecompilerIml.h | 2 +- .../Recompiler/PPCRecompilerImlGen.cpp | 104 ++++---- .../Recompiler/PPCRecompilerImlGenFPU.cpp | 12 +- src/config/LaunchSettings.cpp | 19 +- src/config/LaunchSettings.h | 7 + 19 files changed, 609 insertions(+), 323 deletions(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 428d2f0d..08847839 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -8,13 +8,44 @@ #include "util/MemMapper/MemMapper.h" #include "Common/cpu_features.h" +static x86Assembler64::GPR32 _reg32(sint8 physRegId) +{ + return (x86Assembler64::GPR32)physRegId; +} + +static x86Assembler64::GPR8_REX _reg8(sint8 physRegId) +{ + return 
(x86Assembler64::GPR8_REX)physRegId; +} + +static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId) +{ + return (x86Assembler64::GPR32)regId; +} + + +X86Cond _x86Cond(IMLCondition imlCond) +{ + switch (imlCond) + { + case IMLCondition::EQ: + return X86_CONDITION_Z; + case IMLCondition::NEQ: + return X86_CONDITION_NZ; + default: + break; + } + cemu_assert_suspicious(); + return X86_CONDITION_Z; +} + /* * Remember current instruction output offset for reloc * The instruction generated after this method has been called will be adjusted */ void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, void* extraInfo = nullptr) { - x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, extraInfo); + x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->emitter->GetWriteIndex(), extraInfo); } /* @@ -37,7 +68,7 @@ void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunctio void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset) { - uint8* instructionData = x64GenContext->codeBuffer + jumpInstructionOffset; + uint8* instructionData = x64GenContext->emitter->GetBufferPtr() + jumpInstructionOffset; if (instructionData[0] == 0x0F && (instructionData[1] >= 0x80 && instructionData[1] <= 0x8F)) { // far conditional jump @@ -241,7 +272,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_R13, (uint64)memory_base); // check if cycles where decreased beyond zero, if yes -> leave recompiler x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_CARRY, 0); //x64Gen_int3(x64GenContext); 
//x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RDX, ppcAddress); @@ -254,7 +285,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, //// JMP [recompilerCallTable+EAX/4*8] //x64Gen_int3(x64GenContext); x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); // check if instruction pointer was changed // assign new instruction pointer to EAX x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RAX, X86_REG_RSP, offsetof(PPCInterpreter_t, instructionPointer)); @@ -537,7 +568,7 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // realRegisterMem now holds EA x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext, realRegisterMem, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr)); - sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); // EA matches reservation // backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten) @@ -569,7 +600,7 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO)); // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, 
jumpInstructionOffsetJumpToEnd, x64GenContext->emitter->GetWriteIndex()); } else return false; @@ -690,16 +721,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else { x64Gen_test_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerA, imlInstruction->op_r_r.registerA); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult); x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32-1); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); x64Gen_mov_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 32); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } } else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) @@ -909,21 +940,21 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, } x64Gen_add_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); } - else if( imlInstruction->operation == 
PPCREC_IML_OP_SUB ) - { - // registerResult -= immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if (imlInstruction->crRegister == PPCREC_CR_REG_TEMP) - { - // do nothing -> SUB is for BDNZ instruction - } - else if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // update cr register - assert_dbg(); - } - x64Gen_sub_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); - } + //else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) + //{ + // // registerResult -= immS32 + // PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + // if (imlInstruction->crRegister == PPCREC_CR_REG_TEMP) + // { + // // do nothing -> SUB is for BDNZ instruction + // } + // else if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) + // { + // // update cr register + // assert_dbg(); + // } + // x64Gen_sub_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32); + //} else if( imlInstruction->operation == PPCREC_IML_OP_AND ) { // registerResult &= immS32 @@ -1349,11 +1380,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_not_reg64Low32(x64GenContext, rRegOperandA); x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandA); x64Gen_not_reg64Low32(x64GenContext, rRegOperandA); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); // reset carry flag + jump destination afterwards x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + 
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); // OR ((~a+b+1)<1) == true -> ca = 1 x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA); // todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1 @@ -1361,11 +1392,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandB); x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); // reset carry flag + jump destination afterwards x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); // do subtraction if( rRegOperandB == rRegOperandA ) { @@ -1419,7 +1450,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, for (sint32 b = 0; b < 6; b++) { x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1 << b)); - sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set if (b == 5) { @@ -1432,7 +1463,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, else x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b)); } - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex); + 
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); } x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); } @@ -1475,10 +1506,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, for(sint32 b=0; b<5; b++) { x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<codeBufferIndex; + sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); } x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); } @@ -1512,15 +1543,13 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // we use the same shift by register approach as in SLW/SRW, but we have to differentiate by signed/unsigned shift since it influences how the carry flag is set x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 0x80000000); - sint32 jumpInstructionJumpToSignedShift = x64GenContext->codeBufferIndex; + sint32 jumpInstructionJumpToSignedShift = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - //sint32 jumpInstructionJumpToEnd = x64GenContext->codeBufferIndex; - //x64Gen_jmpc(x64GenContext, X86_CONDITION_EQUAL, 0); // unsigned shift (MSB of input register is not set) for(sint32 b=0; b<6; b++) { x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<codeBufferIndex; + sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set if( b == 5 ) { @@ -1531,24 +1560,24 @@ bool 
PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); } - sint32 jumpInstructionJumpToEnd = x64GenContext->codeBufferIndex; + sint32 jumpInstructionJumpToEnd = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NONE, 0); // signed shift - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToSignedShift, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToSignedShift, x64GenContext->emitter->GetWriteIndex()); for(sint32 b=0; b<6; b++) { // check if we need to shift by (1<codeBufferIndex; + sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set // set ca if any non-zero bit is shifted out x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (1<<(1<codeBufferIndex; + sint32 jumpInstructionJumpToAfterCa = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if no bit is set x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToAfterCa, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToAfterCa, x64GenContext->emitter->GetWriteIndex()); // arithmetic shift if( b == 5 ) { @@ -1560,10 +1589,10 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); } // end - 
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToEnd, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToEnd, x64GenContext->emitter->GetWriteIndex()); x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); // update CR if requested // todo @@ -1693,22 +1722,67 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, return true; } +bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + auto regR = _reg8(imlInstruction->op_compare.registerResult); + auto regA = _reg32(imlInstruction->op_compare.registerOperandA); + auto regB = _reg32(imlInstruction->op_compare.registerOperandB); + X86Cond cond = _x86Cond(imlInstruction->op_compare.cond); + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc + x64GenContext->emitter->CMP_dd(regA, regB); + x64GenContext->emitter->SETcc_b(cond, regR); + return true; +} + +bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + auto regR = _reg8(imlInstruction->op_compare_s32.registerResult); + auto regA = _reg32(imlInstruction->op_compare_s32.registerOperandA); + sint32 imm = imlInstruction->op_compare_s32.immS32; + X86Cond cond = _x86Cond(imlInstruction->op_compare_s32.cond); + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc + x64GenContext->emitter->CMP_di32(regA, imm); + x64GenContext->emitter->SETcc_b(cond, regR); + return 
true; +} + +bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment) +{ + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + auto regBool = _reg8(imlInstruction->op_conditionalJump2.registerBool); + bool mustBeTrue = imlInstruction->op_conditionalJump2.mustBeTrue; + x64GenContext->emitter->TEST_bb(regBool, regBool); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); + x64GenContext->emitter->Jcc_j32(mustBeTrue ? X86_CONDITION_NZ : X86_CONDITION_Z, 0); + return true; +} + bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { + sint32 regResult = imlInstruction->op_r_r_s32.registerResult; + sint32 regOperand = imlInstruction->op_r_r_s32.registerA; + uint32 immS32 = imlInstruction->op_r_r_s32.immS32; + if( imlInstruction->operation == PPCREC_IML_OP_ADD ) { - // registerResult = registerOperand + immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; - if( rRegResult != rRegOperand ) - { - // copy value to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); - } - x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immU32); + if(regResult != regOperand) + x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); + 
x64Gen_add_reg64Low32_imm32(x64GenContext, regResult, (uint32)immU32); + } + else if (imlInstruction->operation == PPCREC_IML_OP_SUB) + { + cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + if (regResult != regOperand) + x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); + x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY ) { @@ -1733,9 +1807,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction assert_dbg(); } sint32 crRegister = imlInstruction->crRegister; - //x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - //x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - //x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); } } @@ -1761,11 +1832,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction x64Gen_not_reg64Low32(x64GenContext, rRegOperand); x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperand); x64Gen_not_reg64Low32(x64GenContext, rRegOperand); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); // reset carry flag + jump destination afterwards x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, 
x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); // OR ((~a+b+1)<1) == true -> ca = 1 x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand); // todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1 @@ -1773,11 +1844,11 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)immS32); x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); // reset carry flag + jump destination afterwards x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); // do actual computation of value, note: a - b is equivalent to a + ~b + 1 x64Gen_not_reg64Low32(x64GenContext, rRegResult); x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immS32 + 1); @@ -1839,12 +1910,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction // SAR registerResult, SH x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, sh); // JNS (if sign not set) - sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGN, 0); // todo: Can use 2-byte form of jump instruction here // MOV BYTE [ESP+xer_ca], 0 
x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); // jump destination - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); // CR update if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) { @@ -2147,9 +2218,6 @@ uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size) bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) { x64GenContext_t x64GenContext{}; - x64GenContext.codeBufferSize = 1024; - x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); - x64GenContext.codeBufferIndex = 0; x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; // generate iml instruction code @@ -2157,7 +2225,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { x64GenContext.currentSegment = segIt; - segIt->x64Offset = x64GenContext.codeBufferIndex; + segIt->x64Offset = x64GenContext.emitter->GetWriteIndex(); for(size_t i=0; iimlList.size(); i++) { IMLInstruction* imlInstruction = segIt->imlList.data() + i; @@ -2198,9 +2266,24 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo codeGenerationFailed = true; } } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_R ) + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) { - if( PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) + if (PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) + { + codeGenerationFailed = true; + } + } + else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE) + { + PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction, ppcImlGenContext, 
&x64GenContext, imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) + { + PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + if (PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false) { codeGenerationFailed = true; } @@ -2324,11 +2407,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo // handle failed code generation if( codeGenerationFailed ) { - free(x64GenContext.codeBuffer); return false; } // allocate executable memory - uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex); + uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes()); size_t baseAddress = (size_t)executableMemory; // fix relocs for(auto& relocIt : x64GenContext.relocateOffsetTable2) @@ -2341,7 +2423,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo x64Offset = destSegment->x64Offset; uint32 relocBase = relocIt.offset; - uint8* relocInstruction = x64GenContext.codeBuffer+relocBase; + uint8* relocInstruction = x64GenContext.emitter->GetBufferPtr()+relocBase; if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) ) { // Jcc relativeImm32 @@ -2374,21 +2456,17 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo } // copy code to executable memory - memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex); - free(x64GenContext.codeBuffer); - x64GenContext.codeBuffer = nullptr; + std::span codeBuffer = x64GenContext.emitter->GetBuffer(); + memcpy(executableMemory, codeBuffer.data(), codeBuffer.size_bytes()); // set code PPCRecFunction->x86Code = executableMemory; - PPCRecFunction->x86Size = 
x64GenContext.codeBufferIndex; + PPCRecFunction->x86Size = codeBuffer.size_bytes(); return true; } void PPCRecompilerX64Gen_generateEnterRecompilerCode() { x64GenContext_t x64GenContext{}; - x64GenContext.codeBufferSize = 1024; - x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); - x64GenContext.codeBufferIndex = 0; x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; // start of recompiler entry function @@ -2419,7 +2497,7 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() x64Gen_writeU8(&x64GenContext, 0x83); x64Gen_writeU8(&x64GenContext, 0x04); x64Gen_writeU8(&x64GenContext, 0x24); - uint32 jmpPatchOffset = x64GenContext.codeBufferIndex; + uint32 jmpPatchOffset = x64GenContext.emitter->GetWriteIndex(); x64Gen_writeU8(&x64GenContext, 0); // skip the distance until after the JMP x64Emit_mov_mem64_reg64(&x64GenContext, X86_REG_RDX, offsetof(PPCInterpreter_t, rspTemp), X86_REG_RSP); @@ -2434,7 +2512,7 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() //JMP recFunc x64Gen_jmp_reg64(&x64GenContext, X86_REG_RCX); // call argument 1 - x64GenContext.codeBuffer[jmpPatchOffset] = (x64GenContext.codeBufferIndex-(jmpPatchOffset-4)); + x64GenContext.emitter->GetBuffer()[jmpPatchOffset] = (x64GenContext.emitter->GetWriteIndex() -(jmpPatchOffset-4)); //recompilerExit1: x64Gen_pop_reg64(&x64GenContext, X86_REG_R15); @@ -2455,10 +2533,9 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() // RET x64Gen_ret(&x64GenContext); - uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex); + uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes()); // copy code to executable memory - memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex); - free(x64GenContext.codeBuffer); + memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes()); 
PPCRecompiler_enterRecompilerCode = (void ATTR_MS_ABI (*)(uint64,uint64))executableMemory; } @@ -2466,9 +2543,6 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() void* PPCRecompilerX64Gen_generateLeaveRecompilerCode() { x64GenContext_t x64GenContext{}; - x64GenContext.codeBufferSize = 128; - x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); - x64GenContext.codeBufferIndex = 0; x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; // update instruction pointer @@ -2481,10 +2555,9 @@ void* PPCRecompilerX64Gen_generateLeaveRecompilerCode() // RET x64Gen_ret(&x64GenContext); - uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex); + uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes()); // copy code to executable memory - memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex); - free(x64GenContext.codeBuffer); + memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes()); return executableMemory; } diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 4cbf01a9..eefd9da3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -1,6 +1,8 @@ #include "../PPCRecompiler.h" // todo - get rid of dependency +#include "x86Emitter.h" + struct x64RelocEntry_t { x64RelocEntry_t(uint32 offset, void* extraInfo) : offset(offset), extraInfo(extraInfo) {}; @@ -12,10 +14,18 @@ struct x64RelocEntry_t struct x64GenContext_t { IMLSegment* currentSegment{}; + x86Assembler64* emitter; + + x64GenContext_t() + { + emitter = new x86Assembler64(); + } + + ~x64GenContext_t() + { + delete emitter; + } - uint8* codeBuffer{}; - sint32 codeBufferIndex{}; - sint32 codeBufferSize{}; // cr state sint32 activeCRRegister{}; // 
current x86 condition flags reflect this cr* register sint32 activeCRState{}; // describes the way in which x86 flags map to the cr register (signed / unsigned) @@ -24,41 +34,41 @@ struct x64GenContext_t }; // todo - these definitions are part of the x86_64 emitter. Not the backend itself. We should move them eventually -#define X86_REG_EAX 0 -#define X86_REG_ECX 1 -#define X86_REG_EDX 2 -#define X86_REG_EBX 3 -#define X86_REG_ESP 4 // reserved for low half of hCPU pointer -#define X86_REG_EBP 5 -#define X86_REG_ESI 6 -#define X86_REG_EDI 7 -#define X86_REG_NONE -1 - -#define X86_REG_RAX 0 -#define X86_REG_RCX 1 -#define X86_REG_RDX 2 -#define X86_REG_RBX 3 -#define X86_REG_RSP 4 // reserved for hCPU pointer -#define X86_REG_RBP 5 -#define X86_REG_RSI 6 -#define X86_REG_RDI 7 -#define X86_REG_R8 8 -#define X86_REG_R9 9 -#define X86_REG_R10 10 -#define X86_REG_R11 11 -#define X86_REG_R12 12 -#define X86_REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet) -#define X86_REG_R14 14 // reserved as temporary register -#define X86_REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData - -#define X86_REG_AL 0 -#define X86_REG_CL 1 -#define X86_REG_DL 2 -#define X86_REG_BL 3 -#define X86_REG_AH 4 -#define X86_REG_CH 5 -#define X86_REG_DH 6 -#define X86_REG_BH 7 +//#define X86_REG_EAX 0 +//#define X86_REG_ECX 1 +//#define X86_REG_EDX 2 +//#define X86_REG_EBX 3 +//#define X86_REG_ESP 4 // reserved for low half of hCPU pointer +//#define X86_REG_EBP 5 +//#define X86_REG_ESI 6 +//#define X86_REG_EDI 7 +//#define X86_REG_NONE -1 +// +//#define X86_REG_RAX 0 +//#define X86_REG_RCX 1 +//#define X86_REG_RDX 2 +//#define X86_REG_RBX 3 +//#define X86_REG_RSP 4 // reserved for hCPU pointer +//#define X86_REG_RBP 5 +//#define X86_REG_RSI 6 +//#define X86_REG_RDI 7 +//#define X86_REG_R8 8 +//#define X86_REG_R9 9 +//#define X86_REG_R10 10 +//#define X86_REG_R11 11 +//#define X86_REG_R12 12 +//#define X86_REG_R13 13 // reserved to hold pointer to memory base? 
(Not decided yet) +//#define X86_REG_R14 14 // reserved as temporary register +//#define X86_REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData +// +//#define X86_REG_AL 0 +//#define X86_REG_CL 1 +//#define X86_REG_DL 2 +//#define X86_REG_BL 3 +//#define X86_REG_AH 4 -> Addressable via non-REX only +//#define X86_REG_CH 5 +//#define X86_REG_DH 6 +//#define X86_REG_BH 7 // reserved registers #define REG_RESV_TEMP (X86_REG_R14) @@ -72,6 +82,7 @@ struct x64GenContext_t #define reg32ToReg16(__x) (__x) +// deprecated condition flags enum { X86_CONDITION_EQUAL, // or zero diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index ed2fb7d9..c7e11d42 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -217,16 +217,16 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // jump cases x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 4); // type 4 -> u8 - sint32 jumpOffset_caseU8 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseU8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 5); // type 5 -> u16 - sint32 jumpOffset_caseU16 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseU16 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 6); // type 4 -> s8 - sint32 jumpOffset_caseS8 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseS8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // type 5 -> s16 - sint32 jumpOffset_caseS16 =
x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseS16 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); // default case -> float @@ -237,31 +237,31 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGen uint32 jumpOffset_endOfS8; PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfFloat = x64GenContext->codeBufferIndex; + jumpOffset_endOfFloat = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_U16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU8 = x64GenContext->codeBufferIndex; + jumpOffset_endOfU8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? 
PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_S16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU16 = x64GenContext->codeBufferIndex; + jumpOffset_endOfU16 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_U8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfS8 = x64GenContext->codeBufferIndex; + jumpOffset_endOfS8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? 
PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_S8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->emitter->GetWriteIndex()); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->emitter->GetWriteIndex()); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->emitter->GetWriteIndex()); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->emitter->GetWriteIndex()); } // load from memory @@ -495,16 +495,16 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext } // max(i, -clampMin) x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_GREATER_EQUAL, 0); x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); // min(i, clampMax) x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2 = 
x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_LESS_EQUAL, 0); x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); // endian swap if( bitWriteSize == 16) x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); @@ -528,16 +528,16 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // jump cases x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 4); // type 4 -> u8 - sint32 jumpOffset_caseU8 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseU8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 5); // type 5 -> u16 - sint32 jumpOffset_caseU16 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseU16 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 6); // type 4 -> s8 - sint32 jumpOffset_caseS8 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseS8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // type 5 -> s16 - sint32 jumpOffset_caseS16 = x64GenContext->codeBufferIndex; + sint32 jumpOffset_caseS16 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); // default case -> float @@ -548,31 +548,31 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGe uint32 jumpOffset_endOfS8; PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 
? PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfFloat = x64GenContext->codeBufferIndex; + jumpOffset_endOfFloat = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_U16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU8 = x64GenContext->codeBufferIndex; + jumpOffset_endOfU8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_S16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU16 = x64GenContext->codeBufferIndex; + jumpOffset_endOfU16 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? 
PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_U8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfS8 = x64GenContext->codeBufferIndex; + jumpOffset_endOfS8 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmp_imm32(x64GenContext, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->emitter->GetWriteIndex()); PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_S8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->emitter->GetWriteIndex()); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->emitter->GetWriteIndex()); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->emitter->GetWriteIndex()); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->emitter->GetWriteIndex()); } // store to memory @@ -873,18 +873,18 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction sint32 crRegister = imlInstruction->crRegister; // if the parity bit is set (NaN) we need to manually set CR LT, GT and EQ to 0 (comisd/ucomisd sets the respective flags to 1 in case of NaN) 
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_PARITY, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_SO)); // unordered - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_PARITY, 0); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // same as X64_CONDITION_CARRY x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT), 0); x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT), 0); x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ), 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } else if( imlInstruction->operation == 
PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP ) { @@ -1102,50 +1102,50 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunc { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // select C x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // select B - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); } else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR ) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); // select bottom x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1_bottom = 
x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1_bottom = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // select C bottom x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); - sint32 jumpInstructionOffset2_bottom = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2_bottom = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // select B bottom - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->emitter->GetWriteIndex()); x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->emitter->GetWriteIndex()); // select top x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandA); // copy top to bottom (todo: May cause stall?) 
x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1_top = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset1_top = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); // select C top //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC, 2); - sint32 jumpInstructionOffset2_top = x64GenContext->codeBufferIndex; + sint32 jumpInstructionOffset2_top = x64GenContext->emitter->GetWriteIndex(); x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); // select B top - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->emitter->GetWriteIndex()); //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB, 2); // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->codeBufferIndex); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->emitter->GetWriteIndex()); } else assert_dbg(); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp index 1094693a..bc5f5f6c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp +++ 
b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp @@ -7,46 +7,22 @@ void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v) { - if( x64GenContext->codeBufferIndex+1 > x64GenContext->codeBufferSize ) - { - x64GenContext->codeBufferSize *= 2; - x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize); - } - *(uint8*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v; - x64GenContext->codeBufferIndex++; + x64GenContext->emitter->_emitU8(v); } void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v) { - if( x64GenContext->codeBufferIndex+2 > x64GenContext->codeBufferSize ) - { - x64GenContext->codeBufferSize *= 2; - x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize); - } - *(uint16*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v; - x64GenContext->codeBufferIndex += 2; + x64GenContext->emitter->_emitU16(v); } void x64Gen_writeU32(x64GenContext_t* x64GenContext, uint32 v) { - if( x64GenContext->codeBufferIndex+4 > x64GenContext->codeBufferSize ) - { - x64GenContext->codeBufferSize *= 2; - x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize); - } - *(uint32*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v; - x64GenContext->codeBufferIndex += 4; + x64GenContext->emitter->_emitU32(v); } void x64Gen_writeU64(x64GenContext_t* x64GenContext, uint64 v) { - if( x64GenContext->codeBufferIndex+8 > x64GenContext->codeBufferSize ) - { - x64GenContext->codeBufferSize *= 2; - x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize); - } - *(uint64*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v; - x64GenContext->codeBufferIndex += 8; + x64GenContext->emitter->_emitU64(v); } #include "x64Emit.hpp" diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 
3dcd50b6..650946f3 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -26,4 +26,4 @@ void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* // debug void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); -void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext); +void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo = false); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index 160554d6..fae49541 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -55,7 +55,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) } /* -* Returns true if the imlInstruction can overwrite CR (depending on value of ->crRegister) +* Returns true if the instruction can overwrite CR (depending on value of ->crRegister) */ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) { @@ -63,6 +63,10 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) return true; if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) return true; + if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) + return true; // ?? + if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + return true; // ?? 
if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) return true; if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 008c2fad..72f706d9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -121,6 +121,20 @@ std::string IMLDebug_GetSegmentName(ppcImlGenContext_t* ctx, IMLSegment* seg) return ""; } +std::string IMLDebug_GetConditionName(IMLCondition cond) +{ + switch (cond) + { + case IMLCondition::EQ: + return "EQ"; + case IMLCondition::NEQ: + return "NEQ"; + default: + cemu_assert_unimplemented(); + } + return "ukn"; +} + void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) { StringBuf strOutput(1024); @@ -143,9 +157,12 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool if (printLivenessRangeInfo) { + strOutput.reset(); IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); + debug_printf("%s\n", strOutput.c_str()); } //debug_printf("\n"); + strOutput.reset(); sint32 lineOffsetParameters = 18; @@ -207,6 +224,37 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.addFmt(" -> CR{}", inst.crRegister); } } + else if (inst.type == PPCREC_IML_TYPE_COMPARE) + { + strOutput.add("CMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.registerOperandA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.registerOperandB); + strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare.cond)); + strOutput.add(" -> "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.registerResult, true); + } + else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32) + { + strOutput.add("CMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + 
strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.registerOperandA); + strOutput.addFmt("{}", inst.op_compare_s32.immS32); + strOutput.addFmt(", {}", IMLDebug_GetConditionName(inst.op_compare_s32.cond)); + strOutput.add(" -> "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.registerResult, true); + } + else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + strOutput.add("CJUMP2 "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_conditionalJump2.registerBool, true); + if(!inst.op_conditionalJump2.mustBeTrue) + strOutput.add("(inverted)"); + } else if (inst.type == PPCREC_IML_TYPE_R_R_S32) { strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); @@ -369,7 +417,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool else strOutput.add("U"); strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem, inst.op_storeLoad.immS32); - strOutput.addFmt("= fpr_t{} mode {}\n", inst.op_storeLoad.registerData, inst.op_storeLoad.mode); + strOutput.addFmt(" = fpr_t{} mode {}", inst.op_storeLoad.registerData, inst.op_storeLoad.mode); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) { @@ -388,7 +436,7 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool } else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { - strOutput.addFmt("CYCLE_CHECK\n"); + strOutput.addFmt("CYCLE_CHECK"); } else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { @@ -460,11 +508,11 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool debug_printf("\n"); } -void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext) +void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo) { for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++) { - IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], 
false); + IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], printLivenessRangeInfo); debug_printf("\n"); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index f471c827..2084d168 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -141,6 +141,21 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else cemu_assert_unimplemented(); } + else if (type == PPCREC_IML_TYPE_COMPARE) + { + registersUsed->readNamedReg1 = op_compare.registerOperandA; + registersUsed->readNamedReg2 = op_compare.registerOperandB; + registersUsed->writtenNamedReg1 = op_compare.registerResult; + } + else if (type == PPCREC_IML_TYPE_COMPARE_S32) + { + registersUsed->readNamedReg1 = op_compare_s32.registerOperandA; + registersUsed->writtenNamedReg1 = op_compare_s32.registerResult; + } + else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + registersUsed->readNamedReg1 = op_conditionalJump2.registerBool; + } else if (type == PPCREC_IML_TYPE_LOAD) { registersUsed->writtenNamedReg1 = op_storeLoad.registerData; @@ -455,17 +470,30 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste } else if (type == PPCREC_IML_TYPE_R_R_S32) { - // in all cases result is written and other operand is read only op_r_r_s32.registerResult = replaceRegisterMultiple(op_r_r_s32.registerResult, gprRegisterSearched, gprRegisterReplaced); op_r_r_s32.registerA = replaceRegisterMultiple(op_r_r_s32.registerA, gprRegisterSearched, gprRegisterReplaced); } else if (type == PPCREC_IML_TYPE_R_R_R) { - // in all cases result is written and other operands are read only op_r_r_r.registerResult = replaceRegisterMultiple(op_r_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced); op_r_r_r.registerA = replaceRegisterMultiple(op_r_r_r.registerA, gprRegisterSearched, gprRegisterReplaced); 
op_r_r_r.registerB = replaceRegisterMultiple(op_r_r_r.registerB, gprRegisterSearched, gprRegisterReplaced); } + else if (type == PPCREC_IML_TYPE_COMPARE) + { + op_compare.registerResult = replaceRegisterMultiple(op_compare.registerResult, gprRegisterSearched, gprRegisterReplaced); + op_compare.registerOperandA = replaceRegisterMultiple(op_compare.registerOperandA, gprRegisterSearched, gprRegisterReplaced); + op_compare.registerOperandB = replaceRegisterMultiple(op_compare.registerOperandB, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_COMPARE_S32) + { + op_compare_s32.registerResult = replaceRegisterMultiple(op_compare_s32.registerResult, gprRegisterSearched, gprRegisterReplaced); + op_compare_s32.registerOperandA = replaceRegisterMultiple(op_compare_s32.registerOperandA, gprRegisterSearched, gprRegisterReplaced); + } + else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, gprRegisterSearched, gprRegisterReplaced); + } else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // no effect on registers @@ -627,13 +655,17 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { // not affected } + else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + // not affected + } else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { - // no effect on registers + // not affected } else if (type == PPCREC_IML_TYPE_NO_OP) { - // no effect on registers + // not affected } else if (type == PPCREC_IML_TYPE_MACRO) { @@ -737,13 +769,17 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe { // not affected } + else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + // not affected + } 
else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { - // no effect on registers + // not affected } else if (type == PPCREC_IML_TYPE_NO_OP) { - // no effect on registers + // not affected } else if (type == PPCREC_IML_TYPE_MACRO) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 89f14af4..a9245baa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -4,8 +4,6 @@ enum { PPCREC_IML_OP_ASSIGN, // '=' operator PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap - PPCREC_IML_OP_ADD, // '+' operator - PPCREC_IML_OP_SUB, // '-' operator PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit PPCREC_IML_OP_COMPARE_SIGNED, // arithmetic/signed comparison operator (updates cr) PPCREC_IML_OP_COMPARE_UNSIGNED, // logical/unsigned comparison operator (updates cr) @@ -85,8 +83,19 @@ enum // PS PPCREC_IML_OP_FPR_SUM0, PPCREC_IML_OP_FPR_SUM1, -}; + + + // working towards defining ops per-form + // R_R_R only + + // R_R_S32 only + + // R_R_R + R_R_S32 + PPCREC_IML_OP_ADD, + PPCREC_IML_OP_SUB, + +}; #define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN) enum @@ -118,6 +127,19 @@ enum }; +enum class IMLCondition : uint8 +{ + EQ, + NEQ, + SIGNED_GT, + SIGNED_LT, + UNSIGNED_GT, + UNSIGNED_LT, + + SIGNED_OVERFLOW, + SIGNED_NOVERFLOW, +}; + enum { PPCREC_CR_MODE_COMPARE_SIGNED, @@ -131,7 +153,7 @@ enum { PPCREC_IML_TYPE_NONE, PPCREC_IML_TYPE_NO_OP, // no-op instruction - PPCREC_IML_TYPE_R_R, // r* (op) *r + PPCREC_IML_TYPE_R_R, // r* = (op) *r (can also be r* (op) *r) PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r* PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32* PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*] @@ -145,6 +167,12 @@ enum PPCREC_IML_TYPE_CJUMP, // conditional jump PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining 
thread cycles < 0 PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands) + + // new style of handling conditions and branches: + PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r* + PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm + PPCREC_IML_TYPE_CONDITIONAL_JUMP, // replaces CJUMP. Jump condition is based on boolean register + // conditional PPCREC_IML_TYPE_CONDITIONAL_R_S32, // FPR @@ -294,11 +322,6 @@ struct IMLInstruction sint32 immS32; }op_r_immS32; struct - { - uint32 address; - uint8 flags; - }op_jumpmark; - struct { uint32 param; uint32 param2; @@ -310,7 +333,7 @@ struct IMLInstruction uint8 crRegisterIndex; uint8 crBitIndex; bool bitMustBeSet; - }op_conditionalJump; + }op_conditionalJump; // legacy jump struct { uint8 registerData; @@ -353,16 +376,30 @@ struct IMLInstruction uint8 registerResult; }op_fpr_r; struct - { - uint32 ppcAddress; - uint32 x64Offset; - }op_ppcEnter; - struct { uint8 crD; // crBitIndex (result) uint8 crA; // crBitIndex uint8 crB; // crBitIndex }op_cr; + struct + { + uint8 registerResult; // stores the boolean result of the comparison + uint8 registerOperandA; + uint8 registerOperandB; + IMLCondition cond; + }op_compare; + struct + { + uint8 registerResult; // stores the boolean result of the comparison + uint8 registerOperandA; + sint32 immS32; + IMLCondition cond; + }op_compare_s32; + struct + { + uint8 registerBool; + bool mustBeTrue; + }op_conditionalJump2; // conditional operations (emitted if supported by target platform) struct { @@ -385,7 +422,8 @@ struct IMLInstruction type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE || type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB || type == PPCREC_IML_TYPE_CJUMP || - type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || + type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) return true; return false; } @@ -432,6 +470,17 @@ struct IMLInstruction this->op_r_r.registerA = registerA; } + + void 
make_r_s32(uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint32 crMode = 0) + { + this->type = PPCREC_IML_TYPE_R_S32; + this->operation = operation; + this->crRegister = crRegister; + this->crMode = crMode; + this->op_r_immS32.registerIndex = registerIndex; + this->op_r_immS32.immS32 = immS32; + } + void make_r_r_r(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) { // operation with three register operands (e.g. "t0 = t1 + t4") @@ -456,6 +505,40 @@ struct IMLInstruction this->op_r_r_s32.immS32 = immS32; } + void make_compare(uint8 registerA, uint8 registerB, uint8 registerResult, IMLCondition cond) + { + this->type = PPCREC_IML_TYPE_COMPARE; + this->operation = -999; + this->crRegister = PPC_REC_INVALID_REGISTER; + this->crMode = 0; + this->op_compare.registerResult = registerResult; + this->op_compare.registerOperandA = registerA; + this->op_compare.registerOperandB = registerB; + this->op_compare.cond = cond; + } + + void make_compare_s32(uint8 registerA, sint32 immS32, uint8 registerResult, IMLCondition cond) + { + this->type = PPCREC_IML_TYPE_COMPARE_S32; + this->operation = -999; + this->crRegister = PPC_REC_INVALID_REGISTER; + this->crMode = 0; + this->op_compare_s32.registerResult = registerResult; + this->op_compare_s32.registerOperandA = registerA; + this->op_compare_s32.immS32 = immS32; + this->op_compare_s32.cond = cond; + } + + void make_conditional_jump_new(uint8 registerBool, bool mustBeTrue) + { + this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP; + this->operation = -999; + this->crRegister = PPC_REC_INVALID_REGISTER; + this->crMode = 0; + this->op_conditionalJump2.registerBool = registerBool; + this->op_conditionalJump2.mustBeTrue = mustBeTrue; + } + // load from memory void make_r_memory(uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { diff 
--git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index ae3c6c79..2cbcb0c1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -835,9 +835,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp } if (foundMatch) { - // insert expand instruction - IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, i); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_ENDIAN_SWAP, gprIndex, gprIndex); + PPCRecompiler_insertInstruction(imlSegment, i)->make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, gprIndex, gprIndex); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 1b720d26..8c7c807d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -218,10 +218,20 @@ typedef struct sint32 liveRangesCount; }raLiveRangeInfo_t; +bool IsRangeOverlapping(raLivenessSubrange_t* rangeA, raLivenessSubrange_t* rangeB) +{ + if (rangeA->start.index < rangeB->end.index && rangeA->end.index > rangeB->start.index) + return true; + if ((rangeA->start.index == RA_INTER_RANGE_START && rangeA->start.index == rangeB->start.index)) + return true; + if (rangeA->end.index == RA_INTER_RANGE_END && rangeA->end.index == rangeB->end.index) + return true; + return false; +} + // mark occupied registers by any overlapping range as unavailable in physRegSet void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IMLPhysRegisterSet& physRegSet) { - //uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1; for (auto& subrange : range->list_subranges) { IMLSegment* imlSegment = subrange->imlSegment; @@ -235,9 +245,10 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* 
range, IML continue; } - if (subrange->start.index < subrangeItr->end.index && subrange->end.index > subrangeItr->start.index || - (subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) || - (subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index) ) + //if (subrange->start.index < subrangeItr->end.index && subrange->end.index > subrangeItr->start.index || + // (subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) || + // (subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index) ) + if(IsRangeOverlapping(subrange, subrangeItr)) { if (subrangeItr->range->physicalRegister >= 0) physRegSet.SetReserved(subrangeItr->range->physicalRegister); @@ -272,19 +283,6 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) } // sort std::sort(subrangeList, subrangeList + count, _livenessRangeStartCompare); - //for (sint32 i1 = 0; i1 < count; i1++) - //{ - // for (sint32 i2 = i1+1; i2 < count; i2++) - // { - // if (subrangeList[i1]->start.index > subrangeList[i2]->start.index) - // { - // // swap - // raLivenessSubrange_t* temp = subrangeList[i1]; - // subrangeList[i1] = subrangeList[i2]; - // subrangeList[i2] = temp; - // } - // } - //} // reassemble linked list subrangeList[count] = nullptr; imlSegment->raInfo.linkedList_allSubranges = subrangeList[0]; @@ -478,6 +476,7 @@ bool PPCRecRA_assignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGen } } // evaluate strategy: Split current range to fit in available holes + // todo - are checks required to avoid splitting on the suffix instruction? 
spillStrategies.availableRegisterHole.cost = INT_MAX; spillStrategies.availableRegisterHole.distance = -1; spillStrategies.availableRegisterHole.physRegister = -1; @@ -770,6 +769,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, liveInfo.liveRangesCount = 0; sint32 index = 0; sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0; + //sint32 suffixInstructionIndex = imlSegment->imlList.size() - suffixInstructionCount; // if no suffix instruction exists this matches instruction count // load register ranges that are supplied from previous segments raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) @@ -806,7 +806,8 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, if (virtualReg2PhysReg[liverange->range->virtualRegister] == -1) assert_dbg(); virtualReg2PhysReg[liverange->range->virtualRegister] = -1; - // store GPR + // store GPR if required + // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed if (liverange->hasStore) { PPCRecRA_insertGPRStoreInstruction(imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), liverange->range->physicalRegister, liverange->range->name); @@ -827,6 +828,13 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; liveInfo.liveRangesCount++; // load GPR + // similar to stores, any loads for the next segment need to happen before the suffix instruction + // however, starting 17-12-2022 ranges that exit the segment at the end but do not cover the suffix instruction are illegal (e.g. 
RA_INTER_RANGE_END to RA_INTER_RANGE_END subrange) + // the limitation that name loads (for the follow-up segments) need to happen before the suffix instruction require that the range also reflects this, otherwise the RA would erroneously assume registers to be available during the suffix instruction + if (imlSegment->HasSuffixInstruction()) + { + cemu_assert_debug(subrangeItr->start.index <= imlSegment->GetSuffixInstructionIndex()); + } if (subrangeItr->_noLoad == false) { PPCRecRA_insertGPRLoadInstruction(imlSegment, std::min(index, imlSegment->imlList.size() - suffixInstructionCount), subrangeItr->range->physicalRegister, subrangeItr->range->name); @@ -839,7 +847,8 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, } subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; } - // replace registers + // rewrite registers + // todo - this can be simplified by using a map or lookup table rather than a check + 4 slot translation table if (index < imlSegment->imlList.size()) { IMLUsedRegisters gprTracking; @@ -1004,7 +1013,6 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext); PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext); - PPCRecRA_assignRegisters(ctx, ppcImlGenContext); PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext); @@ -1095,6 +1103,15 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlG PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range); } } + // for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction + // this is due to range load instructions being inserted before the suffix instruction + if (subrange->end.index == RA_INTER_RANGE_END) + { + if (imlSegment->HasSuffixInstruction()) + { + cemu_assert_debug(subrange->start.index <= imlSegment->GetSuffixInstructionIndex()); + } + } return subrange; } @@ -1155,7 +1172,10 @@ 
void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, IM { if (_isRangeDefined(imlSegment, vGPR) == false) { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END; + if(imlSegment->HasSuffixInstruction()) + imlSegment->raDistances.reg[vGPR].usageStart = imlSegment->GetSuffixInstructionIndex(); + else + imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END; imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; return; } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp index 2b2c56a2..f3b6834f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp @@ -16,6 +16,12 @@ bool IMLSegment::HasSuffixInstruction() const return imlInstruction.IsSuffixInstruction(); } +sint32 IMLSegment::GetSuffixInstructionIndex() const +{ + cemu_assert_debug(HasSuffixInstruction()); + return (sint32)(imlList.size() - 1); +} + IMLInstruction* IMLSegment::GetLastInstruction() { if (imlList.empty()) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index 7ea7903b..add7098e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -135,6 +135,7 @@ struct IMLSegment IMLInstruction* AppendInstruction(); bool HasSuffixInstruction() const; + sint32 GetSuffixInstructionIndex() const; IMLInstruction* GetLastInstruction(); // segment points diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index b4fc62d0..ed8bee87 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -167,6 +167,20 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } + + uint32 ppcRecLowerAddr = LaunchSettings::GetPPCRecLowerAddr(); + uint32 
ppcRecUpperAddr = LaunchSettings::GetPPCRecUpperAddr(); + + if (ppcRecLowerAddr != 0 && ppcRecUpperAddr != 0) + { + + if (ppcRecFunc->ppcAddress < ppcRecLowerAddr || ppcRecFunc->ppcAddress > ppcRecUpperAddr) + { + delete ppcRecFunc; + return nullptr; + } + } + // apply passes if (!PPCRecompiler_ApplyIMLPasses(ppcImlGenContext)) { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 4843fd1c..0a5bd8ab 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -28,7 +28,7 @@ uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGe // IML instruction generation void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction); -void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode); +void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister, uint32 crMode); void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 40844bb2..c5ec96c8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ 
b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -58,15 +58,15 @@ IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister, uint8 crMode) { - // operation with two register operands (e.g. "t0 = t1") - if(imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_R_R; - imlInstruction->operation = operation; - imlInstruction->crRegister = crRegister; - imlInstruction->crMode = crMode; - imlInstruction->op_r_r.registerResult = registerResult; - imlInstruction->op_r_r.registerA = registerA; + if (imlInstruction) + __debugbreak(); // not supported + + ppcImlGenContext->emitInst().make_r_r(operation, registerResult, registerA, crRegister, crMode); +} + +void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister, uint32 crMode) +{ + ppcImlGenContext->emitInst().make_r_s32(operation, registerIndex, immS32, crRegister, crMode); } void PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0) @@ -89,20 +89,6 @@ void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcIm imlInstruction->op_r_name.name = name; } -void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode) -{ - // two variations: - // operation without store (e.g. 
"'r3' < 123" which has no effect other than updating a condition flags register) - // operation with store (e.g. "'r3' = 123") - IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_R_S32; - imlInstruction->operation = operation; - imlInstruction->crRegister = crRegister; - imlInstruction->crMode = crMode; - imlInstruction->op_r_immS32.registerIndex = registerIndex; - imlInstruction->op_r_immS32.immS32 = immS32; -} - void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) { if(imlInstruction == NULL) @@ -292,6 +278,13 @@ uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenCo return registerIndex; } +// get throw-away register. Only valid for the scope of a single translated instruction +// be careful to not collide with manually loaded temporary register +uint32 PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext_t* ppcImlGenContext, uint32 temporaryIndex) +{ + return PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + temporaryIndex); +} + /* * Loads a PPC fpr into any of the available IML FPU registers * If loadNew is false, it will check first if the fpr is already loaded into any IML register @@ -407,7 +400,7 @@ bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rD, rA, rB; PPC_OPC_TEMPL_X(opcode, rD, rA, rB); uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MFCR, gprReg, 0, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MFCR, gprReg, 0, PPC_REC_INVALID_REGISTER, 0); 
return true; } @@ -417,7 +410,7 @@ bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 crMask; PPC_OPC_TEMPL_XFX(opcode, rS, crMask); uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MTCRF, gprReg, crMask, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MTCRF, gprReg, crMask, PPC_REC_INVALID_REGISTER, 0); return true; } @@ -453,7 +446,7 @@ void PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 b = imm; // load gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, 0, false, false, cr, PPCREC_CR_MODE_COMPARE_SIGNED); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, cr, PPCREC_CR_MODE_COMPARE_SIGNED); } void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -466,7 +459,7 @@ void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco uint32 b = imm; // load gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, 0, false, false, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); } bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount) @@ -628,11 +621,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) 
if( ignoreCondition == false ) return false; // not supported for the moment uint32 ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_SUB, ctrRegister, 1, 0, false, false, PPCREC_CR_REG_TEMP, PPCREC_CR_MODE_ARITHMETIC); - if( decrementerMustBeZero ) - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, PPCREC_JUMP_CONDITION_E, PPCREC_CR_REG_TEMP, 0, false); - else - PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, PPCREC_JUMP_CONDITION_NE, PPCREC_CR_REG_TEMP, 0, false); + uint32 tmpBoolReg = PPCRecompilerImlGen_grabTemporaryS8Register(ppcImlGenContext, 1); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SUB, ctrRegister, ctrRegister, 1); + ppcImlGenContext->emitInst().make_compare_s32(ctrRegister, 0, tmpBoolReg, decrementerMustBeZero ? IMLCondition::EQ : IMLCondition::NEQ); + ppcImlGenContext->emitInst().make_conditional_jump_new(tmpBoolReg, true); return true; } else @@ -709,7 +701,7 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco branchDestReg = tmpRegister; } uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, PPC_REC_INVALID_REGISTER, 0); } if (!BO.decrementerIgnore()) @@ -901,7 +893,7 @@ bool PPCRecompilerImlGen_ADDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // rA not used, instruction is value assignment // rD = imm uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, 
PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, imm, PPC_REC_INVALID_REGISTER, 0); } // never updates any cr return true; @@ -924,7 +916,7 @@ bool PPCRecompilerImlGen_ADDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // rA not used, instruction turns into simple value assignment // rD = imm uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } // never updates any cr return true; @@ -1170,15 +1162,15 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc if( registerRA != registerRS ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, registerRA, registerRS); if( SH != 0 ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH, PPC_REC_INVALID_REGISTER, 0); if(opcode&PPC_OPC_RC) { - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, PPCREC_CR_MODE_LOGICAL); } else { if( mask != 0xFFFFFFFF ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, 
registerRA, (sint32)mask, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, PPC_REC_INVALID_REGISTER, 0); } return true; } @@ -1213,12 +1205,12 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB); if (opcode & PPC_OPC_RC) { - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 32, false, false, 0, PPCREC_CR_MODE_LOGICAL); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, PPCREC_CR_MODE_LOGICAL); } else { if( mask != 0xFFFFFFFF ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 32, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, PPC_REC_INVALID_REGISTER, 0); } return true; } @@ -1438,7 +1430,7 @@ void PPCRecompilerImlGen_LWZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load memory gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1482,7 +1474,7 @@ void 
PPCRecompilerImlGen_LHAU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load memory gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1527,7 +1519,7 @@ void PPCRecompilerImlGen_LHZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load memory gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1571,7 +1563,7 @@ void PPCRecompilerImlGen_LBZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // load memory gpr into register uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + 
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // check if destination register is already loaded uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); if( destinationRegister == PPC_REC_INVALID_REGISTER ) @@ -1880,12 +1872,12 @@ void PPCRecompilerImlGen_STWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister // add imm to memory register early if possible if( rD != rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // store word PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 32, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } void PPCRecompilerImlGen_STH(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1924,12 +1916,12 @@ void PPCRecompilerImlGen_STHU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister // add imm to memory register early if possible if( rD != rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, 
false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // store word PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 16, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } void PPCRecompilerImlGen_STB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -1968,12 +1960,12 @@ void PPCRecompilerImlGen_STBU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister // add imm to memory register early if possible if( rD != rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // store byte PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 8, true); // add imm to memory register late if we couldn't do it early if( rD == rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } // generic indexed store (STWX, STHX, STBX, STWUX. 
If bitReversed == true -> STHBRX) @@ -2481,7 +2473,7 @@ void PPCRecompilerImlGen_ANDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( gprDestReg != gprSourceReg ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA &= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, PPCREC_CR_MODE_LOGICAL); } void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2496,7 +2488,7 @@ void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( gprDestReg != gprSourceReg ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA &= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, PPCREC_CR_MODE_LOGICAL); } bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2623,7 +2615,7 @@ void PPCRecompilerImlGen_ORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode if( gprDestReg != gprSourceReg ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 
opcode) @@ -2639,7 +2631,7 @@ void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( gprDestReg != gprSourceReg ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2655,7 +2647,7 @@ void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod if( gprDestReg != gprSourceReg ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2671,7 +2663,7 @@ void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco if( gprDestReg != gprSourceReg ) PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } bool PPCRecompilerImlGen_CROR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) diff --git 
a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index fddc5293..da53ea55 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -163,7 +163,7 @@ bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); if( ppcImlGenContext->LSQE ) @@ -258,7 +258,7 @@ bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); // emit load iml @@ -326,7 +326,7 @@ bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - 
PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); @@ -412,7 +412,7 @@ bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); @@ -1114,7 +1114,7 @@ bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc // get memory gpr register index uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // paired load @@ -1165,7 +1165,7 @@ bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 op // get memory gpr register index uint32 
gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); // get fpr register index uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); // paired store diff --git a/src/config/LaunchSettings.cpp b/src/config/LaunchSettings.cpp index 32a069c6..d68eb412 100644 --- a/src/config/LaunchSettings.cpp +++ b/src/config/LaunchSettings.cpp @@ -13,6 +13,7 @@ #include "util/crypto/aes128.h" #include "Cafe/Filesystem/FST/FST.h" +#include "util/helpers/StringHelpers.h" void requireConsole(); @@ -74,7 +75,9 @@ bool LaunchSettings::HandleCommandline(const std::vector& args) po::options_description hidden{ "Hidden options" }; hidden.add_options() ("nsight", po::value()->implicit_value(true), "NSight debugging options") - ("legacy", po::value()->implicit_value(true), "Intel legacy graphic mode"); + ("legacy", po::value()->implicit_value(true), "Intel legacy graphic mode") + ("ppcrec-lower-addr", po::value(), "For debugging: Lower address allowed for PPC recompilation") + ("ppcrec-upper-addr", po::value(), "For debugging: Upper address allowed for PPC recompilation"); po::options_description extractor{ "Extractor tool" }; extractor.add_options() @@ -186,6 +189,20 @@ bool LaunchSettings::HandleCommandline(const std::vector& args) if (vm.count("output")) log_path = vm["output"].as(); + // recompiler range limit for debugging + if (vm.count("ppcrec-lower-addr")) + { + uint32 addr = (uint32)StringHelpers::ToInt64(vm["ppcrec-lower-addr"].as()); + ppcRec_limitLowerAddr = addr; + } + if (vm.count("ppcrec-upper-addr")) + { + uint32 addr = 
(uint32)StringHelpers::ToInt64(vm["ppcrec-upper-addr"].as()); + ppcRec_limitUpperAddr = addr; + } + if(ppcRec_limitLowerAddr != 0 && ppcRec_limitUpperAddr != 0) + cemuLog_log(LogType::Force, "PPCRec range limited to 0x{:08x}-0x{:08x}", ppcRec_limitLowerAddr, ppcRec_limitUpperAddr); + if(!extract_path.empty()) { ExtractorTool(extract_path, output_path, log_path); diff --git a/src/config/LaunchSettings.h b/src/config/LaunchSettings.h index b0f673a1..074fbb91 100644 --- a/src/config/LaunchSettings.h +++ b/src/config/LaunchSettings.h @@ -29,6 +29,9 @@ public: static std::optional GetPersistentId() { return s_persistent_id; } + static uint32 GetPPCRecLowerAddr() { return ppcRec_limitLowerAddr; }; + static uint32 GetPPCRecUpperAddr() { return ppcRec_limitUpperAddr; }; + private: inline static std::optional s_load_game_file{}; inline static std::optional s_load_title_id{}; @@ -44,6 +47,10 @@ private: inline static std::optional s_persistent_id{}; + // for recompiler debugging + inline static uint32 ppcRec_limitLowerAddr{}; + inline static uint32 ppcRec_limitUpperAddr{}; + static bool ExtractorTool(std::wstring_view wud_path, std::string_view output_path, std::wstring_view log_path); };