diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index 93fedf3d..37ee6f05 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -598,6 +598,13 @@ void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRe x64GenContext->emitter->AND_di32(regBoolOut, 1); // SETcc doesn't clear the upper bits so we do it manually here } +void PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + // the register allocator takes care of spilling volatile registers and moving parameters to the right registers, so we don't need to do any special handling here + x64GenContext->emitter->MOV_qi64(X86_REG_RAX, imlInstruction->op_call_imm.callAddress); + x64GenContext->emitter->CALL_q(X86_REG_RAX); +} + bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { auto regR = _reg32(imlInstruction->op_r_r.regR); @@ -1574,6 +1581,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo { PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); } + else if (imlInstruction->type == PPCREC_IML_TYPE_CALL_IMM) + { + PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) { // no op diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index 53841baf..4b56ff94 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -222,6 +222,16 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readGPR3 = op_atomic_compare_store.regWriteValue; registersUsed->writtenGPR1 = op_atomic_compare_store.regBoolOut; } + else if (type == PPCREC_IML_TYPE_CALL_IMM) + { + if (op_call_imm.regParam0.IsValid()) + registersUsed->readGPR1 = op_call_imm.regParam0; + if (op_call_imm.regParam1.IsValid()) + registersUsed->readGPR2 = op_call_imm.regParam1; + if (op_call_imm.regParam2.IsValid()) + registersUsed->readGPR3 = op_call_imm.regParam2; + registersUsed->writtenGPR1 = op_call_imm.regReturn; + } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { // fpr load operation @@ -631,6 +641,16 @@ void IMLInstruction::RewriteGPR(const std::unordered_map& tr op_atomic_compare_store.regWriteValue = replaceRegisterIdMultiple(op_atomic_compare_store.regWriteValue, translationTable); op_atomic_compare_store.regBoolOut = replaceRegisterIdMultiple(op_atomic_compare_store.regBoolOut, translationTable); } + else if (type == PPCREC_IML_TYPE_CALL_IMM) + { + op_call_imm.regReturn = replaceRegisterIdMultiple(op_call_imm.regReturn, translationTable); + if (op_call_imm.regParam0.IsValid()) + op_call_imm.regParam0 = replaceRegisterIdMultiple(op_call_imm.regParam0, translationTable); + if (op_call_imm.regParam1.IsValid()) + op_call_imm.regParam1 = replaceRegisterIdMultiple(op_call_imm.regParam1, translationTable); + if (op_call_imm.regParam2.IsValid()) + op_call_imm.regParam2 = replaceRegisterIdMultiple(op_call_imm.regParam2, translationTable); + } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); @@ -757,6 +777,10 @@ void IMLInstruction::ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegist { ; } + else if (type == PPCREC_IML_TYPE_CALL_IMM) + { + // not affected + } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); @@ -866,7 +890,11 @@ void IMLInstruction::ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegist } else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) { - ; + // not affected + } + else if (type == PPCREC_IML_TYPE_CALL_IMM) + { + // not affected } else if (type == PPCREC_IML_TYPE_FPR_LOAD) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 7594bc9f..e7c58e8e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -99,6 +99,7 @@ private: }; static const IMLReg IMLREG_INVALID(IMLRegFormat::INVALID_FORMAT, IMLRegFormat::INVALID_FORMAT, 0, 0); +static const IMLRegID IMLRegID_INVALID(0xFFFF); using IMLName = uint32; @@ -256,6 +257,9 @@ enum // conditional (legacy) PPCREC_IML_TYPE_CONDITIONAL_R_S32, + // function call + PPCREC_IML_TYPE_CALL_IMM, // call to fixed immediate address + // FPR PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode) PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode) @@ -516,6 +520,14 @@ struct IMLInstruction sint32 immS32; }op_storeLoad; struct + { + uintptr_t callAddress; + IMLReg regParam0; + IMLReg regParam1; + IMLReg regParam2; + IMLReg regReturn; + }op_call_imm; + struct { IMLReg regR; IMLReg regA; @@ -775,6 +787,17 @@ struct IMLInstruction this->op_atomic_compare_store.regBoolOut = regSuccessOutput; } + void make_call_imm(uintptr_t callAddress, IMLReg param0, IMLReg param1, IMLReg param2, IMLReg regReturn) + { + this->type = PPCREC_IML_TYPE_CALL_IMM; + this->operation = 0; + this->op_call_imm.callAddress = callAddress; + this->op_call_imm.regParam0 = param0; + this->op_call_imm.regParam1 = param1; + this->op_call_imm.regParam2 = param2; + this->op_call_imm.regReturn = regReturn; + } + void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond) { this->type = PPCREC_IML_TYPE_FPR_COMPARE; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 9b9ce15f..048b316d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -60,8 +60,8 @@ struct IMLFixedRegisters IMLReg reg; IMLPhysRegisterSet physRegSet; }; - boost::container::static_vector listInput; // fixed registers for input edge - boost::container::static_vector listOutput; // fixed registers for output edge + boost::container::small_vector listInput; // fixed registers for instruction input edge + boost::container::small_vector listOutput; // fixed registers for instruction output edge }; static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs) @@ -86,7 +86,38 @@ static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRe ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX); fixedRegs.listInput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); } - // todo - for volatile registers during call, we can emit a bunch of ranges that cover the output edge of the CALL instruction and use a special vGPR to indicate its not an actually mapped register + else if(instruction->type == PPCREC_IML_TYPE_CALL_IMM) + { + // parameters (todo) + cemu_assert_debug(!instruction->op_call_imm.regParam0.IsValid()); + cemu_assert_debug(!instruction->op_call_imm.regParam1.IsValid()); + cemu_assert_debug(!instruction->op_call_imm.regParam2.IsValid()); + // return value + if(instruction->op_call_imm.regReturn.IsValid()) + { + IMLRegFormat returnFormat = instruction->op_call_imm.regReturn.GetBaseFormat(); + bool isIntegerFormat = returnFormat == IMLRegFormat::I64 || returnFormat == IMLRegFormat::I32 || returnFormat == IMLRegFormat::I16 || returnFormat == IMLRegFormat::I8; + cemu_assert_debug(isIntegerFormat); // float return values are still todo + IMLPhysRegisterSet ps; + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX); + fixedRegs.listOutput.emplace_back(instruction->op_call_imm.regReturn, ps); + } + // block volatile registers from being used on the output edge, this makes the RegAlloc store them during the call + IMLPhysRegisterSet ps; + if(!instruction->op_call_imm.regReturn.IsValid()) + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RAX); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RCX); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RDX); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R8); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R9); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R10); + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R11); + for(int i=0; i<=5; i++) + ps.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE+i); // YMM0-YMM5 are volatile + // for YMM6-YMM15 only the upper 128 bits are volatile which we dont use + fixedRegs.listOutput.emplace_back(IMLREG_INVALID, ps); + } + } @@ -232,7 +263,7 @@ sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructi auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput; for(auto& fixedRegLoc : fixedRegAccess) { - if(fixedRegLoc.reg.GetRegID() != ourRegId) + if(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId) { cemu_assert_debug(fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider if(fixedRegLoc.physRegSet.IsAvailable(physRegister)) @@ -487,7 +518,7 @@ std::vector IMLRA_BuildSegmentInstructionFixedReg pos = pos + 1; for(auto& fixedRegAccess : fixedRegs.listOutput) { - frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.GetRegID()); + frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid()?fixedRegAccess.reg.GetRegID():IMLRegID_INVALID); } index++; } @@ -556,7 +587,8 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment continue; boost::container::small_vector overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg); - cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement + if(entry.regId != IMLRegID_INVALID) + cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement, except for IMLRegID_INVALID which is used to indicate reserved registers for(auto& range : overlappingRanges) { @@ -1013,7 +1045,7 @@ void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocato auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput; for(auto& fixedRegLoc : fixedRegAccess) { - if(fixedRegLoc.reg.GetRegID() != ourRegId) + if(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId) candidatePhysRegSet.RemoveRegisters(fixedRegLoc.physRegSet); } } @@ -1451,11 +1483,13 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs); for(auto& fixedRegAccess : fixedRegs.listInput) { - AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet); + if(fixedRegAccess.reg != IMLREG_INVALID) + AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet); } for(auto& fixedRegAccess : fixedRegs.listOutput) { - AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet); + if(fixedRegAccess.reg != IMLREG_INVALID) + AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet); } index++; } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp index 2c041ee3..270a133a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp @@ -152,6 +152,20 @@ void PPCRecRARange_addLink_allSegmentRanges(raLivenessRange** root, raLivenessRa void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map& root, raLivenessRange* subrange) { +#ifdef CEMU_DEBUG_ASSERT + raLivenessRange* cur = root.find(subrange->GetVirtualRegister())->second; + bool hasRangeFound = false; + while(cur) + { + if(cur == subrange) + { + hasRangeFound = true; + break; + } + cur = cur->link_sameVirtualRegister.next; + } + cemu_assert_debug(hasRangeFound); +#endif IMLRegID regId = subrange->GetVirtualRegister(); raLivenessRange* nextRange = subrange->link_sameVirtualRegister.next; raLivenessRange* prevRange = subrange->link_sameVirtualRegister.prev; @@ -169,6 +183,7 @@ void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_mapsecond == subrange); root.erase(regId); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index b637b594..ed3cfa1e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -6,6 +6,7 @@ #include "IML/IML.h" #include "IML/IMLRegisterAllocatorRanges.h" #include "PPCFunctionBoundaryTracker.h" +#include "Cafe/OS/libs/coreinit/coreinit_Time.h" bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); @@ -398,15 +399,30 @@ bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return true; } +ATTR_MS_ABI uint32 PPCRecompiler_GetTBL() +{ + return (uint32)coreinit::coreinit_getTimerTick(); +} + +ATTR_MS_ABI uint32 PPCRecompiler_GetTBU() +{ + return (uint32)(coreinit::coreinit_getTimerTick() >> 32); +} + bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - printf("PPCRecompilerImlGen_MFTB(): Not supported\n"); - return false; - uint32 rD, spr1, spr2, spr; PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); spr = spr1 | (spr2<<5); + if( spr == SPR_TBL || spr == SPR_TBU ) + { + IMLReg resultReg = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_call_imm(spr == SPR_TBL ? (uintptr_t)PPCRecompiler_GetTBL : (uintptr_t)PPCRecompiler_GetTBU, IMLREG_INVALID, IMLREG_INVALID, IMLREG_INVALID, resultReg); + return true; + } + return false; + if (spr == 268 || spr == 269) { // TBL / TBU