PPCRec: Support for arbitrary function calls in the IR

Used for the MFTBU/MFTBL instructions.
This commit is contained in:
Exzap 2024-10-19 02:35:55 +02:00
parent 4517c209d5
commit b55785a0a0
6 changed files with 140 additions and 13 deletions

View file

@ -598,6 +598,13 @@ void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRe
x64GenContext->emitter->AND_di32(regBoolOut, 1); // SETcc doesn't clear the upper bits so we do it manually here x64GenContext->emitter->AND_di32(regBoolOut, 1); // SETcc doesn't clear the upper bits so we do it manually here
} }
// Emits the x64 code for a PPCREC_IML_TYPE_CALL_IMM instruction: a call to the fixed
// address stored in op_call_imm.callAddress.
// Moving parameters into place and spilling volatile registers is the register
// allocator's job, so nothing besides the address load and the call is generated here.
void PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
	auto& codeEmitter = *x64GenContext->emitter;
	const auto targetAddress = imlInstruction->op_call_imm.callAddress;
	// load the 64-bit target into RAX and call through the register
	codeEmitter.MOV_qi64(X86_REG_RAX, targetAddress);
	codeEmitter.CALL_q(X86_REG_RAX);
}
bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{ {
auto regR = _reg32(imlInstruction->op_r_r.regR); auto regR = _reg32(imlInstruction->op_r_r.regR);
@ -1574,6 +1581,10 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
{ {
PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
} }
else if (imlInstruction->type == PPCREC_IML_TYPE_CALL_IMM)
{
PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
}
else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP )
{ {
// no op // no op

View file

@ -222,6 +222,16 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
registersUsed->readGPR3 = op_atomic_compare_store.regWriteValue; registersUsed->readGPR3 = op_atomic_compare_store.regWriteValue;
registersUsed->writtenGPR1 = op_atomic_compare_store.regBoolOut; registersUsed->writtenGPR1 = op_atomic_compare_store.regBoolOut;
} }
else if (type == PPCREC_IML_TYPE_CALL_IMM)
{
if (op_call_imm.regParam0.IsValid())
registersUsed->readGPR1 = op_call_imm.regParam0;
if (op_call_imm.regParam1.IsValid())
registersUsed->readGPR2 = op_call_imm.regParam1;
if (op_call_imm.regParam2.IsValid())
registersUsed->readGPR3 = op_call_imm.regParam2;
registersUsed->writtenGPR1 = op_call_imm.regReturn;
}
else if (type == PPCREC_IML_TYPE_FPR_LOAD) else if (type == PPCREC_IML_TYPE_FPR_LOAD)
{ {
// fpr load operation // fpr load operation
@ -631,6 +641,16 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& tr
op_atomic_compare_store.regWriteValue = replaceRegisterIdMultiple(op_atomic_compare_store.regWriteValue, translationTable); op_atomic_compare_store.regWriteValue = replaceRegisterIdMultiple(op_atomic_compare_store.regWriteValue, translationTable);
op_atomic_compare_store.regBoolOut = replaceRegisterIdMultiple(op_atomic_compare_store.regBoolOut, translationTable); op_atomic_compare_store.regBoolOut = replaceRegisterIdMultiple(op_atomic_compare_store.regBoolOut, translationTable);
} }
else if (type == PPCREC_IML_TYPE_CALL_IMM)
{
op_call_imm.regReturn = replaceRegisterIdMultiple(op_call_imm.regReturn, translationTable);
if (op_call_imm.regParam0.IsValid())
op_call_imm.regParam0 = replaceRegisterIdMultiple(op_call_imm.regParam0, translationTable);
if (op_call_imm.regParam1.IsValid())
op_call_imm.regParam1 = replaceRegisterIdMultiple(op_call_imm.regParam1, translationTable);
if (op_call_imm.regParam2.IsValid())
op_call_imm.regParam2 = replaceRegisterIdMultiple(op_call_imm.regParam2, translationTable);
}
else if (type == PPCREC_IML_TYPE_FPR_LOAD) else if (type == PPCREC_IML_TYPE_FPR_LOAD)
{ {
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable);
@ -757,6 +777,10 @@ void IMLInstruction::ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegist
{ {
; ;
} }
else if (type == PPCREC_IML_TYPE_CALL_IMM)
{
// not affected
}
else if (type == PPCREC_IML_TYPE_FPR_LOAD) else if (type == PPCREC_IML_TYPE_FPR_LOAD)
{ {
op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced);
@ -866,7 +890,11 @@ void IMLInstruction::ReplaceFPR(IMLRegID fprRegisterSearched, IMLRegID fprRegist
} }
else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{ {
; // not affected
}
else if (type == PPCREC_IML_TYPE_CALL_IMM)
{
// not affected
} }
else if (type == PPCREC_IML_TYPE_FPR_LOAD) else if (type == PPCREC_IML_TYPE_FPR_LOAD)
{ {

View file

@ -99,6 +99,7 @@ private:
}; };
static const IMLReg IMLREG_INVALID(IMLRegFormat::INVALID_FORMAT, IMLRegFormat::INVALID_FORMAT, 0, 0); static const IMLReg IMLREG_INVALID(IMLRegFormat::INVALID_FORMAT, IMLRegFormat::INVALID_FORMAT, 0, 0);
static const IMLRegID IMLRegID_INVALID(0xFFFF);
using IMLName = uint32; using IMLName = uint32;
@ -256,6 +257,9 @@ enum
// conditional (legacy) // conditional (legacy)
PPCREC_IML_TYPE_CONDITIONAL_R_S32, PPCREC_IML_TYPE_CONDITIONAL_R_S32,
// function call
PPCREC_IML_TYPE_CALL_IMM, // call to fixed immediate address
// FPR // FPR
PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode) PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode)
PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode) PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode)
@ -516,6 +520,14 @@ struct IMLInstruction
sint32 immS32; sint32 immS32;
}op_storeLoad; }op_storeLoad;
struct struct
{
// operands of PPCREC_IML_TYPE_CALL_IMM (call to a fixed immediate address)
// address of the function to call; the x64 backend loads it into RAX and calls through it
uintptr_t callAddress;
// optional parameter registers; consumers test IsValid() before using each one
// NOTE(review): the register allocator currently asserts that all three are invalid (parameter passing is still todo)
IMLReg regParam0;
IMLReg regParam1;
IMLReg regParam2;
// register that receives the return value; when valid the register allocator pins it to EAX
// and asserts an integer format (float return values are still todo)
IMLReg regReturn;
}op_call_imm;
struct
{ {
IMLReg regR; IMLReg regR;
IMLReg regA; IMLReg regA;
@ -775,6 +787,17 @@ struct IMLInstruction
this->op_atomic_compare_store.regBoolOut = regSuccessOutput; this->op_atomic_compare_store.regBoolOut = regSuccessOutput;
} }
void make_call_imm(uintptr_t callAddress, IMLReg param0, IMLReg param1, IMLReg param2, IMLReg regReturn)
{
this->type = PPCREC_IML_TYPE_CALL_IMM;
this->operation = 0;
this->op_call_imm.callAddress = callAddress;
this->op_call_imm.regParam0 = param0;
this->op_call_imm.regParam1 = param1;
this->op_call_imm.regParam2 = param2;
this->op_call_imm.regReturn = regReturn;
}
void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond) void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond)
{ {
this->type = PPCREC_IML_TYPE_FPR_COMPARE; this->type = PPCREC_IML_TYPE_FPR_COMPARE;

View file

@ -60,8 +60,8 @@ struct IMLFixedRegisters
IMLReg reg; IMLReg reg;
IMLPhysRegisterSet physRegSet; IMLPhysRegisterSet physRegSet;
}; };
boost::container::static_vector<Entry, 4> listInput; // fixed registers for input edge boost::container::small_vector<Entry, 4> listInput; // fixed registers for instruction input edge
boost::container::static_vector<Entry, 4> listOutput; // fixed registers for output edge boost::container::small_vector<Entry, 4> listOutput; // fixed registers for instruction output edge
}; };
static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs) static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
@ -86,7 +86,38 @@ static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRe
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX); ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX);
fixedRegs.listInput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); fixedRegs.listInput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps);
} }
// todo - for volatile registers during call, we can emit a bunch of ranges that cover the output edge of the CALL instruction and use a special vGPR to indicate its not an actually mapped register else if(instruction->type == PPCREC_IML_TYPE_CALL_IMM)
{
// parameters (todo)
cemu_assert_debug(!instruction->op_call_imm.regParam0.IsValid());
cemu_assert_debug(!instruction->op_call_imm.regParam1.IsValid());
cemu_assert_debug(!instruction->op_call_imm.regParam2.IsValid());
// return value
if(instruction->op_call_imm.regReturn.IsValid())
{
IMLRegFormat returnFormat = instruction->op_call_imm.regReturn.GetBaseFormat();
bool isIntegerFormat = returnFormat == IMLRegFormat::I64 || returnFormat == IMLRegFormat::I32 || returnFormat == IMLRegFormat::I16 || returnFormat == IMLRegFormat::I8;
cemu_assert_debug(isIntegerFormat); // float return values are still todo
IMLPhysRegisterSet ps;
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX);
fixedRegs.listOutput.emplace_back(instruction->op_call_imm.regReturn, ps);
}
// block volatile registers from being used on the output edge, this makes the RegAlloc store them during the call
IMLPhysRegisterSet ps;
if(!instruction->op_call_imm.regReturn.IsValid())
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RAX);
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RCX);
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RDX);
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R8);
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R9);
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R10);
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R11);
for(int i=0; i<=5; i++)
ps.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE+i); // YMM0-YMM5 are volatile
// for YMM6-YMM15 only the upper 128 bits are volatile which we dont use
fixedRegs.listOutput.emplace_back(IMLREG_INVALID, ps);
}
} }
@ -232,7 +263,7 @@ sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructi
auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput; auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput;
for(auto& fixedRegLoc : fixedRegAccess) for(auto& fixedRegLoc : fixedRegAccess)
{ {
if(fixedRegLoc.reg.GetRegID() != ourRegId) if(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId)
{ {
cemu_assert_debug(fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider cemu_assert_debug(fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider
if(fixedRegLoc.physRegSet.IsAvailable(physRegister)) if(fixedRegLoc.physRegSet.IsAvailable(physRegister))
@ -487,7 +518,7 @@ std::vector<raFixedRegRequirementWithVGPR> IMLRA_BuildSegmentInstructionFixedReg
pos = pos + 1; pos = pos + 1;
for(auto& fixedRegAccess : fixedRegs.listOutput) for(auto& fixedRegAccess : fixedRegs.listOutput)
{ {
frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.GetRegID()); frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid()?fixedRegAccess.reg.GetRegID():IMLRegID_INVALID);
} }
index++; index++;
} }
@ -556,7 +587,8 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment
continue; continue;
boost::container::small_vector<raLivenessRange*, 8> overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg); boost::container::small_vector<raLivenessRange*, 8> overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg);
cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement if(entry.regId != IMLRegID_INVALID)
cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement, except for IMLRegID_INVALID which is used to indicate reserved registers
for(auto& range : overlappingRanges) for(auto& range : overlappingRanges)
{ {
@ -1013,7 +1045,7 @@ void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocato
auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput; auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput;
for(auto& fixedRegLoc : fixedRegAccess) for(auto& fixedRegLoc : fixedRegAccess)
{ {
if(fixedRegLoc.reg.GetRegID() != ourRegId) if(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId)
candidatePhysRegSet.RemoveRegisters(fixedRegLoc.physRegSet); candidatePhysRegSet.RemoveRegisters(fixedRegLoc.physRegSet);
} }
} }
@ -1451,11 +1483,13 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML
GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs); GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs);
for(auto& fixedRegAccess : fixedRegs.listInput) for(auto& fixedRegAccess : fixedRegs.listInput)
{ {
AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet); if(fixedRegAccess.reg != IMLREG_INVALID)
AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet);
} }
for(auto& fixedRegAccess : fixedRegs.listOutput) for(auto& fixedRegAccess : fixedRegs.listOutput)
{ {
AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet); if(fixedRegAccess.reg != IMLREG_INVALID)
AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet);
} }
index++; index++;
} }

View file

@ -152,6 +152,20 @@ void PPCRecRARange_addLink_allSegmentRanges(raLivenessRange** root, raLivenessRa
void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map<IMLRegID, raLivenessRange*>& root, raLivenessRange* subrange) void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map<IMLRegID, raLivenessRange*>& root, raLivenessRange* subrange)
{ {
#ifdef CEMU_DEBUG_ASSERT
raLivenessRange* cur = root.find(subrange->GetVirtualRegister())->second;
bool hasRangeFound = false;
while(cur)
{
if(cur == subrange)
{
hasRangeFound = true;
break;
}
cur = cur->link_sameVirtualRegister.next;
}
cemu_assert_debug(hasRangeFound);
#endif
IMLRegID regId = subrange->GetVirtualRegister(); IMLRegID regId = subrange->GetVirtualRegister();
raLivenessRange* nextRange = subrange->link_sameVirtualRegister.next; raLivenessRange* nextRange = subrange->link_sameVirtualRegister.next;
raLivenessRange* prevRange = subrange->link_sameVirtualRegister.prev; raLivenessRange* prevRange = subrange->link_sameVirtualRegister.prev;
@ -169,6 +183,7 @@ void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map<IMLRegID, raLiven
} }
else else
{ {
cemu_assert_debug(root.find(regId)->second == subrange);
root.erase(regId); root.erase(regId);
} }
} }

View file

@ -6,6 +6,7 @@
#include "IML/IML.h" #include "IML/IML.h"
#include "IML/IMLRegisterAllocatorRanges.h" #include "IML/IMLRegisterAllocatorRanges.h"
#include "PPCFunctionBoundaryTracker.h" #include "PPCFunctionBoundaryTracker.h"
#include "Cafe/OS/libs/coreinit/coreinit_Time.h"
bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext);
@ -398,15 +399,30 @@ bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
return true; return true;
} }
// Helper whose address is embedded in recompiled code (through a CALL_IMM instruction) to read TBL.
// Returns coreinit::coreinit_getTimerTick() truncated to 32 bits.
ATTR_MS_ABI uint32 PPCRecompiler_GetTBL()
{
	const auto timerTick = coreinit::coreinit_getTimerTick();
	return static_cast<uint32>(timerTick);
}
// Helper whose address is embedded in recompiled code (through a CALL_IMM instruction) to read TBU.
// Returns coreinit::coreinit_getTimerTick() shifted right by 32 and truncated to 32 bits.
ATTR_MS_ABI uint32 PPCRecompiler_GetTBU()
{
	const auto timerTick = coreinit::coreinit_getTimerTick();
	const auto upperPart = timerTick >> 32;
	return static_cast<uint32>(upperPart);
}
bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{ {
printf("PPCRecompilerImlGen_MFTB(): Not supported\n");
return false;
uint32 rD, spr1, spr2, spr; uint32 rD, spr1, spr2, spr;
PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2);
spr = spr1 | (spr2<<5); spr = spr1 | (spr2<<5);
if( spr == SPR_TBL || spr == SPR_TBU )
{
IMLReg resultReg = _GetRegGPR(ppcImlGenContext, rD);
ppcImlGenContext->emitInst().make_call_imm(spr == SPR_TBL ? (uintptr_t)PPCRecompiler_GetTBL : (uintptr_t)PPCRecompiler_GetTBU, IMLREG_INVALID, IMLREG_INVALID, IMLREG_INVALID, resultReg);
return true;
}
return false;
if (spr == 268 || spr == 269) if (spr == 268 || spr == 269)
{ {
// TBL / TBU // TBL / TBU