mirror of https://github.com/cemu-project/Cemu.git (synced 2025-04-29 14:59:26 -04:00)

PPCRec: Fixes and optimizations + rework FRES/FRSQRTE

parent 89f8f9bd2a, commit f94f99546d

13 changed files with 408 additions and 354 deletions
@@ -32,7 +32,7 @@ espresso_frsqrte_entry_t frsqrteLookupTable[32] =
 	{0x20c1000, 0x35e},{0x1f12000, 0x332},{0x1d79000, 0x30a},{0x1bf4000, 0x2e6},
 };
 
-double frsqrte_espresso(double input)
+ATTR_MS_ABI double frsqrte_espresso(double input)
 {
 	unsigned long long x = *(unsigned long long*)&input;
@@ -111,7 +111,7 @@ espresso_fres_entry_t fresLookupTable[32] =
 	{0x88400, 0x11a}, {0x65000, 0x11a}, {0x41c00, 0x108}, {0x20c00, 0x106}
 };
 
-double fres_espresso(double input)
+ATTR_MS_ABI double fres_espresso(double input)
 {
 	// based on testing we know that fres uses only the first 15 bits of the mantissa
 	// seee eeee eeee mmmm mmmm mmmm mmmx xxxx .... (s = sign, e = exponent, m = mantissa, x = not used)
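The bit-layout comment above is the key to the table-based implementation. As a side note, here is a minimal standalone sketch (ours, not part of the commit; the helper name fres_split is hypothetical) of pulling those fields out of an IEEE-754 double:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Hypothetical helper: split a double into the fields fres operates on.
// Per the comment in the diff, only the top 15 of the 52 mantissa bits matter.
static void fres_split(double input, uint64_t& sign, uint64_t& exponent, uint64_t& mantissa15)
{
	uint64_t x;
	std::memcpy(&x, &input, sizeof(x)); // well-defined alternative to the pointer cast used above
	sign = x >> 63;
	exponent = (x >> 52) & 0x7FF;
	mantissa15 = (x >> 37) & 0x7FFF; // top 15 mantissa bits
}

int main()
{
	uint64_t s, e, m;
	fres_split(0.5, s, e, m);
	std::printf("s=%llu e=0x%llx m=0x%llx\n", (unsigned long long)s, (unsigned long long)e, (unsigned long long)m);
}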
@@ -191,8 +191,8 @@ inline double roundTo25BitAccuracy(double d)
 	return *(double*)&v;
 }
 
-double fres_espresso(double input);
-double frsqrte_espresso(double input);
+ATTR_MS_ABI double fres_espresso(double input);
+ATTR_MS_ABI double frsqrte_espresso(double input);
 
 void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b);
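ATTR_MS_ABI pins these helpers to a single calling convention so the recompiler can emit one fixed calling sequence on every host platform. A hedged sketch of how such a macro is typically defined (we have not checked Cemu's actual definition; this is an assumption):

#ifdef _MSC_VER
#define ATTR_MS_ABI                         // MSVC targets already use the MS x64 convention
#else
#define ATTR_MS_ABI __attribute__((ms_abi)) // force it on GCC/Clang hosts that default to System V
#endif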
@@ -601,8 +601,10 @@ void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRe
 
 void PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
 {
 	// the register allocator takes care of spilling volatile registers and moving parameters to the right registers, so we don't need to do any special handling here
 	x64GenContext->emitter->SUB_qi8(X86_REG_RSP, 0x28); // reserve enough space for any parameters while keeping stack alignment of 16 intact
 	x64GenContext->emitter->MOV_qi64(X86_REG_RAX, imlInstruction->op_call_imm.callAddress);
 	x64GenContext->emitter->CALL_q(X86_REG_RAX);
 	x64GenContext->emitter->ADD_qi8(X86_REG_RSP, 0x28);
 }
 
 bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
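A quick arithmetic check of the 0x28 constant (our reasoning; the commit does not spell it out). Under the MS x64 convention a CALL site must have RSP 16-byte aligned and the callee is owed 32 bytes of shadow space:

#include <cstdio>

int main()
{
	// assumption (ours): the JIT block was itself entered via CALL, so on entry RSP % 16 == 8
	unsigned rspMod16 = 8;
	// SUB RSP, 0x28: 0x28 == 40 == 32 bytes shadow space (4 register args) + 8 bytes realignment
	rspMod16 = (rspMod16 + 16 - (0x28 % 16)) % 16;
	std::printf("RSP %% 16 at the CALL site: %u\n", rspMod16); // prints 0, as the ABI requires
}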
@@ -780,18 +780,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
 		// move to FPR register
 		x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP);
 	}
-	else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP )
-	{
-		// move register to XMM15
-		x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
-
-		// call assembly routine to calculate accurate FRES result in XMM15
-		x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres);
-		x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP);
-
-		// copy result to bottom and top half of result register
-		x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
-	}
 	else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT)
 	{
 		// move register to XMM15
@@ -363,7 +363,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
 			operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ||
 			operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ||
 			operation == PPCREC_IML_OP_ASSIGN ||
-			operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP ||
 			operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ||
 			operation == PPCREC_IML_OP_FPR_ABS_PAIR ||
 			operation == PPCREC_IML_OP_FPR_FRES_PAIR ||
@@ -143,7 +143,6 @@ enum
 	PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched
 	PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED,
 	PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half
-	PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half
 	PPCREC_IML_OP_FPR_FCMPO_BOTTOM, // deprecated
 	PPCREC_IML_OP_FPR_FCMPU_BOTTOM, // deprecated
 	PPCREC_IML_OP_FPR_FCMPU_TOP, // deprecated
@@ -10,9 +10,16 @@
 #include <boost/container/static_vector.hpp>
 #include <boost/container/small_vector.hpp>
 
+#include "Common/cpu_features.h"
+
+#define DEBUG_RA_EXTRA_VALIDATION 0 // if set to non-zero, additional expensive validation checks will be performed
+#define DEBUG_RA_INSTRUCTION_GEN 0
+
 struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment
 {
-	IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd) : regBaseFormat(regBaseFormat), usageStart(usageStart), usageEnd(usageEnd) {};
+	IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd)
+		: regBaseFormat(regBaseFormat), usageStart(usageStart), usageEnd(usageEnd) {};
 
 	void TrackInstruction(sint32 index)
 	{
@@ -34,7 +41,6 @@ struct IMLRegisterAllocatorContext
 	std::unordered_map<IMLRegID, IMLRegFormat> regIdToBaseFormat; // a vector would be more efficient but it also means that reg ids have to be continuous and not completely arbitrary
 	// first pass
 	std::vector<std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>> perSegmentAbstractRanges;
 	// second pass
 
 	// helper methods
 	inline std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>& GetSegmentAbstractRangeMap(IMLSegment* imlSegment)
@@ -48,38 +54,117 @@ struct IMLRegisterAllocatorContext
 		cemu_assert_debug(it != regIdToBaseFormat.cend());
 		return it->second;
 	}
 
 };
 
 struct IMLFixedRegisters
 {
 	struct Entry
 	{
-		Entry(IMLReg reg, IMLPhysRegisterSet physRegSet) : reg(reg), physRegSet(physRegSet) {}
+		Entry(IMLReg reg, IMLPhysRegisterSet physRegSet)
+			: reg(reg), physRegSet(physRegSet) {}
 
 		IMLReg reg;
 		IMLPhysRegisterSet physRegSet;
 	};
-	boost::container::small_vector<Entry, 4> listInput; // fixed registers for instruction input edge
-	boost::container::small_vector<Entry, 4> listOutput; // fixed registers for instruction output edge
+	boost::container::small_vector<Entry, 4> listInput; // fixed register requirements for instruction input edge
+	boost::container::small_vector<Entry, 4> listOutput; // fixed register requirements for instruction output edge
 };
+static void SetupCallingConvention(const IMLInstruction* instruction, IMLFixedRegisters& fixedRegs, const IMLPhysReg intParamToPhysReg[3], const IMLPhysReg floatParamToPhysReg[3], const IMLPhysReg intReturnPhysReg, const IMLPhysReg floatReturnPhysReg, IMLPhysRegisterSet volatileRegisters)
+{
+	sint32 numIntParams = 0, numFloatParams = 0;
+
+	auto AddParameterMapping = [&](IMLReg reg) {
+		if (!reg.IsValid())
+			return;
+		if (reg.GetBaseFormat() == IMLRegFormat::I64)
+		{
+			IMLPhysRegisterSet ps;
+			ps.SetAvailable(intParamToPhysReg[numIntParams]);
+			fixedRegs.listInput.emplace_back(reg, ps);
+			numIntParams++;
+		}
+		else if (reg.GetBaseFormat() == IMLRegFormat::F64)
+		{
+			IMLPhysRegisterSet ps;
+			ps.SetAvailable(floatParamToPhysReg[numFloatParams]);
+			fixedRegs.listInput.emplace_back(reg, ps);
+			numFloatParams++;
+		}
+		else
+		{
+			cemu_assert_suspicious();
+		}
+	};
+	AddParameterMapping(instruction->op_call_imm.regParam0);
+	AddParameterMapping(instruction->op_call_imm.regParam1);
+	AddParameterMapping(instruction->op_call_imm.regParam2);
+	// return value
+	if (instruction->op_call_imm.regReturn.IsValid())
+	{
+		IMLRegFormat returnFormat = instruction->op_call_imm.regReturn.GetBaseFormat();
+		bool isIntegerFormat = returnFormat == IMLRegFormat::I64 || returnFormat == IMLRegFormat::I32 || returnFormat == IMLRegFormat::I16 || returnFormat == IMLRegFormat::I8;
+		IMLPhysRegisterSet ps;
+		if (isIntegerFormat)
+		{
+			ps.SetAvailable(intReturnPhysReg);
+			volatileRegisters.SetReserved(intReturnPhysReg);
+		}
+		else
+		{
+			ps.SetAvailable(floatReturnPhysReg);
+			volatileRegisters.SetReserved(floatReturnPhysReg);
+		}
+		fixedRegs.listOutput.emplace_back(instruction->op_call_imm.regReturn, ps);
+	}
+	// block volatile registers from being used on the output edge, this makes the register allocator store them during the call
+	fixedRegs.listOutput.emplace_back(IMLREG_INVALID, volatileRegisters);
+}
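As a worked example (ours; the commit does not include it): for the FRES call emitted later in this commit, fres_espresso takes one F64 argument and returns an F64, so under the MS x64 mapping passed in by the x86-64 backend the fixed-register entries come out roughly as:

// input  edge: regParam0 (F64)  -> XMM0  (floatParamToPhysReg[0])
// output edge: regReturn (F64)  -> XMM0  (floatReturnPhysReg)
// output edge: IMLREG_INVALID   -> {RAX, RCX, RDX, R8, R9, R10, R11, XMM0..XMM5}
// The IMLREG_INVALID entry pins no value to a register; it only marks the volatile
// set as clobbered so the allocator spills anything still live across the call.
// SetReserved() presumably removes the return register from that clobber set, so
// the returned value itself is not treated as destroyed.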
+#if defined(__aarch64__)
+// aarch64
+static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
+{
+	fixedRegs.listInput.clear();
+	fixedRegs.listOutput.clear();
+
+	// code below for aarch64 has not been tested
+	// The purpose of GetInstructionFixedRegisters() is to constrain virtual registers to specific physical registers for instructions which need it
+	// on x86 this is used for instructions like SHL <reg>, CL where the CL register is hardwired. On aarch it's probably only necessary for setting up the calling convention
+	cemu_assert_unimplemented();
+#if 0
+	if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
+	{
+		const IMLPhysReg intParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_GPR_BASE + 1, IMLArchAArch64::PHYSREG_GPR_BASE + 2};
+		const IMLPhysReg floatParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_FPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 1, IMLArchAArch64::PHYSREG_FPR_BASE + 2};
+		IMLPhysRegisterSet volatileRegs;
+		for (int i=0; i<19; i++) // x0 to x18 are volatile
+			volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_GPR_BASE + i);
+		for (int i = 0; i <= 31; i++) // which float registers are volatile?
+			volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i);
+		SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 0, volatileRegs);
+	}
+#endif
+}
+#else
+// x86-64
 static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs)
 {
 	fixedRegs.listInput.clear();
 	fixedRegs.listOutput.clear();
 
 	// x86 specific logic is hardcoded for now
 	if (instruction->type == PPCREC_IML_TYPE_R_R_R)
 	{
 		if (instruction->operation == PPCREC_IML_OP_LEFT_SHIFT || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
 		{
 			// todo: We can skip this if g_CPUFeatures.x86.bmi2 is set, but for now we just assume it's not so we can properly test increased register pressure
 			if(!g_CPUFeatures.x86.bmi2)
 			{
 				IMLPhysRegisterSet ps;
 				ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_ECX);
 				fixedRegs.listInput.emplace_back(instruction->op_r_r_r.regB, ps);
 			}
 		}
 	}
 	else if (instruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
 	{
 		IMLPhysRegisterSet ps;
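The ECX pin above exists because legacy x86 only encodes variable shifts with the count in CL. A short illustration (ours) of the two instruction forms involved:

// without BMI2 the count operand is hardwired:
//     mov ecx, countReg
//     shl eax, cl          ; SHL r/m32, CL - the count must live in ECX
// with BMI2 the count can come from any GPR, so no fixed requirement is needed:
//     shlx eax, eax, r9d   ; SHLX dst, src, count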
@@ -88,38 +173,24 @@ static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRe
 	}
 	else if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
 	{
-		// parameters (todo)
-		cemu_assert_debug(!instruction->op_call_imm.regParam0.IsValid());
-		cemu_assert_debug(!instruction->op_call_imm.regParam1.IsValid());
-		cemu_assert_debug(!instruction->op_call_imm.regParam2.IsValid());
-		// return value
-		if(instruction->op_call_imm.regReturn.IsValid())
-		{
-			IMLRegFormat returnFormat = instruction->op_call_imm.regReturn.GetBaseFormat();
-			bool isIntegerFormat = returnFormat == IMLRegFormat::I64 || returnFormat == IMLRegFormat::I32 || returnFormat == IMLRegFormat::I16 || returnFormat == IMLRegFormat::I8;
-			cemu_assert_debug(isIntegerFormat); // float return values are still todo
-			IMLPhysRegisterSet ps;
-			ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_EAX);
-			fixedRegs.listOutput.emplace_back(instruction->op_call_imm.regReturn, ps);
-		}
-		// block volatile registers from being used on the output edge, this makes the RegAlloc store them during the call
-		IMLPhysRegisterSet ps;
-		if(!instruction->op_call_imm.regReturn.IsValid())
-			ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RAX);
-		ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RCX);
-		ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_RDX);
-		ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R8);
-		ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R9);
-		ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R10);
-		ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE+X86_REG_R11);
+		const IMLPhysReg intParamToPhysReg[3] = {IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8};
+		const IMLPhysReg floatParamToPhysReg[3] = {IMLArchX86::PHYSREG_FPR_BASE + 0, IMLArchX86::PHYSREG_FPR_BASE + 1, IMLArchX86::PHYSREG_FPR_BASE + 2};
+		IMLPhysRegisterSet volatileRegs;
+		volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX);
+		volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX);
+		volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX);
+		volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8);
+		volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9);
+		volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10);
+		volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11);
+		// YMM0-YMM5 are volatile
 		for (int i = 0; i <= 5; i++)
-			ps.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE+i); // YMM0-YMM5 are volatile
+			volatileRegs.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + i);
 		// for YMM6-YMM15 only the upper 128 bits are volatile which we dont use
-		fixedRegs.listOutput.emplace_back(IMLREG_INVALID, ps);
+		SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX, IMLArchX86::PHYSREG_FPR_BASE + 0, volatileRegs);
 	}
 
 }
 
+#endif
 
 uint32 PPCRecRA_getNextIterationIndex()
 {
@@ -315,32 +386,6 @@ struct IMLRALivenessTimeline
 		activeRanges.emplace_back(subrange);
 	}
 
-	// remove all ranges from activeRanges with end <= instructionIndex
-	void ExpireRanges(sint32 instructionIndex)
-	{
-		__debugbreak(); // maybe replace calls with raInstructionEdge variant?
-		expiredRanges.clear();
-		size_t count = activeRanges.size();
-		for (size_t f = 0; f < count; f++)
-		{
-			raLivenessRange* liverange = activeRanges[f];
-			if (liverange->interval2.end.GetInstructionIndex() < instructionIndex) // <= to < since end is now inclusive
-			{
-#ifdef CEMU_DEBUG_ASSERT
-				if (instructionIndex != RA_INTER_RANGE_END && (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken))
-					assert_dbg(); // infinite subranges should not expire
-#endif
-				expiredRanges.emplace_back(liverange);
-				// remove entry
-				activeRanges[f] = activeRanges[count-1];
-				f--;
-				count--;
-			}
-		}
-		if(count != activeRanges.size())
-			activeRanges.resize(count);
-	}
-
 	void ExpireRanges(raInstructionEdge expireUpTo)
 	{
 		expiredRanges.clear();
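The deleted overload above removed expired entries with a swap-with-last trick rather than std::vector::erase. A generic, runnable distillation of that pattern (ours; the names are made up):

#include <cstdio>
#include <vector>

// Remove all elements matching pred in O(n), without preserving order:
// overwrite each match with the last live element and shrink once at the end.
template<typename T, typename Pred>
void SwapRemoveIf(std::vector<T>& v, Pred pred)
{
	size_t count = v.size();
	for (size_t f = 0; f < count; f++)
	{
		if (pred(v[f]))
		{
			v[f] = v[count - 1]; // overwrite with last element
			f--;                 // re-check the element we just swapped in
			count--;
		}
	}
	if (count != v.size())
		v.resize(count);
}

int main()
{
	std::vector<int> ends = {3, 7, 1, 9, 2};
	SwapRemoveIf(ends, [](int e) { return e < 4; }); // "expire" ranges ending early
	for (int e : ends)
		std::printf("%d ", e); // prints "9 7" - order is not preserved
	std::printf("\n");
}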
@@ -425,7 +470,10 @@ void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLP
 	}
 }
 
-bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs) { return lhs->interval2.start < rhs->interval2.start; }
+bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs)
+{
+	return lhs->interval2.start < rhs->interval2.start;
+}
 
 void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
 {
@@ -460,7 +508,7 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
 		subrangeList[i]->link_allSegmentRanges.next = subrangeList[i + 1];
 	}
 	// validate list
-#ifdef CEMU_DEBUG_ASSERT
+#if DEBUG_RA_EXTRA_VALIDATION
 	sint32 count2 = 0;
 	subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
 	raInstructionEdge currentStartPosition;
@@ -544,7 +592,7 @@ boost::container::small_vector<raLivenessRange*, 8> IMLRA_GetRangeWithFixedRegRe
 void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
 {
 	// first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border
-	// todo - this can be optimized. Ranges only need to be split if there are conflicts with other segments. Note that below passes rely on the fact that this pass currently splits all ranges with fixed register requirements
+	// todo - this pass currently creates suboptimal results by splitting all ranges that cross the segment border if they have any fixed register requirement. This isn't always necessary
 	for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;)
 	{
 		IMLPhysRegisterSet allowedRegs;
@@ -608,7 +656,6 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment
 				PPCRecRA_splitLocalSubrange2(ppcImlGenContext, range, entry.pos, true);
 			}
 		}
-
 	}
 	// finally iterate ranges and assign fixed registers
@@ -626,7 +673,7 @@ void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment
 		currentRange->SetPhysicalRegister(allowedRegs.GetFirstAvailableReg());
 	}
 	// DEBUG - check for collisions and make sure all ranges with fixed register requirements got their physical register assigned
-#ifdef CEMU_DEBUG_ASSERT
+#if DEBUG_RA_EXTRA_VALIDATION
 	for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next)
 	{
 		IMLPhysRegisterSet allowedRegs;
@@ -964,7 +1011,6 @@ private:
 	} explodeRange;
 };
 
-
 class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy
 {
 public:
@@ -1108,15 +1154,13 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon
 		raInstructionEdge currentRangeStart = subrangeItr->interval2.start; // used to be currentIndex before refactor
 		PPCRecRA_debugValidateSubrange(subrangeItr);
 
-		// below used to be: std::min<sint32>(currentIndex, RA_INTER_RANGE_END-1)
 		livenessTimeline.ExpireRanges((currentRangeStart > lastInstructionEdge) ? lastInstructionEdge : currentRangeStart); // expire up to currentIndex (inclusive), but exclude infinite ranges
 		// note: The logic here is complicated in regards to whether the instruction index should be inclusive or exclusive. Find a way to simplify?
 
 		// if subrange already has register assigned then add it to the active list and continue
 		if (subrangeItr->GetPhysicalRegister() >= 0)
 		{
 			// verify if register is actually available
-#ifdef CEMU_DEBUG_ASSERT
+#if DEBUG_RA_EXTRA_VALIDATION
 			for (auto& liverangeItr : livenessTimeline.activeRanges)
 			{
 				// check for register mismatch
@@ -1176,8 +1220,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon
 			// cant assign register
 			// there might be registers available, we just can't use them due to range conflicts
 			RASpillStrategy* selectedStrategy = nullptr;
-			auto SelectStrategyIfBetter = [&selectedStrategy](RASpillStrategy& newStrategy)
-			{
+			auto SelectStrategyIfBetter = [&selectedStrategy](RASpillStrategy& newStrategy) {
 				if (newStrategy.GetCost() == INT_MAX)
 					return;
 				if (selectedStrategy == nullptr || newStrategy.GetCost() < selectedStrategy->GetCost())
@@ -1366,9 +1409,7 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx
 	}
 	abstractRange->isProcessed = true;
 	// create subrange
-#ifdef CEMU_DEBUG_ASSERT
 	cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr);
-#endif
 	cemu_assert_debug(
 		(abstractRange->usageStart == abstractRange->usageEnd && (abstractRange->usageStart == RA_INTER_RANGE_START || abstractRange->usageStart == RA_INTER_RANGE_END)) ||
 		abstractRange->usageStart < abstractRange->usageEnd); // usageEnd is exclusive so it should always be larger
@@ -1414,16 +1455,6 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx
 			PPCRecRA_convertToMappedRanges(ctx, it, vGPR, name);
 		}
 	}
-	// for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction
-	// this is due to range load instructions being inserted before the suffix instruction
-	// todo - currently later steps might break this assumption, look into this
-	// if (subrange->interval2.ExtendsIntoNextSegment())
-	// {
-	// 	if (imlSegment->HasSuffixInstruction())
-	// 	{
-	// 		cemu_assert_debug(subrange->interval2.start.GetInstructionIndex() <= imlSegment->GetSuffixInstructionIndex());
-	// 	}
-	// }
 	return subrange;
 }
@@ -1432,8 +1463,7 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML
 {
 	const std::unordered_map<IMLRegID, raLivenessRange*>& regToSubrange = IMLRA_GetSubrangeMap(imlSegment);
 
-	auto AddOrUpdateFixedRegRequirement = [&](IMLRegID regId, sint32 instructionIndex, bool isInput, const IMLPhysRegisterSet& physRegSet)
-	{
+	auto AddOrUpdateFixedRegRequirement = [&](IMLRegID regId, sint32 instructionIndex, bool isInput, const IMLPhysRegisterSet& physRegSet) {
 		raLivenessRange* subrange = regToSubrange.find(regId)->second;
 		cemu_assert_debug(subrange);
 		raFixedRegRequirement tmp;
@@ -1624,8 +1654,12 @@ void PPCRecRA_followFlowAndExtendRanges(IMLRegisterAllocatorContext& ctx, IMLSeg
 	list_segments.reserve(segmentCount + 1);
 	list_processedSegment.resize(segmentCount);
 
-	auto markSegProcessed = [&list_processedSegment](IMLSegment* seg) {list_processedSegment[seg->momentaryIndex] = true; };
-	auto isSegProcessed = [&list_processedSegment](IMLSegment* seg) -> bool { return list_processedSegment[seg->momentaryIndex]; };
+	auto markSegProcessed = [&list_processedSegment](IMLSegment* seg) {
+		list_processedSegment[seg->momentaryIndex] = true;
+	};
+	auto isSegProcessed = [&list_processedSegment](IMLSegment* seg) -> bool {
+		return list_processedSegment[seg->momentaryIndex];
+	};
 	markSegProcessed(imlSegment);
 
 	sint32 index = 0;
@@ -1730,10 +1764,8 @@ void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange)
 		subrange->_noLoad = true;
 }
 
-
-
 struct subrangeEndingInfo_t
 {
 	//boost::container::small_vector<raLivenessSubrange_t*, 32> subrangeList2;
 	raLivenessRange* subrangeList[SUBRANGE_LIST_SIZE];
 	sint32 subrangeCount;
@@ -1870,8 +1902,6 @@ inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId)
 	return IMLReg(baseFormat, baseFormat, 0, regId);
 }
 
-#define DEBUG_RA_INSTRUCTION_GEN 0
-
 // prepass for IMLRA_GenerateSegmentMoveInstructions which updates all virtual registers to their physical counterparts
 void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
 {
|
@ -2036,7 +2066,6 @@ void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IM
|
|||
// range expires
|
||||
// we cant erase it from virtId2PhysReg right away because a store might happen before the last use (the +1 thing above)
|
||||
|
||||
|
||||
// todo - check hasStore
|
||||
raLivenessRange* storedRange = *it;
|
||||
if (storedRange->hasStore)
|
||||
|
@@ -2095,8 +2124,34 @@ void IMLRA_GenerateMoveInstructions(IMLRegisterAllocatorContext& ctx)
 	}
 }
 
-void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx)
+static void DbgVerifyFixedRegRequirements(IMLSegment* imlSegment)
+{
+#if DEBUG_RA_EXTRA_VALIDATION
+	std::vector<raFixedRegRequirementWithVGPR> frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment);
+	for(auto& fixedReq : frr)
+	{
+		for (raLivenessRange* range = imlSegment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
+		{
+			if (!range->interval2.ContainsEdge(fixedReq.pos))
+				continue;
+			// verify if the requirement is compatible
+			if(range->GetVirtualRegister() == fixedReq.regId)
+			{
+				cemu_assert(range->HasPhysicalRegister());
+				cemu_assert(fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register matches, but not assigned the right physical register
+			}
+			else
+			{
+				cemu_assert(!fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register does not match, but using the reserved physical register
+			}
+		}
+	}
+#endif
+}
+
+static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx)
 {
+#if DEBUG_RA_EXTRA_VALIDATION
 	for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
 	{
 		IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s];
|
@ -2107,6 +2162,12 @@ void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx)
|
|||
subrangeItr = subrangeItr->link_allSegmentRanges.next;
|
||||
}
|
||||
}
|
||||
// check that no range validates register requirements
|
||||
for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++)
|
||||
{
|
||||
DbgVerifyFixedRegRequirements(ctx.deprGenContext->segmentList2[s]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam)
|
||||
|
@@ -2121,7 +2182,7 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext
 
-	IMLRA_CalculateLivenessRanges(ctx);
+	IMLRA_ProcessFlowAndCalculateLivenessRanges(ctx);
 	IMLRA_AssignRegisters(ctx, ppcImlGenContext);
-	DbgVerifyAllRanges(ctx); // DEBUG
+	DbgVerifyAllRanges(ctx);
 	IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext);
 	IMLRA_GenerateMoveInstructions(ctx);
@@ -17,9 +17,19 @@ public:
 		m_regBitmask &= ~((uint64)1 << index);
 	}
 
+	void SetAllAvailable()
+	{
+		m_regBitmask = ~0ull;
+	}
+
+	bool HasAllAvailable() const
+	{
+		return m_regBitmask == ~0ull;
+	}
+
 	bool IsAvailable(uint32 index) const
 	{
-		return (m_regBitmask & (1 << index)) != 0;
+		return (m_regBitmask & ((uint64)1 << index)) != 0;
 	}
 
 	IMLPhysRegisterSet& operator&=(const IMLPhysRegisterSet& other)
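The IsAvailable change is a genuine width bug fix: the literal 1 is a 32-bit int, so "1 << index" is undefined for index >= 31, and on x86 the shift count is effectively taken mod 32. A runnable demonstration (ours):

#include <cstdint>
#include <cstdio>

int main()
{
	uint32_t index = 40; // FPR entries live above bit 31 (e.g. PHYSREG_FPR_BASE + n)
	uint64_t bitmask = (uint64_t)1 << index;

	// old form: the 32-bit shift is undefined behaviour; on x86 it typically
	// wraps to 1 << (40 % 32) and tests the wrong bit
	bool oldForm = (bitmask & (1 << index)) != 0;
	// fixed form, as in the diff: widen before shifting
	bool newForm = (bitmask & ((uint64_t)1 << index)) != 0;

	std::printf("old=%d new=%d\n", oldForm, newForm); // typically prints old=0 new=1
}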
@@ -67,38 +67,30 @@ boost::container::small_vector<raLivenessRange*, 128> raLivenessRange::GetAllSub
 	return subranges;
 }
 
+void raLivenessRange::GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs)
+{
+	range->lastIterationIndex = iterationIndex;
+	for (auto& it : range->list_fixedRegRequirements)
+		allowedRegs &= it.allowedReg;
+	// check successors
+	if (range->subrangeBranchTaken && range->subrangeBranchTaken->lastIterationIndex != iterationIndex)
+		GetAllowedRegistersExRecursive(range->subrangeBranchTaken, iterationIndex, allowedRegs);
+	if (range->subrangeBranchNotTaken && range->subrangeBranchNotTaken->lastIterationIndex != iterationIndex)
+		GetAllowedRegistersExRecursive(range->subrangeBranchNotTaken, iterationIndex, allowedRegs);
+	// check predecessors
+	for (auto& prev : range->previousRanges)
+	{
+		if (prev->lastIterationIndex != iterationIndex)
+			GetAllowedRegistersExRecursive(prev, iterationIndex, allowedRegs);
+	}
+};
+
 bool raLivenessRange::GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters)
 {
-	if(interval2.ExtendsPreviousSegment() || interval2.ExtendsIntoNextSegment())
-	{
-		auto clusterRanges = GetAllSubrangesInCluster();
-		bool hasAnyRequirement = false;
-		for(auto& subrange : clusterRanges)
-		{
-			if(subrange->list_fixedRegRequirements.empty())
-				continue;
-			allowedRegisters = subrange->list_fixedRegRequirements.front().allowedReg;
-			hasAnyRequirement = true;
-			break;
-		}
-		if(!hasAnyRequirement)
-			return false;
-		for(auto& subrange : clusterRanges)
-		{
-			for(auto& fixedRegLoc : subrange->list_fixedRegRequirements)
-				allowedRegisters &= fixedRegLoc.allowedReg;
-		}
-	}
-	else
-	{
-		// local check only, slightly faster
-		if(list_fixedRegRequirements.empty())
-			return false;
-		allowedRegisters = list_fixedRegRequirements.front().allowedReg;
-		for(auto& fixedRegLoc : list_fixedRegRequirements)
-			allowedRegisters &= fixedRegLoc.allowedReg;
-	}
-	return true;
+	uint32 iterationIndex = PPCRecRA_getNextIterationIndex();
+	allowedRegisters.SetAllAvailable();
+	GetAllowedRegistersExRecursive(this, iterationIndex, allowedRegisters);
+	return !allowedRegisters.HasAllAvailable();
 }
 
 IMLPhysRegisterSet raLivenessRange::GetAllowedRegisters(IMLPhysRegisterSet regPool)
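GetAllowedRegistersExRecursive walks the whole connected cluster of ranges (branch successors and predecessors) and uses an iteration index instead of a clear-before-use visited flag: bumping one global counter marks a fresh traversal, so no per-node reset pass is needed. A minimal runnable sketch of that pattern (ours, with generic names):

#include <cstdint>
#include <vector>

// Each node remembers the index of the last walk that touched it.
struct Node
{
	uint32_t lastIterationIndex = 0;
	std::vector<Node*> neighbors;
};

static uint32_t g_iterationCounter = 0;

void Visit(Node* n, uint32_t iterationIndex)
{
	n->lastIterationIndex = iterationIndex; // mark visited for this walk only
	for (Node* next : n->neighbors)
		if (next->lastIterationIndex != iterationIndex)
			Visit(next, iterationIndex);
}

int main()
{
	Node a, b;
	a.neighbors.push_back(&b);
	b.neighbors.push_back(&a); // a cycle is fine: the index check breaks it
	Visit(&a, ++g_iterationCounter); // a new walk just increments the counter
}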
@@ -424,6 +416,14 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* range)
 		cemu_assert_debug(range->list_locations.front().index >= range->interval2.start.GetInstructionIndexEx());
 		cemu_assert_debug(range->list_locations.back().index <= range->interval2.end.GetInstructionIndexEx());
 	}
+	// validate fixed reg requirements
+	if (!range->list_fixedRegRequirements.empty())
+	{
+		cemu_assert_debug(range->list_fixedRegRequirements.front().pos >= range->interval2.start);
+		cemu_assert_debug(range->list_fixedRegRequirements.back().pos <= range->interval2.end);
+		for(sint32 i = 0; i < (sint32)range->list_fixedRegRequirements.size()-1; i++)
+			cemu_assert_debug(range->list_fixedRegRequirements[i].pos < range->list_fixedRegRequirements[i+1].pos);
+	}
 
 }
 #else
@@ -563,7 +563,7 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte
 	for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
 	{
 		raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
-		if (tailInterval.ContainsInstructionIndex(fixedReg->pos.GetInstructionIndex()))
+		if (tailInterval.ContainsEdge(fixedReg->pos))
 		{
 			tailSubrange->list_fixedRegRequirements.push_back(*fixedReg);
 		}
@@ -572,7 +572,7 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte
 	for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
 	{
 		raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
-		if (!headInterval.ContainsInstructionIndex(fixedReg->pos.GetInstructionIndex()))
+		if (!headInterval.ContainsEdge(fixedReg->pos))
 		{
 			subrange->list_fixedRegRequirements.resize(i);
 			break;
@@ -335,6 +335,9 @@ struct raLivenessRange
 	void SetPhysicalRegister(sint32 physicalRegister);
 	void SetPhysicalRegisterForCluster(sint32 physicalRegister);
 	void UnsetPhysicalRegister() { physicalRegister = -1; }
+
+private:
+	void GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs);
 };
 
 raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition);
@@ -181,9 +181,6 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
 		}
 	}
 
-	// if(range.startAddress < 0x0202fa3C || range.startAddress > 0x0202FA7C)
-	// 	return nullptr; // DEBUG
-
 	PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t();
 	ppcRecFunc->ppcAddress = range.startAddress;
 	ppcRecFunc->ppcSize = range.length;
@@ -340,15 +337,6 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
 	//PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext);
 	//PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext);
 
-	// if(ppcImlGenContext.debug_entryPPCAddress >= 0x0240B7F8 && ppcImlGenContext.debug_entryPPCAddress < 0x0240C0AC)
-	// {
-	// 	IMLDebug_Dump(&ppcImlGenContext);
-	// 	__debugbreak();
-	// }
-	// else if(ppcImlGenContext.debug_entryPPCAddress >= 0x0240B7F8)
-	// 	return false;
-
 	return true;
 }
@@ -1513,7 +1513,7 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
 		ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB);
 	else
 		ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB);
-	ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, regMemResEA, ~31);
+	ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regMemResEA, regMemResEA, ~31);
 	// zero out the cacheline
 	for(sint32 i = 0; i < 32; i += 4)
 		ppcImlGenContext->emitInst().make_memory_r(regZero, regMemResEA, i, 32, false);
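The AND with ~31 rounds the effective address down to the start of its 32-byte cache block before the loop above zeroes it in 4-byte stores. A small runnable check (ours):

#include <cstdint>
#include <cstdio>

int main()
{
	// DCBZ zeroes one 32-byte cache block; AND with ~31 (0xFFFFFFE0) aligns
	// the effective address down to the start of that block
	uint32_t ea = 0x0202fa5c;
	uint32_t blockStart = ea & ~31u;
	std::printf("%08x -> %08x\n", ea, blockStart); // prints 0202fa5c -> 0202fa40
}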
@@ -4,6 +4,9 @@
 #include "PPCRecompilerIml.h"
 #include "Cafe/GameProfile/GameProfile.h"
 
+ATTR_MS_ABI double frsqrte_espresso(double input);
+ATTR_MS_ABI double fres_espresso(double input);
+
 IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit);
 
 void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID)
|
|||
// load registers
|
||||
IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
|
||||
IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD);
|
||||
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterB);
|
||||
ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprRegisterB, IMLREG_INVALID, IMLREG_INVALID, fprRegisterD);
|
||||
// adjust accuracy
|
||||
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD);
|
||||
// copy result to top
|
||||
if( ppcImlGenContext->PSE )
|
||||
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -1026,9 +1032,7 @@ bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
 	}
 	PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegisterD);
 	if( ppcImlGenContext->PSE )
-	{
 		PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD);
-	}
 	return true;
 }
@@ -1075,7 +1079,7 @@ bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 op
 	// hCPU->fpr[frD].fpr = 1.0 / sqrt(hCPU->fpr[frB].fpr);
 	IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
 	IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD);
-	PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT, fprRegisterD, fprRegisterB);
+	ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprRegisterB, IMLREG_INVALID, IMLREG_INVALID, fprRegisterD);
 	// adjust accuracy
 	PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD);
 	return true;
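Taken together, the FRES/FRSQRTE rework replaces the dedicated IML opcodes (and their hand-written call sequences in the x64 backend) with generic CALL_IMM instructions; the register allocator now materializes the MS-ABI calling sequence via SetupCallingConvention. In effect the emitted code computes (our pseudo-C summary, not code from the commit):

// FRES:    ps0[frD] = fres_espresso(ps0[frB]);     // via CALL_IMM, result arrives in regReturn
//          optionally round ps0[frD] to single precision
//          if (PSE) ps1[frD] = ps0[frD];           // paired-single: copy bottom to top
// FRSQRTE: ps0[frD] = frsqrte_espresso(ps0[frB]);  // same pattern, no top copy
//          optionally round ps0[frD] to single precision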