mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-04-29 14:59:26 -04:00
PPCRec: Fixes and optimizations + rework FRES/FRSQRTE
This commit is contained in:
parent
89f8f9bd2a
commit
f94f99546d
13 changed files with 408 additions and 354 deletions
|
@ -32,7 +32,7 @@ espresso_frsqrte_entry_t frsqrteLookupTable[32] =
|
||||||
{0x20c1000, 0x35e},{0x1f12000, 0x332},{0x1d79000, 0x30a},{0x1bf4000, 0x2e6},
|
{0x20c1000, 0x35e},{0x1f12000, 0x332},{0x1d79000, 0x30a},{0x1bf4000, 0x2e6},
|
||||||
};
|
};
|
||||||
|
|
||||||
double frsqrte_espresso(double input)
|
ATTR_MS_ABI double frsqrte_espresso(double input)
|
||||||
{
|
{
|
||||||
unsigned long long x = *(unsigned long long*)&input;
|
unsigned long long x = *(unsigned long long*)&input;
|
||||||
|
|
||||||
|
@ -111,7 +111,7 @@ espresso_fres_entry_t fresLookupTable[32] =
|
||||||
{0x88400, 0x11a}, {0x65000, 0x11a}, {0x41c00, 0x108}, {0x20c00, 0x106}
|
{0x88400, 0x11a}, {0x65000, 0x11a}, {0x41c00, 0x108}, {0x20c00, 0x106}
|
||||||
};
|
};
|
||||||
|
|
||||||
double fres_espresso(double input)
|
ATTR_MS_ABI double fres_espresso(double input)
|
||||||
{
|
{
|
||||||
// based on testing we know that fres uses only the first 15 bits of the mantissa
|
// based on testing we know that fres uses only the first 15 bits of the mantissa
|
||||||
// seee eeee eeee mmmm mmmm mmmm mmmx xxxx .... (s = sign, e = exponent, m = mantissa, x = not used)
|
// seee eeee eeee mmmm mmmm mmmm mmmx xxxx .... (s = sign, e = exponent, m = mantissa, x = not used)
|
||||||
|
|
|
@ -191,8 +191,8 @@ inline double roundTo25BitAccuracy(double d)
|
||||||
return *(double*)&v;
|
return *(double*)&v;
|
||||||
}
|
}
|
||||||
|
|
||||||
double fres_espresso(double input);
|
ATTR_MS_ABI double fres_espresso(double input);
|
||||||
double frsqrte_espresso(double input);
|
ATTR_MS_ABI double frsqrte_espresso(double input);
|
||||||
|
|
||||||
void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b);
|
void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b);
|
||||||
|
|
||||||
|
|
|
@ -601,8 +601,10 @@ void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRe
|
||||||
void PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
|
void PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
|
||||||
{
|
{
|
||||||
// the register allocator takes care of spilling volatile registers and moving parameters to the right registers, so we don't need to do any special handling here
|
// the register allocator takes care of spilling volatile registers and moving parameters to the right registers, so we don't need to do any special handling here
|
||||||
|
x64GenContext->emitter->SUB_qi8(X86_REG_RSP, 0x28); // reserve enough space for any parameters while keeping stack alignment of 16 intact
|
||||||
x64GenContext->emitter->MOV_qi64(X86_REG_RAX, imlInstruction->op_call_imm.callAddress);
|
x64GenContext->emitter->MOV_qi64(X86_REG_RAX, imlInstruction->op_call_imm.callAddress);
|
||||||
x64GenContext->emitter->CALL_q(X86_REG_RAX);
|
x64GenContext->emitter->CALL_q(X86_REG_RAX);
|
||||||
|
x64GenContext->emitter->ADD_qi8(X86_REG_RSP, 0x28);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
|
bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
|
||||||
|
|
|
@ -780,18 +780,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
|
||||||
// move to FPR register
|
// move to FPR register
|
||||||
x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP);
|
x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP);
|
||||||
}
|
}
|
||||||
else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP )
|
|
||||||
{
|
|
||||||
// move register to XMM15
|
|
||||||
x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA);
|
|
||||||
|
|
||||||
// call assembly routine to calculate accurate FRES result in XMM15
|
|
||||||
x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres);
|
|
||||||
x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP);
|
|
||||||
|
|
||||||
// copy result to bottom and top half of result register
|
|
||||||
x64Gen_movddup_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP);
|
|
||||||
}
|
|
||||||
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT)
|
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT)
|
||||||
{
|
{
|
||||||
// move register to XMM15
|
// move register to XMM15
|
||||||
|
|
|
@ -363,7 +363,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
||||||
operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ||
|
operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ||
|
||||||
operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ||
|
operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ||
|
||||||
operation == PPCREC_IML_OP_ASSIGN ||
|
operation == PPCREC_IML_OP_ASSIGN ||
|
||||||
operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP ||
|
|
||||||
operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ||
|
operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ||
|
||||||
operation == PPCREC_IML_OP_FPR_ABS_PAIR ||
|
operation == PPCREC_IML_OP_FPR_ABS_PAIR ||
|
||||||
operation == PPCREC_IML_OP_FPR_FRES_PAIR ||
|
operation == PPCREC_IML_OP_FPR_FRES_PAIR ||
|
||||||
|
|
|
@ -143,7 +143,6 @@ enum
|
||||||
PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched
|
PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched
|
||||||
PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED,
|
PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED,
|
||||||
PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half
|
PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half
|
||||||
PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half
|
|
||||||
PPCREC_IML_OP_FPR_FCMPO_BOTTOM, // deprecated
|
PPCREC_IML_OP_FPR_FCMPO_BOTTOM, // deprecated
|
||||||
PPCREC_IML_OP_FPR_FCMPU_BOTTOM, // deprecated
|
PPCREC_IML_OP_FPR_FCMPU_BOTTOM, // deprecated
|
||||||
PPCREC_IML_OP_FPR_FCMPU_TOP, // deprecated
|
PPCREC_IML_OP_FPR_FCMPU_TOP, // deprecated
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -17,9 +17,19 @@ public:
|
||||||
m_regBitmask &= ~((uint64)1 << index);
|
m_regBitmask &= ~((uint64)1 << index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SetAllAvailable()
|
||||||
|
{
|
||||||
|
m_regBitmask = ~0ull;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HasAllAvailable() const
|
||||||
|
{
|
||||||
|
return m_regBitmask == ~0ull;
|
||||||
|
}
|
||||||
|
|
||||||
bool IsAvailable(uint32 index) const
|
bool IsAvailable(uint32 index) const
|
||||||
{
|
{
|
||||||
return (m_regBitmask & (1 << index)) != 0;
|
return (m_regBitmask & ((uint64)1 << index)) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
IMLPhysRegisterSet& operator&=(const IMLPhysRegisterSet& other)
|
IMLPhysRegisterSet& operator&=(const IMLPhysRegisterSet& other)
|
||||||
|
|
|
@ -67,38 +67,30 @@ boost::container::small_vector<raLivenessRange*, 128> raLivenessRange::GetAllSub
|
||||||
return subranges;
|
return subranges;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void raLivenessRange::GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs)
|
||||||
|
{
|
||||||
|
range->lastIterationIndex = iterationIndex;
|
||||||
|
for (auto& it : range->list_fixedRegRequirements)
|
||||||
|
allowedRegs &= it.allowedReg;
|
||||||
|
// check successors
|
||||||
|
if (range->subrangeBranchTaken && range->subrangeBranchTaken->lastIterationIndex != iterationIndex)
|
||||||
|
GetAllowedRegistersExRecursive(range->subrangeBranchTaken, iterationIndex, allowedRegs);
|
||||||
|
if (range->subrangeBranchNotTaken && range->subrangeBranchNotTaken->lastIterationIndex != iterationIndex)
|
||||||
|
GetAllowedRegistersExRecursive(range->subrangeBranchNotTaken, iterationIndex, allowedRegs);
|
||||||
|
// check predecessors
|
||||||
|
for (auto& prev : range->previousRanges)
|
||||||
|
{
|
||||||
|
if (prev->lastIterationIndex != iterationIndex)
|
||||||
|
GetAllowedRegistersExRecursive(prev, iterationIndex, allowedRegs);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
bool raLivenessRange::GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters)
|
bool raLivenessRange::GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters)
|
||||||
{
|
{
|
||||||
if(interval2.ExtendsPreviousSegment() || interval2.ExtendsIntoNextSegment())
|
uint32 iterationIndex = PPCRecRA_getNextIterationIndex();
|
||||||
{
|
allowedRegisters.SetAllAvailable();
|
||||||
auto clusterRanges = GetAllSubrangesInCluster();
|
GetAllowedRegistersExRecursive(this, iterationIndex, allowedRegisters);
|
||||||
bool hasAnyRequirement = false;
|
return !allowedRegisters.HasAllAvailable();
|
||||||
for(auto& subrange : clusterRanges)
|
|
||||||
{
|
|
||||||
if(subrange->list_fixedRegRequirements.empty())
|
|
||||||
continue;
|
|
||||||
allowedRegisters = subrange->list_fixedRegRequirements.front().allowedReg;
|
|
||||||
hasAnyRequirement = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if(!hasAnyRequirement)
|
|
||||||
return false;
|
|
||||||
for(auto& subrange : clusterRanges)
|
|
||||||
{
|
|
||||||
for(auto& fixedRegLoc : subrange->list_fixedRegRequirements)
|
|
||||||
allowedRegisters &= fixedRegLoc.allowedReg;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// local check only, slightly faster
|
|
||||||
if(list_fixedRegRequirements.empty())
|
|
||||||
return false;
|
|
||||||
allowedRegisters = list_fixedRegRequirements.front().allowedReg;
|
|
||||||
for(auto& fixedRegLoc : list_fixedRegRequirements)
|
|
||||||
allowedRegisters &= fixedRegLoc.allowedReg;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
IMLPhysRegisterSet raLivenessRange::GetAllowedRegisters(IMLPhysRegisterSet regPool)
|
IMLPhysRegisterSet raLivenessRange::GetAllowedRegisters(IMLPhysRegisterSet regPool)
|
||||||
|
@ -424,6 +416,14 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* range)
|
||||||
cemu_assert_debug(range->list_locations.front().index >= range->interval2.start.GetInstructionIndexEx());
|
cemu_assert_debug(range->list_locations.front().index >= range->interval2.start.GetInstructionIndexEx());
|
||||||
cemu_assert_debug(range->list_locations.back().index <= range->interval2.end.GetInstructionIndexEx());
|
cemu_assert_debug(range->list_locations.back().index <= range->interval2.end.GetInstructionIndexEx());
|
||||||
}
|
}
|
||||||
|
// validate fixed reg requirements
|
||||||
|
if (!range->list_fixedRegRequirements.empty())
|
||||||
|
{
|
||||||
|
cemu_assert_debug(range->list_fixedRegRequirements.front().pos >= range->interval2.start);
|
||||||
|
cemu_assert_debug(range->list_fixedRegRequirements.back().pos <= range->interval2.end);
|
||||||
|
for(sint32 i = 0; i < (sint32)range->list_fixedRegRequirements.size()-1; i++)
|
||||||
|
cemu_assert_debug(range->list_fixedRegRequirements[i].pos < range->list_fixedRegRequirements[i+1].pos);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
@ -563,7 +563,7 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte
|
||||||
for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
|
for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
|
||||||
{
|
{
|
||||||
raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
|
raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
|
||||||
if (tailInterval.ContainsInstructionIndex(fixedReg->pos.GetInstructionIndex()))
|
if (tailInterval.ContainsEdge(fixedReg->pos))
|
||||||
{
|
{
|
||||||
tailSubrange->list_fixedRegRequirements.push_back(*fixedReg);
|
tailSubrange->list_fixedRegRequirements.push_back(*fixedReg);
|
||||||
}
|
}
|
||||||
|
@ -572,7 +572,7 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte
|
||||||
for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
|
for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
|
||||||
{
|
{
|
||||||
raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
|
raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
|
||||||
if (!headInterval.ContainsInstructionIndex(fixedReg->pos.GetInstructionIndex()))
|
if (!headInterval.ContainsEdge(fixedReg->pos))
|
||||||
{
|
{
|
||||||
subrange->list_fixedRegRequirements.resize(i);
|
subrange->list_fixedRegRequirements.resize(i);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -335,6 +335,9 @@ struct raLivenessRange
|
||||||
void SetPhysicalRegister(sint32 physicalRegister);
|
void SetPhysicalRegister(sint32 physicalRegister);
|
||||||
void SetPhysicalRegisterForCluster(sint32 physicalRegister);
|
void SetPhysicalRegisterForCluster(sint32 physicalRegister);
|
||||||
void UnsetPhysicalRegister() { physicalRegister = -1; }
|
void UnsetPhysicalRegister() { physicalRegister = -1; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
void GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs);
|
||||||
};
|
};
|
||||||
|
|
||||||
raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition);
|
raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition);
|
||||||
|
|
|
@ -181,9 +181,6 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if(range.startAddress < 0x0202fa3C || range.startAddress > 0x0202FA7C)
|
|
||||||
// return nullptr; // DEBUG
|
|
||||||
|
|
||||||
PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t();
|
PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t();
|
||||||
ppcRecFunc->ppcAddress = range.startAddress;
|
ppcRecFunc->ppcAddress = range.startAddress;
|
||||||
ppcRecFunc->ppcSize = range.length;
|
ppcRecFunc->ppcSize = range.length;
|
||||||
|
@ -340,15 +337,6 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
|
||||||
//PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext);
|
//PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext);
|
||||||
//PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext);
|
//PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext);
|
||||||
|
|
||||||
|
|
||||||
// if(ppcImlGenContext.debug_entryPPCAddress >= 0x0240B7F8 && ppcImlGenContext.debug_entryPPCAddress < 0x0240C0AC)
|
|
||||||
// {
|
|
||||||
// IMLDebug_Dump(&ppcImlGenContext);
|
|
||||||
// __debugbreak();
|
|
||||||
// }
|
|
||||||
// else if(ppcImlGenContext.debug_entryPPCAddress >= 0x0240B7F8)
|
|
||||||
// return false;
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1513,7 +1513,7 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
||||||
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB);
|
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB);
|
||||||
else
|
else
|
||||||
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB);
|
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB);
|
||||||
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, regMemResEA, ~31);
|
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regMemResEA, regMemResEA, ~31);
|
||||||
// zero out the cacheline
|
// zero out the cacheline
|
||||||
for(sint32 i = 0; i < 32; i += 4)
|
for(sint32 i = 0; i < 32; i += 4)
|
||||||
ppcImlGenContext->emitInst().make_memory_r(regZero, regMemResEA, i, 32, false);
|
ppcImlGenContext->emitInst().make_memory_r(regZero, regMemResEA, i, 32, false);
|
||||||
|
|
|
@ -4,6 +4,9 @@
|
||||||
#include "PPCRecompilerIml.h"
|
#include "PPCRecompilerIml.h"
|
||||||
#include "Cafe/GameProfile/GameProfile.h"
|
#include "Cafe/GameProfile/GameProfile.h"
|
||||||
|
|
||||||
|
ATTR_MS_ABI double frsqrte_espresso(double input);
|
||||||
|
ATTR_MS_ABI double fres_espresso(double input);
|
||||||
|
|
||||||
IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit);
|
IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit);
|
||||||
|
|
||||||
void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID)
|
void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian, IMLReg registerGQR = IMLREG_INVALID)
|
||||||
|
@ -1007,9 +1010,12 @@ bool PPCRecompilerImlGen_FRES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
||||||
// load registers
|
// load registers
|
||||||
IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
|
IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
|
||||||
IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD);
|
IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD);
|
||||||
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterB);
|
ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprRegisterB, IMLREG_INVALID, IMLREG_INVALID, fprRegisterD);
|
||||||
// adjust accuracy
|
// adjust accuracy
|
||||||
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD);
|
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD);
|
||||||
|
// copy result to top
|
||||||
|
if( ppcImlGenContext->PSE )
|
||||||
|
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1026,9 +1032,7 @@ bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
||||||
}
|
}
|
||||||
PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegisterD);
|
PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegisterD);
|
||||||
if( ppcImlGenContext->PSE )
|
if( ppcImlGenContext->PSE )
|
||||||
{
|
|
||||||
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD);
|
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD);
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1075,7 +1079,7 @@ bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 op
|
||||||
// hCPU->fpr[frD].fpr = 1.0 / sqrt(hCPU->fpr[frB].fpr);
|
// hCPU->fpr[frD].fpr = 1.0 / sqrt(hCPU->fpr[frB].fpr);
|
||||||
IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
|
IMLReg fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
|
||||||
IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD);
|
IMLReg fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD);
|
||||||
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT, fprRegisterD, fprRegisterB);
|
ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprRegisterB, IMLREG_INVALID, IMLREG_INVALID, fprRegisterD);
|
||||||
// adjust accuracy
|
// adjust accuracy
|
||||||
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD);
|
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD);
|
||||||
return true;
|
return true;
|
||||||
|
|
Loading…
Add table
Reference in a new issue