Mirror of https://github.com/cemu-project/Cemu.git (synced 2025-04-29 14:59:26 -04:00)

Commit da08eda506 (parent 411a83799c)
PPCRec: Emit x86 movd for non-AVX + more restructuring

13 changed files with 589 additions and 584 deletions
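The commit title refers to emitting the legacy SSE2 MOVD encoding when AVX is unavailable, instead of the VEX-encoded VMOVD. A minimal standalone sketch of that distinction follows (illustrative only; emitBytes and emitMovdGprToXmm are hypothetical helpers, not Cemu's actual emitter API, and register indices are assumed to be below 8):

#include <cstdint>
#include <vector>

// Hypothetical helper: append raw opcode bytes to a code buffer.
static void emitBytes(std::vector<uint8_t>& code, std::initializer_list<uint8_t> bytes)
{
	code.insert(code.end(), bytes);
}

// Move a 32-bit GPR into an XMM register (both indices < 8).
// With AVX the VEX-encoded VMOVD (C5 F9 6E /r) can be emitted; without AVX the
// legacy SSE2 encoding 66 0F 6E /r is used instead.
static void emitMovdGprToXmm(std::vector<uint8_t>& code, bool hasAVX, uint8_t xmm, uint8_t reg32)
{
	uint8_t modrm = 0xC0 | (xmm << 3) | reg32; // register-direct ModRM byte
	if (hasAVX)
		emitBytes(code, { 0xC5, 0xF9, 0x6E, modrm }); // vmovd xmm, r32
	else
		emitBytes(code, { 0x66, 0x0F, 0x6E, modrm }); // movd xmm, r32
}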
@@ -76,6 +76,7 @@ add_library(CemuCafe
HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp
HW/Espresso/Recompiler/IML/IMLOptimizer.cpp
HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp
HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h
HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp
HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h
HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
@@ -16,16 +16,16 @@ void IMLAnalyzer_GetCRTracking(IMLInstruction* imlInstruction, PPCRecCRTracking_

// optimizer passes
// todo - rename
bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext);
bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext);
bool PPCRecompiler_reduceNumberOfFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext);
bool PPCRecompiler_manageFPRRegisters(struct ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_removeRedundantCRUpdates(struct ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* ppcImlGenContext);

// register allocator
void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext);
void IMLRegisterAllocator_AllocateRegisters(struct ppcImlGenContext_t* ppcImlGenContext);

// debug
void IMLDebug_DumpSegment(struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false);
@@ -4,6 +4,7 @@
#include "IMLRegisterAllocatorRanges.h"
#include "util/helpers/StringBuf.h"

#include "../PPCRecompiler.h"

const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml)
{
@@ -750,8 +750,8 @@ void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange)

void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
sint16 virtualReg2PhysReg[PPC_REC_MAX_VIRTUAL_GPR];
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
sint16 virtualReg2PhysReg[IML_RA_VIRT_REG_COUNT_MAX];
for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++)
virtualReg2PhysReg[i] = -1;

raLiveRangeInfo_t liveInfo;

@@ -848,7 +848,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
replaceGpr[f] = -1;
continue;
}
if (virtualRegister >= PPC_REC_MAX_VIRTUAL_GPR)
if (virtualRegister >= IML_RA_VIRT_REG_COUNT_MAX)
assert_dbg();
replaceGpr[f] = virtualReg2PhysReg[virtualRegister];
cemu_assert_debug(replaceGpr[f] >= 0);

@@ -860,7 +860,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
}
// expire infinite subranges (subranges that cross the segment border)
sint32 storeLoadListLength = 0;
raLoadStoreInfo_t loadStoreList[PPC_REC_MAX_VIRTUAL_GPR];
raLoadStoreInfo_t loadStoreList[IML_RA_VIRT_REG_COUNT_MAX];
for (sint32 f = 0; f < liveInfo.liveRangesCount; f++)
{
raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f];

@@ -1007,7 +1007,7 @@ bool _isRangeDefined(IMLSegment* imlSegment, sint32 vGPR)

void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++)
{
imlSegment->raDistances.reg[i].usageStart = INT_MAX;
imlSegment->raDistances.reg[i].usageEnd = INT_MIN;

@@ -1027,7 +1027,7 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext,
sint32 virtualRegister = gprTracking.gpr[t];
if (virtualRegister < 0)
continue;
cemu_assert_debug(virtualRegister < PPC_REC_MAX_VIRTUAL_GPR);
cemu_assert_debug(virtualRegister < IML_RA_VIRT_REG_COUNT_MAX);
imlSegment->raDistances.reg[virtualRegister].usageStart = std::min<sint32>(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction
imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max<sint32>(imlSegment->raDistances.reg[virtualRegister].usageEnd, index + 1); // index after instruction
}

@@ -1086,7 +1086,7 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlG

void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++)
{
if (_isRangeDefined(imlSegment, i) == false)
continue;

@@ -1096,8 +1096,8 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext,
PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range);
}
// create lookup table of ranges
raLivenessSubrange_t* vGPR2Subrange[PPC_REC_MAX_VIRTUAL_GPR];
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
raLivenessSubrange_t* vGPR2Subrange[IML_RA_VIRT_REG_COUNT_MAX];
for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++)
{
vGPR2Subrange[i] = imlSegment->raInfo.linkedList_perVirtualGPR[i];
#ifdef CEMU_DEBUG_ASSERT

@@ -1257,7 +1257,7 @@ void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSe

void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment)
{
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
{
if (imlSegment->raDistances.reg[i].usageStart == INT_MAX)
continue; // not used

@@ -1334,7 +1334,7 @@ void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext)
continue;

// extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop)
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
{
if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END)
continue; // range not set or does not reach end of segment
@@ -1,7 +1,84 @@
#pragma once
#include "IMLInstruction.h"

#include "Cafe/HW/Espresso/Recompiler/PPCRecompiler.h" // remove once dependency is gone
#define IML_RA_VIRT_REG_COUNT_MAX 40 // should match PPC_REC_MAX_VIRTUAL_GPR -> todo: Make this dynamic

struct IMLSegmentPoint
{
sint32 index;
struct IMLSegment* imlSegment;
IMLSegmentPoint* next;
IMLSegmentPoint* prev;
};

struct raLivenessLocation_t
{
sint32 index;
bool isRead;
bool isWrite;

raLivenessLocation_t() = default;

raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
: index(index), isRead(isRead), isWrite(isWrite) {};
};

struct raLivenessSubrangeLink_t
{
struct raLivenessSubrange_t* prev;
struct raLivenessSubrange_t* next;
};

struct raLivenessSubrange_t
{
struct raLivenessRange_t* range;
IMLSegment* imlSegment;
IMLSegmentPoint start;
IMLSegmentPoint end;
// dirty state tracking
bool _noLoad;
bool hasStore;
bool hasStoreDelayed;
// next
raLivenessSubrange_t* subrangeBranchTaken;
raLivenessSubrange_t* subrangeBranchNotTaken;
// processing
uint32 lastIterationIndex;
// instruction locations
std::vector<raLivenessLocation_t> list_locations;
// linked list (subranges with same GPR virtual register)
raLivenessSubrangeLink_t link_sameVirtualRegisterGPR;
// linked list (all subranges for this segment)
raLivenessSubrangeLink_t link_segmentSubrangesGPR;
};

struct raLivenessRange_t
{
sint32 virtualRegister;
sint32 physicalRegister;
sint32 name;
std::vector<raLivenessSubrange_t*> list_subranges;
};

struct PPCSegmentRegisterAllocatorInfo_t
{
// analyzer stage
bool isPartOfProcessedLoop{}; // used during loop detection
sint32 lastIterationIndex{};
// linked lists
raLivenessSubrange_t* linkedList_allSubranges{};
raLivenessSubrange_t* linkedList_perVirtualGPR[IML_RA_VIRT_REG_COUNT_MAX]{};
};

struct PPCRecVGPRDistances_t
{
struct _RegArrayEntry
{
sint32 usageStart{};
sint32 usageEnd{};
}reg[IML_RA_VIRT_REG_COUNT_MAX];
bool isProcessed[IML_RA_VIRT_REG_COUNT_MAX]{};
};

struct IMLSegment
{

@@ -39,11 +116,9 @@ struct IMLSegment
PPCRecVGPRDistances_t raDistances{};
bool raRangeExtendProcessed{};
// segment points
ppcRecompilerSegmentPoint_t* segmentPointList{};

IMLSegmentPoint* segmentPointList{};
bool HasSuffixInstruction() const;
IMLInstruction* GetLastInstruction();

};
@@ -14,6 +14,8 @@
#include "util/helpers/helpers.h"
#include "util/MemMapper/MemMapper.h"

#include "Cafe/HW/Espresso/Recompiler/IML/IML.h"

struct PPCInvalidationRange
{
MPTR startAddress;

@@ -127,6 +129,7 @@ void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress)
PPCRecompiler_enter(hCPU, funcPtr);
}
}
bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext);

PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set<uint32>& entryAddresses, std::vector<std::pair<MPTR, uint32>>& entryPointsOut)
{

@@ -153,21 +156,27 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t();
ppcRecFunc->ppcAddress = range.startAddress;
ppcRecFunc->ppcSize = range.length;

// generate intermediate code
ppcImlGenContext_t ppcImlGenContext = { 0 };
bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses);
if (compiledSuccessfully == false)
{
// todo: Free everything
PPCRecompiler_freeContext(&ppcImlGenContext);
delete ppcRecFunc;
return NULL;
return nullptr;
}

// apply passes
if (!PPCRecompiler_ApplyIMLPasses(ppcImlGenContext))
{
delete ppcRecFunc;
return nullptr;
}

// emit x64 code
bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext);
if (x64GenerationSuccess == false)
{
PPCRecompiler_freeContext(&ppcImlGenContext);
return nullptr;
}

@@ -183,11 +192,82 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP

entryPointsOut.emplace_back(ppcEnterOffset, x64Offset);
}

PPCRecompiler_freeContext(&ppcImlGenContext);
return ppcRecFunc;
}

void PPCRecompiler_FixLoops(ppcImlGenContext_t& ppcImlGenContext);

bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
{
PPCRecompiler_FixLoops(ppcImlGenContext);

// isolate entry points from function flow (enterable segments must not be the target of any other segment)
// this simplifies logic during register allocation
PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext);

// if GQRs can be predicted, optimize PSQ load/stores
PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext);

// count number of used registers
uint32 numLoadedFPRRegisters = 0;
for (uint32 i = 0; i < 255; i++)
{
if (ppcImlGenContext.mappedFPRRegister[i])
numLoadedFPRRegisters++;
}

// insert name store instructions at the end of each segment but before branch instructions
for (IMLSegment* segIt : ppcImlGenContext.segmentList2)
{
if (segIt->imlList.size() == 0)
continue; // ignore empty segments
// analyze segment for register usage
IMLUsedRegisters registersUsed;
for (sint32 i = 0; i < segIt->imlList.size(); i++)
{
segIt->imlList[i].CheckRegisterUsage(&registersUsed);
sint32 accessedTempReg[5];
// intermediate FPRs
accessedTempReg[0] = registersUsed.readFPR1;
accessedTempReg[1] = registersUsed.readFPR2;
accessedTempReg[2] = registersUsed.readFPR3;
accessedTempReg[3] = registersUsed.readFPR4;
accessedTempReg[4] = registersUsed.writtenFPR1;
for (sint32 f = 0; f < 5; f++)
{
if (accessedTempReg[f] == -1)
continue;
uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f]];
if (regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0 + 32)
{
segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true;
}
}
}
}

// merge certain float load+store patterns (must happen before FPR register remapping)
PPCRecompiler_optimizeDirectFloatCopies(&ppcImlGenContext);
// delay byte swapping for certain load+store patterns
PPCRecompiler_optimizeDirectIntegerCopies(&ppcImlGenContext);

if (numLoadedFPRRegisters > 0)
{
if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false)
{
return false;
}
}

IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext);

// remove redundant name load and store instructions
PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext);
PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext);

return true;
}

bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFunctionBoundaryTracker::PPCRange_t& range, PPCRecFunction_t* ppcRecFunc, std::vector<std::pair<MPTR, uint32>>& entryPoints)
{
// update jump table

@@ -511,42 +591,6 @@ void PPCRecompiler_init()
PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize());
PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize());

// init x64 recompiler instance data
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL;
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL;
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL;
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[1] = 1ULL << 63ULL;
ppcRecompilerInstanceData->_x64XMM_xorNOTMask[0] = 0xFFFFFFFFFFFFFFFFULL;
ppcRecompilerInstanceData->_x64XMM_xorNOTMask[1] = 0xFFFFFFFFFFFFFFFFULL;
ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[0] = ~(1ULL << 63ULL);
ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[1] = ~0ULL;
ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[0] = ~(1ULL << 63ULL);
ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[1] = ~(1ULL << 63ULL);
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[0] = ~(1 << 31);
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[1] = 0xFFFFFFFF;
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[2] = 0xFFFFFFFF;
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[3] = 0xFFFFFFFF;
ppcRecompilerInstanceData->_x64XMM_singleWordMask[0] = 0xFFFFFFFFULL;
ppcRecompilerInstanceData->_x64XMM_singleWordMask[1] = 0ULL;
ppcRecompilerInstanceData->_x64XMM_constDouble1_1[0] = 1.0;
ppcRecompilerInstanceData->_x64XMM_constDouble1_1[1] = 1.0;
ppcRecompilerInstanceData->_x64XMM_constDouble0_0[0] = 0.0;
ppcRecompilerInstanceData->_x64XMM_constDouble0_0[1] = 0.0;
ppcRecompilerInstanceData->_x64XMM_constFloat0_0[0] = 0.0f;
ppcRecompilerInstanceData->_x64XMM_constFloat0_0[1] = 0.0f;
ppcRecompilerInstanceData->_x64XMM_constFloat1_1[0] = 1.0f;
ppcRecompilerInstanceData->_x64XMM_constFloat1_1[1] = 1.0f;
*(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[0] = 0x00800000;
*(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[1] = 0x00800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[0] = 0x7F800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[1] = 0x7F800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[2] = 0x7F800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[3] = 0x7F800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[0] = ~0x80000000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[1] = ~0x80000000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000;

// setup GQR scale tables

for (uint32 i = 0; i < 32; i++)
@@ -25,84 +25,6 @@ struct PPCRecFunction_t
};

#include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h"

typedef struct _ppcRecompilerSegmentPoint_t
{
sint32 index;
struct IMLSegment* imlSegment;
_ppcRecompilerSegmentPoint_t* next;
_ppcRecompilerSegmentPoint_t* prev;
}ppcRecompilerSegmentPoint_t;

struct raLivenessLocation_t
{
sint32 index;
bool isRead;
bool isWrite;

raLivenessLocation_t() = default;

raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
: index(index), isRead(isRead), isWrite(isWrite) {};
};

struct raLivenessSubrangeLink_t
{
struct raLivenessSubrange_t* prev;
struct raLivenessSubrange_t* next;
};

struct raLivenessSubrange_t
{
struct raLivenessRange_t* range;
IMLSegment* imlSegment;
ppcRecompilerSegmentPoint_t start;
ppcRecompilerSegmentPoint_t end;
// dirty state tracking
bool _noLoad;
bool hasStore;
bool hasStoreDelayed;
// next
raLivenessSubrange_t* subrangeBranchTaken;
raLivenessSubrange_t* subrangeBranchNotTaken;
// processing
uint32 lastIterationIndex;
// instruction locations
std::vector<raLivenessLocation_t> list_locations;
// linked list (subranges with same GPR virtual register)
raLivenessSubrangeLink_t link_sameVirtualRegisterGPR;
// linked list (all subranges for this segment)
raLivenessSubrangeLink_t link_segmentSubrangesGPR;
};

struct raLivenessRange_t
{
sint32 virtualRegister;
sint32 physicalRegister;
sint32 name;
std::vector<raLivenessSubrange_t*> list_subranges;
};

struct PPCSegmentRegisterAllocatorInfo_t
{
// analyzer stage
bool isPartOfProcessedLoop{}; // used during loop detection
sint32 lastIterationIndex{};
// linked lists
raLivenessSubrange_t* linkedList_allSubranges{};
raLivenessSubrange_t* linkedList_perVirtualGPR[PPC_REC_MAX_VIRTUAL_GPR]{};
};

struct PPCRecVGPRDistances_t
{
struct _RegArrayEntry
{
sint32 usageStart{};
sint32 usageEnd{};
}reg[PPC_REC_MAX_VIRTUAL_GPR];
bool isProcessed[PPC_REC_MAX_VIRTUAL_GPR]{};
};

#include "Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h"

struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct ppcImlGenContext_t* ppcImlGenContext);

@@ -140,6 +62,21 @@ struct ppcImlGenContext_t
bool modifiesGQR[8];
}tracking;

~ppcImlGenContext_t()
{
if (imlList)
{
free(imlList);
imlList = nullptr;
}

for (IMLSegment* imlSegment : segmentList2)
{
delete imlSegment;
}
segmentList2.clear();
}

// append raw instruction
IMLInstruction& emitInst()
{

@@ -194,8 +131,6 @@ extern void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();

#define PPC_REC_INVALID_FUNCTION ((PPCRecFunction_t*)-1)

// todo - move some of the stuff above into PPCRecompilerInternal.h

// recompiler interface

void PPCRecompiler_recompileIfUnvisited(uint32 enterAddress);
@@ -2,7 +2,6 @@
#define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7) we use the 8th as temporary cr register that is never stored (BDNZ instruction for example)

bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set<uint32>& entryAddresses);
void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext); // todo - move to destructor

IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount);

@@ -10,8 +9,8 @@ IMLInstruction* PPCRecompiler_insertInstruction(IMLSegment* imlSegment, sint32 i

void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count);

void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, IMLSegment* imlSegment, sint32 index);
void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint);
void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index);
void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint);

// GPR register management
uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false);
@ -2933,7 +2933,7 @@ uint32 PPCRecompiler_getPreviousInstruction(ppcImlGenContext_t* ppcImlGenContext
|
|||
return v;
|
||||
}
|
||||
|
||||
void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, IMLSegment* imlSegment, sint32 index)
|
||||
void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index)
|
||||
{
|
||||
segmentPoint->imlSegment = imlSegment;
|
||||
segmentPoint->index = index;
|
||||
|
@ -2944,7 +2944,7 @@ void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint,
|
|||
imlSegment->segmentPointList = segmentPoint;
|
||||
}
|
||||
|
||||
void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint)
|
||||
void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint)
|
||||
{
|
||||
if (segmentPoint->prev)
|
||||
segmentPoint->prev->next = segmentPoint->next;
|
||||
|
@ -2975,7 +2975,7 @@ void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index,
|
|||
// update position of segment points
|
||||
if (imlSegment->segmentPointList)
|
||||
{
|
||||
ppcRecompilerSegmentPoint_t* segmentPoint = imlSegment->segmentPointList;
|
||||
IMLSegmentPoint* segmentPoint = imlSegment->segmentPointList;
|
||||
while (segmentPoint)
|
||||
{
|
||||
if (segmentPoint->index != RA_INTER_RANGE_START && segmentPoint->index != RA_INTER_RANGE_END)
|
||||
|
@ -3017,21 +3017,6 @@ void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint3
|
|||
ppcImlGenContext->segmentList2[index + i] = new IMLSegment();
|
||||
}
|
||||
|
||||
void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
if (ppcImlGenContext->imlList)
|
||||
{
|
||||
free(ppcImlGenContext->imlList);
|
||||
ppcImlGenContext->imlList = nullptr;
|
||||
}
|
||||
|
||||
for (IMLSegment* imlSegment : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
delete imlSegment;
|
||||
}
|
||||
ppcImlGenContext->segmentList2.clear();
|
||||
}
|
||||
|
||||
bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
bool unsupportedInstructionFound = false;
|
||||
|
@ -3953,9 +3938,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
|||
ppcImlGenContext.ppcAddressOfCurrentInstruction = 0; // reset current instruction offset (any future generated IML instruction will be assigned to ppc address 0)
|
||||
if( unsupportedInstructionCount > 0 || unsupportedInstructionFound )
|
||||
{
|
||||
// could not compile function
|
||||
debug_printf("Failed recompile due to unknown instruction at 0x%08x\n", unsupportedInstructionLastOffset);
|
||||
PPCRecompiler_freeContext(&ppcImlGenContext);
|
||||
return false;
|
||||
}
|
||||
// optimize unused jumpmarks away
|
||||
|
@ -4260,16 +4243,20 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
|||
segIt->imlList[0].op_macro.param = cycleCount;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void PPCRecompiler_FixLoops(ppcImlGenContext_t& ppcImlGenContext)
|
||||
{
|
||||
// find segments that have a (conditional) jump instruction that points in reverse direction of code flow
|
||||
// for these segments there is a risk that the recompiler could get trapped in an infinite busy loop.
|
||||
// todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example)
|
||||
uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located
|
||||
for(size_t s=0; s<ppcImlGenContext.segmentList2.size(); s++)
|
||||
for (size_t s = 0; s < ppcImlGenContext.segmentList2.size(); s++)
|
||||
{
|
||||
// todo: This currently uses segment->ppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?)
|
||||
IMLSegment* imlSegment = ppcImlGenContext.segmentList2[s];
|
||||
if( imlSegment->imlList.empty() )
|
||||
if (imlSegment->imlList.empty())
|
||||
continue;
|
||||
if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin)
|
||||
continue;
|
||||
|
@ -4289,12 +4276,12 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
|||
|
||||
PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2);
|
||||
imlSegment = NULL;
|
||||
IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s+0];
|
||||
IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s+1];
|
||||
IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s+2];
|
||||
IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0];
|
||||
IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1];
|
||||
IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2];
|
||||
// create entry point segment
|
||||
PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1);
|
||||
IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size()-1];
|
||||
IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1];
|
||||
// relink segments
|
||||
IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0);
|
||||
IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1);
|
||||
|
@ -4322,7 +4309,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
|||
imlSegmentP2->ppcAddrMin = 0;
|
||||
imlSegmentP2->ppcAddrMax = 0;
|
||||
// setup enterable segment
|
||||
if( enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF )
|
||||
if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF)
|
||||
{
|
||||
imlSegmentPEntry->isEnterable = true;
|
||||
imlSegmentPEntry->ppcAddress = enterPPCAddress;
|
||||
|
@ -4353,70 +4340,4 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
|||
// skip the newly created segments
|
||||
s += 2;
|
||||
}
|
||||
|
||||
// isolate entry points from function flow (enterable segments must not be the target of any other segment)
|
||||
// this simplifies logic during register allocation
|
||||
PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext);
|
||||
|
||||
// if GQRs can be predicted, optimize PSQ load/stores
|
||||
PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext);
|
||||
|
||||
// count number of used registers
|
||||
uint32 numLoadedFPRRegisters = 0;
|
||||
for(uint32 i=0; i<255; i++)
|
||||
{
|
||||
if( ppcImlGenContext.mappedFPRRegister[i] )
|
||||
numLoadedFPRRegisters++;
|
||||
}
|
||||
|
||||
// insert name store instructions at the end of each segment but before branch instructions
|
||||
for (IMLSegment* segIt : ppcImlGenContext.segmentList2)
|
||||
{
|
||||
if(segIt->imlList.size() == 0 )
|
||||
continue; // ignore empty segments
|
||||
// analyze segment for register usage
|
||||
IMLUsedRegisters registersUsed;
|
||||
for(sint32 i=0; i<segIt->imlList.size(); i++)
|
||||
{
|
||||
segIt->imlList[i].CheckRegisterUsage(®istersUsed);
|
||||
sint32 accessedTempReg[5];
|
||||
// intermediate FPRs
|
||||
accessedTempReg[0] = registersUsed.readFPR1;
|
||||
accessedTempReg[1] = registersUsed.readFPR2;
|
||||
accessedTempReg[2] = registersUsed.readFPR3;
|
||||
accessedTempReg[3] = registersUsed.readFPR4;
|
||||
accessedTempReg[4] = registersUsed.writtenFPR1;
|
||||
for(sint32 f=0; f<5; f++)
|
||||
{
|
||||
if( accessedTempReg[f] == -1 )
|
||||
continue;
|
||||
uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f]];
|
||||
if( regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0+32 )
|
||||
{
|
||||
segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// merge certain float load+store patterns (must happen before FPR register remapping)
|
||||
PPCRecompiler_optimizeDirectFloatCopies(&ppcImlGenContext);
|
||||
// delay byte swapping for certain load+store patterns
|
||||
PPCRecompiler_optimizeDirectIntegerCopies(&ppcImlGenContext);
|
||||
|
||||
if (numLoadedFPRRegisters > 0)
|
||||
{
|
||||
if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false)
|
||||
{
|
||||
PPCRecompiler_freeContext(&ppcImlGenContext);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext);
|
||||
|
||||
// remove redundant name load and store instructions
|
||||
PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext);
|
||||
PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext);
|
||||
return true;
|
||||
}
|
||||
}
|
|
@@ -8,6 +8,11 @@
#include "util/MemMapper/MemMapper.h"
#include "Common/cpu_features.h"

bool s_hasLZCNTSupport = false;
bool s_hasMOVBESupport = false;
bool s_hasBMI2Support = false;
bool s_hasAVXSupport = false;

sint32 x64Gen_registerMap[12] = // virtual GPR to x64 register mapping
{
REG_RAX, REG_RDX, REG_RBX, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_RCX
@ -351,152 +356,143 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
|
|||
sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
|
||||
if( indexed )
|
||||
realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2);
|
||||
if( false )//imlInstruction->op_storeLoad.flags & PPCREC_IML_OP_FLAG_FASTMEMACCESS )
|
||||
if( indexed && realRegisterMem == realRegisterMem2 )
|
||||
{
|
||||
// load u8/u16/u32 via direct memory access + optional sign extend
|
||||
assert_dbg(); // todo
|
||||
return false;
|
||||
}
|
||||
else
|
||||
if( indexed && realRegisterData == realRegisterMem2 )
|
||||
{
|
||||
if( indexed && realRegisterMem == realRegisterMem2 )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if( indexed && realRegisterData == realRegisterMem2 )
|
||||
{
|
||||
// for indexed memory access realRegisterData must not be the same register as the second memory register,
|
||||
// this can easily be fixed by swapping the logic of realRegisterMem and realRegisterMem2
|
||||
sint32 temp = realRegisterMem;
|
||||
realRegisterMem = realRegisterMem2;
|
||||
realRegisterMem2 = temp;
|
||||
}
|
||||
// for indexed memory access realRegisterData must not be the same register as the second memory register,
|
||||
// this can easily be fixed by swapping the logic of realRegisterMem and realRegisterMem2
|
||||
sint32 temp = realRegisterMem;
|
||||
realRegisterMem = realRegisterMem2;
|
||||
realRegisterMem2 = temp;
|
||||
}
|
||||
|
||||
bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend;
|
||||
bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
|
||||
if( imlInstruction->op_storeLoad.copyWidth == 32 )
|
||||
bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend;
|
||||
bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
|
||||
if( imlInstruction->op_storeLoad.copyWidth == 32 )
|
||||
{
|
||||
//if( indexed )
|
||||
// PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
if (indexed)
|
||||
{
|
||||
x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
if( IMLBackendX64_HasExtensionMOVBE() && switchEndian )
|
||||
{
|
||||
//if( indexed )
|
||||
// PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
if (indexed)
|
||||
{
|
||||
x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
if( g_CPUFeatures.x86.movbe && switchEndian )
|
||||
{
|
||||
if (indexed)
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
|
||||
//if (indexed && realRegisterMem != realRegisterData)
|
||||
// x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
}
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
|
||||
//if (indexed && realRegisterMem != realRegisterData)
|
||||
// x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (indexed)
|
||||
{
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
|
||||
//if (realRegisterMem != realRegisterData)
|
||||
// x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
if (switchEndian)
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if (switchEndian)
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( imlInstruction->op_storeLoad.copyWidth == 16 )
|
||||
{
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // todo: We can avoid this if MOVBE is available
|
||||
if (indexed)
|
||||
{
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
if( g_CPUFeatures.x86.movbe && switchEndian )
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
if( switchEndian )
|
||||
x64Gen_rol_reg64Low16_imm8(x64GenContext, realRegisterData, 8);
|
||||
}
|
||||
if( signExtend )
|
||||
x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData);
|
||||
else
|
||||
x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData);
|
||||
}
|
||||
else if( imlInstruction->op_storeLoad.copyWidth == 8 )
|
||||
{
|
||||
if( indexed )
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
// todo: Optimize by using only MOVZX/MOVSX
|
||||
if( indexed )
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
// todo: Use sign extend move from memory instead of separate sign-extend?
|
||||
if( signExtend )
|
||||
x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
else
|
||||
x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_LOAD_LWARX_MARKER )
|
||||
{
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
if( imlInstruction->op_storeLoad.immS32 != 0 )
|
||||
assert_dbg(); // not supported
|
||||
if( indexed )
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), realRegisterMem); // remember EA for reservation
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
if( switchEndian )
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
|
||||
x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), realRegisterData); // remember value for reservation
|
||||
// LWARX instruction costs extra cycles (this speeds up busy loops)
|
||||
x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20);
|
||||
}
|
||||
else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_LSWI_3 )
|
||||
{
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
if( switchEndian == false )
|
||||
assert_dbg();
|
||||
if( indexed )
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // can be replaced with LEA temp, [memReg1+memReg2] (this way we can avoid the SUB instruction after the move)
|
||||
if( g_CPUFeatures.x86.movbe )
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (indexed)
|
||||
{
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
|
||||
//if (realRegisterMem != realRegisterData)
|
||||
// x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
if (switchEndian)
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
|
||||
if (switchEndian)
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
|
||||
}
|
||||
x64Gen_and_reg64Low32_imm32(x64GenContext, realRegisterData, 0xFFFFFF00);
|
||||
}
|
||||
}
|
||||
else if( imlInstruction->op_storeLoad.copyWidth == 16 )
|
||||
{
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // todo: We can avoid this if MOVBE is available
|
||||
if (indexed)
|
||||
{
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
if(IMLBackendX64_HasExtensionMOVBE() && switchEndian )
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
else
|
||||
return false;
|
||||
return true;
|
||||
{
|
||||
x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
if( switchEndian )
|
||||
x64Gen_rol_reg64Low16_imm8(x64GenContext, realRegisterData, 8);
|
||||
}
|
||||
if( signExtend )
|
||||
x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData);
|
||||
else
|
||||
x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData);
|
||||
}
|
||||
return false;
|
||||
else if( imlInstruction->op_storeLoad.copyWidth == 8 )
|
||||
{
|
||||
if( indexed )
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
// todo: Optimize by using only MOVZX/MOVSX
|
||||
if( indexed )
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
// todo: Use sign extend move from memory instead of separate sign-extend?
|
||||
if( signExtend )
|
||||
x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
else
|
||||
x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_LOAD_LWARX_MARKER )
|
||||
{
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
if( imlInstruction->op_storeLoad.immS32 != 0 )
|
||||
assert_dbg(); // not supported
|
||||
if( indexed )
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), realRegisterMem); // remember EA for reservation
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
if( switchEndian )
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
|
||||
x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), realRegisterData); // remember value for reservation
|
||||
// LWARX instruction costs extra cycles (this speeds up busy loops)
|
||||
x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20);
|
||||
}
|
||||
else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_LSWI_3 )
|
||||
{
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
if( switchEndian == false )
|
||||
assert_dbg();
|
||||
if( indexed )
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // can be replaced with LEA temp, [memReg1+memReg2] (this way we can avoid the SUB instruction after the move)
|
||||
if(IMLBackendX64_HasExtensionMOVBE())
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData);
|
||||
}
|
||||
x64Gen_and_reg64Low32_imm32(x64GenContext, realRegisterData, 0xFFFFFF00);
|
||||
}
|
||||
else
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -510,169 +506,160 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction,
|
|||
if (indexed)
|
||||
realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2);
|
||||
|
||||
if (false)//imlInstruction->op_storeLoad.flags & PPCREC_IML_OP_FLAG_FASTMEMACCESS )
|
||||
if (indexed && realRegisterMem == realRegisterMem2)
|
||||
{
|
||||
// load u8/u16/u32 via direct memory access + optional sign extend
|
||||
assert_dbg(); // todo
|
||||
return false;
|
||||
}
|
||||
else
|
||||
if (indexed && realRegisterData == realRegisterMem2)
|
||||
{
|
||||
if (indexed && realRegisterMem == realRegisterMem2)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (indexed && realRegisterData == realRegisterMem2)
|
||||
{
|
||||
// for indexed memory access realRegisterData must not be the same register as the second memory register,
|
||||
// this can easily be fixed by swapping the logic of realRegisterMem and realRegisterMem2
|
||||
sint32 temp = realRegisterMem;
|
||||
realRegisterMem = realRegisterMem2;
|
||||
realRegisterMem2 = temp;
|
||||
}
|
||||
// for indexed memory access realRegisterData must not be the same register as the second memory register,
|
||||
// this can easily be fixed by swapping the logic of realRegisterMem and realRegisterMem2
|
||||
sint32 temp = realRegisterMem;
|
||||
realRegisterMem = realRegisterMem2;
|
||||
realRegisterMem2 = temp;
|
||||
}
|
||||
|
||||
bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend;
|
||||
bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
|
||||
if (imlInstruction->op_storeLoad.copyWidth == 32)
|
||||
{
|
||||
if (indexed)
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
uint32 valueRegister;
|
||||
if ((swapEndian == false || g_CPUFeatures.x86.movbe) && realRegisterMem != realRegisterData)
|
||||
{
|
||||
valueRegister = realRegisterData;
|
||||
}
|
||||
else
|
||||
{
|
||||
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
|
||||
valueRegister = REG_RESV_TEMP;
|
||||
}
|
||||
if (g_CPUFeatures.x86.movbe == false && swapEndian)
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, valueRegister);
|
||||
if (indexed)
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
if (g_CPUFeatures.x86.movbe && swapEndian)
|
||||
x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister);
|
||||
else
|
||||
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister);
|
||||
if (indexed)
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
else if (imlInstruction->op_storeLoad.copyWidth == 16)
|
||||
{
|
||||
if (indexed || swapEndian)
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
|
||||
if (swapEndian)
|
||||
x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8);
|
||||
if (indexed)
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
|
||||
if (indexed)
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
// todo: Optimize this, e.g. by using MOVBE
|
||||
}
|
||||
else if (imlInstruction->op_storeLoad.copyWidth == 8)
|
||||
{
|
||||
if (indexed)
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
if (indexed && realRegisterMem == realRegisterData)
|
||||
{
|
||||
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
|
||||
realRegisterData = REG_RESV_TEMP;
|
||||
}
|
||||
if (indexed)
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, realRegisterData);
|
||||
if (indexed)
|
||||
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER)
|
||||
{
|
||||
bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend;
|
||||
bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
|
||||
if (imlInstruction->op_storeLoad.copyWidth == 32)
|
||||
{
|
||||
if (indexed)
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
if (imlInstruction->op_storeLoad.immS32 != 0)
|
||||
assert_dbg(); // todo
|
||||
// reset cr0 LT, GT and EQ
|
||||
sint32 crRegister = 0;
|
||||
x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0);
|
||||
x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0);
|
||||
x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ), 0);
// calculate effective address
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
if (swapEndian)
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
if (indexed)
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
// realRegisterMem now holds EA
x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext, realRegisterMem, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr));
sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->codeBufferIndex;
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0);
// EA matches reservation
// backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten)
x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX);
// backup REG_RESV_MEMBASE
x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]), REG_RESV_MEMBASE);
// add mem register to REG_RESV_MEMBASE
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem);
// load reserved value in EAX
x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue));
// bswap EAX
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_EAX);

//x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, 0, REG_RESV_TEMP);
x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext, REG_RESV_MEMBASE, 0, REG_RESV_TEMP);

x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ));

// reset reservation
x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), 0);
x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), 0);

// restore EAX
x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]));
// restore REG_RESV_MEMBASE
x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_MEMBASE, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]));

// copy XER SO to CR0 SO
x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31);
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO));
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->codeBufferIndex);
}
else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_2)
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16); // store upper 2 bytes ..
x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // .. as big-endian
if (indexed)
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);

x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
if (indexed)
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_3)
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
if (indexed)
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);

x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8);
x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 2, REG_RESV_TEMP);
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8);
x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 1, REG_RESV_TEMP);
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8);
x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 0, REG_RESV_TEMP);

if (indexed)
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
else
return false;
return true;

if (imlInstruction->op_storeLoad.copyWidth == 32)
{
uint32 valueRegister;
if ((swapEndian == false || IMLBackendX64_HasExtensionMOVBE()) && realRegisterMem != realRegisterData)
{
valueRegister = realRegisterData;
}
else
{
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
valueRegister = REG_RESV_TEMP;
}
if (!IMLBackendX64_HasExtensionMOVBE() && swapEndian)
x64Gen_bswap_reg64Lower32bit(x64GenContext, valueRegister);
if (indexed)
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
if (IMLBackendX64_HasExtensionMOVBE() && swapEndian)
x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister);
else
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister);
if (indexed)
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
else if (imlInstruction->op_storeLoad.copyWidth == 16)
{
if (indexed || swapEndian)
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
if (swapEndian)
x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8);
if (indexed)
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
if (indexed)
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
// todo: Optimize this, e.g. by using MOVBE
}
else if (imlInstruction->op_storeLoad.copyWidth == 8)
{
if (indexed)
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
if (indexed && realRegisterMem == realRegisterData)
{
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
realRegisterData = REG_RESV_TEMP;
}
if (indexed)
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, realRegisterData);
if (indexed)
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER)
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
if (imlInstruction->op_storeLoad.immS32 != 0)
assert_dbg(); // todo
// reset cr0 LT, GT and EQ
sint32 crRegister = 0;
x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0);
x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0);
x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ), 0);
// calculate effective address
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
if (swapEndian)
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
if (indexed)
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
// realRegisterMem now holds EA
x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext, realRegisterMem, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr));
sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->codeBufferIndex;
x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0);
// EA matches reservation
// backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten)
x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX);
// backup REG_RESV_MEMBASE
x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]), REG_RESV_MEMBASE);
// add mem register to REG_RESV_MEMBASE
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem);
// load reserved value in EAX
x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue));
// bswap EAX
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_EAX);

//x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, 0, REG_RESV_TEMP);
x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext, REG_RESV_MEMBASE, 0, REG_RESV_TEMP);

x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ));

// reset reservation
x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), 0);
x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), 0);

// restore EAX
x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]));
// restore REG_RESV_MEMBASE
x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_MEMBASE, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]));

// copy XER SO to CR0 SO
x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31);
x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO));
// end
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->codeBufferIndex);
}
else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_2)
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16); // store upper 2 bytes ..
x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // .. as big-endian
if (indexed)
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);

x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
if (indexed)
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_3)
{
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
if (indexed)
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);

x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8);
x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 2, REG_RESV_TEMP);
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8);
x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 1, REG_RESV_TEMP);
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8);
x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 0, REG_RESV_TEMP);

if (indexed)
x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
else
return false;
return true;
}
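For readers skimming the stwcx. path above: the reservation check compares the store EA against reservedMemAddr, loads the remembered reservedMemValue into EAX, and lets LOCK CMPXCHG decide whether guest memory still holds that value; CR0.EQ is then set from the result and the reservation is cleared. A minimal standalone sketch of the same idea in plain C++ (reservationAddr, reservationValue and storeConditional32 are illustrative stand-ins, not Cemu's actual state or API):

```cpp
#include <atomic>
#include <cstdint>

// Illustrative reservation state, normally captured by the matching lwarx.
static uint32_t reservationAddr;
static uint32_t reservationValue;

// Returns true when the conditional store succeeded (what CR0.EQ reports).
bool storeConditional32(std::atomic<uint32_t>* mem32, uint32_t ea, uint32_t newValue)
{
    if (ea != reservationAddr)
        return false; // reservation does not cover this EA, store is skipped
    uint32_t expected = reservationValue;
    // LOCK CMPXCHG equivalent: only write if memory still holds the reserved value
    bool success = mem32->compare_exchange_strong(expected, newValue);
    // the reservation is consumed either way
    reservationAddr = 0;
    reservationValue = 0;
    return success;
}
```

Note that comparing against the remembered value (rather than monitoring the address) lets the store succeed even if the location was modified and then restored in between; that is an inherent property of emulating stwcx. with a compare-exchange.
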
bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
@ -781,7 +768,8 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
// count leading zeros
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
if( g_CPUFeatures.x86.lzcnt )
// LZCNT instruction (ABM, CPUID.80000001H:ECX.ABM[Bit 5])
if(IMLBackendX64_HasExtensionLZCNT())
{
x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA));
}
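A note on the LZCNT gate above: the fallback path (in the unchanged code further down) has to work around the fact that BSR leaves its destination undefined for an input of 0, while the count-leading-zeros operation the recompiler implements must return 32 in that case. A small semantic reference, assuming C++20's <bit>; this is what the emitted code has to match, not how it is emitted:

```cpp
#include <bit>
#include <cstdint>

// Reference semantics for a 32-bit count-leading-zeros as used by the IML op:
// well defined for 0 (result 32), which LZCNT gives directly.
uint32_t countLeadingZeros32(uint32_t value)
{
    // Without LZCNT, BSR is undefined for 0, so a fallback needs this check
    // before computing 31 - BSR(value).
    if (value == 0)
        return 32;
    return static_cast<uint32_t>(std::countl_zero(value));
}
```
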
@ -1499,12 +1487,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA);
sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB);

if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW)
if (IMLBackendX64_HasExtensionBMI2() && imlInstruction->operation == PPCREC_IML_OP_SRW)
{
// use BMI2 SHRX if available
x64Gen_shrx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
}
else if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SLW)
else if (IMLBackendX64_HasExtensionBMI2() && imlInstruction->operation == PPCREC_IML_OP_SLW)
{
// use BMI2 SHLX if available
x64Gen_shlx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
@ -2656,4 +2644,79 @@ void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions()
PPCRecompiler_leaveRecompilerCode_unvisited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode();
PPCRecompiler_leaveRecompilerCode_visited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode();
cemu_assert_debug(PPCRecompiler_leaveRecompilerCode_unvisited != PPCRecompiler_leaveRecompilerCode_visited);
}
}

bool IMLBackendX64_HasExtensionLZCNT()
{
return s_hasLZCNTSupport;
}

bool IMLBackendX64_HasExtensionMOVBE()
{
return s_hasMOVBESupport;
}

bool IMLBackendX64_HasExtensionBMI2()
{
return s_hasBMI2Support;
}

bool IMLBackendX64_HasExtensionAVX()
{
return s_hasAVXSupport;
}

void IMLBackendX64_Init()
{
// init x64 recompiler instance data
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL;
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL;
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL;
ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[1] = 1ULL << 63ULL;
ppcRecompilerInstanceData->_x64XMM_xorNOTMask[0] = 0xFFFFFFFFFFFFFFFFULL;
ppcRecompilerInstanceData->_x64XMM_xorNOTMask[1] = 0xFFFFFFFFFFFFFFFFULL;
ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[0] = ~(1ULL << 63ULL);
ppcRecompilerInstanceData->_x64XMM_andAbsMaskBottom[1] = ~0ULL;
ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[0] = ~(1ULL << 63ULL);
ppcRecompilerInstanceData->_x64XMM_andAbsMaskPair[1] = ~(1ULL << 63ULL);
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[0] = ~(1 << 31);
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[1] = 0xFFFFFFFF;
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[2] = 0xFFFFFFFF;
ppcRecompilerInstanceData->_x64XMM_andFloatAbsMaskBottom[3] = 0xFFFFFFFF;
ppcRecompilerInstanceData->_x64XMM_singleWordMask[0] = 0xFFFFFFFFULL;
ppcRecompilerInstanceData->_x64XMM_singleWordMask[1] = 0ULL;
ppcRecompilerInstanceData->_x64XMM_constDouble1_1[0] = 1.0;
ppcRecompilerInstanceData->_x64XMM_constDouble1_1[1] = 1.0;
ppcRecompilerInstanceData->_x64XMM_constDouble0_0[0] = 0.0;
ppcRecompilerInstanceData->_x64XMM_constDouble0_0[1] = 0.0;
ppcRecompilerInstanceData->_x64XMM_constFloat0_0[0] = 0.0f;
ppcRecompilerInstanceData->_x64XMM_constFloat0_0[1] = 0.0f;
ppcRecompilerInstanceData->_x64XMM_constFloat1_1[0] = 1.0f;
ppcRecompilerInstanceData->_x64XMM_constFloat1_1[1] = 1.0f;
*(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[0] = 0x00800000;
*(uint32*)&ppcRecompilerInstanceData->_x64XMM_constFloatMin[1] = 0x00800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[0] = 0x7F800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[1] = 0x7F800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[2] = 0x7F800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMask1[3] = 0x7F800000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[0] = ~0x80000000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[1] = ~0x80000000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000;
ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000;

// mxcsr
ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000;
ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80;

// query processor extensions
int cpuInfo[4];
cpuid(cpuInfo, 0x80000001);
s_hasLZCNTSupport = ((cpuInfo[2] >> 5) & 1) != 0;
cpuid(cpuInfo, 0x1);
s_hasMOVBESupport = ((cpuInfo[2] >> 22) & 1) != 0;
s_hasAVXSupport = ((cpuInfo[2] >> 28) & 1) != 0;
cpuidex(cpuInfo, 0x7, 0);
s_hasBMI2Support = ((cpuInfo[1] >> 8) & 1) != 0;

forceLog_printf("Recompiler initialized. CPU extensions: %s%s%s", s_hasLZCNTSupport ? "LZCNT " : "", s_hasMOVBESupport ? "MOVBE " : "", s_hasAVXSupport ? "AVX " : "");
}
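The cpuid()/cpuidex() calls above read the usual feature leaves: LZCNT/ABM from leaf 0x80000001 (ECX bit 5), MOVBE and AVX from leaf 1 (ECX bits 22 and 28), and BMI2 from leaf 7, subleaf 0 (EBX bit 8). A self-contained sketch of the same queries using the MSVC intrinsics; Cemu's cpuid/cpuidex wrappers are assumed to behave like these:

```cpp
#include <intrin.h>

struct X64Features
{
    bool lzcnt;
    bool movbe;
    bool avx;
    bool bmi2;
};

X64Features QueryX64Features()
{
    X64Features f{};
    int info[4]; // EAX, EBX, ECX, EDX
    __cpuid(info, 0x80000001);
    f.lzcnt = ((info[2] >> 5) & 1) != 0;  // ABM/LZCNT
    __cpuid(info, 0x1);
    f.movbe = ((info[2] >> 22) & 1) != 0;
    f.avx = ((info[2] >> 28) & 1) != 0;   // a strict check would also test OSXSAVE/XGETBV
    __cpuidex(info, 0x7, 0);
    f.bmi2 = ((info[1] >> 8) & 1) != 0;
    return f;
}
```
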

@ -131,6 +131,12 @@ enum
#define PPC_X64_GPR_USABLE_REGISTERS (16-4)
#define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register

void IMLBackendX64_Init();

bool IMLBackendX64_HasExtensionLZCNT();
bool IMLBackendX64_HasExtensionMOVBE();
bool IMLBackendX64_HasExtensionBMI2();
bool IMLBackendX64_HasExtensionAVX();

bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext);
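Going by the call sites elsewhere in this diff, the intended usage is: IMLBackendX64_Init() runs once to fill the XMM constant pool and query CPUID, and the HasExtension helpers then gate instruction selection at emit time. A rough sketch of that gating pattern; pickStore32Encoding is illustrative, not a real Cemu function:

```cpp
// Hypothetical helper mirroring how the backend chooses between MOVBE and
// a BSWAP+MOV sequence for big-endian guest stores.
enum class Store32Encoding { PlainMov, Movbe, BswapThenMov };

Store32Encoding pickStore32Encoding(bool swapEndian)
{
    if (!swapEndian)
        return Store32Encoding::PlainMov;
    // MOVBE writes the byte-swapped value directly; without it the value is
    // swapped in a register first and then stored with a plain MOV.
    return IMLBackendX64_HasExtensionMOVBE() ? Store32Encoding::Movbe
                                             : Store32Encoding::BswapThenMov;
}
```
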
@ -87,7 +87,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext,
{
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memRegEx);
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memReg);
if (g_CPUFeatures.x86.movbe)
if (IMLBackendX64_HasExtensionMOVBE())
{
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32);
}

@ -99,7 +99,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext,
}
else
{
if (g_CPUFeatures.x86.movbe)
if (IMLBackendX64_HasExtensionMOVBE())
{
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32);
}

@ -109,7 +109,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext,
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
}
}
if (g_CPUFeatures.x86.avx)
if (IMLBackendX64_HasExtensionAVX())
{
x64Gen_movd_xmmReg_reg64Low32(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
}
@ -281,29 +281,21 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
{
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
if( g_CPUFeatures.x86.movbe )
if(IMLBackendX64_HasExtensionMOVBE())
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
else
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
}
else
{
if( g_CPUFeatures.x86.movbe )
if(IMLBackendX64_HasExtensionMOVBE())
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
else
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
}
if( g_CPUFeatures.x86.movbe == false )
if(IMLBackendX64_HasExtensionMOVBE() == false )
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
if( g_CPUFeatures.x86.avx )
{
x64Gen_movd_xmmReg_reg64Low32(x64GenContext, realRegisterXMM, REG_RESV_TEMP);
}
else
{
x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR), REG_RESV_TEMP);
x64Gen_movddup_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
}
x64Gen_movd_xmmReg_reg64Low32(x64GenContext, realRegisterXMM, REG_RESV_TEMP);
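This hunk is the commit's headline change: the old code used MOVD only when the AVX flag was set and otherwise bounced the value through temporaryFPR with MOVSD plus a 32-bit reload, but MOVD is SSE2 and needs no AVX gate, so the GPR-to-XMM transfer can always be a single instruction. In intrinsics terms, a sketch of the two paths (not the recompiler's actual code):

```cpp
#include <emmintrin.h> // SSE2
#include <cstdint>

// New path: single MOVD, available on every x86-64 CPU (SSE2 baseline).
__m128i gprToXmm_movd(uint32_t value)
{
    return _mm_cvtsi32_si128(static_cast<int32_t>(value));
}

// Old non-AVX path, roughly: spill to memory, then reload into the XMM register.
__m128i gprToXmm_viaMemory(uint32_t value)
{
    alignas(16) uint32_t tmp[4] = { value, 0, 0, 0 };
    return _mm_load_si128(reinterpret_cast<const __m128i*>(tmp));
}
```
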

if (imlInstruction->op_storeLoad.flags2.notExpanded)
{
@ -317,7 +309,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
}
else if( mode == PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0 )
{
if( g_CPUFeatures.x86.avx )
if( IMLBackendX64_HasExtensionAVX() )
{
if( indexed )
{
@ -420,23 +412,15 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext
if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0)
{
x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM);
if (g_CPUFeatures.x86.avx)
{
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
}
else
{
x64Gen_movsd_memReg64_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
}
if (g_CPUFeatures.x86.movbe == false)
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
if (IMLBackendX64_HasExtensionMOVBE() == false)
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
if (indexed)
{
cemu_assert_debug(memReg != memRegEx);
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx);
}
if (g_CPUFeatures.x86.movbe)
if (IMLBackendX64_HasExtensionMOVBE())
x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, memReg, memImmS32, REG_RESV_TEMP);
else
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, memReg, memImmS32, REG_RESV_TEMP);
@ -605,30 +589,14 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
if (imlInstruction->op_storeLoad.flags2.notExpanded)
{
// value is already in single format
if (g_CPUFeatures.x86.avx)
{
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
}
else
{
x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
}
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
}
else
{
x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, realRegisterXMM);
if (g_CPUFeatures.x86.avx)
{
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
}
else
{
x64Gen_movsd_memReg64_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
}
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
}
if( g_CPUFeatures.x86.movbe == false )
if(IMLBackendX64_HasExtensionMOVBE() == false )
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
if( indexed )
{

@ -636,7 +604,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFuncti
assert_dbg();
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
}
if( g_CPUFeatures.x86.movbe )
if(IMLBackendX64_HasExtensionMOVBE())
x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
else
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
@ -669,15 +637,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
}
else if( mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0 )
{
if( g_CPUFeatures.x86.avx )
{
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
}
else
{
x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
}
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
if( indexed )
{
@ -1057,7 +1017,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti
{
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB);
}
else if (g_CPUFeatures.x86.avx)
else if (IMLBackendX64_HasExtensionAVX())
{
x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA, imlInstruction->op_fpr_r_r_r.registerOperandB);
}