mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-04-29 14:59:26 -04:00
PPCRec: Implement MCRF, rework DCBZ
This commit is contained in:
parent
972d0ed05d
commit
89f8f9bd2a
5 changed files with 57 additions and 68 deletions
|
@ -671,27 +671,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
|
|||
{
|
||||
x64GenContext->emitter->CMP_dd(regR, regA);
|
||||
}
|
||||
else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ )
|
||||
{
|
||||
if( regR != regA )
|
||||
{
|
||||
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA);
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, regR);
|
||||
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F);
|
||||
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE);
|
||||
for(sint32 f=0; f<0x20; f+=8)
|
||||
x64Gen_mov_mem64Reg64_imm32(x64GenContext, REG_RESV_TEMP, f, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
// calculate effective address
|
||||
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA);
|
||||
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F);
|
||||
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE);
|
||||
for(sint32 f=0; f<0x20; f+=8)
|
||||
x64Gen_mov_mem64Reg64_imm32(x64GenContext, REG_RESV_TEMP, f, 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);
|
||||
|
|
|
@ -4,6 +4,15 @@
|
|||
#include "../PPCRecompiler.h"
|
||||
#include "../PPCRecompilerIml.h"
|
||||
|
||||
bool IMLInstruction::HasSideEffects() const
|
||||
{
|
||||
bool hasSideEffects = true;
|
||||
if(type == PPCREC_IML_TYPE_R_R || type == PPCREC_IML_TYPE_R_R_S32 || type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32)
|
||||
hasSideEffects = false;
|
||||
// todo - add more cases
|
||||
return hasSideEffects;
|
||||
}
|
||||
|
||||
void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
||||
{
|
||||
registersUsed->readGPR1 = IMLREG_INVALID;
|
||||
|
@ -26,8 +35,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
|
|||
}
|
||||
else if (type == PPCREC_IML_TYPE_R_R)
|
||||
{
|
||||
if (operation == PPCREC_IML_OP_DCBZ ||
|
||||
operation == PPCREC_IML_OP_X86_CMP)
|
||||
if (operation == PPCREC_IML_OP_X86_CMP)
|
||||
{
|
||||
// both operands are read only
|
||||
registersUsed->readGPR1 = op_r_r.regR;
|
||||
|
|
|
@ -126,7 +126,6 @@ enum
|
|||
PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits)
|
||||
PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
|
||||
PPCREC_IML_OP_CNTLZW,
|
||||
PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20
|
||||
// FPU
|
||||
PPCREC_IML_OP_FPR_ADD_BOTTOM,
|
||||
PPCREC_IML_OP_FPR_ADD_PAIR,
|
||||
|
@ -818,6 +817,7 @@ struct IMLInstruction
|
|||
}
|
||||
|
||||
void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const;
|
||||
bool HasSideEffects() const; // returns true if the instruction has side effects beyond just reading and writing registers. Dead code elimination uses this to know if an instruction can be dropped when the regular register outputs are not used
|
||||
|
||||
void RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& translationTable);
|
||||
void ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4]);
|
||||
|
|
|
@ -649,7 +649,7 @@ void IMLOptimizer_RemoveDeadCodeFromSegment(IMLOptimizerRegIOAnalysis& regIoAnal
|
|||
// Then for each segment:
|
||||
// - Iterate instructions backwards
|
||||
// - Maintain a list of registers which are read at a later point (initially this is the list from the first step)
|
||||
// - If an instruction only modifies registers which are not in the read list, then it is dead code and can be replaced with a no-op
|
||||
// - If an instruction only modifies registers which are not in the read list and has no side effects, then it is dead code and can be replaced with a no-op
|
||||
|
||||
std::unordered_set<IMLRegID> regsNeeded = regIoAnalysis.GetRegistersNeededAtEndOfSegment(seg);
|
||||
|
||||
|
@ -688,10 +688,7 @@ void IMLOptimizer_RemoveDeadCodeFromSegment(IMLOptimizerRegIOAnalysis& regIoAnal
|
|||
registersUsed.ForEachReadGPR([&](IMLReg reg) {
|
||||
regsNeeded.insert(reg.GetRegID());
|
||||
});
|
||||
// for now we only allow some instruction types to be deleted, eventually we should find a safer way to identify side effects that can't be judged by register usage alone
|
||||
if(imlInstruction.type != PPCREC_IML_TYPE_R_R && imlInstruction.type != PPCREC_IML_TYPE_R_R_S32 && imlInstruction.type != PPCREC_IML_TYPE_COMPARE && imlInstruction.type != PPCREC_IML_TYPE_COMPARE_S32)
|
||||
continue;
|
||||
if(onlyWritesRedundantRegisters)
|
||||
if(!imlInstruction.HasSideEffects() && onlyWritesRedundantRegisters)
|
||||
{
|
||||
imlInstruction.make_no_op();
|
||||
}
|
||||
|
|
|
@ -422,17 +422,22 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
|||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (spr == 268 || spr == 269)
|
||||
void PPCRecompilerImlGen_MCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
uint32 crD, crS, b;
|
||||
PPC_OPC_TEMPL_X(opcode, crD, crS, b);
|
||||
cemu_assert_debug((crD&3) == 0);
|
||||
cemu_assert_debug((crS&3) == 0);
|
||||
crD >>= 2;
|
||||
crS >>= 2;
|
||||
for (sint32 i = 0; i<4; i++)
|
||||
{
|
||||
// TBL / TBU
|
||||
uint32 param2 = spr | (rD << 16);
|
||||
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0, IMLREG_INVALID);
|
||||
IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock);
|
||||
|
||||
return true;
|
||||
IMLReg regCrSrcBit = _GetRegCR(ppcImlGenContext, crS * 4 + i);
|
||||
IMLReg regCrDstBit = _GetRegCR(ppcImlGenContext, crD * 4 + i);
|
||||
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCrDstBit, regCrSrcBit);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
|
@ -1211,12 +1216,12 @@ bool PPCRecompilerImlGen_LOAD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
|||
return true;
|
||||
}
|
||||
|
||||
bool PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool signExtend, bool isBigEndian, bool updateAddrReg)
|
||||
void PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool signExtend, bool isBigEndian, bool updateAddrReg)
|
||||
{
|
||||
// if rA == rD, then the EA wont be stored to rA. We could set updateAddrReg to false in such cases but the end result is the same since the loaded value would overwrite rA
|
||||
sint32 rA, rD, rB;
|
||||
PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
|
||||
if (updateAddrReg && (rA == 0 || rD == rB))
|
||||
return false; // invalid instruction form
|
||||
updateAddrReg = updateAddrReg && (rA != 0);
|
||||
IMLReg regA = rA != 0 ? _GetRegGPR(ppcImlGenContext, rA) : IMLREG_INVALID;
|
||||
IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
|
||||
IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD);
|
||||
|
@ -1231,7 +1236,6 @@ bool PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint
|
|||
PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, regDst, regA, regB, bitWidth, signExtend, isBigEndian);
|
||||
else
|
||||
ppcImlGenContext->emitInst().make_r_memory(regDst, regB, 0, bitWidth, signExtend, isBigEndian);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PPCRecompilerImlGen_STORE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool isBigEndian, bool updateAddrReg)
|
||||
|
@ -1498,13 +1502,21 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
|||
rA = (opcode>>16)&0x1F;
|
||||
rB = (opcode>>11)&0x1F;
|
||||
// prepare registers
|
||||
IMLReg gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):IMLREG_INVALID;
|
||||
IMLReg gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
|
||||
// store
|
||||
if( rA != 0 )
|
||||
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_DCBZ, gprRegisterA, gprRegisterB);
|
||||
IMLReg regA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):IMLREG_INVALID;
|
||||
IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
|
||||
// load zero into a temporary register
|
||||
IMLReg regZero = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
|
||||
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regZero, 0);
|
||||
// prepare EA and align it to cacheline
|
||||
IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1);
|
||||
if(rA != 0)
|
||||
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB);
|
||||
else
|
||||
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_DCBZ, gprRegisterB, gprRegisterB);
|
||||
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB);
|
||||
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, regMemResEA, ~31);
|
||||
// zero out the cacheline
|
||||
for(sint32 i = 0; i < 32; i += 4)
|
||||
ppcImlGenContext->emitInst().make_memory_r(regZero, regMemResEA, i, 32, false);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2069,6 +2081,9 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
case 19: // opcode category 19
|
||||
switch (PPC_getBits(opcode, 30, 10))
|
||||
{
|
||||
case 0:
|
||||
PPCRecompilerImlGen_MCRF(ppcImlGenContext, opcode);
|
||||
break;
|
||||
case 16: // BCLR
|
||||
if (PPCRecompilerImlGen_BCSPR(ppcImlGenContext, opcode, SPR_LR) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
|
@ -2170,8 +2185,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 23: // LWZX
|
||||
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, false))
|
||||
unsupportedInstructionFound = true;
|
||||
PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, false);
|
||||
break;
|
||||
case 24:
|
||||
if (PPCRecompilerImlGen_SLW(ppcImlGenContext, opcode) == false)
|
||||
|
@ -2196,8 +2210,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
// DBCST - Generates no code
|
||||
break;
|
||||
case 55: // LWZUX
|
||||
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, true))
|
||||
unsupportedInstructionFound = true;
|
||||
PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, true);
|
||||
break;
|
||||
case 60: // ANDC
|
||||
if (!PPCRecompilerImlGen_ANDC(ppcImlGenContext, opcode))
|
||||
|
@ -2211,16 +2224,14 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
// DCBF -> No-Op
|
||||
break;
|
||||
case 87: // LBZX
|
||||
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, false))
|
||||
unsupportedInstructionFound = true;
|
||||
PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, false);
|
||||
break;
|
||||
case 104:
|
||||
if (PPCRecompilerImlGen_NEG(ppcImlGenContext, opcode) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 119: // LBZUX
|
||||
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, true))
|
||||
unsupportedInstructionFound = true;
|
||||
PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, true);
|
||||
break;
|
||||
case 124: // NOR
|
||||
if (!PPCRecompilerImlGen_OR_NOR(ppcImlGenContext, opcode, true))
|
||||
|
@ -2279,16 +2290,14 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 279: // LHZX
|
||||
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, false))
|
||||
unsupportedInstructionFound = true;
|
||||
PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, false);
|
||||
break;
|
||||
case 284: // EQV (alias to NXOR)
|
||||
if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, true))
|
||||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 311: // LHZUX
|
||||
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, true))
|
||||
unsupportedInstructionFound = true;
|
||||
PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, true);
|
||||
break;
|
||||
case 316: // XOR
|
||||
if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, false))
|
||||
|
@ -2299,16 +2308,14 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 343: // LHAX
|
||||
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, false))
|
||||
unsupportedInstructionFound = true;
|
||||
PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, false);
|
||||
break;
|
||||
case 371:
|
||||
if (PPCRecompilerImlGen_MFTB(ppcImlGenContext, opcode) == false)
|
||||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 375: // LHAUX
|
||||
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, true))
|
||||
unsupportedInstructionFound = true;
|
||||
PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, true);
|
||||
break;
|
||||
case 407: // STHX
|
||||
if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true, false))
|
||||
|
@ -2342,8 +2349,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 534: // LWBRX
|
||||
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, false, false))
|
||||
unsupportedInstructionFound = true;
|
||||
PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, false, false);
|
||||
break;
|
||||
case 535:
|
||||
if (PPCRecompilerImlGen_LFSX(ppcImlGenContext, opcode) == false)
|
||||
|
@ -2397,8 +2403,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
|
|||
unsupportedInstructionFound = true;
|
||||
break;
|
||||
case 790: // LHBRX
|
||||
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, false, false))
|
||||
unsupportedInstructionFound = true;
|
||||
PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, false, false);
|
||||
break;
|
||||
case 792:
|
||||
if (PPCRecompilerImlGen_SRAW(ppcImlGenContext, opcode) == false)
|
||||
|
@ -2878,7 +2883,7 @@ bool PPCIMLGen_FillBasicBlock(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBloc
|
|||
|
||||
if (PPCRecompiler_decodePPCInstruction(&ppcImlGenContext))
|
||||
{
|
||||
debug_printf("Recompiler encountered unsupported instruction at 0x%08x\n", addressOfCurrentInstruction);
|
||||
cemuLog_logDebug(LogType::Force, "PPCRecompiler: Unsupported instruction at 0x{:08x}", addressOfCurrentInstruction);
|
||||
ppcImlGenContext.currentOutputSegment = nullptr;
|
||||
return false;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue