PPCRec: Implement MCRF, rework DCBZ

This commit is contained in:
Exzap 2024-10-20 04:25:21 +02:00
parent 972d0ed05d
commit 89f8f9bd2a
5 changed files with 57 additions and 68 deletions

View file

@ -671,27 +671,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
{ {
x64GenContext->emitter->CMP_dd(regR, regA); x64GenContext->emitter->CMP_dd(regR, regA);
} }
else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ )
{
if( regR != regA )
{
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA);
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, regR);
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F);
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE);
for(sint32 f=0; f<0x20; f+=8)
x64Gen_mov_mem64Reg64_imm32(x64GenContext, REG_RESV_TEMP, f, 0);
}
else
{
// calculate effective address
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, regA);
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F);
x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE);
for(sint32 f=0; f<0x20; f+=8)
x64Gen_mov_mem64Reg64_imm32(x64GenContext, REG_RESV_TEMP, f, 0);
}
}
else else
{ {
debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation);

View file

@ -4,6 +4,15 @@
#include "../PPCRecompiler.h" #include "../PPCRecompiler.h"
#include "../PPCRecompilerIml.h" #include "../PPCRecompilerIml.h"
// Reports whether this instruction does anything beyond reading and writing
// its register operands. The dead code elimination pass consults this before
// replacing an instruction whose register outputs are unused with a no-op.
//
// The check is deliberately conservative: every instruction type is assumed
// to have side effects unless it is explicitly listed below.
//
// NOTE(review): PPCREC_IML_TYPE_R_R also carries PPCREC_IML_OP_X86_CMP, which
// only reads its operands - confirm that reporting such instructions as
// side-effect free is intended.
bool IMLInstruction::HasSideEffects() const
{
	switch (type)
	{
	case PPCREC_IML_TYPE_R_R:
	case PPCREC_IML_TYPE_R_R_S32:
	case PPCREC_IML_TYPE_COMPARE:
	case PPCREC_IML_TYPE_COMPARE_S32:
		// treated as having no effect other than on their register operands
		return false;
	default:
		// todo - add more cases
		return true;
	}
}
void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
{ {
registersUsed->readGPR1 = IMLREG_INVALID; registersUsed->readGPR1 = IMLREG_INVALID;
@ -26,8 +35,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
} }
else if (type == PPCREC_IML_TYPE_R_R) else if (type == PPCREC_IML_TYPE_R_R)
{ {
if (operation == PPCREC_IML_OP_DCBZ || if (operation == PPCREC_IML_OP_X86_CMP)
operation == PPCREC_IML_OP_X86_CMP)
{ {
// both operands are read only // both operands are read only
registersUsed->readGPR1 = op_r_r.regR; registersUsed->readGPR1 = op_r_r.regR;

View file

@ -126,7 +126,6 @@ enum
PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits) PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits)
PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
PPCREC_IML_OP_CNTLZW, PPCREC_IML_OP_CNTLZW,
PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20
// FPU // FPU
PPCREC_IML_OP_FPR_ADD_BOTTOM, PPCREC_IML_OP_FPR_ADD_BOTTOM,
PPCREC_IML_OP_FPR_ADD_PAIR, PPCREC_IML_OP_FPR_ADD_PAIR,
@ -818,6 +817,7 @@ struct IMLInstruction
} }
void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const;
bool HasSideEffects() const; // returns true if the instruction has side effects beyond just reading and writing registers. Dead code elimination uses this to know if an instruction can be dropped when the regular register outputs are not used
void RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& translationTable); void RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& translationTable);
void ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4]); void ReplaceFPRs(IMLReg fprRegisterSearched[4], IMLReg fprRegisterReplaced[4]);

View file

@ -649,7 +649,7 @@ void IMLOptimizer_RemoveDeadCodeFromSegment(IMLOptimizerRegIOAnalysis& regIoAnal
// Then for each segment: // Then for each segment:
// - Iterate instructions backwards // - Iterate instructions backwards
// - Maintain a list of registers which are read at a later point (initially this is the list from the first step) // - Maintain a list of registers which are read at a later point (initially this is the list from the first step)
// - If an instruction only modifies registers which are not in the read list, then it is dead code and can be replaced with a no-op // - If an instruction only modifies registers which are not in the read list and has no side effects, then it is dead code and can be replaced with a no-op
std::unordered_set<IMLRegID> regsNeeded = regIoAnalysis.GetRegistersNeededAtEndOfSegment(seg); std::unordered_set<IMLRegID> regsNeeded = regIoAnalysis.GetRegistersNeededAtEndOfSegment(seg);
@ -688,10 +688,7 @@ void IMLOptimizer_RemoveDeadCodeFromSegment(IMLOptimizerRegIOAnalysis& regIoAnal
registersUsed.ForEachReadGPR([&](IMLReg reg) { registersUsed.ForEachReadGPR([&](IMLReg reg) {
regsNeeded.insert(reg.GetRegID()); regsNeeded.insert(reg.GetRegID());
}); });
// for now we only allow some instruction types to be deleted, eventually we should find a safer way to identify side effects that can't be judged by register usage alone if(!imlInstruction.HasSideEffects() && onlyWritesRedundantRegisters)
if(imlInstruction.type != PPCREC_IML_TYPE_R_R && imlInstruction.type != PPCREC_IML_TYPE_R_R_S32 && imlInstruction.type != PPCREC_IML_TYPE_COMPARE && imlInstruction.type != PPCREC_IML_TYPE_COMPARE_S32)
continue;
if(onlyWritesRedundantRegisters)
{ {
imlInstruction.make_no_op(); imlInstruction.make_no_op();
} }

View file

@ -422,17 +422,22 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
return true; return true;
} }
return false; return false;
}
if (spr == 268 || spr == 269) void PPCRecompilerImlGen_MCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{
uint32 crD, crS, b;
PPC_OPC_TEMPL_X(opcode, crD, crS, b);
cemu_assert_debug((crD&3) == 0);
cemu_assert_debug((crS&3) == 0);
crD >>= 2;
crS >>= 2;
for (sint32 i = 0; i<4; i++)
{ {
// TBL / TBU IMLReg regCrSrcBit = _GetRegCR(ppcImlGenContext, crS * 4 + i);
uint32 param2 = spr | (rD << 16); IMLReg regCrDstBit = _GetRegCR(ppcImlGenContext, crD * 4 + i);
ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0, IMLREG_INVALID); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCrDstBit, regCrSrcBit);
IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock);
return true;
} }
return false;
} }
bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
@ -1211,12 +1216,12 @@ bool PPCRecompilerImlGen_LOAD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
return true; return true;
} }
bool PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool signExtend, bool isBigEndian, bool updateAddrReg) void PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool signExtend, bool isBigEndian, bool updateAddrReg)
{ {
// if rA == rD, then the EA won't be stored to rA. We could set updateAddrReg to false in such cases but the end result is the same since the loaded value would overwrite rA
sint32 rA, rD, rB; sint32 rA, rD, rB;
PPC_OPC_TEMPL_X(opcode, rD, rA, rB); PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
if (updateAddrReg && (rA == 0 || rD == rB)) updateAddrReg = updateAddrReg && (rA != 0);
return false; // invalid instruction form
IMLReg regA = rA != 0 ? _GetRegGPR(ppcImlGenContext, rA) : IMLREG_INVALID; IMLReg regA = rA != 0 ? _GetRegGPR(ppcImlGenContext, rA) : IMLREG_INVALID;
IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); IMLReg regB = _GetRegGPR(ppcImlGenContext, rB);
IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD); IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD);
@ -1231,7 +1236,6 @@ bool PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint
PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, regDst, regA, regB, bitWidth, signExtend, isBigEndian); PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, regDst, regA, regB, bitWidth, signExtend, isBigEndian);
else else
ppcImlGenContext->emitInst().make_r_memory(regDst, regB, 0, bitWidth, signExtend, isBigEndian); ppcImlGenContext->emitInst().make_r_memory(regDst, regB, 0, bitWidth, signExtend, isBigEndian);
return true;
} }
bool PPCRecompilerImlGen_STORE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool isBigEndian, bool updateAddrReg) bool PPCRecompilerImlGen_STORE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool isBigEndian, bool updateAddrReg)
@ -1498,13 +1502,21 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
rA = (opcode>>16)&0x1F; rA = (opcode>>16)&0x1F;
rB = (opcode>>11)&0x1F; rB = (opcode>>11)&0x1F;
// prepare registers // prepare registers
IMLReg gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):IMLREG_INVALID; IMLReg regA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):IMLREG_INVALID;
IMLReg gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB);
// store // load zero into a temporary register
if( rA != 0 ) IMLReg regZero = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_DCBZ, gprRegisterA, gprRegisterB); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regZero, 0);
// prepare EA and align it to cacheline
IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1);
if(rA != 0)
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB);
else else
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_DCBZ, gprRegisterB, gprRegisterB); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB);
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_AND, regMemResEA, ~31);
// zero out the cacheline
for(sint32 i = 0; i < 32; i += 4)
ppcImlGenContext->emitInst().make_memory_r(regZero, regMemResEA, i, 32, false);
return true; return true;
} }
@ -2069,6 +2081,9 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
case 19: // opcode category 19 case 19: // opcode category 19
switch (PPC_getBits(opcode, 30, 10)) switch (PPC_getBits(opcode, 30, 10))
{ {
case 0:
PPCRecompilerImlGen_MCRF(ppcImlGenContext, opcode);
break;
case 16: // BCLR case 16: // BCLR
if (PPCRecompilerImlGen_BCSPR(ppcImlGenContext, opcode, SPR_LR) == false) if (PPCRecompilerImlGen_BCSPR(ppcImlGenContext, opcode, SPR_LR) == false)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
@ -2170,8 +2185,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
break; break;
case 23: // LWZX case 23: // LWZX
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, false)) PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, false);
unsupportedInstructionFound = true;
break; break;
case 24: case 24:
if (PPCRecompilerImlGen_SLW(ppcImlGenContext, opcode) == false) if (PPCRecompilerImlGen_SLW(ppcImlGenContext, opcode) == false)
@ -2196,8 +2210,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
// DCBST - Generates no code // DCBST - Generates no code
break; break;
case 55: // LWZUX case 55: // LWZUX
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, true)) PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, true);
unsupportedInstructionFound = true;
break; break;
case 60: // ANDC case 60: // ANDC
if (!PPCRecompilerImlGen_ANDC(ppcImlGenContext, opcode)) if (!PPCRecompilerImlGen_ANDC(ppcImlGenContext, opcode))
@ -2211,16 +2224,14 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
// DCBF -> No-Op // DCBF -> No-Op
break; break;
case 87: // LBZX case 87: // LBZX
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, false)) PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, false);
unsupportedInstructionFound = true;
break; break;
case 104: case 104:
if (PPCRecompilerImlGen_NEG(ppcImlGenContext, opcode) == false) if (PPCRecompilerImlGen_NEG(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
break; break;
case 119: // LBZUX case 119: // LBZUX
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, true)) PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, true);
unsupportedInstructionFound = true;
break; break;
case 124: // NOR case 124: // NOR
if (!PPCRecompilerImlGen_OR_NOR(ppcImlGenContext, opcode, true)) if (!PPCRecompilerImlGen_OR_NOR(ppcImlGenContext, opcode, true))
@ -2279,16 +2290,14 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
break; break;
case 279: // LHZX case 279: // LHZX
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, false)) PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, false);
unsupportedInstructionFound = true;
break; break;
case 284: // EQV (alias to NXOR) case 284: // EQV (alias to NXOR)
if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, true)) if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, true))
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
break; break;
case 311: // LHZUX case 311: // LHZUX
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, true)) PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, true);
unsupportedInstructionFound = true;
break; break;
case 316: // XOR case 316: // XOR
if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, false)) if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, false))
@ -2299,16 +2308,14 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
break; break;
case 343: // LHAX case 343: // LHAX
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, false)) PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, false);
unsupportedInstructionFound = true;
break; break;
case 371: case 371:
if (PPCRecompilerImlGen_MFTB(ppcImlGenContext, opcode) == false) if (PPCRecompilerImlGen_MFTB(ppcImlGenContext, opcode) == false)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
break; break;
case 375: // LHAUX case 375: // LHAUX
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, true)) PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, true);
unsupportedInstructionFound = true;
break; break;
case 407: // STHX case 407: // STHX
if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true, false)) if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true, false))
@ -2342,8 +2349,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
break; break;
case 534: // LWBRX case 534: // LWBRX
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, false, false)) PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, false, false);
unsupportedInstructionFound = true;
break; break;
case 535: case 535:
if (PPCRecompilerImlGen_LFSX(ppcImlGenContext, opcode) == false) if (PPCRecompilerImlGen_LFSX(ppcImlGenContext, opcode) == false)
@ -2397,8 +2403,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext)
unsupportedInstructionFound = true; unsupportedInstructionFound = true;
break; break;
case 790: // LHBRX case 790: // LHBRX
if (!PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, false, false)) PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, false, false);
unsupportedInstructionFound = true;
break; break;
case 792: case 792:
if (PPCRecompilerImlGen_SRAW(ppcImlGenContext, opcode) == false) if (PPCRecompilerImlGen_SRAW(ppcImlGenContext, opcode) == false)
@ -2878,7 +2883,7 @@ bool PPCIMLGen_FillBasicBlock(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBloc
if (PPCRecompiler_decodePPCInstruction(&ppcImlGenContext)) if (PPCRecompiler_decodePPCInstruction(&ppcImlGenContext))
{ {
debug_printf("Recompiler encountered unsupported instruction at 0x%08x\n", addressOfCurrentInstruction); cemuLog_logDebug(LogType::Force, "PPCRecompiler: Unsupported instruction at 0x{:08x}", addressOfCurrentInstruction);
ppcImlGenContext.currentOutputSegment = nullptr; ppcImlGenContext.currentOutputSegment = nullptr;
return false; return false;
} }