PPCRec: Add RA support for instructions with register constraints

Also make interval tracking more fine-grained and differentiate between the input and output edges of each instruction
This commit is contained in:
Exzap 2024-10-17 12:06:12 +02:00
parent 675c802cc1
commit aa946ae42d
10 changed files with 2308 additions and 736 deletions

View file

@ -579,31 +579,23 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction,
return true;
}
bool PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
auto regBoolOut = _reg32_from_reg8(_reg8(imlInstruction->op_atomic_compare_store.regBoolOut));
auto regEA = _reg32(imlInstruction->op_atomic_compare_store.regEA);
auto regVal = _reg32(imlInstruction->op_atomic_compare_store.regWriteValue);
auto regCmp = _reg32(imlInstruction->op_atomic_compare_store.regCompareValue);
// make sure none of the regs is in EAX
if (regEA == X86_REG_EAX ||
regBoolOut == X86_REG_EAX ||
regVal == X86_REG_EAX ||
regCmp == X86_REG_EAX)
{
printf("x86: atomic_cmp_store cannot emit due to EAX already being in use\n");
return false;
}
cemu_assert_debug(regBoolOut == X86_REG_EAX);
cemu_assert_debug(regEA != X86_REG_EAX);
cemu_assert_debug(regVal != X86_REG_EAX);
cemu_assert_debug(regCmp != X86_REG_EAX);
x64GenContext->emitter->XCHG_qq(REG_RESV_TEMP, X86_REG_RAX);
x64GenContext->emitter->MOV_dd(X86_REG_EAX, regCmp);
x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regBoolOut), _reg32_from_reg8(regBoolOut)); // zero bytes unaffected by SETcc
x64GenContext->emitter->LockPrefix();
x64GenContext->emitter->CMPXCHG_dd_l(REG_RESV_MEMBASE, 0, _reg64_from_reg32(regEA), 1, regVal);
x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regBoolOut);
x64GenContext->emitter->XCHG_qq(REG_RESV_TEMP, X86_REG_RAX);
return true;
x64GenContext->emitter->AND_di32(regBoolOut, 1); // SETcc doesn't clear the upper bits so we do it manually here
}
bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
@ -908,78 +900,29 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U ||
imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
{
// x86's shift and rotate instructions have the shift amount hardwired to the CL register
// since our register allocator doesn't support instruction based fixed phys registers yet
// we'll instead have to temporarily shuffle registers around
// we use BMI2's shift instructions until the RA can assign fixed registers
if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
if(g_CPUFeatures.x86.bmi2)
{
x64Gen_sarx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
x64Gen_sarx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
x64Gen_shrx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
x64Gen_shlx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
}
else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
else
{
x64Gen_shrx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
cemu_assert_debug(rRegResult != rRegOperand2);
cemu_assert_debug(rRegResult != X86_REG_RCX);
cemu_assert_debug(rRegOperand2 == X86_REG_RCX);
if(rRegOperand1 != rRegResult)
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
x64GenContext->emitter->SAR_d_CL(rRegResult);
else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
x64GenContext->emitter->SHR_d_CL(rRegResult);
else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
x64GenContext->emitter->SHL_d_CL(rRegResult);
}
else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
{
x64Gen_shlx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
}
//auto rResult = _reg32(rRegResult);
//auto rOp2 = _reg8_from_reg32(_reg32(rRegOperand2));
//if (rRegResult == rRegOperand2)
//{
// if (rRegResult != rRegOperand1)
// DEBUG_BREAK; // cannot handle yet (we use rRegResult as a temporary reg, but its not possible if it is shared with op2)
//}
//if(rRegOperand1 != rRegResult)
// x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1);
//cemu_assert_debug(rRegOperand1 != X86_REG_ECX);
//if (rRegOperand2 == X86_REG_ECX)
//{
// if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
// x64GenContext->emitter->SAR_d_CL(rResult);
// else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
// x64GenContext->emitter->SHR_d_CL(rResult);
// else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
// x64GenContext->emitter->SHL_d_CL(rResult);
// else
// cemu_assert_unimplemented();
//}
//else
//{
// auto rRegResultOrg = rRegResult;
// if (rRegResult == X86_REG_ECX)
// {
// x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegResult);
// rRegResult = REG_RESV_TEMP;
// rResult = _reg32(rRegResult);
// }
//
// x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2);
//
// if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
// x64GenContext->emitter->SAR_d_CL(rResult);
// else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
// x64GenContext->emitter->SHR_d_CL(rResult);
// else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
// x64GenContext->emitter->SHL_d_CL(rResult);
// else
// cemu_assert_unimplemented();
// x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2);
// // move result back if it was in ECX
// if (rRegResultOrg == X86_REG_ECX)
// {
// x64Gen_mov_reg64_reg64(x64GenContext, rRegResultOrg, REG_RESV_TEMP);
// }
//}
}
else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED )
{
@ -1093,9 +1036,19 @@ bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction
auto regA = _reg32(imlInstruction->op_compare.regA);
auto regB = _reg32(imlInstruction->op_compare.regB);
X86Cond cond = _x86Cond(imlInstruction->op_compare.cond);
x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc
x64GenContext->emitter->CMP_dd(regA, regB);
x64GenContext->emitter->SETcc_b(cond, regR);
bool keepR = regR == regA || regR == regB;
if(!keepR)
{
x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc
x64GenContext->emitter->CMP_dd(regA, regB);
x64GenContext->emitter->SETcc_b(cond, regR);
}
else
{
x64GenContext->emitter->CMP_dd(regA, regB);
x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0);
x64GenContext->emitter->SETcc_b(cond, regR);
}
return true;
}
@ -1105,9 +1058,19 @@ bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction_t* PPCRecFunc
auto regA = _reg32(imlInstruction->op_compare_s32.regA);
sint32 imm = imlInstruction->op_compare_s32.immS32;
X86Cond cond = _x86Cond(imlInstruction->op_compare_s32.cond);
x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc
x64GenContext->emitter->CMP_di32(regA, imm);
x64GenContext->emitter->SETcc_b(cond, regR);
bool keepR = regR == regA;
if(!keepR)
{
x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc
x64GenContext->emitter->CMP_di32(regA, imm);
x64GenContext->emitter->SETcc_b(cond, regR);
}
else
{
x64GenContext->emitter->CMP_di32(regA, imm);
x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0);
x64GenContext->emitter->SETcc_b(cond, regR);
}
return true;
}
@ -1202,7 +1165,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
{
if( regA != regR )
x64Gen_mov_reg64_reg64(x64GenContext, regR, regA);
if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
x64Gen_shl_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32);
else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U)
@ -1224,19 +1186,25 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFu
auto regA = _reg32(imlInstruction->op_r_r_s32_carry.regA);
sint32 immS32 = imlInstruction->op_r_r_s32_carry.immS32;
auto regCarry = _reg32(imlInstruction->op_r_r_s32_carry.regCarry);
cemu_assert_debug(regCarry != regR && regCarry != regA);
cemu_assert_debug(regCarry != regR); // we dont allow two different outputs sharing the same register
bool delayCarryInit = regCarry == regA;
switch (imlInstruction->operation)
{
case PPCREC_IML_OP_ADD:
x64GenContext->emitter->XOR_dd(regCarry, regCarry);
if(!delayCarryInit)
x64GenContext->emitter->XOR_dd(regCarry, regCarry);
if (regR != regA)
x64GenContext->emitter->MOV_dd(regR, regA);
x64GenContext->emitter->ADD_di32(regR, immS32);
if(delayCarryInit)
x64GenContext->emitter->MOV_di32(regCarry, 0);
x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry));
break;
case PPCREC_IML_OP_ADD_WITH_CARRY:
// assumes that carry is already correctly initialized as 0 or 1
cemu_assert_debug(regCarry != regR);
if (regR != regA)
x64GenContext->emitter->MOV_dd(regR, regA);
x64GenContext->emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag
@ -1600,8 +1568,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
}
else if (imlInstruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE)
{
if (!PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction))
codeGenerationFailed = true;
PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
}
else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP )
{

View file

@ -91,25 +91,37 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml
sint32 index = currentLineText.getLen();
while (index < 70)
{
debug_printf(" ");
currentLineText.add(" ");
index++;
}
raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while (subrangeItr)
{
if (offset == subrangeItr->start.index)
if (subrangeItr->interval2.start.GetInstructionIndexEx() == offset)
{
debug_printf("|%-2d", subrangeItr->GetVirtualRegister());
if(subrangeItr->interval2.start.IsInstructionIndex() && !subrangeItr->interval2.start.IsOnInputEdge())
currentLineText.add(".");
else
currentLineText.add("|");
currentLineText.addFmt("{:<4}", subrangeItr->GetVirtualRegister());
}
else if (offset >= subrangeItr->start.index && offset < subrangeItr->end.index)
else if (subrangeItr->interval2.end.GetInstructionIndexEx() == offset)
{
debug_printf("| ");
if(subrangeItr->interval2.end.IsInstructionIndex() && !subrangeItr->interval2.end.IsOnOutputEdge())
currentLineText.add("* ");
else
currentLineText.add("| ");
}
else if (subrangeItr->interval2.ContainsInstructionIndexEx(offset))
{
currentLineText.add("| ");
}
else
{
debug_printf(" ");
currentLineText.add(" ");
}
index += 3;
index += 5;
// next
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
@ -446,7 +458,7 @@ void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& di
void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo)
{
StringBuf strOutput(1024);
StringBuf strOutput(4096);
strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth);
if (imlSegment->isEnterable)
@ -457,13 +469,13 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
{
strOutput.addFmt(" InheritOverwrite: {}", IMLDebug_GetSegmentName(ctx, imlSegment->deadCodeEliminationHintSeg));
}
debug_printf("%s\n", strOutput.c_str());
cemuLog_log(LogType::Force, "{}", strOutput.c_str());
if (printLivenessRangeInfo)
{
strOutput.reset();
IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START);
debug_printf("%s\n", strOutput.c_str());
cemuLog_log(LogType::Force, "{}", strOutput.c_str());
}
//debug_printf("\n");
strOutput.reset();
@ -475,53 +487,56 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
// don't log NOP instructions
if (inst.type == PPCREC_IML_TYPE_NO_OP)
continue;
//strOutput.addFmt("{:02x} ", i);
debug_printf(fmt::format("{:02x} ", i).c_str());
strOutput.reset();
strOutput.addFmt("{:02x} ", i);
//cemuLog_log(LogType::Force, "{:02x} ", i);
disassemblyLine.clear();
IMLDebug_DisassembleInstruction(inst, disassemblyLine);
debug_printf("%s", disassemblyLine.c_str());
strOutput.add(disassemblyLine);
if (printLivenessRangeInfo)
{
IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, i);
}
debug_printf("\n");
cemuLog_log(LogType::Force, "{}", strOutput.c_str());
}
// all ranges
if (printLivenessRangeInfo)
{
debug_printf("Ranges-VirtReg ");
strOutput.reset();
strOutput.add("Ranges-VirtReg ");
raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while (subrangeItr)
{
debug_printf("v%-2d", subrangeItr->GetVirtualRegister());
strOutput.addFmt("v{:<4}", (uint32)subrangeItr->GetVirtualRegister());
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
debug_printf("\n");
debug_printf("Ranges-PhysReg ");
cemuLog_log(LogType::Force, "{}", strOutput.c_str());
strOutput.reset();
strOutput.add("Ranges-PhysReg ");
subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while (subrangeItr)
{
debug_printf("p%-2d", subrangeItr->GetPhysicalRegister());
strOutput.addFmt("p{:<4}", subrangeItr->GetPhysicalRegister());
subrangeItr = subrangeItr->link_allSegmentRanges.next;
}
debug_printf("\n");
cemuLog_log(LogType::Force, "{}", strOutput.c_str());
}
// branch info
debug_printf("Links from: ");
strOutput.reset();
strOutput.add("Links from: ");
for (sint32 i = 0; i < imlSegment->list_prevSegments.size(); i++)
{
if (i)
debug_printf(", ");
debug_printf("%s", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[i]).c_str());
strOutput.add(", ");
strOutput.addFmt("{}", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[i]).c_str());
}
debug_printf("\n");
cemuLog_log(LogType::Force, "{}", strOutput.c_str());
if (imlSegment->nextSegmentBranchNotTaken)
debug_printf("BranchNotTaken: %s\n", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str());
cemuLog_log(LogType::Force, "BranchNotTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str());
if (imlSegment->nextSegmentBranchTaken)
debug_printf("BranchTaken: %s\n", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str());
cemuLog_log(LogType::Force, "BranchTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str());
if (imlSegment->nextSegmentIsUncertain)
debug_printf("Dynamic target\n");
debug_printf("\n");
cemuLog_log(LogType::Force, "Dynamic target");
}
void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo)
@ -529,6 +544,6 @@ void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRange
for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++)
{
IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], printLivenessRangeInfo);
debug_printf("\n");
cemuLog_log(LogType::Force, "");
}
}

View file

@ -1,6 +1,7 @@
#pragma once
using IMLRegID = uint16; // 16 bit ID
using IMLPhysReg = sint32; // arbitrary value that is up to the architecture backend, usually this will be the register index. A value of -1 is reserved and means not assigned
// format of IMLReg:
// 0-15 (16 bit) IMLRegID

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,7 @@
#pragma once
// container for storing a set of register indices
// specifically optimized towards storing physical register indices (expected to be below 64)
// specifically optimized towards storing typical range of physical register indices (expected to be below 64)
class IMLPhysRegisterSet
{
public:
@ -33,11 +34,21 @@ public:
return *this;
}
void RemoveRegisters(const IMLPhysRegisterSet& other)
{
this->m_regBitmask &= ~other.m_regBitmask;
}
// Returns true if this set contains at least one register index
bool HasAnyAvailable() const
{
	return m_regBitmask != 0ull;
}
bool HasExactlyOneAvailable() const
{
return m_regBitmask != 0 && (m_regBitmask & (m_regBitmask - 1)) == 0;
}
// returns index of first available register. Do not call when HasAnyAvailable() == false
uint32 GetFirstAvailableReg()
{
@ -59,7 +70,7 @@ public:
// returns index of next available register (search includes any register index >= startIndex)
// returns -1 if there is no more register
sint32 GetNextAvailableReg(sint32 startIndex)
sint32 GetNextAvailableReg(sint32 startIndex) const
{
if (startIndex >= 64)
return -1;
@ -81,6 +92,11 @@ public:
return regIndex;
}
// Returns the number of register indices contained in this set
sint32 CountAvailableRegs() const
{
	// population count of the backing bitmask equals the number of set registers
	return static_cast<sint32>(std::popcount(m_regBitmask));
}
private:
uint64 m_regBitmask{ 0 };
};

View file

@ -22,7 +22,6 @@ IMLName raLivenessRange::GetName() const
void raLivenessRange::SetPhysicalRegister(sint32 physicalRegister)
{
cemu_assert_suspicious(); // not used yet
this->physicalRegister = physicalRegister;
}
@ -68,6 +67,58 @@ boost::container::small_vector<raLivenessRange*, 32> raLivenessRange::GetAllSubr
return subranges;
}
// Computes the intersection of all fixed-register constraints that apply to this range.
// For ranges that connect to the previous or next segment, the constraints of every range
// in the connected cluster are taken into account; otherwise only the local constraints.
// Returns true and writes the intersection into allowedRegisters if at least one
// constraint exists, returns false (allowedRegisters untouched) otherwise.
bool raLivenessRange::GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters)
{
	const bool crossesSegmentBoundary = interval2.ExtendsPreviousSegment() || interval2.ExtendsIntoNextSegment();
	if(!crossesSegmentBoundary)
	{
		// local check only, slightly faster
		if(list_fixedRegRequirements.empty())
			return false;
		allowedRegisters = list_fixedRegRequirements.front().allowedReg;
		for(const auto& req : list_fixedRegRequirements)
			allowedRegisters &= req.allowedReg;
		return true;
	}
	// range spans multiple segments, gather constraints from the whole cluster
	auto clusterRanges = GetAllSubrangesInCluster();
	bool seeded = false;
	for(auto& member : clusterRanges)
	{
		for(const auto& req : member->list_fixedRegRequirements)
		{
			if(!seeded)
			{
				// first constraint encountered seeds the result set
				allowedRegisters = req.allowedReg;
				seeded = true;
			}
			allowedRegisters &= req.allowedReg;
		}
	}
	return seeded;
}
// Narrows regPool by AND'ing in every fixed-register constraint that applies to this range.
// For ranges connecting to the previous or next segment the constraints of the whole
// connected cluster apply. Returns the narrowed register set.
IMLPhysRegisterSet raLivenessRange::GetAllowedRegisters(IMLPhysRegisterSet regPool)
{
	IMLPhysRegisterSet result = regPool;
	const bool spansSegments = interval2.ExtendsPreviousSegment() || interval2.ExtendsIntoNextSegment();
	if(spansSegments)
	{
		// apply the constraints of every range in the cluster
		for(auto& member : GetAllSubrangesInCluster())
		{
			for(const auto& req : member->list_fixedRegRequirements)
				result &= req.allowedReg;
		}
	}
	else
	{
		// only local constraints apply
		for(const auto& req : list_fixedRegRequirements)
			result &= req.allowedReg;
	}
	return result;
}
void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map<IMLRegID, raLivenessRange*>& root, raLivenessRange* subrange)
{
IMLRegID regId = subrange->GetVirtualRegister();
@ -142,14 +193,19 @@ void PPCRecRARange_removeLink_allSegmentRanges(raLivenessRange** root, raLivenes
MemoryPoolPermanentObjects<raLivenessRange> memPool_livenessSubrange(4096);
raLivenessRange* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, sint32 startIndex, sint32 endIndex)
// startPosition and endPosition are inclusive
raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition)
{
raLivenessRange* range = memPool_livenessSubrange.acquireObj();
range->previousRanges.clear();
range->list_locations.resize(0);
range->list_locations.clear();
range->list_fixedRegRequirements.clear();
range->imlSegment = imlSegment;
PPCRecompilerIml_setSegmentPoint(&range->start, imlSegment, startIndex);
PPCRecompilerIml_setSegmentPoint(&range->end, imlSegment, endIndex);
cemu_assert_debug(startPosition <= endPosition);
range->interval2.start = startPosition;
range->interval2.end = endPosition;
// register mapping
range->virtualRegister = virtualRegister;
range->name = name;
@ -160,6 +216,7 @@ raLivenessRange* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, I
range->lastIterationIndex = 0;
range->subrangeBranchNotTaken = nullptr;
range->subrangeBranchTaken = nullptr;
cemu_assert_debug(range->previousRanges.empty());
range->_noLoad = false;
// add to segment linked lists
PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, range);
@ -172,6 +229,22 @@ void _unlinkSubrange(raLivenessRange* subrange)
IMLSegment* imlSegment = subrange->imlSegment;
PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, subrange);
PPCRecRARange_removeLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, subrange);
// unlink reverse references
if(subrange->subrangeBranchTaken)
subrange->subrangeBranchTaken->previousRanges.erase(std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), subrange));
if(subrange->subrangeBranchNotTaken)
subrange->subrangeBranchNotTaken->previousRanges.erase(std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), subrange));
subrange->subrangeBranchTaken = (raLivenessRange*)(uintptr_t)-1;
subrange->subrangeBranchNotTaken = (raLivenessRange*)(uintptr_t)-1;
// remove forward references
for(auto& prev : subrange->previousRanges)
{
if(prev->subrangeBranchTaken == subrange)
prev->subrangeBranchTaken = nullptr;
if(prev->subrangeBranchNotTaken == subrange)
prev->subrangeBranchNotTaken = nullptr;
}
subrange->previousRanges.clear();
}
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange)
@ -179,14 +252,9 @@ void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
_unlinkSubrange(subrange);
//subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange));
subrange->list_locations.clear();
// unlink reverse references
if(subrange->subrangeBranchTaken)
subrange->subrangeBranchTaken->previousRanges.erase(std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), subrange));
if(subrange->subrangeBranchNotTaken)
subrange->subrangeBranchTaken->previousRanges.erase(std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), subrange));
PPCRecompilerIml_removeSegmentPoint(&subrange->start);
PPCRecompilerIml_removeSegmentPoint(&subrange->end);
//PPCRecompilerIml_removeSegmentPoint(&subrange->interval.start);
//PPCRecompilerIml_removeSegmentPoint(&subrange->interval.end);
memPool_livenessSubrange.releaseObj(subrange);
}
@ -194,9 +262,18 @@ void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
void _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange)
{
_unlinkSubrange(subrange);
PPCRecompilerIml_removeSegmentPoint(&subrange->start);
PPCRecompilerIml_removeSegmentPoint(&subrange->end);
//PPCRecompilerIml_removeSegmentPoint(&subrange->interval.start);
//PPCRecompilerIml_removeSegmentPoint(&subrange->interval.end);
memPool_livenessSubrange.releaseObj(subrange);
// #ifdef CEMU_DEBUG_ASSERT
// // DEBUG BEGIN
// subrange->lastIterationIndex = 0xFFFFFFFE;
// subrange->subrangeBranchTaken = (raLivenessRange*)(uintptr_t)-1;
// subrange->subrangeBranchNotTaken = (raLivenessRange*)(uintptr_t)-1;
//
// // DEBUG END
// #endif
}
void PPCRecRA_deleteSubrangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange)
@ -229,8 +306,8 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
PPCRecRA_debugValidateSubrange(absorbedSubrange);
if (subrange->imlSegment != absorbedSubrange->imlSegment)
assert_dbg();
if (subrange->end.index > absorbedSubrange->start.index)
assert_dbg();
cemu_assert_debug(subrange->interval2.end == absorbedSubrange->interval2.start);
if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken)
assert_dbg();
if (subrange == absorbedSubrange)
@ -238,21 +315,45 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
#endif
// update references
if(absorbedSubrange->subrangeBranchTaken)
*std::find(absorbedSubrange->subrangeBranchTaken->previousRanges.begin(), absorbedSubrange->subrangeBranchTaken->previousRanges.end(), absorbedSubrange) = subrange;
if(absorbedSubrange->subrangeBranchNotTaken)
*std::find(absorbedSubrange->subrangeBranchNotTaken->previousRanges.begin(), absorbedSubrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange;
subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken;
subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken;
absorbedSubrange->subrangeBranchTaken = nullptr;
absorbedSubrange->subrangeBranchNotTaken = nullptr;
if(subrange->subrangeBranchTaken)
*std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), absorbedSubrange) = subrange;
if(subrange->subrangeBranchNotTaken)
*std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange;
// merge usage locations
// at the merge point both ranges might track the same instruction, we handle this by first merging this duplicate location
if(subrange && absorbedSubrange && !subrange->list_locations.empty() && !absorbedSubrange->list_locations.empty())
{
if(subrange->list_locations.back().index == absorbedSubrange->list_locations.front().index)
{
subrange->list_locations.back().isRead |= absorbedSubrange->list_locations.front().isRead;
subrange->list_locations.back().isWrite |= absorbedSubrange->list_locations.front().isWrite;
absorbedSubrange->list_locations.erase(absorbedSubrange->list_locations.begin()); // inefficient
}
}
for (auto& location : absorbedSubrange->list_locations)
{
cemu_assert_debug(subrange->list_locations.empty() || (subrange->list_locations.back().index < location.index)); // todo - sometimes a subrange can contain the same instruction at the merge point if they are covering half of the instruction edge
subrange->list_locations.push_back(location);
}
absorbedSubrange->list_locations.clear();
// merge fixed reg locations
#ifdef CEMU_DEBUG_ASSERT
if(!subrange->list_fixedRegRequirements.empty() && !absorbedSubrange->list_fixedRegRequirements.empty())
{
cemu_assert_debug(subrange->list_fixedRegRequirements.back().pos < absorbedSubrange->list_fixedRegRequirements.front().pos);
}
#endif
for (auto& fixedReg : absorbedSubrange->list_fixedRegRequirements)
{
subrange->list_fixedRegRequirements.push_back(fixedReg);
}
subrange->end.index = absorbedSubrange->end.index;
subrange->interval2.end = absorbedSubrange->interval2.end;
PPCRecRA_debugValidateSubrange(subrange);
@ -262,16 +363,21 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
// remove all inter-segment connections from the range cluster and split it into local ranges (also removes empty ranges)
void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange)
{
cemu_assert_debug(originRange->interval2.ExtendsPreviousSegment() || originRange->interval2.ExtendsIntoNextSegment()); // only call this on ranges that span multiple segments
auto clusterRanges = originRange->GetAllSubrangesInCluster();
for (auto& subrange : clusterRanges)
{
if (subrange->list_locations.empty())
continue;
raLivenessRange* newSubrange = PPCRecRA_createSubrange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), subrange->list_locations.data()[0].index, subrange->list_locations.data()[subrange->list_locations.size() - 1].index + 1);
// copy locations
for (auto& location : subrange->list_locations)
raInterval interval;
interval.SetInterval(subrange->list_locations.front().index, true, subrange->list_locations.back().index, true);
raLivenessRange* newSubrange = PPCRecRA_createSubrange2(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), interval.start, interval.end);
// copy locations and fixed reg indices
newSubrange->list_locations = subrange->list_locations;
newSubrange->list_fixedRegRequirements = subrange->list_fixedRegRequirements;
if(originRange->HasPhysicalRegister())
{
newSubrange->list_locations.push_back(location);
cemu_assert_debug(subrange->list_fixedRegRequirements.empty()); // avoid unassigning a register from a range with a fixed register requirement
}
}
// remove subranges
@ -279,82 +385,223 @@ void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange
}
#ifdef CEMU_DEBUG_ASSERT
void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange)
void PPCRecRA_debugValidateSubrange(raLivenessRange* range)
{
// validate subrange
if (subrange->subrangeBranchTaken && subrange->subrangeBranchTaken->imlSegment != subrange->imlSegment->nextSegmentBranchTaken)
if (range->subrangeBranchTaken && range->subrangeBranchTaken->imlSegment != range->imlSegment->nextSegmentBranchTaken)
assert_dbg();
if (subrange->subrangeBranchNotTaken && subrange->subrangeBranchNotTaken->imlSegment != subrange->imlSegment->nextSegmentBranchNotTaken)
if (range->subrangeBranchNotTaken && range->subrangeBranchNotTaken->imlSegment != range->imlSegment->nextSegmentBranchNotTaken)
assert_dbg();
if(range->subrangeBranchTaken || range->subrangeBranchNotTaken)
{
cemu_assert_debug(range->interval2.end.ConnectsToNextSegment());
}
if(!range->previousRanges.empty())
{
cemu_assert_debug(range->interval2.start.ConnectsToPreviousSegment());
}
// validate locations
if (!range->list_locations.empty())
{
cemu_assert_debug(range->list_locations.front().index >= range->interval2.start.GetInstructionIndexEx());
cemu_assert_debug(range->list_locations.back().index <= range->interval2.end.GetInstructionIndexEx());
}
}
#else
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) {}
void PPCRecRA_debugValidateSubrange(raLivenessRange* range) {}
#endif
// split subrange at the given index
// After the split there will be two ranges and subranges:
// since locations are per-instruction, but intervals are per-edge, it's possible that locations track reads/writes outside of the range
// this function will remove any outside read/write locations
// Locations are tracked per-instruction while intervals are tracked per-edge, so the first
// or last location of a range may record a read/write that actually falls outside of the
// interval. This strips those out-of-interval accesses and drops locations that become empty.
void IMLRA_FixLocations(raLivenessRange* range)
{
	if(range->list_locations.empty())
		return;
	// a read happens on the input edge; if the interval only begins on the output edge of
	// the first tracked instruction then that read is not covered by the interval
	auto& startEdge = range->interval2.start;
	if(startEdge.IsInstructionIndex() && startEdge.IsOnOutputEdge() && startEdge.GetInstructionIndex() == range->list_locations.front().index)
	{
		auto& firstLoc = range->list_locations.front();
		firstLoc.isRead = false;
		if(!firstLoc.isWrite)
			range->list_locations.erase(range->list_locations.begin());
	}
	if(range->list_locations.empty())
		return;
	// a write happens on the output edge; if the interval already ends on the input edge of
	// the last tracked instruction then that write is not covered by the interval
	auto& endEdge = range->interval2.end;
	if(endEdge.IsInstructionIndex() && endEdge.IsOnInputEdge() && endEdge.GetInstructionIndex() == range->list_locations.back().index)
	{
		auto& lastLoc = range->list_locations.back();
		lastLoc.isWrite = false;
		if(!lastLoc.isRead)
			range->list_locations.pop_back();
	}
}
// Trims the start and end of a range so the interval tightly covers the first and last
// tracked read/write locations. Endpoints that extend into the previous/next segment
// (i.e. that are not instruction-index positions) are left untouched.
void IMLRA_TrimRangeToUse(raLivenessRange* range)
{
	if(range->list_locations.empty())
	{
		// special case: no locations at all. This occurs for ranges that only exist to carry
		// a value across segment boundaries; collapse any instruction-index endpoint onto the
		// opposite (segment-connecting) endpoint so the range shrinks to a single edge
		cemu_assert_debug(!range->interval2.start.IsInstructionIndex() || !range->interval2.end.IsInstructionIndex());
		if(range->interval2.start.IsInstructionIndex())
			range->interval2.start = range->interval2.end;
		if(range->interval2.end.IsInstructionIndex())
			range->interval2.end = range->interval2.start;
		return;
	}
	// remember the untrimmed interval so we can verify the trim only ever shrinks it
	raInterval prevInterval = range->interval2;
	// trim start: move it to the first location. The second Set() parameter selects the
	// input edge when the first access is a read (presumably edge-selection flag — the
	// exact Set(index, bool) contract is defined in the interval header; confirm there)
	if(range->interval2.start.IsInstructionIndex())
	{
		bool isInputEdge = range->list_locations.front().isRead;
		range->interval2.start.Set(range->list_locations.front().index, isInputEdge);
	}
	// trim end: move it to the last location; a trailing write keeps the end on the
	// output edge (hence the negated flag)
	if(range->interval2.end.IsInstructionIndex())
	{
		bool isOutputEdge = range->list_locations.back().isWrite;
		range->interval2.end.Set(range->list_locations.back().index, !isOutputEdge);
	}
	// extra checks: interval stays well-formed, still covers every location, and is a
	// sub-interval of what we started with
#ifdef CEMU_DEBUG_ASSERT
	cemu_assert_debug(range->interval2.start <= range->interval2.end);
	for(auto& loc : range->list_locations)
	{
		cemu_assert_debug(range->interval2.ContainsInstructionIndex(loc.index));
	}
	cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval2));
#endif
}
// split range at the given position
// After the split there will be two ranges:
// head -> subrange is shortened to end at splitIndex (exclusive)
// tail -> a new subrange that ranges from splitIndex (inclusive) to the end of the original subrange
// if head has a physical register assigned it will not carry over to tail
// The return value is the tail subrange
// If trimToHole is true, the end of the head subrange and the start of the tail subrange will be moved to fit the locations
// Ranges that begin at RA_INTER_RANGE_START are allowed and can be split
// NOTE(review): the next line looks like a leftover of the old index-based signature; it has no body
// and immediately precedes the real definition below — confirm it is diff/merge residue and remove
raLivenessRange* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, sint32 splitIndex, bool trimToHole)
// The return value is the tail range
// If trimToHole is true, the end of the head subrange and the start of the tail subrange will be shrunk to fit the read/write locations within them
// the range after the split point does not inherit the physical register
// if trimToHole is true and any of the halfes is empty, it will be deleted
raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToHole)
{
	// validation
#ifdef CEMU_DEBUG_ASSERT
	//if (subrange->end.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START)
	//	assert_dbg();
	// NOTE(review): the three checks below use the legacy start/end/splitIndex fields — they appear to
	// be stale remnants of the pre-raInstructionEdge version; confirm against the edge-based asserts below
	if (subrange->start.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START)
		assert_dbg();
	if (subrange->start.index >= splitIndex)
		assert_dbg();
	if (subrange->end.index <= splitIndex)
		assert_dbg();
#endif
	// the split point must be a concrete instruction edge and lie inside the interval
	cemu_assert_debug(splitPosition.IsInstructionIndex());
	cemu_assert_debug(!subrange->interval2.IsNextSegmentOnly() && !subrange->interval2.IsPreviousSegmentOnly());
	cemu_assert_debug(subrange->interval2.ContainsEdge(splitPosition));
	// determine new intervals
	// head keeps [start, splitPosition-1], tail takes [splitPosition, end]
	raInterval headInterval, tailInterval;
	headInterval.SetInterval(subrange->interval2.start, splitPosition-1);
	tailInterval.SetInterval(splitPosition, subrange->interval2.end);
	cemu_assert_debug(headInterval.start <= headInterval.end);
	cemu_assert_debug(tailInterval.start <= tailInterval.end);
	// create tail
	// NOTE(review): duplicate declaration of tailSubrange — the first (index-based createSubrange) call
	// looks like diff/merge residue of the old version; only the createSubrange2 call matches the new
	// edge-based interval model. Confirm and remove the first declaration
	raLivenessRange* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), splitIndex, subrange->end.index);
	// copy locations
	raLivenessRange* tailSubrange = PPCRecRA_createSubrange2(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), tailInterval.start, tailInterval.end);
	// NOTE(review): this copies the physical register to the tail, contradicting the header comment
	// ("the range after the split point does not inherit the physical register") — confirm which is intended
	tailSubrange->SetPhysicalRegister(subrange->GetPhysicalRegister());
	// carry over branch targets and update reverse references
	tailSubrange->subrangeBranchTaken = subrange->subrangeBranchTaken;
	tailSubrange->subrangeBranchNotTaken = subrange->subrangeBranchNotTaken;
	subrange->subrangeBranchTaken = nullptr;
	subrange->subrangeBranchNotTaken = nullptr;
	if(tailSubrange->subrangeBranchTaken)
		*std::find(tailSubrange->subrangeBranchTaken->previousRanges.begin(), tailSubrange->subrangeBranchTaken->previousRanges.end(), subrange) = tailSubrange;
	if(tailSubrange->subrangeBranchNotTaken)
		*std::find(tailSubrange->subrangeBranchNotTaken->previousRanges.begin(), tailSubrange->subrangeBranchNotTaken->previousRanges.end(), subrange) = tailSubrange;
	// we assume that list_locations is ordered by instruction index and contains no duplicate indices, so lets check that here just in case
#ifdef CEMU_DEBUG_ASSERT
	if(!subrange->list_locations.empty())
	{
		sint32 curIdx = -1;
		for(auto& location : subrange->list_locations)
		{
			cemu_assert_debug(curIdx < location.index);
			curIdx = location.index;
		}
	}
#endif
	// split locations
	// since there are 2 edges per instruction and locations track both via a single index, locations on the split point might need to be copied into both ranges
	for (auto& location : subrange->list_locations)
	{
		// NOTE(review): two stacked conditions (legacy index-based + new interval-based) — the first
		// looks like stale pre-edge code; confirm only the interval-based check should remain
		if (location.index >= splitIndex)
		if(tailInterval.ContainsInstructionIndex(location.index))
			tailSubrange->list_locations.push_back(location);
	}
	// remove tail locations from head
	for (sint32 i = 0; i < subrange->list_locations.size(); i++)
	{
		raLivenessLocation_t* location = subrange->list_locations.data() + i;
		// NOTE(review): same stacked old/new condition pattern as in the copy loop above
		if (location->index >= splitIndex)
		if (!headInterval.ContainsInstructionIndex(location->index))
		{
			// truncate at the first location past the head interval (list is ordered by index)
			subrange->list_locations.resize(i);
			break;
		}
	}
	// adjust start/end
	// NOTE(review): dangling `if (trimToHole)` guarding the fixed-reg loop below appears to be
	// residue of the old trim logic; trimToHole is handled again further down — confirm and remove
	if (trimToHole)
	// split fixed reg requirements
	for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
	{
		// NOTE(review): the next line (empty-locations check) and the `subrange->end.index` write below
		// look like interleaved residue of the old version inside the new fixed-reg copy loop — confirm
		if (subrange->list_locations.empty())
		raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
		if (tailInterval.ContainsInstructionIndex(fixedReg->pos.GetInstructionIndex()))
		{
			subrange->end.index = subrange->start.index+1;
			tailSubrange->list_fixedRegRequirements.push_back(*fixedReg);
		}
	}
	// remove tail fixed reg requirements from head
	for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
	{
		raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
		if (!headInterval.ContainsInstructionIndex(fixedReg->pos.GetInstructionIndex()))
		{
			// truncate at the first requirement past the head interval (list is ordered by position)
			subrange->list_fixedRegRequirements.resize(i);
			break;
		}
	}
	// adjust intervals
	subrange->interval2 = headInterval;
	tailSubrange->interval2 = tailInterval;
	// fix locations to only include read/write edges within the range
	if(subrange)
		IMLRA_FixLocations(subrange);
	if(tailSubrange)
		IMLRA_FixLocations(tailSubrange);
	// trim to hole
	if(trimToHole)
	{
		// delete the head if it has no uses left and is fully local (does not reach across segment borders)
		if(subrange->list_locations.empty() && (subrange->interval2.start.IsInstructionIndex() && subrange->interval2.end.IsInstructionIndex()))
		{
			PPCRecRA_deleteSubrange(ppcImlGenContext, subrange);
			subrange = nullptr; // reported back to the caller via the reference parameter
		}
		else
		{
			// NOTE(review): the legacy `.index` write below looks like stale pre-edge code;
			// IMLRA_TrimRangeToUse already shrinks the interval — confirm and remove
			subrange->end.index = subrange->list_locations.back().index + 1;
			IMLRA_TrimRangeToUse(subrange);
		}
		// NOTE(review): stacked old/new emptiness conditions again — confirm only the second should remain
		if (tailSubrange->list_locations.empty())
		if(tailSubrange->list_locations.empty() && (tailSubrange->interval2.start.IsInstructionIndex() && tailSubrange->interval2.end.IsInstructionIndex()))
		{
			assert_dbg(); // should not happen? (In this case we can just avoid generating a tail at all)
			PPCRecRA_deleteSubrange(ppcImlGenContext, tailSubrange);
			tailSubrange = nullptr;
		}
		else
		{
			// NOTE(review): stale legacy `.index` write, same as for the head above
			tailSubrange->start.index = tailSubrange->list_locations.front().index;
			IMLRA_TrimRangeToUse(tailSubrange);
		}
	}
	else
	{
		// set head range to end at split index
		// NOTE(review): writes the legacy end index even though the edge-based interval was already
		// assigned above — confirm this line is obsolete
		subrange->end.index = splitIndex;
	}
	// validation
	cemu_assert_debug(!subrange || subrange->interval2.start <= subrange->interval2.end);
	cemu_assert_debug(!tailSubrange || tailSubrange->interval2.start <= tailSubrange->interval2.end);
	cemu_assert_debug(!tailSubrange || tailSubrange->interval2.start >= splitPosition);
	if (!trimToHole)
		cemu_assert_debug(!tailSubrange || tailSubrange->interval2.start == splitPosition);
	if(subrange)
		PPCRecRA_debugValidateSubrange(subrange);
	if(tailSubrange)
		PPCRecRA_debugValidateSubrange(tailSubrange);
	return tailSubrange;
}
@ -401,13 +648,13 @@ sint32 PPCRecRARange_estimateTotalCost(std::span<raLivenessRange*> ranges)
for (auto& subrange : ranges)
{
if (subrange->start.index != RA_INTER_RANGE_START)
if (!subrange->interval2.ExtendsPreviousSegment())
{
//cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment);
mostExpensiveRead = std::max(mostExpensiveRead, PPCRecRARange_getReadWriteCost(subrange->imlSegment));
readCount++;
}
if (subrange->end.index != RA_INTER_RANGE_END)
if (!subrange->interval2.ExtendsIntoNextSegment())
{
//cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment);
mostExpensiveWrite = std::max(mostExpensiveWrite, PPCRecRARange_getReadWriteCost(subrange->imlSegment));
@ -433,13 +680,14 @@ sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange)
return cost;
}
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex)
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition)
{
// validation
#ifdef CEMU_DEBUG_ASSERT
if (subrange->end.index == RA_INTER_RANGE_END)
if (subrange->interval2.ExtendsIntoNextSegment())
assert_dbg();
#endif
cemu_assert_debug(splitPosition.IsInstructionIndex());
sint32 cost = 0;
// find split position in location list
@ -448,25 +696,15 @@ sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange,
assert_dbg(); // should not happen?
return 0;
}
if (splitIndex <= subrange->list_locations.front().index)
sint32 splitInstructionIndex = splitPosition.GetInstructionIndex();
if (splitInstructionIndex <= subrange->list_locations.front().index)
return 0;
if (splitIndex > subrange->list_locations.back().index)
if (splitInstructionIndex > subrange->list_locations.back().index)
return 0;
// todo - determine exact cost of split subranges
cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // currently we assume that the additional region will require a read and a store
//for (sint32 f = 0; f < subrange->list_locations.size(); f++)
//{
// raLivenessLocation_t* location = subrange->list_locations.data() + f;
// if (location->index >= splitIndex)
// {
// ...
// return cost;
// }
//}
return cost;
}

View file

@ -1,4 +1,5 @@
#pragma once
#include "IMLRegisterAllocator.h"
struct raLivenessLocation_t
{
@ -18,11 +19,286 @@ struct raLivenessSubrangeLink
struct raLivenessRange* next;
};
// position of one edge of an instruction within a segment
// each instruction has two edges: the input edge (values are read) and the output edge (values are written)
// internally stored as instructionIndex*2 + (0 = input edge, 1 = output edge)
// may also hold the sentinels RA_INTER_RANGE_START / RA_INTER_RANGE_END for ranges reaching across segment borders
struct raInstructionEdge
{
	friend struct raInterval;
public:
	// defaults to the input edge of instruction 0
	raInstructionEdge()
	{
		index = 0;
	}
	raInstructionEdge(sint32 instructionIndex, bool isInputEdge)
	{
		Set(instructionIndex, isInputEdge);
	}
	// set from instruction index + edge side; the segment-border sentinels are stored unencoded
	void Set(sint32 instructionIndex, bool isInputEdge)
	{
		if(instructionIndex == RA_INTER_RANGE_START || instructionIndex == RA_INTER_RANGE_END)
		{
			index = instructionIndex;
			return;
		}
		index = instructionIndex * 2 + (isInputEdge ? 0 : 1);
		cemu_assert_debug(index >= 0 && index < 0x100000*2); // make sure index value is sane
	}
	// assign an already-encoded raw value (or a sentinel) directly
	void SetRaw(sint32 index)
	{
		this->index = index;
		cemu_assert_debug(index == RA_INTER_RANGE_START || index == RA_INTER_RANGE_END || (index >= 0 && index < 0x100000*2)); // make sure index value is sane
	}
	// returns "<index>i" / "<index>o" for regular edges, or the sentinel name
	std::string GetDebugString()
	{
		if(index == RA_INTER_RANGE_START)
			return "RA_START";
		else if(index == RA_INTER_RANGE_END)
			return "RA_END";
		std::string str = fmt::format("{}", GetInstructionIndex());
		if(IsOnInputEdge())
			str += "i";
		else if(IsOnOutputEdge())
			str += "o";
		return str;
	}
	// instruction index of this edge; must not be called on sentinel values
	sint32 GetInstructionIndex() const
	{
		cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END);
		return index >> 1;
	}
	// returns instruction index or RA_INTER_RANGE_START/RA_INTER_RANGE_END
	sint32 GetInstructionIndexEx() const
	{
		if(index == RA_INTER_RANGE_START || index == RA_INTER_RANGE_END)
			return index;
		return index >> 1;
	}
	// raw encoded value (instructionIndex*2 + edge bit, or a sentinel)
	sint32 GetRaw() const
	{
		return index;
	}
	bool IsOnInputEdge() const
	{
		cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END);
		return (index&1) == 0;
	}
	bool IsOnOutputEdge() const
	{
		cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END);
		return (index&1) != 0;
	}
	bool ConnectsToPreviousSegment() const
	{
		return index == RA_INTER_RANGE_START;
	}
	bool ConnectsToNextSegment() const
	{
		return index == RA_INTER_RANGE_END;
	}
	// true if this edge is a regular (non-sentinel) position
	bool IsInstructionIndex() const
	{
		return index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END;
	}
	// comparison operators
	// they compare the raw encoded value, so the output edge of an instruction orders after its input edge
	bool operator>(const raInstructionEdge& other) const
	{
		return index > other.index;
	}
	bool operator<(const raInstructionEdge& other) const
	{
		return index < other.index;
	}
	bool operator<=(const raInstructionEdge& other) const
	{
		return index <= other.index;
	}
	bool operator>=(const raInstructionEdge& other) const
	{
		return index >= other.index;
	}
	bool operator==(const raInstructionEdge& other) const
	{
		return index == other.index;
	}
	// offset arithmetic operates in raw edge units (half-instructions); sentinel edges are not allowed
	raInstructionEdge operator+(sint32 offset) const
	{
		cemu_assert_debug(IsInstructionIndex());
		cemu_assert_debug(offset >= 0 && offset < RA_INTER_RANGE_END);
		raInstructionEdge edge;
		edge.index = index + offset;
		return edge;
	}
	raInstructionEdge operator-(sint32 offset) const
	{
		cemu_assert_debug(IsInstructionIndex());
		cemu_assert_debug(offset >= 0 && offset < RA_INTER_RANGE_END);
		raInstructionEdge edge;
		edge.index = index - offset;
		return edge;
	}
	// advances by one edge (input -> output of the same instruction, or output -> next input)
	raInstructionEdge& operator++()
	{
		cemu_assert_debug(IsInstructionIndex());
		index++;
		return *this;
	}
private:
	sint32 index; // can also be RA_INTER_RANGE_START or RA_INTER_RANGE_END, otherwise contains instruction index * 2 + (0 = input edge, 1 = output edge)
};
// inclusive interval [start, end] over instruction edges
// endpoints may be the segment-border sentinels, in which case the interval extends into neighboring segments
struct raInterval
{
	raInterval()
	{
	}
	raInterval(raInstructionEdge start, raInstructionEdge end)
	{
		SetInterval(start, end);
	}
	// isStartOnInput = Input+Output edge on first instruction. If false then only output
	// isEndOnOutput = Input+Output edge on last instruction. If false then only input
	void SetInterval(sint32 start, bool isStartOnInput, sint32 end, bool isEndOnOutput)
	{
		this->start.Set(start, isStartOnInput);
		this->end.Set(end, !isEndOnOutput);
	}
	// set from two edges directly; start must not order after end
	void SetInterval(raInstructionEdge start, raInstructionEdge end)
	{
		cemu_assert_debug(start <= end);
		this->start = start;
		this->end = end;
	}
	void SetStart(const raInstructionEdge& edge)
	{
		start = edge;
	}
	void SetEnd(const raInstructionEdge& edge)
	{
		end = edge;
	}
	sint32 GetStartIndex() const
	{
		return start.GetInstructionIndex();
	}
	sint32 GetEndIndex() const
	{
		return end.GetInstructionIndex();
	}
	// true if the interval reaches into the previous segment (start is RA_INTER_RANGE_START)
	bool ExtendsPreviousSegment() const
	{
		return start.ConnectsToPreviousSegment();
	}
	// true if the interval reaches into the next segment (end is RA_INTER_RANGE_END)
	bool ExtendsIntoNextSegment() const
	{
		return end.ConnectsToNextSegment();
	}
	// both endpoints are RA_INTER_RANGE_END -> the interval exists only as a connection to the next segment
	bool IsNextSegmentOnly() const
	{
		return start.ConnectsToNextSegment() && end.ConnectsToNextSegment();
	}
	// both endpoints are RA_INTER_RANGE_START -> the interval exists only as a connection to the previous segment
	bool IsPreviousSegmentOnly() const
	{
		return start.ConnectsToPreviousSegment() && end.ConnectsToPreviousSegment();
	}
	// returns true if range is contained within a single segment
	bool IsLocal() const
	{
		return start.GetRaw() > RA_INTER_RANGE_START && end.GetRaw() < RA_INTER_RANGE_END;
	}
	// instruction-granular containment; sentinel endpoints behave like -inf/+inf via GetInstructionIndexEx
	bool ContainsInstructionIndex(sint32 instructionIndex) const
	{
		cemu_assert_debug(instructionIndex != RA_INTER_RANGE_START && instructionIndex != RA_INTER_RANGE_END);
		return instructionIndex >= start.GetInstructionIndexEx() && instructionIndex <= end.GetInstructionIndexEx();
	}
	// similar to ContainsInstructionIndex, but allows RA_INTER_RANGE_START/END as input
	bool ContainsInstructionIndexEx(sint32 instructionIndex) const
	{
		if(instructionIndex == RA_INTER_RANGE_START)
			return start.ConnectsToPreviousSegment();
		if(instructionIndex == RA_INTER_RANGE_END)
			return end.ConnectsToNextSegment();
		return instructionIndex >= start.GetInstructionIndexEx() && instructionIndex <= end.GetInstructionIndexEx();
	}
	// edge-granular containment, inclusive on both ends
	bool ContainsEdge(const raInstructionEdge& edge) const
	{
		return edge >= start && edge <= end;
	}
	bool ContainsWholeInterval(const raInterval& other) const
	{
		return other.start >= start && other.end <= end;
	}
	// inclusive overlap test (touching intervals count as overlapping)
	bool IsOverlapping(const raInterval& other) const
	{
		return start <= other.end && end >= other.start;
	}
	// distance in raw edge units (half-instructions), counting both endpoints
	sint32 GetPreciseDistance()
	{
		cemu_assert_debug(!start.ConnectsToNextSegment()); // how to handle this?
		if(start == end)
			return 1;
		cemu_assert_debug(!end.ConnectsToPreviousSegment() && !end.ConnectsToNextSegment());
		if(start.ConnectsToPreviousSegment())
			return end.GetRaw() + 1; // measured from the segment start
		return end.GetRaw() - start.GetRaw() + 1; // +1 because end is inclusive
	}
	//private: not making these directly accessible only forces us to create loads of verbose getters and setters
	raInstructionEdge start;
	raInstructionEdge end;
};
// ties an instruction edge to the set of physical registers a range is allowed to occupy at that position
struct raFixedRegRequirement
{
	raInstructionEdge pos; // edge at which the constraint applies
	IMLPhysRegisterSet allowedReg; // physical registers permitted at pos
};
struct raLivenessRange
{
IMLSegment* imlSegment;
IMLSegmentPoint start;
IMLSegmentPoint end;
raInterval interval2;
// dirty state tracking
bool _noLoad;
bool hasStore;
@ -34,28 +310,34 @@ struct raLivenessRange
boost::container::small_vector<raLivenessRange*, 4> previousRanges;
// processing
uint32 lastIterationIndex;
// instruction locations
// instruction read/write locations
std::vector<raLivenessLocation_t> list_locations;
// ordered list of all raInstructionEdge indices which require a fixed register
std::vector<raFixedRegRequirement> list_fixedRegRequirements;
// linked list (subranges with same GPR virtual register)
raLivenessSubrangeLink link_sameVirtualRegister;
// linked list (all subranges for this segment)
raLivenessSubrangeLink link_allSegmentRanges;
// register mapping (constant)
// register info
IMLRegID virtualRegister;
IMLName name;
// register allocator result
sint32 physicalRegister;
boost::container::small_vector<raLivenessRange*, 32> GetAllSubrangesInCluster();
bool GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters); // if the cluster has fixed register requirements in any instruction this returns the combined register mask. Otherwise returns false in which case allowedRegisters is left undefined
IMLPhysRegisterSet GetAllowedRegisters(IMLPhysRegisterSet regPool); // return regPool with fixed register requirements filtered out
IMLRegID GetVirtualRegister() const;
sint32 GetPhysicalRegister() const;
bool HasPhysicalRegister() const { return physicalRegister >= 0; }
IMLName GetName() const;
void SetPhysicalRegister(sint32 physicalRegister);
void SetPhysicalRegisterForCluster(sint32 physicalRegister);
void UnsetPhysicalRegister() { physicalRegister = -1; }
};
raLivenessRange* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, sint32 startIndex, sint32 endIndex);
raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition);
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange);
void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext);
@ -63,7 +345,7 @@ void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange
void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange);
raLivenessRange* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, sint32 splitIndex, bool trimToHole = false);
raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToHole = false);
void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite);
void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange);
@ -71,8 +353,5 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange);
// cost estimation
sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment);
sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange);
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex);
// special values to mark the index of ranges that reach across the segment border
#define RA_INTER_RANGE_START (-1)
#define RA_INTER_RANGE_END (0x70000000)
//sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex);
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition);

View file

@ -3,12 +3,121 @@
#include <boost/container/small_vector.hpp>
// special values to mark the index of ranges that reach across the segment border
#define RA_INTER_RANGE_START (-1)
#define RA_INTER_RANGE_END (0x70000000)
// a position within a segment, tracked as a node of a doubly-linked list of points
// index may hold the sentinels RA_INTER_RANGE_START / RA_INTER_RANGE_END for positions beyond the segment borders
struct IMLSegmentPoint
{
	friend struct IMLSegmentInterval;

	sint32 index;
	// NOTE(review): duplicate member declaration below — one of the two imlSegment lines looks like
	// diff/merge residue and must be removed for this struct to compile; confirm which one to keep
	struct IMLSegment* imlSegment;
	struct IMLSegment* imlSegment; // do we really need to track this? SegmentPoints are always accessed via the segment that they are part of
	IMLSegmentPoint* next;
	IMLSegmentPoint* prev;

	// the index is the instruction index times two.
	// this gives us the ability to cover half an instruction with RA ranges
	// covering only the first half of an instruction (0-0) means that the register is read, but not preserved
	// covering first and the second half means the register is read and preserved
	// covering only the second half means the register is written but not read
	// NOTE(review): despite the comment above, GetInstructionIndex/SetInstructionIndex pass the index
	// through unscaled — presumably callers store the pre-encoded value; confirm
	sint32 GetInstructionIndex() const
	{
		return index;
	}
	void SetInstructionIndex(sint32 index)
	{
		this->index = index;
	}
	// shift this point by shiftCount if it sits at or after instructionIndex (sentinel points are never shifted)
	void ShiftIfAfter(sint32 instructionIndex, sint32 shiftCount)
	{
		if (!IsPreviousSegment() && !IsNextSegment())
		{
			if (GetInstructionIndex() >= instructionIndex)
				index += shiftCount;
		}
	}
	void DecrementByOneInstruction()
	{
		index--;
	}
	// the segment point can point beyond the first and last instruction which indicates that it is an infinite range reaching up to the previous or next segment
	bool IsPreviousSegment() const { return index == RA_INTER_RANGE_START; }
	bool IsNextSegment() const { return index == RA_INTER_RANGE_END; }

	// overload operand > and <
	bool operator>(const IMLSegmentPoint& other) const { return index > other.index; }
	bool operator<(const IMLSegmentPoint& other) const { return index < other.index; }
	bool operator==(const IMLSegmentPoint& other) const { return index == other.index; }
	bool operator!=(const IMLSegmentPoint& other) const { return index != other.index; }

	// overload comparison operands for sint32
	bool operator>(const sint32 other) const { return index > other; }
	bool operator<(const sint32 other) const { return index < other; }
	bool operator<=(const sint32 other) const { return index <= other; }
	bool operator>=(const sint32 other) const { return index >= other; }
};
struct IMLSegmentInterval
{
IMLSegmentPoint start;
IMLSegmentPoint end;
bool ContainsInstructionIndex(sint32 offset) const { return start <= offset && end > offset; }
bool IsRangeOverlapping(const IMLSegmentInterval& other)
{
// todo - compare the raw index
sint32 r1start = this->start.GetInstructionIndex();
sint32 r1end = this->end.GetInstructionIndex();
sint32 r2start = other.start.GetInstructionIndex();
sint32 r2end = other.end.GetInstructionIndex();
if (r1start < r2end && r1end > r2start)
return true;
if (this->start.IsPreviousSegment() && r1start == r2start)
return true;
if (this->end.IsNextSegment() && r1end == r2end)
return true;
return false;
}
bool ExtendsIntoPreviousSegment() const
{
return start.IsPreviousSegment();
}
bool ExtendsIntoNextSegment() const
{
return end.IsNextSegment();
}
bool IsNextSegmentOnly() const
{
if(!start.IsNextSegment())
return false;
cemu_assert_debug(end.IsNextSegment());
return true;
}
bool IsPreviousSegmentOnly() const
{
if (!end.IsPreviousSegment())
return false;
cemu_assert_debug(start.IsPreviousSegment());
return true;
}
sint32 GetDistance() const
{
// todo - assert if either start or end is outside the segment
// we may also want to switch this to raw indices?
return end.GetInstructionIndex() - start.GetInstructionIndex();
}
};
struct PPCSegmentRegisterAllocatorInfo_t

View file

@ -18,6 +18,8 @@
#include "BackendX64/BackendX64.h"
#include "util/highresolutiontimer/HighResolutionTimer.h"
#define PPCREC_FORCE_SYNCHRONOUS_COMPILATION 0 // if 1, then function recompilation will block and execute on the thread that called PPCRecompiler_visitAddressNoBlock
struct PPCInvalidationRange
{
MPTR startAddress;
@ -41,11 +43,36 @@ void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();
PPCRecompilerInstanceData_t* ppcRecompilerInstanceData;
#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION
static std::mutex s_singleRecompilationMutex;
#endif
bool ppcRecompilerEnabled = false;
void PPCRecompiler_recompileAtAddress(uint32 address);
// this function does never block and can fail if the recompiler lock cannot be acquired immediately
void PPCRecompiler_visitAddressNoBlock(uint32 enterAddress)
{
#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION
if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited)
return;
PPCRecompilerState.recompilerSpinlock.lock();
if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited)
{
PPCRecompilerState.recompilerSpinlock.unlock();
return;
}
ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] = PPCRecompiler_leaveRecompilerCode_visited;
PPCRecompilerState.recompilerSpinlock.unlock();
s_singleRecompilationMutex.lock();
if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] == PPCRecompiler_leaveRecompilerCode_visited)
{
PPCRecompiler_recompileAtAddress(enterAddress);
}
s_singleRecompilationMutex.unlock();
return;
#endif
// quick read-only check without lock
if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited)
return;
@ -154,6 +181,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
}
}
// if(range.startAddress < 0x0202fa3C || range.startAddress > 0x0202FA7C)
// return nullptr; // DEBUG
PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t();
ppcRecFunc->ppcAddress = range.startAddress;
ppcRecFunc->ppcSize = range.length;
@ -182,6 +212,85 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
return nullptr;
}
}
// DEBUG BEGIN
// if(ppcRecFunc->ppcAddress != 0x2BDA9F4) // TP
// {
// delete ppcRecFunc;
// return nullptr;
// }
// if(ppcRecFunc->ppcAddress < 0x2BDA9F4) // TP
// {
// delete ppcRecFunc;
// return nullptr;
// }
// this prevents the crashing
// if((ppcRecFunc->ppcAddress >= 0x02ade400 && ppcRecFunc->ppcAddress < 0x02ade600)) -> no crash
//if((ppcRecFunc->ppcAddress >= 0x02ade500 && ppcRecFunc->ppcAddress < 0x02ade600)) -> no crash
// if((ppcRecFunc->ppcAddress >= 0x02ade580 && ppcRecFunc->ppcAddress < 0x02ade600)) // -> crashed around 0x0x2b874b0 (but rare? Out of 5 runs it only crashed once)
// {
// delete ppcRecFunc;
// return nullptr;
// }
// the problem with Shovel Knight is that the crash seems to be pretty instable, at least when trying to narrow it down. Lets look for another game for now
// check TP bug...
// if(ppcRecFunc->ppcAddress >= 0x03000000) -> has bug
// if(ppcRecFunc->ppcAddress >= 0x02800000) -> no bug
// if(ppcRecFunc->ppcAddress >= 0x02C00000) -> has bug
// if(ppcRecFunc->ppcAddress >= 0x02A00000) -> no bug
// if(ppcRecFunc->ppcAddress >= 0x02B00000) -> no bug
// if(ppcRecFunc->ppcAddress >= 0x02B80000) -> has bug
// if(ppcRecFunc->ppcAddress >= 0x02B40000) -> no bug
// if(ppcRecFunc->ppcAddress >= 0x02B60000) -> no bug
// if(ppcRecFunc->ppcAddress >= 0x02B70000) -> has bug
// if(ppcRecFunc->ppcAddress >= 0x02B68000) -> no bug
// if(ppcRecFunc->ppcAddress >= 0x02B64000) -> no bug (I went into wrong direction)
// if(ppcRecFunc->ppcAddress >= 0x02B6C000) -> has bug
// if(ppcRecFunc->ppcAddress >= 0x02B6A000) -> has bug (double checked, it has bug)
// if(ppcRecFunc->ppcAddress >= 0x02B6B000) -> has bug (I went into the wrong direction again? Or does A000 have no bug??
// if(ppcRecFunc->ppcAddress >= 0x02B69000) -> has bug
// if(ppcRecFunc->ppcAddress >= 0x02B68800) -> has bug
// if(ppcRecFunc->ppcAddress >= 0x02B68400) -> no bug
// if(ppcRecFunc->ppcAddress >= 0x02B68600) -> has bug
// if(ppcRecFunc->ppcAddress >= 0x02B68500) -> no bug
// if(ppcRecFunc->ppcAddress >= 0x02B68580) -> no bug
// if(ppcRecFunc->ppcAddress >= 0x02B685C0) -> has bug
// if(ppcRecFunc->ppcAddress >= 0x02B685A0) -> has bug
// if(ppcRecFunc->ppcAddress >= 0x02B68590) -> no bug
// if(ppcRecFunc->ppcAddress >= 0x02B68598) -> has bug
// if(ppcRecFunc->ppcAddress != 0x02B68594) -> seems fine. No bug (against the expectation)
// if(ppcRecFunc->ppcAddress == 0x02B68594) -> Still has the bug
// if(ppcRecFunc->ppcAddress == 0x02B68594)
// {
// delete ppcRecFunc;
// return nullptr;
// }
// if(ppcRecFunc->ppcAddress >= 0x2B7A8D4 && ppcRecFunc->ppcAddress < 0x02B7AC9C && ppcRecFunc->ppcAddress != 0x2B7A8D4)
// {
// delete ppcRecFunc;
// return nullptr;
// }
// doing both of these means no bug!
// excluding just ppcAddress == 0x2B7A8D4 is enough to trigger the bug again. So it definitely that function
// next: Debug it!
// In Pikmin 3 030a9998 is broken?
// if(!(ppcRecFunc->ppcAddress >= 0x030a9998 && ppcRecFunc->ppcAddress < 0x030AA208))
// {
// delete ppcRecFunc;
// return nullptr;
// }
// else
// {
// delete ppcRecFunc;
// return nullptr;
// }
// DEBUG END
// apply passes
if (!PPCRecompiler_ApplyIMLPasses(ppcImlGenContext))
@ -190,13 +299,58 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
return nullptr;
}
//if (ppcRecFunc->ppcAddress == 0x30DF5F8)
//{
// debug_printf("----------------------------------------\n");
// IMLDebug_Dump(&ppcImlGenContext);
// __debugbreak();
//}
// TP
// if (ppcRecFunc->ppcAddress == 0x2B7A8D4)
// {
// debug_printf("----------------------------------------\n");
// IMLDebug_Dump(&ppcImlGenContext);
// //__debugbreak();
// }
// // Bad Function in SM3DW
// if (ppcRecFunc->ppcAddress == 0x023D5768)
// {
// debug_printf("----------------------------------------\n");
// IMLDebug_Dump(&ppcImlGenContext);
// }
// if (ppcRecFunc->ppcAddress >= 0x023D5768 && ppcRecFunc->ppcAddress < 0x023D58DC)
// {
// delete ppcRecFunc;
// return nullptr;
// }
//
//
// // 0x02846c74
// if (ppcRecFunc->ppcAddress == 0x02846c74)
// {
// debug_printf("----------------------------------------\n");
// IMLDebug_Dump(&ppcImlGenContext);
// __debugbreak();
// }
// Shovel Knight
// if (ppcRecFunc->ppcAddress >= 0x02A1E630 && ppcRecFunc->ppcAddress < 0x02A1E9D8)
// {
// // debug_printf("----------------------------------------\n");
// // IMLDebug_Dump(&ppcImlGenContext);
// // __debugbreak();
// delete ppcRecFunc;
// return nullptr;
// }
//
// //
// if (ppcRecFunc->ppcAddress == 0x02ade5c4 || ppcRecFunc->ppcAddress == 0x02ade5c8)
// {
// // debug_printf("----------------------------------------\n");
// IMLDebug_Dump(&ppcImlGenContext);
// __debugbreak();
// }
// else
// {
// delete ppcRecFunc;
// return nullptr;
// }
//if (ppcRecFunc->ppcAddress == 0x11223344)
//{
@ -210,14 +364,26 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
// return nullptr;
//}
//if (ppcRecFunc->ppcAddress == 0x03C26844)
//{
// __debugbreak();
// IMLDebug_Dump(&ppcImlGenContext);
// __debugbreak();
//}
// if (ppcRecFunc->ppcAddress >= 0x2BDA9F4 && ppcRecFunc->ppcAddress < 0x02BDAB38)
// {
// return nullptr;
// //IMLDebug_Dump(&ppcImlGenContext);
// //__debugbreak();
// }
// if (ppcRecFunc->ppcAddress == 0x2BDA9F4)
// {
// IMLDebug_Dump(&ppcImlGenContext);
// __debugbreak();
// }
// 31A8778
// if(ppcRecFunc->ppcAddress >= 0x2759E20 && ppcRecFunc->ppcAddress < 0x0275A0CC)
// {
// delete ppcRecFunc;
// return nullptr;
// }
// Functions for testing (botw):
// 3B4049C (large with switch case)
// 30BF118 (has a bndz copy loop + some float instructions at the end)
@ -231,6 +397,14 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
return nullptr;
}
if (ppcRecFunc->ppcAddress == 0x2B7A8D4)
{
// write code to binary file
FILE* f = fopen("ppcRecFunc_2B7A8D4.bin", "wb");
fwrite(ppcRecFunc->x86Code, 1, ppcRecFunc->x86Size, f);
fclose(f);
}
// collect list of PPC-->x64 entry points
entryPointsOut.clear();
for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2)
@ -255,7 +429,7 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
codeHash += ((uint8*)ppcRecFunc->x86Code)[i];
}
//cemuLog_log(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x} Took {:.4}ms | Size {:04x} CodeHash {:08x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code, bt.GetElapsedMilliseconds(), ppcRecFunc->x86Size, codeHash);
cemuLog_log(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x} Took {:.4}ms | Size {:04x} CodeHash {:08x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code, bt.GetElapsedMilliseconds(), ppcRecFunc->x86Size, codeHash);
return ppcRecFunc;
}
@ -323,11 +497,14 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
//PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext);
//PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext);
// if(ppcImlGenContext.debug_entryPPCAddress == 0x0200E1E8)
// {
// IMLDebug_Dump(&ppcImlGenContext);
// __debugbreak();
// }
// if(ppcImlGenContext.debug_entryPPCAddress >= 0x0240B7F8 && ppcImlGenContext.debug_entryPPCAddress < 0x0240C0AC)
// {
// IMLDebug_Dump(&ppcImlGenContext);
// __debugbreak();
// }
// else if(ppcImlGenContext.debug_entryPPCAddress >= 0x0240B7F8)
// return false;
return true;
}
@ -438,6 +615,10 @@ std::atomic_bool s_recompilerThreadStopSignal{false};
void PPCRecompiler_thread()
{
SetThreadName("PPCRecompiler");
#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION
return;
#endif
while (true)
{
if(s_recompilerThreadStopSignal)

View file

@ -1746,7 +1746,7 @@ uint32 PPCRecompiler_getPreviousInstruction(ppcImlGenContext_t* ppcImlGenContext
void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index)
{
segmentPoint->imlSegment = imlSegment;
segmentPoint->index = index;
segmentPoint->SetInstructionIndex(index);
if (imlSegment->segmentPointList)
imlSegment->segmentPointList->prev = segmentPoint;
segmentPoint->prev = nullptr;
@ -1766,7 +1766,7 @@ void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint)
/*
* Insert multiple no-op instructions
* Warning: Can invalidate any previous instruction structs from the same segment
* Warning: Can invalidate any previous instruction pointers from the same segment
*/
void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount)
{
@ -1788,12 +1788,7 @@ void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index,
IMLSegmentPoint* segmentPoint = imlSegment->segmentPointList;
while (segmentPoint)
{
if (segmentPoint->index != RA_INTER_RANGE_START && segmentPoint->index != RA_INTER_RANGE_END)
{
if (segmentPoint->index >= index)
segmentPoint->index += shiftBackCount;
}
// next
segmentPoint->ShiftIfAfter(index, shiftBackCount);
segmentPoint = segmentPoint->next;
}
}
@ -2864,6 +2859,76 @@ bool PPCIMLGen_FillBasicBlock(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBloc
{
uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base);
ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction;
// DEBUG BEGIN
// if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A8D4+0x10) -> stops bug
// if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A9C0) -> has bug (optional code path)
// if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AA50) -> stops bug
// if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC34) -> stops bug
// if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC78) -> has bug
// if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC70) -> has bug
// if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC88) -> has bug
// if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AC3C) -> has bug
// if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AC38) -> no bug
// weirdly, excluding 0x02B7AC38 fixes the issue. Excluding both 0x02B7AC3C and 0x2B7AC88 (the follow up instructions) does not fix the bug
// if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7ABE4) -> has bug
// if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AAD0) -> fixes bug
// maybe try to place as many leave instructions as possible while keeping the bug alive
// eventually we should end up with a relatively small IR footprint that is easier to analyze
// 0x023d5818
// SM3DW debug
// if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x23D58A8)
// {
// ppcImlGenContext.emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext.ppcAddressOfCurrentInstruction, 0, 0, IMLREG_INVALID);
// }
#if 0 // TP
if(ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC78 || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AC70 || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A9C0 || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AC3C || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AADC || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7ABE4 || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7ABC0 || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7ABA8 || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AB90 || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AB04 || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7abc4 || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7A9B0 || // verified
//ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7aa10 -> fixes bug (this is after a bl)
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AA3C || // verified
//ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7AA44 -> fixes bug (this is on the main path, the one before, 0x02B7AA3C, does not break)
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AADC || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7ABC4 || // verified
ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7ac88 || // verified
// ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7aad0 || -> fixes it
// ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7aa30 || -> fixes it (mostly. There was a small glitch on eponas tail?)
//ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02b7aa24 || -> this fixes it
//ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A918 || -> this fixes it
//ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7A9A0 || -> this fixes it
//ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x02B7AC38 || -> this fixes it
//ppcImlGenContext.ppcAddressOfCurrentInstruction == 0x2B7A8D4 || -> this fixes it
(ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x2B7AC44 && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x2B7AC84) || // verified
(ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x02B7AADC && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x2B7ABC0) || // verified
(ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x2B7A9B0 && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x02B7AA0C) ||
(ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x02B7AAE4 && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x02b7ac20) // verified
// disabling IMLOptimizerX86_SubstituteCJumpForEflagsJump fixes it...
//(ppcImlGenContext.ppcAddressOfCurrentInstruction >= 0x2B7AA1C && ppcImlGenContext.ppcAddressOfCurrentInstruction <= 0x02B7AA40) -> fixes it
)
{
ppcImlGenContext.emitInst().make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext.ppcAddressOfCurrentInstruction, 0, 0, IMLREG_INVALID);
// this doesnt work any longer because the basic blocks are determined before the recompiler is called
basicBlockInfo.GetSegmentForInstructionAppend()->SetLinkBranchTaken(nullptr);
basicBlockInfo.GetSegmentForInstructionAppend()->SetLinkBranchNotTaken(nullptr);
break; // but we should be able to just exit the block early?
}
#endif
if (PPCRecompiler_decodePPCInstruction(&ppcImlGenContext))
{
debug_printf("Recompiler encountered unsupported instruction at 0x%08x\n", addressOfCurrentInstruction);