PPCRec: Refactor read/write access tracking for liveness ranges

This commit is contained in:
Exzap 2024-10-26 12:33:23 +02:00
parent 547cf501d0
commit 8270308ccc
4 changed files with 144 additions and 265 deletions

View file

@@ -408,6 +408,27 @@ struct IMLUsedRegisters
F(readGPR3); F(readGPR3);
} }
// temporary (for FPRs)
// Invoke F for the written FPR of this instruction, if any.
// (Currently at most one FPR can be written per instruction.)
template<typename Fn>
void ForEachWrittenFPR(Fn F) const
{
	if (!writtenFPR1.IsValid())
		return;
	F(writtenFPR1);
}
// Invoke F for every valid FPR read by this instruction, in slot order 1..4.
template<typename Fn>
void ForEachReadFPR(Fn F) const
{
	// iterate via member pointers so F still receives the member lvalue itself
	for (const auto* reg : { &readFPR1, &readFPR2, &readFPR3, &readFPR4 })
	{
		if (reg->IsValid())
			F(*reg);
	}
}
template<typename Fn> template<typename Fn>
void ForEachAccessedGPR(Fn F) const void ForEachAccessedGPR(Fn F) const
{ {

View file

@@ -168,7 +168,8 @@ static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRe
{ {
IMLPhysRegisterSet ps; IMLPhysRegisterSet ps;
ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX); ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX);
fixedRegs.listInput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); fixedRegs.listInput.emplace_back(IMLREG_INVALID, ps); // none of the inputs may use EAX
fixedRegs.listOutput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); // but we output to EAX
} }
else if (instruction->type == PPCREC_IML_TYPE_CALL_IMM) else if (instruction->type == PPCREC_IML_TYPE_CALL_IMM)
{ {
@@ -262,30 +263,14 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml
sint32 PPCRecRA_countDistanceUntilNextUse2(raLivenessRange* subrange, raInstructionEdge startPosition) sint32 PPCRecRA_countDistanceUntilNextUse2(raLivenessRange* subrange, raInstructionEdge startPosition)
{ {
sint32 startInstructionIndex; for (sint32 i = 0; i < subrange->list_accessLocations.size(); i++)
if (startPosition.ConnectsToPreviousSegment())
startInstructionIndex = 0;
else
startInstructionIndex = startPosition.GetInstructionIndex();
for (sint32 i = 0; i < subrange->list_locations.size(); i++)
{ {
if (subrange->list_locations[i].index >= startInstructionIndex) if (subrange->list_accessLocations[i].pos >= startPosition)
{ {
sint32 preciseIndex = subrange->list_locations[i].index * 2; auto& it = subrange->list_accessLocations[i];
cemu_assert_debug(subrange->list_locations[i].isRead || subrange->list_locations[i].isWrite); // locations must have any access cemu_assert_debug(it.IsRead() != it.IsWrite()); // an access location can be either read or write
// check read edge cemu_assert_debug(!startPosition.ConnectsToPreviousSegment() && !startPosition.ConnectsToNextSegment());
if (subrange->list_locations[i].isRead) return it.pos.GetRaw() - startPosition.GetRaw();
{
if (preciseIndex >= startPosition.GetRaw())
return preciseIndex - startPosition.GetRaw();
}
// check write edge
if (subrange->list_locations[i].isWrite)
{
preciseIndex++;
if (preciseIndex >= startPosition.GetRaw())
return preciseIndex - startPosition.GetRaw();
}
} }
} }
cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000); cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000);
@ -549,9 +534,7 @@ struct raFixedRegRequirementWithVGPR
std::vector<raFixedRegRequirementWithVGPR> IMLRA_BuildSegmentInstructionFixedRegList(IMLSegment* imlSegment) std::vector<raFixedRegRequirementWithVGPR> IMLRA_BuildSegmentInstructionFixedRegList(IMLSegment* imlSegment)
{ {
std::vector<raFixedRegRequirementWithVGPR> frrList; std::vector<raFixedRegRequirementWithVGPR> frrList;
size_t index = 0; size_t index = 0;
IMLUsedRegisters gprTracking;
while (index < imlSegment->imlList.size()) while (index < imlSegment->imlList.size())
{ {
IMLFixedRegisters fixedRegs; IMLFixedRegisters fixedRegs;
@ -560,7 +543,7 @@ std::vector<raFixedRegRequirementWithVGPR> IMLRA_BuildSegmentInstructionFixedReg
pos.Set(index, true); pos.Set(index, true);
for (auto& fixedRegAccess : fixedRegs.listInput) for (auto& fixedRegAccess : fixedRegs.listInput)
{ {
frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.GetRegID()); frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID);
} }
pos = pos + 1; pos = pos + 1;
for (auto& fixedRegAccess : fixedRegs.listOutput) for (auto& fixedRegAccess : fixedRegs.listOutput)
@ -1468,6 +1451,19 @@ raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx
return subrange; return subrange;
} }
// Record an access at edge pos for the given subrange.
// Access locations are kept sorted; a repeated access on the same edge is
// collapsed into the existing entry instead of being appended twice.
void IMLRA_UpdateOrAddSubrangeLocation(raLivenessRange* subrange, raInstructionEdge pos)
{
	auto& locations = subrange->list_accessLocations;
	if (!locations.empty())
	{
		// same edge already tracked -> nothing to add
		if (locations.back().pos == pos)
			return;
		// callers must append accesses in strictly increasing edge order
		cemu_assert_debug(locations.back().pos < pos);
	}
	locations.emplace_back(pos);
}
// take abstract range data and create LivenessRanges // take abstract range data and create LivenessRanges
void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment)
{ {
@ -1500,12 +1496,27 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML
while (index < imlSegment->imlList.size()) while (index < imlSegment->imlList.size())
{ {
imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) { raInstructionEdge pos((sint32)index, true);
gprTracking.ForEachReadGPR([&](IMLReg gprReg) {
IMLRegID gprId = gprReg.GetRegID(); IMLRegID gprId = gprReg.GetRegID();
raLivenessRange* subrange = regToSubrange.find(gprId)->second; raLivenessRange* subrange = regToSubrange.find(gprId)->second;
PPCRecRA_updateOrAddSubrangeLocation(subrange, index, !isWritten, isWritten); IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
cemu_assert_debug(!subrange->interval2.start.IsInstructionIndex() || subrange->interval2.start.GetInstructionIndex() <= index); });
cemu_assert_debug(!subrange->interval2.end.IsInstructionIndex() || subrange->interval2.end.GetInstructionIndex() >= index); gprTracking.ForEachReadFPR([&](IMLReg gprReg) {
IMLRegID gprId = gprReg.GetRegID();
raLivenessRange* subrange = regToSubrange.find(gprId)->second;
IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
});
pos = {(sint32)index, false};
gprTracking.ForEachWrittenGPR([&](IMLReg gprReg) {
IMLRegID gprId = gprReg.GetRegID();
raLivenessRange* subrange = regToSubrange.find(gprId)->second;
IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
});
gprTracking.ForEachWrittenFPR([&](IMLReg gprReg) {
IMLRegID gprId = gprReg.GetRegID();
raLivenessRange* subrange = regToSubrange.find(gprId)->second;
IMLRA_UpdateOrAddSubrangeLocation(subrange, pos);
}); });
// check fixed register requirements // check fixed register requirements
IMLFixedRegisters fixedRegs; IMLFixedRegisters fixedRegs;
@ -1754,13 +1765,13 @@ void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange)
bool isRead = false; bool isRead = false;
bool isWritten = false; bool isWritten = false;
bool isOverwritten = false; bool isOverwritten = false;
for (auto& location : subrange->list_locations) for (auto& location : subrange->list_accessLocations)
{ {
if (location.isRead) if (location.IsRead())
{ {
isRead = true; isRead = true;
} }
if (location.isWrite) if (location.IsWrite())
{ {
if (isRead == false) if (isRead == false)
isOverwritten = true; isOverwritten = true;

View file

@ -207,7 +207,7 @@ raLivenessRange* PPCRecRA_createSubrange2(ppcImlGenContext_t* ppcImlGenContext,
{ {
raLivenessRange* range = memPool_livenessSubrange.acquireObj(); raLivenessRange* range = memPool_livenessSubrange.acquireObj();
range->previousRanges.clear(); range->previousRanges.clear();
range->list_locations.clear(); range->list_accessLocations.clear();
range->list_fixedRegRequirements.clear(); range->list_fixedRegRequirements.clear();
range->imlSegment = imlSegment; range->imlSegment = imlSegment;
@ -259,39 +259,16 @@ void _unlinkSubrange(raLivenessRange* subrange)
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange)
{ {
_unlinkSubrange(subrange); _unlinkSubrange(subrange);
//subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange)); subrange->list_accessLocations.clear();
subrange->list_locations.clear(); subrange->list_fixedRegRequirements.clear();
//PPCRecompilerIml_removeSegmentPoint(&subrange->interval.start);
//PPCRecompilerIml_removeSegmentPoint(&subrange->interval.end);
memPool_livenessSubrange.releaseObj(subrange); memPool_livenessSubrange.releaseObj(subrange);
} }
// Release a subrange back to the pool without repairing links from other
// ranges. This leaves the range and any ranges linked to it in an invalid
// state — only use during final cleanup when no range will be accessed again.
void _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange)
{
	_unlinkSubrange(subrange);
	memPool_livenessSubrange.releaseObj(subrange);
}
void PPCRecRA_deleteSubrangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange) void PPCRecRA_deleteSubrangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange)
{ {
auto clusterRanges = subrange->GetAllSubrangesInCluster(); auto clusterRanges = subrange->GetAllSubrangesInCluster();
for (auto& subrange : clusterRanges) for (auto& subrange : clusterRanges)
{ PPCRecRA_deleteSubrange(ppcImlGenContext, subrange);
_PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext, subrange);
}
} }
void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext) void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext)
@ -300,9 +277,7 @@ void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext)
{ {
raLivenessRange* cur; raLivenessRange* cur;
while(cur = seg->raInfo.linkedList_allSubranges) while(cur = seg->raInfo.linkedList_allSubranges)
{ PPCRecRA_deleteSubrange(ppcImlGenContext, cur);
_PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext, cur);
}
seg->raInfo.linkedList_allSubranges = nullptr; seg->raInfo.linkedList_allSubranges = nullptr;
seg->raInfo.linkedList_perVirtualRegister.clear(); seg->raInfo.linkedList_perVirtualRegister.clear();
} }
@ -322,7 +297,6 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
if (subrange == absorbedSubrange) if (subrange == absorbedSubrange)
assert_dbg(); assert_dbg();
#endif #endif
// update references // update references
subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken; subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken;
subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken; subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken;
@ -334,22 +308,9 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
*std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange; *std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange;
// merge usage locations // merge usage locations
// at the merge point both ranges might track the same instruction, we handle this by first merging this duplicate location for (auto& accessLoc : absorbedSubrange->list_accessLocations)
if(subrange && absorbedSubrange && !subrange->list_locations.empty() && !absorbedSubrange->list_locations.empty()) subrange->list_accessLocations.push_back(accessLoc);
{ absorbedSubrange->list_accessLocations.clear();
if(subrange->list_locations.back().index == absorbedSubrange->list_locations.front().index)
{
subrange->list_locations.back().isRead |= absorbedSubrange->list_locations.front().isRead;
subrange->list_locations.back().isWrite |= absorbedSubrange->list_locations.front().isWrite;
absorbedSubrange->list_locations.erase(absorbedSubrange->list_locations.begin()); // inefficient
}
}
for (auto& location : absorbedSubrange->list_locations)
{
cemu_assert_debug(subrange->list_locations.empty() || (subrange->list_locations.back().index < location.index)); // todo - sometimes a subrange can contain the same instruction at the merge point if they are covering half of the instruction edge
subrange->list_locations.push_back(location);
}
absorbedSubrange->list_locations.clear();
// merge fixed reg locations // merge fixed reg locations
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
if(!subrange->list_fixedRegRequirements.empty() && !absorbedSubrange->list_fixedRegRequirements.empty()) if(!subrange->list_fixedRegRequirements.empty() && !absorbedSubrange->list_fixedRegRequirements.empty())
@ -358,9 +319,8 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
} }
#endif #endif
for (auto& fixedReg : absorbedSubrange->list_fixedRegRequirements) for (auto& fixedReg : absorbedSubrange->list_fixedRegRequirements)
{
subrange->list_fixedRegRequirements.push_back(fixedReg); subrange->list_fixedRegRequirements.push_back(fixedReg);
} absorbedSubrange->list_fixedRegRequirements.clear();
subrange->interval2.end = absorbedSubrange->interval2.end; subrange->interval2.end = absorbedSubrange->interval2.end;
@ -376,18 +336,29 @@ void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange
auto clusterRanges = originRange->GetAllSubrangesInCluster(); auto clusterRanges = originRange->GetAllSubrangesInCluster();
for (auto& subrange : clusterRanges) for (auto& subrange : clusterRanges)
{ {
if (subrange->list_locations.empty()) if (subrange->list_accessLocations.empty())
continue; continue;
raInterval interval; raInterval interval;
interval.SetInterval(subrange->list_locations.front().index, true, subrange->list_locations.back().index, true); interval.SetInterval(subrange->list_accessLocations.front().pos, subrange->list_accessLocations.back().pos);
raLivenessRange* newSubrange = PPCRecRA_createSubrange2(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), interval.start, interval.end); raLivenessRange* newSubrange = PPCRecRA_createSubrange2(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), interval.start, interval.end);
// copy locations and fixed reg indices // copy locations and fixed reg indices
newSubrange->list_locations = subrange->list_locations; newSubrange->list_accessLocations = subrange->list_accessLocations;
newSubrange->list_fixedRegRequirements = subrange->list_fixedRegRequirements; newSubrange->list_fixedRegRequirements = subrange->list_fixedRegRequirements;
if(originRange->HasPhysicalRegister()) if(originRange->HasPhysicalRegister())
{ {
cemu_assert_debug(subrange->list_fixedRegRequirements.empty()); // avoid unassigning a register from a range with a fixed register requirement cemu_assert_debug(subrange->list_fixedRegRequirements.empty()); // avoid unassigning a register from a range with a fixed register requirement
} }
// validate
if(!newSubrange->list_accessLocations.empty())
{
cemu_assert_debug(newSubrange->list_accessLocations.front().pos >= newSubrange->interval2.start);
cemu_assert_debug(newSubrange->list_accessLocations.back().pos <= newSubrange->interval2.end);
}
if(!newSubrange->list_fixedRegRequirements.empty())
{
cemu_assert_debug(newSubrange->list_fixedRegRequirements.front().pos >= newSubrange->interval2.start); // fixed register requirements outside of the actual access range probably means there is a mistake in GetInstructionFixedRegisters()
cemu_assert_debug(newSubrange->list_fixedRegRequirements.back().pos <= newSubrange->interval2.end);
}
} }
// remove subranges // remove subranges
PPCRecRA_deleteSubrangeCluster(ppcImlGenContext, originRange); PPCRecRA_deleteSubrangeCluster(ppcImlGenContext, originRange);
@ -411,10 +382,10 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* range)
cemu_assert_debug(range->interval2.start.ConnectsToPreviousSegment()); cemu_assert_debug(range->interval2.start.ConnectsToPreviousSegment());
} }
// validate locations // validate locations
if (!range->list_locations.empty()) if (!range->list_accessLocations.empty())
{ {
cemu_assert_debug(range->list_locations.front().index >= range->interval2.start.GetInstructionIndexEx()); cemu_assert_debug(range->list_accessLocations.front().pos >= range->interval2.start);
cemu_assert_debug(range->list_locations.back().index <= range->interval2.end.GetInstructionIndexEx()); cemu_assert_debug(range->list_accessLocations.back().pos <= range->interval2.end);
} }
// validate fixed reg requirements // validate fixed reg requirements
if (!range->list_fixedRegRequirements.empty()) if (!range->list_fixedRegRequirements.empty())
@ -430,41 +401,11 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* range)
void PPCRecRA_debugValidateSubrange(raLivenessRange* range) {} void PPCRecRA_debugValidateSubrange(raLivenessRange* range) {}
#endif #endif
// Locations are tracked per-instruction, while intervals are per-edge, so a
// location at the boundary instruction may record a read/write that lies
// outside the interval. Strip those out-of-range accesses here.
void IMLRA_FixLocations(raLivenessRange* range)
{
	auto& locations = range->list_locations;
	if (locations.empty())
		return;
	// range starts on the output edge of its first tracked instruction:
	// the read (input edge) of that instruction is outside the interval
	if (range->interval2.start.IsInstructionIndex() &&
		range->interval2.start.GetInstructionIndex() == locations.front().index &&
		range->interval2.start.IsOnOutputEdge())
	{
		locations.front().isRead = false;
		if (!locations.front().isWrite)
			locations.erase(locations.begin()); // neither read nor write remains
	}
	if (locations.empty())
		return;
	// range ends on the input edge of its last tracked instruction:
	// the write (output edge) of that instruction is outside the interval
	if (range->interval2.end.IsInstructionIndex() &&
		range->interval2.end.GetInstructionIndex() == locations.back().index &&
		range->interval2.end.IsOnInputEdge())
	{
		locations.back().isWrite = false;
		if (!locations.back().isRead)
			locations.pop_back(); // neither read nor write remains
	}
}
// trim start and end of range to match first and last read/write locations // trim start and end of range to match first and last read/write locations
// does not trim start/endpoints which extend into the next/previous segment // does not trim start/endpoints which extend into the next/previous segment
void IMLRA_TrimRangeToUse(raLivenessRange* range) void IMLRA_TrimRangeToUse(raLivenessRange* range)
{ {
if(range->list_locations.empty()) if(range->list_accessLocations.empty())
{ {
// special case where we trim ranges extending from other segments to a single instruction edge // special case where we trim ranges extending from other segments to a single instruction edge
cemu_assert_debug(!range->interval2.start.IsInstructionIndex() || !range->interval2.end.IsInstructionIndex()); cemu_assert_debug(!range->interval2.start.IsInstructionIndex() || !range->interval2.end.IsInstructionIndex());
@ -474,25 +415,18 @@ void IMLRA_TrimRangeToUse(raLivenessRange* range)
range->interval2.end = range->interval2.start; range->interval2.end = range->interval2.start;
return; return;
} }
// trim start and end
raInterval prevInterval = range->interval2; raInterval prevInterval = range->interval2;
// trim start
if(range->interval2.start.IsInstructionIndex()) if(range->interval2.start.IsInstructionIndex())
{ range->interval2.start = range->list_accessLocations.front().pos;
bool isInputEdge = range->list_locations.front().isRead;
range->interval2.start.Set(range->list_locations.front().index, isInputEdge);
}
// trim end
if(range->interval2.end.IsInstructionIndex()) if(range->interval2.end.IsInstructionIndex())
{ range->interval2.end = range->list_accessLocations.back().pos;
bool isOutputEdge = range->list_locations.back().isWrite;
range->interval2.end.Set(range->list_locations.back().index, !isOutputEdge);
}
// extra checks // extra checks
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
cemu_assert_debug(range->interval2.start <= range->interval2.end); cemu_assert_debug(range->interval2.start <= range->interval2.end);
for(auto& loc : range->list_locations) for(auto& loc : range->list_accessLocations)
{ {
cemu_assert_debug(range->interval2.ContainsInstructionIndex(loc.index)); cemu_assert_debug(range->interval2.ContainsEdge(loc.pos));
} }
cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval2)); cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval2));
#endif #endif
@ -532,33 +466,25 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte
*std::find(tailSubrange->subrangeBranchNotTaken->previousRanges.begin(), tailSubrange->subrangeBranchNotTaken->previousRanges.end(), subrange) = tailSubrange; *std::find(tailSubrange->subrangeBranchNotTaken->previousRanges.begin(), tailSubrange->subrangeBranchNotTaken->previousRanges.end(), subrange) = tailSubrange;
// we assume that list_locations is ordered by instruction index and contains no duplicate indices, so lets check that here just in case // we assume that list_locations is ordered by instruction index and contains no duplicate indices, so lets check that here just in case
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
if(!subrange->list_locations.empty()) if(subrange->list_accessLocations.size() > 1)
{ {
sint32 curIdx = -1; for(size_t i=0; i<subrange->list_accessLocations.size()-1; i++)
for(auto& location : subrange->list_locations)
{ {
cemu_assert_debug(curIdx < location.index); cemu_assert_debug(subrange->list_accessLocations[i].pos < subrange->list_accessLocations[i+1].pos);
curIdx = location.index;
} }
} }
#endif #endif
// split locations // split locations
// since there are 2 edges per instruction and locations track both via a single index, locations on the split point might need to be copied into both ranges auto it = std::lower_bound(
for (auto& location : subrange->list_locations) subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), splitPosition,
{ [](const raAccessLocation& accessLoc, raInstructionEdge value) { return accessLoc.pos < value; }
if(tailInterval.ContainsInstructionIndex(location.index)) );
tailSubrange->list_locations.push_back(location); size_t originalCount = subrange->list_accessLocations.size();
} tailSubrange->list_accessLocations.insert(tailSubrange->list_accessLocations.end(), it, subrange->list_accessLocations.end());
// remove tail locations from head subrange->list_accessLocations.erase(it, subrange->list_accessLocations.end());
for (sint32 i = 0; i < subrange->list_locations.size(); i++) cemu_assert_debug(subrange->list_accessLocations.empty() || subrange->list_accessLocations.back().pos < splitPosition);
{ cemu_assert_debug(tailSubrange->list_accessLocations.empty() || tailSubrange->list_accessLocations.front().pos >= splitPosition);
raLivenessLocation_t* location = subrange->list_locations.data() + i; cemu_assert_debug(subrange->list_accessLocations.size() + tailSubrange->list_accessLocations.size() == originalCount);
if (!headInterval.ContainsInstructionIndex(location->index))
{
subrange->list_locations.resize(i);
break;
}
}
// split fixed reg requirements // split fixed reg requirements
for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++) for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
{ {
@ -581,15 +507,10 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte
// adjust intervals // adjust intervals
subrange->interval2 = headInterval; subrange->interval2 = headInterval;
tailSubrange->interval2 = tailInterval; tailSubrange->interval2 = tailInterval;
// fix locations to only include read/write edges within the range
if(subrange)
IMLRA_FixLocations(subrange);
if(tailSubrange)
IMLRA_FixLocations(tailSubrange);
// trim to hole // trim to hole
if(trimToHole) if(trimToHole)
{ {
if(subrange->list_locations.empty() && (subrange->interval2.start.IsInstructionIndex() && subrange->interval2.end.IsInstructionIndex())) if(subrange->list_accessLocations.empty() && (subrange->interval2.start.IsInstructionIndex() && subrange->interval2.end.IsInstructionIndex()))
{ {
PPCRecRA_deleteSubrange(ppcImlGenContext, subrange); PPCRecRA_deleteSubrange(ppcImlGenContext, subrange);
subrange = nullptr; subrange = nullptr;
@ -598,7 +519,7 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte
{ {
IMLRA_TrimRangeToUse(subrange); IMLRA_TrimRangeToUse(subrange);
} }
if(tailSubrange->list_locations.empty() && (tailSubrange->interval2.start.IsInstructionIndex() && tailSubrange->interval2.end.IsInstructionIndex())) if(tailSubrange->list_accessLocations.empty() && (tailSubrange->interval2.start.IsInstructionIndex() && tailSubrange->interval2.end.IsInstructionIndex()))
{ {
PPCRecRA_deleteSubrange(ppcImlGenContext, tailSubrange); PPCRecRA_deleteSubrange(ppcImlGenContext, tailSubrange);
tailSubrange = nullptr; tailSubrange = nullptr;
@ -622,26 +543,6 @@ raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenConte
return tailSubrange; return tailSubrange;
} }
// Track a read and/or write access at the given instruction index.
// If the last tracked location is the same instruction, its access flags are
// merged; otherwise a new location entry is appended (indices must arrive in
// non-decreasing order).
void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite)
{
	auto& locations = subrange->list_locations;
	if (!locations.empty())
	{
		auto& last = locations.back();
		cemu_assert_debug(last.index <= index);
		if (last.index == index)
		{
			// same instruction already tracked -> merge flags
			last.isRead = last.isRead || isRead;
			last.isWrite = last.isWrite || isWrite;
			return;
		}
	}
	locations.emplace_back(index, isRead, isWrite);
}
sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment) sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment)
{ {
sint32 v = imlSegment->loopDepth + 1; sint32 v = imlSegment->loopDepth + 1;
@ -649,40 +550,6 @@ sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment)
return v*v; // 25, 100, 225, 400 return v*v; // 25, 100, 225, 400
} }
// Estimate the load/store cost of an entire range cluster.
// Model: cost of the most expensive entry point plus the most expensive exit
// point, with a small additive term per entry/exit. (Known inaccuracy: ten
// parallel branches each with a load are charged as if only one branch ran.)
sint32 PPCRecRARange_estimateTotalCost(std::span<raLivenessRange*> ranges)
{
	sint32 maxLoadCost = 0;
	sint32 maxStoreCost = 0;
	sint32 numLoads = 0;
	sint32 numStores = 0;
	for (raLivenessRange* r : ranges)
	{
		const sint32 segCost = IMLRA_GetSegmentReadWriteCost(r->imlSegment);
		// a range that does not extend in from the previous segment needs a load
		if (!r->interval2.ExtendsPreviousSegment())
		{
			maxLoadCost = std::max(maxLoadCost, segCost);
			numLoads++;
		}
		// a range that does not extend out into the next segment needs a store
		if (!r->interval2.ExtendsIntoNextSegment())
		{
			maxStoreCost = std::max(maxStoreCost, segCost);
			numStores++;
		}
	}
	return maxLoadCost + maxStoreCost + (numLoads + numStores) / 10;
}
// calculate additional cost of range that it would have after calling _ExplodeRange() on it // calculate additional cost of range that it would have after calling _ExplodeRange() on it
sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange) sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange)
{ {
@ -690,18 +557,19 @@ sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange)
sint32 cost = 0;//-PPCRecRARange_estimateTotalCost(ranges); sint32 cost = 0;//-PPCRecRARange_estimateTotalCost(ranges);
for (auto& subrange : ranges) for (auto& subrange : ranges)
{ {
if (subrange->list_locations.empty()) if (subrange->list_accessLocations.empty())
continue; // this range would be deleted and thus has no cost continue; // this range would be deleted and thus has no cost
sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment); sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment);
bool hasAdditionalLoad = subrange->interval2.ExtendsPreviousSegment(); bool hasAdditionalLoad = subrange->interval2.ExtendsPreviousSegment();
bool hasAdditionalStore = subrange->interval2.ExtendsIntoNextSegment(); bool hasAdditionalStore = subrange->interval2.ExtendsIntoNextSegment();
if(hasAdditionalLoad && !subrange->list_locations.front().isRead && subrange->list_locations.front().isWrite) // if written before read, then a load isn't necessary if(hasAdditionalLoad && subrange->list_accessLocations.front().IsWrite()) // if written before read then a load isn't necessary
{ {
cemu_assert_debug(!subrange->list_accessLocations.front().IsRead());
cost += segmentLoadStoreCost; cost += segmentLoadStoreCost;
} }
if(hasAdditionalStore) if(hasAdditionalStore)
{ {
bool hasWrite = std::find_if(subrange->list_locations.begin(), subrange->list_locations.end(), [](const raLivenessLocation_t& loc) { return loc.isWrite; }) != subrange->list_locations.end(); bool hasWrite = std::find_if(subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != subrange->list_accessLocations.end();
if(!hasWrite) // ranges which don't modify their value do not need to be stored if(!hasWrite) // ranges which don't modify their value do not need to be stored
cost += segmentLoadStoreCost; cost += segmentLoadStoreCost;
} }
@ -721,60 +589,45 @@ sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInst
sint32 cost = 0; sint32 cost = 0;
// find split position in location list // find split position in location list
if (subrange->list_locations.empty()) if (subrange->list_accessLocations.empty())
{
assert_dbg(); // should not happen?
return 0; return 0;
} if (splitPosition <= subrange->list_accessLocations.front().pos)
sint32 splitInstructionIndex = splitPosition.GetInstructionIndex();
if (splitInstructionIndex <= subrange->list_locations.front().index)
return 0; return 0;
if (splitInstructionIndex > subrange->list_locations.back().index) if (splitPosition > subrange->list_accessLocations.back().pos)
return 0; return 0;
// this can be optimized, but we should change list_locations to track instruction edges instead of instruction indices size_t firstTailLocationIndex = 0;
std::vector<raLivenessLocation_t> headLocations; for (size_t i = 0; i < subrange->list_accessLocations.size(); i++)
std::vector<raLivenessLocation_t> tailLocations;
for (auto& location : subrange->list_locations)
{ {
if(location.GetReadPos() < splitPosition || location.GetWritePos() < splitPosition) if (subrange->list_accessLocations[i].pos >= splitPosition)
headLocations.push_back(location); {
if(location.GetReadPos() >= splitPosition || location.GetWritePos() >= splitPosition) firstTailLocationIndex = i;
tailLocations.push_back(location); break;
} }
// fixup locations
if(!headLocations.empty() && headLocations.back().GetWritePos() >= splitPosition)
{
headLocations.back().isWrite = false;
if(!headLocations.back().isRead && !headLocations.back().isWrite)
headLocations.pop_back();
}
if(!tailLocations.empty() && tailLocations.front().GetReadPos() < splitPosition)
{
tailLocations.front().isRead = false;
if(!tailLocations.front().isRead && !tailLocations.front().isWrite)
tailLocations.erase(tailLocations.begin());
} }
std::span<raAccessLocation> headLocations{subrange->list_accessLocations.data(), firstTailLocationIndex};
std::span<raAccessLocation> tailLocations{subrange->list_accessLocations.data() + firstTailLocationIndex, subrange->list_accessLocations.size() - firstTailLocationIndex};
cemu_assert_debug(headLocations.empty() || headLocations.back().pos < splitPosition);
cemu_assert_debug(tailLocations.empty() || tailLocations.front().pos >= splitPosition);
// based on
sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment); sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment);
auto CalculateCostFromLocationRange = [segmentLoadStoreCost](const std::vector<raLivenessLocation_t>& locations, bool trackLoadCost = true, bool trackStoreCost = true) -> sint32 auto CalculateCostFromLocationRange = [segmentLoadStoreCost](std::span<raAccessLocation> locations, bool trackLoadCost = true, bool trackStoreCost = true) -> sint32
{ {
if(locations.empty()) if(locations.empty())
return 0; return 0;
sint32 cost = 0; sint32 cost = 0;
if(locations.front().isRead && trackLoadCost) if(locations.front().IsRead() && trackLoadCost)
cost += segmentLoadStoreCost; // not overwritten, so there is a load cost cost += segmentLoadStoreCost; // not overwritten, so there is a load cost
bool hasWrite = std::find_if(locations.begin(), locations.end(), [](const raLivenessLocation_t& loc) { return loc.isWrite; }) != locations.end(); bool hasWrite = std::find_if(locations.begin(), locations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != locations.end();
if(hasWrite && trackStoreCost) if(hasWrite && trackStoreCost)
cost += segmentLoadStoreCost; // modified, so there is a store cost cost += segmentLoadStoreCost; // modified, so there is a store cost
return cost; return cost;
}; };
sint32 baseCost = CalculateCostFromLocationRange(subrange->list_locations); sint32 baseCost = CalculateCostFromLocationRange(subrange->list_accessLocations);
bool tailOverwritesValue = !tailLocations.empty() && !tailLocations.front().isRead && tailLocations.front().isWrite; bool tailOverwritesValue = !tailLocations.empty() && !tailLocations.front().IsRead() && tailLocations.front().IsWrite();
sint32 newCost = CalculateCostFromLocationRange(headLocations) + CalculateCostFromLocationRange(tailLocations, !tailOverwritesValue, true); sint32 newCost = CalculateCostFromLocationRange(headLocations) + CalculateCostFromLocationRange(tailLocations, !tailOverwritesValue, true);
cemu_assert_debug(newCost >= baseCost); cemu_assert_debug(newCost >= baseCost);

View file

@ -155,26 +155,21 @@ private:
}; };
struct raLivenessLocation_t struct raAccessLocation
{ {
sint32 index; raAccessLocation(raInstructionEdge pos) : pos(pos) {}
bool isRead;
bool isWrite;
raLivenessLocation_t() = default; bool IsRead() const
raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
: index(index), isRead(isRead), isWrite(isWrite) {};
raInstructionEdge GetReadPos()
{ {
return raInstructionEdge(index, true); return pos.IsOnInputEdge();
} }
raInstructionEdge GetWritePos() bool IsWrite() const
{ {
return raInstructionEdge(index, false); return pos.IsOnOutputEdge();
} }
raInstructionEdge pos;
}; };
struct raInterval struct raInterval
@ -321,7 +316,7 @@ struct raLivenessRange
// processing // processing
uint32 lastIterationIndex; uint32 lastIterationIndex;
// instruction read/write locations // instruction read/write locations
std::vector<raLivenessLocation_t> list_locations; std::vector<raAccessLocation> list_accessLocations;
// ordered list of all raInstructionEdge indices which require a fixed register // ordered list of all raInstructionEdge indices which require a fixed register
std::vector<raFixedRegRequirement> list_fixedRegRequirements; std::vector<raFixedRegRequirement> list_fixedRegRequirements;
// linked list (subranges with same GPR virtual register) // linked list (subranges with same GPR virtual register)
@ -360,7 +355,6 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRan
raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToHole = false); raLivenessRange* PPCRecRA_splitLocalSubrange2(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToHole = false);
void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite);
void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange); void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange);
// cost estimation // cost estimation