PPCRec: Simplify RA code and clean it up a bit

This commit is contained in:
Exzap 2024-09-01 02:52:45 +02:00
parent f55b842773
commit 675c802cc1
7 changed files with 466 additions and 414 deletions

View file

@ -94,23 +94,12 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml
debug_printf(" "); debug_printf(" ");
index++; index++;
} }
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while (subrangeItr) while (subrangeItr)
{ {
if (offset == subrangeItr->start.index) if (offset == subrangeItr->start.index)
{ {
if (false)//subrange->isDirtied && i == subrange->becomesDirtyAtIndex.index) debug_printf("|%-2d", subrangeItr->GetVirtualRegister());
{
debug_printf("*%-2d", subrangeItr->range->virtualRegister);
}
else
{
debug_printf("|%-2d", subrangeItr->range->virtualRegister);
}
}
else if (false)//subrange->isDirtied && i == subrange->becomesDirtyAtIndex.index )
{
debug_printf("* ");
} }
else if (offset >= subrangeItr->start.index && offset < subrangeItr->end.index) else if (offset >= subrangeItr->start.index && offset < subrangeItr->end.index)
{ {
@ -122,7 +111,7 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml
} }
index += 3; index += 3;
// next // next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
} }
} }
@ -501,19 +490,19 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool
if (printLivenessRangeInfo) if (printLivenessRangeInfo)
{ {
debug_printf("Ranges-VirtReg "); debug_printf("Ranges-VirtReg ");
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while (subrangeItr) while (subrangeItr)
{ {
debug_printf("v%-2d", subrangeItr->range->virtualRegister); debug_printf("v%-2d", subrangeItr->GetVirtualRegister());
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
} }
debug_printf("\n"); debug_printf("\n");
debug_printf("Ranges-PhysReg "); debug_printf("Ranges-PhysReg ");
subrangeItr = imlSegment->raInfo.linkedList_allSubranges; subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while (subrangeItr) while (subrangeItr)
{ {
debug_printf("p%-2d", subrangeItr->range->physicalRegister); debug_printf("p%-2d", subrangeItr->GetPhysicalRegister());
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
} }
debug_printf("\n"); debug_printf("\n");
} }

View file

@ -50,10 +50,9 @@ struct IMLRegisterAllocatorContext
}; };
uint32 recRACurrentIterationIndex = 0;
uint32 PPCRecRA_getNextIterationIndex() uint32 PPCRecRA_getNextIterationIndex()
{ {
static uint32 recRACurrentIterationIndex = 0;
recRACurrentIterationIndex++; recRACurrentIterationIndex++;
return recRACurrentIterationIndex; return recRACurrentIterationIndex;
} }
@ -120,7 +119,7 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml
#define SUBRANGE_LIST_SIZE (128) #define SUBRANGE_LIST_SIZE (128)
sint32 PPCRecRA_countInstructionsUntilNextUse(raLivenessSubrange_t* subrange, sint32 startIndex) sint32 PPCRecRA_countInstructionsUntilNextUse(raLivenessRange* subrange, sint32 startIndex)
{ {
for (sint32 i = 0; i < subrange->list_locations.size(); i++) for (sint32 i = 0; i < subrange->list_locations.size(); i++)
{ {
@ -135,12 +134,12 @@ sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(IMLSegment* imlSe
{ {
sint32 minDistance = INT_MAX; sint32 minDistance = INT_MAX;
// next // next
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while(subrangeItr) while(subrangeItr)
{ {
if (subrangeItr->range->physicalRegister != physRegister) if (subrangeItr->GetPhysicalRegister() != physRegister)
{ {
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
continue; continue;
} }
if (startIndex >= subrangeItr->start.index && startIndex < subrangeItr->end.index) if (startIndex >= subrangeItr->start.index && startIndex < subrangeItr->end.index)
@ -149,7 +148,7 @@ sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(IMLSegment* imlSe
{ {
minDistance = std::min(minDistance, (subrangeItr->start.index - startIndex)); minDistance = std::min(minDistance, (subrangeItr->start.index - startIndex));
} }
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
} }
return minDistance; return minDistance;
} }
@ -175,7 +174,7 @@ struct IMLRALivenessTimeline
} }
// manually add an active range // manually add an active range
void AddActiveRange(raLivenessSubrange_t* subrange) void AddActiveRange(raLivenessRange* subrange)
{ {
activeRanges.emplace_back(subrange); activeRanges.emplace_back(subrange);
} }
@ -187,7 +186,7 @@ struct IMLRALivenessTimeline
size_t count = activeRanges.size(); size_t count = activeRanges.size();
for (size_t f = 0; f < count; f++) for (size_t f = 0; f < count; f++)
{ {
raLivenessSubrange_t* liverange = activeRanges[f]; raLivenessRange* liverange = activeRanges[f];
if (liverange->end.index <= instructionIndex) if (liverange->end.index <= instructionIndex)
{ {
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
@ -205,18 +204,18 @@ struct IMLRALivenessTimeline
activeRanges.resize(count); activeRanges.resize(count);
} }
std::span<raLivenessSubrange_t*> GetExpiredRanges() std::span<raLivenessRange*> GetExpiredRanges()
{ {
return { expiredRanges.data(), expiredRanges.size() }; return { expiredRanges.data(), expiredRanges.size() };
} }
boost::container::small_vector<raLivenessSubrange_t*, 64> activeRanges; boost::container::small_vector<raLivenessRange*, 64> activeRanges;
private: private:
boost::container::small_vector<raLivenessSubrange_t*, 16> expiredRanges; boost::container::small_vector<raLivenessRange*, 16> expiredRanges;
}; };
bool IsRangeOverlapping(raLivenessSubrange_t* rangeA, raLivenessSubrange_t* rangeB) bool IsRangeOverlapping(raLivenessRange* rangeA, raLivenessRange* rangeB)
{ {
if (rangeA->start.index < rangeB->end.index && rangeA->end.index > rangeB->start.index) if (rangeA->start.index < rangeB->end.index && rangeA->end.index > rangeB->start.index)
return true; return true;
@ -228,39 +227,40 @@ bool IsRangeOverlapping(raLivenessSubrange_t* rangeA, raLivenessSubrange_t* rang
} }
// mark occupied registers by any overlapping range as unavailable in physRegSet // mark occupied registers by any overlapping range as unavailable in physRegSet
void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange_t* range, IMLPhysRegisterSet& physRegSet) void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLPhysRegisterSet& physRegSet)
{ {
for (auto& subrange : range->list_subranges) auto clusterRanges = range2->GetAllSubrangesInCluster();
for (auto& subrange : clusterRanges)
{ {
IMLSegment* imlSegment = subrange->imlSegment; IMLSegment* imlSegment = subrange->imlSegment;
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while(subrangeItr) while(subrangeItr)
{ {
if (subrange == subrangeItr) if (subrange == subrangeItr)
{ {
// next // next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
continue; continue;
} }
if(IsRangeOverlapping(subrange, subrangeItr)) if(IsRangeOverlapping(subrange, subrangeItr))
{ {
if (subrangeItr->range->physicalRegister >= 0) if (subrangeItr->GetPhysicalRegister() >= 0)
physRegSet.SetReserved(subrangeItr->range->physicalRegister); physRegSet.SetReserved(subrangeItr->GetPhysicalRegister());
} }
// next // next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
} }
} }
} }
bool _livenessRangeStartCompare(raLivenessSubrange_t* lhs, raLivenessSubrange_t* rhs) { return lhs->start.index < rhs->start.index; } bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs) { return lhs->start.index < rhs->start.index; }
void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
{ {
raLivenessSubrange_t* subrangeList[4096+1]; raLivenessRange* subrangeList[4096+1];
sint32 count = 0; sint32 count = 0;
// disassemble linked list // disassemble linked list
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while (subrangeItr) while (subrangeItr)
{ {
if (count >= 4096) if (count >= 4096)
@ -268,7 +268,7 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
subrangeList[count] = subrangeItr; subrangeList[count] = subrangeItr;
count++; count++;
// next // next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
} }
if (count == 0) if (count == 0)
{ {
@ -280,12 +280,12 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
// reassemble linked list // reassemble linked list
subrangeList[count] = nullptr; subrangeList[count] = nullptr;
imlSegment->raInfo.linkedList_allSubranges = subrangeList[0]; imlSegment->raInfo.linkedList_allSubranges = subrangeList[0];
subrangeList[0]->link_segmentSubrangesGPR.prev = nullptr; subrangeList[0]->link_allSegmentRanges.prev = nullptr;
subrangeList[0]->link_segmentSubrangesGPR.next = subrangeList[1]; subrangeList[0]->link_allSegmentRanges.next = subrangeList[1];
for (sint32 i = 1; i < count; i++) for (sint32 i = 1; i < count; i++)
{ {
subrangeList[i]->link_segmentSubrangesGPR.prev = subrangeList[i - 1]; subrangeList[i]->link_allSegmentRanges.prev = subrangeList[i - 1];
subrangeList[i]->link_segmentSubrangesGPR.next = subrangeList[i + 1]; subrangeList[i]->link_allSegmentRanges.next = subrangeList[i + 1];
} }
// validate list // validate list
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
@ -299,40 +299,40 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment)
assert_dbg(); assert_dbg();
currentStartIndex = subrangeItr->start.index; currentStartIndex = subrangeItr->start.index;
// next // next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
} }
if (count != count2) if (count != count2)
assert_dbg(); assert_dbg();
#endif #endif
} }
std::unordered_map<IMLRegID, raLivenessSubrange_t*>& IMLRA_GetSubrangeMap(IMLSegment* imlSegment) std::unordered_map<IMLRegID, raLivenessRange*>& IMLRA_GetSubrangeMap(IMLSegment* imlSegment)
{ {
return imlSegment->raInfo.linkedList_perVirtualGPR2; return imlSegment->raInfo.linkedList_perVirtualRegister;
} }
raLivenessSubrange_t* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId) raLivenessRange* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId)
{ {
auto it = imlSegment->raInfo.linkedList_perVirtualGPR2.find(regId); auto it = imlSegment->raInfo.linkedList_perVirtualRegister.find(regId);
if (it == imlSegment->raInfo.linkedList_perVirtualGPR2.end()) if (it == imlSegment->raInfo.linkedList_perVirtualRegister.end())
return nullptr; return nullptr;
return it->second; return it->second;
} }
raLivenessSubrange_t* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* imlSegment, IMLReg regToSearch, sint32 instructionIndex) raLivenessRange* _GetSubrangeByInstructionIndexAndVirtualReg(IMLSegment* imlSegment, IMLReg regToSearch, sint32 instructionIndex)
{ {
uint32 regId = regToSearch.GetRegID(); uint32 regId = regToSearch.GetRegID();
raLivenessSubrange_t* subrangeItr = IMLRA_GetSubrange(imlSegment, regId); raLivenessRange* subrangeItr = IMLRA_GetSubrange(imlSegment, regId);
while (subrangeItr) while (subrangeItr)
{ {
if (subrangeItr->start.index <= instructionIndex && subrangeItr->end.index > instructionIndex) if (subrangeItr->start.index <= instructionIndex && subrangeItr->end.index > instructionIndex)
return subrangeItr; return subrangeItr;
subrangeItr = subrangeItr->link_sameVirtualRegisterGPR.next; subrangeItr = subrangeItr->link_sameVirtualRegister.next;
} }
return nullptr; return nullptr;
} }
void IMLRA_IsolateRangeOnInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, raLivenessSubrange_t* subrange, sint32 instructionIndex) void IMLRA_IsolateRangeOnInstruction(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, raLivenessRange* subrange, sint32 instructionIndex)
{ {
DEBUG_BREAK; DEBUG_BREAK;
} }
@ -381,42 +381,42 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon
_sortSegmentAllSubrangesLinkedList(imlSegment); _sortSegmentAllSubrangesLinkedList(imlSegment);
IMLRALivenessTimeline livenessTimeline; IMLRALivenessTimeline livenessTimeline;
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while(subrangeItr) while(subrangeItr)
{ {
sint32 currentIndex = subrangeItr->start.index; sint32 currentIndex = subrangeItr->start.index;
PPCRecRA_debugValidateSubrange(subrangeItr); PPCRecRA_debugValidateSubrange(subrangeItr);
livenessTimeline.ExpireRanges(std::min<sint32>(currentIndex, RA_INTER_RANGE_END-1)); // expire up to currentIndex (inclusive), but exclude infinite ranges livenessTimeline.ExpireRanges(std::min<sint32>(currentIndex, RA_INTER_RANGE_END-1)); // expire up to currentIndex (inclusive), but exclude infinite ranges
// if subrange already has register assigned then add it to the active list and continue // if subrange already has register assigned then add it to the active list and continue
if (subrangeItr->range->physicalRegister >= 0) if (subrangeItr->GetPhysicalRegister() >= 0)
{ {
// verify if register is actually available // verify if register is actually available
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
for (auto& liverangeItr : livenessTimeline.activeRanges) for (auto& liverangeItr : livenessTimeline.activeRanges)
{ {
// check for register mismatch // check for register mismatch
cemu_assert_debug(liverangeItr->range->physicalRegister != subrangeItr->range->physicalRegister); cemu_assert_debug(liverangeItr->GetPhysicalRegister() != subrangeItr->GetPhysicalRegister());
} }
#endif #endif
livenessTimeline.AddActiveRange(subrangeItr); livenessTimeline.AddActiveRange(subrangeItr);
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
continue; continue;
} }
// find free register for current subrangeItr and segment // find free register for current subrangeItr and segment
IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->range->virtualRegister); IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->GetVirtualRegister());
IMLPhysRegisterSet physRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat); IMLPhysRegisterSet physRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat);
cemu_assert_debug(physRegSet.HasAnyAvailable()); // register uses type with no valid pool cemu_assert_debug(physRegSet.HasAnyAvailable()); // register uses type with no valid pool
for (auto& liverangeItr : livenessTimeline.activeRanges) for (auto& liverangeItr : livenessTimeline.activeRanges)
{ {
cemu_assert_debug(liverangeItr->range->physicalRegister >= 0); cemu_assert_debug(liverangeItr->GetPhysicalRegister() >= 0);
physRegSet.SetReserved(liverangeItr->range->physicalRegister); physRegSet.SetReserved(liverangeItr->GetPhysicalRegister());
} }
// check intersections with other ranges and determine allowed registers // check intersections with other ranges and determine allowed registers
IMLPhysRegisterSet localAvailableRegsMask = physRegSet; // mask of registers that are currently not used (does not include range checks in other segments) IMLPhysRegisterSet localAvailableRegsMask = physRegSet; // mask of registers that are currently not used (does not include range checks in other segments)
if(physRegSet.HasAnyAvailable()) if(physRegSet.HasAnyAvailable())
{ {
// check globally in all segments // check globally in all segments
PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr->range, physRegSet); PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr, physRegSet);
} }
if (!physRegSet.HasAnyAvailable()) if (!physRegSet.HasAnyAvailable())
{ {
@ -427,7 +427,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon
struct struct
{ {
sint32 distance; sint32 distance;
raLivenessSubrange_t* largestHoleSubrange; raLivenessRange* largestHoleSubrange;
sint32 cost; // additional cost of choosing this candidate sint32 cost; // additional cost of choosing this candidate
}localRangeHoleCutting; }localRangeHoleCutting;
// split current range (this is generally only a good choice when the current range is long but rarely used) // split current range (this is generally only a good choice when the current range is long but rarely used)
@ -440,7 +440,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon
// explode a inter-segment range (prefer ranges that are not read/written in this segment) // explode a inter-segment range (prefer ranges that are not read/written in this segment)
struct struct
{ {
raLivenessRange_t* range; raLivenessRange* range;
sint32 cost; sint32 cost;
sint32 distance; // size of hole sint32 distance; // size of hole
// note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange
@ -540,7 +540,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon
if( distance < 2) if( distance < 2)
continue; continue;
sint32 cost; sint32 cost;
cost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(candidate->range); cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate);
// if the hole is not large enough, add cost of splitting current subrange // if the hole is not large enough, add cost of splitting current subrange
if (distance < requiredSize) if (distance < requiredSize)
{ {
@ -553,7 +553,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon
{ {
spillStrategies.explodeRange.cost = cost; spillStrategies.explodeRange.cost = cost;
spillStrategies.explodeRange.distance = distance; spillStrategies.explodeRange.distance = distance;
spillStrategies.explodeRange.range = candidate->range; spillStrategies.explodeRange.range = candidate;
} }
} }
// choose strategy // choose strategy
@ -581,7 +581,7 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon
else if (subrangeItr->start.index == RA_INTER_RANGE_START) else if (subrangeItr->start.index == RA_INTER_RANGE_START)
{ {
// alternative strategy if we have no other choice: explode current range // alternative strategy if we have no other choice: explode current range
PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr->range); PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr);
} }
else else
assert_dbg(); assert_dbg();
@ -603,27 +603,27 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon
if (candidate->end.index != RA_INTER_RANGE_END) if (candidate->end.index != RA_INTER_RANGE_END)
continue; continue;
// only select candidates that clash with current subrange // only select candidates that clash with current subrange
if (candidate->range->physicalRegister < 0 && candidate != subrangeItr) if (candidate->GetPhysicalRegister() < 0 && candidate != subrangeItr)
continue; continue;
sint32 cost; sint32 cost;
cost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(candidate->range); cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate);
// compare with current best candidate for this strategy // compare with current best candidate for this strategy
if (cost < spillStrategies.explodeRange.cost) if (cost < spillStrategies.explodeRange.cost)
{ {
spillStrategies.explodeRange.cost = cost; spillStrategies.explodeRange.cost = cost;
spillStrategies.explodeRange.distance = INT_MAX; spillStrategies.explodeRange.distance = INT_MAX;
spillStrategies.explodeRange.range = candidate->range; spillStrategies.explodeRange.range = candidate;
} }
} }
// add current range as a candidate too // add current range as a candidate too
sint32 ownCost; sint32 ownCost;
ownCost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(subrangeItr->range); ownCost = PPCRecRARange_estimateCostAfterRangeExplode(subrangeItr);
if (ownCost < spillStrategies.explodeRange.cost) if (ownCost < spillStrategies.explodeRange.cost)
{ {
spillStrategies.explodeRange.cost = ownCost; spillStrategies.explodeRange.cost = ownCost;
spillStrategies.explodeRange.distance = INT_MAX; spillStrategies.explodeRange.distance = INT_MAX;
spillStrategies.explodeRange.range = subrangeItr->range; spillStrategies.explodeRange.range = subrangeItr;
} }
if (spillStrategies.explodeRange.cost == INT_MAX) if (spillStrategies.explodeRange.cost == INT_MAX)
assert_dbg(); // should not happen assert_dbg(); // should not happen
@ -632,10 +632,11 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon
return false; return false;
} }
// assign register to range // assign register to range
subrangeItr->range->physicalRegister = physRegSet.GetFirstAvailableReg(); //subrangeItr->SetPhysicalRegister(physRegSet.GetFirstAvailableReg());
subrangeItr->SetPhysicalRegisterForCluster(physRegSet.GetFirstAvailableReg());
livenessTimeline.AddActiveRange(subrangeItr); livenessTimeline.AddActiveRange(subrangeItr);
// next // next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
} }
return true; return true;
} }
@ -673,137 +674,30 @@ void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t*
} }
} }
struct subrangeEndingInfo_t
{
//boost::container::small_vector<raLivenessSubrange_t*, 32> subrangeList2;
raLivenessSubrange_t* subrangeList[SUBRANGE_LIST_SIZE];
sint32 subrangeCount;
bool hasUndefinedEndings;
};
void _findSubrangeWriteEndings(raLivenessSubrange_t* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info)
{
if (depth >= 30)
{
info->hasUndefinedEndings = true;
return;
}
if (subrange->lastIterationIndex == iterationIndex)
return; // already processed
subrange->lastIterationIndex = iterationIndex;
if (subrange->hasStoreDelayed)
return; // no need to traverse this subrange
IMLSegment* imlSegment = subrange->imlSegment;
if (subrange->end.index != RA_INTER_RANGE_END)
{
// ending segment
if (info->subrangeCount >= SUBRANGE_LIST_SIZE)
{
info->hasUndefinedEndings = true;
return;
}
else
{
info->subrangeList[info->subrangeCount] = subrange;
info->subrangeCount++;
}
return;
}
// traverse next subranges in flow
if (imlSegment->nextSegmentBranchNotTaken)
{
if (subrange->subrangeBranchNotTaken == nullptr)
{
info->hasUndefinedEndings = true;
}
else
{
_findSubrangeWriteEndings(subrange->subrangeBranchNotTaken, iterationIndex, depth + 1, info);
}
}
if (imlSegment->nextSegmentBranchTaken)
{
if (subrange->subrangeBranchTaken == nullptr)
{
info->hasUndefinedEndings = true;
}
else
{
_findSubrangeWriteEndings(subrange->subrangeBranchTaken, iterationIndex, depth + 1, info);
}
}
}
void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange)
{
if (subrange->end.index != RA_INTER_RANGE_END)
return;
// analyze data flow across segments (if this segment has writes)
if (subrange->hasStore)
{
subrangeEndingInfo_t writeEndingInfo;
writeEndingInfo.subrangeCount = 0;
writeEndingInfo.hasUndefinedEndings = false;
_findSubrangeWriteEndings(subrange, PPCRecRA_getNextIterationIndex(), 0, &writeEndingInfo);
if (writeEndingInfo.hasUndefinedEndings == false)
{
// get cost of delaying store into endings
sint32 delayStoreCost = 0;
bool alreadyStoredInAllEndings = true;
for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++)
{
raLivenessSubrange_t* subrangeItr = writeEndingInfo.subrangeList[i];
if( subrangeItr->hasStore )
continue; // this ending already stores, no extra cost
alreadyStoredInAllEndings = false;
sint32 storeCost = PPCRecRARange_getReadWriteCost(subrangeItr->imlSegment);
delayStoreCost = std::max(storeCost, delayStoreCost);
}
if (alreadyStoredInAllEndings)
{
subrange->hasStore = false;
subrange->hasStoreDelayed = true;
}
else if (delayStoreCost <= PPCRecRARange_getReadWriteCost(subrange->imlSegment))
{
subrange->hasStore = false;
subrange->hasStoreDelayed = true;
for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++)
{
raLivenessSubrange_t* subrangeItr = writeEndingInfo.subrangeList[i];
subrangeItr->hasStore = true;
}
}
}
}
}
inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId) inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId)
{ {
return IMLReg(baseFormat, baseFormat, 0, regId); return IMLReg(baseFormat, baseFormat, 0, regId);
} }
void PPCRecRA_insertGPRLoadInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span<raLivenessSubrange_t*> loadList) void PPCRecRA_insertGPRLoadInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span<raLivenessRange*> loadList)
{ {
PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, loadList.size()); PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, loadList.size());
for (sint32 i = 0; i < loadList.size(); i++) for (sint32 i = 0; i < loadList.size(); i++)
{ {
IMLRegFormat baseFormat = ctx.regIdToBaseFormat[loadList[i]->range->virtualRegister]; IMLRegFormat baseFormat = ctx.regIdToBaseFormat[loadList[i]->GetVirtualRegister()];
cemu_assert_debug(baseFormat != IMLRegFormat::INVALID_FORMAT); cemu_assert_debug(baseFormat != IMLRegFormat::INVALID_FORMAT);
imlSegment->imlList[insertIndex + i].make_r_name(_MakeNativeReg(baseFormat, loadList[i]->range->physicalRegister), loadList[i]->range->name); imlSegment->imlList[insertIndex + i].make_r_name(_MakeNativeReg(baseFormat, loadList[i]->GetPhysicalRegister()), loadList[i]->GetName());
} }
} }
void PPCRecRA_insertGPRStoreInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span<raLivenessSubrange_t*> storeList) void PPCRecRA_insertGPRStoreInstructions(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 insertIndex, std::span<raLivenessRange*> storeList)
{ {
PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, storeList.size()); PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, storeList.size());
for (size_t i = 0; i < storeList.size(); i++) for (size_t i = 0; i < storeList.size(); i++)
{ {
IMLRegFormat baseFormat = ctx.regIdToBaseFormat[storeList[i]->range->virtualRegister]; IMLRegFormat baseFormat = ctx.regIdToBaseFormat[storeList[i]->GetVirtualRegister()];
cemu_assert_debug(baseFormat != IMLRegFormat::INVALID_FORMAT); cemu_assert_debug(baseFormat != IMLRegFormat::INVALID_FORMAT);
imlSegment->imlList[insertIndex + i].make_name_r(storeList[i]->range->name, _MakeNativeReg(baseFormat, storeList[i]->range->physicalRegister)); imlSegment->imlList[insertIndex + i].make_name_r(storeList[i]->GetName(), _MakeNativeReg(baseFormat, storeList[i]->GetPhysicalRegister()));
} }
} }
@ -814,7 +708,7 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML
sint32 index = 0; sint32 index = 0;
sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0; sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 1 : 0;
// load register ranges that are supplied from previous segments // load register ranges that are supplied from previous segments
raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges;
while(subrangeItr) while(subrangeItr)
{ {
if (subrangeItr->start.index == RA_INTER_RANGE_START) if (subrangeItr->start.index == RA_INTER_RANGE_START)
@ -827,12 +721,12 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML
assert_dbg(); assert_dbg();
} }
// update translation table // update translation table
cemu_assert_debug(!virtId2PhysRegIdMap.contains(subrangeItr->range->virtualRegister)); cemu_assert_debug(!virtId2PhysRegIdMap.contains(subrangeItr->GetVirtualRegister()));
#endif #endif
virtId2PhysRegIdMap.try_emplace(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); virtId2PhysRegIdMap.try_emplace(subrangeItr->GetVirtualRegister(), subrangeItr->GetPhysicalRegister());
} }
// next // next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
} }
// process instructions // process instructions
while(index < imlSegment->imlList.size() + 1) while(index < imlSegment->imlList.size() + 1)
@ -842,7 +736,7 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML
for (auto& expiredRange : livenessTimeline.GetExpiredRanges()) for (auto& expiredRange : livenessTimeline.GetExpiredRanges())
{ {
// update translation table // update translation table
virtId2PhysRegIdMap.erase(expiredRange->range->virtualRegister); virtId2PhysRegIdMap.erase(expiredRange->GetVirtualRegister());
// store GPR if required // store GPR if required
// special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed
if (expiredRange->hasStore) if (expiredRange->hasStore)
@ -874,9 +768,9 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML
subrangeItr->start.index--; subrangeItr->start.index--;
} }
// update translation table // update translation table
virtId2PhysRegIdMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); virtId2PhysRegIdMap.insert_or_assign(subrangeItr->GetVirtualRegister(), subrangeItr->GetPhysicalRegister());
} }
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
} }
// rewrite registers // rewrite registers
if (index < imlSegment->imlList.size()) if (index < imlSegment->imlList.size())
@ -885,12 +779,12 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML
index++; index++;
} }
// expire infinite subranges (subranges which cross the segment border) // expire infinite subranges (subranges which cross the segment border)
std::vector<raLivenessSubrange_t*> loadStoreList; std::vector<raLivenessRange*> loadStoreList;
livenessTimeline.ExpireRanges(RA_INTER_RANGE_END); livenessTimeline.ExpireRanges(RA_INTER_RANGE_END);
for (auto liverange : livenessTimeline.GetExpiredRanges()) for (auto liverange : livenessTimeline.GetExpiredRanges())
{ {
// update translation table // update translation table
virtId2PhysRegIdMap.erase(liverange->range->virtualRegister); virtId2PhysRegIdMap.erase(liverange->GetVirtualRegister());
// store GPR // store GPR
if (liverange->hasStore) if (liverange->hasStore)
loadStoreList.emplace_back(liverange); loadStoreList.emplace_back(liverange);
@ -910,10 +804,10 @@ void IMLRA_GenerateSegmentMoveInstructions(IMLRegisterAllocatorContext& ctx, IML
if (subrangeItr->_noLoad == false) if (subrangeItr->_noLoad == false)
loadStoreList.emplace_back(subrangeItr); loadStoreList.emplace_back(subrangeItr);
// update translation table // update translation table
virtId2PhysRegIdMap.try_emplace(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); virtId2PhysRegIdMap.try_emplace(subrangeItr->GetVirtualRegister(), subrangeItr->GetPhysicalRegister());
} }
// next // next
subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; subrangeItr = subrangeItr->link_allSegmentRanges.next;
} }
if (!loadStoreList.empty()) if (!loadStoreList.empty())
PPCRecRA_insertGPRLoadInstructions(ctx, imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList); PPCRecRA_insertGPRLoadInstructions(ctx, imlSegment, imlSegment->imlList.size() - suffixInstructionCount, loadStoreList);
@ -1026,7 +920,7 @@ void IMLRA_CalculateLivenessRanges(IMLRegisterAllocatorContext& ctx)
} }
} }
raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 vGPR, raLivenessRange_t* range) raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID vGPR, IMLName name)
{ {
IMLRARegAbstractLiveness* abstractRange = _GetAbstractRange(ctx, imlSegment, vGPR); IMLRARegAbstractLiveness* abstractRange = _GetAbstractRange(ctx, imlSegment, vGPR);
if (!abstractRange) if (!abstractRange)
@ -1034,7 +928,7 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext
if (abstractRange->isProcessed) if (abstractRange->isProcessed)
{ {
// return already existing segment // return already existing segment
raLivenessSubrange_t* existingRange = IMLRA_GetSubrange(imlSegment, vGPR); raLivenessRange* existingRange = IMLRA_GetSubrange(imlSegment, vGPR);
cemu_assert_debug(existingRange); cemu_assert_debug(existingRange);
return existingRange; return existingRange;
} }
@ -1043,7 +937,7 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr); cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr);
#endif #endif
raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ctx.deprGenContext, range, imlSegment, abstractRange->usageStart, abstractRange->usageEnd); raLivenessRange* subrange = PPCRecRA_createSubrange(ctx.deprGenContext, imlSegment, vGPR, name, abstractRange->usageStart, abstractRange->usageEnd);
// traverse forward // traverse forward
if (abstractRange->usageEnd == RA_INTER_RANGE_END) if (abstractRange->usageEnd == RA_INTER_RANGE_END)
{ {
@ -1052,7 +946,8 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext
IMLRARegAbstractLiveness* branchTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchTaken, vGPR); IMLRARegAbstractLiveness* branchTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchTaken, vGPR);
if (branchTakenRange && branchTakenRange->usageStart == RA_INTER_RANGE_START) if (branchTakenRange && branchTakenRange->usageStart == RA_INTER_RANGE_START)
{ {
subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, range); subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, name);
subrange->subrangeBranchTaken->previousRanges.push_back(subrange);
cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START);
} }
} }
@ -1061,7 +956,8 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext
IMLRARegAbstractLiveness* branchNotTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR); IMLRARegAbstractLiveness* branchNotTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR);
if (branchNotTakenRange && branchNotTakenRange->usageStart == RA_INTER_RANGE_START) if (branchNotTakenRange && branchNotTakenRange->usageStart == RA_INTER_RANGE_START)
{ {
subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, range); subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, name);
subrange->subrangeBranchNotTaken->previousRanges.push_back(subrange);
cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START);
} }
} }
@ -1075,7 +971,7 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext
if(!prevRange) if(!prevRange)
continue; continue;
if (prevRange->usageEnd == RA_INTER_RANGE_END) if (prevRange->usageEnd == RA_INTER_RANGE_END)
PPCRecRA_convertToMappedRanges(ctx, it, vGPR, range); PPCRecRA_convertToMappedRanges(ctx, it, vGPR, name);
} }
} }
// for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction // for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction
@ -1100,13 +996,12 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML
if(it.second.isProcessed) if(it.second.isProcessed)
continue; continue;
IMLRegID regId = it.first; IMLRegID regId = it.first;
raLivenessRange_t* range = PPCRecRA_createRangeBase(ctx.deprGenContext, regId, ctx.raParam->regIdToName.find(regId)->second); PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, ctx.raParam->regIdToName.find(regId)->second);
PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, range);
} }
// fill created ranges with read/write location indices // fill created ranges with read/write location indices
// note that at this point there is only one range per register per segment // note that at this point there is only one range per register per segment
// and the algorithm below relies on this // and the algorithm below relies on this
const std::unordered_map<IMLRegID, raLivenessSubrange_t*>& regToSubrange = IMLRA_GetSubrangeMap(imlSegment); const std::unordered_map<IMLRegID, raLivenessRange*>& regToSubrange = IMLRA_GetSubrangeMap(imlSegment);
size_t index = 0; size_t index = 0;
IMLUsedRegisters gprTracking; IMLUsedRegisters gprTracking;
while (index < imlSegment->imlList.size()) while (index < imlSegment->imlList.size())
@ -1114,7 +1009,7 @@ void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IML
imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); imlSegment->imlList[index].CheckRegisterUsage(&gprTracking);
gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) { gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) {
IMLRegID gprId = gprReg.GetRegID(); IMLRegID gprId = gprReg.GetRegID();
raLivenessSubrange_t* subrange = regToSubrange.find(gprId)->second; raLivenessRange* subrange = regToSubrange.find(gprId)->second;
PPCRecRA_updateOrAddSubrangeLocation(subrange, index, !isWritten, isWritten); PPCRecRA_updateOrAddSubrangeLocation(subrange, index, !isWritten, isWritten);
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
if ((sint32)index < subrange->start.index) if ((sint32)index < subrange->start.index)
@ -1351,7 +1246,7 @@ void IMLRA_ProcessFlowAndCalculateLivenessRanges(IMLRegisterAllocatorContext& ct
IMLRA_ConvertAbstractToLivenessRanges(ctx, segIt); IMLRA_ConvertAbstractToLivenessRanges(ctx, segIt);
} }
void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessRange* subrange)
{ {
bool isRead = false; bool isRead = false;
bool isWritten = false; bool isWritten = false;
@ -1376,23 +1271,135 @@ void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange)
subrange->_noLoad = true; subrange->_noLoad = true;
} }
void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext)
struct subrangeEndingInfo_t
{ {
// this function is called after _assignRegisters(), which means that all ranges are already final and wont change anymore //boost::container::small_vector<raLivenessSubrange_t*, 32> subrangeList2;
// first do a per-subrange pass raLivenessRange* subrangeList[SUBRANGE_LIST_SIZE];
for (auto& range : ppcImlGenContext->raInfo.list_ranges) sint32 subrangeCount;
bool hasUndefinedEndings;
};
// Walks the successor links (branch not taken first, then branch taken) starting at subrange and
// appends every reachable range that ends inside its own segment to info->subrangeList.
// iterationIndex: visit stamp; a range whose lastIterationIndex already equals it is skipped
// depth: current recursion depth, the walk gives up at depth 30
// info->hasUndefinedEndings is set whenever the set of endings cannot be fully determined
void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info)
{
	// recursion limit reached, treat the result as incomplete
	if (depth >= 30)
	{
		info->hasUndefinedEndings = true;
		return;
	}
	// every range is handled at most once per iteration index
	if (subrange->lastIterationIndex == iterationIndex)
		return;
	subrange->lastIterationIndex = iterationIndex;
	// ranges with a delayed store are not traversed any further
	if (subrange->hasStoreDelayed)
		return;
	const bool endsInsideSegment = (subrange->end.index != RA_INTER_RANGE_END);
	if (endsInsideSegment)
	{
		// this is an ending, record it if there is still room in the list
		if (info->subrangeCount < SUBRANGE_LIST_SIZE)
			info->subrangeList[info->subrangeCount++] = subrange;
		else
			info->hasUndefinedEndings = true;
		return;
	}
	// the range leaves the segment, continue along both outgoing edges
	IMLSegment* const segment = subrange->imlSegment;
	auto followEdge = [&](IMLSegment* nextSegment, raLivenessRange* nextRange)
	{
		if (!nextSegment)
			return; // no such edge
		if (nextRange)
			_findSubrangeWriteEndings(nextRange, iterationIndex, depth + 1, info);
		else
			info->hasUndefinedEndings = true; // edge exists but no range continues along it
	};
	followEdge(segment->nextSegmentBranchNotTaken, subrange->subrangeBranchNotTaken);
	followEdge(segment->nextSegmentBranchTaken, subrange->subrangeBranchTaken);
}
// Decides for a storing range that leaves its segment whether its store can be pushed down into the
// ranges where the value finally ends. On success the range gets hasStore=false / hasStoreDelayed=true
// and, if required, the endings get hasStore=true.
static void _analyzeRangeDataFlow(raLivenessRange* subrange)
{
	// only ranges which extend past the end of their segment are candidates
	if (subrange->end.index != RA_INTER_RANGE_END)
		return;
	// nothing to delay if this range does not store
	if (!subrange->hasStore)
		return;
	// collect all endings reachable from this range
	subrangeEndingInfo_t endings;
	endings.subrangeCount = 0;
	endings.hasUndefinedEndings = false;
	_findSubrangeWriteEndings(subrange, PPCRecRA_getNextIterationIndex(), 0, &endings);
	if (endings.hasUndefinedEndings)
		return;
	// cost of delaying is the most expensive ending which does not store yet
	sint32 worstEndingCost = 0;
	bool everyEndingStores = true;
	for (sint32 i = 0; i < endings.subrangeCount; i++)
	{
		raLivenessRange* ending = endings.subrangeList[i];
		if (ending->hasStore)
			continue; // this ending already stores, no extra cost
		everyEndingStores = false;
		sint32 endingCost = PPCRecRARange_getReadWriteCost(ending->imlSegment);
		worstEndingCost = std::max(endingCost, worstEndingCost);
	}
	// keep the store here if delaying it would be more expensive
	if (!everyEndingStores && worstEndingCost > PPCRecRARange_getReadWriteCost(subrange->imlSegment))
		return;
	subrange->hasStore = false;
	subrange->hasStoreDelayed = true;
	if (!everyEndingStores)
	{
		// the endings take over the store
		for (sint32 i = 0; i < endings.subrangeCount; i++)
			endings.subrangeList[i]->hasStore = true;
	}
}
// Runs the two data flow passes over every liveness range of every segment.
// This function is called after _assignRegisters(), which means that all liveness ranges are already final and must not be changed anymore
void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext)
{
	// pass 1: track read/write dependencies of each range on its own
	for (auto& seg : ppcImlGenContext->segmentList2)
	{
		for (raLivenessRange* range = seg->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
			PPCRecRA_analyzeSubrangeDataDependencyV2(range);
	}
	// pass 2: scan along the flow between ranges
	for (auto& seg : ppcImlGenContext->segmentList2)
	{
		for (raLivenessRange* range = seg->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next)
			_analyzeRangeDataFlow(range);
	}
}
@ -1407,8 +1414,6 @@ void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext
ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment
ppcImlGenContext->raInfo.list_ranges = std::vector<raLivenessRange_t*>();
ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size()); ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size());
IMLRA_CalculateLivenessRanges(ctx); IMLRA_CalculateLivenessRanges(ctx);

View file

@ -3,45 +3,110 @@
#include "IMLRegisterAllocatorRanges.h" #include "IMLRegisterAllocatorRanges.h"
#include "util/helpers/MemoryPool.h" #include "util/helpers/MemoryPool.h"
void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map<IMLRegID, raLivenessSubrange_t*>& root, raLivenessSubrange_t* subrange) uint32 PPCRecRA_getNextIterationIndex();
// Returns the virtual register ID stored in this range.
IMLRegID raLivenessRange::GetVirtualRegister() const
{
	return this->virtualRegister;
}
// Returns the physical register stored in this range (PPCRecRA_createSubrange initializes it to -1).
sint32 raLivenessRange::GetPhysicalRegister() const
{
	return this->physicalRegister;
}
// Returns the IML name stored in this range.
IMLName raLivenessRange::GetName() const
{
	return this->name;
}
// Sets the physical register of this single range only; connected ranges are left untouched.
// Use SetPhysicalRegisterForCluster() to update all connected ranges.
void raLivenessRange::SetPhysicalRegister(sint32 physicalRegister)
{
	// flags any call to this function since it is not used yet
	cemu_assert_suspicious();
	this->physicalRegister = physicalRegister;
}
// Assigns the same physical register to this range and to every range connected to it
// (see GetAllSubrangesInCluster()).
void raLivenessRange::SetPhysicalRegisterForCluster(sint32 physicalRegister)
{
	for (raLivenessRange* member : GetAllSubrangesInCluster())
		member->physicalRegister = physicalRegister;
}
// Collects this range plus all ranges reachable from it through successor links
// (subrangeBranchTaken / subrangeBranchNotTaken) and predecessor links (previousRanges).
// The returned list starts with this range. Every visited range gets its lastIterationIndex
// overwritten with a freshly acquired iteration index.
boost::container::small_vector<raLivenessRange*, 32> raLivenessRange::GetAllSubrangesInCluster()
{
	const uint32 visitStamp = PPCRecRA_getNextIterationIndex();
	boost::container::small_vector<raLivenessRange*, 32> cluster;
	// appends a range to the work list unless it was already seen during this call
	auto enqueueIfUnvisited = [&](raLivenessRange* candidate)
	{
		if (candidate->lastIterationIndex == visitStamp)
			return;
		candidate->lastIterationIndex = visitStamp;
		cluster.push_back(candidate);
	};
	// seed with ourselves
	this->lastIterationIndex = visitStamp;
	cluster.push_back(this);
	// the list doubles as work queue, it grows while we iterate over it
	for (size_t i = 0; i < cluster.size(); i++)
	{
		raLivenessRange* current = cluster[i];
		// successors
		if (current->subrangeBranchTaken)
			enqueueIfUnvisited(current->subrangeBranchTaken);
		if (current->subrangeBranchNotTaken)
			enqueueIfUnvisited(current->subrangeBranchNotTaken);
		// predecessors
		for (raLivenessRange* predecessor : current->previousRanges)
			enqueueIfUnvisited(predecessor);
	}
	return cluster;
}
// Inserts subrange at the head of the per-virtual-register list stored in root.
// root maps a virtual register ID to the first range of a doubly linked list (link_sameVirtualRegister).
// The key is taken from subrange->GetVirtualRegister().
void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map<IMLRegID, raLivenessRange*>& root, raLivenessRange* subrange)
{
	// single lookup: either creates the entry with subrange as head or yields the existing head
	auto [it, inserted] = root.try_emplace(subrange->GetVirtualRegister(), subrange);
	if (inserted)
	{
		// new single element
		subrange->link_sameVirtualRegister.prev = nullptr;
		subrange->link_sameVirtualRegister.next = nullptr;
		return;
	}
	// insert in first position
	raLivenessRange* previousHead = it->second;
	subrange->link_sameVirtualRegister.next = previousHead;
	// the head has no predecessor; removeLink_perVirtualGPR relies on prev == nullptr to detect the head
	subrange->link_sameVirtualRegister.prev = nullptr;
	// keep the back link of the former head consistent
	previousHead->link_sameVirtualRegister.prev = subrange;
	it->second = subrange;
}
void PPCRecRARange_addLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) void PPCRecRARange_addLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange)
{ {
subrange->link_segmentSubrangesGPR.next = *root; subrange->link_allSegmentRanges.next = *root;
if (*root) if (*root)
(*root)->link_segmentSubrangesGPR.prev = subrange; (*root)->link_allSegmentRanges.prev = subrange;
subrange->link_segmentSubrangesGPR.prev = nullptr; subrange->link_allSegmentRanges.prev = nullptr;
*root = subrange; *root = subrange;
} }
void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map<IMLRegID, raLivenessSubrange_t*>& root, raLivenessSubrange_t* subrange) void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map<IMLRegID, raLivenessRange*>& root, raLivenessRange* subrange)
{ {
IMLRegID regId = subrange->range->virtualRegister; IMLRegID regId = subrange->GetVirtualRegister();
raLivenessSubrange_t* nextRange = subrange->link_sameVirtualRegisterGPR.next; raLivenessRange* nextRange = subrange->link_sameVirtualRegister.next;
raLivenessSubrange_t* prevRange = subrange->link_sameVirtualRegisterGPR.prev; raLivenessRange* prevRange = subrange->link_sameVirtualRegister.prev;
raLivenessSubrange_t* newBase = prevRange ? prevRange : nextRange; raLivenessRange* newBase = prevRange ? prevRange : nextRange;
if (prevRange) if (prevRange)
prevRange->link_sameVirtualRegisterGPR.next = subrange->link_sameVirtualRegisterGPR.next; prevRange->link_sameVirtualRegister.next = subrange->link_sameVirtualRegister.next;
if (nextRange) if (nextRange)
nextRange->link_sameVirtualRegisterGPR.prev = subrange->link_sameVirtualRegisterGPR.prev; nextRange->link_sameVirtualRegister.prev = subrange->link_sameVirtualRegister.prev;
if (!prevRange) if (!prevRange)
{ {
@ -55,81 +120,78 @@ void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map<IMLRegID, raLiven
} }
} }
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
subrange->link_sameVirtualRegisterGPR.prev = (raLivenessSubrange_t*)1; subrange->link_sameVirtualRegister.prev = (raLivenessRange*)1;
subrange->link_sameVirtualRegisterGPR.next = (raLivenessSubrange_t*)1; subrange->link_sameVirtualRegister.next = (raLivenessRange*)1;
#endif #endif
} }
void PPCRecRARange_removeLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) void PPCRecRARange_removeLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange)
{ {
raLivenessSubrange_t* tempPrev = subrange->link_segmentSubrangesGPR.prev; raLivenessRange* tempPrev = subrange->link_allSegmentRanges.prev;
if (subrange->link_segmentSubrangesGPR.prev) if (subrange->link_allSegmentRanges.prev)
subrange->link_segmentSubrangesGPR.prev->link_segmentSubrangesGPR.next = subrange->link_segmentSubrangesGPR.next; subrange->link_allSegmentRanges.prev->link_allSegmentRanges.next = subrange->link_allSegmentRanges.next;
else else
(*root) = subrange->link_segmentSubrangesGPR.next; (*root) = subrange->link_allSegmentRanges.next;
if (subrange->link_segmentSubrangesGPR.next) if (subrange->link_allSegmentRanges.next)
subrange->link_segmentSubrangesGPR.next->link_segmentSubrangesGPR.prev = tempPrev; subrange->link_allSegmentRanges.next->link_allSegmentRanges.prev = tempPrev;
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
subrange->link_segmentSubrangesGPR.prev = (raLivenessSubrange_t*)1; subrange->link_allSegmentRanges.prev = (raLivenessRange*)1;
subrange->link_segmentSubrangesGPR.next = (raLivenessSubrange_t*)1; subrange->link_allSegmentRanges.next = (raLivenessRange*)1;
#endif #endif
} }
MemoryPoolPermanentObjects<raLivenessRange_t> memPool_livenessRange(4096); MemoryPoolPermanentObjects<raLivenessRange> memPool_livenessSubrange(4096);
MemoryPoolPermanentObjects<raLivenessSubrange_t> memPool_livenessSubrange(4096);
raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name) raLivenessRange* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, sint32 startIndex, sint32 endIndex)
{ {
raLivenessRange_t* livenessRange = memPool_livenessRange.acquireObj(); raLivenessRange* range = memPool_livenessSubrange.acquireObj();
livenessRange->list_subranges.resize(0); range->previousRanges.clear();
livenessRange->virtualRegister = virtualRegister; range->list_locations.resize(0);
livenessRange->name = name; range->imlSegment = imlSegment;
livenessRange->physicalRegister = -1; PPCRecompilerIml_setSegmentPoint(&range->start, imlSegment, startIndex);
ppcImlGenContext->raInfo.list_ranges.push_back(livenessRange); PPCRecompilerIml_setSegmentPoint(&range->end, imlSegment, endIndex);
return livenessRange; // register mapping
} range->virtualRegister = virtualRegister;
range->name = name;
raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex) range->physicalRegister = -1;
{
raLivenessSubrange_t* livenessSubrange = memPool_livenessSubrange.acquireObj();
livenessSubrange->list_locations.resize(0);
livenessSubrange->range = range;
livenessSubrange->imlSegment = imlSegment;
PPCRecompilerIml_setSegmentPoint(&livenessSubrange->start, imlSegment, startIndex);
PPCRecompilerIml_setSegmentPoint(&livenessSubrange->end, imlSegment, endIndex);
// default values // default values
livenessSubrange->hasStore = false; range->hasStore = false;
livenessSubrange->hasStoreDelayed = false; range->hasStoreDelayed = false;
livenessSubrange->lastIterationIndex = 0; range->lastIterationIndex = 0;
livenessSubrange->subrangeBranchNotTaken = nullptr; range->subrangeBranchNotTaken = nullptr;
livenessSubrange->subrangeBranchTaken = nullptr; range->subrangeBranchTaken = nullptr;
livenessSubrange->_noLoad = false; range->_noLoad = false;
// add to range // add to segment linked lists
range->list_subranges.push_back(livenessSubrange); PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, range);
// add to segment PPCRecRARange_addLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, range);
PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualGPR2, livenessSubrange); return range;
PPCRecRARange_addLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, livenessSubrange);
return livenessSubrange;
} }
void _unlinkSubrange(raLivenessSubrange_t* subrange) void _unlinkSubrange(raLivenessRange* subrange)
{ {
IMLSegment* imlSegment = subrange->imlSegment; IMLSegment* imlSegment = subrange->imlSegment;
PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualGPR2, subrange); PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, subrange);
PPCRecRARange_removeLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, subrange); PPCRecRARange_removeLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, subrange);
} }
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange) void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange)
{ {
_unlinkSubrange(subrange); _unlinkSubrange(subrange);
subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange)); //subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange));
subrange->list_locations.clear(); subrange->list_locations.clear();
// unlink reverse references
if(subrange->subrangeBranchTaken)
subrange->subrangeBranchTaken->previousRanges.erase(std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), subrange));
if(subrange->subrangeBranchNotTaken)
subrange->subrangeBranchTaken->previousRanges.erase(std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), subrange));
PPCRecompilerIml_removeSegmentPoint(&subrange->start); PPCRecompilerIml_removeSegmentPoint(&subrange->start);
PPCRecompilerIml_removeSegmentPoint(&subrange->end); PPCRecompilerIml_removeSegmentPoint(&subrange->end);
memPool_livenessSubrange.releaseObj(subrange); memPool_livenessSubrange.releaseObj(subrange);
} }
void _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange) // leaves range and linked ranges in invalid state. Only use at final clean up when no range is going to be accessed anymore
void _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange)
{ {
_unlinkSubrange(subrange); _unlinkSubrange(subrange);
PPCRecompilerIml_removeSegmentPoint(&subrange->start); PPCRecompilerIml_removeSegmentPoint(&subrange->start);
@ -137,49 +199,30 @@ void _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext_t* ppcImlGenCont
memPool_livenessSubrange.releaseObj(subrange); memPool_livenessSubrange.releaseObj(subrange);
} }
void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range) void PPCRecRA_deleteSubrangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange)
{ {
for (auto& subrange : range->list_subranges) auto clusterRanges = subrange->GetAllSubrangesInCluster();
for (auto& subrange : clusterRanges)
{ {
_PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange); _PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext, subrange);
} }
ppcImlGenContext->raInfo.list_ranges.erase(std::find(ppcImlGenContext->raInfo.list_ranges.begin(), ppcImlGenContext->raInfo.list_ranges.end(), range));
memPool_livenessRange.releaseObj(range);
}
void PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range)
{
for (auto& subrange : range->list_subranges)
{
_PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange);
}
memPool_livenessRange.releaseObj(range);
} }
// Deletes every liveness range of every segment and resets the per-segment range lists.
void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext)
{
	for (auto& seg : ppcImlGenContext->segmentList2)
	{
		auto& segmentRaInfo = seg->raInfo;
		// deleting a range removes it from the list, so keep taking the head until the list is empty
		while (raLivenessRange* head = segmentRaInfo.linkedList_allSubranges)
			_PPCRecRA_deleteSubrangeNoUnlink(ppcImlGenContext, head);
		segmentRaInfo.linkedList_allSubranges = nullptr;
		segmentRaInfo.linkedList_perVirtualRegister.clear();
	}
}
void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange) void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange)
{
cemu_assert_debug(range != absorbedRange);
cemu_assert_debug(range->virtualRegister == absorbedRange->virtualRegister);
// move all subranges from absorbedRange to range
for (auto& subrange : absorbedRange->list_subranges)
{
range->list_subranges.push_back(subrange);
subrange->range = range;
}
absorbedRange->list_subranges.clear();
PPCRecRA_deleteRange(ppcImlGenContext, absorbedRange);
}
void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange)
{ {
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
PPCRecRA_debugValidateSubrange(subrange); PPCRecRA_debugValidateSubrange(subrange);
@ -193,6 +236,12 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSub
if (subrange == absorbedSubrange) if (subrange == absorbedSubrange)
assert_dbg(); assert_dbg();
#endif #endif
// update references
if(absorbedSubrange->subrangeBranchTaken)
*std::find(absorbedSubrange->subrangeBranchTaken->previousRanges.begin(), absorbedSubrange->subrangeBranchTaken->previousRanges.end(), absorbedSubrange) = subrange;
if(absorbedSubrange->subrangeBranchNotTaken)
*std::find(absorbedSubrange->subrangeBranchNotTaken->previousRanges.begin(), absorbedSubrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange;
subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken; subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken;
subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken; subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken;
@ -210,29 +259,27 @@ void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSub
PPCRecRA_deleteSubrange(ppcImlGenContext, absorbedSubrange); PPCRecRA_deleteSubrange(ppcImlGenContext, absorbedSubrange);
} }
// remove all inter-segment connections from the range and split it into local ranges (also removes empty ranges) // remove all inter-segment connections from the range cluster and split it into local ranges (also removes empty ranges)
void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range) void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange)
{ {
if (range->list_subranges.size() == 1) auto clusterRanges = originRange->GetAllSubrangesInCluster();
assert_dbg(); for (auto& subrange : clusterRanges)
for (auto& subrange : range->list_subranges)
{ {
if (subrange->list_locations.empty()) if (subrange->list_locations.empty())
continue; continue;
raLivenessRange_t* newRange = PPCRecRA_createRangeBase(ppcImlGenContext, range->virtualRegister, range->name); raLivenessRange* newSubrange = PPCRecRA_createSubrange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), subrange->list_locations.data()[0].index, subrange->list_locations.data()[subrange->list_locations.size() - 1].index + 1);
raLivenessSubrange_t* newSubrange = PPCRecRA_createSubrange(ppcImlGenContext, newRange, subrange->imlSegment, subrange->list_locations.data()[0].index, subrange->list_locations.data()[subrange->list_locations.size() - 1].index + 1);
// copy locations // copy locations
for (auto& location : subrange->list_locations) for (auto& location : subrange->list_locations)
{ {
newSubrange->list_locations.push_back(location); newSubrange->list_locations.push_back(location);
} }
} }
// remove original range // remove subranges
PPCRecRA_deleteRange(ppcImlGenContext, range); PPCRecRA_deleteSubrangeCluster(ppcImlGenContext, originRange);
} }
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange)
{ {
// validate subrange // validate subrange
if (subrange->subrangeBranchTaken && subrange->subrangeBranchTaken->imlSegment != subrange->imlSegment->nextSegmentBranchTaken) if (subrange->subrangeBranchTaken && subrange->subrangeBranchTaken->imlSegment != subrange->imlSegment->nextSegmentBranchTaken)
@ -252,7 +299,7 @@ void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) {}
// The return value is the tail subrange // The return value is the tail subrange
// If trimToHole is true, the end of the head subrange and the start of the tail subrange will be moved to fit the locations // If trimToHole is true, the end of the head subrange and the start of the tail subrange will be moved to fit the locations
// Ranges that begin at RA_INTER_RANGE_START are allowed and can be split // Ranges that begin at RA_INTER_RANGE_START are allowed and can be split
raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole) raLivenessRange* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, sint32 splitIndex, bool trimToHole)
{ {
// validation // validation
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
@ -266,8 +313,7 @@ raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenC
assert_dbg(); assert_dbg();
#endif #endif
// create tail // create tail
raLivenessRange_t* tailRange = PPCRecRA_createRangeBase(ppcImlGenContext, subrange->range->virtualRegister, subrange->range->name); raLivenessRange* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), splitIndex, subrange->end.index);
raLivenessSubrange_t* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, tailRange, subrange->imlSegment, splitIndex, subrange->end.index);
// copy locations // copy locations
for (auto& location : subrange->list_locations) for (auto& location : subrange->list_locations)
{ {
@ -312,7 +358,7 @@ raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenC
return tailSubrange; return tailSubrange;
} }
void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite) void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite)
{ {
if (subrange->list_locations.empty()) if (subrange->list_locations.empty())
{ {
@ -339,13 +385,12 @@ sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment)
return v*v; // 25, 100, 225, 400 return v*v; // 25, 100, 225, 400
} }
// calculate cost of entire range // calculate cost of entire range cluster
// ignores data flow and does not detect avoidable reads/stores sint32 PPCRecRARange_estimateTotalCost(std::span<raLivenessRange*> ranges)
sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range)
{ {
sint32 cost = 0; sint32 cost = 0;
// todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual cost is still only that of one branch (plus minimal extra cost for generating more code). // todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual cost is still only that of one branch (plus minimal extra cost for generating more code).
// currently we calculate the cost based on the most expensive entry/exit point // currently we calculate the cost based on the most expensive entry/exit point
@ -354,7 +399,7 @@ sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range)
sint32 readCount = 0; sint32 readCount = 0;
sint32 writeCount = 0; sint32 writeCount = 0;
for (auto& subrange : range->list_subranges) for (auto& subrange : ranges)
{ {
if (subrange->start.index != RA_INTER_RANGE_START) if (subrange->start.index != RA_INTER_RANGE_START)
{ {
@ -375,10 +420,11 @@ sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range)
} }
// calculate cost of range that it would have after calling PPCRecRA_explodeRange() on it // calculate cost of range that it would have after calling PPCRecRA_explodeRange() on it
sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range) sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange)
{ {
sint32 cost = -PPCRecRARange_estimateCost(range); auto ranges = subrange->GetAllSubrangesInCluster();
for (auto& subrange : range->list_subranges) sint32 cost = -PPCRecRARange_estimateTotalCost(ranges);
for (auto& subrange : ranges)
{ {
if (subrange->list_locations.empty()) if (subrange->list_locations.empty())
continue; continue;
@ -387,7 +433,7 @@ sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t*
return cost; return cost;
} }
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex) sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex)
{ {
// validation // validation
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT

View file

@ -1,26 +1,77 @@
#pragma once #pragma once
raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name); struct raLivenessLocation_t
raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex); {
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange); sint32 index;
void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range); bool isRead;
bool isWrite;
raLivenessLocation_t() = default;
raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
: index(index), isRead(isRead), isWrite(isWrite) {};
};
// Intrusive doubly-linked-list node embedded in raLivenessRange.
// A range carries one of these per list it is a member of; iteration elsewhere in the
// code follows `next` until it reaches a null pointer.
struct raLivenessSubrangeLink
{
// previous range in the list
struct raLivenessRange* prev;
// next range in the list (null terminates the walk)
struct raLivenessRange* next;
};
// Liveness range of one virtual register inside a single IML segment, as used by the
// register allocator. Instances are created through PPCRecRA_createSubrange() and released
// through PPCRecRA_deleteSubrange().
struct raLivenessRange
{
// segment this range belongs to
IMLSegment* imlSegment;
// first and last point covered by the range inside imlSegment
// start.index can be RA_INTER_RANGE_START for ranges that reach across the segment border
IMLSegmentPoint start;
IMLSegmentPoint end;
// dirty state tracking
bool _noLoad;
bool hasStore;
bool hasStoreDelayed;
// next: ranges that continue this one on the branch-taken / branch-not-taken path
// NOTE(review): validation code expects subrangeBranchTaken->imlSegment to equal imlSegment->nextSegmentBranchTaken
raLivenessRange* subrangeBranchTaken;
raLivenessRange* subrangeBranchNotTaken;
// reverse counterpart of BranchTaken/BranchNotTaken
boost::container::small_vector<raLivenessRange*, 4> previousRanges;
// processing
// NOTE(review): presumably a visit marker for graph traversals - confirm against the implementation
uint32 lastIterationIndex;
// instruction locations (one entry per recorded read and/or write of the register)
std::vector<raLivenessLocation_t> list_locations;
// linked list (subranges with same GPR virtual register)
raLivenessSubrangeLink link_sameVirtualRegister;
// linked list (all subranges for this segment)
raLivenessSubrangeLink link_allSegmentRanges;
// register mapping (constant)
IMLRegID virtualRegister;
IMLName name;
// register allocator result
sint32 physicalRegister;

// returns the ranges that form one cluster together with this range
// NOTE(review): "cluster" is not defined in this header; presumably all ranges reachable via the branch links - confirm
boost::container::small_vector<raLivenessRange*, 32> GetAllSubrangesInCluster();

// accessors for the register mapping and the allocator result
IMLRegID GetVirtualRegister() const;
sint32 GetPhysicalRegister() const;
IMLName GetName() const;
// setters for the allocator result; judging by the name, the ForCluster variant applies to the whole cluster (TODO confirm)
void SetPhysicalRegister(sint32 physicalRegister);
void SetPhysicalRegisterForCluster(sint32 physicalRegister);
};
raLivenessRange* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, sint32 startIndex, sint32 endIndex);
void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange);
void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext); void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext);
void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange); void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange);
void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range);
void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange); void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange);
raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole = false); raLivenessRange* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, sint32 splitIndex, bool trimToHole = false);
void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite); void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 index, bool isRead, bool isWrite);
void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange); void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange);
// cost estimation // cost estimation
sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment); sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment);
sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range); sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange);
sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range); sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex);
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex);
// special values to mark the index of ranges that reach across the segment border // special values to mark the index of ranges that reach across the segment border
#define RA_INTER_RANGE_START (-1) #define RA_INTER_RANGE_START (-1)

View file

@ -1,6 +1,8 @@
#pragma once #pragma once
#include "IMLInstruction.h" #include "IMLInstruction.h"
#include <boost/container/small_vector.hpp>
struct IMLSegmentPoint struct IMLSegmentPoint
{ {
sint32 index; sint32 index;
@ -9,63 +11,14 @@ struct IMLSegmentPoint
IMLSegmentPoint* prev; IMLSegmentPoint* prev;
}; };
// One recorded access of a register inside a liveness subrange (stored in list_locations).
struct raLivenessLocation_t
{
// position of the access
// NOTE(review): presumably the instruction index within the segment - confirm
sint32 index;
// access kind; both flags are independent booleans
bool isRead;
bool isWrite;

// the defaulted constructor leaves all members uninitialized
raLivenessLocation_t() = default;

raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
: index(index), isRead(isRead), isWrite(isWrite) {};
};
// Intrusive doubly-linked-list node embedded in raLivenessSubrange_t.
// A subrange carries one of these per list it is a member of.
struct raLivenessSubrangeLink_t
{
// previous subrange in the list
struct raLivenessSubrange_t* prev;
// next subrange in the list
struct raLivenessSubrange_t* next;
};
// Part of a liveness range that lies within a single IML segment.
// The register identity (virtual/physical register, name) is stored on the owning raLivenessRange_t.
struct raLivenessSubrange_t
{
// owning range; gives access to the virtual register and name
struct raLivenessRange_t* range;
// segment this subrange belongs to
IMLSegment* imlSegment;
// first and last point covered by the subrange inside imlSegment
// start.index can be RA_INTER_RANGE_START for subranges that reach across the segment border
IMLSegmentPoint start;
IMLSegmentPoint end;
// dirty state tracking
bool _noLoad;
bool hasStore;
bool hasStoreDelayed;
// next: subranges that continue this one on the branch-taken / branch-not-taken path
raLivenessSubrange_t* subrangeBranchTaken;
raLivenessSubrange_t* subrangeBranchNotTaken;
// processing
// NOTE(review): presumably a visit marker for graph traversals - confirm against the implementation
uint32 lastIterationIndex;
// instruction locations (one entry per recorded read and/or write of the register)
std::vector<raLivenessLocation_t> list_locations;
// linked list (subranges with same GPR virtual register)
raLivenessSubrangeLink_t link_sameVirtualRegisterGPR;
// linked list (all subranges for this segment)
raLivenessSubrangeLink_t link_segmentSubrangesGPR;
};
// Register-level record shared by a group of subranges: holds the register identity and
// the list of subranges that belong to it.
struct raLivenessRange_t
{
// virtual register covered by this range
IMLRegID virtualRegister;
// physical register for this range
// NOTE(review): presumably filled in by the register allocator - confirm
sint32 physicalRegister;
// name associated with the virtual register
IMLName name;
// subranges belonging to this range (each one points back via raLivenessSubrange_t::range)
std::vector<raLivenessSubrange_t*> list_subranges;
};
struct PPCSegmentRegisterAllocatorInfo_t struct PPCSegmentRegisterAllocatorInfo_t
{ {
// used during loop detection // used during loop detection
bool isPartOfProcessedLoop{}; bool isPartOfProcessedLoop{};
sint32 lastIterationIndex{}; sint32 lastIterationIndex{};
// linked lists // linked lists
raLivenessSubrange_t* linkedList_allSubranges{}; struct raLivenessRange* linkedList_allSubranges{};
std::unordered_map<IMLRegID, raLivenessSubrange_t*> linkedList_perVirtualGPR2; std::unordered_map<IMLRegID, struct raLivenessRange*> linkedList_perVirtualRegister;
}; };
struct IMLSegment struct IMLSegment

View file

@ -16,6 +16,7 @@
#include "IML/IML.h" #include "IML/IML.h"
#include "IML/IMLRegisterAllocator.h" #include "IML/IMLRegisterAllocator.h"
#include "BackendX64/BackendX64.h" #include "BackendX64/BackendX64.h"
#include "util/highresolutiontimer/HighResolutionTimer.h"
struct PPCInvalidationRange struct PPCInvalidationRange
{ {
@ -157,6 +158,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
ppcRecFunc->ppcAddress = range.startAddress; ppcRecFunc->ppcAddress = range.startAddress;
ppcRecFunc->ppcSize = range.length; ppcRecFunc->ppcSize = range.length;
BenchmarkTimer bt;
bt.Start();
// generate intermediate code // generate intermediate code
ppcImlGenContext_t ppcImlGenContext = { 0 }; ppcImlGenContext_t ppcImlGenContext = { 0 };
ppcImlGenContext.debug_entryPPCAddress = range.startAddress; ppcImlGenContext.debug_entryPPCAddress = range.startAddress;
@ -240,9 +244,18 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
entryPointsOut.emplace_back(ppcEnterOffset, x64Offset); entryPointsOut.emplace_back(ppcEnterOffset, x64Offset);
} }
bt.Stop();
//cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {} Entrypoints: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size(), entryPointsOut.size()); //cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {} Entrypoints: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size(), entryPointsOut.size());
cemuLog_logDebug(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code); uint32 codeHash = 0;
for (uint32 i = 0; i < ppcRecFunc->x86Size; i++)
{
codeHash = _rotr(codeHash, 3);
codeHash += ((uint8*)ppcRecFunc->x86Code)[i];
}
//cemuLog_log(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x} Took {:.4}ms | Size {:04x} CodeHash {:08x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code, bt.GetElapsedMilliseconds(), ppcRecFunc->x86Size, codeHash);
return ppcRecFunc; return ppcRecFunc;
} }

View file

@ -54,11 +54,6 @@ struct ppcImlGenContext_t
std::vector<IMLSegment*> segmentList2; std::vector<IMLSegment*> segmentList2;
// code generation control // code generation control
bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode
// register allocator info
struct
{
std::vector<raLivenessRange_t*> list_ranges;
}raInfo;
// analysis info // analysis info
struct struct
{ {