diff --git a/src/Cafe/HW/Espresso/EspressoISA.h b/src/Cafe/HW/Espresso/EspressoISA.h index b3ae45c3..e66e1424 100644 --- a/src/Cafe/HW/Espresso/EspressoISA.h +++ b/src/Cafe/HW/Espresso/EspressoISA.h @@ -91,13 +91,15 @@ namespace Espresso BCCTR = 528 }; - enum class OPCODE_31 + enum class Opcode31 { - + TW = 4, + MFTB = 371, }; inline PrimaryOpcode GetPrimaryOpcode(uint32 opcode) { return (PrimaryOpcode)(opcode >> 26); }; inline Opcode19 GetGroup19Opcode(uint32 opcode) { return (Opcode19)((opcode >> 1) & 0x3FF); }; + inline Opcode31 GetGroup31Opcode(uint32 opcode) { return (Opcode31)((opcode >> 1) & 0x3FF); }; struct BOField { @@ -132,6 +134,12 @@ namespace Espresso uint8 bo; }; + // returns true if LK bit is set, only valid for branch instructions + inline bool DecodeLK(uint32 opcode) + { + return (opcode & 1) != 0; + } + inline void _decodeForm_I(uint32 opcode, uint32& LI, bool& AA, bool& LK) { LI = opcode & 0x3fffffc; @@ -183,13 +191,7 @@ namespace Espresso _decodeForm_D_branch(opcode, BD, BO, BI, AA, LK); } - inline void decodeOp_BCLR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) - { - // form XL (with BD field expected to be zero) - _decodeForm_XL(opcode, BO, BI, LK); - } - - inline void decodeOp_BCCTR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) + inline void decodeOp_BCSPR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) // BCLR and BCSPR { // form XL (with BD field expected to be zero) _decodeForm_XL(opcode, BO, BI, LK); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index b69d70a0..5080421e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -24,15 +24,7 @@ sint32 x64Gen_registerMap[12] = // virtual GPR to x64 register mapping */ void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, uint8 type, void* extraInfo = nullptr) { - if( 
x64GenContext->relocateOffsetTableCount >= x64GenContext->relocateOffsetTableSize ) - { - x64GenContext->relocateOffsetTableSize = std::max(4, x64GenContext->relocateOffsetTableSize*2); - x64GenContext->relocateOffsetTable = (x64RelocEntry_t*)realloc(x64GenContext->relocateOffsetTable, sizeof(x64RelocEntry_t)*x64GenContext->relocateOffsetTableSize); - } - x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].offset = x64GenContext->codeBufferIndex; - x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].type = type; - x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].extraInfo = extraInfo; - x64GenContext->relocateOffsetTableCount++; + x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->codeBufferIndex, type, extraInfo); } /* @@ -306,6 +298,9 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, } else if( imlInstruction->operation == PPCREC_IML_MACRO_MFTB ) { + // according to MS ABI the caller needs to save: + // RAX, RCX, RDX, R8, R9, R10, R11 + uint32 ppcAddress = imlInstruction->op_macro.param; uint32 sprId = imlInstruction->op_macro.param2&0xFFFF; uint32 gprIndex = (imlInstruction->op_macro.param2>>16)&0x1F; @@ -321,7 +316,7 @@ bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, // reserve space on stack for call parameters x64Gen_sub_reg64_imm32(x64GenContext, REG_RSP, 8*11 + 8); x64Gen_mov_reg64_imm64(x64GenContext, REG_RBP, 0); - // call HLE function + // call function if( sprId == SPR_TBL ) x64Gen_mov_reg64_imm64(x64GenContext, REG_RAX, (uint64)PPCRecompiler_getTBL); else if( sprId == SPR_TBU ) @@ -1971,6 +1966,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction) { + if 
(!imlInstruction->op_conditionalJump.jumpAccordingToSegment) + { + debug_printf("PPCRecompilerX64Gen_imlInstruction_conditionalJump(): Failed on deprecated jump method\n"); + return false; + } + if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE ) { // jump always @@ -1985,19 +1986,25 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec else { // deprecated (jump to jumpmark) + __debugbreak(); // deprecated PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); x64Gen_jmp_imm32(x64GenContext, 0); } } else { - if (imlInstruction->op_conditionalJump.jumpAccordingToSegment) - assert_dbg(); + if (!imlInstruction->op_conditionalJump.jumpAccordingToSegment) + { + debug_printf("Unsupported deprecated cjump to ppc address\n"); + return false; + } + cemu_assert_debug(imlSegment->nextSegmentBranchTaken); + // generate jump update marker if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 ) { // temporary cr is used, which means we use the currently active eflags - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); sint32 condition = imlInstruction->op_conditionalJump.condition; if( condition == PPCREC_JUMP_CONDITION_E ) x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); @@ -2015,19 +2022,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + 
PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, 0); return true; } @@ -2036,19 +2043,19 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? 
X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); return true; } @@ -2057,26 +2064,28 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRec { if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? 
X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); return true; } else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); return true; } } + cemu_assert_debug(false); // should not reach? 
} x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); + cemu_assert_debug(imlSegment->GetBranchTaken()); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, (void*)imlSegment->GetBranchTaken()); if( imlInstruction->op_conditionalJump.bitMustBeSet ) { x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); @@ -2094,13 +2103,14 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // some tests (all performed on a i7-4790K) - // 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write) + // 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write and direct dependency) // 2) CMP [mem], 0 + JG has about equal (or slightly worse) performance than BT + JNC // BT x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_CARRY, 0); + cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken()); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, x64GenContext->currentSegment->GetBranchTaken()); + x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); return true; } @@ -2152,22 +2162,6 @@ bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppc void PPCRecompilerX64Gen_imlInstruction_ppcEnter(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* 
x64GenContext, IMLInstruction* imlInstruction) { imlInstruction->op_ppcEnter.x64Offset = x64GenContext->codeBufferIndex; - // generate code - if( ppcImlGenContext->hasFPUInstruction ) - { - // old FPU unavailable code - //PPCRecompilerX86_crConditionFlags_saveBeforeOverwrite(PPCRecFunction, ppcImlGenContext, x64GenContext); - //// skip if FP bit in MSR is set - //// #define MSR_FP (1<<13) - //x64Gen_bt_mem8(x64GenContext, REG_ESP, offsetof(PPCInterpreter_t, msr), 13); - //uint32 jmpCodeOffset = x64GenContext->codeBufferIndex; - //x64Gen_jmpc(x64GenContext, X86_CONDITION_CARRY, 0); - //x64Gen_mov_reg32_imm32(x64GenContext, REG_EAX, imlInstruction->op_ppcEnter.ppcAddress&0x7FFFFFFF); - //PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X86_RELOC_MAKE_RELATIVE); - //x64Gen_jmp_imm32(x64GenContext, (uint32)PPCRecompiler_recompilerCallEscapeAndCallFPUUnavailable); - //// patch jump - //*(uint32*)(x64GenContext->codeBuffer+jmpCodeOffset+2) = x64GenContext->codeBufferIndex-jmpCodeOffset-6; - } } void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) @@ -2193,7 +2187,6 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, } else assert_dbg(); - //x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr)+sizeof(uint32)*(name-PPCREC_NAME_SPR0)); } else assert_dbg(); @@ -2256,7 +2249,7 @@ uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size) bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) { - x64GenContext_t x64GenContext = {0}; + x64GenContext_t x64GenContext{}; x64GenContext.codeBufferSize = 1024; x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); x64GenContext.codeBufferIndex = 0; @@ -2266,6 +2259,7 @@ bool 
PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo bool codeGenerationFailed = false; for (IMLSegment* segIt : ppcImlGenContext->segmentList2) { + x64GenContext.currentSegment = segIt; segIt->x64Offset = x64GenContext.codeBufferIndex; for(size_t i=0; iimlList.size(); i++) { @@ -2442,48 +2436,43 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo if( codeGenerationFailed ) { free(x64GenContext.codeBuffer); - if (x64GenContext.relocateOffsetTable) - free(x64GenContext.relocateOffsetTable); return false; } // allocate executable memory uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex); size_t baseAddress = (size_t)executableMemory; // fix relocs - for(sint32 i=0; isegmentList2) - { - if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset) - { - x64Offset = segIt->x64Offset; - break; - } - } - if (x64Offset == 0xFFFFFFFF) - { - debug_printf("Recompiler could not resolve jump (function at 0x%08x)\n", PPCRecFunction->ppcAddress); - // todo: Cleanup - return false; - } + cemu_assert_suspicious(); + //for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + //{ + // if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset) + // { + // x64Offset = segIt->x64Offset; + // break; + // } + //} + //if (x64Offset == 0xFFFFFFFF) + //{ + // debug_printf("Recompiler could not resolve jump (function at 0x%08x)\n", PPCRecFunction->ppcAddress); + // // todo: Cleanup + // return false; + //} } else { - IMLSegment* destSegment = (IMLSegment*)x64GenContext.relocateOffsetTable[i].extraInfo; + IMLSegment* destSegment = (IMLSegment*)relocIt.extraInfo; x64Offset = destSegment->x64Offset; } - uint32 relocBase = x64GenContext.relocateOffsetTable[i].offset; + uint32 relocBase = relocIt.offset; uint8* relocInstruction = x64GenContext.codeBuffer+relocBase; if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 
0x8F) ) { @@ -2525,8 +2514,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex); free(x64GenContext.codeBuffer); x64GenContext.codeBuffer = nullptr; - if (x64GenContext.relocateOffsetTable) - free(x64GenContext.relocateOffsetTable); // set code PPCRecFunction->x86Code = executableMemory; PPCRecFunction->x86Size = x64GenContext.codeBufferIndex; @@ -2535,7 +2522,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo void PPCRecompilerX64Gen_generateEnterRecompilerCode() { - x64GenContext_t x64GenContext = {0}; + x64GenContext_t x64GenContext{}; x64GenContext.codeBufferSize = 1024; x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); x64GenContext.codeBufferIndex = 0; @@ -2615,7 +2602,7 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode() void* PPCRecompilerX64Gen_generateLeaveRecompilerCode() { - x64GenContext_t x64GenContext = {0}; + x64GenContext_t x64GenContext{}; x64GenContext.codeBufferSize = 128; x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); x64GenContext.codeBufferIndex = 0; diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 60cc1e2a..0548f402 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -3,6 +3,8 @@ struct x64RelocEntry_t { + x64RelocEntry_t(uint32 offset, uint8 type, void* extraInfo) : offset(offset), type(type), extraInfo(extraInfo) {}; + uint32 offset; uint8 type; void* extraInfo; @@ -10,16 +12,16 @@ struct x64RelocEntry_t struct x64GenContext_t { - uint8* codeBuffer; - sint32 codeBufferIndex; - sint32 codeBufferSize; + IMLSegment* currentSegment{}; + + uint8* codeBuffer{}; + sint32 codeBufferIndex{}; + sint32 codeBufferSize{}; // cr state - sint32 activeCRRegister; // current x86 condition 
flags reflect this cr* register - sint32 activeCRState; // describes the way in which x86 flags map to the cr register (signed / unsigned) + sint32 activeCRRegister{}; // current x86 condition flags reflect this cr* register + sint32 activeCRState{}; // describes the way in which x86 flags map to the cr register (signed / unsigned) // relocate offsets - x64RelocEntry_t* relocateOffsetTable; - sint32 relocateOffsetTableSize; - sint32 relocateOffsetTableCount; + std::vector relocateOffsetTable2; }; // Some of these are defined by winnt.h and gnu headers @@ -126,7 +128,6 @@ enum #define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI) #define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI) -#define X86_RELOC_MAKE_RELATIVE (0) // make code imm relative to instruction #define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset #define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp index 3abecb75..33ff52ac 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp @@ -5,11 +5,6 @@ // mulx, rorx, sarx, shlx, shrx // PDEP, PEXT -void x64Gen_checkBuffer(x64GenContext_t* x64GenContext) -{ - // todo -} - void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v) { if( x64GenContext->codeBufferIndex+1 > x64GenContext->codeBufferSize ) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp index e936f1d8..b4021931 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp @@ -203,7 +203,6 @@ template void _x64Gen_writeMODRM_internal(x64GenContext_t* x64GenContext, TA opA, TB opB) { static_assert(TA::getType() == 
MODRM_OPR_TYPE::REG); - x64Gen_checkBuffer(x64GenContext); // REX prefix // 0100 WRXB if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h index 06f39815..72a2d3f5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -28,5 +28,5 @@ void PPCRecompiler_reorderConditionModifyInstructions(struct ppcImlGenContext_t* void IMLRegisterAllocator_AllocateRegisters(struct ppcImlGenContext_t* ppcImlGenContext); // debug -void IMLDebug_DumpSegment(struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); +void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, struct IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 69d8e1b7..560f5de1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -104,31 +104,48 @@ void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* iml } } -void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo) +std::string IMLDebug_GetSegmentName(ppcImlGenContext_t* ctx, IMLSegment* seg) +{ + if (!ctx) + { + return ""; + } + // find segment index + for (size_t i = 0; i < ctx->segmentList2.size(); i++) + { + if (ctx->segmentList2[i] == seg) + { + return fmt::format("Seg{:04x}", i); + } + } + return ""; +} + +void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) { StringBuf strOutput(1024); - strOutput.addFmt("SEGMENT 0x{:04x} 0x{:08x} PPC 0x{:08x} - 0x{:08x} Loop-depth {}", segmentIndex, imlSegment->ppcAddress, imlSegment->ppcAddrMin, 
imlSegment->ppcAddrMax, imlSegment->loopDepth); + strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), imlSegment->ppcAddress, imlSegment->loopDepth); if (imlSegment->isEnterable) { strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress); } - else if (imlSegment->isJumpDestination) - { - strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress); - } + //else if (imlSegment->isJumpDestination) + //{ + // strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress); + //} debug_printf("%s\n", strOutput.c_str()); - strOutput.reset(); - strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment); - debug_printf("%s", strOutput.c_str()); + //strOutput.reset(); + //strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment); + //debug_printf("%s", strOutput.c_str()); if (printLivenessRangeInfo) { IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); } - debug_printf("\n"); + //debug_printf("\n"); sint32 lineOffsetParameters = 18; @@ -376,22 +393,22 @@ void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool prin } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) { - strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand); + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("fpr{:02}, fpr{:02}", inst.op_fpr_r_r.registerResult, inst.op_fpr_r_r.registerOperand); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R) { - strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC); + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("fpr{:02}, 
fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r_r.registerResult, inst.op_fpr_r_r_r_r.registerOperandA, inst.op_fpr_r_r_r_r.registerOperandB, inst.op_fpr_r_r_r_r.registerOperandC); } else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R) { - strOutput.addFmt("{:-6} ", IMLDebug_GetOpcodeName(&inst)); - strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB); + strOutput.addFmt("{:>6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("fpr{:02}, fpr{:02}, fpr{:02}", inst.op_fpr_r_r_r.registerResult, inst.op_fpr_r_r_r.registerOperandA, inst.op_fpr_r_r_r.registerOperandB); } else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { - strOutput.addFmt("CYCLE_CHECK jm_{:08x}\n", inst.op_conditionalJump.jumpmarkAddress); + strOutput.addFmt("CYCLE_CHECK\n"); } else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { @@ -451,14 +468,15 @@ void IMLDebug_DumpSegment(IMLSegment* imlSegment, sint32 segmentIndex, bool prin { if (i) debug_printf(", "); - debug_printf("%p", (void*)imlSegment->list_prevSegments[i]); + debug_printf("%s", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[i]).c_str()); } debug_printf("\n"); - debug_printf("Links to: "); if (imlSegment->nextSegmentBranchNotTaken) - debug_printf("%p (no branch), ", (void*)imlSegment->nextSegmentBranchNotTaken); + debug_printf("BranchNotTaken: %s\n", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str()); if (imlSegment->nextSegmentBranchTaken) - debug_printf("%p (branch)", (void*)imlSegment->nextSegmentBranchTaken); + debug_printf("BranchTaken: %s\n", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str()); + if (imlSegment->nextSegmentIsUncertain) + debug_printf("Dynamic target\n"); debug_printf("\n"); } @@ -466,7 +484,7 @@ void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext) { for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++) { - 
IMLDebug_DumpSegment(ppcImlGenContext->segmentList2[i], i); + IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], false); debug_printf("\n"); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 18cf580d..c86bb610 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -152,7 +152,7 @@ enum PPCREC_IML_TYPE_R_S32, // r* (op) imm PPCREC_IML_TYPE_MACRO, PPCREC_IML_TYPE_CJUMP, // conditional jump - PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles >= 0 + PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0 PPCREC_IML_TYPE_PPC_ENTER, // used to mark locations that should be written to recompilerCallTable PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands) // conditional @@ -420,6 +420,11 @@ struct IMLInstruction op_jumpmark.address = address; } + void make_debugbreak(uint32 currentPPCAddress = 0) + { + make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0); + } + void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16) { type = PPCREC_IML_TYPE_MACRO; @@ -431,6 +436,7 @@ struct IMLInstruction void make_ppcEnter(uint32 ppcAddress) { + cemu_assert_suspicious(); // removed type = PPCREC_IML_TYPE_PPC_ENTER; operation = 0; op_ppcEnter.ppcAddress = ppcAddress; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 08d776e7..b90aa9b1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -74,44 +74,44 @@ void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* iml } } -typedef struct -{ - sint32 name; - sint32 virtualRegister; - sint32 physicalRegister; - bool isDirty; -}raRegisterState_t; +//typedef 
struct +//{ +// sint32 name; +// sint32 virtualRegister; +// sint32 physicalRegister; +// bool isDirty; +//}raRegisterState_t; -const sint32 _raInfo_physicalGPRCount = PPC_X64_GPR_USABLE_REGISTERS; - -raRegisterState_t* PPCRecRA_getRegisterState(raRegisterState_t* regState, sint32 virtualRegister) -{ - for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) - { - if (regState[i].virtualRegister == virtualRegister) - { -#ifdef CEMU_DEBUG_ASSERT - if (regState[i].physicalRegister < 0) - assert_dbg(); -#endif - return regState + i; - } - } - return nullptr; -} - -raRegisterState_t* PPCRecRA_getFreePhysicalRegister(raRegisterState_t* regState) -{ - for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) - { - if (regState[i].physicalRegister < 0) - { - regState[i].physicalRegister = i; - return regState + i; - } - } - return nullptr; -} +//const sint32 _raInfo_physicalGPRCount = PPC_X64_GPR_USABLE_REGISTERS; +// +//raRegisterState_t* PPCRecRA_getRegisterState(raRegisterState_t* regState, sint32 virtualRegister) +//{ +// for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) +// { +// if (regState[i].virtualRegister == virtualRegister) +// { +//#ifdef CEMU_DEBUG_ASSERT +// if (regState[i].physicalRegister < 0) +// assert_dbg(); +//#endif +// return regState + i; +// } +// } +// return nullptr; +//} +// +//raRegisterState_t* PPCRecRA_getFreePhysicalRegister(raRegisterState_t* regState) +//{ +// for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) +// { +// if (regState[i].physicalRegister < 0) +// { +// regState[i].physicalRegister = i; +// return regState + i; +// } +// } +// return nullptr; +//} typedef struct { @@ -309,18 +309,32 @@ void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) #endif } +void PPCRecRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +{ + // this works as a pre-pass to actual register allocation. Assigning registers in advance based on fixed requirements (e.g. 
calling conventions and operations with fixed-reg input/output like x86 DIV/MUL) + // algorithm goes as follows: + // 1) Iterate all instructions from beginning to end and keep a list of covering ranges + // 2) If we encounter an instruction with a fixed-register we: + // 2.0) Check if there are any other ranges already using the same fixed-register and if yes, we split them and unassign the register for any follow-up instructions just prior to the current instruction + // 2.1) For inputs: Split the range that needs to be assigned a phys reg on the current instruction. Basically creating a 1-instruction long subrange that we can assign the physical register. RA will then schedule register allocation around that and avoid moves + // 2.2) For outputs: Split the range that needs to be assigned a phys reg on the current instruction + // Q: What if a specific fixed-register is used both for input and output and thus is destructive? A: Create temporary range + // Q: What if we have 3 different inputs that are all the same virtual register? A: Create temporary range + // Q: Assuming the above is implemented, do we even support overlapping two ranges of separate virtual regs on the same phys register? 
In theory the RA shouldn't care + // assume imlSegment->raInfo.linkedList_allSubranges is ordered ascending by start index already + + // todo +} + bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) { - // sort subranges ascending by start index - - //std::sort(imlSegment->raInfo.list_subranges.begin(), imlSegment->raInfo.list_subranges.end(), _sortSubrangesByStartIndexDepr); _sortSegmentAllSubrangesLinkedList(imlSegment); + PPCRecRA_HandleFixedRegisters(ppcImlGenContext, imlSegment); + raLiveRangeInfo_t liveInfo; liveInfo.liveRangesCount = 0; - //sint32 subrangeIndex = 0; - //for (auto& subrange : imlSegment->raInfo.list_subranges) raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) { @@ -365,7 +379,7 @@ bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSe subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; continue; } - // find free register + // find free register for this segment uint32 physRegisterMask = (1<range); physRegisterMask &= allowedPhysRegisterMask; } @@ -761,7 +776,6 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 
1 : 0; // load register ranges that are supplied from previous segments raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - //for (auto& subrange : imlSegment->raInfo.list_subranges) while(subrangeItr) { if (subrangeItr->start.index == RA_INTER_RANGE_START) @@ -933,7 +947,7 @@ void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext); void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext); void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) +void PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) { // insert empty segments after every non-taken branch if the linked segment has more than one input // this gives the register allocator more room to create efficient spill code @@ -985,7 +999,7 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext) { - PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext); + PPCRecompilerImm_reshapeForRegisterAllocation(ppcImlGenContext); ppcImlGenContext->raInfo.list_ranges = std::vector(); @@ -1243,7 +1257,6 @@ void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSe if (remainingScanDist <= 0) return; // can't reach end - // also dont forget: Extending is easier if we allow 'non symmetric' branches. E.g. 
register range one enters one branch IMLSegment* route[64]; route[0] = currentSegment; if (currentSegment->nextSegmentBranchNotTaken) diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp index 4882a0a1..2b2c56a2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp @@ -1,6 +1,13 @@ #include "IMLInstruction.h" #include "IMLSegment.h" +void IMLSegment::SetEnterable(uint32 enterAddress) +{ + cemu_assert_debug(!isEnterable || enterPPCAddress == enterAddress); + isEnterable = true; + enterPPCAddress = enterAddress; +} + bool IMLSegment::HasSuffixInstruction() const { if (imlList.empty()) @@ -16,8 +23,30 @@ IMLInstruction* IMLSegment::GetLastInstruction() return &imlList.back(); } +void IMLSegment::SetLinkBranchNotTaken(IMLSegment* imlSegmentDst) +{ + if (nextSegmentBranchNotTaken) + nextSegmentBranchNotTaken->list_prevSegments.erase(std::find(nextSegmentBranchNotTaken->list_prevSegments.begin(), nextSegmentBranchNotTaken->list_prevSegments.end(), this)); + nextSegmentBranchNotTaken = imlSegmentDst; + if(imlSegmentDst) + imlSegmentDst->list_prevSegments.push_back(this); +} +void IMLSegment::SetLinkBranchTaken(IMLSegment* imlSegmentDst) +{ + if (nextSegmentBranchTaken) + nextSegmentBranchTaken->list_prevSegments.erase(std::find(nextSegmentBranchTaken->list_prevSegments.begin(), nextSegmentBranchTaken->list_prevSegments.end(), this)); + nextSegmentBranchTaken = imlSegmentDst; + if (imlSegmentDst) + imlSegmentDst->list_prevSegments.push_back(this); +} +IMLInstruction* IMLSegment::AppendInstruction() +{ + IMLInstruction& inst = imlList.emplace_back(); + memset(&inst, 0, sizeof(IMLInstruction)); + return &inst; +} void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) { diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index 1e27d303..7ea7903b 
100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -103,8 +103,8 @@ struct IMLSegment bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) uint32 enterPPCAddress{}; // used if isEnterable is true // jump destination segments - bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps - uint32 jumpDestinationPPCAddress{}; + //bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps + //uint32 jumpDestinationPPCAddress{}; // PPC FPR use mask bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR // CR use mask @@ -115,10 +115,30 @@ struct IMLSegment PPCSegmentRegisterAllocatorInfo_t raInfo{}; PPCRecVGPRDistances_t raDistances{}; bool raRangeExtendProcessed{}; - // segment points - IMLSegmentPoint* segmentPointList{}; + + // segment state API + void SetEnterable(uint32 enterAddress); + void SetLinkBranchNotTaken(IMLSegment* imlSegmentDst); + void SetLinkBranchTaken(IMLSegment* imlSegmentDst); + + IMLSegment* GetBranchTaken() + { + return nextSegmentBranchTaken; + } + + IMLSegment* GetBranchNotTaken() + { + return nextSegmentBranchNotTaken; + } + + // instruction API + IMLInstruction* AppendInstruction(); + bool HasSuffixInstruction() const; IMLInstruction* GetLastInstruction(); + + // segment points + IMLSegmentPoint* segmentPointList{}; }; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h b/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h index e558292b..96b5143e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h @@ -21,6 +21,16 @@ public: }; public: + ~PPCFunctionBoundaryTracker() + { + while (!map_ranges.empty()) + { + PPCRange_t* range = *map_ranges.begin(); + delete range; + map_ranges.erase(map_ranges.begin()); + } + } + void 
trackStartPoint(MPTR startAddress) { processRange(startAddress, nullptr, nullptr); @@ -40,10 +50,34 @@ public: return false; } + std::vector GetRanges() + { + std::vector r; + for (auto& it : map_ranges) + r.emplace_back(*it); + return r; + } + + bool ContainsAddress(uint32 addr) const + { + for (auto& it : map_ranges) + { + if (addr >= it->startAddress && addr < it->getEndAddress()) + return true; + } + return false; + } + + const std::set& GetBranchTargets() const + { + return map_branchTargetsAll; + } + private: void addBranchDestination(PPCRange_t* sourceRange, MPTR address) { - map_branchTargets.emplace(address); + map_queuedBranchTargets.emplace(address); + map_branchTargetsAll.emplace(address); } // process flow of instruction @@ -114,7 +148,7 @@ private: Espresso::BOField BO; uint32 BI; bool LK; - Espresso::decodeOp_BCLR(opcode, BO, BI, LK); + Espresso::decodeOp_BCSPR(opcode, BO, BI, LK); if (BO.branchAlways() && !LK) { // unconditional BLR @@ -218,7 +252,7 @@ private: auto rangeItr = map_ranges.begin(); PPCRange_t* previousRange = nullptr; - for (std::set::const_iterator targetItr = map_branchTargets.begin() ; targetItr != map_branchTargets.end(); ) + for (std::set::const_iterator targetItr = map_queuedBranchTargets.begin() ; targetItr != map_queuedBranchTargets.end(); ) { while (rangeItr != map_ranges.end() && ((*rangeItr)->startAddress + (*rangeItr)->length) <= (*targetItr)) { @@ -239,7 +273,7 @@ private: (*targetItr) < ((*rangeItr)->startAddress + (*rangeItr)->length)) { // delete visited targets - targetItr = map_branchTargets.erase(targetItr); + targetItr = map_queuedBranchTargets.erase(targetItr); continue; } @@ -289,5 +323,6 @@ private: }; std::set map_ranges; - std::set map_branchTargets; + std::set map_queuedBranchTargets; + std::set map_branchTargetsAll; }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 9ff113b1..8ec2f545 100644 --- 
a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -131,7 +131,7 @@ void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress) } bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext); -PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set& entryAddresses, std::vector>& entryPointsOut) +PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set& entryAddresses, std::vector>& entryPointsOut, PPCFunctionBoundaryTracker& boundaryTracker) { if (range.startAddress >= PPC_REC_CODE_AREA_END) { @@ -156,10 +156,10 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t(); ppcRecFunc->ppcAddress = range.startAddress; ppcRecFunc->ppcSize = range.length; - + // generate intermediate code ppcImlGenContext_t ppcImlGenContext = { 0 }; - bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses); + bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses, boundaryTracker); if (compiledSuccessfully == false) { delete ppcRecFunc; @@ -173,6 +173,16 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } + //if (ppcRecFunc->ppcAddress == 0x12345678) + //{ + // debug_printf("----------------------------------------\n"); + // IMLDebug_Dump(&ppcImlGenContext); + // __debugbreak(); + //} + + // Large functions for testing (botw): + // 3B4049C + // emit x64 code bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext); if (x64GenerationSuccess == false) @@ -181,6 +191,9 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP } // collect list of PPC-->x64 entry points + cemuLog_log(LogType::Force, "[Recompiler] 
Successfully compiled {:08x} - {:08x} Segments: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size()); + cemu_assert_debug(ppcImlGenContext.imlListCount == 0); + entryPointsOut.clear(); for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2) { @@ -359,7 +372,7 @@ void PPCRecompiler_recompileAtAddress(uint32 address) PPCRecompilerState.recompilerSpinlock.unlock(); std::vector> functionEntryPoints; - auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints); + auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints, funcBoundaries); if (!func) { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index e943d8d3..10cd0aa0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -31,9 +31,12 @@ struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct pp struct ppcImlGenContext_t { + class PPCFunctionBoundaryTracker* boundaryTracker; PPCRecFunction_t* functionRef; uint32* currentInstruction; uint32 ppcAddressOfCurrentInstruction; + IMLSegment* currentOutputSegment; + struct PPCBasicBlockInfo* currentBasicBlock{}; // fpr mode bool LSQE{ true }; bool PSE{ true }; @@ -82,6 +85,31 @@ struct ppcImlGenContext_t { return *PPCRecompilerImlGen_generateNewEmptyInstruction(this); } + + IMLSegment* NewSegment() + { + IMLSegment* seg = new IMLSegment(); + segmentList2.emplace_back(seg); + return seg; + } + + size_t GetSegmentIndex(IMLSegment* seg) + { + for (size_t i = 0; i < segmentList2.size(); i++) + { + if (segmentList2[i] == seg) + return i; + } + cemu_assert_error(); + return 0; + } + + IMLSegment* InsertSegment(size_t index) + { + IMLSegment* newSeg = new IMLSegment(); + segmentList2.insert(segmentList2.begin() + index, 1, newSeg); + return newSeg; + } }; typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)(); diff --git 
a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 3b8783f5..0521c440 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -1,7 +1,12 @@ #define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7) we use the 8th as temporary cr register that is never stored (BDNZ instruction for example) -bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set& entryAddresses); +bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set& entryAddresses, class PPCFunctionBoundaryTracker& boundaryTracker); + +IMLSegment* PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo); +IMLSegment* PPCIMLGen_CreateNewSegmentAsBranchTarget(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo); + +void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenContext); IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext); void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index a1cb6f2e..6d488b17 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -1,28 +1,74 @@ #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h" +#include "Cafe/HW/Espresso/EspressoISA.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" #include "IML/IML.h" #include "IML/IMLRegisterAllocatorRanges.h" +#include "PPCFunctionBoundaryTracker.h" + +struct PPCBasicBlockInfo +{ + 
PPCBasicBlockInfo(uint32 startAddress, const std::set& entryAddresses) : startAddress(startAddress), lastAddress(startAddress) + { + isEnterable = entryAddresses.find(startAddress) != entryAddresses.end(); + } + + uint32 startAddress; + uint32 lastAddress; // inclusive + bool isEnterable{ false }; + //uint32 enterableAddress{}; -> covered by startAddress + bool hasContinuedFlow{ true }; // non-branch path goes to next segment (lastAddress+4), assumed by default + bool hasBranchTarget{ false }; + uint32 branchTarget{}; + + // associated IML segments + IMLSegment* firstSegment{}; // first segment in chain, used as branch target for other segments + IMLSegment* appendSegment{}; // last segment in chain, new instructions should be appended to this segment + + void SetInitialSegment(IMLSegment* seg) + { + cemu_assert_debug(!firstSegment); + cemu_assert_debug(!appendSegment); + firstSegment = seg; + appendSegment = seg; + } + + IMLSegment* GetFirstSegmentInChain() + { + return firstSegment; + } + + IMLSegment* GetSegmentForInstructionAppend() + { + return appendSegment; + } +}; bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext); -uint32 PPCRecompiler_getInstructionByOffset(ppcImlGenContext_t* ppcImlGenContext, uint32 offset); IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext) { - if( ppcImlGenContext->imlListCount+1 > ppcImlGenContext->imlListSize ) - { - sint32 newSize = ppcImlGenContext->imlListCount*2 + 2; - ppcImlGenContext->imlList = (IMLInstruction*)realloc(ppcImlGenContext->imlList, sizeof(IMLInstruction)*newSize); - ppcImlGenContext->imlListSize = newSize; - } - IMLInstruction* imlInstruction = ppcImlGenContext->imlList+ppcImlGenContext->imlListCount; - memset(imlInstruction, 0x00, sizeof(IMLInstruction)); - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by 
default - imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; - ppcImlGenContext->imlListCount++; - return imlInstruction; + //if( ppcImlGenContext->imlListCount+1 > ppcImlGenContext->imlListSize ) + //{ + // sint32 newSize = ppcImlGenContext->imlListCount*2 + 2; + // ppcImlGenContext->imlList = (IMLInstruction*)realloc(ppcImlGenContext->imlList, sizeof(IMLInstruction)*newSize); + // ppcImlGenContext->imlListSize = newSize; + //} + //IMLInstruction* imlInstruction = ppcImlGenContext->imlList+ppcImlGenContext->imlListCount; + //memset(imlInstruction, 0x00, sizeof(IMLInstruction)); + //imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default + //imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; + //ppcImlGenContext->imlListCount++; + //return imlInstruction; + + IMLInstruction& inst = ppcImlGenContext->currentOutputSegment->imlList.emplace_back(); + memset(&inst, 0x00, sizeof(IMLInstruction)); + inst.crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default +//imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; + + return &inst; } void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister, uint8 crMode) @@ -109,6 +155,8 @@ void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenConte void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 jumpmarkAddress) { + __debugbreak(); + // jump if (imlInstruction == NULL) imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); @@ -168,10 +216,13 @@ void PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGen void 
PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpmarkAddress, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) { + __debugbreak(); + // conditional jump IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_CJUMP; imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; + imlInstruction->op_conditionalJump.jumpAccordingToSegment = false; imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; imlInstruction->op_conditionalJump.condition = jumpCondition; imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; @@ -179,6 +230,19 @@ void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; } +void PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) +{ + // conditional jump + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + imlInstruction->type = PPCREC_IML_TYPE_CJUMP; + imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; + imlInstruction->op_conditionalJump.jumpAccordingToSegment = true; + imlInstruction->op_conditionalJump.condition = jumpCondition; + imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; + imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; + imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; +} + void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { // load from memory @@ -363,7 +427,13 @@ uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGe void 
PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // split before and after to make sure the macro is in an isolated segment that we can make enterable + PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); + ppcImlGenContext->currentOutputSegment->SetEnterable(ppcImlGenContext->ppcAddressOfCurrentInstruction); PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext)->make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); + IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); + middleSeg->SetLinkBranchTaken(nullptr); + middleSeg->SetLinkBranchNotTaken(nullptr); } bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -417,6 +487,9 @@ bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + printf("PPCRecompilerImlGen_MFTB(): Not supported\n"); + return false; + uint32 rD, spr1, spr2, spr; PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); spr = spr1 | (spr2<<5); @@ -426,6 +499,8 @@ bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod // TBL / TBU uint32 param2 = spr | (rD << 16); ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0); + IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); + return true; } return false; @@ -560,7 +635,7 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin ppcImlGenContext->cyclesSinceLastBranch++; if (PPCRecompiler_decodePPCInstruction(ppcImlGenContext)) { - assert_dbg(); + cemu_assert_suspicious(); } } // add range @@ -582,33 +657,17 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( opcode&PPC_OPC_LK ) 
{ // function call - // check if function can be inlined - sint32 inlineFuncInstructionCount = 0; - if (PPCRecompiler_canInlineFunction(jumpAddressDest, &inlineFuncInstructionCount)) - { - // generate NOP iml instead of BL macro (this assures that segment PPC range remains intact) - PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext, NULL); - //cemuLog_log(LogType::Force, "Inline func 0x{:08x} at {:08x}", jumpAddressDest, ppcImlGenContext->ppcAddressOfCurrentInstruction); - uint32* prevInstructionPtr = ppcImlGenContext->currentInstruction; - ppcImlGenContext->currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(jumpAddressDest); - PPCRecompiler_generateInlinedCode(ppcImlGenContext, jumpAddressDest, inlineFuncInstructionCount); - ppcImlGenContext->currentInstruction = prevInstructionPtr; - return true; - } - // generate funtion call instructions ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); return true; } // is jump destination within recompiled function? 
- if( jumpAddressDest >= ppcImlGenContext->functionRef->ppcAddress && jumpAddressDest < (ppcImlGenContext->functionRef->ppcAddress + ppcImlGenContext->functionRef->ppcSize) ) + if( ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest) ) { - // generate instruction - PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext, NULL, jumpAddressDest); + // jump to target within same function + PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, nullptr); } else { - // todo: Inline this jump destination if possible (in many cases it's a bunch of GPR/FPR store instructions + BLR) ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); } return true; @@ -616,6 +675,8 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); + uint32 BO, BI, BD; PPC_OPC_TEMPL_B(opcode, BO, BI, BD); @@ -661,11 +722,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) else if( crBit == 3 ) jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; } - // generate instruction - //ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, !conditionMustBeTrue); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); + PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; + 
IMLSegment* blSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); + blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); return true; } return false; @@ -678,9 +738,9 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) uint32 ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_SUB, ctrRegister, 1, 0, false, false, PPCREC_CR_REG_TEMP, PPCREC_CR_MODE_ARITHMETIC); if( decrementerMustBeZero ) - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, PPCREC_JUMP_CONDITION_E, PPCREC_CR_REG_TEMP, 0, false); + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, PPCREC_JUMP_CONDITION_E, PPCREC_CR_REG_TEMP, 0, false); else - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, PPCREC_JUMP_CONDITION_NE, PPCREC_CR_REG_TEMP, 0, false); + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, PPCREC_JUMP_CONDITION_NE, PPCREC_CR_REG_TEMP, 0, false); return true; } else @@ -688,8 +748,8 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( ignoreCondition ) { // branch always, no condition and no decrementer - debugBreakpoint(); - crRegister = PPC_REC_INVALID_REGISTER; // not necessary but lets optimizer know we dont care for cr register on this instruction + // not supported + return false; } else { @@ -717,17 +777,20 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; } - if 
(jumpAddressDest >= ppcImlGenContext->functionRef->ppcAddress && jumpAddressDest < (ppcImlGenContext->functionRef->ppcAddress + ppcImlGenContext->functionRef->ppcSize)) + if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) { // near jump - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, jumpCondition, crRegister, crBit, conditionMustBeTrue); + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); } else { // far jump + debug_printf("PPCRecompilerImlGen_BC(): Far jump not supported yet"); + return false; + PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, jumpCondition, crRegister, crBit, !conditionMustBeTrue); ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); + //ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); } } } @@ -736,6 +799,8 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); + uint32 BO, BI, BD; PPC_OPC_TEMPL_XL(opcode, BO, BI, BD); @@ -750,7 +815,7 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool ignoreCondition = (BO&16)!=0; bool saveLR = (opcode&PPC_OPC_LK)!=0; // since we skip this instruction if the condition is true, we need to invert the logic - bool invertedConditionMustBeTrue = !conditionMustBeTrue; + //bool invertedConditionMustBeTrue = !conditionMustBeTrue; if( useDecrementer ) { cemu_assert_debug(false); @@ -760,28 +825,37 @@ 
bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { if( ignoreCondition ) { - // store LR + // branch always, no condition and no decrementer check + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasContinuedFlow); + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); if( saveLR ) { ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); } else { - // branch always, no condition and no decrementer ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); } } else { + cemu_assert_debug(ppcImlGenContext->currentBasicBlock->hasContinuedFlow); + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); + + //debug_printf("[Rec-Disable] BCLR with condition or LR\n"); + //return false; + // store LR if( saveLR ) { + cemu_assert_unimplemented(); // todo - this is difficult to handle because it needs to jump to the unmodified LR (we should cache it in a register which we pass to the macro?) 
+ return false; + uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); } // generate jump condition - if( invertedConditionMustBeTrue ) + if(conditionMustBeTrue) { if( crBit == 0 ) jumpCondition = PPCREC_JUMP_CONDITION_L; @@ -803,9 +877,17 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod else if( crBit == 3 ) jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; } - // jump if BCLR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction) - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue); - ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); + + //if(conditionMustBeTrue) + // ppcImlGenContext->emitInst().make_debugbreak(ppcImlGenContext->ppcAddressOfCurrentInstruction); + + // write the BCTR instruction to a new segment that is set as a branch target for the current segment + PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; + IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); + + PPCRecompilerImlGen_generateNewInstruction_conditionalJumpSegment(ppcImlGenContext, jumpCondition, crRegister, crBit, conditionMustBeTrue); + + bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); } } return true; @@ -813,6 +895,8 @@ bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool 
PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); + uint32 BO, BI, BD; PPC_OPC_TEMPL_XL(opcode, BO, BI, BD); @@ -826,6 +910,7 @@ bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0 bool ignoreCondition = (BO&16)!=0; bool saveLR = (opcode&PPC_OPC_LK)!=0; + // since we skip this instruction if the condition is true, we need to invert the logic bool invertedConditionMustBeTrue = !conditionMustBeTrue; if( useDecrementer ) @@ -839,51 +924,63 @@ bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { if( ignoreCondition ) { - // store LR + // branch always, no condition and no decrementer if( saveLR ) { uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + } + if (saveLR) ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - ppcImlGenContext->emitInst().make_ppcEnter(ppcImlGenContext->ppcAddressOfCurrentInstruction+4); - } else - { - // branch always, no condition and no decrementer ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - } } else { - // store LR - if( saveLR ) + // get jump condition + if (invertedConditionMustBeTrue) { - uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, 
(ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - } - // generate jump condition - if( invertedConditionMustBeTrue ) - { - if( crBit == 0 ) + if (crBit == 0) jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) + else if (crBit == 1) jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) + else if (crBit == 2) jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) + else if (crBit == 3) jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; } else { - if( crBit == 0 ) + if (crBit == 0) jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) + else if (crBit == 1) jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) + else if (crBit == 2) jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) + else if (crBit == 3) jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; } + + // debug checks + //if (saveLR) + // cemu_assert_debug(ppcImlGenContext->currentBasicBlock->); + + // we always store LR + if (saveLR) + { + uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); + PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction + 4) & 0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + } + + // write the BCTR instruction to a new segment that is set as a branch target for the current segment + __debugbreak(); + PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; + IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); + + //PPCBasicBlockInfo* bctrSeg = currentBasicBlock->Get + __debugbreak(); + + // jump if BCLR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction) PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, 
ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue); ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); @@ -2915,12 +3012,6 @@ uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenCont return v; } -uint32 PPCRecompiler_getInstructionByOffset(ppcImlGenContext_t* ppcImlGenContext, uint32 offset) -{ - uint32 v = CPU_swapEndianU32(*(ppcImlGenContext->currentInstruction + offset/4)); - return v; -} - uint32 PPCRecompiler_getCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext) { uint32 v = CPU_swapEndianU32(*(ppcImlGenContext->currentInstruction)); @@ -3864,268 +3955,884 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) return unsupportedInstructionFound; } -bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses) +// returns false if code flow is not interrupted +// continueDefaultPath: Controls if +bool PPCRecompiler_CheckIfInstructionEndsSegment(PPCFunctionBoundaryTracker& boundaryTracker, uint32 instructionAddress, uint32 opcode, bool& makeNextInstEnterable, bool& continueDefaultPath, bool& hasBranchTarget, uint32& branchTarget) { - ppcImlGenContext.functionRef = ppcRecFunc; + hasBranchTarget = false; + branchTarget = 0xFFFFFFFF; + makeNextInstEnterable = false; + continueDefaultPath = false; + switch (Espresso::GetPrimaryOpcode(opcode)) + { + case Espresso::PrimaryOpcode::VIRTUAL_HLE: + { + makeNextInstEnterable = true; + hasBranchTarget = false; + continueDefaultPath = false; + return true; + } + case Espresso::PrimaryOpcode::BC: + { + uint32 BD, BI; + Espresso::BOField BO; + bool AA, LK; + Espresso::decodeOp_BC(opcode, BD, BO, BI, AA, LK); + if (!LK) + { + hasBranchTarget = true; + branchTarget = (AA ? 
BD : BD) + instructionAddress; + if (!boundaryTracker.ContainsAddress(branchTarget)) + hasBranchTarget = false; // far jump + } + makeNextInstEnterable = LK; + continueDefaultPath = true; + return true; + } + case Espresso::PrimaryOpcode::B: + { + uint32 LI; + bool AA, LK; + Espresso::decodeOp_B(opcode, LI, AA, LK); + if (!LK) + { + hasBranchTarget = true; + branchTarget = AA ? LI : LI + instructionAddress; + if (!boundaryTracker.ContainsAddress(branchTarget)) + hasBranchTarget = false; // far jump + } + makeNextInstEnterable = LK; + continueDefaultPath = false; + return true; + } + case Espresso::PrimaryOpcode::GROUP_19: + switch (Espresso::GetGroup19Opcode(opcode)) + { + //case Espresso::Opcode19::BCLR: + ////case Espresso::Opcode19::BCCTR: + //{ + // continueDefaultPath = false; // todo - set this to true if this instruction has a condition (including decrementer check) + // makeNextInstEnterable = Espresso::DecodeLK(opcode); + // return true; + //} + + case Espresso::Opcode19::BCLR: + case Espresso::Opcode19::BCCTR: + { + Espresso::BOField BO; + uint32 BI; + bool LK; + Espresso::decodeOp_BCSPR(opcode, BO, BI, LK); + continueDefaultPath = !BO.conditionIgnore() || !BO.decrementerIgnore(); // if branch is always taken then there is no continued path + makeNextInstEnterable = Espresso::DecodeLK(opcode); + return true; + } + default: + break; + } + break; + case Espresso::PrimaryOpcode::GROUP_31: + switch (Espresso::GetGroup31Opcode(opcode)) + { + //case Espresso::Opcode31::TW: + // continueDefaultPath = true; + // return true; + //case Espresso::Opcode31::MFTB: + // continueDefaultPath = true; + // return true; + //case Espresso::Opcode19::BCLR: + //case Espresso::Opcode19::BCCTR: + //{ + // continueDefaultPath = false; + // makeNextInstEnterable = Espresso::DecodeLK(opcode); + // return true; + //} + default: + break; + } + break; + default: + break; + } + return false; +} + +void PPCRecompiler_DetermineBasicBlockRange(std::vector& basicBlockList, 
PPCFunctionBoundaryTracker& boundaryTracker, uint32 ppcStart, uint32 ppcEnd, const std::set& combinedBranchTargets, const std::set& entryAddresses) +{ + cemu_assert_debug(ppcStart <= ppcEnd); + + uint32 currentAddr = ppcStart; + + PPCBasicBlockInfo* curBlockInfo = &basicBlockList.emplace_back(currentAddr, entryAddresses); + + uint32 basicBlockStart = currentAddr; + while (currentAddr <= ppcEnd) + { + curBlockInfo->lastAddress = currentAddr; + uint32 opcode = memory_readU32(currentAddr); + bool nextInstIsEnterable = false; + bool hasBranchTarget = false; + bool hasContinuedFlow = false; + uint32 branchTarget = 0; + if (PPCRecompiler_CheckIfInstructionEndsSegment(boundaryTracker, currentAddr, opcode, nextInstIsEnterable, hasContinuedFlow, hasBranchTarget, branchTarget)) + { + curBlockInfo->hasBranchTarget = hasBranchTarget; + curBlockInfo->branchTarget = branchTarget; + curBlockInfo->hasContinuedFlow = hasContinuedFlow; + // start new basic block, except if this is the last instruction + if (currentAddr >= ppcEnd) + break; + curBlockInfo = &basicBlockList.emplace_back(currentAddr + 4, entryAddresses); + curBlockInfo->isEnterable = curBlockInfo->isEnterable || nextInstIsEnterable; + currentAddr += 4; + continue; + } + currentAddr += 4; + if (currentAddr <= ppcEnd) + { + if (combinedBranchTargets.find(currentAddr) != combinedBranchTargets.end()) + { + // instruction is branch target, start new basic block + curBlockInfo = &basicBlockList.emplace_back(currentAddr, entryAddresses); + } + } + + } +} + +std::vector PPCRecompiler_DetermineBasicBlockRange(PPCFunctionBoundaryTracker& boundaryTracker, const std::set& entryAddresses) +{ + cemu_assert(!entryAddresses.empty()); + std::vector basicBlockList; + + const std::set branchTargets = boundaryTracker.GetBranchTargets(); + auto funcRanges = boundaryTracker.GetRanges(); + + std::set combinedBranchTargets = branchTargets; + combinedBranchTargets.insert(entryAddresses.begin(), entryAddresses.end()); + + for (auto& funcRangeIt 
: funcRanges) + PPCRecompiler_DetermineBasicBlockRange(basicBlockList, boundaryTracker, funcRangeIt.startAddress, funcRangeIt.startAddress + funcRangeIt.length - 4, combinedBranchTargets, entryAddresses); + + // mark all segments that start at entryAddresses as enterable (debug code for verification, can be removed) + size_t numMarkedEnterable = 0; + for (auto& basicBlockIt : basicBlockList) + { + if (entryAddresses.find(basicBlockIt.startAddress) != entryAddresses.end()) + { + cemu_assert_debug(basicBlockIt.isEnterable); + numMarkedEnterable++; + } + } + cemu_assert_debug(numMarkedEnterable == entryAddresses.size()); + + // todo - inline BL, currently this is done in the instruction handler of BL but this will mean that instruction cycle increasing is ignored + + return basicBlockList; +} + +bool PPCIMLGen_FillBasicBlock(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + ppcImlGenContext.currentOutputSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + ppcImlGenContext.currentInstruction = (uint32*)(memory_base + basicBlockInfo.startAddress); + + uint32* firstCurrentInstruction = ppcImlGenContext.currentInstruction; + uint32* endCurrentInstruction = (uint32*)(memory_base + basicBlockInfo.lastAddress); + + while (ppcImlGenContext.currentInstruction <= endCurrentInstruction) + { + uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); + ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; + if (PPCRecompiler_decodePPCInstruction(&ppcImlGenContext)) + { + debug_printf("Recompiler encountered unsupported instruction at 0x%08x\n", addressOfCurrentInstruction); + ppcImlGenContext.currentOutputSegment = nullptr; + return false; + } + } + ppcImlGenContext.currentOutputSegment = nullptr; + return true; +} + +// returns split segment from which the continued segment is available via seg->GetBranchNotTaken() +IMLSegment* 
PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + IMLSegment* writeSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + + //IMLSegment* continuedSegment = ppcImlGenContext.NewSegment(); + IMLSegment* continuedSegment = ppcImlGenContext.InsertSegment(ppcImlGenContext.GetSegmentIndex(writeSegment) + 1); + + continuedSegment->SetLinkBranchTaken(writeSegment->GetBranchTaken()); + continuedSegment->SetLinkBranchNotTaken(writeSegment->GetBranchNotTaken()); + + writeSegment->SetLinkBranchNotTaken(continuedSegment); + writeSegment->SetLinkBranchTaken(nullptr); + + if (ppcImlGenContext.currentOutputSegment == writeSegment) + ppcImlGenContext.currentOutputSegment = continuedSegment; + + cemu_assert_debug(basicBlockInfo.appendSegment == writeSegment); + basicBlockInfo.appendSegment = continuedSegment; + + return writeSegment; +} + +// generates a new segment and sets it as branch target for the current write segment. Returns the created segment +IMLSegment* PPCIMLGen_CreateNewSegmentAsBranchTarget(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + IMLSegment* writeSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + IMLSegment* branchTargetSegment = ppcImlGenContext.NewSegment(); + cemu_assert_debug(!writeSegment->GetBranchTaken()); // must not have a target already + writeSegment->SetLinkBranchTaken(branchTargetSegment); + return branchTargetSegment; +} + +// verify that current instruction is the last instruction of the active basic block +void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenContext) +{ + cemu_assert_debug(ppcImlGenContext.currentBasicBlock->lastAddress == ppcImlGenContext.ppcAddressOfCurrentInstruction); +} + +void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + IMLSegment* imlSegment = basicBlockInfo.GetFirstSegmentInChain(); + //if (imlSegment->imlList.empty()) 
+ // return; + //if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) + // return; + //if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment) + // return; + if (!basicBlockInfo.hasBranchTarget) + return; + if (basicBlockInfo.branchTarget >= basicBlockInfo.startAddress) + return; + + // exclude non-infinite tight loops + if (IMLAnalyzer_IsTightFiniteLoop(imlSegment)) + return; + // potential loop segment found, split this segment into four: + // P0: This segment checks if the remaining cycles counter is still above zero. If yes, it jumps to segment P2 (it's also the jump destination for other segments) + // P1: This segment contains the ppc_leave instruction + // P2: This segment contains the iml instructions of the original segment + // PEntry: This segment is used to enter the function, it jumps to P0 + // All segments are considered to be part of the same PPC instruction range + // The first segment also retains the jump destination and enterable properties from the original segment. + //debug_printf("--- Insert cycle counter check ---\n"); + + + // make the segment enterable so execution can return after checking + basicBlockInfo.GetFirstSegmentInChain()->SetEnterable(basicBlockInfo.startAddress); + + IMLSegment* splitSeg = PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext, basicBlockInfo); + + // what we know about the crash: + // It doesnt happen with cycle checks disabled + // The debugbreak emitted here is only encountered twice before it crashes + // it doesnt seem to go into the alternative branch (cycles negative) -> tested (debugbreak in exit segment doesnt trigger) + // Its the enterable segment that causes issues? 
-> I removed the enterable statement and it still happened + // Maybe some general issue with getting x64 offsets for enterable segments.. + + // possible explanations: + // issue with the cycle check / exit logic + // returning from exit is causing the issue + // Segments can get marked as jump destination which we no longer do -> Deleted old code and added asserts + + IMLInstruction* inst = splitSeg->AppendInstruction(); + inst->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; + inst->operation = 0; + inst->crRegister = PPC_REC_INVALID_REGISTER; + inst->op_conditionalJump.jumpmarkAddress = 0xFFFFFFFF; + inst->associatedPPCAddress = 0xFFFFFFFF; + // PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK + + //splitSeg->AppendInstruction()->make_macro(PPCREC_IML_TYPE_MACRO, ) + + IMLSegment* exitSegment = ppcImlGenContext.NewSegment(); + splitSeg->SetLinkBranchTaken(exitSegment); + + + //exitSegment->AppendInstruction()->make_debugbreak(); + + inst = exitSegment->AppendInstruction();// ->make_macro(PPCREC_IML_MACRO_LEAVE, basicBlockInfo.startAddress); + inst->type = PPCREC_IML_TYPE_MACRO; + inst->operation = PPCREC_IML_MACRO_LEAVE; + inst->crRegister = PPC_REC_INVALID_REGISTER; + inst->op_macro.param = basicBlockInfo.startAddress; + inst->associatedPPCAddress = basicBlockInfo.startAddress; + + + //debug_printf("----------------------------------------\n"); + //IMLDebug_Dump(&ppcImlGenContext); + //__debugbreak(); + + //ppcImlGenContext.NewSegment(); + + //PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); + //imlSegment = NULL; + //IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0]; + //IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1]; + //IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2]; + //// create entry point segment + //PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); + //IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1]; + //// relink 
segments + //IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); + //IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); + //IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2); + //IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); + //// update segments + //uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; + //if (imlSegmentP2->isEnterable) + // enterPPCAddress = imlSegmentP2->enterPPCAddress; + //imlSegmentP0->ppcAddress = 0xFFFFFFFF; + //imlSegmentP1->ppcAddress = 0xFFFFFFFF; + //imlSegmentP2->ppcAddress = 0xFFFFFFFF; + //cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0); + //// move segment properties from segment P2 to segment P0 + //imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination; + //imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress; + //imlSegmentP0->isEnterable = false; + ////imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress; + //imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin; + //imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax; + //imlSegmentP2->isJumpDestination = false; + //imlSegmentP2->jumpDestinationPPCAddress = 0; + //imlSegmentP2->isEnterable = false; + //imlSegmentP2->enterPPCAddress = 0; + //imlSegmentP2->ppcAddrMin = 0; + //imlSegmentP2->ppcAddrMax = 0; + //// setup enterable segment + //if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF) + //{ + // imlSegmentPEntry->isEnterable = true; + // imlSegmentPEntry->ppcAddress = enterPPCAddress; + // imlSegmentPEntry->enterPPCAddress = enterPPCAddress; + //} + //// assign new jumpmark to segment P2 + //imlSegmentP2->isJumpDestination = true; + //imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker; + //currentLoopEscapeJumpMarker++; + //// create ppc_leave instruction in segment P1 + //PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1); + //imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO; + //imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE; + 
//imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + //imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin; + //imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; + //// create cycle-based conditional instruction in segment P0 + //PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1); + //imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; + //imlSegmentP0->imlList[0].operation = 0; + //imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + //imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress; + //imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; + //// jump instruction for PEntry + //PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); + //PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList.data() + 0); +} + +void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext) +{ + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + { + bool isLastSegment = segIt == ppcImlGenContext.segmentList2.back(); + //IMLSegment* nextSegment = isLastSegment ? 
nullptr : ppcImlGenContext->segmentList2[s + 1]; + // handle empty segment + if (segIt->imlList.empty()) + { + cemu_assert_debug(segIt->GetBranchNotTaken()); + continue; + } + // check last instruction of segment + IMLInstruction* imlInstruction = segIt->GetLastInstruction(); + if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + cemu_assert_debug(segIt->GetBranchTaken()); + if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + { + cemu_assert_debug(segIt->GetBranchNotTaken()); + } + + //// find destination segment by ppc jump address + //IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); + //if (jumpDestSegment) + //{ + // if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + // IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); + // IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment); + //} + //else + //{ + // imlSegment->nextSegmentIsUncertain = true; + //} + } + else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) + { + auto macroType = imlInstruction->operation; + switch (macroType) + { + case PPCREC_IML_MACRO_BLR: + case PPCREC_IML_MACRO_BLRL: + case PPCREC_IML_MACRO_BCTR: + case PPCREC_IML_MACRO_BCTRL: + case PPCREC_IML_MACRO_BL: + case PPCREC_IML_MACRO_B_FAR: + case PPCREC_IML_MACRO_HLE: + case PPCREC_IML_MACRO_LEAVE: + segIt->nextSegmentIsUncertain = true; + break; + case PPCREC_IML_MACRO_DEBUGBREAK: + case PPCREC_IML_MACRO_COUNT_CYCLES: + case PPCREC_IML_MACRO_MFTB: + break; + default: + cemu_assert_unimplemented(); + } + } + } +} + +bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunctionBoundaryTracker& boundaryTracker, std::set& entryAddresses) +{ + std::vector basicBlockList = PPCRecompiler_DetermineBasicBlockRange(boundaryTracker, entryAddresses); + + // create segments + std::unordered_map addrToBB; 
+ ppcImlGenContext.segmentList2.resize(basicBlockList.size()); + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + IMLSegment* seg = new IMLSegment(); + seg->ppcAddress = basicBlockInfo.startAddress; + if(basicBlockInfo.isEnterable) + seg->SetEnterable(basicBlockInfo.startAddress); + ppcImlGenContext.segmentList2[i] = seg; + cemu_assert_debug(addrToBB.find(basicBlockInfo.startAddress) == addrToBB.end()); + basicBlockInfo.SetInitialSegment(seg); + addrToBB.emplace(basicBlockInfo.startAddress, &basicBlockInfo); + } + // link segments + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& bbInfo = basicBlockList[i]; + cemu_assert_debug(bbInfo.GetFirstSegmentInChain() == bbInfo.GetSegmentForInstructionAppend()); + IMLSegment* seg = ppcImlGenContext.segmentList2[i]; + if (bbInfo.hasBranchTarget) + { + PPCBasicBlockInfo* targetBB = addrToBB[bbInfo.branchTarget]; + cemu_assert_debug(targetBB); + IMLSegment_SetLinkBranchTaken(seg, targetBB->GetFirstSegmentInChain()); + } + if (bbInfo.hasContinuedFlow) + { + PPCBasicBlockInfo* targetBB = addrToBB[bbInfo.lastAddress + 4]; + if (!targetBB) + { + cemuLog_log(LogType::Recompiler, "Recompiler was unable to link segment [0x{:08x}-0x{:08x}] to 0x{:08x}", bbInfo.startAddress, bbInfo.lastAddress, bbInfo.lastAddress + 4); + return false; + } + cemu_assert_debug(targetBB); + IMLSegment_SetLinkBranchNotTaken(seg, targetBB->GetFirstSegmentInChain()); + } + } + // we assume that all unreachable segments are potentially enterable + // todo - mark them as such + + + // generate cycle counters + // in theory we could generate these as part of FillBasicBlock() but in the future we might use more complex logic to emit fewer operations + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + IMLSegment* seg = basicBlockInfo.GetSegmentForInstructionAppend(); + + uint32 ppcInstructionCount = 
(basicBlockInfo.lastAddress - basicBlockInfo.startAddress + 4) / 4; + cemu_assert_debug(ppcInstructionCount > 0); + + PPCRecompiler_pushBackIMLInstructions(seg, 0, 1); + seg->imlList[0].type = PPCREC_IML_TYPE_MACRO; + seg->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + seg->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; + seg->imlList[0].op_macro.param = ppcInstructionCount; + } + + // generate cycle check instructions + // note: Introduces new segments + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext, basicBlockInfo); + } + + // fill in all the basic blocks + // note: This step introduces new segments as is necessary for some instructions + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + ppcImlGenContext.currentBasicBlock = &basicBlockInfo; + if (!PPCIMLGen_FillBasicBlock(ppcImlGenContext, basicBlockInfo)) + return false; + ppcImlGenContext.currentBasicBlock = nullptr; + } + + // mark segments with unknown jump destination (e.g. BLR and most macros) + PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext); + + // debug - check segment graph +#ifdef CEMU_DEBUG_ASSERT + //for (size_t i = 0; i < basicBlockList.size(); i++) + //{ + // IMLSegment* seg = ppcImlGenContext.segmentList2[i]; + // if (seg->list_prevSegments.empty()) + // { + // cemu_assert_debug(seg->isEnterable); + // } + //} + // debug - check if suffix instructions are at the end of segments and if they are present for branching segments + for (size_t segIndex = 0; segIndex < ppcImlGenContext.segmentList2.size(); segIndex++) + { + IMLSegment* seg = ppcImlGenContext.segmentList2[segIndex]; + IMLSegment* nextSeg = (segIndex+1) < ppcImlGenContext.segmentList2.size() ? 
ppcImlGenContext.segmentList2[segIndex + 1] : nullptr; + + if (seg->imlList.size() > 0) + { + for (size_t f = 0; f < seg->imlList.size() - 1; f++) + { + if (seg->imlList[f].IsSuffixInstruction()) + { + debug_printf("---------------- SegmentDump (Suffix instruction at wrong pos in segment 0x%x):\n", segIndex); + IMLDebug_Dump(&ppcImlGenContext); + __debugbreak(); + } + } + } + if (seg->nextSegmentBranchTaken) + { + if (!seg->HasSuffixInstruction()) + { + debug_printf("---------------- SegmentDump (NoSuffixInstruction in segment 0x%x):\n", segIndex); + IMLDebug_Dump(&ppcImlGenContext); + __debugbreak(); + } + } + if (seg->nextSegmentBranchNotTaken) + { + // if branch not taken, flow must continue to next segment in sequence + cemu_assert_debug(seg->nextSegmentBranchNotTaken == nextSeg); + } + // more detailed checks based on actual suffix instruction + if (seg->imlList.size() > 0) + { + IMLInstruction* inst = seg->GetLastInstruction(); + if (inst->type == PPCREC_IML_TYPE_MACRO && inst->op_macro.param == PPCREC_IML_MACRO_B_FAR) + { + cemu_assert_debug(!seg->GetBranchTaken()); + cemu_assert_debug(!seg->GetBranchNotTaken()); + } + if (inst->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + cemu_assert_debug(seg->GetBranchTaken()); + cemu_assert_debug(seg->GetBranchNotTaken()); + } + if (inst->type == PPCREC_IML_TYPE_CJUMP) + { + if (inst->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) + { + if (!seg->GetBranchTaken() || !seg->GetBranchNotTaken()) + { + debug_printf("---------------- SegmentDump (Missing branch for CJUMP in segment 0x%x):\n", segIndex); + IMLDebug_Dump(&ppcImlGenContext); + cemu_assert_error(); + } + } + else + { + // proper error checking for branch-always (or branch-never if invert bit is set) + } + } + } + //if (seg->list_prevSegments.empty()) + //{ + // cemu_assert_debug(seg->isEnterable); + //} + segIndex++; + } +#endif + + + // todos: + // - basic block determination should look for the B(L) B(L) pattern. 
Or maybe just mark every bb without any input segments as an entry segment + + return true; +} + +bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker) +{ + ppcImlGenContext.functionRef = ppcRecFunc; // todo - remove this and replace internally with boundary tracker + ppcImlGenContext.boundaryTracker = &boundaryTracker; + + if (!PPCRecompiler_GenerateIML(ppcImlGenContext, boundaryTracker, entryAddresses)) + return false; + // add entire range ppcRecRange_t recRange; recRange.ppcAddress = ppcRecFunc->ppcAddress; recRange.ppcSize = ppcRecFunc->ppcSize; ppcRecFunc->list_ranges.push_back(recRange); // process ppc instructions - ppcImlGenContext.currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(ppcRecFunc->ppcAddress); - bool unsupportedInstructionFound = false; - sint32 numPPCInstructions = ppcRecFunc->ppcSize/4; - sint32 unsupportedInstructionCount = 0; - uint32 unsupportedInstructionLastOffset = 0; - uint32* firstCurrentInstruction = ppcImlGenContext.currentInstruction; - uint32* endCurrentInstruction = ppcImlGenContext.currentInstruction + numPPCInstructions; - - while(ppcImlGenContext.currentInstruction < endCurrentInstruction) - { - uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); - ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; - ppcImlGenContext.cyclesSinceLastBranch++; - ppcImlGenContext.emitInst().make_jumpmark(addressOfCurrentInstruction); - if (entryAddresses.find(addressOfCurrentInstruction) != entryAddresses.end()) - { - // add PPCEnter for addresses that are in entryAddresses - ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); - } - else if(ppcImlGenContext.currentInstruction != firstCurrentInstruction) - { - // add PPCEnter mark if code is seemingly unreachable (for example if between two unconditional 
jump instructions without jump goal) - uint32 opcodeCurrent = PPCRecompiler_getCurrentInstruction(&ppcImlGenContext); - uint32 opcodePrevious = PPCRecompiler_getPreviousInstruction(&ppcImlGenContext); - if( ((opcodePrevious>>26) == 18) && ((opcodeCurrent>>26) == 18) ) - { - // between two B(L) instructions - // todo: for BL only if they are not inlineable - - bool canInlineFunction = false; - if ((opcodePrevious & PPC_OPC_LK) && (opcodePrevious & PPC_OPC_AA) == 0) - { - uint32 li; - PPC_OPC_TEMPL_I(opcodePrevious, li); - sint32 inlineSize = 0; - if (PPCRecompiler_canInlineFunction(li + addressOfCurrentInstruction - 4, &inlineSize)) - canInlineFunction = true; - } - if( canInlineFunction == false && (opcodePrevious & PPC_OPC_LK) == false) - ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); - } - if( ((opcodePrevious>>26) == 19) && PPC_getBits(opcodePrevious, 30, 10) == 528 ) - { - uint32 BO, BI, BD; - PPC_OPC_TEMPL_XL(opcodePrevious, BO, BI, BD); - if( (BO & 16) && (opcodePrevious&PPC_OPC_LK) == 0 ) - { - // after unconditional BCTR instruction - ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); - } - } - } - - unsupportedInstructionFound = PPCRecompiler_decodePPCInstruction(&ppcImlGenContext); - if( unsupportedInstructionFound ) - { - unsupportedInstructionCount++; - unsupportedInstructionLastOffset = ppcImlGenContext.ppcAddressOfCurrentInstruction; - unsupportedInstructionFound = false; - //break; - } - } - ppcImlGenContext.ppcAddressOfCurrentInstruction = 0; // reset current instruction offset (any future generated IML instruction will be assigned to ppc address 0) - if( unsupportedInstructionCount > 0 || unsupportedInstructionFound ) - { - debug_printf("Failed recompile due to unknown instruction at 0x%08x\n", unsupportedInstructionLastOffset); - return false; - } - // optimize unused jumpmarks away - // first, flag all jumpmarks as unused - std::map map_jumpMarks; - for(sint32 i=0; isecond->op_jumpmark.flags &= 
~PPCREC_IML_OP_FLAG_UNUSED; - } - } - // lastly, remove jumpmarks that still have the unused flag set - sint32 currentImlIndex = 0; - for(sint32 i=0; i end of segment after current instruction - // If we encounter a jumpmark -> end of segment before current instruction - // If we encounter ppc_enter -> end of segment before current instruction - if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_CJUMP || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLRL || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTRL)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BL)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_B_FAR)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_LEAVE)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_HLE)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_MFTB)) ) - { - // segment ends after current instruction - IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); - ppcRecSegment->startOffset = segmentStart; - ppcRecSegment->count = segmentImlIndex-segmentStart+1; - ppcRecSegment->ppcAddress = 0xFFFFFFFF; - segmentStart = segmentImlIndex+1; - } - else if( 
ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_JUMPMARK || - ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_PPC_ENTER ) - { - // segment ends before current instruction - if( segmentImlIndex > segmentStart ) - { - IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); - ppcRecSegment->startOffset = segmentStart; - ppcRecSegment->count = segmentImlIndex-segmentStart; - ppcRecSegment->ppcAddress = 0xFFFFFFFF; - segmentStart = segmentImlIndex; - } - } - segmentImlIndex++; - } - if( segmentImlIndex != segmentStart ) - { - // final segment - IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); - ppcRecSegment->startOffset = segmentStart; - ppcRecSegment->count = segmentImlIndex-segmentStart; - ppcRecSegment->ppcAddress = 0xFFFFFFFF; - segmentStart = segmentImlIndex; - } - // move iml instructions into the segments - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - uint32 imlStartIndex = segIt->startOffset; - uint32 imlCount = segIt->count; - if( imlCount > 0 ) - { - cemu_assert_debug(segIt->imlList.empty()); - segIt->imlList.insert(segIt->imlList.begin(), ppcImlGenContext.imlList + imlStartIndex, ppcImlGenContext.imlList + imlStartIndex + imlCount); - - } - else - { - // empty segments are allowed so we can handle multiple PPC entry addresses pointing to the same code - cemu_assert_debug(segIt->imlList.empty()); - } - segIt->startOffset = 9999999; - segIt->count = 9999999; - } - // clear segment-independent iml list - free(ppcImlGenContext.imlList); - ppcImlGenContext.imlList = nullptr; - ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList - // calculate PPC address of each segment based on iml instructions inside that segment (we need this info to calculate how many cpu cycles each segment takes) - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - uint32 segmentPPCAddrMin = 
0xFFFFFFFF; - uint32 segmentPPCAddrMax = 0x00000000; - for(sint32 i=0; i< segIt->imlList.size(); i++) - { - if(segIt->imlList[i].associatedPPCAddress == 0 ) - continue; - //if( ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK || ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_NO_OP ) - // continue; // jumpmarks and no-op instructions must not affect segment ppc address range - segmentPPCAddrMin = std::min(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMin); - segmentPPCAddrMax = std::max(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMax); - } - if( segmentPPCAddrMin != 0xFFFFFFFF ) - { - segIt->ppcAddrMin = segmentPPCAddrMin; - segIt->ppcAddrMax = segmentPPCAddrMax; - } - else - { - segIt->ppcAddrMin = 0; - segIt->ppcAddrMax = 0; - } - } - // certain instructions can change the segment state - // ppcEnter instruction marks a segment as enterable (BL, BCTR, etc. instructions can enter at this location from outside) - // jumpmarks mark the segment as a jump destination (within the same function) - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - while (segIt->imlList.size() > 0) - { - if (segIt->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER) - { - // mark segment as enterable - if (segIt->isEnterable) - assert_dbg(); // should not happen? - segIt->isEnterable = true; - segIt->enterPPCAddress = segIt->imlList[0].op_ppcEnter.ppcAddress; - // remove ppc_enter instruction - segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; - segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - segIt->imlList[0].associatedPPCAddress = 0; - } - else if(segIt->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK ) - { - // mark segment as jump destination - if(segIt->isJumpDestination ) - assert_dbg(); // should not happen? 
- segIt->isJumpDestination = true; - segIt->jumpDestinationPPCAddress = segIt->imlList[0].op_jumpmark.address; - // remove jumpmark instruction - segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; - segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - segIt->imlList[0].associatedPPCAddress = 0; - } - else - break; - } - } - // the first segment is always enterable as the recompiled functions entrypoint - ppcImlGenContext.segmentList2[0]->isEnterable = true; - ppcImlGenContext.segmentList2[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress; - - // link segments for further inter-segment optimization - PPCRecompilerIML_linkSegments(&ppcImlGenContext); +// ppcImlGenContext.currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(ppcRecFunc->ppcAddress); +// bool unsupportedInstructionFound = false; +// sint32 numPPCInstructions = ppcRecFunc->ppcSize/4; +// sint32 unsupportedInstructionCount = 0; +// uint32 unsupportedInstructionLastOffset = 0; +// uint32* firstCurrentInstruction = ppcImlGenContext.currentInstruction; +// uint32* endCurrentInstruction = ppcImlGenContext.currentInstruction + numPPCInstructions; +// +// while(ppcImlGenContext.currentInstruction < endCurrentInstruction) +// { +// uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); +// ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; +// ppcImlGenContext.cyclesSinceLastBranch++; +// ppcImlGenContext.emitInst().make_jumpmark(addressOfCurrentInstruction); +// if (entryAddresses.find(addressOfCurrentInstruction) != entryAddresses.end()) +// { +// // add PPCEnter for addresses that are in entryAddresses +// ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); +// } +// else if(ppcImlGenContext.currentInstruction != firstCurrentInstruction) +// { +// // add PPCEnter mark if code is seemingly unreachable (for example if between two unconditional jump instructions without jump goal) +// 
uint32 opcodeCurrent = PPCRecompiler_getCurrentInstruction(&ppcImlGenContext); +// uint32 opcodePrevious = PPCRecompiler_getPreviousInstruction(&ppcImlGenContext); +// if( ((opcodePrevious>>26) == 18) && ((opcodeCurrent>>26) == 18) ) +// { +// // between two B(L) instructions +// // todo: for BL only if they are not inlineable +// +// bool canInlineFunction = false; +// if ((opcodePrevious & PPC_OPC_LK) && (opcodePrevious & PPC_OPC_AA) == 0) +// { +// uint32 li; +// PPC_OPC_TEMPL_I(opcodePrevious, li); +// sint32 inlineSize = 0; +// if (PPCRecompiler_canInlineFunction(li + addressOfCurrentInstruction - 4, &inlineSize)) +// canInlineFunction = true; +// } +// if( canInlineFunction == false && (opcodePrevious & PPC_OPC_LK) == false) +// ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); +// } +// if( ((opcodePrevious>>26) == 19) && PPC_getBits(opcodePrevious, 30, 10) == 528 ) +// { +// uint32 BO, BI, BD; +// PPC_OPC_TEMPL_XL(opcodePrevious, BO, BI, BD); +// if( (BO & 16) && (opcodePrevious&PPC_OPC_LK) == 0 ) +// { +// // after unconditional BCTR instruction +// ppcImlGenContext.emitInst().make_ppcEnter(addressOfCurrentInstruction); +// } +// } +// } +// +// unsupportedInstructionFound = PPCRecompiler_decodePPCInstruction(&ppcImlGenContext); +// if( unsupportedInstructionFound ) +// { +// unsupportedInstructionCount++; +// unsupportedInstructionLastOffset = ppcImlGenContext.ppcAddressOfCurrentInstruction; +// unsupportedInstructionFound = false; +// //break; +// } +// } +// ppcImlGenContext.ppcAddressOfCurrentInstruction = 0; // reset current instruction offset (any future generated IML instruction will be assigned to ppc address 0) +// if( unsupportedInstructionCount > 0 || unsupportedInstructionFound ) +// { +// debug_printf("Failed recompile due to unknown instruction at 0x%08x\n", unsupportedInstructionLastOffset); +// return false; +// } +// // optimize unused jumpmarks away +// // first, flag all jumpmarks as unused +// std::map 
map_jumpMarks; +// for(sint32 i=0; isecond->op_jumpmark.flags &= ~PPCREC_IML_OP_FLAG_UNUSED; +// } +// } +// // lastly, remove jumpmarks that still have the unused flag set +// sint32 currentImlIndex = 0; +// for(sint32 i=0; i end of segment after current instruction +// // If we encounter a jumpmark -> end of segment before current instruction +// // If we encounter ppc_enter -> end of segment before current instruction +// if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_CJUMP || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLRL || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTRL)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BL)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_B_FAR)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_LEAVE)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_HLE)) || +// (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_MFTB)) ) +// { +// // segment ends after current instruction +// IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); +// ppcRecSegment->startOffset = segmentStart; +// ppcRecSegment->count = segmentImlIndex-segmentStart+1; +// ppcRecSegment->ppcAddress = 
0xFFFFFFFF; +// segmentStart = segmentImlIndex+1; +// } +// else if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_JUMPMARK || +// ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_PPC_ENTER ) +// { +// // segment ends before current instruction +// if( segmentImlIndex > segmentStart ) +// { +// IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); +// ppcRecSegment->startOffset = segmentStart; +// ppcRecSegment->count = segmentImlIndex-segmentStart; +// ppcRecSegment->ppcAddress = 0xFFFFFFFF; +// segmentStart = segmentImlIndex; +// } +// } +// segmentImlIndex++; +// } +// if( segmentImlIndex != segmentStart ) +// { +// // final segment +// IMLSegment* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); +// ppcRecSegment->startOffset = segmentStart; +// ppcRecSegment->count = segmentImlIndex-segmentStart; +// ppcRecSegment->ppcAddress = 0xFFFFFFFF; +// segmentStart = segmentImlIndex; +// } +// // move iml instructions into the segments +// for (IMLSegment* segIt : ppcImlGenContext.segmentList2) +// { +// uint32 imlStartIndex = segIt->startOffset; +// uint32 imlCount = segIt->count; +// if( imlCount > 0 ) +// { +// cemu_assert_debug(segIt->imlList.empty()); +// segIt->imlList.insert(segIt->imlList.begin(), ppcImlGenContext.imlList + imlStartIndex, ppcImlGenContext.imlList + imlStartIndex + imlCount); +// +// } +// else +// { +// // empty segments are allowed so we can handle multiple PPC entry addresses pointing to the same code +// cemu_assert_debug(segIt->imlList.empty()); +// } +// segIt->startOffset = 9999999; +// segIt->count = 9999999; +// } +// // clear segment-independent iml list +// free(ppcImlGenContext.imlList); +// ppcImlGenContext.imlList = nullptr; +// ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList +// // calculate PPC address of each segment based on iml instructions inside that segment (we 
need this info to calculate how many cpu cycles each segment takes) +// for (IMLSegment* segIt : ppcImlGenContext.segmentList2) +// { +// uint32 segmentPPCAddrMin = 0xFFFFFFFF; +// uint32 segmentPPCAddrMax = 0x00000000; +// for(sint32 i=0; i< segIt->imlList.size(); i++) +// { +// if(segIt->imlList[i].associatedPPCAddress == 0 ) +// continue; +// //if( ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK || ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_NO_OP ) +// // continue; // jumpmarks and no-op instructions must not affect segment ppc address range +// segmentPPCAddrMin = std::min(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMin); +// segmentPPCAddrMax = std::max(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMax); +// } +// if( segmentPPCAddrMin != 0xFFFFFFFF ) +// { +// segIt->ppcAddrMin = segmentPPCAddrMin; +// segIt->ppcAddrMax = segmentPPCAddrMax; +// } +// else +// { +// segIt->ppcAddrMin = 0; +// segIt->ppcAddrMax = 0; +// } +// } +// // certain instructions can change the segment state +// // ppcEnter instruction marks a segment as enterable (BL, BCTR, etc. instructions can enter at this location from outside) +// // jumpmarks mark the segment as a jump destination (within the same function) +// for (IMLSegment* segIt : ppcImlGenContext.segmentList2) +// { +// while (segIt->imlList.size() > 0) +// { +// if (segIt->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER) +// { +// // mark segment as enterable +// if (segIt->isEnterable) +// assert_dbg(); // should not happen? 
+// segIt->isEnterable = true; +// segIt->enterPPCAddress = segIt->imlList[0].op_ppcEnter.ppcAddress; +// // remove ppc_enter instruction +// segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; +// segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; +// segIt->imlList[0].associatedPPCAddress = 0; +// } +// else if(segIt->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK ) +// { +// // mark segment as jump destination +// if(segIt->isJumpDestination ) +// assert_dbg(); // should not happen? +// segIt->isJumpDestination = true; +// segIt->jumpDestinationPPCAddress = segIt->imlList[0].op_jumpmark.address; +// // remove jumpmark instruction +// segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; +// segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; +// segIt->imlList[0].associatedPPCAddress = 0; +// } +// else +// break; +// } +// } +// // the first segment is always enterable as the recompiled functions entrypoint +// ppcImlGenContext.segmentList2[0]->isEnterable = true; +// ppcImlGenContext.segmentList2[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress; +// +// // link segments for further inter-segment optimization +// PPCRecompilerIML_linkSegments(&ppcImlGenContext); // optimization pass - replace segments with conditional MOVs if possible for (IMLSegment* segIt : ppcImlGenContext.segmentList2) @@ -4215,129 +4922,132 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext } // insert cycle counter instruction in every segment that has a cycle count greater zero - for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - { - if( segIt->ppcAddrMin == 0 ) - continue; - // count number of PPC instructions in segment - // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions - uint32 lastPPCInstAddr = 0; - uint32 ppcCount2 = 0; - for (sint32 i = 0; i < segIt->imlList.size(); i++) - { - if (segIt->imlList[i].associatedPPCAddress == 0) - 
continue; - if (segIt->imlList[i].associatedPPCAddress == lastPPCInstAddr) - continue; - lastPPCInstAddr = segIt->imlList[i].associatedPPCAddress; - ppcCount2++; - } - //uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions - uint32 cycleCount = ppcCount2;// ppcCount / 4; - if( cycleCount > 0 ) - { - PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1); - segIt->imlList[0].type = PPCREC_IML_TYPE_MACRO; - segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - segIt->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; - segIt->imlList[0].op_macro.param = cycleCount; - } - } + //for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + //{ + // if( segIt->ppcAddrMin == 0 ) + // continue; + // // count number of PPC instructions in segment + // // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions + // uint32 lastPPCInstAddr = 0; + // uint32 ppcCount2 = 0; + // for (sint32 i = 0; i < segIt->imlList.size(); i++) + // { + // if (segIt->imlList[i].associatedPPCAddress == 0) + // continue; + // if (segIt->imlList[i].associatedPPCAddress == lastPPCInstAddr) + // continue; + // lastPPCInstAddr = segIt->imlList[i].associatedPPCAddress; + // ppcCount2++; + // } + // //uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions + // uint32 cycleCount = ppcCount2;// ppcCount / 4; + // if( cycleCount > 0 ) + // { + // PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1); + // segIt->imlList[0].type = PPCREC_IML_TYPE_MACRO; + // segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + // segIt->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; + // segIt->imlList[0].op_macro.param = cycleCount; + // } + //} return true; } void PPCRecompiler_FixLoops(ppcImlGenContext_t& ppcImlGenContext) { - // find segments that have a (conditional) jump instruction that points in reverse 
direction of code flow - // for these segments there is a risk that the recompiler could get trapped in an infinite busy loop. - // todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example) - uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located - for (size_t s = 0; s < ppcImlGenContext.segmentList2.size(); s++) - { - // todo: This currently uses segment->ppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?) - IMLSegment* imlSegment = ppcImlGenContext.segmentList2[s]; - if (imlSegment->imlList.empty()) - continue; - if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) - continue; - if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment) - continue; - // exclude non-infinite tight loops - if (IMLAnalyzer_IsTightFiniteLoop(imlSegment)) - continue; - // potential loop segment found, split this segment into four: - // P0: This segment checks if the remaining cycles counter is still above zero. If yes, it jumps to segment P2 (it's also the jump destination for other segments) - // P1: This segment consists only of a single ppc_leave instruction and is usually skipped. Register unload instructions are later inserted here. 
- // P2: This segment contains the iml instructions of the original segment - // PEntry: This segment is used to enter the function, it jumps to P0 - // All segments are considered to be part of the same PPC instruction range - // The first segment also retains the jump destination and enterable properties from the original segment. - //debug_printf("--- Insert cycle counter check ---\n"); + return; // deprecated - PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); - imlSegment = NULL; - IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0]; - IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1]; - IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2]; - // create entry point segment - PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); - IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1]; - // relink segments - IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); - IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); - IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2); - IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); - // update segments - uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; - if (imlSegmentP2->isEnterable) - enterPPCAddress = imlSegmentP2->enterPPCAddress; - imlSegmentP0->ppcAddress = 0xFFFFFFFF; - imlSegmentP1->ppcAddress = 0xFFFFFFFF; - imlSegmentP2->ppcAddress = 0xFFFFFFFF; - cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0); - // move segment properties from segment P2 to segment P0 - imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination; - imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress; - imlSegmentP0->isEnterable = false; - //imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress; - imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin; - imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax; - imlSegmentP2->isJumpDestination = 
false; - imlSegmentP2->jumpDestinationPPCAddress = 0; - imlSegmentP2->isEnterable = false; - imlSegmentP2->enterPPCAddress = 0; - imlSegmentP2->ppcAddrMin = 0; - imlSegmentP2->ppcAddrMax = 0; - // setup enterable segment - if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF) - { - imlSegmentPEntry->isEnterable = true; - imlSegmentPEntry->ppcAddress = enterPPCAddress; - imlSegmentPEntry->enterPPCAddress = enterPPCAddress; - } - // assign new jumpmark to segment P2 - imlSegmentP2->isJumpDestination = true; - imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker; - currentLoopEscapeJumpMarker++; - // create ppc_leave instruction in segment P1 - PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1); - imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO; - imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE; - imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin; - imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; - // create cycle-based conditional instruction in segment P0 - PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1); - imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; - imlSegmentP0->imlList[0].operation = 0; - imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress; - imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; - // jump instruction for PEntry - PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList.data() + 0); + //// find segments that have a (conditional) jump instruction that points in reverse direction of code flow + //// for these segments there is a risk that the recompiler could get trapped in an infinite busy loop. 
+ //// todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example) + //uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located + //for (size_t s = 0; s < ppcImlGenContext.segmentList2.size(); s++) + //{ + // // todo: This currently uses segment->ppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?) + // IMLSegment* imlSegment = ppcImlGenContext.segmentList2[s]; + // if (imlSegment->imlList.empty()) + // continue; + // if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) + // continue; + // if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment) + // continue; - // skip the newly created segments - s += 2; - } + // // exclude non-infinite tight loops + // if (IMLAnalyzer_IsTightFiniteLoop(imlSegment)) + // continue; + // // potential loop segment found, split this segment into four: + // // P0: This segment checks if the remaining cycles counter is still above zero. If yes, it jumps to segment P2 (it's also the jump destination for other segments) + // // P1: This segment consists only of a single ppc_leave instruction and is usually skipped. Register unload instructions are later inserted here. 
+ // // P2: This segment contains the iml instructions of the original segment + // // PEntry: This segment is used to enter the function, it jumps to P0 + // // All segments are considered to be part of the same PPC instruction range + // // The first segment also retains the jump destination and enterable properties from the original segment. + // //debug_printf("--- Insert cycle counter check ---\n"); + + // PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); + // imlSegment = NULL; + // IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0]; + // IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1]; + // IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2]; + // // create entry point segment + // PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); + // IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1]; + // // relink segments + // IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0); + // IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); + // IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2); + // IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); + // // update segments + // uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; + // if (imlSegmentP2->isEnterable) + // enterPPCAddress = imlSegmentP2->enterPPCAddress; + // imlSegmentP0->ppcAddress = 0xFFFFFFFF; + // imlSegmentP1->ppcAddress = 0xFFFFFFFF; + // imlSegmentP2->ppcAddress = 0xFFFFFFFF; + // cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0); + // // move segment properties from segment P2 to segment P0 + // imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination; + // imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress; + // imlSegmentP0->isEnterable = false; + // //imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress; + // imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin; + // 
imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax; + // imlSegmentP2->isJumpDestination = false; + // imlSegmentP2->jumpDestinationPPCAddress = 0; + // imlSegmentP2->isEnterable = false; + // imlSegmentP2->enterPPCAddress = 0; + // imlSegmentP2->ppcAddrMin = 0; + // imlSegmentP2->ppcAddrMax = 0; + // // setup enterable segment + // if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF) + // { + // imlSegmentPEntry->isEnterable = true; + // imlSegmentPEntry->ppcAddress = enterPPCAddress; + // imlSegmentPEntry->enterPPCAddress = enterPPCAddress; + // } + // // assign new jumpmark to segment P2 + // imlSegmentP2->isJumpDestination = true; + // imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker; + // currentLoopEscapeJumpMarker++; + // // create ppc_leave instruction in segment P1 + // PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1); + // imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO; + // imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE; + // imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + // imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin; + // imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; + // // create cycle-based conditional instruction in segment P0 + // PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1); + // imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; + // imlSegmentP0->imlList[0].operation = 0; + // imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + // imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress; + // imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; + // // jump instruction for PEntry + // PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); + // PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList.data() + 0); + + // // skip the newly created segments + // s += 
2; + //} } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index ccb0fc83..7b4b94fb 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -3,63 +3,67 @@ IMLSegment* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset) { - for(IMLSegment* segIt : ppcImlGenContext->segmentList2) - { - if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset ) - { - return segIt; - } - } - debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset); + __debugbreak(); return nullptr; + //for(IMLSegment* segIt : ppcImlGenContext->segmentList2) + //{ + // if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset ) + // { + // return segIt; + // } + //} + //debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset); + //return nullptr; } void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) { - size_t segCount = ppcImlGenContext->segmentList2.size(); - for(size_t s=0; s<segCount; s++) - { - IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; + __debugbreak(); // outdated - bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size(); - IMLSegment* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1]; - // handle empty segment - if( imlSegment->imlList.empty()) - { - if (isLastSegment == false) - IMLSegment_SetLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment - else - imlSegment->nextSegmentIsUncertain = true; - continue; - } - // check last instruction of segment - IMLInstruction* imlInstruction = imlSegment->imlList.data() + (imlSegment->imlList.size() - 1); - if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type ==
PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) - { - // find destination segment by ppc jump address - IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); - if( jumpDestSegment ) - { - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); - IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment); - } - else - { - imlSegment->nextSegmentIsUncertain = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) - { - // currently we assume that the next segment is unknown for all macros - imlSegment->nextSegmentIsUncertain = true; - } - else - { - // all other instruction types do not branch - IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); - } - } + //size_t segCount = ppcImlGenContext->segmentList2.size(); + //for(size_t s=0; s<segCount; s++) + //{ + // IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; + + // bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size(); + // IMLSegment* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1]; + // // handle empty segment + // if( imlSegment->imlList.empty()) + // { + // if (isLastSegment == false) + // IMLSegment_SetLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment + // else + // imlSegment->nextSegmentIsUncertain = true; + // continue; + // } + // // check last instruction of segment + // IMLInstruction* imlInstruction = imlSegment->imlList.data() + (imlSegment->imlList.size() - 1); + // if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) + // { + // // find destination segment by ppc jump address + // IMLSegment* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); + // if( jumpDestSegment ) + // { + // if (imlInstruction->op_conditionalJump.condition !=
PPCREC_JUMP_CONDITION_NONE) + // IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); + // IMLSegment_SetLinkBranchTaken(imlSegment, jumpDestSegment); + // } + // else + // { + // imlSegment->nextSegmentIsUncertain = true; + // } + // } + // else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) + // { + // // currently we assume that the next segment is unknown for all macros + // imlSegment->nextSegmentIsUncertain = true; + // } + // else + // { + // // all other instruction types do not branch + // IMLSegment_SetLinkBranchNotTaken(imlSegment, nextSegment); + // } + //} } void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext) diff --git a/src/Cemu/Logging/CemuLogging.h b/src/Cemu/Logging/CemuLogging.h index 5b2e5fa4..d729d364 100644 --- a/src/Cemu/Logging/CemuLogging.h +++ b/src/Cemu/Logging/CemuLogging.h @@ -39,7 +39,6 @@ enum class LogType : sint32 NN_SL = 26, TextureReadback = 29, - ProcUi = 39, nlibcurl = 41, @@ -47,6 +46,7 @@ enum class LogType : sint32 NFC = 41, NTAG = 42, + Recompiler = 60, }; template <>