diff --git a/src/Cafe/HW/Espresso/PPCState.h b/src/Cafe/HW/Espresso/PPCState.h index c315ed0e..ea7edfa2 100644 --- a/src/Cafe/HW/Espresso/PPCState.h +++ b/src/Cafe/HW/Espresso/PPCState.h @@ -67,7 +67,8 @@ struct PPCInterpreter_t uint32 reservedMemValue; // temporary storage for recompiler FPR_t temporaryFPR[8]; - uint32 temporaryGPR[4]; + uint32 temporaryGPR[4]; // deprecated, refactor away backend dependency on this + uint32 temporaryGPR_reg[4]; // values below this are not used by Cafe OS usermode struct { diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp index ce47844d..4197ef89 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -23,6 +23,11 @@ static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId) return (x86Assembler64::GPR32)regId; } +static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) +{ + return (x86Assembler64::GPR8_REX)regId; +} + X86Cond _x86Cond(IMLCondition imlCond) { @@ -32,6 +37,10 @@ X86Cond _x86Cond(IMLCondition imlCond) return X86_CONDITION_Z; case IMLCondition::NEQ: return X86_CONDITION_NZ; + case IMLCondition::UNSIGNED_GT: + return X86_CONDITION_NBE; + case IMLCondition::UNSIGNED_LT: + return X86_CONDITION_B; default: break; } @@ -758,56 +767,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp else assert_dbg(); } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // copy operand to result if different registers - if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) - { - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); 
- } - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // add carry bit - x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, 0); - // update xer carry - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_ME ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // copy operand to result if different registers - if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) - { - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA); - } - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // add carry bit - x64Gen_adc_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r.registerResult, (uint32)-1); - // update xer carry - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // registerResult = ~registerOperand1 + carry - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = imlInstruction->op_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r.registerA; - // copy operand to result register - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - // execute NOT on result - x64Gen_not_reg64Low32(x64GenContext, rRegResult); - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // add carry - 
x64Gen_adc_reg64Low32_imm32(x64GenContext, rRegResult, 0); - // update carry - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - } else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); @@ -1043,56 +1002,26 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - if( imlInstruction->operation == PPCREC_IML_OP_ADD || imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY || imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY ) + if( imlInstruction->operation == PPCREC_IML_OP_ADD) { // registerResult = registerOperand1 + registerOperand2 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; - - bool addCarry = imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY; if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) { // be careful not to overwrite the operand before we use it if( rRegResult == rRegOperand1 ) - { - if( addCarry ) - { - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - else - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); else - { - if( addCarry ) - { - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - } - else - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - } + 
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); } else { // copy operand1 to destination register before doing addition x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); - // add operand2 - if( addCarry ) - { - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - else - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - // update carry - if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY || imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY ) - { - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } } else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) @@ -1128,52 +1057,25 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } } - else if( imlInstruction->operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY ) + else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // registerResult = registerOperand1 - registerOperand2 + carry PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; - if( rRegOperand1 == rRegOperand2 ) - { - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_cmc(x64GenContext); - // result = operand1 - operand1 -> 0 - 
x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - } - else if( rRegResult == rRegOperand1 ) - { - // copy inverted xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_cmc(x64GenContext); - // result = result - operand2 - x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - else if ( rRegResult == rRegOperand2 ) - { - // result = operand1 - result - // NOT result - x64Gen_not_reg64Low32(x64GenContext, rRegResult); - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // ADC result, operand1 - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - } + sint32 rRegA = imlInstruction->op_r_r_r.registerA; + sint32 rRegB = imlInstruction->op_r_r_r.registerB; + if (rRegResult == rRegB) + std::swap(rRegA, rRegB); + + if (rRegResult != rRegA) + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegA); + + if (imlInstruction->operation == PPCREC_IML_OP_OR) + x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB); + else if (imlInstruction->operation == PPCREC_IML_OP_AND) + x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB); else - { - // copy operand1 to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_cmc(x64GenContext); - // sub operand2 - x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - // update carry flag (todo: is this actually correct in all cases?) 
- x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); + x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegB); } else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) { @@ -1198,79 +1100,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); } } - else if( imlInstruction->operation == PPCREC_IML_OP_SUBFC ) - { - // registerResult = registerOperand2(rB) - registerOperand1(rA) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // updates carry flag - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - return false; - } - sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; - sint32 rRegOperandA = imlInstruction->op_r_r_r.registerA; - sint32 rRegOperandB = imlInstruction->op_r_r_r.registerB; - // update carry flag - // carry flag is detected this way: - //if ((~a+b) < a) { - // return true; - //} - //if ((~a+b+1) < 1) { - // return true; - //} - // set carry to zero - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // ((~a+b)<~a) == true -> ca = 1 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA); - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandB); - x64Gen_not_reg64Low32(x64GenContext, rRegOperandA); - x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandA); - x64Gen_not_reg64Low32(x64GenContext, rRegOperandA); - sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); - // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - 
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); - // OR ((~a+b+1)<1) == true -> ca = 1 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA); - // todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1 - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandB); - x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); - // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); - // do subtraction - if( rRegOperandB == rRegOperandA ) - { - // result = operandA - operandA -> 0 - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - } - else if( rRegResult == rRegOperandB ) - { - // result = result - operandA - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperandA); - } - else if ( rRegResult == rRegOperandA ) - { - // result = operandB - result - // NEG result - x64Gen_neg_reg64Low32(x64GenContext, rRegResult); - // ADD result, operandB - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperandB); - } - else - { - // copy operand1 to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperandB); - // sub operand2 - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperandA); - } - } else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW ) { // registerResult = registerOperand1(rA) >> 
registerOperand2(rB) (up to 63 bits) @@ -1351,78 +1180,88 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); } } - else if( imlInstruction->operation == PPCREC_IML_OP_SRAW ) + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S || + imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U || + imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) { - // registerResult = (sint32)registerOperand1(rA) >> (sint32)registerOperand2(rB) (up to 63 bits) PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + + // x86's shift and rotate instruction have the shift amount hardwired to the CL register + // since our register allocator doesn't support instruction based fixed phys registers yet + // we'll instead have to temporarily shuffle registers around + sint32 rRegResult = imlInstruction->op_r_r_r.registerResult; sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA; sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB; - // save cr - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) + + // we use BMI2's shift instructions until the RA can assign fixed registers + if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) { - return false; + x64Gen_sarx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); } - // todo: Use BMI instructions if available? 
- // MOV registerResult, registerOperand (if different) - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1); - // reset carry - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // we use the same shift by register approach as in SLW/SRW, but we have to differentiate by signed/unsigned shift since it influences how the carry flag is set - x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 0x80000000); - sint32 jumpInstructionJumpToSignedShift = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - // unsigned shift (MSB of input register is not set) - for(sint32 b=0; b<6; b++) + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) { - x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set - if( b == 5 ) - { - x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<emitter->GetWriteIndex()); + x64Gen_shrx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); } - sint32 jumpInstructionJumpToEnd = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NONE, 0); - // signed shift - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToSignedShift, x64GenContext->emitter->GetWriteIndex()); - for(sint32 b=0; b<6; b++) + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) { - // check if we need to shift by (1<emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set - // set ca if any non-zero bit is shifted out - x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (1<<(1<emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if no bit is set - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - 
PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToAfterCa, x64GenContext->emitter->GetWriteIndex()); - // arithmetic shift - if( b == 5 ) - { - // copy sign bit into all bits - x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<emitter->GetWriteIndex()); + x64Gen_shlx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); } - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToEnd, x64GenContext->emitter->GetWriteIndex()); - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); - // update CR if requested - // todo + + //auto rResult = _reg32(rRegResult); + //auto rOp2 = _reg8_from_reg32(_reg32(rRegOperand2)); + + //if (rRegResult == rRegOperand2) + //{ + // if (rRegResult != rRegOperand1) + // __debugbreak(); // cannot handle yet (we use rRegResult as a temporary reg, but its not possible if it is shared with op2) + //} + + //if(rRegOperand1 != rRegResult) + // x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); + + //cemu_assert_debug(rRegOperand1 != X86_REG_ECX); + + //if (rRegOperand2 == X86_REG_ECX) + //{ + // if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) + // x64GenContext->emitter->SAR_d_CL(rResult); + // else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + // x64GenContext->emitter->SHR_d_CL(rResult); + // else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + // x64GenContext->emitter->SHL_d_CL(rResult); + // else + // cemu_assert_unimplemented(); + //} + //else + //{ + // auto rRegResultOrg = rRegResult; + // if (rRegResult == X86_REG_ECX) + // { + // x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegResult); + // rRegResult = REG_RESV_TEMP; + // rResult = _reg32(rRegResult); + // } + // + // x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2); + // + // if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) + // x64GenContext->emitter->SAR_d_CL(rResult); + // else if 
(imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + // x64GenContext->emitter->SHR_d_CL(rResult); + // else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + // x64GenContext->emitter->SHL_d_CL(rResult); + // else + // cemu_assert_unimplemented(); + + // x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2); + + // // move result back if it was in ECX + // if (rRegResultOrg == X86_REG_ECX) + // { + // x64Gen_mov_reg64_reg64(x64GenContext, rRegResultOrg, REG_RESV_TEMP); + // } + //} } else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) { @@ -1520,6 +1359,44 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, return true; } +bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + + auto regR = _reg32(imlInstruction->op_r_r_r_carry.regR); + auto regA = _reg32(imlInstruction->op_r_r_r_carry.regA); + auto regB = _reg32(imlInstruction->op_r_r_r_carry.regB); + auto regCarry = _reg32(imlInstruction->op_r_r_r_carry.regCarry); + cemu_assert_debug(regCarry != regR && regCarry != regA); + + switch (imlInstruction->operation) + { + case PPCREC_IML_OP_ADD: + if (regB == regR) + std::swap(regB, regA); + if (regR != regA) + x64GenContext->emitter->MOV_dd(regR, regA); + x64GenContext->emitter->XOR_dd(regCarry, regCarry); + x64GenContext->emitter->ADD_dd(regR, regB); + x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); // below condition checks carry flag + break; + case PPCREC_IML_OP_ADD_WITH_CARRY: + // assumes that carry is already correctly initialized as 0 or 1 + if (regB == regR) + std::swap(regB, regA); + if (regR != regA) + x64GenContext->emitter->MOV_dd(regR, regA); + 
x64GenContext->emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag + x64GenContext->emitter->ADC_dd(regR, regB); + x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); + break; + default: + cemu_assert_unimplemented(); + return false; + } + return true; +} + bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); @@ -1557,6 +1434,14 @@ bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, return true; } +bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment) +{ + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); + x64GenContext->emitter->JMP_j32(0); + return true; +} + bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); @@ -1584,65 +1469,20 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32); } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY ) + else if (imlInstruction->operation == PPCREC_IML_OP_AND || + imlInstruction->operation == PPCREC_IML_OP_OR || + imlInstruction->operation == PPCREC_IML_OP_XOR) { cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // registerResult = 
registerOperand + immS32 PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; - sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; - uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; - if( rRegResult != rRegOperand ) - { - // copy value to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); - } - x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immU32); - // update carry flag - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_SUBFC ) - { - // registerResult = immS32 - registerOperand - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult; - sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA; - sint32 immS32 = (sint32)imlInstruction->op_r_r_s32.immS32; - if( rRegResult != rRegOperand ) - { - // copy value to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); - } - // set carry to zero - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // ((~a+b)<~a) == true -> ca = 1 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand); - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)immS32); - x64Gen_not_reg64Low32(x64GenContext, rRegOperand); - x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperand); - x64Gen_not_reg64Low32(x64GenContext, rRegOperand); - sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, 
X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); - // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); - // OR ((~a+b+1)<1) == true -> ca = 1 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand); - // todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1 - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)immS32); - x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); - // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); - // do actual computation of value, note: a - b is equivalent to a + ~b + 1 - x64Gen_not_reg64Low32(x64GenContext, rRegResult); - x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immS32 + 1); + if (regResult != regOperand) + x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand); + if (imlInstruction->operation == PPCREC_IML_OP_AND) + x64Gen_and_reg64Low32_imm32(x64GenContext, regResult, immS32); + else if (imlInstruction->operation == PPCREC_IML_OP_OR) + x64Gen_or_reg64Low32_imm32(x64GenContext, regResult, immS32); + else // XOR + x64Gen_xor_reg64Low32_imm32(x64GenContext, regResult, immS32); } else if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI ) { @@ -1679,47 +1519,20 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction 
x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, REG_RESV_TEMP); } - else if( imlInstruction->operation == PPCREC_IML_OP_SRAW ) - { - // registerResult = registerOperand>>SH and set xer ca flag - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - uint32 sh = (uint32)imlInstruction->op_r_r_s32.immS32; - // MOV registerResult, registerOperand (if different) - if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) - x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA); - // todo: Detect if we don't need to update carry - // generic case - // TEST registerResult, (1<<(SH+1))-1 - uint32 caTestMask = 0; - if (sh >= 31) - caTestMask = 0x7FFFFFFF; - else - caTestMask = (1 << (sh)) - 1; - x64Gen_test_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_r_s32.registerResult, caTestMask); - // SETNE/NZ [ESP+XER_CA] - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - // SAR registerResult, SH - x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, sh); - // JNS (if sign not set) - sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGN, 0); // todo: Can use 2-byte form of jump instruction here - // MOV BYTE [ESP+xer_ca], 0 - x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // jump destination - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); - } - else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT || - imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT ) + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT || + imlInstruction->operation == 
PPCREC_IML_OP_RIGHT_SHIFT_U || + imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) { PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // MOV registerResult, registerOperand (if different) if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA); - // Shift - if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT ) + + if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) x64Gen_shl_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); - else + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) x64Gen_shr_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); + else // RIGHT_SHIFT_S + x64Gen_sar_reg64Low32_imm8(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.immS32); } else { @@ -1729,6 +1542,40 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction return true; } +bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); + + auto regR = _reg32(imlInstruction->op_r_r_s32_carry.regR); + auto regA = _reg32(imlInstruction->op_r_r_s32_carry.regA); + sint32 immS32 = imlInstruction->op_r_r_s32_carry.immS32; + auto regCarry = _reg32(imlInstruction->op_r_r_s32_carry.regCarry); + cemu_assert_debug(regCarry != regR && regCarry != regA); + + switch (imlInstruction->operation) + { + case PPCREC_IML_OP_ADD: + x64GenContext->emitter->XOR_dd(regCarry, regCarry); + if (regR != regA) + x64GenContext->emitter->MOV_dd(regR, regA); + 
x64GenContext->emitter->ADD_di32(regR, immS32); + x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); + break; + case PPCREC_IML_OP_ADD_WITH_CARRY: + // assumes that carry is already correctly initialized as 0 or 1 + if (regR != regA) + x64GenContext->emitter->MOV_dd(regR, regA); + x64GenContext->emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag + x64GenContext->emitter->ADC_di32(regR, immS32); + x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); + break; + default: + cemu_assert_unimplemented(); + return false; + } + return true; +} + bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLSegment* imlSegment, IMLInstruction* imlInstruction) { if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE ) @@ -1925,7 +1772,11 @@ void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + x64Emit_mov_reg64_mem32(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + } + else if (name == PPCREC_NAME_XER_CA) + { + x64Emit_movZX_reg64_mem8(x64GenContext, imlInstruction->op_r_name.registerIndex, X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); } else assert_dbg(); @@ -1957,7 +1808,11 @@ void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, } else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) { - x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * (name - 
PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.registerIndex); + x64Emit_mov_mem32_reg64(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), imlInstruction->op_r_name.registerIndex); + } + else if (name == PPCREC_NAME_XER_CA) + { + x64GenContext->emitter->MOV_bb_l(X86_REG_RSP, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg32(_reg32(imlInstruction->op_r_name.registerIndex))); } else assert_dbg(); @@ -2016,37 +1871,37 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo else if( imlInstruction->type == PPCREC_IML_TYPE_R_R ) { if( PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) - { codeGenerationFailed = true; - } } else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) { if (PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) - { codeGenerationFailed = true; - } } else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { if (PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) - { codeGenerationFailed = true; - } } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_S32 ) + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) { - if( PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) - { + if (PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + if (PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) codeGenerationFailed = true; - } } else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) { if 
(PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) - { codeGenerationFailed = true; - } + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + if (PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) + codeGenerationFailed = true; } else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE) { @@ -2063,6 +1918,13 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo codeGenerationFailed = true; } } + else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP) + { + if (PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false) + { + codeGenerationFailed = true; + } + } else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP ) { if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, segIt, imlInstruction) == false ) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index eefd9da3..b9cb0585 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -33,43 +33,6 @@ struct x64GenContext_t std::vector relocateOffsetTable2; }; -// todo - these definitions are part of the x86_64 emitter. Not the backend itself. 
We should move them eventually -//#define X86_REG_EAX 0 -//#define X86_REG_ECX 1 -//#define X86_REG_EDX 2 -//#define X86_REG_EBX 3 -//#define X86_REG_ESP 4 // reserved for low half of hCPU pointer -//#define X86_REG_EBP 5 -//#define X86_REG_ESI 6 -//#define X86_REG_EDI 7 -//#define X86_REG_NONE -1 -// -//#define X86_REG_RAX 0 -//#define X86_REG_RCX 1 -//#define X86_REG_RDX 2 -//#define X86_REG_RBX 3 -//#define X86_REG_RSP 4 // reserved for hCPU pointer -//#define X86_REG_RBP 5 -//#define X86_REG_RSI 6 -//#define X86_REG_RDI 7 -//#define X86_REG_R8 8 -//#define X86_REG_R9 9 -//#define X86_REG_R10 10 -//#define X86_REG_R11 11 -//#define X86_REG_R12 12 -//#define X86_REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet) -//#define X86_REG_R14 14 // reserved as temporary register -//#define X86_REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData -// -//#define X86_REG_AL 0 -//#define X86_REG_CL 1 -//#define X86_REG_DL 2 -//#define X86_REG_BL 3 -//#define X86_REG_AH 4 -> Adressable via non-REX only -//#define X86_REG_CH 5 -//#define X86_REG_DH 6 -//#define X86_REG_BH 7 - // reserved registers #define REG_RESV_TEMP (X86_REG_R14) #define REG_RESV_HCPU (X86_REG_RSP) @@ -79,8 +42,7 @@ struct x64GenContext_t // reserved floating-point registers #define REG_RESV_FPR_TEMP (15) - -#define reg32ToReg16(__x) (__x) +#define reg32ToReg16(__x) (__x) // deprecated // deprecated condition flags enum @@ -308,4 +270,8 @@ void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64G void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister); void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); -void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); \ No newline at end of file +void 
x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); +void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); +void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); +void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); +void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp index c9ffc464..bbb707e0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp @@ -68,6 +68,34 @@ void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); } +void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) +{ + x64Gen_writeU8(x64GenContext, 0xC4); + x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0)); + x64Gen_writeU8(x64GenContext, 0x7B - registerB * 8); + x64Gen_writeU8(x64GenContext, 0xF7); + x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); +} + +void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) +{ + // SARX reg64, reg64, reg64 + x64Gen_writeU8(x64GenContext, 0xC4); + x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 
0x20 : 0)); + x64Gen_writeU8(x64GenContext, 0xFA - registerB * 8); + x64Gen_writeU8(x64GenContext, 0xF7); + x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); +} + +void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) +{ + x64Gen_writeU8(x64GenContext, 0xC4); + x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0)); + x64Gen_writeU8(x64GenContext, 0x7A - registerB * 8); + x64Gen_writeU8(x64GenContext, 0xF7); + x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); +} + void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) { // SHLX reg64, reg64, reg64 @@ -76,4 +104,13 @@ void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist x64Gen_writeU8(x64GenContext, 0xF9 - registerB * 8); x64Gen_writeU8(x64GenContext, 0xF7); x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); +} + +void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) +{ + x64Gen_writeU8(x64GenContext, 0xC4); + x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 
0x20 : 0)); + x64Gen_writeU8(x64GenContext, 0x79 - registerB * 8); + x64Gen_writeU8(x64GenContext, 0xF7); + x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp index c7e11d42..e50052d5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -623,11 +623,11 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); } x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - // store double low part + // store double low part x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0); x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP); - // store double high part + // store double high part x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, X86_REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4); x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, X86_REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP); diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h index 4c67797c..6b05a514 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h @@ -99,7 +99,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool 
sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -125,7 +131,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -166,7 +178,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -192,7 +210,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -216,7 +240,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use 
= (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -242,7 +272,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -260,6 +296,1066 @@ public: if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } + void OR_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x08); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void OR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x08); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void OR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x0a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void OR_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x09); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void OR_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x09); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void OR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | 
((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x09); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void OR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x09); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void OR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x0b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void OR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x0b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADC_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x10); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void ADC_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x10); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADC_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x12); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADC_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x11); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void ADC_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x11); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void ADC_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | 
((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x11); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADC_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x11); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADC_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x13); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADC_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x13); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SBB_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x18); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SBB_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x18); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SBB_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x1a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SBB_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x19); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SBB_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x19); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SBB_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | 
((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x19); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SBB_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x19); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SBB_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x1b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SBB_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x1b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void AND_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x20); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void AND_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x20); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void AND_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x22); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void AND_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x21); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void AND_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x21); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void AND_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | 
((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+		}
+		else
+		{
+			if ((src & 8) || (memReg & 8))
+				_emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // memReg is ModRM.rm: bit 3 goes to REX.B (>> 3), not REX.R
+		}
+		_emitU8(0x21);
+		_emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+		if (sib_use)
+		{
+			_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+		}
+		if (mod == 1) _emitU8((u8)offset);
+		else if (mod == 2) _emitU32((u32)offset);
+	}
+	void AND_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) // AND qword [memReg + offset], src  (REX.W 21 /r)
+	{
+		uint8 mod;
+		if (offset == 0 && (memReg & 7) != 5) mod = 0; // base code 101b cannot use mod=00
+		else if (offset == (s32)(s8)offset) mod = 1;
+		else mod = 2;
+		bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+		if ((memReg & 7) == 4)
+		{
+			cemu_assert_debug(index == X86_REG_NONE);
+			index = (GPR64)(memReg & 7); // SIB index must be plain 100b (none): keep bit 3 out of REX.X for R12
+			sib_use = true;
+		}
+		if (sib_use)
+		{
+			_emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+		}
+		else
+		{
+			_emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // memReg is ModRM.rm: bit 3 goes to REX.B (>> 3), not REX.R
+		}
+		_emitU8(0x21);
+		_emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+		if (sib_use)
+		{
+			_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // scale bits always 00; scaler only gates SIB use
+		}
+		if (mod == 1) _emitU8((u8)offset);
+		else if (mod == 2) _emitU32((u32)offset);
+	}
+	void AND_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // AND dst, dword [memReg + offset]  (23 /r)
+	{
+		uint8 mod;
+		if (offset == 0 && (memReg & 7) != 5) mod = 0;
+		else if (offset == (s32)(s8)offset) mod = 1;
+		else mod = 2;
+		bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+		if ((memReg & 7) == 4)
+		{
+			cemu_assert_debug(index == X86_REG_NONE);
+			index = (GPR64)(memReg & 7); // SIB index must be plain 100b (none): keep bit 3 out of REX.X for R12
+			sib_use = true;
+		}
+		if (sib_use)
+		{
+			if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+				_emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+		}
+		else
+		{
+			if ((dst & 8) || (memReg & 8))
+				_emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // memReg is ModRM.rm: bit 3 goes to REX.B (>> 3), not REX.R
+		}
+		_emitU8(0x23);
+		_emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ?
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void AND_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x23); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x28); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SUB_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x28); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x2a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x29); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SUB_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x29); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SUB_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | 
((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+		}
+		else
+		{
+			if ((src & 8) || (memReg & 8))
+				_emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3)); // memReg is ModRM.rm: bit 3 goes to REX.B (>> 3), not REX.R
+		}
+		_emitU8(0x29);
+		_emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+		if (sib_use)
+		{
+			_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+		}
+		if (mod == 1) _emitU8((u8)offset);
+		else if (mod == 2) _emitU32((u32)offset);
+	}
+	void SUB_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) // SUB qword [memReg + offset], src  (REX.W 29 /r)
+	{
+		uint8 mod;
+		if (offset == 0 && (memReg & 7) != 5) mod = 0; // base code 101b cannot use mod=00
+		else if (offset == (s32)(s8)offset) mod = 1;
+		else mod = 2;
+		bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+		if ((memReg & 7) == 4)
+		{
+			cemu_assert_debug(index == X86_REG_NONE);
+			index = (GPR64)(memReg & 7); // SIB index must be plain 100b (none): keep bit 3 out of REX.X for R12
+			sib_use = true;
+		}
+		if (sib_use)
+		{
+			_emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+		}
+		else
+		{
+			_emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | 0x08); // memReg is ModRM.rm: bit 3 goes to REX.B (>> 3), not REX.R
+		}
+		_emitU8(0x29);
+		_emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+		if (sib_use)
+		{
+			_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // scale bits always 00; scaler only gates SIB use
+		}
+		if (mod == 1) _emitU8((u8)offset);
+		else if (mod == 2) _emitU32((u32)offset);
+	}
+	void SUB_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) // SUB dst, dword [memReg + offset]  (2B /r)
+	{
+		uint8 mod;
+		if (offset == 0 && (memReg & 7) != 5) mod = 0;
+		else if (offset == (s32)(s8)offset) mod = 1;
+		else mod = 2;
+		bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+		if ((memReg & 7) == 4)
+		{
+			cemu_assert_debug(index == X86_REG_NONE);
+			index = (GPR64)(memReg & 7); // SIB index must be plain 100b (none): keep bit 3 out of REX.X for R12
+			sib_use = true;
+		}
+		if (sib_use)
+		{
+			if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+				_emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+		}
+		else
+		{
+			if ((dst & 8) || (memReg & 8))
+				_emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3)); // memReg is ModRM.rm: bit 3 goes to REX.B (>> 3), not REX.R
+		}
+		_emitU8(0x2b);
+		_emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ?
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x2b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } void XOR_bb(GPR8_REX dst, GPR8_REX src) { if ((src >= 4) || (dst >= 4)) @@ -275,7 +1371,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -301,7 +1403,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst >= 4) || (memReg & 8) || 
((index != X86_REG_NONE) && (index & 8))) @@ -342,7 +1450,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -368,7 +1482,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -392,7 +1512,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -418,7 +1544,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -451,7 +1583,13 @@ public: if 
(offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -477,7 +1615,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -518,7 +1662,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -544,7 +1694,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -568,7 +1724,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod 
= 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -594,7 +1756,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -612,6 +1780,573 @@ public: if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } + void ADD_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void ADD_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void ADD_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void ADD_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void OR_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void OR_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void OR_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + 
_emitU8(0x81); + _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void OR_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void ADC_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void ADC_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void ADC_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if 
((memReg & 8))
+				_emitU8(0x40 | ((memReg & 8) >> 3)); // memReg is ModRM.rm: bit 3 goes to REX.B (>> 3), not REX.R
+		}
+		_emitU8(0x81);
+		_emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+		if (sib_use)
+		{
+			_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+		}
+		if (mod == 1) _emitU8((u8)offset);
+		else if (mod == 2) _emitU32((u32)offset);
+		_emitU32((u32)imm);
+	}
+	void ADC_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) // ADC qword [memReg + offset], imm32  (REX.W 81 /2 id)
+	{
+		uint8 mod;
+		if (offset == 0 && (memReg & 7) != 5) mod = 0; // base code 101b cannot use mod=00
+		else if (offset == (s32)(s8)offset) mod = 1;
+		else mod = 2;
+		bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+		if ((memReg & 7) == 4)
+		{
+			cemu_assert_debug(index == X86_REG_NONE);
+			index = (GPR64)(memReg & 7); // SIB index must be plain 100b (none): keep bit 3 out of REX.X for R12
+			sib_use = true;
+		}
+		if (sib_use)
+		{
+			_emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+		}
+		else
+		{
+			_emitU8(0x40 | ((memReg & 8) >> 3) | 0x08); // memReg is ModRM.rm: bit 3 goes to REX.B (>> 3), not REX.R
+		}
+		_emitU8(0x81);
+		_emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+		if (sib_use)
+		{
+			_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // scale bits always 00; scaler only gates SIB use
+		}
+		if (mod == 1) _emitU8((u8)offset);
+		else if (mod == 2) _emitU32((u32)offset);
+		_emitU32((u32)imm);
+	}
+	void SBB_di32(GPR32 dst, s32 imm) // SBB dst32, imm32  (81 /3 id); REX only when dst is 8..15
+	{
+		if (((dst & 8) != 0))
+		{
+			_emitU8(0x40 | ((dst & 8) >> 3));
+		}
+		_emitU8(0x81);
+		_emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7));
+		_emitU32((u32)imm);
+	}
+	void SBB_qi32(GPR64 dst, s32 imm) // SBB dst64, imm32  (REX.W 81 /3 id)
+	{
+		_emitU8(0x48 | ((dst & 8) >> 3));
+		_emitU8(0x81);
+		_emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7));
+		_emitU32((u32)imm);
+	}
+	void SBB_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) // SBB dword [memReg + offset], imm32  (81 /3 id)
+	{
+		uint8 mod;
+		if (offset == 0 && (memReg & 7) != 5) mod = 0;
+		else if (offset == (s32)(s8)offset) mod = 1;
+		else mod = 2;
+		bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+		if ((memReg & 7) == 4)
+		{
+			cemu_assert_debug(index == X86_REG_NONE);
+			index = (GPR64)(memReg & 7); // SIB index must be plain 100b (none): keep bit 3 out of REX.X for R12
+			sib_use = true;
+		}
+		if (sib_use)
+		{
+			if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8)))
+				_emitU8(0x40 |
((memReg & 8) >> 3) | ((index & 8) >> 2));
+		}
+		else
+		{
+			if ((memReg & 8))
+				_emitU8(0x40 | ((memReg & 8) >> 3)); // memReg is ModRM.rm: bit 3 goes to REX.B (>> 3), not REX.R
+		}
+		_emitU8(0x81);
+		_emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+		if (sib_use)
+		{
+			_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+		}
+		if (mod == 1) _emitU8((u8)offset);
+		else if (mod == 2) _emitU32((u32)offset);
+		_emitU32((u32)imm);
+	}
+	void SBB_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) // SBB qword [memReg + offset], imm32  (REX.W 81 /3 id)
+	{
+		uint8 mod;
+		if (offset == 0 && (memReg & 7) != 5) mod = 0; // base code 101b cannot use mod=00
+		else if (offset == (s32)(s8)offset) mod = 1;
+		else mod = 2;
+		bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+		if ((memReg & 7) == 4)
+		{
+			cemu_assert_debug(index == X86_REG_NONE);
+			index = (GPR64)(memReg & 7); // SIB index must be plain 100b (none): keep bit 3 out of REX.X for R12
+			sib_use = true;
+		}
+		if (sib_use)
+		{
+			_emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+		}
+		else
+		{
+			_emitU8(0x40 | ((memReg & 8) >> 3) | 0x08); // memReg is ModRM.rm: bit 3 goes to REX.B (>> 3), not REX.R
+		}
+		_emitU8(0x81);
+		_emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+		if (sib_use)
+		{
+			_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); // scale bits always 00; scaler only gates SIB use
+		}
+		if (mod == 1) _emitU8((u8)offset);
+		else if (mod == 2) _emitU32((u32)offset);
+		_emitU32((u32)imm);
+	}
+	void AND_di32(GPR32 dst, s32 imm) // AND dst32, imm32  (81 /4 id); REX only when dst is 8..15
+	{
+		if (((dst & 8) != 0))
+		{
+			_emitU8(0x40 | ((dst & 8) >> 3));
+		}
+		_emitU8(0x81);
+		_emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
+		_emitU32((u32)imm);
+	}
+	void AND_qi32(GPR64 dst, s32 imm) // AND dst64, imm32  (REX.W 81 /4 id)
+	{
+		_emitU8(0x48 | ((dst & 8) >> 3));
+		_emitU8(0x81);
+		_emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7));
+		_emitU32((u32)imm);
+	}
+	void AND_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) // AND dword [memReg + offset], imm32  (81 /4 id)
+	{
+		uint8 mod;
+		if (offset == 0 && (memReg & 7) != 5) mod = 0;
+		else if (offset == (s32)(s8)offset) mod = 1;
+		else mod = 2;
+		bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+		if ((memReg & 7) == 4)
+		{
+			cemu_assert_debug(index == X86_REG_NONE);
+			index = (GPR64)(memReg & 7); // SIB index must be plain 100b (none): keep bit 3 out of REX.X for R12
+			sib_use = true;
+		}
+		if (sib_use)
+		{
+			if ((memReg & 8)
|| ((index != X86_REG_NONE) && (index & 8)))
+				_emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2));
+		}
+		else
+		{
+			if ((memReg & 8))
+				_emitU8(0x40 | ((memReg & 8) >> 3)); // memReg is ModRM.rm: bit 3 goes to REX.B (>> 3), not REX.R
+		}
+		_emitU8(0x81);
+		_emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7)));
+		if (sib_use)
+		{
+			_emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3));
+		}
+		if (mod == 1) _emitU8((u8)offset);
+		else if (mod == 2) _emitU32((u32)offset);
+		_emitU32((u32)imm);
+	}
+	void AND_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) // AND qword [memReg + offset], imm32  (REX.W 81 /4 id)
+	{
+		uint8 mod;
+		if (offset == 0 && (memReg & 7) != 5) mod = 0; // base code 101b cannot use mod=00
+		else if (offset == (s32)(s8)offset) mod = 1;
+		else mod = 2;
+		bool sib_use = (scaler != 0 && index != X86_REG_NONE);
+		if ((memReg & 7) == 4)
+		{
+			cemu_assert_debug(index == X86_REG_NONE);
+			index = (GPR64)(memReg & 7); // SIB index must be plain 100b (none): keep bit 3 out of REX.X for R12
+			sib_use = true;
+		}
+		if (sib_use)
+		{
+			_emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08);
+		}
+		else
+		{
+			_emitU8(0x40 | ((memReg & 8) >> 3) | 0x08); // memReg is ModRM.rm: bit 3 goes to REX.B (>> 3), not REX.R
+		}
+		_emitU8(0x81);
+		_emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ?
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void SUB_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void SUB_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void SUB_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void SUB_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void XOR_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void XOR_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void XOR_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + 
_emitU8(0x81); + _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void XOR_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } void CMP_di32(GPR32 dst, s32 imm) { if (((dst & 8) != 0)) @@ -635,7 +2370,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -662,7 +2403,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if 
(sib_use) { _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -681,6 +2428,573 @@ public: else if (mod == 2) _emitU32((u32)offset); _emitU32((u32)imm); } + void ADD_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void ADD_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void ADD_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void ADD_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void OR_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void OR_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void OR_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + 
_emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void OR_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void ADC_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void ADC_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void ADC_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | 
((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void ADC_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SBB_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void SBB_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void SBB_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + 
else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SBB_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void AND_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void AND_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void AND_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | 
((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void AND_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SUB_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void SUB_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void SUB_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != 
X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SUB_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void XOR_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void XOR_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void XOR_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void XOR_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } void CMP_di8(GPR32 dst, s8 imm) { if (((dst & 8) != 0)) @@ -704,7 +3018,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -731,7 +3051,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); 
@@ -765,7 +3091,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -806,7 +3138,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -832,7 +3170,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -865,7 +3209,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -891,7 +3241,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if 
(offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -932,7 +3288,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -958,7 +3320,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -982,7 +3350,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -1008,7 +3382,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && 
index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -1056,7 +3436,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -1099,7 +3485,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -1126,7 +3518,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -1168,7 +3566,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index 
!= X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -1195,7 +3599,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); @@ -1214,6 +3624,365 @@ public: else if (mod == 2) _emitU32((u32)offset); _emitU8((u8)imm); } + void SHL_b_CL(GPR8_REX dst) + { + if ((dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd2); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + } + void SHL_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd2); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SHR_b_CL(GPR8_REX dst) + { + if ((dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd2); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + } + void SHR_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd2); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SAR_b_CL(GPR8_REX dst) + { + if ((dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd2); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + } + void SAR_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd2); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SHL_d_CL(GPR32 dst) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd3); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + } + void SHL_q_CL(GPR64 dst) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0xd3); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + } + void SHL_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SHL_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SHR_d_CL(GPR32 dst) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd3); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + } + void SHR_q_CL(GPR64 dst) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0xd3); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + } + void SHR_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SHR_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SAR_d_CL(GPR32 dst) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd3); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + } + void SAR_q_CL(GPR64 dst) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0xd3); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + } + void SAR_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SAR_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void JMP_j32(s32 imm) + { + _emitU8(0xe9); + _emitU32((u32)imm); + } void Jcc_j32(X86Cond cond, s32 imm) { _emitU8(0x0f); @@ -1236,7 +4005,13 @@ public: if (offset == 0 && (memReg & 7) != 5) mod = 0; else if (offset == (s32)(s8)offset) mod = 1; else mod = 2; - bool sib_use = (scaler != 0 && index != X86_REG_NONE) || ((memReg & 7) == 4); + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } if (sib_use) { if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) @@ -1257,4 +4032,89 @@ public: if (mod == 1) _emitU8((u8)offset); else if (mod == 2) _emitU32((u32)offset); } + void BT_du8(GPR32 dst, u8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x0f); + _emitU8(0xba); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void BT_qu8(GPR64 dst, u8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x0f); + _emitU8(0xba); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void BT_du8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, u8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x0f); + _emitU8(0xba); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void BT_qu8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, u8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x0f); + _emitU8(0xba); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } }; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp index fae49541..cd40de7f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -5,10 +5,12 @@ #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" /* - * Initializes a single segment and returns true if it is a finite loop + * Analyzes a single segment and returns true if it is a finite loop */ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) { + return false; // !!! DISABLED !!! 
+ bool isTightFiniteLoop = false; // base criteria, must jump to beginning of same segment if (imlSegment->nextSegmentBranchTaken != imlSegment) @@ -42,9 +44,7 @@ bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment) if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB)) continue; instIt.CheckRegisterUsage(&registersUsed); - if(registersUsed.writtenNamedReg1 < 0) - continue; - list_modifiedRegisters.remove(registersUsed.writtenNamedReg1); + registersUsed.ForEachWrittenGPR([&](IMLReg r) { list_modifiedRegisters.remove(r); }); } if (list_modifiedRegisters.count > 0) { @@ -63,10 +63,6 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) return true; if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) return true; - if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) - return true; // ?? - if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) - return true; // ?? 
if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) return true; if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) @@ -79,6 +75,18 @@ bool IMLAnalyzer_CanTypeWriteCR(IMLInstruction* imlInstruction) return true; if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R) return true; + + // new instructions + if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) + return true; + if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + return true; + if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY) + return true; + if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY) + return true; + + return false; } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp index 72f706d9..4dafaf18 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -14,10 +14,10 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml) return "MOV"; else if (op == PPCREC_IML_OP_ADD) return "ADD"; + else if (op == PPCREC_IML_OP_ADD_WITH_CARRY) + return "ADC"; else if (op == PPCREC_IML_OP_SUB) return "SUB"; - else if (op == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY) - return "ADDCSC"; else if (op == PPCREC_IML_OP_OR) return "OR"; else if (op == PPCREC_IML_OP_AND) @@ -26,8 +26,12 @@ const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml) return "XOR"; else if (op == PPCREC_IML_OP_LEFT_SHIFT) return "LSH"; - else if (op == PPCREC_IML_OP_RIGHT_SHIFT) + else if (op == PPCREC_IML_OP_RIGHT_SHIFT_U) return "RSH"; + else if (op == PPCREC_IML_OP_RIGHT_SHIFT_S) + return "ARSH"; + else if (op == PPCREC_IML_OP_LEFT_ROTATE) + return "LROT"; else if (op == PPCREC_IML_OP_MULTIPLY_SIGNED) return "MULS"; else if (op == PPCREC_IML_OP_DIVIDE_SIGNED) @@ -129,6 +133,14 @@ std::string IMLDebug_GetConditionName(IMLCondition cond) return "EQ"; case IMLCondition::NEQ: return "NEQ"; + case IMLCondition::UNSIGNED_GT: + return 
"UGT"; + case IMLCondition::UNSIGNED_LT: + return "ULT"; + case IMLCondition::SIGNED_GT: + return "SGT"; + case IMLCondition::SIGNED_LT: + return "SLT"; default: cemu_assert_unimplemented(); } @@ -224,6 +236,16 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.addFmt(" -> CR{}", inst.crRegister); } } + else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regB); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regCarry, true); + } else if (inst.type == PPCREC_IML_TYPE_COMPARE) { strOutput.add("CMP "); @@ -270,6 +292,17 @@ void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool strOutput.addFmt(" -> CR{}", inst.crRegister); } } + else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regA); + IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32_carry.immS32); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regCarry, true); + } else if (inst.type == PPCREC_IML_TYPE_R_S32) { strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp index d4cfdcb1..52e19e8c 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -10,6 +10,7 @@ void 
IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readNamedReg2 = -1; registersUsed->readNamedReg3 = -1; registersUsed->writtenNamedReg1 = -1; + registersUsed->writtenNamedReg2 = -1; registersUsed->readFPR1 = -1; registersUsed->readFPR2 = -1; registersUsed->readFPR3 = -1; @@ -34,10 +35,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const else if ( operation == PPCREC_IML_OP_OR || operation == PPCREC_IML_OP_AND || - operation == PPCREC_IML_OP_XOR || - operation == PPCREC_IML_OP_ADD_CARRY || // r_r carry stuff is deprecated - operation == PPCREC_IML_OP_ADD_CARRY_ME || - operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY) + operation == PPCREC_IML_OP_XOR) { // result is read and written, operand is read registersUsed->writtenNamedReg1 = op_r_r.registerResult; @@ -112,6 +110,24 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readNamedReg1 = op_r_r_s32.registerA; } } + else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + registersUsed->writtenNamedReg1 = op_r_r_s32_carry.regR; + registersUsed->readNamedReg1 = op_r_r_s32_carry.regA; + // some operations read carry + switch (operation) + { + case PPCREC_IML_OP_ADD_WITH_CARRY: + registersUsed->readNamedReg2 = op_r_r_s32_carry.regCarry; + break; + case PPCREC_IML_OP_ADD: + break; + default: + cemu_assert_unimplemented(); + } + // carry is always written + registersUsed->writtenNamedReg2 = op_r_r_s32_carry.regCarry; + } else if (type == PPCREC_IML_TYPE_R_R_R) { // in all cases result is written and other operands are read only @@ -119,6 +135,25 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readNamedReg1 = op_r_r_r.registerA; registersUsed->readNamedReg2 = op_r_r_r.registerB; } + else if (type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + registersUsed->writtenNamedReg1 = op_r_r_r_carry.regR; + registersUsed->readNamedReg1 = op_r_r_r_carry.regA; + 
registersUsed->readNamedReg2 = op_r_r_r_carry.regB; + // some operations read carry + switch (operation) + { + case PPCREC_IML_OP_ADD_WITH_CARRY: + registersUsed->readNamedReg3 = op_r_r_r_carry.regCarry; + break; + case PPCREC_IML_OP_ADD: + break; + default: + cemu_assert_unimplemented(); + } + // carry is always written + registersUsed->writtenNamedReg2 = op_r_r_r_carry.regCarry; + } else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) { // no effect on registers @@ -155,6 +190,10 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const { registersUsed->readNamedReg1 = op_conditionalJump2.registerBool; } + else if (type == PPCREC_IML_TYPE_JUMP) + { + // no registers affected + } else if (type == PPCREC_IML_TYPE_LOAD) { registersUsed->writtenNamedReg1 = op_storeLoad.registerData; @@ -215,6 +254,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: // PS1 remains the same registersUsed->readFPR4 = op_storeLoad.registerData; + cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); break; case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: @@ -227,6 +267,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: case PPCREC_FPR_LD_MODE_PSQ_S8_PS0: + cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); break; default: cemu_assert_unimplemented(); @@ -251,6 +292,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const break; case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: // PS1 remains the same + cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); registersUsed->readFPR4 = op_storeLoad.registerData; break; case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: @@ -263,6 +305,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* 
registersUsed) const case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: + cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); break; default: cemu_assert_unimplemented(); @@ -283,6 +326,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readNamedReg2 = op_storeLoad.registerGQR; break; default: + cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); break; } } @@ -304,6 +348,7 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const registersUsed->readNamedReg3 = op_storeLoad.registerGQR; break; default: + cemu_assert_debug(op_storeLoad.registerGQR == PPC_REC_INVALID_REGISTER); break; } } @@ -430,8 +475,16 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const #define replaceRegister(__x,__r,__n) (((__x)==(__r))?(__n):(__x)) +sint32 replaceRegisterMultiple(sint32 reg, const std::unordered_map& translationTable) +{ + const auto& it = translationTable.find(reg); + cemu_assert_debug(it != translationTable.cend()); + return it->second; +} + sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4]) { + // deprecated but still used for FPRs for (sint32 i = 0; i < 4; i++) { if (match[i] < 0) @@ -444,56 +497,70 @@ sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4]) return reg; } -void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]) +//void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]) +void IMLInstruction::RewriteGPR(const std::unordered_map& translationTable) { if (type == PPCREC_IML_TYPE_R_NAME) { - op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, gprRegisterSearched, gprRegisterReplaced); + op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, translationTable); } else if (type == 
PPCREC_IML_TYPE_NAME_R) { - op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, gprRegisterSearched, gprRegisterReplaced); + op_r_name.registerIndex = replaceRegisterMultiple(op_r_name.registerIndex, translationTable); } else if (type == PPCREC_IML_TYPE_R_R) { - op_r_r.registerResult = replaceRegisterMultiple(op_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced); - op_r_r.registerA = replaceRegisterMultiple(op_r_r.registerA, gprRegisterSearched, gprRegisterReplaced); + op_r_r.registerResult = replaceRegisterMultiple(op_r_r.registerResult, translationTable); + op_r_r.registerA = replaceRegisterMultiple(op_r_r.registerA, translationTable); } else if (type == PPCREC_IML_TYPE_R_S32) { - op_r_immS32.registerIndex = replaceRegisterMultiple(op_r_immS32.registerIndex, gprRegisterSearched, gprRegisterReplaced); + op_r_immS32.registerIndex = replaceRegisterMultiple(op_r_immS32.registerIndex, translationTable); } else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { - op_conditional_r_s32.registerIndex = replaceRegisterMultiple(op_conditional_r_s32.registerIndex, gprRegisterSearched, gprRegisterReplaced); + op_conditional_r_s32.registerIndex = replaceRegisterMultiple(op_conditional_r_s32.registerIndex, translationTable); } else if (type == PPCREC_IML_TYPE_R_R_S32) { - op_r_r_s32.registerResult = replaceRegisterMultiple(op_r_r_s32.registerResult, gprRegisterSearched, gprRegisterReplaced); - op_r_r_s32.registerA = replaceRegisterMultiple(op_r_r_s32.registerA, gprRegisterSearched, gprRegisterReplaced); + op_r_r_s32.registerResult = replaceRegisterMultiple(op_r_r_s32.registerResult, translationTable); + op_r_r_s32.registerA = replaceRegisterMultiple(op_r_r_s32.registerA, translationTable); + } + else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + op_r_r_s32_carry.regR = replaceRegisterMultiple(op_r_r_s32_carry.regR, translationTable); + op_r_r_s32_carry.regA = replaceRegisterMultiple(op_r_r_s32_carry.regA, translationTable); + 
op_r_r_s32_carry.regCarry = replaceRegisterMultiple(op_r_r_s32_carry.regCarry, translationTable); } else if (type == PPCREC_IML_TYPE_R_R_R) { - op_r_r_r.registerResult = replaceRegisterMultiple(op_r_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced); - op_r_r_r.registerA = replaceRegisterMultiple(op_r_r_r.registerA, gprRegisterSearched, gprRegisterReplaced); - op_r_r_r.registerB = replaceRegisterMultiple(op_r_r_r.registerB, gprRegisterSearched, gprRegisterReplaced); + op_r_r_r.registerResult = replaceRegisterMultiple(op_r_r_r.registerResult, translationTable); + op_r_r_r.registerA = replaceRegisterMultiple(op_r_r_r.registerA, translationTable); + op_r_r_r.registerB = replaceRegisterMultiple(op_r_r_r.registerB, translationTable); + } + else if (type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + op_r_r_r_carry.regR = replaceRegisterMultiple(op_r_r_r_carry.regR, translationTable); + op_r_r_r_carry.regA = replaceRegisterMultiple(op_r_r_r_carry.regA, translationTable); + op_r_r_r_carry.regB = replaceRegisterMultiple(op_r_r_r_carry.regB, translationTable); + op_r_r_r_carry.regCarry = replaceRegisterMultiple(op_r_r_r_carry.regCarry, translationTable); } else if (type == PPCREC_IML_TYPE_COMPARE) { - op_compare.registerResult = replaceRegisterMultiple(op_compare.registerResult, gprRegisterSearched, gprRegisterReplaced); - op_compare.registerOperandA = replaceRegisterMultiple(op_compare.registerOperandA, gprRegisterSearched, gprRegisterReplaced); - op_compare.registerOperandB = replaceRegisterMultiple(op_compare.registerOperandB, gprRegisterSearched, gprRegisterReplaced); + op_compare.registerResult = replaceRegisterMultiple(op_compare.registerResult, translationTable); + op_compare.registerOperandA = replaceRegisterMultiple(op_compare.registerOperandA, translationTable); + op_compare.registerOperandB = replaceRegisterMultiple(op_compare.registerOperandB, translationTable); } else if (type == PPCREC_IML_TYPE_COMPARE_S32) { - op_compare_s32.registerResult = 
replaceRegisterMultiple(op_compare_s32.registerResult, gprRegisterSearched, gprRegisterReplaced); - op_compare_s32.registerOperandA = replaceRegisterMultiple(op_compare_s32.registerOperandA, gprRegisterSearched, gprRegisterReplaced); + op_compare_s32.registerResult = replaceRegisterMultiple(op_compare_s32.registerResult, translationTable); + op_compare_s32.registerOperandA = replaceRegisterMultiple(op_compare_s32.registerOperandA, translationTable); } else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) { - op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, gprRegisterSearched, gprRegisterReplaced); + op_conditionalJump2.registerBool = replaceRegisterMultiple(op_conditionalJump2.registerBool, translationTable); } - else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + else if (type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP) { // no effect on registers } @@ -509,7 +576,7 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste } else if (operation == PPCREC_IML_MACRO_B_TO_REG) { - op_macro.param = replaceRegisterMultiple(op_macro.param, gprRegisterSearched, gprRegisterReplaced); + op_macro.param = replaceRegisterMultiple(op_macro.param, translationTable); } else { @@ -518,33 +585,33 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste } else if (type == PPCREC_IML_TYPE_LOAD) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable); if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); } } 
else if (type == PPCREC_IML_TYPE_LOAD_INDEXED) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable); if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); } else if (type == PPCREC_IML_TYPE_STORE) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable); if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); } else if (type == PPCREC_IML_TYPE_STORE_INDEXED) { - op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerData = replaceRegisterMultiple(op_storeLoad.registerData, translationTable); if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - 
op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); } else if (type == PPCREC_IML_TYPE_CR) { @@ -562,52 +629,52 @@ void IMLInstruction::ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegiste { if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); } if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable); } } else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); } if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); } if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable); } } else if (type == PPCREC_IML_TYPE_FPR_STORE) { if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem = 
replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); } if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable); } } else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) { if (op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem = replaceRegisterMultiple(op_storeLoad.registerMem, translationTable); } if (op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerMem2 = replaceRegisterMultiple(op_storeLoad.registerMem2, translationTable); } if (op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) { - op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); + op_storeLoad.registerGQR = replaceRegisterMultiple(op_storeLoad.registerGQR, translationTable); } } else if (type == PPCREC_IML_TYPE_FPR_R_R) @@ -654,7 +721,7 @@ void IMLInstruction::ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegist { // not affected } - else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || type == PPCREC_IML_TYPE_JUMP) { // not affected } @@ -760,15 +827,15 @@ void IMLInstruction::ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterRe { // not affected } - else if (type == 
PPCREC_IML_TYPE_R_R_S32) + else if (type == PPCREC_IML_TYPE_R_R_S32 || type == PPCREC_IML_TYPE_R_R_S32_CARRY) { // not affected } - else if (type == PPCREC_IML_TYPE_R_R_R) + else if (type == PPCREC_IML_TYPE_R_R_R || type == PPCREC_IML_TYPE_R_R_R_CARRY) { // not affected } - else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + else if (type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32 || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || type == PPCREC_IML_TYPE_JUMP) { // not affected } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 35db10a1..9491136e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -19,14 +19,13 @@ enum PPCREC_IML_OP_XOR, // '^' operator PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator PPCREC_IML_OP_LEFT_SHIFT, // shift left operator - PPCREC_IML_OP_RIGHT_SHIFT, // right shift operator (unsigned) + PPCREC_IML_OP_RIGHT_SHIFT_U, // right shift operator (unsigned) + PPCREC_IML_OP_RIGHT_SHIFT_S, // right shift operator (signed) // ppc PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask) - PPCREC_IML_OP_SRAW, // SRAWI/SRAW instruction (algebraic shift right, sets ca flag) PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits) PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) PPCREC_IML_OP_CNTLZW, - PPCREC_IML_OP_SUBFC, // SUBFC and SUBFIC (subtract from and set carry) PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20 PPCREC_IML_OP_MFCR, // copy cr to gpr PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask) @@ -83,7 +82,7 @@ enum // R_R_S32 only // R_R_R + R_R_S32 - PPCREC_IML_OP_ADD, + PPCREC_IML_OP_ADD, // also R_R_R_CARRY PPCREC_IML_OP_SUB, // R_R only @@ -92,14 +91,10 @@ enum PPCREC_IML_OP_ASSIGN_S16_TO_S32, PPCREC_IML_OP_ASSIGN_S8_TO_S32, - // 
deprecated - PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit - PPCREC_IML_OP_ADD_CARRY, // complex operation, result = operand + carry bit, updates carry bit - PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit - PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag - PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag - + // R_R_R_carry + PPCREC_IML_OP_ADD_WITH_CARRY, // similar to ADD but also adds carry bit (0 or 1) }; + #define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN) enum @@ -116,7 +111,7 @@ enum PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak }; -enum +enum // deprecated condition codes { PPCREC_JUMP_CONDITION_NONE, PPCREC_JUMP_CONDITION_E, // equal / zero @@ -158,7 +153,9 @@ enum PPCREC_IML_TYPE_NO_OP, // no-op instruction PPCREC_IML_TYPE_R_R, // r* = (op) *r (can also be r* (op) *r) PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r* + PPCREC_IML_TYPE_R_R_R_CARRY, // r* = r* (op) r* (reads and/or updates carry) PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32* + PPCREC_IML_TYPE_R_R_S32_CARRY, // r* = r* (op) s32* (reads and/or updates carry) PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*] PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*] PPCREC_IML_TYPE_STORE, // [r*+s32*] = r* @@ -174,6 +171,7 @@ enum // new style of handling conditions and branches: PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r* PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm + PPCREC_IML_TYPE_JUMP, // replaces CJUMP. Jump always, no condition PPCREC_IML_TYPE_CONDITIONAL_JUMP, // replaces CJUMP. 
Jump condition is based on boolean register // conditional @@ -199,6 +197,7 @@ enum PPCREC_NAME_SPR0 = 3000, PPCREC_NAME_FPR0 = 4000, PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7 + PPCREC_NAME_XER_CA = 6000, // carry bit }; // special cases for LOAD/STORE @@ -260,8 +259,8 @@ struct IMLUsedRegisters sint16 readNamedReg2; sint16 readNamedReg3; sint16 writtenNamedReg1; + sint16 writtenNamedReg2; }; - sint16 gpr[4]; // 3 read + 1 write }; // FPR union @@ -275,10 +274,69 @@ struct IMLUsedRegisters sint16 readFPR4; sint16 writtenFPR1; }; - sint16 fpr[4]; + //sint16 fpr[4]; }; + + bool IsRegWritten(sint16 imlReg) const // GPRs + { + cemu_assert_debug(imlReg >= 0); + return writtenNamedReg1 == imlReg || writtenNamedReg2 == imlReg; + } + + template + void ForEachWrittenGPR(Fn F) + { + if (writtenNamedReg1 >= 0) + F(writtenNamedReg1); + if (writtenNamedReg2 >= 0) + F(writtenNamedReg2); + } + + template + void ForEachReadGPR(Fn F) + { + if (readNamedReg1 >= 0) + F(readNamedReg1); + if (readNamedReg2 >= 0) + F(readNamedReg2); + if (readNamedReg3 >= 0) + F(readNamedReg3); + } + + template + void ForEachAccessedGPR(Fn F) + { + if (readNamedReg1 >= 0) + F(readNamedReg1, false); + if (readNamedReg2 >= 0) + F(readNamedReg2, false); + if (readNamedReg3 >= 0) + F(readNamedReg3, false); + if (writtenNamedReg1 >= 0) + F(writtenNamedReg1, true); + if (writtenNamedReg2 >= 0) + F(writtenNamedReg2, true); + } + + bool HasFPRReg(sint16 imlReg) const + { + cemu_assert_debug(imlReg >= 0); + if (readFPR1 == imlReg) + return true; + if (readFPR2 == imlReg) + return true; + if (readFPR3 == imlReg) + return true; + if (readFPR4 == imlReg) + return true; + if (writtenFPR1 == imlReg) + return true; + return false; + } }; +using IMLReg = uint8; + struct IMLInstruction { uint8 type; @@ -307,12 +365,25 @@ struct IMLInstruction }op_r_r_r; struct { - // R = A (op) immS32 [update cr* in mode *] + IMLReg regR; + IMLReg regA; + IMLReg regB; + IMLReg regCarry; + }op_r_r_r_carry; + struct + { uint8 
registerResult; uint8 registerA; sint32 immS32; }op_r_r_s32; struct + { + IMLReg regR; + IMLReg regA; + sint32 immS32; + IMLReg regCarry; + }op_r_r_s32_carry; + struct { // R/F = NAME or NAME = R/F uint8 registerIndex; @@ -426,6 +497,7 @@ struct IMLInstruction type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_MFTB || type == PPCREC_IML_TYPE_CJUMP || type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || + type == PPCREC_IML_TYPE_JUMP || type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) return true; return false; @@ -496,6 +568,18 @@ struct IMLInstruction this->op_r_r_r.registerB = registerB; } + void make_r_r_r_carry(uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 registerCarry) + { + this->type = PPCREC_IML_TYPE_R_R_R_CARRY; + this->operation = operation; + this->crRegister = 0xFF; + this->crMode = 0xFF; + this->op_r_r_r_carry.regR = registerResult; + this->op_r_r_r_carry.regA = registerA; + this->op_r_r_r_carry.regB = registerB; + this->op_r_r_r_carry.regCarry = registerCarry; + } + void make_r_r_s32(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0) { // operation with two register operands and one signed immediate (e.g. 
"t0 = t1 + 1234") @@ -508,6 +592,18 @@ struct IMLInstruction this->op_r_r_s32.immS32 = immS32; } + void make_r_r_s32_carry(uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 registerCarry) + { + this->type = PPCREC_IML_TYPE_R_R_S32_CARRY; + this->operation = operation; + this->crRegister = 0xFF; + this->crMode = 0xFF; + this->op_r_r_s32_carry.regR = registerResult; + this->op_r_r_s32_carry.regA = registerA; + this->op_r_r_s32_carry.immS32 = immS32; + this->op_r_r_s32_carry.regCarry = registerCarry; + } + void make_compare(uint8 registerA, uint8 registerB, uint8 registerResult, IMLCondition cond) { this->type = PPCREC_IML_TYPE_COMPARE; @@ -542,6 +638,14 @@ struct IMLInstruction this->op_conditionalJump2.mustBeTrue = mustBeTrue; } + void make_jump_new() + { + this->type = PPCREC_IML_TYPE_JUMP; + this->operation = -999; + this->crRegister = PPC_REC_INVALID_REGISTER; + this->crMode = 0; + } + // load from memory void make_r_memory(uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) { @@ -572,7 +676,8 @@ struct IMLInstruction void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; - void ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); + //void ReplaceGPR(sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); + void RewriteGPR(const std::unordered_map& translationTable); void ReplaceFPRs(sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]); void ReplaceFPR(sint32 fprRegisterSearched, sint32 fprRegisterReplaced); }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp index 2cbcb0c1..f67b49e1 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -179,15 +179,7 @@ ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegist if( 
(unloadLockedMask&(1<currentMapping[i].virtualReg; - bool isReserved = false; - for (sint32 f = 0; f < 4; f++) - { - if (virtualReg == (sint32)instructionUsedRegisters->fpr[f]) - { - isReserved = true; - break; - } - } + bool isReserved = instructionUsedRegisters->HasFPRReg(virtualReg); if (isReserved) continue; if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse) @@ -373,7 +365,7 @@ bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlG imlInstruction->CheckRegisterUsage(®istersUsed); if( registersUsed.readNamedReg1 == registerIndex || registersUsed.readNamedReg2 == registerIndex || registersUsed.readNamedReg3 == registerIndex ) return false; - if( registersUsed.writtenNamedReg1 == registerIndex ) + if (registersUsed.IsRegWritten(registerIndex)) return true; } // todo: Scan next segment(s) @@ -411,7 +403,7 @@ bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcIml IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; IMLUsedRegisters registersUsed; imlInstruction->CheckRegisterUsage(®istersUsed); - if( registersUsed.writtenNamedReg1 == registerIndex ) + if( registersUsed.IsRegWritten(registerIndex) ) { if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME ) return true; @@ -620,84 +612,84 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext } } -bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg) -{ - IMLUsedRegisters registersUsed; - for (sint32 i = startIndex; i <= endIndex; i++) - { - IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - imlInstruction->CheckRegisterUsage(®istersUsed); - if (registersUsed.writtenNamedReg1 == vreg) - return true; - } - return false; -} +//bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg) +//{ +// 
IMLUsedRegisters registersUsed; +// for (sint32 i = startIndex; i <= endIndex; i++) +// { +// IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; +// imlInstruction->CheckRegisterUsage(®istersUsed); +// if (registersUsed.IsRegWritten(vreg)) +// return true; +// } +// return false; +//} -sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* startSegment, sint32 startIndex, sint32 name) -{ - // current segment - sint32 currentIndex = startIndex; - IMLSegment* currentSegment = startSegment; - sint32 segmentIterateCount = 0; - sint32 foundRegister = -1; - while (true) - { - // stop scanning if segment is enterable - if (currentSegment->isEnterable) - return -1; - while (currentIndex >= 0) - { - if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) - { - foundRegister = currentSegment->imlList[currentIndex].op_r_name.registerIndex; - break; - } - // previous instruction - currentIndex--; - } - if (foundRegister >= 0) - break; - // continue at previous segment (if there is only one) - if (segmentIterateCount >= 1) - return -1; - if (currentSegment->list_prevSegments.size() != 1) - return -1; - currentSegment = currentSegment->list_prevSegments[0]; - currentIndex = currentSegment->imlList.size() - 1; - segmentIterateCount++; - } - // scan again to make sure the register is not modified inbetween - currentIndex = startIndex; - currentSegment = startSegment; - segmentIterateCount = 0; - IMLUsedRegisters registersUsed; - while (true) - { - while (currentIndex >= 0) - { - // check if register is modified - currentSegment->imlList[currentIndex].CheckRegisterUsage(®istersUsed); - if (registersUsed.writtenNamedReg1 == foundRegister) - return -1; - // check if end of scan reached - if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) - { - return 
foundRegister; - } - // previous instruction - currentIndex--; - } - // continue at previous segment (if there is only one) - if (segmentIterateCount >= 1) - return -1; - if (currentSegment->list_prevSegments.size() != 1) - return -1; - currentSegment = currentSegment->list_prevSegments[0]; - currentIndex = currentSegment->imlList.size() - 1; - segmentIterateCount++; - } - return -1; -} +//sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* startSegment, sint32 startIndex, sint32 name) +//{ +// // current segment +// sint32 currentIndex = startIndex; +// IMLSegment* currentSegment = startSegment; +// sint32 segmentIterateCount = 0; +// sint32 foundRegister = -1; +// while (true) +// { +// // stop scanning if segment is enterable +// if (currentSegment->isEnterable) +// return -1; +// while (currentIndex >= 0) +// { +// if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) +// { +// foundRegister = currentSegment->imlList[currentIndex].op_r_name.registerIndex; +// break; +// } +// // previous instruction +// currentIndex--; +// } +// if (foundRegister >= 0) +// break; +// // continue at previous segment (if there is only one) +// if (segmentIterateCount >= 1) +// return -1; +// if (currentSegment->list_prevSegments.size() != 1) +// return -1; +// currentSegment = currentSegment->list_prevSegments[0]; +// currentIndex = currentSegment->imlList.size() - 1; +// segmentIterateCount++; +// } +// // scan again to make sure the register is not modified inbetween +// currentIndex = startIndex; +// currentSegment = startSegment; +// segmentIterateCount = 0; +// IMLUsedRegisters registersUsed; +// while (true) +// { +// while (currentIndex >= 0) +// { +// // check if register is modified +// currentSegment->imlList[currentIndex].CheckRegisterUsage(®istersUsed); +// if (registersUsed.IsRegWritten(foundRegister)) +// return -1; +// // check if 
end of scan reached +// if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) +// { +// return foundRegister; +// } +// // previous instruction +// currentIndex--; +// } +// // continue at previous segment (if there is only one) +// if (segmentIterateCount >= 1) +// return -1; +// if (currentSegment->list_prevSegments.size() != 1) +// return -1; +// currentSegment = currentSegment->list_prevSegments[0]; +// currentIndex = currentSegment->imlList.size() - 1; +// segmentIterateCount++; +// } +// return -1; +//} void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, sint32 fprIndex) { @@ -830,7 +822,7 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp { break; } - if (registersUsed.writtenNamedReg1 == gprIndex) + if (registersUsed.IsRegWritten(gprIndex)) return; // GPR overwritten, we don't need to byte swap anymore } if (foundMatch) @@ -933,6 +925,8 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0; else if (formatType == 7) instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0; + if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) + instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER; } else if (instIt.op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) { @@ -946,6 +940,8 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1; else if (formatType == 7) instIt.op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1; + if (instIt.op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) + instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER; } } else if (instIt.type == PPCREC_IML_TYPE_FPR_STORE || instIt.type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) @@ 
-978,6 +974,8 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0; else if (formatType == 7) instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0; + if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) + instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER; } else if (instIt.op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) { @@ -991,127 +989,129 @@ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1; else if (formatType == 7) instIt.op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1; + if (instIt.op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) + instIt.op_storeLoad.registerGQR = PPC_REC_INVALID_REGISTER; } } } } } -/* - * Returns true if registerWrite overwrites any of the registers read by registerRead - */ -bool PPCRecompilerAnalyzer_checkForGPROverwrite(IMLUsedRegisters* registerRead, IMLUsedRegisters* registerWrite) -{ - if (registerWrite->writtenNamedReg1 < 0) - return false; - - if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg1) - return true; - if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg2) - return true; - if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg3) - return true; - return false; -} +///* +// * Returns true if registerWrite overwrites any of the registers read by registerRead +// */ +//bool PPCRecompilerAnalyzer_checkForGPROverwrite(IMLUsedRegisters* registerRead, IMLUsedRegisters* registerWrite) +//{ +// if (registerWrite->writtenNamedReg1 < 0) +// return false; +// +// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg1) +// return true; +// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg2) +// return true; +// if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg3) +// return true; +// return false; +//} void 
_reorderConditionModifyInstructions(IMLSegment* imlSegment) { - IMLInstruction* lastInstruction = imlSegment->GetLastInstruction(); - // last instruction is a conditional branch? - if (lastInstruction == nullptr || lastInstruction->type != PPCREC_IML_TYPE_CJUMP) - return; - if (lastInstruction->op_conditionalJump.crRegisterIndex >= 8) - return; - // get CR bitmask of bit required for conditional jump - PPCRecCRTracking_t crTracking; - IMLAnalyzer_GetCRTracking(lastInstruction, &crTracking); - uint32 requiredCRBits = crTracking.readCRBits; - - // scan backwards until we find the instruction that sets the CR - sint32 crSetterInstructionIndex = -1; - sint32 unsafeInstructionIndex = -1; - for (sint32 i = imlSegment->imlList.size() - 2; i >= 0; i--) - { - IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; - IMLAnalyzer_GetCRTracking(imlInstruction, &crTracking); - if (crTracking.readCRBits != 0) - return; // dont handle complex cases for now - if (crTracking.writtenCRBits != 0) - { - if ((crTracking.writtenCRBits&requiredCRBits) != 0) - { - crSetterInstructionIndex = i; - break; - } - else - { - return; // other CR bits overwritten (dont handle complex cases) - } - } - // is safe? 
(no risk of overwriting x64 eflags) - if ((imlInstruction->type == PPCREC_IML_TYPE_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_R_NAME || imlInstruction->type == PPCREC_IML_TYPE_NO_OP) || - (imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) || - (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) || - (imlInstruction->type == PPCREC_IML_TYPE_R_R && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) ) - continue; - // not safe - if (unsafeInstructionIndex == -1) - unsafeInstructionIndex = i; - } - if (crSetterInstructionIndex < 0) - return; - if (unsafeInstructionIndex < 0) - return; // no danger of overwriting eflags, don't reorder - // check if we can move the CR setter instruction to after unsafeInstructionIndex - PPCRecCRTracking_t crTrackingSetter = crTracking; - IMLUsedRegisters regTrackingCRSetter; - imlSegment->imlList[crSetterInstructionIndex].CheckRegisterUsage(®TrackingCRSetter); - if (regTrackingCRSetter.writtenFPR1 >= 0 || regTrackingCRSetter.readFPR1 >= 0 || regTrackingCRSetter.readFPR2 >= 0 || regTrackingCRSetter.readFPR3 >= 0 || regTrackingCRSetter.readFPR4 >= 0) - return; // we don't handle FPR dependency yet so just ignore FPR instructions - IMLUsedRegisters registerTracking; - if (regTrackingCRSetter.writtenNamedReg1 >= 0) - { - // CR setter does write GPR - for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) - { - imlSegment->imlList[i].CheckRegisterUsage(®isterTracking); - // reads register written by CR setter? - if (PPCRecompilerAnalyzer_checkForGPROverwrite(®isterTracking, ®TrackingCRSetter)) - { - return; // cant move CR setter because of dependency - } - // writes register read by CR setter? - if (PPCRecompilerAnalyzer_checkForGPROverwrite(®TrackingCRSetter, ®isterTracking)) - { - return; // cant move CR setter because of dependency - } - // overwrites register written by CR setter? 
- if (regTrackingCRSetter.writtenNamedReg1 == registerTracking.writtenNamedReg1) - return; - } - } - else - { - // CR setter does not write GPR - for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) - { - imlSegment->imlList[i].CheckRegisterUsage(®isterTracking); - // writes register read by CR setter? - if (PPCRecompilerAnalyzer_checkForGPROverwrite(®TrackingCRSetter, ®isterTracking)) - { - return; // cant move CR setter because of dependency - } - } - } - - // move CR setter instruction -#ifdef CEMU_DEBUG_ASSERT - if ((unsafeInstructionIndex + 1) <= crSetterInstructionIndex) - assert_dbg(); -#endif - IMLInstruction* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1); - memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(IMLInstruction)); - imlSegment->imlList[crSetterInstructionIndex].make_no_op(); +// IMLInstruction* lastInstruction = imlSegment->GetLastInstruction(); +// // last instruction is a conditional branch? 
+// if (lastInstruction == nullptr || lastInstruction->type != PPCREC_IML_TYPE_CJUMP) +// return; +// if (lastInstruction->op_conditionalJump.crRegisterIndex >= 8) +// return; +// // get CR bitmask of bit required for conditional jump +// PPCRecCRTracking_t crTracking; +// IMLAnalyzer_GetCRTracking(lastInstruction, &crTracking); +// uint32 requiredCRBits = crTracking.readCRBits; +// +// // scan backwards until we find the instruction that sets the CR +// sint32 crSetterInstructionIndex = -1; +// sint32 unsafeInstructionIndex = -1; +// for (sint32 i = imlSegment->imlList.size() - 2; i >= 0; i--) +// { +// IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; +// IMLAnalyzer_GetCRTracking(imlInstruction, &crTracking); +// if (crTracking.readCRBits != 0) +// return; // dont handle complex cases for now +// if (crTracking.writtenCRBits != 0) +// { +// if ((crTracking.writtenCRBits&requiredCRBits) != 0) +// { +// crSetterInstructionIndex = i; +// break; +// } +// else +// { +// return; // other CR bits overwritten (dont handle complex cases) +// } +// } +// // is safe? 
(no risk of overwriting x64 eflags) +// if ((imlInstruction->type == PPCREC_IML_TYPE_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_R_NAME || imlInstruction->type == PPCREC_IML_TYPE_NO_OP) || +// (imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) || +// (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) || +// (imlInstruction->type == PPCREC_IML_TYPE_R_R && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) ) +// continue; +// // not safe +// if (unsafeInstructionIndex == -1) +// unsafeInstructionIndex = i; +// } +// if (crSetterInstructionIndex < 0) +// return; +// if (unsafeInstructionIndex < 0) +// return; // no danger of overwriting eflags, don't reorder +// // check if we can move the CR setter instruction to after unsafeInstructionIndex +// PPCRecCRTracking_t crTrackingSetter = crTracking; +// IMLUsedRegisters regTrackingCRSetter; +// imlSegment->imlList[crSetterInstructionIndex].CheckRegisterUsage(®TrackingCRSetter); +// if (regTrackingCRSetter.writtenFPR1 >= 0 || regTrackingCRSetter.readFPR1 >= 0 || regTrackingCRSetter.readFPR2 >= 0 || regTrackingCRSetter.readFPR3 >= 0 || regTrackingCRSetter.readFPR4 >= 0) +// return; // we don't handle FPR dependency yet so just ignore FPR instructions +// IMLUsedRegisters registerTracking; +// if (regTrackingCRSetter.writtenNamedReg1 >= 0) +// { +// // CR setter does write GPR +// for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) +// { +// imlSegment->imlList[i].CheckRegisterUsage(®isterTracking); +// // reads register written by CR setter? +// if (PPCRecompilerAnalyzer_checkForGPROverwrite(®isterTracking, ®TrackingCRSetter)) +// { +// return; // cant move CR setter because of dependency +// } +// // writes register read by CR setter? 
+// if (PPCRecompilerAnalyzer_checkForGPROverwrite(®TrackingCRSetter, ®isterTracking)) +// { +// return; // cant move CR setter because of dependency +// } +// // overwrites register written by CR setter? +// if (regTrackingCRSetter.writtenNamedReg1 == registerTracking.writtenNamedReg1) +// return; +// } +// } +// else +// { +// // CR setter does not write GPR +// for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) +// { +// imlSegment->imlList[i].CheckRegisterUsage(®isterTracking); +// // writes register read by CR setter? +// if (PPCRecompilerAnalyzer_checkForGPROverwrite(®TrackingCRSetter, ®isterTracking)) +// { +// return; // cant move CR setter because of dependency +// } +// } +// } +// +// // move CR setter instruction +//#ifdef CEMU_DEBUG_ASSERT +// if ((unsafeInstructionIndex + 1) <= crSetterInstructionIndex) +// assert_dbg(); +//#endif +// IMLInstruction* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1); +// memcpy(newCRSetterInstruction, imlSegment->imlList.data() + crSetterInstructionIndex, sizeof(IMLInstruction)); +// imlSegment->imlList[crSetterInstructionIndex].make_no_op(); } /* diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 8c7c807d..98ca687b 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -764,12 +764,11 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, sint16 virtualReg2PhysReg[IML_RA_VIRT_REG_COUNT_MAX]; for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) virtualReg2PhysReg[i] = -1; - + std::unordered_map virt2PhysRegMap; // key = virtual register, value = physical register raLiveRangeInfo_t liveInfo; liveInfo.liveRangesCount = 0; sint32 index = 0; sint32 suffixInstructionCount = imlSegment->HasSuffixInstruction() ? 
1 : 0; - //sint32 suffixInstructionIndex = imlSegment->imlList.size() - suffixInstructionCount; // if no suffix instruction exists this matches instruction count // load register ranges that are supplied from previous segments raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; while(subrangeItr) @@ -789,6 +788,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, assert_dbg(); #endif virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; + virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); } // next subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; @@ -806,6 +806,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, if (virtualReg2PhysReg[liverange->range->virtualRegister] == -1) assert_dbg(); virtualReg2PhysReg[liverange->range->virtualRegister] = -1; + virt2PhysRegMap.erase(liverange->range->virtualRegister); // store GPR if required // special care has to be taken to execute any stores before the suffix instruction since trailing instructions may not get executed if (liverange->hasStore) @@ -844,37 +845,13 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, // update translation table cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1); virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; + virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); } subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; } // rewrite registers - // todo - this can be simplified by using a map or lookup table rather than a check + 4 slot translation table if (index < imlSegment->imlList.size()) - { - IMLUsedRegisters gprTracking; - imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); - - sint32 inputGpr[4]; - inputGpr[0] = 
gprTracking.gpr[0]; - inputGpr[1] = gprTracking.gpr[1]; - inputGpr[2] = gprTracking.gpr[2]; - inputGpr[3] = gprTracking.gpr[3]; - sint32 replaceGpr[4]; - for (sint32 f = 0; f < 4; f++) - { - sint32 virtualRegister = gprTracking.gpr[f]; - if (virtualRegister < 0) - { - replaceGpr[f] = -1; - continue; - } - if (virtualRegister >= IML_RA_VIRT_REG_COUNT_MAX) - assert_dbg(); - replaceGpr[f] = virtualReg2PhysReg[virtualRegister]; - cemu_assert_debug(replaceGpr[f] >= 0); - } - imlSegment->imlList[index].ReplaceGPR(inputGpr, replaceGpr); - } + imlSegment->imlList[index].RewriteGPR(virt2PhysRegMap); // next iml instruction index++; } @@ -889,6 +866,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, // update translation table cemu_assert_debug(virtualReg2PhysReg[liverange->range->virtualRegister] != -1); virtualReg2PhysReg[liverange->range->virtualRegister] = -1; + virt2PhysRegMap.erase(liverange->range->virtualRegister); // store GPR if (liverange->hasStore) { @@ -929,6 +907,7 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, // update translation table cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1); virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; + virt2PhysRegMap.insert_or_assign(subrangeItr->range->virtualRegister, subrangeItr->range->physicalRegister); } // next subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; @@ -1039,21 +1018,12 @@ void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, IMLUsedRegisters gprTracking; while (index < imlSegment->imlList.size()) { - // end loop at suffix instruction - //if (imlSegment->imlList[index].IsSuffixInstruction()) - // break; - // get accessed GPRs imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); - for (sint32 t = 0; t < 4; t++) - { - sint32 virtualRegister = gprTracking.gpr[t]; - if (virtualRegister < 0) - continue; - 
cemu_assert_debug(virtualRegister < IML_RA_VIRT_REG_COUNT_MAX); - imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction - imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index + 1); // index after instruction - } - // next instruction + gprTracking.ForEachAccessedGPR([&](IMLReg gprId, bool isWritten) { + cemu_assert_debug(gprId < IML_RA_VIRT_REG_COUNT_MAX); + imlSegment->raDistances.reg[gprId].usageStart = std::min(imlSegment->raDistances.reg[gprId].usageStart, index); // index before/at instruction + imlSegment->raDistances.reg[gprId].usageEnd = std::max(imlSegment->raDistances.reg[gprId].usageEnd, index + 1); // index after instruction + }); index++; } } @@ -1141,29 +1111,17 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, IMLUsedRegisters gprTracking; while (index < imlSegment->imlList.size()) { - // we parse suffix instructions too for any potential input registers (writes not allowed), but note that any spills/stores need to happen before the suffix instruction - //// end loop at suffix instruction - //if (imlSegment->imlList[index].IsSuffixInstruction()) - // break; - // get accessed GPRs imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); - // handle accessed GPR - for (sint32 t = 0; t < 4; t++) - { - sint32 virtualRegister = gprTracking.gpr[t]; - if (virtualRegister < 0) - continue; - bool isWrite = (t == 3); + gprTracking.ForEachAccessedGPR([&](IMLReg gprId, bool isWritten) { // add location - PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite); + PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[gprId], index, !isWritten, isWritten); #ifdef CEMU_DEBUG_ASSERT - if ((sint32)index < vGPR2Subrange[virtualRegister]->start.index) - assert_dbg(); - if ((sint32)index + 1 > 
vGPR2Subrange[virtualRegister]->end.index) - assert_dbg(); + if ((sint32)index < vGPR2Subrange[gprId]->start.index) + assert_dbg(); + if ((sint32)index + 1 > vGPR2Subrange[gprId]->end.index) + assert_dbg(); #endif - } - // next instruction + }); index++; } } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index ed8bee87..63fb5f72 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -167,13 +167,11 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } - uint32 ppcRecLowerAddr = LaunchSettings::GetPPCRecLowerAddr(); uint32 ppcRecUpperAddr = LaunchSettings::GetPPCRecUpperAddr(); if (ppcRecLowerAddr != 0 && ppcRecUpperAddr != 0) { - if (ppcRecFunc->ppcAddress < ppcRecLowerAddr || ppcRecFunc->ppcAddress > ppcRecUpperAddr) { delete ppcRecFunc; @@ -188,11 +186,16 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP return nullptr; } - //if (ppcRecFunc->ppcAddress == 0x12345678) + //if (ppcRecFunc->ppcAddress == 0x11223344) //{ - // debug_printf("----------------------------------------\n"); - // IMLDebug_Dump(&ppcImlGenContext); - // __debugbreak(); + // //debug_printf("----------------------------------------\n"); + // //IMLDebug_Dump(&ppcImlGenContext); + // //__debugbreak(); + //} + //else + //{ + // delete ppcRecFunc; + // return nullptr; //} // Large functions for testing (botw): diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index bd2c02d8..5a4484da 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -109,6 +109,14 @@ struct ppcImlGenContext_t segmentList2.insert(segmentList2.begin() + index, 1, newSeg); return newSeg; } + + std::span InsertSegments(size_t index, size_t count) + { + segmentList2.insert(segmentList2.begin() + 
index, count, {}); + for (size_t i = index; i < (index + count); i++) + segmentList2[i] = new IMLSegment(); + return { segmentList2.data() + index, count}; + } }; typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)(); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 024b6b86..f7492e59 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -179,6 +179,39 @@ void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContex imlInstruction->op_storeLoad.flags2.signExtend = signExtend; } + +// create and fill two segments (branch taken and branch not taken) as a follow up to the current segment and then merge flow afterwards +template +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, F1n genSegmentBranchTaken, F2n genSegmentBranchNotTaken) +{ + IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + + std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, 3); + IMLSegment* segBranchNotTaken = segments[0]; + IMLSegment* segBranchTaken = segments[1]; + IMLSegment* segMerge = segments[2]; + + // link the segments + segMerge->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken()); + segMerge->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken()); + currentWriteSegment->SetLinkBranchTaken(segBranchTaken); + currentWriteSegment->SetLinkBranchNotTaken(segBranchNotTaken); + segBranchTaken->SetLinkBranchNotTaken(segMerge); + segBranchNotTaken->SetLinkBranchTaken(segMerge); + // generate code for branch taken segment + ppcImlGenContext.currentOutputSegment = segBranchTaken; + genSegmentBranchTaken(ppcImlGenContext); + cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchTaken); + // generate code for branch not taken segment + 
ppcImlGenContext.currentOutputSegment = segBranchNotTaken; + genSegmentBranchNotTaken(ppcImlGenContext); + cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchNotTaken); + ppcImlGenContext.emitInst().make_jump_new(); + // make merge segment the new write segment + ppcImlGenContext.currentOutputSegment = segMerge; + basicBlockInfo.appendSegment = segMerge; +} + uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { if( mappedName == PPCREC_NAME_NONE ) @@ -782,96 +815,24 @@ bool PPCRecompilerImlGen_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode return true; } -bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - //hCPU->gpr[rD] = (int)hCPU->gpr[rA] + (int)hCPU->gpr[rB]; -> Update carry - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); - return true; -} - -bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = hCPU->gpr[rA] + hCPU->gpr[rB] + ca; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, 
registerRB, registerRA); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); - return true; -} - -bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - PPC_ASSERT(rB == 0); - //uint32 a = hCPU->gpr[rA]; - //uint32 ca = hCPU->xer_ca; - //hCPU->gpr[rD] = a + ca; - - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - // move rA to rD - if( registerRA != registerRD ) - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRD, registerRA); - } - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); - return true; -} - -bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - PPC_ASSERT(rB == 0); - //uint32 a = hCPU->gpr[rA]; - //uint32 ca = hCPU->xer_ca; - //hCPU->gpr[rD] = a + ca + -1; - - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - // move rA to rD - if( registerRA != registerRD ) - { - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerRD, registerRA); - } - ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); - return true; -} - bool PPCRecompilerImlGen_ADDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); //hCPU->gpr[rD] = (rA ? 
(int)hCPU->gpr[rA] : 0) + (int)imm; - if( rA != 0 ) + if (rA != 0) { - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); // check if rD is already loaded, else use new temporary register - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); + uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, registerRD, registerRA, imm); } else { // rA not used, instruction is value assignment // rD = imm - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); + uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, imm); } // never updates any cr @@ -883,48 +844,93 @@ bool PPCRecompilerImlGen_ADDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco int rD, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rD, rA, imm); - if( rA != 0 ) + if (rA != 0) { - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); // check if rD is already loaded, else use new temporary register - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); + uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, registerRD, registerRA, (sint32)imm); } else { // rA not used, instruction turns into simple value assignment // rD = imm - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); + uint32 registerRD 
= PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, PPC_REC_INVALID_REGISTER, 0); } // never updates any cr return true; } -bool PPCRecompilerImlGen_ADDIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - sint32 rD, rA; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - // rD = rA + imm; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm); - // never updates any cr + // r = a + b -> update carry + sint32 rD, rA, rB; + PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); + IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); + IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD, regRD, regRA, regRB, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD); return true; } -bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool updateCR0) { - // this opcode is identical to ADDIC but additionally it updates CR0 sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - // rD = rA + imm; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = 
PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm); - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); + IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regA, (sint32)imm, regCa); + if(updateCR0) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); + return true; +} + +bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + // r = a + b + carry -> update carry + sint32 rD, rA, rB; + PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); + IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); + IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, regRB, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD); + return true; +} + +bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + // r = a + carry -> update carry + sint32 rD, rA, rB; + PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); + IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + 
ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, 0, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD); + return true; +} + +bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + // r = a + 0xFFFFFFFF + carry -> update carry + sint32 rD, rA, rB; + PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); + IMLReg regRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, -1, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regRD); return true; } @@ -932,71 +938,80 @@ bool PPCRecompilerImlGen_SUBF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1; - // rD = rB - rA - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA); + // rD = ~rA + rB + 1 + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, regD, regB, regA); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, 
registerRD); + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_SUBFE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // d = ~a + b + ca; sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + ca; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); + ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_SUBFZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // d = ~a + ca; sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - if( rB != 0 ) - debugBreakpoint(); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - 
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, 0, regCa); if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_SUBFC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // d = ~a + b + 1; sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1; - // rD = rB - rA - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUBFC, registerRD, registerRA, registerRB); - if (opcode & PPC_OPC_RC) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRD); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regCa = 
PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCa, 1); // set input carry to simulate offset of 1 + ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0Logical(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_SUBFIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // d = ~a + imm + 1 sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - //uint32 a = hCPU->gpr[rA]; - //hCPU->gpr[rD] = ~a + imm + 1; - // cr0 is never affected - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SUBFC, registerRD, registerRA, imm); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); + IMLReg regD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regCa = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + IMLReg regTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regTmp, (sint32)imm + 1, regCa); + // never affects CR0 return true; } @@ -1102,7 +1117,7 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc else if( SH == (32-MB) && ME == 31 ) { // SRWI - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, registerRA, 
registerRS, MB); } else { @@ -1152,14 +1167,45 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // unlike SRAWI, for SRAW the shift range is 0-63 (6 bits) + // but only shifts up to register bitwidth-1 are well defined in IML so this requires special handling for shifts >= 32 sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRAW, registerRA, registerRS, registerRB); - if ((opcode & PPC_OPC_RC)) - PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); + uint32 registerCarry = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + + uint32 registerTmpShiftAmount = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + uint32 registerTmpCondBool = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + uint32 registerTmp1 = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); + uint32 registerTmp2 = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3); + + // load masked shift factor into temporary register + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerTmpShiftAmount, registerRB, 0x3F); + ppcImlGenContext->emitInst().make_compare_s32(registerTmpShiftAmount, 32, registerTmpCondBool, IMLCondition::UNSIGNED_GT); + ppcImlGenContext->emitInst().make_conditional_jump_new(registerTmpCondBool, true); + + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + 
[&](ppcImlGenContext_t& genCtx) + { + /* branch taken */ + genCtx.emitInst().make_r_r_r(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, registerTmpShiftAmount); + genCtx.emitInst().make_compare_s32(registerRA, 0, registerCarry, IMLCondition::NEQ); // if the sign bit is still set it also means it was shifted out and we can set carry + }, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken, shift size below 32 */ + genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerTmp1, registerRS, 31); // signMask = input >> 31 (arithmetic shift) + genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerTmp2, 1); // shiftMask = ((1<emitInst().make_r_r_s32(PPCREC_IML_OP_SRAW, registerRA, registerRS, (sint32)SH); + if (SH == 0) + return false; // becomes a no-op but also sets ca bit to 0? + uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS, false); + uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA); + uint32 registerCarry = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + uint32 registerTmp = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + // calculate CA first + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerTmp, registerRS, 31); // signMask = input >> 31 (arithmetic shift) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, registerTmp, registerTmp, registerRS); // testValue = input & signMask & ((1<emitInst().make_r_r_s32(PPCREC_IML_OP_AND, registerTmp, registerTmp, ((1 << SH) - 1)); + ppcImlGenContext->emitInst().make_compare_s32(registerTmp, 0, registerCarry, IMLCondition::NEQ); // ca = (testValue != 0) + // do the actual shift + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, registerRA, registerRS, (sint32)SH); + if ((opcode & PPC_OPC_RC)) PPCImlGen_UpdateCR0Logical(ppcImlGenContext, registerRA); return true; @@ 
-1999,7 +2056,7 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpReg, dataRegister); sint32 shiftAmount = (3 - b) * 8; if (shiftAmount) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT, tmpReg, tmpReg, shiftAmount); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, tmpReg, tmpReg, shiftAmount); ppcImlGenContext->emitInst().make_memory_r(tmpReg, memReg, memOffset + b, 8, false); nb--; if (nb == 0) @@ -2791,7 +2848,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) PPCRecompilerImlGen_MULLI(ppcImlGenContext, opcode); break; case 8: // SUBFIC - PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; break; case 10: // CMPLI PPCRecompilerImlGen_CMPLI(ppcImlGenContext, opcode); @@ -2800,11 +2858,11 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode); break; case 12: // ADDIC - if (PPCRecompilerImlGen_ADDIC(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; break; case 13: // ADDIC. 
- if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; break; case 14: // ADDI @@ -4010,36 +4068,6 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the segment is not an entry point or has no other jump sources) } - - // insert cycle counter instruction in every segment that has a cycle count greater zero - //for (IMLSegment* segIt : ppcImlGenContext.segmentList2) - //{ - // if( segIt->ppcAddrMin == 0 ) - // continue; - // // count number of PPC instructions in segment - // // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions - // uint32 lastPPCInstAddr = 0; - // uint32 ppcCount2 = 0; - // for (sint32 i = 0; i < segIt->imlList.size(); i++) - // { - // if (segIt->imlList[i].associatedPPCAddress == 0) - // continue; - // if (segIt->imlList[i].associatedPPCAddress == lastPPCInstAddr) - // continue; - // lastPPCInstAddr = segIt->imlList[i].associatedPPCAddress; - // ppcCount2++; - // } - // //uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions - // uint32 cycleCount = ppcCount2;// ppcCount / 4; - // if( cycleCount > 0 ) - // { - // PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1); - // segIt->imlList[0].type = PPCREC_IML_TYPE_MACRO; - // segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - // segIt->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; - // segIt->imlList[0].op_macro.param = cycleCount; - // } - //} return true; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index c7f91ac2..95cfd176 100644 --- 
a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -49,7 +49,7 @@ void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t* imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; } -void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory1, uint8 registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = 0) +void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory1, uint8 registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) { // store to memory IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);