mirror of
https://github.com/zhaobot/yuzu.git
synced 2025-01-12 23:32:29 -03:00
shader: Implement VMAD, VMNMX, VSETP
This commit is contained in:
parent
0e1b213fa7
commit
51475e21ba
9 changed files with 319 additions and 23 deletions
|
@ -127,6 +127,11 @@ add_library(shader_recompiler STATIC
|
|||
frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
|
||||
frontend/maxwell/translate/impl/texture_gather.cpp
|
||||
frontend/maxwell/translate/impl/texture_query.cpp
|
||||
frontend/maxwell/translate/impl/video_helper.cpp
|
||||
frontend/maxwell/translate/impl/video_helper.h
|
||||
frontend/maxwell/translate/impl/video_minimum_maximum.cpp
|
||||
frontend/maxwell/translate/impl/video_multiply_add.cpp
|
||||
frontend/maxwell/translate/impl/video_set_predicate.cpp
|
||||
frontend/maxwell/translate/impl/vote.cpp
|
||||
frontend/maxwell/translate/impl/warp_shuffle.cpp
|
||||
frontend/maxwell/translate/translate.cpp
|
||||
|
|
|
@ -1121,6 +1121,10 @@ U32 IREmitter::UMin(const U32& a, const U32& b) {
|
|||
return Inst<U32>(Opcode::UMin32, a, b);
|
||||
}
|
||||
|
||||
// Emits an integer minimum of a and b.
// is_signed selects SMin; otherwise UMin is used.
U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) {
    if (is_signed) {
        return SMin(a, b);
    }
    return UMin(a, b);
}
|
||||
|
||||
// Emits an Opcode::SMax32 instruction taking a and b as operands.
U32 IREmitter::SMax(const U32& a, const U32& b) {
    constexpr Opcode opcode{Opcode::SMax32};
    return Inst<U32>(opcode, a, b);
}
|
||||
|
@ -1129,6 +1133,10 @@ U32 IREmitter::UMax(const U32& a, const U32& b) {
|
|||
return Inst<U32>(Opcode::UMax32, a, b);
|
||||
}
|
||||
|
||||
// Emits an integer maximum of a and b.
// is_signed selects SMax; otherwise UMax is used.
U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) {
    if (is_signed) {
        return SMax(a, b);
    }
    return UMax(a, b);
}
|
||||
|
||||
// Emits a less-than comparison of lhs and rhs.
// Uses Opcode::SLessThan when is_signed is true and Opcode::ULessThan otherwise.
U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
    const Opcode opcode{is_signed ? Opcode::SLessThan : Opcode::ULessThan};
    return Inst<U1>(opcode, lhs, rhs);
}
|
||||
|
@ -1267,11 +1275,7 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) {
|
|||
}
|
||||
|
||||
// Converts a floating-point value to an integer of the requested bit size.
// is_signed selects ConvertFToS; otherwise ConvertFToU is used.
//
// The previous body returned from both arms of an if/else and was then followed by an
// unreachable duplicate return; only the single return is kept.
U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) {
    return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value);
}
|
||||
|
||||
F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) {
|
||||
|
@ -1360,11 +1364,8 @@ F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const
|
|||
|
||||
// Converts an integer value to floating point.
// is_signed selects ConvertSToF; otherwise ConvertUToF is used. Both receive the same
// destination and source bit sizes.
//
// The previous body returned from both arms of an if/else and was then followed by an
// unreachable duplicate return; only the single return is kept.
F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
                                 const Value& value) {
    return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value)
                     : ConvertUToF(dest_bitsize, src_bitsize, value);
}
|
||||
|
||||
U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
|
||||
|
|
|
@ -196,8 +196,10 @@ public:
|
|||
[[nodiscard]] U32 FindUMsb(const U32& value);
|
||||
/// Minimum of a and b; this is the variant IMin dispatches to when is_signed is true.
[[nodiscard]] U32 SMin(const U32& a, const U32& b);
|
||||
/// Emits an Opcode::UMin32 instruction on a and b; IMin dispatches here when is_signed is false.
[[nodiscard]] U32 UMin(const U32& a, const U32& b);
|
||||
/// Integer minimum of a and b: forwards to SMin when is_signed is true, otherwise to UMin.
[[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed);
|
||||
/// Emits an Opcode::SMax32 instruction on a and b; IMax dispatches here when is_signed is true.
[[nodiscard]] U32 SMax(const U32& a, const U32& b);
|
||||
/// Emits an Opcode::UMax32 instruction on a and b; IMax dispatches here when is_signed is false.
[[nodiscard]] U32 UMax(const U32& a, const U32& b);
|
||||
/// Integer maximum of a and b: forwards to SMax when is_signed is true, otherwise to UMax.
[[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed);
|
||||
|
||||
/// Emits lhs < rhs using Opcode::SLessThan when is_signed is true, Opcode::ULessThan otherwise.
[[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
|
||||
[[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
|
||||
|
|
|
@ -385,14 +385,6 @@ void TranslatorVisitor::VADD(u64) {
|
|||
ThrowNotImplemented(Opcode::VADD);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::VMAD(u64) {
|
||||
ThrowNotImplemented(Opcode::VMAD);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::VMNMX(u64) {
|
||||
ThrowNotImplemented(Opcode::VMNMX);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::VOTE_vtg(u64) {
|
||||
ThrowNotImplemented(Opcode::VOTE_vtg);
|
||||
}
|
||||
|
@ -400,11 +392,6 @@ void TranslatorVisitor::VOTE_vtg(u64) {
|
|||
void TranslatorVisitor::VSET(u64) {
|
||||
ThrowNotImplemented(Opcode::VSET);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::VSETP(u64) {
|
||||
ThrowNotImplemented(Opcode::VSETP);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::VSHL(u64) {
|
||||
ThrowNotImplemented(Opcode::VSHL);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
|
||||
// Extracts the lane of a video-instruction operand selected by width and selector.
//
// Word returns value untouched. Byte and Unknown extract 8 bits starting at bit selector * 8,
// Short extracts 16 bits starting at bit selector * 16; is_signed is forwarded to
// BitFieldExtract. Any other width value throws NotImplementedException.
IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
                                 u32 selector, bool is_signed) {
    // A full word needs no extraction at all.
    if (width == VideoWidth::Word) {
        return value;
    }

    u32 lane_bits{};
    if (width == VideoWidth::Byte || width == VideoWidth::Unknown) {
        lane_bits = 8;
    } else if (width == VideoWidth::Short) {
        lane_bits = 16;
    } else {
        throw NotImplementedException("Unknown VideoWidth {}", width);
    }

    const IR::U32 offset{ir.Imm32(selector * lane_bits)};
    const IR::U32 count{ir.Imm32(lane_bits)};
    return ir.BitFieldExtract(value, offset, count, is_signed);
}
|
||||
|
||||
// Returns the effective width of a source operand: the encoded width for register operands,
// VideoWidth::Short for immediates.
VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
    if (is_immediate) {
        // immediates must be 16-bit format.
        return VideoWidth::Short;
    }
    return width;
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
|
@ -0,0 +1,23 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
// Operand width of a video instruction source. The video translators decode this directly from
// 2-bit instruction fields, so the numeric values are part of the encoding.
enum class VideoWidth : u64 {
    Byte = 0,
    // Handled exactly like Byte by ExtractVideoOperandValue.
    Unknown = 1,
    Short = 2,
    Word = 3,
};
|
||||
|
||||
/// Extracts the lane of value selected by width and selector. Byte/Unknown yield 8 bits at
/// bit selector * 8, Short yields 16 bits at bit selector * 16, Word returns value unchanged.
/// is_signed is forwarded to the bit-field extract. Throws NotImplementedException for any
/// other width value.
[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
|
||||
VideoWidth width, u32 selector, bool is_signed);
|
||||
|
||||
/// Returns VideoWidth::Short when is_immediate is true, otherwise returns width unchanged.
[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
|
||||
|
||||
} // namespace Shader::Maxwell
|
|
@ -0,0 +1,92 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Second-stage operation of VMNMX, decoded from a 3-bit instruction field.
// Only MIN and MAX are handled by ApplyVideoMinMaxOp; every other value is rejected there.
enum class VideoMinMaxOps : u64 {
    MRG_16H = 0,
    MRG_16L = 1,
    MRG_8B0 = 2,
    MRG_8B2 = 3,
    ACC = 4,
    MIN = 5,
    MAX = 6,
};
|
||||
|
||||
// Applies the second-stage VMNMX operation to lhs and rhs.
// MIN maps to IMin and MAX to IMax, both honouring is_signed; any other op throws
// NotImplementedException.
[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
                                         VideoMinMaxOps op, bool is_signed) {
    if (op == VideoMinMaxOps::MIN) {
        return ir.IMin(lhs, rhs, is_signed);
    }
    if (op == VideoMinMaxOps::MAX) {
        return ir.IMax(lhs, rhs, is_signed);
    }
    throw NotImplementedException("VMNMX op {}", op);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Translates the VMNMX (video minimum/maximum) instruction.
//
// Stage one takes the maximum (mx set) or minimum (mx clear) of operands A and B. Stage two
// combines that result with register C through ApplyVideoMinMaxOp, using dest_sign as the
// signedness, and writes the outcome to dest_reg.
//
// Throws NotImplementedException for CC, SAT, a selector other than 2 on either source, a
// source A width other than Word, and (via ApplyVideoMinMaxOp) any op other than MIN/MAX.
void TranslatorVisitor::VMNMX(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 16, u64> src_b_imm;
        BitField<28, 2, u64> src_b_selector;
        BitField<29, 2, VideoWidth> src_b_width;
        BitField<36, 2, u64> src_a_selector;
        BitField<37, 2, VideoWidth> src_a_width;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> src_a_sign;
        BitField<49, 1, u64> src_b_sign;
        BitField<50, 1, u64> is_src_b_reg;
        BitField<51, 3, VideoMinMaxOps> op;
        BitField<54, 1, u64> dest_sign;
        BitField<55, 1, u64> sat;
        BitField<56, 1, u64> mx;
    } const fields{insn};

    // Unsupported modifiers are rejected up front, in a fixed order.
    if (fields.cc != 0) {
        throw NotImplementedException("VMNMX CC");
    }
    if (fields.sat != 0) {
        throw NotImplementedException("VMNMX SAT");
    }
    // Selectors were shown to default to 2 in unit tests
    if (fields.src_a_selector != 2) {
        throw NotImplementedException("VMNMX Selector {}", fields.src_a_selector.Value());
    }
    if (fields.src_b_selector != 2) {
        throw NotImplementedException("VMNMX Selector {}", fields.src_b_selector.Value());
    }
    if (fields.src_a_width != VideoWidth::Word) {
        throw NotImplementedException("VMNMX Source Width {}", fields.src_a_width.Value());
    }

    // Operand B is a 16-bit immediate unless the register bit is set.
    const bool b_is_immediate{fields.is_src_b_reg == 0};
    const IR::U32 reg_a{GetReg8(insn)};
    const IR::U32 raw_b{b_is_immediate ? ir.Imm32(static_cast<u32>(fields.src_b_imm))
                                       : GetReg20(insn)};
    const IR::U32 reg_c{GetReg39(insn)};

    const VideoWidth width_a{fields.src_a_width};
    const VideoWidth width_b{GetVideoSourceWidth(fields.src_b_width, b_is_immediate)};

    const bool a_is_signed{fields.src_a_sign != 0};
    const bool b_is_signed{fields.src_b_sign != 0};
    // Both operands are always extracted from lane 0 here.
    const IR::U32 value_a{ExtractVideoOperandValue(ir, reg_a, width_a, 0, a_is_signed)};
    const IR::U32 value_b{ExtractVideoOperandValue(ir, raw_b, width_b, 0, b_is_signed)};

    // First operation's sign is only dependent on operand b's sign
    const bool first_stage_signed{b_is_signed};
    const IR::U32 first_stage{fields.mx == 0 ? ir.IMin(value_a, value_b, first_stage_signed)
                                             : ir.IMax(value_a, value_b, first_stage_signed)};

    const bool result_signed{fields.dest_sign != 0};
    const IR::U32 result{ApplyVideoMinMaxOp(ir, first_stage, reg_c, fields.op, result_signed)};
    X(fields.dest_reg, result);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
|
@ -0,0 +1,64 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
// Translates the VMAD (video multiply-add) instruction.
//
// Computes dest = A * B + C. A and B are first narrowed to the lane chosen by their width and
// selector fields; C is read as a whole register.
//
// Throws NotImplementedException for CC, SAT, a non-zero scale, and any use of the A/C negate
// bits (both set is reported as "PO", a single one as "NEG").
void TranslatorVisitor::VMAD(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 16, u64> src_b_imm;
        BitField<28, 2, u64> src_b_selector;
        BitField<29, 2, VideoWidth> src_b_width;
        BitField<36, 2, u64> src_a_selector;
        BitField<37, 2, VideoWidth> src_a_width;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> src_a_sign;
        BitField<49, 1, u64> src_b_sign;
        BitField<50, 1, u64> is_src_b_reg;
        BitField<51, 2, u64> scale;
        BitField<53, 1, u64> src_c_neg;
        BitField<54, 1, u64> src_a_neg;
        BitField<55, 1, u64> sat;
    } const fields{insn};

    // Unsupported modifiers are rejected up front, in a fixed order.
    if (fields.cc != 0) {
        throw NotImplementedException("VMAD CC");
    }
    if (fields.sat != 0) {
        throw NotImplementedException("VMAD SAT");
    }
    if (fields.scale != 0) {
        throw NotImplementedException("VMAD SCALE");
    }
    const bool negate_a{fields.src_a_neg != 0};
    const bool negate_c{fields.src_c_neg != 0};
    if (negate_a && negate_c) {
        throw NotImplementedException("VMAD PO");
    }
    if (negate_a || negate_c) {
        throw NotImplementedException("VMAD NEG");
    }

    // Operand B is a 16-bit immediate unless the register bit is set.
    const bool b_is_immediate{fields.is_src_b_reg == 0};
    const IR::U32 reg_a{GetReg8(insn)};
    const IR::U32 raw_b{b_is_immediate ? ir.Imm32(static_cast<u32>(fields.src_b_imm))
                                       : GetReg20(insn)};
    const IR::U32 addend{GetReg39(insn)};

    const u32 selector_a{static_cast<u32>(fields.src_a_selector)};
    // Immediate values can't have a selector
    const u32 selector_b{b_is_immediate ? 0U : static_cast<u32>(fields.src_b_selector)};
    const VideoWidth width_a{fields.src_a_width};
    const VideoWidth width_b{GetVideoSourceWidth(fields.src_b_width, b_is_immediate)};

    const bool a_is_signed{fields.src_a_sign != 0};
    const bool b_is_signed{fields.src_b_sign != 0};
    const IR::U32 value_a{ExtractVideoOperandValue(ir, reg_a, width_a, selector_a, a_is_signed)};
    const IR::U32 value_b{ExtractVideoOperandValue(ir, raw_b, width_b, selector_b, b_is_signed)};

    const IR::U32 product{ir.IMul(value_a, value_b)};
    X(fields.dest_reg, ir.IAdd(product, addend));
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
|
@ -0,0 +1,92 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Comparison selector of VSETP as decoded from its 5-bit instruction field.
// The encoding is sparse: values 4-15 have no enumerator and are rejected by
// VsetpToShaderCompareOp.
enum class VsetpCompareOp : u64 {
    False = 0,
    LessThan = 1,
    Equal = 2,
    LessThanEqual = 3,
    GreaterThan = 16,
    NotEqual = 17,
    GreaterThanEqual = 18,
    True = 19,
};
|
||||
|
||||
// Maps the VSETP-specific comparison encoding onto the shared CompareOp enumeration.
// Throws NotImplementedException when op is not one of the named VsetpCompareOp values.
CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
    struct Mapping {
        VsetpCompareOp vsetp_op;
        CompareOp shader_op;
    };
    static constexpr Mapping mappings[]{
        {VsetpCompareOp::False, CompareOp::False},
        {VsetpCompareOp::LessThan, CompareOp::LessThan},
        {VsetpCompareOp::Equal, CompareOp::Equal},
        {VsetpCompareOp::LessThanEqual, CompareOp::LessThanEqual},
        {VsetpCompareOp::GreaterThan, CompareOp::GreaterThan},
        {VsetpCompareOp::NotEqual, CompareOp::NotEqual},
        {VsetpCompareOp::GreaterThanEqual, CompareOp::GreaterThanEqual},
        {VsetpCompareOp::True, CompareOp::True},
    };
    for (const Mapping& mapping : mappings) {
        if (mapping.vsetp_op == op) {
            return mapping.shader_op;
        }
    }
    throw NotImplementedException("Invalid compare op {}", op);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Translates the VSETP (video set predicate) instruction.
//
// Operands A and B are narrowed to the lane chosen by their own width and selector fields and
// then compared. The comparison is combined with bop_pred (optionally negated) through bop and
// written to dest_pred_a; the negated comparison is combined the same way and written to
// dest_pred_b.
//
// Throws NotImplementedException (via VsetpToShaderCompareOp) for an unknown compare op.
void TranslatorVisitor::VSETP(u64 insn) {
    // NOTE(review): compare_op <43, 5> and bop <45, 2> share instruction bits, so a non-zero
    // bop presumably yields a compare_op value with no VsetpCompareOp enumerator - confirm
    // against the hardware encoding.
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<20, 16, u64> src_b_imm;
        BitField<28, 2, u64> src_b_selector;
        BitField<29, 2, VideoWidth> src_b_width;
        BitField<36, 2, u64> src_a_selector;
        BitField<37, 2, VideoWidth> src_a_width;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<43, 5, VsetpCompareOp> compare_op;
        BitField<45, 2, BooleanOp> bop;
        BitField<48, 1, u64> src_a_sign;
        BitField<49, 1, u64> src_b_sign;
        BitField<50, 1, u64> is_src_b_reg;
    } const vsetp{insn};

    // Operand B is a 16-bit immediate unless the register bit is set.
    const bool is_b_imm{vsetp.is_src_b_reg == 0};
    const IR::U32 src_a{GetReg8(insn)};
    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};

    const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
    // Immediate values can't have a selector
    const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vsetp.src_b_selector)};
    const VideoWidth a_width{vsetp.src_a_width};
    const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};

    const bool src_a_signed{vsetp.src_a_sign != 0};
    const bool src_b_signed{vsetp.src_b_sign != 0};
    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
    // Operand B must use its own selector. It was previously extracted with a_selector, which
    // picked the wrong lane whenever the two selectors differed (and for immediates, whose
    // selector is forced to 0).
    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};

    // Compare operation's sign is only dependent on operand b's sign
    const bool compare_signed{src_b_signed};
    const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
    const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
    const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
    const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
    const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
    ir.SetPred(vsetp.dest_pred_a, result_a);
    ir.SetPred(vsetp.dest_pred_b, result_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
Loading…
Reference in a new issue