From dd91650aaf217196a2b1ced17df24bd74349843d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sun, 23 Dec 2018 02:26:35 -0300 Subject: [PATCH] shader_decode: Implement HFMA2 --- src/video_core/engines/shader_bytecode.h | 1 + src/video_core/shader/decode/hfma2.cpp | 54 ++++++++++++++++++++++- src/video_core/shader/glsl_decompiler.cpp | 9 ++-- src/video_core/shader/shader_ir.h | 1 + 4 files changed, 60 insertions(+), 5 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c4987b682..9cb23f375 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -648,6 +648,7 @@ union Instruction { BitField<37, 2, HalfPrecision> precision; BitField<32, 1, u64> saturate; + BitField<31, 1, u64> negate_b; BitField<30, 1, u64> negate_c; BitField<35, 2, HalfType> type_c; } rr; diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 5ce08481e..bf7491804 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <tuple> + #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" @@ -9,6 +11,8 @@ namespace VideoCommon::Shader { +using Tegra::Shader::HalfPrecision; +using Tegra::Shader::HalfType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -16,7 +20,55 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED(); + if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { + UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); + } else { + UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); + } + + constexpr auto identity = HalfType::H0_H1; + + const HalfType type_a = instr.hfma2.type_a; + const Node op_a = GetRegister(instr.gpr8); + + bool neg_b{}, neg_c{}; + auto [saturate, type_b, op_b, type_c, + op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { + switch (opcode->get().GetId()) { + case OpCode::Id::HFMA2_CR: + neg_b = instr.hfma2.negate_b; + neg_c = instr.hfma2.negate_c; + return {instr.hfma2.saturate, instr.hfma2.type_b, + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39, + GetRegister(instr.gpr39)}; + case OpCode::Id::HFMA2_RC: + neg_b = instr.hfma2.negate_b; + neg_c = instr.hfma2.negate_c; + return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), + instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + case OpCode::Id::HFMA2_RR: + neg_b = instr.hfma2.rr.negate_b; + neg_c = instr.hfma2.rr.negate_c; + return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), + instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; + case OpCode::Id::HFMA2_IMM_R: + neg_c = instr.hfma2.negate_c; + return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), + instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; + default: + return {false, identity, Immediate(0), identity, Immediate(0)}; + } + }(); + UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); + + op_b = GetOperandAbsNegHalf(op_b, false, neg_b); + op_c = GetOperandAbsNegHalf(op_c, false, neg_c); + + MetaHalfArithmetic meta{true, {type_a, type_b, type_c}}; + Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c); + value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); + + SetRegister(bb, instr.gpr0, value); return pc; } diff --git a/src/video_core/shader/glsl_decompiler.cpp b/src/video_core/shader/glsl_decompiler.cpp index abc9a556d..c364a43ce 100644 --- a/src/video_core/shader/glsl_decompiler.cpp +++ b/src/video_core/shader/glsl_decompiler.cpp @@ -762,9 +762,9 @@ private: return GenerateBinaryInfix(operation, "/", type, type, type); } - std::string FFma(Operation operation) { - return GenerateTernary(operation, "fma", Type::Float, Type::Float, Type::Float, - Type::Float); + template <Type type> + std::string Fma(Operation operation) { + return GenerateTernary(operation, "fma", type, type, type, type); } template <Type type> @@ -1231,7 +1231,7 @@ private: &Add<Type::Float>, &Mul<Type::Float>, &Div<Type::Float>, - &FFma, + &Fma<Type::Float>, &Negate<Type::Float>, &Absolute<Type::Float>, &FClamp, @@ -1289,6 +1289,7 @@ private: &Add<Type::HalfFloat>, &Mul<Type::HalfFloat>, + &Fma<Type::HalfFloat>, &Absolute<Type::HalfFloat>, &HNegate, &HMergeF32, diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ccdf316ac..928e3e7d5 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -110,6 +110,7 @@ enum class OperationCode { HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 HAbsolute, /// (f16vec2 a) -> f16vec2 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 HMergeF32, /// (f16vec2 src) -> float