mirror of
https://github.com/zhaobot/yuzu.git
synced 2025-01-27 15:17:03 -03:00
shader: Fix F2I
This commit is contained in:
parent
68a9505d8a
commit
8cb9443cb9
10 changed files with 147 additions and 9 deletions
|
@ -189,6 +189,9 @@ Id EmitFPSqrt(EmitContext& ctx, Id value);
|
||||||
Id EmitFPSaturate16(EmitContext& ctx, Id value);
|
Id EmitFPSaturate16(EmitContext& ctx, Id value);
|
||||||
Id EmitFPSaturate32(EmitContext& ctx, Id value);
|
Id EmitFPSaturate32(EmitContext& ctx, Id value);
|
||||||
Id EmitFPSaturate64(EmitContext& ctx, Id value);
|
Id EmitFPSaturate64(EmitContext& ctx, Id value);
|
||||||
|
Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
|
||||||
|
Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
|
||||||
|
Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
|
||||||
Id EmitFPRoundEven16(EmitContext& ctx, Id value);
|
Id EmitFPRoundEven16(EmitContext& ctx, Id value);
|
||||||
Id EmitFPRoundEven32(EmitContext& ctx, Id value);
|
Id EmitFPRoundEven32(EmitContext& ctx, Id value);
|
||||||
Id EmitFPRoundEven64(EmitContext& ctx, Id value);
|
Id EmitFPRoundEven64(EmitContext& ctx, Id value);
|
||||||
|
|
|
@ -15,7 +15,7 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
|
||||||
return op;
|
return op;
|
||||||
}
|
}
|
||||||
|
|
||||||
Id Saturate(EmitContext& ctx, Id type, Id value, Id zero, Id one) {
|
Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) {
|
||||||
if (ctx.profile.has_broken_spirv_clamp) {
|
if (ctx.profile.has_broken_spirv_clamp) {
|
||||||
return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one);
|
return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one);
|
||||||
} else {
|
} else {
|
||||||
|
@ -139,19 +139,31 @@ Id EmitFPSqrt(EmitContext& ctx, Id value) {
|
||||||
Id EmitFPSaturate16(EmitContext& ctx, Id value) {
|
Id EmitFPSaturate16(EmitContext& ctx, Id value) {
|
||||||
const Id zero{ctx.Constant(ctx.F16[1], u16{0})};
|
const Id zero{ctx.Constant(ctx.F16[1], u16{0})};
|
||||||
const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})};
|
const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})};
|
||||||
return Saturate(ctx, ctx.F16[1], value, zero, one);
|
return Clamp(ctx, ctx.F16[1], value, zero, one);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitFPSaturate32(EmitContext& ctx, Id value) {
|
Id EmitFPSaturate32(EmitContext& ctx, Id value) {
|
||||||
const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})};
|
const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})};
|
||||||
const Id one{ctx.Constant(ctx.F32[1], f32{1.0})};
|
const Id one{ctx.Constant(ctx.F32[1], f32{1.0})};
|
||||||
return Saturate(ctx, ctx.F32[1], value, zero, one);
|
return Clamp(ctx, ctx.F32[1], value, zero, one);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitFPSaturate64(EmitContext& ctx, Id value) {
|
Id EmitFPSaturate64(EmitContext& ctx, Id value) {
|
||||||
const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})};
|
const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})};
|
||||||
const Id one{ctx.Constant(ctx.F64[1], f64{1.0})};
|
const Id one{ctx.Constant(ctx.F64[1], f64{1.0})};
|
||||||
return Saturate(ctx, ctx.F64[1], value, zero, one);
|
return Clamp(ctx, ctx.F64[1], value, zero, one);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits a clamp of a 16-bit float `value` between `min_value` and `max_value`
// by forwarding to the shared Clamp helper with the scalar F16 type.
Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) {
    const Id scalar_type{ctx.F16[1]};
    return Clamp(ctx, scalar_type, value, min_value, max_value);
}
|
||||||
|
|
||||||
|
// Emits a clamp of a 32-bit float `value` between `min_value` and `max_value`
// by forwarding to the shared Clamp helper with the scalar F32 type.
Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) {
    const Id scalar_type{ctx.F32[1]};
    return Clamp(ctx, scalar_type, value, min_value, max_value);
}
|
||||||
|
|
||||||
|
Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) {
|
||||||
|
return Clamp(ctx, ctx.F64[1], value, min_value, max_value);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitFPRoundEven16(EmitContext& ctx, Id value) {
|
Id EmitFPRoundEven16(EmitContext& ctx, Id value) {
|
||||||
|
|
|
@ -731,6 +731,24 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a floating-point clamp instruction for `value` bounded by `min_value`
// and `max_value`.
//
// All three operands must share the same IR type; otherwise InvalidArgument is
// thrown. The opcode (FPClamp16/32/64) is selected from the operand type, and
// any non floating-point type is reported through ThrowInvalidType.
F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value,
                             const F16F32F64& max_value) {
    const bool types_match{value.Type() == min_value.Type() &&
                           value.Type() == max_value.Type()};
    if (!types_match) {
        throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(),
                              max_value.Type());
    }
    const Type operand_type{value.Type()};
    if (operand_type == Type::F16) {
        return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value);
    }
    if (operand_type == Type::F32) {
        return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value);
    }
    if (operand_type == Type::F64) {
        return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value);
    }
    ThrowInvalidType(operand_type);
}
|
||||||
|
|
||||||
F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) {
|
F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) {
|
||||||
switch (value.Type()) {
|
switch (value.Type()) {
|
||||||
case Type::F16:
|
case Type::F16:
|
||||||
|
|
|
@ -147,6 +147,7 @@ public:
|
||||||
[[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
|
[[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
|
||||||
[[nodiscard]] F32 FPSqrt(const F32& value);
|
[[nodiscard]] F32 FPSqrt(const F32& value);
|
||||||
[[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
|
[[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
|
||||||
|
// Emits an FPClamp16/32/64 instruction (chosen by operand type) clamping `value` between
// `min_value` and `max_value`. Throws InvalidArgument when the three operand types differ.
[[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value);
|
||||||
[[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
|
[[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
|
||||||
[[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {});
|
[[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {});
|
||||||
[[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
|
[[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
|
||||||
|
|
|
@ -192,6 +192,9 @@ OPCODE(FPLog2, F32, F32,
|
||||||
OPCODE(FPSaturate16, F16, F16, )
|
OPCODE(FPSaturate16, F16, F16, )
|
||||||
OPCODE(FPSaturate32, F32, F32, )
|
OPCODE(FPSaturate32, F32, F32, )
|
||||||
OPCODE(FPSaturate64, F64, F64, )
|
OPCODE(FPSaturate64, F64, F64, )
|
||||||
|
OPCODE(FPClamp16, F16, F16, F16, F16, )
|
||||||
|
OPCODE(FPClamp32, F32, F32, F32, F32, )
|
||||||
|
OPCODE(FPClamp64, F64, F64, F64, F64, )
|
||||||
OPCODE(FPRoundEven16, F16, F16, )
|
OPCODE(FPRoundEven16, F16, F16, )
|
||||||
OPCODE(FPRoundEven32, F32, F32, )
|
OPCODE(FPRoundEven32, F32, F32, )
|
||||||
OPCODE(FPRoundEven64, F64, F64, )
|
OPCODE(FPRoundEven64, F64, F64, )
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "shader_recompiler/exception.h"
|
#include "shader_recompiler/exception.h"
|
||||||
#include "shader_recompiler/frontend/maxwell/opcodes.h"
|
#include "shader_recompiler/frontend/maxwell/opcodes.h"
|
||||||
|
@ -55,6 +57,37 @@ size_t BitSize(DestFormat dest_format) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the numeric limits of the integer type selected by `format` and
// `is_signed`, converted to f64.
//
// Note the ordering of the returned pair: `first` is the MAXIMUM and `second`
// is the MINIMUM, matching how the caller destructures it.
// Throws NotImplementedException for any destination format other than
// I16, I32 or I64.
std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
    // Builds the {max, min} pair for the integer type of `tag`.
    const auto limits_of = [](auto tag) -> std::pair<f64, f64> {
        using Int = decltype(tag);
        return {static_cast<f64>(std::numeric_limits<Int>::max()),
                static_cast<f64>(std::numeric_limits<Int>::min())};
    };
    switch (format) {
    case DestFormat::I16:
        return is_signed ? limits_of(s16{}) : limits_of(u16{});
    case DestFormat::I32:
        return is_signed ? limits_of(s32{}) : limits_of(u32{});
    case DestFormat::I64:
        return is_signed ? limits_of(s64{}) : limits_of(u64{});
    default:
        break;
    }
    throw NotImplementedException("Invalid destination format {}", format);
}
|
||||||
|
|
||||||
IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
|
IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
|
||||||
union {
|
union {
|
||||||
u64 raw;
|
u64 raw;
|
||||||
|
@ -112,13 +145,58 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
|
||||||
// For example converting F32 65537.0 to U16, the expected value is 0xffff,
|
// For example converting F32 65537.0 to U16, the expected value is 0xffff,
|
||||||
|
|
||||||
const bool is_signed{f2i.is_signed != 0};
|
const bool is_signed{f2i.is_signed != 0};
|
||||||
const size_t bitsize{BitSize(f2i.dest_format)};
|
const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
|
||||||
const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)};
|
|
||||||
|
IR::F16F32F64 intermediate;
|
||||||
|
switch (f2i.src_format) {
|
||||||
|
case SrcFormat::F16: {
|
||||||
|
const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
|
||||||
|
const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
|
||||||
|
intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SrcFormat::F32: {
|
||||||
|
const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
|
||||||
|
const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
|
||||||
|
intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SrcFormat::F64: {
|
||||||
|
const IR::F64 max_val{v.ir.Imm64(max_bound)};
|
||||||
|
const IR::F64 min_val{v.ir.Imm64(min_bound)};
|
||||||
|
intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value());
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
|
||||||
|
IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
|
||||||
|
|
||||||
|
bool handled_special_case = false;
|
||||||
|
const bool special_nan_cases =
|
||||||
|
(f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
|
||||||
|
if (special_nan_cases) {
|
||||||
|
if (f2i.dest_format == DestFormat::I32) {
|
||||||
|
handled_special_case = true;
|
||||||
|
result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
|
||||||
|
} else if (f2i.dest_format == DestFormat::I64) {
|
||||||
|
handled_special_case = true;
|
||||||
|
result = IR::U64{
|
||||||
|
v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!handled_special_case && is_signed) {
|
||||||
|
if (bitsize != 64) {
|
||||||
|
result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
|
||||||
|
} else {
|
||||||
|
result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (bitsize == 64) {
|
if (bitsize == 64) {
|
||||||
const IR::Value vector{v.ir.UnpackUint2x32(result)};
|
v.L(f2i.dest_reg, result);
|
||||||
v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)});
|
|
||||||
v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)});
|
|
||||||
} else {
|
} else {
|
||||||
v.X(f2i.dest_reg, result);
|
v.X(f2i.dest_reg, result);
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,13 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) {
|
||||||
return ir.GetReg(reg);
|
return ir.GetReg(reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reads a 64-bit value from the register pair {reg, reg + 1}.
//
// The two 32-bit registers are combined with CompositeConstruct and packed
// into a single U64. `reg` must be aligned to 2; otherwise
// NotImplementedException is thrown.
IR::U64 TranslatorVisitor::L(IR::Reg reg) {
    const bool is_aligned{IR::IsAligned(reg, 2)};
    if (is_aligned) {
        const auto halves = ir.CompositeConstruct(X(reg), X(reg + 1));
        return IR::U64{ir.PackUint2x32(halves)};
    }
    throw NotImplementedException("Unaligned source register {}", reg);
}
|
||||||
|
|
||||||
IR::F32 TranslatorVisitor::F(IR::Reg reg) {
|
IR::F32 TranslatorVisitor::F(IR::Reg reg) {
|
||||||
return ir.BitCast<IR::F32>(X(reg));
|
return ir.BitCast<IR::F32>(X(reg));
|
||||||
}
|
}
|
||||||
|
@ -36,6 +43,16 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
|
||||||
ir.SetReg(dest_reg, value);
|
ir.SetReg(dest_reg, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Writes a 64-bit value into the register pair {dest_reg, dest_reg + 1}.
//
// The value is unpacked into two 32-bit components; component 0 goes to
// `dest_reg` and component 1 to `dest_reg + 1`. `dest_reg` must be aligned
// to 2; otherwise NotImplementedException is thrown.
void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
    const bool is_aligned{IR::IsAligned(dest_reg, 2)};
    if (!is_aligned) {
        throw NotImplementedException("Unaligned destination register {}", dest_reg);
    }
    const IR::Value halves{ir.UnpackUint2x32(value)};
    X(dest_reg + 0, IR::U32{ir.CompositeExtract(halves, 0)});
    X(dest_reg + 1, IR::U32{ir.CompositeExtract(halves, 1)});
}
|
||||||
|
|
||||||
void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
|
void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
|
||||||
X(dest_reg, ir.BitCast<IR::U32>(value));
|
X(dest_reg, ir.BitCast<IR::U32>(value));
|
||||||
}
|
}
|
||||||
|
|
|
@ -341,10 +341,12 @@ public:
|
||||||
void XMAD_imm(u64 insn);
|
void XMAD_imm(u64 insn);
|
||||||
|
|
||||||
[[nodiscard]] IR::U32 X(IR::Reg reg);
|
[[nodiscard]] IR::U32 X(IR::Reg reg);
|
||||||
|
// Reads the 64-bit value stored in the register pair {reg, reg + 1}.
// Throws NotImplementedException when `reg` is not aligned to 2.
[[nodiscard]] IR::U64 L(IR::Reg reg);
|
||||||
[[nodiscard]] IR::F32 F(IR::Reg reg);
|
[[nodiscard]] IR::F32 F(IR::Reg reg);
|
||||||
[[nodiscard]] IR::F64 D(IR::Reg reg);
|
[[nodiscard]] IR::F64 D(IR::Reg reg);
|
||||||
|
|
||||||
void X(IR::Reg dest_reg, const IR::U32& value);
|
void X(IR::Reg dest_reg, const IR::U32& value);
|
||||||
|
// Writes `value` as two 32-bit halves into the register pair {dest_reg, dest_reg + 1}.
// Throws NotImplementedException when `dest_reg` is not aligned to 2.
void L(IR::Reg dest_reg, const IR::U64& value);
|
||||||
void F(IR::Reg dest_reg, const IR::F32& value);
|
void F(IR::Reg dest_reg, const IR::F32& value);
|
||||||
void D(IR::Reg dest_reg, const IR::F64& value);
|
void D(IR::Reg dest_reg, const IR::F64& value);
|
||||||
|
|
||||||
|
|
|
@ -105,6 +105,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
case IR::Opcode::FPNeg16:
|
case IR::Opcode::FPNeg16:
|
||||||
case IR::Opcode::FPRoundEven16:
|
case IR::Opcode::FPRoundEven16:
|
||||||
case IR::Opcode::FPSaturate16:
|
case IR::Opcode::FPSaturate16:
|
||||||
|
case IR::Opcode::FPClamp16:
|
||||||
case IR::Opcode::FPTrunc16:
|
case IR::Opcode::FPTrunc16:
|
||||||
case IR::Opcode::FPOrdEqual16:
|
case IR::Opcode::FPOrdEqual16:
|
||||||
case IR::Opcode::FPUnordEqual16:
|
case IR::Opcode::FPUnordEqual16:
|
||||||
|
@ -148,6 +149,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||||
case IR::Opcode::FPRecipSqrt64:
|
case IR::Opcode::FPRecipSqrt64:
|
||||||
case IR::Opcode::FPRoundEven64:
|
case IR::Opcode::FPRoundEven64:
|
||||||
case IR::Opcode::FPSaturate64:
|
case IR::Opcode::FPSaturate64:
|
||||||
|
case IR::Opcode::FPClamp64:
|
||||||
case IR::Opcode::FPTrunc64:
|
case IR::Opcode::FPTrunc64:
|
||||||
case IR::Opcode::FPOrdEqual64:
|
case IR::Opcode::FPOrdEqual64:
|
||||||
case IR::Opcode::FPUnordEqual64:
|
case IR::Opcode::FPUnordEqual64:
|
||||||
|
|
|
@ -30,6 +30,8 @@ IR::Opcode Replace(IR::Opcode op) {
|
||||||
return IR::Opcode::FPRoundEven32;
|
return IR::Opcode::FPRoundEven32;
|
||||||
case IR::Opcode::FPSaturate16:
|
case IR::Opcode::FPSaturate16:
|
||||||
return IR::Opcode::FPSaturate32;
|
return IR::Opcode::FPSaturate32;
|
||||||
|
case IR::Opcode::FPClamp16:
|
||||||
|
return IR::Opcode::FPClamp32;
|
||||||
case IR::Opcode::FPTrunc16:
|
case IR::Opcode::FPTrunc16:
|
||||||
return IR::Opcode::FPTrunc32;
|
return IR::Opcode::FPTrunc32;
|
||||||
case IR::Opcode::CompositeConstructF16x2:
|
case IR::Opcode::CompositeConstructF16x2:
|
||||||
|
|
Loading…
Add table
Reference in a new issue