From 1f3eb601acdcdfa4c119cffbf36b5792147b893f Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 6 Apr 2021 02:56:15 -0300
Subject: [PATCH] shader: Implement texture buffers

---
 .../backend/spirv/emit_context.cpp            | 29 ++++++
 .../backend/spirv/emit_context.h              |  5 ++
 .../backend/spirv/emit_spirv.cpp              |  1 +
 .../backend/spirv/emit_spirv_image.cpp        | 24 +++--
 src/shader_recompiler/ir_opt/texture_pass.cpp | 88 ++++++++++++++-----
 src/shader_recompiler/shader_info.h           |  9 ++
 .../renderer_vulkan/pipeline_helper.h         | 10 +++
 .../renderer_vulkan/vk_compute_pipeline.cpp   | 15 ++--
 .../renderer_vulkan/vk_graphics_pipeline.cpp  | 14 +--
 .../renderer_vulkan/vk_pipeline_cache.cpp     |  2 +-
 10 files changed, 158 insertions(+), 39 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 2d39ea373..d01633628 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -46,6 +46,8 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
         return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format);
     case TextureType::ShadowArrayCube:
         return ctx.TypeImage(type, spv::Dim::Cube, true, true, false, 1, format);
+    case TextureType::Buffer:
+        break;
     }
     throw InvalidArgument("Invalid texture type {}", desc.type);
 }
@@ -129,6 +131,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
     DefineConstantBuffers(program.info, binding);
     DefineStorageBuffers(program.info, binding);
     DefineTextures(program.info, binding);
+    DefineTextureBuffers(program.info, binding);
     DefineAttributeMemAccess(program.info);
     DefineLabels(program);
 }
@@ -541,6 +544,32 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) {
     }
 }
 
+void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) {
+    if (info.texture_buffer_descriptors.empty()) { // shader uses no texture buffers
+        return; // the buffer image types below are left default-initialized
+    }
+    const spv::ImageFormat format{spv::ImageFormat::Unknown};
+    image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format);
+    sampled_texture_buffer_type = TypeSampledImage(image_buffer_type);
+
+    const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)};
+    texture_buffers.reserve(info.texture_buffer_descriptors.size());
+    for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) {
+        if (desc.count != 1) { // arrays of texture buffers are rejected for now
+            throw NotImplementedException("Array of texture buffers");
+        }
+        const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)};
+        Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::DescriptorSet, 0U);
+        Name(id, fmt::format("texbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset));
+        texture_buffers.insert(texture_buffers.end(), desc.count, id); // one entry per binding
+        if (profile.supported_spirv >= 0x00010400) { // 0x00010400 is the SPIR-V 1.4 version word
+            interfaces.push_back(id);
+        }
+        binding += desc.count; // advance the caller's running binding counter
+    }
+}
+
 void EmitContext::DefineLabels(IR::Program& program) {
     for (IR::Block* const block : program.blocks) {
         block->SetDefinition(OpLabel());
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 7a2ac0511..2a10e94e5 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -90,9 +90,13 @@ public:
 
     Id storage_u32{};
 
+    Id image_buffer_type{};
+    Id sampled_texture_buffer_type{};
+
     std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
     std::array<Id, Info::MAX_SSBOS> ssbos{};
     std::vector<TextureDefinition> textures;
+    std::vector<Id> texture_buffers;
 
     Id workgroup_id{};
     Id local_invocation_id{};
@@ -151,6 +155,7 @@ private:
     void DefineConstantBuffers(const Info& info, u32& binding);
     void DefineStorageBuffers(const Info& info, u32& binding);
     void DefineTextures(const Info& info, u32& binding);
+    void DefineTextureBuffers(const Info& info, u32& binding);
     void DefineAttributeMemAccess(const Info& info);
     void DefineLabels(IR::Program& program);
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 191380db0..32512a0e5 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -249,6 +249,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
     // TODO: Track this usage
     ctx.AddCapability(spv::Capability::ImageGatherExtended);
     ctx.AddCapability(spv::Capability::ImageQuery);
+    ctx.AddCapability(spv::Capability::SampledBuffer);
 }
 
 Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index fc40615af..525f67c6e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -128,12 +128,18 @@ Id Texture(EmitContext& ctx, const IR::Value& index) {
     throw NotImplementedException("Indirect texture sample");
 }
 
-Id TextureImage(EmitContext& ctx, const IR::Value& index) {
-    if (index.IsImmediate()) {
+Id TextureImage(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
+    if (!index.IsImmediate()) { // only immediate descriptor indices are handled
+        throw NotImplementedException("Indirect texture sample");
+    }
+    if (info.type == TextureType::Buffer) { // buffers index texture_buffers, not textures
+        const Id sampler_id{ctx.texture_buffers.at(index.U32())};
+        const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)};
+        return ctx.OpImage(ctx.image_buffer_type, id); // image extracted from the sampled image
+    } else {
         const TextureDefinition def{ctx.textures.at(index.U32())};
         return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id));
     }
-    throw NotImplementedException("Indirect texture sample");
 }
 
 Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) {
@@ -297,17 +303,22 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
                 ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
 }
 
+#pragma optimize("", off)
+
 Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
                   Id lod, Id ms) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
+    if (info.type == TextureType::Buffer) {
+        lod = Id{};
+    }
     const ImageOperands operands(offset, lod, ms);
     return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
-                TextureImage(ctx, index), coords, operands.Mask(), operands.Span());
+                TextureImage(ctx, index, info), coords, operands.Mask(), operands.Span());
 }
 
 Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
-    const Id image{TextureImage(ctx, index)};
+    const Id image{TextureImage(ctx, index, info)};
     const Id zero{ctx.u32_zero_value};
     const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }};
     switch (info.type) {
@@ -331,6 +342,9 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
     case TextureType::ShadowArrayCube:
         return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod),
                                         mips());
+    case TextureType::Buffer: // NOTE(review): mips() emits OpImageQueryLevels - valid for Buffer?
+        return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero,
+                                        zero, mips());
     }
     throw LogicError("Unspecified image type {}", info.type.Value());
 }
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index bcb94ce4d..290ce4179 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -147,24 +147,39 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
 
 class Descriptors {
 public:
-    explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {}
+    explicit Descriptors(TextureDescriptors& texture_descriptors_,
+                         TextureBufferDescriptors& texture_buffer_descriptors_)
+        : texture_descriptors{texture_descriptors_}, texture_buffer_descriptors{
+                                                         texture_buffer_descriptors_} {}
 
-    u32 Add(const TextureDescriptor& descriptor) {
-        // TODO: Handle arrays
-        auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) {
-            return descriptor.cbuf_index == existing.cbuf_index &&
-                   descriptor.cbuf_offset == existing.cbuf_offset &&
-                   descriptor.type == existing.type;
-        })};
-        if (it != descriptors.end()) {
-            return static_cast<u32>(std::distance(descriptors.begin(), it));
-        }
-        descriptors.push_back(descriptor);
-        return static_cast<u32>(descriptors.size()) - 1;
+    u32 Add(const TextureDescriptor& desc) {
+        return Add(texture_descriptors, desc, [&desc](const auto& existing) {
+            return desc.cbuf_index == existing.cbuf_index &&
+                   desc.cbuf_offset == existing.cbuf_offset && desc.type == existing.type;
+        });
+    }
+
+    u32 Add(const TextureBufferDescriptor& desc) {
+        return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) {
+            return desc.cbuf_index == existing.cbuf_index &&
+                   desc.cbuf_offset == existing.cbuf_offset;
+        });
     }
 
 private:
-    TextureDescriptors& descriptors;
+    template <typename Descriptors, typename Descriptor, typename Func>
+    static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
+        // TODO: Handle arrays
+        const auto it{std::ranges::find_if(descriptors, pred)}; // look for an equivalent entry
+        if (it != descriptors.end()) {
+            return static_cast<u32>(std::distance(descriptors.begin(), it)); // reuse its index
+        }
+        descriptors.push_back(desc); // first use: append and hand out the new last index
+        return static_cast<u32>(descriptors.size()) - 1;
+    }
+
+    TextureDescriptors& texture_descriptors;
+    TextureBufferDescriptors& texture_buffer_descriptors;
 };
 } // Anonymous namespace
 
@@ -185,7 +200,10 @@ void TexturePass(Environment& env, IR::Program& program) {
     std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) {
         return lhs.cbuf.index < rhs.cbuf.index;
     });
-    Descriptors descriptors{program.info.texture_descriptors};
+    Descriptors descriptors{
+        program.info.texture_descriptors,
+        program.info.texture_buffer_descriptors,
+    };
     for (TextureInst& texture_inst : to_replace) {
         // TODO: Handle arrays
         IR::Inst* const inst{texture_inst.inst};
@@ -193,16 +211,42 @@ void TexturePass(Environment& env, IR::Program& program) {
 
         const auto& cbuf{texture_inst.cbuf};
         auto flags{inst->Flags<IR::TextureInstInfo>()};
-        if (inst->Opcode() == IR::Opcode::ImageQueryDimensions) {
+        switch (inst->Opcode()) {
+        case IR::Opcode::ImageQueryDimensions:
             flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset));
             inst->SetFlags(flags);
+            break;
+        case IR::Opcode::ImageFetch:
+            if (flags.type != TextureType::Color1D) {
+                break;
+            }
+            if (env.ReadTextureType(cbuf.index, cbuf.offset) == TextureType::Buffer) {
+                // Replace with the bound texture type only when it's a texture buffer
+                // If the instruction is 1D and the bound type is 2D, don't change the code and let
+                // the rasterizer robustness handle it
+                // This happens on Fire Emblem: Three Houses
+                flags.type.Assign(TextureType::Buffer);
+            }
+            inst->SetFlags(flags);
+            break;
+        default:
+            break;
+        }
+        u32 index;
+        if (flags.type == TextureType::Buffer) {
+            index = descriptors.Add(TextureBufferDescriptor{
+                .cbuf_index{cbuf.index},
+                .cbuf_offset{cbuf.offset},
+                .count{1},
+            });
+        } else {
+            index = descriptors.Add(TextureDescriptor{
+                .type{flags.type},
+                .cbuf_index{cbuf.index},
+                .cbuf_offset{cbuf.offset},
+                .count{1},
+            });
         }
-        const u32 index{descriptors.Add(TextureDescriptor{
-            .type{flags.type},
-            .cbuf_index{cbuf.index},
-            .cbuf_offset{cbuf.offset},
-            .count{1},
-        })};
         inst->SetArg(0, IR::Value{index});
     }
 }
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 41bb5b9a1..e6f0de8d8 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -29,6 +29,7 @@ enum class TextureType : u32 {
     Shadow3D,
     ShadowCube,
     ShadowArrayCube,
+    Buffer,
 };
 
 enum class Interpolation {
@@ -50,6 +51,13 @@ struct TextureDescriptor {
 };
 using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
 
+struct TextureBufferDescriptor {
+    u32 cbuf_index;  // constant buffer the texture handle is read from
+    u32 cbuf_offset; // offset added to that buffer's address to locate the handle
+    u32 count;       // bindings covered by this entry; the SPIR-V backend only accepts 1
+};
+using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 2>;
+
 struct ConstantBufferDescriptor {
     u32 index;
     u32 count;
@@ -112,6 +120,7 @@ struct Info {
         constant_buffer_descriptors;
     boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors;
     TextureDescriptors texture_descriptors;
+    TextureBufferDescriptors texture_buffer_descriptors;
 };
 
 } // namespace Shader
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index eebe5d569..decf0d32c 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -93,6 +93,9 @@ public:
         for ([[maybe_unused]] const auto& desc : info.texture_descriptors) {
             Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage);
         }
+        for ([[maybe_unused]] const auto& desc : info.texture_buffer_descriptors) {
+            Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); // one Add per texture buffer
+        }
     }
 
 private:
@@ -146,6 +149,8 @@ inline VideoCommon::ImageViewType CastType(Shader::TextureType type) {
     case Shader::TextureType::ColorArrayCube:
     case Shader::TextureType::ShadowArrayCube:
         return VideoCommon::ImageViewType::CubeArray;
+    case Shader::TextureType::Buffer:
+        break;
     }
     UNREACHABLE_MSG("Invalid texture type {}", type);
     return {};
@@ -161,6 +166,11 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samp
         update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
         ++index;
     }
+    for ([[maybe_unused]] const auto& desc : info.texture_buffer_descriptors) {
+        ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])};
+        update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
+        ++index; // same running index as the sampled image loop above
+    }
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 0bb5b852d..9922cbd0f 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -93,20 +93,23 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
     const auto& launch_desc{kepler_compute.launch_description};
     const auto& cbufs{launch_desc.const_buffer_config};
     const bool via_header_index{launch_desc.linked_tsc};
-    for (const auto& desc : info.texture_descriptors) {
-        const u32 cbuf_index{desc.cbuf_index};
-        const u32 cbuf_offset{desc.cbuf_offset};
+    const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) {
         ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
-
         const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset};
         const u32 raw_handle{gpu_memory.Read<u32>(addr)};
-
-        const TextureHandle handle(raw_handle, via_header_index);
+        return TextureHandle(raw_handle, via_header_index);
+    }};
+    for (const auto& desc : info.texture_descriptors) {
+        const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)};
         image_view_indices.push_back(handle.image);
 
         Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
         samplers.push_back(sampler->Handle());
     }
+    for (const auto& desc : info.texture_buffer_descriptors) {
+        const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)};
+        image_view_indices.push_back(handle.image);
+    }
     const std::span indices_span(image_view_indices.data(), image_view_indices.size());
     texture_cache.FillComputeImageViews(indices_span, image_view_ids);
 
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index a0ef0e98b..afdd8b371 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -169,19 +169,23 @@ void GraphicsPipeline::Configure(bool is_indexed) {
             ++index;
         }
         const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
-        for (const auto& desc : info.texture_descriptors) {
-            const u32 cbuf_index{desc.cbuf_index};
-            const u32 cbuf_offset{desc.cbuf_offset};
+        const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) {
             ASSERT(cbufs[cbuf_index].enabled);
             const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset};
             const u32 raw_handle{gpu_memory.Read<u32>(addr)};
-
-            const TextureHandle handle(raw_handle, via_header_index);
+            return TextureHandle(raw_handle, via_header_index);
+        }};
+        for (const auto& desc : info.texture_descriptors) {
+            const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)};
             image_view_indices.push_back(handle.image);
 
             Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)};
             samplers.push_back(sampler->Handle());
         }
+        for (const auto& desc : info.texture_buffer_descriptors) {
+            const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)};
+            image_view_indices.push_back(handle.image);
+        }
     }
     const std::span indices_span(image_view_indices.data(), image_view_indices.size());
     buffer_cache.UpdateGraphicsBuffers(is_indexed);
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 25f592b8a..23bf84a92 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -212,7 +212,7 @@ protected:
             case Tegra::Texture::TextureType::Texture2DArray:
                 return Shader::TextureType::ColorArray2D;
             case Tegra::Texture::TextureType::Texture1DBuffer:
-                throw Shader::NotImplementedException("Texture buffer");
+                return Shader::TextureType::Buffer;
             case Tegra::Texture::TextureType::TextureCubeArray:
                 return Shader::TextureType::ColorArrayCube;
             default: