gl_buffer_cache: Mark buffers as resident
Make stream buffer and cached buffers as resident and query their address. This allows us to use GPU addresses for several proprietary Nvidia extensions.
This commit is contained in:
parent
73fb3a304b
commit
32485917ba
10 changed files with 111 additions and 67 deletions
|
@ -41,7 +41,11 @@ class BufferCache {
|
||||||
static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
|
static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using BufferInfo = std::pair<BufferType, u64>;
|
struct BufferInfo {
|
||||||
|
BufferType handle;
|
||||||
|
u64 offset;
|
||||||
|
u64 address;
|
||||||
|
};
|
||||||
|
|
||||||
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
|
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
|
||||||
bool is_written = false, bool use_fast_cbuf = false) {
|
bool is_written = false, bool use_fast_cbuf = false) {
|
||||||
|
@ -50,7 +54,7 @@ public:
|
||||||
auto& memory_manager = system.GPU().MemoryManager();
|
auto& memory_manager = system.GPU().MemoryManager();
|
||||||
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
|
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||||
if (!cpu_addr_opt) {
|
if (!cpu_addr_opt) {
|
||||||
return {GetEmptyBuffer(size), 0};
|
return GetEmptyBuffer(size);
|
||||||
}
|
}
|
||||||
const VAddr cpu_addr = *cpu_addr_opt;
|
const VAddr cpu_addr = *cpu_addr_opt;
|
||||||
|
|
||||||
|
@ -88,7 +92,7 @@ public:
|
||||||
Buffer* const block = GetBlock(cpu_addr, size);
|
Buffer* const block = GetBlock(cpu_addr, size);
|
||||||
MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
|
MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
|
||||||
if (!map) {
|
if (!map) {
|
||||||
return {GetEmptyBuffer(size), 0};
|
return GetEmptyBuffer(size);
|
||||||
}
|
}
|
||||||
if (is_written) {
|
if (is_written) {
|
||||||
map->MarkAsModified(true, GetModifiedTicks());
|
map->MarkAsModified(true, GetModifiedTicks());
|
||||||
|
@ -101,7 +105,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))};
|
return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
|
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
|
||||||
|
@ -254,13 +258,12 @@ public:
|
||||||
committed_flushes.pop_front();
|
committed_flushes.pop_front();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
|
virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||||
std::unique_ptr<StreamBuffer> stream_buffer_)
|
std::unique_ptr<StreamBuffer> stream_buffer)
|
||||||
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)},
|
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
|
||||||
stream_buffer_handle{stream_buffer->Handle()} {}
|
|
||||||
|
|
||||||
~BufferCache() = default;
|
~BufferCache() = default;
|
||||||
|
|
||||||
|
@ -449,7 +452,7 @@ private:
|
||||||
|
|
||||||
buffer_ptr += size;
|
buffer_ptr += size;
|
||||||
buffer_offset += size;
|
buffer_offset += size;
|
||||||
return {stream_buffer_handle, uploaded_offset};
|
return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
|
||||||
}
|
}
|
||||||
|
|
||||||
void AlignBuffer(std::size_t alignment) {
|
void AlignBuffer(std::size_t alignment) {
|
||||||
|
|
|
@ -22,21 +22,28 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
|
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
|
||||||
|
|
||||||
Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} {
|
Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
|
||||||
|
: VideoCommon::BufferBlock{cpu_addr, size} {
|
||||||
gl_buffer.Create();
|
gl_buffer.Create();
|
||||||
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
||||||
|
if (device.HasVertexBufferUnifiedMemory()) {
|
||||||
|
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
|
||||||
|
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Buffer::~Buffer() = default;
|
Buffer::~Buffer() = default;
|
||||||
|
|
||||||
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||||
const Device& device, std::size_t stream_size)
|
const Device& device_, std::size_t stream_size)
|
||||||
: GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {
|
: GenericBufferCache{rasterizer, system,
|
||||||
|
std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
|
||||||
|
device{device_} {
|
||||||
if (!device.HasFastBufferSubData()) {
|
if (!device.HasFastBufferSubData()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
|
static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
|
||||||
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
||||||
for (const GLuint cbuf : cbufs) {
|
for (const GLuint cbuf : cbufs) {
|
||||||
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
|
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
|
||||||
|
@ -48,11 +55,11 @@ OGLBufferCache::~OGLBufferCache() {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||||
return std::make_shared<Buffer>(cpu_addr, size);
|
return std::make_shared<Buffer>(device, cpu_addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
||||||
return 0;
|
return {0, 0, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||||
|
@ -79,8 +86,9 @@ OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_poi
|
||||||
std::size_t size) {
|
std::size_t size) {
|
||||||
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
|
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
|
||||||
const GLuint cbuf = cbufs[cbuf_cursor++];
|
const GLuint cbuf = cbufs[cbuf_cursor++];
|
||||||
|
|
||||||
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
|
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
|
||||||
return {cbuf, 0};
|
return {cbuf, 0, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -25,15 +25,20 @@ class RasterizerOpenGL;
|
||||||
|
|
||||||
class Buffer : public VideoCommon::BufferBlock {
|
class Buffer : public VideoCommon::BufferBlock {
|
||||||
public:
|
public:
|
||||||
explicit Buffer(VAddr cpu_addr, const std::size_t size);
|
explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
|
||||||
~Buffer();
|
~Buffer();
|
||||||
|
|
||||||
GLuint Handle() const {
|
GLuint Handle() const noexcept {
|
||||||
return gl_buffer.handle;
|
return gl_buffer.handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 Address() const noexcept {
|
||||||
|
return gpu_address;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
OGLBuffer gl_buffer;
|
OGLBuffer gl_buffer;
|
||||||
|
u64 gpu_address = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
||||||
|
@ -43,7 +48,7 @@ public:
|
||||||
const Device& device, std::size_t stream_size);
|
const Device& device, std::size_t stream_size);
|
||||||
~OGLBufferCache();
|
~OGLBufferCache();
|
||||||
|
|
||||||
GLuint GetEmptyBuffer(std::size_t) override;
|
BufferInfo GetEmptyBuffer(std::size_t) override;
|
||||||
|
|
||||||
void Acquire() noexcept {
|
void Acquire() noexcept {
|
||||||
cbuf_cursor = 0;
|
cbuf_cursor = 0;
|
||||||
|
@ -64,10 +69,13 @@ protected:
|
||||||
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
|
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
||||||
|
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||||
|
|
||||||
|
const Device& device;
|
||||||
|
|
||||||
std::size_t cbuf_cursor = 0;
|
std::size_t cbuf_cursor = 0;
|
||||||
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
std::array<GLuint, NUM_CBUFS> cbufs{};
|
||||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
|
|
||||||
cbufs;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -253,8 +253,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
|
||||||
glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride);
|
glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
|
const auto info = buffer_cache.UploadMemory(start, size);
|
||||||
glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset,
|
glBindVertexBuffer(static_cast<GLuint>(index), info.handle, info.offset,
|
||||||
vertex_array.stride);
|
vertex_array.stride);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -285,9 +285,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Index);
|
MICROPROFILE_SCOPE(OpenGL_Index);
|
||||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
const std::size_t size = CalculateIndexBufferSize();
|
const std::size_t size = CalculateIndexBufferSize();
|
||||||
const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
|
const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
|
||||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer);
|
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
|
||||||
return offset;
|
return info.offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||||
|
@ -643,9 +643,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
if (!device.UseAssemblyShaders()) {
|
if (!device.UseAssemblyShaders()) {
|
||||||
MaxwellUniformData ubo;
|
MaxwellUniformData ubo;
|
||||||
ubo.SetFromRegs(gpu);
|
ubo.SetFromRegs(gpu);
|
||||||
const auto [buffer, offset] =
|
const auto info =
|
||||||
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
|
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
|
||||||
static_cast<GLsizeiptr>(sizeof(ubo)));
|
static_cast<GLsizeiptr>(sizeof(ubo)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -956,8 +956,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
|
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
|
||||||
} else {
|
} else {
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
|
glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
|
||||||
buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
|
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -970,24 +969,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||||
|
|
||||||
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
|
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
|
||||||
const GPUVAddr gpu_addr = buffer.address;
|
const GPUVAddr gpu_addr = buffer.address;
|
||||||
auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
|
auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
|
||||||
|
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
UNIMPLEMENTED_IF(use_unified);
|
UNIMPLEMENTED_IF(use_unified);
|
||||||
if (offset != 0) {
|
if (info.offset != 0) {
|
||||||
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
||||||
glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
|
glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
|
||||||
cbuf = staging_cbuf;
|
info.handle = staging_cbuf;
|
||||||
offset = 0;
|
info.offset = 0;
|
||||||
}
|
}
|
||||||
glBindBufferRangeNV(stage, binding, cbuf, offset, size);
|
glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (use_unified) {
|
if (use_unified) {
|
||||||
glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size);
|
glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
|
||||||
|
unified_offset, size);
|
||||||
} else {
|
} else {
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
|
glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1023,9 +1023,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
|
||||||
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
||||||
GPUVAddr gpu_addr, std::size_t size) {
|
GPUVAddr gpu_addr, std::size_t size) {
|
||||||
const auto alignment{device.GetShaderStorageBufferAlignment()};
|
const auto alignment{device.GetShaderStorageBufferAlignment()};
|
||||||
const auto [ssbo, buffer_offset] =
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
||||||
buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
|
|
||||||
static_cast<GLsizeiptr>(size));
|
static_cast<GLsizeiptr>(size));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1712,8 +1711,9 @@ void RasterizerOpenGL::EndTransformFeedback() {
|
||||||
const GLuint handle = transform_feedback_buffers[index].handle;
|
const GLuint handle = transform_feedback_buffers[index].handle;
|
||||||
const GPUVAddr gpu_addr = binding.Address();
|
const GPUVAddr gpu_addr = binding.Address();
|
||||||
const std::size_t size = binding.buffer_size;
|
const std::size_t size = binding.buffer_size;
|
||||||
const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
||||||
glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
|
glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
|
||||||
|
static_cast<GLsizeiptr>(size));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,12 +2,13 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <deque>
|
#include <tuple>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||||
|
@ -15,7 +16,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage) : buffer_size(size) {
|
OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
|
||||||
|
: buffer_size(size) {
|
||||||
gl_buffer.Create();
|
gl_buffer.Create();
|
||||||
|
|
||||||
GLsizeiptr allocate_size = size;
|
GLsizeiptr allocate_size = size;
|
||||||
|
@ -32,6 +34,11 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage) : buff
|
||||||
glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
|
glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
|
||||||
mapped_ptr = static_cast<u8*>(
|
mapped_ptr = static_cast<u8*>(
|
||||||
glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
||||||
|
|
||||||
|
if (device.HasVertexBufferUnifiedMemory()) {
|
||||||
|
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
|
||||||
|
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
OGLStreamBuffer::~OGLStreamBuffer() {
|
OGLStreamBuffer::~OGLStreamBuffer() {
|
||||||
|
|
|
@ -11,9 +11,11 @@
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
class Device;
|
||||||
|
|
||||||
class OGLStreamBuffer : private NonCopyable {
|
class OGLStreamBuffer : private NonCopyable {
|
||||||
public:
|
public:
|
||||||
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage);
|
explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
|
||||||
~OGLStreamBuffer();
|
~OGLStreamBuffer();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -32,13 +34,18 @@ public:
|
||||||
return gl_buffer.handle;
|
return gl_buffer.handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLsizeiptr Size() const {
|
u64 Address() const {
|
||||||
|
return gpu_address;
|
||||||
|
}
|
||||||
|
|
||||||
|
GLsizeiptr Size() const noexcept {
|
||||||
return buffer_size;
|
return buffer_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
OGLBuffer gl_buffer;
|
OGLBuffer gl_buffer;
|
||||||
|
|
||||||
|
GLuint64EXT gpu_address = 0;
|
||||||
GLintptr buffer_pos = 0;
|
GLintptr buffer_pos = 0;
|
||||||
GLsizeiptr buffer_size = 0;
|
GLsizeiptr buffer_size = 0;
|
||||||
GLintptr mapped_offset = 0;
|
GLintptr mapped_offset = 0;
|
||||||
|
|
|
@ -71,14 +71,14 @@ std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t s
|
||||||
return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size);
|
return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
|
VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
|
||||||
size = std::max(size, std::size_t(4));
|
size = std::max(size, std::size_t(4));
|
||||||
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
|
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
|
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.FillBuffer(buffer, 0, size, 0);
|
cmdbuf.FillBuffer(buffer, 0, size, 0);
|
||||||
});
|
});
|
||||||
return *empty.handle;
|
return {*empty.handle, 0, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||||
|
|
|
@ -33,6 +33,10 @@ public:
|
||||||
return *buffer.handle;
|
return *buffer.handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 Address() const {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
VKBuffer buffer;
|
VKBuffer buffer;
|
||||||
};
|
};
|
||||||
|
@ -44,7 +48,7 @@ public:
|
||||||
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
|
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
|
||||||
~VKBufferCache();
|
~VKBufferCache();
|
||||||
|
|
||||||
VkBuffer GetEmptyBuffer(std::size_t size) override;
|
BufferInfo GetEmptyBuffer(std::size_t size) override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||||
|
|
|
@ -870,10 +870,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
|
||||||
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
|
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
|
||||||
|
|
||||||
const GPUVAddr gpu_addr = binding.Address();
|
const GPUVAddr gpu_addr = binding.Address();
|
||||||
const auto size = static_cast<VkDeviceSize>(binding.buffer_size);
|
const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
|
||||||
const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
||||||
|
|
||||||
scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
|
scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
|
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
|
||||||
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
|
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
|
||||||
});
|
});
|
||||||
|
@ -925,8 +925,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
|
||||||
buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
|
buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
|
const auto info = buffer_cache.UploadMemory(start, size);
|
||||||
buffer_bindings.AddVertexBinding(buffer, offset);
|
buffer_bindings.AddVertexBinding(info.handle, info.offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -948,7 +948,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
||||||
auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
||||||
|
VkBuffer buffer = info.handle;
|
||||||
|
u64 offset = info.offset;
|
||||||
std::tie(buffer, offset) = quad_indexed_pass.Assemble(
|
std::tie(buffer, offset) = quad_indexed_pass.Assemble(
|
||||||
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
|
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
|
||||||
|
|
||||||
|
@ -962,7 +964,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
||||||
auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
||||||
|
VkBuffer buffer = info.handle;
|
||||||
|
u64 offset = info.offset;
|
||||||
|
|
||||||
auto format = regs.index_array.format;
|
auto format = regs.index_array.format;
|
||||||
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
|
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
|
||||||
|
@ -1109,10 +1113,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
|
||||||
Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
|
Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
|
||||||
ASSERT(size <= MaxConstbufferSize);
|
ASSERT(size <= MaxConstbufferSize);
|
||||||
|
|
||||||
const auto [buffer_handle, offset] =
|
const auto info =
|
||||||
buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
|
buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
|
||||||
|
update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
|
||||||
update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
|
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
|
||||||
|
@ -1126,14 +1129,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
|
||||||
// Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
|
// Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
|
||||||
// default buffer.
|
// default buffer.
|
||||||
static constexpr std::size_t dummy_size = 4;
|
static constexpr std::size_t dummy_size = 4;
|
||||||
const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
|
const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
|
||||||
update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
|
update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto [buffer, offset] = buffer_cache.UploadMemory(
|
const auto info = buffer_cache.UploadMemory(
|
||||||
actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
|
actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
|
||||||
update_descriptor_queue.AddBuffer(buffer, offset, size);
|
update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
|
void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
|
||||||
|
|
|
@ -35,10 +35,14 @@ public:
|
||||||
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
|
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
|
||||||
void Unmap(u64 size);
|
void Unmap(u64 size);
|
||||||
|
|
||||||
VkBuffer Handle() const {
|
VkBuffer Handle() const noexcept {
|
||||||
return *buffer;
|
return *buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 Address() const noexcept {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct Watch final {
|
struct Watch final {
|
||||||
VKFenceWatch fence;
|
VKFenceWatch fence;
|
||||||
|
|
Loading…
Add table
Reference in a new issue