mirror of
https://github.com/zhaobot/yuzu.git
synced 2025-01-15 06:42:29 -03:00
Merge pull request #2400 from FernandoS27/corret-kepler-mem
Implement Kepler Memory on both Linear and BlockLinear.
This commit is contained in:
commit
01100f8afd
4 changed files with 81 additions and 17 deletions
|
@@ -10,6 +10,7 @@
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
|
#include "video_core/textures/decoders.h"
|
||||||
|
|
||||||
namespace Tegra::Engines {
|
namespace Tegra::Engines {
|
||||||
|
|
||||||
|
@@ -27,30 +28,46 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
|
||||||
|
|
||||||
switch (method_call.method) {
|
switch (method_call.method) {
|
||||||
case KEPLERMEMORY_REG_INDEX(exec): {
|
case KEPLERMEMORY_REG_INDEX(exec): {
|
||||||
state.write_offset = 0;
|
ProcessExec();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case KEPLERMEMORY_REG_INDEX(data): {
|
case KEPLERMEMORY_REG_INDEX(data): {
|
||||||
ProcessData(method_call.argument);
|
ProcessData(method_call.argument, method_call.IsLastCall());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void KeplerMemory::ProcessData(u32 data) {
|
void KeplerMemory::ProcessExec() {
|
||||||
ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
|
state.write_offset = 0;
|
||||||
ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
|
state.copy_size = regs.line_length_in * regs.line_count;
|
||||||
|
state.inner_buffer.resize(state.copy_size);
|
||||||
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
|
}
|
||||||
// We do this before actually writing the new data because the destination address might
|
|
||||||
// contain a dirty surface that will have to be written back to memory.
|
|
||||||
const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
|
|
||||||
rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
|
|
||||||
memory_manager.Write<u32>(address, data);
|
|
||||||
|
|
||||||
|
void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
|
||||||
|
const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
|
||||||
|
std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
|
||||||
|
state.write_offset += sub_copy_size;
|
||||||
|
if (is_last_call) {
|
||||||
|
const GPUVAddr address{regs.dest.Address()};
|
||||||
|
if (regs.exec.linear != 0) {
|
||||||
|
memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
|
||||||
|
} else {
|
||||||
|
UNIMPLEMENTED_IF(regs.dest.z != 0);
|
||||||
|
UNIMPLEMENTED_IF(regs.dest.depth != 1);
|
||||||
|
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
|
||||||
|
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
|
||||||
|
const std::size_t dst_size = Tegra::Texture::CalculateSize(
|
||||||
|
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
|
||||||
|
std::vector<u8> tmp_buffer(dst_size);
|
||||||
|
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
|
||||||
|
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
|
||||||
|
regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
|
||||||
|
state.inner_buffer.data(), tmp_buffer.data());
|
||||||
|
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
||||||
|
}
|
||||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||||
|
}
|
||||||
state.write_offset++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Tegra::Engines
|
} // namespace Tegra::Engines
|
||||||
|
|
|
@@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
#include <vector>
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "common/common_funcs.h"
|
#include "common/common_funcs.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
@@ -51,7 +52,11 @@ public:
|
||||||
u32 address_high;
|
u32 address_high;
|
||||||
u32 address_low;
|
u32 address_low;
|
||||||
u32 pitch;
|
u32 pitch;
|
||||||
u32 block_dimensions;
|
union {
|
||||||
|
BitField<0, 4, u32> block_width;
|
||||||
|
BitField<4, 4, u32> block_height;
|
||||||
|
BitField<8, 4, u32> block_depth;
|
||||||
|
};
|
||||||
u32 width;
|
u32 width;
|
||||||
u32 height;
|
u32 height;
|
||||||
u32 depth;
|
u32 depth;
|
||||||
|
@@ -63,6 +68,18 @@ public:
|
||||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||||
address_low);
|
address_low);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 BlockWidth() const {
|
||||||
|
return 1U << block_width.Value();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 BlockHeight() const {
|
||||||
|
return 1U << block_height.Value();
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 BlockDepth() const {
|
||||||
|
return 1U << block_depth.Value();
|
||||||
|
}
|
||||||
} dest;
|
} dest;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
|
@@ -81,6 +98,8 @@ public:
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
u32 write_offset = 0;
|
u32 write_offset = 0;
|
||||||
|
u32 copy_size = 0;
|
||||||
|
std::vector<u8> inner_buffer;
|
||||||
} state{};
|
} state{};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@@ -88,7 +107,8 @@ private:
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
MemoryManager& memory_manager;
|
MemoryManager& memory_manager;
|
||||||
|
|
||||||
void ProcessData(u32 data);
|
void ProcessExec();
|
||||||
|
void ProcessData(u32 data, bool is_last_call);
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ASSERT_REG_POSITION(field_name, position) \
|
#define ASSERT_REG_POSITION(field_name, position) \
|
||||||
|
|
|
@@ -288,6 +288,29 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
|
||||||
|
const u32 block_height, const std::size_t copy_size, const u8* source_data,
|
||||||
|
u8* swizzle_data) {
|
||||||
|
const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x};
|
||||||
|
std::size_t count = 0;
|
||||||
|
for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
|
||||||
|
const std::size_t gob_address_y =
|
||||||
|
(y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
|
||||||
|
((y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
|
||||||
|
const auto& table = legacy_swizzle_table[y % gob_size_y];
|
||||||
|
for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
|
||||||
|
const std::size_t gob_address =
|
||||||
|
gob_address_y + (x / gob_size_x) * gob_size * block_height;
|
||||||
|
const std::size_t swizzled_offset = gob_address + table[x % gob_size_x];
|
||||||
|
const u8* source_line = source_data + count;
|
||||||
|
u8* dest_addr = swizzle_data + swizzled_offset;
|
||||||
|
count++;
|
||||||
|
|
||||||
|
std::memcpy(dest_addr, source_line, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
|
std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
|
||||||
u32 height) {
|
u32 height) {
|
||||||
std::vector<u8> rgba_data;
|
std::vector<u8> rgba_data;
|
||||||
|
|
|
@@ -51,4 +51,8 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
|
||||||
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
|
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
|
||||||
u32 offset_x, u32 offset_y);
|
u32 offset_x, u32 offset_y);
|
||||||
|
|
||||||
|
void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
|
||||||
|
const u32 block_height, const std::size_t copy_size, const u8* source_data,
|
||||||
|
u8* swizzle_data);
|
||||||
|
|
||||||
} // namespace Tegra::Texture
|
} // namespace Tegra::Texture
|
||||||
|
|
Loading…
Reference in a new issue