mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-04-29 14:59:26 -04:00
Merge 1367e11d26
into b089ae5b32
This commit is contained in:
commit
f6bb885d00
108 changed files with 14995 additions and 480 deletions
4
.gitmodules
vendored
4
.gitmodules
vendored
|
@ -18,3 +18,7 @@
|
|||
path = dependencies/imgui
|
||||
url = https://github.com/ocornut/imgui
|
||||
shallow = true
|
||||
[submodule "dependencies/metal-cpp"]
|
||||
path = dependencies/metal-cpp
|
||||
url = https://github.com/bkaradzic/metal-cpp.git
|
||||
shallow = true
|
||||
|
|
|
@ -102,10 +102,20 @@ if (UNIX AND NOT APPLE)
|
|||
option(ENABLE_BLUEZ "Build with Bluez support" ON)
|
||||
endif()
|
||||
|
||||
# Default value for the ENABLE_METAL option: OFF everywhere, switched ON when configuring for an Apple platform
set(ENABLE_METAL_DEFAULT OFF)
if (APPLE)
	set(ENABLE_METAL_DEFAULT ON)
endif()
|
||||
|
||||
option(ENABLE_OPENGL "Enables the OpenGL backend" ON)
|
||||
option(ENABLE_VULKAN "Enables the Vulkan backend" ON)
|
||||
option(ENABLE_METAL "Enables the Metal backend" ${ENABLE_METAL_DEFAULT})
|
||||
option(ENABLE_DISCORD_RPC "Enables the Discord Rich Presence feature" ON)
|
||||
|
||||
if (ENABLE_METAL AND NOT APPLE)
|
||||
message(FATAL_ERROR "Metal backend is only supported on Apple platforms")
|
||||
endif()
|
||||
|
||||
# input backends
|
||||
if (WIN32)
|
||||
|
@ -180,6 +190,12 @@ if (ENABLE_OPENGL)
|
|||
find_package(OpenGL REQUIRED)
|
||||
endif()
|
||||
|
||||
if (ENABLE_METAL)
	# make the vendored metal-cpp headers (dependencies/metal-cpp submodule) visible to the targets defined below
	include_directories(${CMAKE_SOURCE_DIR}/dependencies/metal-cpp)

	# defined with an explicit value of 1 because sources gate Metal code with `#if ENABLE_METAL`
	# (add_compile_definitions matches the style already used for ENABLE_DISCORD_RPC)
	add_compile_definitions(ENABLE_METAL=1)
endif()
|
||||
|
||||
if (ENABLE_DISCORD_RPC)
|
||||
add_compile_definitions(ENABLE_DISCORD_RPC)
|
||||
add_subdirectory(dependencies/discord-rpc EXCLUDE_FROM_ALL)
|
||||
|
|
1
dependencies/metal-cpp
vendored
Submodule
1
dependencies/metal-cpp
vendored
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit a63bd172ddcba73a3d87ca32032b66ad41ddb9a6
|
|
@ -530,7 +530,70 @@ add_library(CemuCafe
|
|||
)
|
||||
|
||||
if(APPLE)
|
||||
target_sources(CemuCafe PRIVATE "HW/Latte/Renderer/Vulkan/CocoaSurface.mm")
|
||||
target_sources(CemuCafe PRIVATE
|
||||
HW/Latte/Renderer/Vulkan/CocoaSurface.mm
|
||||
HW/Latte/Renderer/MetalView.mm
|
||||
HW/Latte/Renderer/MetalView.h
|
||||
)
|
||||
endif()
|
||||
|
||||
if(ENABLE_METAL)
	# Metal renderer backend sources (kept in alphabetical order)
	target_sources(CemuCafe PRIVATE
		HW/Latte/Renderer/Metal/CachedFBOMtl.cpp
		HW/Latte/Renderer/Metal/CachedFBOMtl.h
		HW/Latte/Renderer/Metal/LatteTextureMtl.cpp
		HW/Latte/Renderer/Metal/LatteTextureMtl.h
		HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp
		HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h
		HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp
		HW/Latte/Renderer/Metal/LatteTextureViewMtl.h
		HW/Latte/Renderer/Metal/LatteToMtl.cpp
		HW/Latte/Renderer/Metal/LatteToMtl.h
		HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp
		HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h
		HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp
		HW/Latte/Renderer/Metal/MetalBufferAllocator.h
		HW/Latte/Renderer/Metal/MetalCommon.h
		HW/Latte/Renderer/Metal/MetalCppImpl.cpp
		HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
		HW/Latte/Renderer/Metal/MetalDepthStencilCache.h
		HW/Latte/Renderer/Metal/MetalLayer.h
		HW/Latte/Renderer/Metal/MetalLayer.mm
		HW/Latte/Renderer/Metal/MetalLayerHandle.cpp
		HW/Latte/Renderer/Metal/MetalLayerHandle.h
		HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
		HW/Latte/Renderer/Metal/MetalMemoryManager.h
		HW/Latte/Renderer/Metal/MetalOutputShaderCache.cpp
		HW/Latte/Renderer/Metal/MetalOutputShaderCache.h
		HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h
		HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
		HW/Latte/Renderer/Metal/MetalPipelineCache.h
		HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp
		HW/Latte/Renderer/Metal/MetalPipelineCompiler.h
		HW/Latte/Renderer/Metal/MetalQuery.cpp
		HW/Latte/Renderer/Metal/MetalQuery.h
		HW/Latte/Renderer/Metal/MetalRenderer.cpp
		HW/Latte/Renderer/Metal/MetalRenderer.h
		HW/Latte/Renderer/Metal/MetalSamplerCache.cpp
		HW/Latte/Renderer/Metal/MetalSamplerCache.h
		HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.cpp
		HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.h
		HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
		HW/Latte/Renderer/Metal/RendererShaderMtl.h
		HW/Latte/Renderer/Metal/UtilityShaderSource.h
	)

	# Metal Shading Language emitter of the legacy shader decompiler
	target_sources(CemuCafe PRIVATE
		HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
		HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLAttrDecoder.cpp
		HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp
	)

	# NOTE(review): framework linking is left disabled here exactly as in the original - presumably done elsewhere; confirm
	#target_link_libraries(CemuCafe PRIVATE
	#	"-framework Metal"
	#	"-framework QuartzCore"
	#)
endif()
|
||||
|
||||
set_property(TARGET CemuCafe PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
||||
|
|
|
@ -255,6 +255,16 @@ void InfoLog_PrintActiveSettings()
|
|||
if (!GetConfig().vk_accurate_barriers.GetValue())
|
||||
cemuLog_log(LogType::Force, "Accurate barriers are disabled!");
|
||||
}
|
||||
else if (ActiveSettings::GetGraphicsAPI() == GraphicAPI::kMetal)
|
||||
{
|
||||
cemuLog_log(LogType::Force, "Async compile: {}", GetConfig().async_compile.GetValue() ? "true" : "false");
|
||||
cemuLog_log(LogType::Force, "Force mesh shaders: {}", GetConfig().force_mesh_shaders.GetValue() ? "true" : "false");
|
||||
cemuLog_log(LogType::Force, "Fast math: {}", g_current_game_profile->GetFastMath() ? "true" : "false");
|
||||
cemuLog_log(LogType::Force, "Buffer cache type: {}", g_current_game_profile->GetBufferCacheMode());
|
||||
cemuLog_log(LogType::Force, "Position invariance: {}", g_current_game_profile->GetPositionInvariance());
|
||||
if (!GetConfig().vk_accurate_barriers.GetValue())
|
||||
cemuLog_log(LogType::Force, "Accurate barriers are disabled!");
|
||||
}
|
||||
cemuLog_log(LogType::Force, "Console language: {}", stdx::to_underlying(config.console_language.GetValue()));
|
||||
}
|
||||
|
||||
|
|
|
@ -226,6 +226,9 @@ bool GameProfile::Load(uint64_t title_id)
|
|||
m_graphics_api = (GraphicAPI)graphicsApi.value;
|
||||
|
||||
gameProfile_loadEnumOption(iniParser, "accurateShaderMul", m_accurateShaderMul);
|
||||
gameProfile_loadBooleanOption2(iniParser, "fastMath", m_fastMath);
|
||||
gameProfile_loadEnumOption(iniParser, "bufferCacheMode2", m_bufferCacheMode);
|
||||
gameProfile_loadEnumOption(iniParser, "positionInvariance2", m_positionInvariance);
|
||||
|
||||
// legacy support
|
||||
auto option_precompiledShaders = iniParser.FindOption("precompiledShaders");
|
||||
|
@ -292,22 +295,23 @@ void GameProfile::Save(uint64_t title_id)
|
|||
|
||||
#define WRITE_OPTIONAL_ENTRY(__NAME) if (m_##__NAME) fs->writeLine(fmt::format("{} = {}", #__NAME, m_##__NAME.value()).c_str());
|
||||
#define WRITE_ENTRY(__NAME) fs->writeLine(fmt::format("{} = {}", #__NAME, m_##__NAME).c_str());
|
||||
#define WRITE_ENTRY_NUMBERED(__NAME, __NUM) fs->writeLine(fmt::format("{} = {}", #__NAME #__NUM, m_##__NAME).c_str());
|
||||
|
||||
fs->writeLine("[General]");
|
||||
WRITE_OPTIONAL_ENTRY(loadSharedLibraries);
|
||||
WRITE_ENTRY(startWithPadView);
|
||||
|
||||
fs->writeLine("");
|
||||
|
||||
|
||||
fs->writeLine("[CPU]");
|
||||
WRITE_OPTIONAL_ENTRY(cpuMode);
|
||||
WRITE_ENTRY(threadQuantum);
|
||||
|
||||
fs->writeLine("");
|
||||
|
||||
fs->writeLine("[Graphics]");
|
||||
WRITE_ENTRY(accurateShaderMul);
|
||||
WRITE_ENTRY(fastMath);
|
||||
WRITE_ENTRY_NUMBERED(bufferCacheMode, 2);
|
||||
WRITE_ENTRY_NUMBERED(positionInvariance, 2);
|
||||
WRITE_OPTIONAL_ENTRY(precompiledShaders);
|
||||
WRITE_OPTIONAL_ENTRY(graphics_api);
|
||||
fs->writeLine("");
|
||||
|
@ -323,6 +327,7 @@ void GameProfile::Save(uint64_t title_id)
|
|||
|
||||
#undef WRITE_OPTIONAL_ENTRY
|
||||
#undef WRITE_ENTRY
|
||||
#undef WRITE_ENTRY_NUMBERED
|
||||
|
||||
delete fs;
|
||||
}
|
||||
|
@ -337,6 +342,9 @@ void GameProfile::ResetOptional()
|
|||
|
||||
// graphic settings
|
||||
m_accurateShaderMul = AccurateShaderMulOption::True;
|
||||
m_fastMath = true;
|
||||
m_bufferCacheMode = BufferCacheMode::Auto;
|
||||
m_positionInvariance = PositionInvariance::Auto;
|
||||
// cpu settings
|
||||
m_threadQuantum = kThreadQuantumDefault;
|
||||
m_cpuMode.reset(); // CPUModeOption::kSingleCoreRecompiler;
|
||||
|
@ -357,6 +365,9 @@ void GameProfile::Reset()
|
|||
|
||||
// graphic settings
|
||||
m_accurateShaderMul = AccurateShaderMulOption::True;
|
||||
m_fastMath = true;
|
||||
m_bufferCacheMode = BufferCacheMode::Auto;
|
||||
m_positionInvariance = PositionInvariance::Auto;
|
||||
m_precompiledShaders = PrecompiledShaderOption::Auto;
|
||||
// cpu settings
|
||||
m_threadQuantum = kThreadQuantumDefault;
|
||||
|
|
|
@ -31,6 +31,9 @@ public:
|
|||
|
||||
[[nodiscard]] const std::optional<GraphicAPI>& GetGraphicsAPI() const { return m_graphics_api; }
|
||||
[[nodiscard]] const AccurateShaderMulOption& GetAccurateShaderMul() const { return m_accurateShaderMul; }
|
||||
[[nodiscard]] bool GetFastMath() const { return m_fastMath; }
|
||||
[[nodiscard]] BufferCacheMode GetBufferCacheMode() const { return m_bufferCacheMode; }
|
||||
[[nodiscard]] PositionInvariance GetPositionInvariance() const { return m_positionInvariance; }
|
||||
[[nodiscard]] const std::optional<PrecompiledShaderOption>& GetPrecompiledShadersState() const { return m_precompiledShaders; }
|
||||
|
||||
[[nodiscard]] uint32 GetThreadQuantum() const { return m_threadQuantum; }
|
||||
|
@ -54,6 +57,9 @@ private:
|
|||
// graphic settings
|
||||
std::optional<GraphicAPI> m_graphics_api{};
|
||||
AccurateShaderMulOption m_accurateShaderMul = AccurateShaderMulOption::True;
|
||||
bool m_fastMath = true;
|
||||
BufferCacheMode m_bufferCacheMode = BufferCacheMode::Auto;
|
||||
PositionInvariance m_positionInvariance = PositionInvariance::Auto;
|
||||
std::optional<PrecompiledShaderOption> m_precompiledShaders{};
|
||||
// cpu settings
|
||||
uint32 m_threadQuantum = kThreadQuantumDefault; // values: 20000 45000 60000 80000 100000
|
||||
|
|
|
@ -269,6 +269,8 @@ GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules)
|
|||
m_renderer_api = RendererAPI::Vulkan;
|
||||
else if (boost::iequals(*option_rendererFilter, "opengl"))
|
||||
m_renderer_api = RendererAPI::OpenGL;
|
||||
else if (boost::iequals(*option_rendererFilter, "metal"))
|
||||
m_renderer_api = RendererAPI::Metal;
|
||||
else
|
||||
cemuLog_log(LogType::Force, "Unknown value '{}' for rendererFilter option", *option_rendererFilter);
|
||||
}
|
||||
|
@ -681,12 +683,14 @@ void GraphicPack2::LoadShaders()
|
|||
wchar_t shader_type[256]{};
|
||||
if (filename.size() < 256 && swscanf(filename.c_str(), L"%" SCNx64 "_%" SCNx64 "_%ls", &shader_base_hash, &shader_aux_hash, shader_type) == 3)
|
||||
{
|
||||
bool isMetalShader = (shader_type[2] == '_' && shader_type[3] == 'm' && shader_type[4] == 's' && shader_type[5] == 'l');
|
||||
|
||||
if (shader_type[0] == 'p' && shader_type[1] == 's')
|
||||
m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::PIXEL));
|
||||
m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::PIXEL, isMetalShader));
|
||||
else if (shader_type[0] == 'v' && shader_type[1] == 's')
|
||||
m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::VERTEX));
|
||||
m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::VERTEX, isMetalShader));
|
||||
else if (shader_type[0] == 'g' && shader_type[1] == 's')
|
||||
m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::GEOMETRY));
|
||||
m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::GEOMETRY, isMetalShader));
|
||||
}
|
||||
else if (filename == L"output.glsl")
|
||||
{
|
||||
|
@ -1045,7 +1049,7 @@ bool GraphicPack2::Deactivate()
|
|||
return true;
|
||||
}
|
||||
|
||||
const std::string* GraphicPack2::FindCustomShaderSource(uint64 shaderBaseHash, uint64 shaderAuxHash, GP_SHADER_TYPE type, bool isVulkanRenderer)
|
||||
const std::string* GraphicPack2::FindCustomShaderSource(uint64 shaderBaseHash, uint64 shaderAuxHash, GP_SHADER_TYPE type, bool isVulkanRenderer, bool isMetalRenderer)
|
||||
{
|
||||
for (const auto& gp : GraphicPack2::GetActiveGraphicPacks())
|
||||
{
|
||||
|
@ -1058,6 +1062,9 @@ const std::string* GraphicPack2::FindCustomShaderSource(uint64 shaderBaseHash, u
|
|||
if (isVulkanRenderer && (*it).isPreVulkanShader)
|
||||
continue;
|
||||
|
||||
if (isMetalRenderer != (*it).isMetalShader)
|
||||
continue;
|
||||
|
||||
return &it->source;
|
||||
}
|
||||
return nullptr;
|
||||
|
@ -1215,7 +1222,7 @@ void GraphicPack2::ApplyShaderPresets(std::string& shader_source) const
|
|||
}
|
||||
}
|
||||
|
||||
GraphicPack2::CustomShader GraphicPack2::LoadShader(const fs::path& path, uint64 shader_base_hash, uint64 shader_aux_hash, GP_SHADER_TYPE shader_type) const
|
||||
GraphicPack2::CustomShader GraphicPack2::LoadShader(const fs::path& path, uint64 shader_base_hash, uint64 shader_aux_hash, GP_SHADER_TYPE shader_type, bool isMetalShader) const
|
||||
{
|
||||
CustomShader shader;
|
||||
|
||||
|
@ -1234,6 +1241,7 @@ GraphicPack2::CustomShader GraphicPack2::LoadShader(const fs::path& path, uint64
|
|||
shader.shader_aux_hash = shader_aux_hash;
|
||||
shader.type = shader_type;
|
||||
shader.isPreVulkanShader = this->m_version <= 3;
|
||||
shader.isMetalShader = isMetalShader;
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
|
|
@ -67,6 +67,7 @@ public:
|
|||
uint64 shader_aux_hash;
|
||||
GP_SHADER_TYPE type;
|
||||
bool isPreVulkanShader{}; // set to true for V3 packs since the shaders are not compatible with the Vulkan renderer
|
||||
bool isMetalShader{}; // set to true if the shader is written in Metal Shading Language
|
||||
};
|
||||
|
||||
enum VarType
|
||||
|
@ -148,7 +149,7 @@ public:
|
|||
bool HasShaders() const;
|
||||
const std::vector<CustomShader>& GetCustomShaders() const { return m_custom_shaders; }
|
||||
|
||||
static const std::string* FindCustomShaderSource(uint64 shaderBaseHash, uint64 shaderAuxHash, GP_SHADER_TYPE type, bool isVulkanRenderer);
|
||||
static const std::string* FindCustomShaderSource(uint64 shaderBaseHash, uint64 shaderAuxHash, GP_SHADER_TYPE type, bool isVulkanRenderer, bool isMetalRenderer);
|
||||
|
||||
const std::string& GetOutputShaderSource() const { return m_output_shader_source; }
|
||||
const std::string& GetDownscalingShaderSource() const { return m_downscaling_shader_source; }
|
||||
|
@ -257,7 +258,7 @@ private:
|
|||
|
||||
std::vector<uint64> ParseTitleIds(IniParser& rules, const char* option_name) const;
|
||||
|
||||
CustomShader LoadShader(const fs::path& path, uint64 shader_base_hash, uint64 shader_aux_hash, GP_SHADER_TYPE shader_type) const;
|
||||
CustomShader LoadShader(const fs::path& path, uint64 shader_base_hash, uint64 shader_aux_hash, GP_SHADER_TYPE shader_type, bool isMetalShader) const;
|
||||
void ApplyShaderPresets(std::string& shader_source) const;
|
||||
void LoadReplacedFiles();
|
||||
void _iterateReplacedFiles(const fs::path& currentPath, bool isAOC);
|
||||
|
|
|
@ -8,8 +8,12 @@
|
|||
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
|
||||
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||
#include "Cafe/HW/Latte/ISA/LatteInstructions.h"
|
||||
#include "HW/Latte/Renderer/Renderer.h"
|
||||
#include "util/containers/LookupTableL3.h"
|
||||
#include "util/helpers/fspinlock.h"
|
||||
#if ENABLE_METAL
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#endif
|
||||
#include <openssl/sha.h> /* SHA1_DIGEST_LENGTH */
|
||||
#include <openssl/evp.h> /* EVP_Digest */
|
||||
|
||||
|
@ -71,7 +75,7 @@ uint32 LatteShaderRecompiler_getAttributeAlignment(LatteParsedFetchShaderAttribu
|
|||
return 4;
|
||||
}
|
||||
|
||||
void LatteShader_calculateFSKey(LatteFetchShader* fetchShader)
|
||||
void LatteShader_calculateFSKey(LatteFetchShader* fetchShader, uint32* contextRegister)
|
||||
{
|
||||
uint64 key = 0;
|
||||
for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++)
|
||||
|
@ -104,11 +108,25 @@ void LatteShader_calculateFSKey(LatteFetchShader* fetchShader)
|
|||
key = std::rotl<uint64>(key, 8);
|
||||
key += (uint64)attrib->semanticId;
|
||||
key = std::rotl<uint64>(key, 8);
|
||||
if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
key += (uint64)attrib->offset;
|
||||
else
|
||||
key += (uint64)(attrib->offset & 3);
|
||||
key = std::rotl<uint64>(key, 2);
|
||||
key = std::rotl<uint64>(key, 7);
|
||||
}
|
||||
}
|
||||
// todo - also hash invalid buffer groups?
|
||||
|
||||
if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
{
|
||||
for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++)
|
||||
{
|
||||
LatteParsedFetchShaderBufferGroup_t& group = fetchShader->bufferGroups[g];
|
||||
key += (uint64)group.attributeBufferIndex;
|
||||
key = std::rotl<uint64>(key, 5);
|
||||
}
|
||||
}
|
||||
|
||||
fetchShader->key = key;
|
||||
}
|
||||
|
||||
|
@ -146,6 +164,29 @@ void LatteFetchShader::CalculateFetchShaderVkHash()
|
|||
this->vkPipelineHashFragment = h;
|
||||
}
|
||||
|
||||
// Metal only: inspects every buffer group of this fetch shader and raises mtlFetchVertexManually when
//  - the buffer stride read from the context registers is not a multiple of 4, or
//  - an attribute (offset + Metal vertex format size) extends past that stride.
// The flag is only ever set, never cleared. When ENABLE_METAL is not set this function does nothing.
// contextRegister: register array indexed by mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7 (+2 holds the stride bits)
void LatteFetchShader::CheckIfVerticesNeedManualFetchMtl(uint32* contextRegister)
{
#if ENABLE_METAL
	for (sint32 groupIdx = 0; groupIdx < bufferGroups.size(); groupIdx++)
	{
		LatteParsedFetchShaderBufferGroup_t& bufferGroup = bufferGroups[groupIdx];

		// the stride is the 16-bit field starting at bit 11 of the third register of the buffer's block
		uint32 attributeBufferIdx = bufferGroup.attributeBufferIndex;
		uint32 blockBaseRegister = mmSQ_VTX_ATTRIBUTE_BLOCK_START + attributeBufferIdx * 7;
		uint32 stride = (contextRegister[blockBaseRegister + 2] >> 11) & 0xFFFF;

		if ((stride % 4) != 0)
		{
			mtlFetchVertexManually = true;
		}

		// every attribute is checked (no early exit), matching the original call pattern of GetMtlVertexFormatSize
		for (sint32 attrIdx = 0; attrIdx < bufferGroup.attribCount; attrIdx++)
		{
			auto& attribute = bufferGroup.attrib[attrIdx];
			if (attribute.offset + GetMtlVertexFormatSize(attribute.format) > stride)
			{
				mtlFetchVertexManually = true;
			}
		}
	}
#endif
}
|
||||
|
||||
void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* parsedFetchShader, uint32* contextRegister, const LatteClauseInstruction_VTX* instr)
|
||||
{
|
||||
uint32 semanticId = instr->getFieldSEM_SEMANTIC_ID(); // location (attribute index inside shader)
|
||||
|
@ -326,8 +367,9 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
|
|||
{
|
||||
// empty fetch shader, seen in Minecraft
|
||||
// these only make sense when vertex shader does not call FS?
|
||||
LatteShader_calculateFSKey(newFetchShader);
|
||||
LatteShader_calculateFSKey(newFetchShader, contextRegister);
|
||||
newFetchShader->CalculateFetchShaderVkHash();
|
||||
newFetchShader->CheckIfVerticesNeedManualFetchMtl(contextRegister);
|
||||
return newFetchShader;
|
||||
}
|
||||
|
||||
|
@ -385,8 +427,9 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
|
|||
}
|
||||
bufferGroup.vboStride = vboOffset;
|
||||
}
|
||||
LatteShader_calculateFSKey(newFetchShader);
|
||||
LatteShader_calculateFSKey(newFetchShader, contextRegister);
|
||||
newFetchShader->CalculateFetchShaderVkHash();
|
||||
newFetchShader->CheckIfVerticesNeedManualFetchMtl(contextRegister);
|
||||
|
||||
// register in cache
|
||||
// its possible that during multi-threaded shader cache loading, two identical (same hash) fetch shaders get created simultaneously
|
||||
|
|
|
@ -46,13 +46,17 @@ struct LatteFetchShader
|
|||
// Vulkan
|
||||
uint64 vkPipelineHashFragment{}; // hash of all fetch shader state that influences the Vulkan graphics pipeline
|
||||
|
||||
// Metal
|
||||
bool mtlFetchVertexManually{};
|
||||
|
||||
// cache info
|
||||
CacheHash m_cacheHash{};
|
||||
bool m_isRegistered{}; // if true, fetch shader is referenced by cache (RegisterInCache() succeeded)
|
||||
|
||||
|
||||
void CalculateFetchShaderVkHash();
|
||||
|
||||
void CheckIfVerticesNeedManualFetchMtl(uint32* contextRegister);
|
||||
|
||||
uint64 getVkPipelineHashFragment() const { return vkPipelineHashFragment; };
|
||||
|
||||
static bool isValidBufferIndex(const uint32 index) { return index < 0x10; };
|
||||
|
|
|
@ -198,6 +198,8 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance
|
|||
|
||||
#if BOOST_OS_MACOS
|
||||
if(bufferStride % 4 != 0)
|
||||
{
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
{
|
||||
if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance())
|
||||
{
|
||||
|
@ -206,6 +208,7 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance
|
|||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
uint32 bindOffset = LatteBufferCache_retrieveDataInCache(bufferAddress, fixedBufferSize);
|
||||
|
|
|
@ -113,6 +113,21 @@ uint32 LatteIndices_calculateIndexOutputSize(LattePrimitiveMode primitiveMode, L
|
|||
cemu_assert_suspicious();
|
||||
return 0;
|
||||
}
|
||||
else if (primitiveMode == LattePrimitiveMode::TRIANGLE_FAN && g_renderer->GetType() == RendererAPI::Metal)
|
||||
{
|
||||
if (indexType == LatteIndexType::AUTO)
|
||||
{
|
||||
if (count <= 0xFFFF)
|
||||
return count * sizeof(uint16);
|
||||
return count * sizeof(uint32);
|
||||
}
|
||||
if (indexType == LatteIndexType::U16_BE || indexType == LatteIndexType::U16_LE)
|
||||
return count * sizeof(uint16);
|
||||
if (indexType == LatteIndexType::U32_BE || indexType == LatteIndexType::U32_LE)
|
||||
return count * sizeof(uint32);
|
||||
cemu_assert_suspicious();
|
||||
return 0;
|
||||
}
|
||||
else if(indexType == LatteIndexType::AUTO)
|
||||
return 0;
|
||||
else if (indexType == LatteIndexType::U16_BE || indexType == LatteIndexType::U16_LE)
|
||||
|
@ -306,6 +321,44 @@ void LatteIndices_generateAutoLineLoopIndices(void* indexDataOutput, uint32 coun
|
|||
indexMax = std::max(count, 1u) - 1;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void LatteIndices_unpackTriangleFanAndConvert(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
|
||||
{
|
||||
const betype<T>* src = (betype<T>*)indexDataInput;
|
||||
T* dst = (T*)indexDataOutput;
|
||||
// TODO: check this
|
||||
for (sint32 i = 0; i < count; i++)
|
||||
{
|
||||
uint32 i0;
|
||||
if (i % 2 == 0)
|
||||
i0 = i / 2;
|
||||
else
|
||||
i0 = count - 1 - i / 2;
|
||||
T idx = src[i0];
|
||||
indexMin = std::min(indexMin, (uint32)idx);
|
||||
indexMax = std::max(indexMax, (uint32)idx);
|
||||
dst[i] = idx;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void LatteIndices_generateAutoTriangleFanIndices(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
|
||||
{
|
||||
const betype<T>* src = (betype<T>*)indexDataInput;
|
||||
T* dst = (T*)indexDataOutput;
|
||||
for (sint32 i = 0; i < count; i++)
|
||||
{
|
||||
T idx = i;
|
||||
if (idx % 2 == 0)
|
||||
idx = idx / 2;
|
||||
else
|
||||
idx = count - 1 - idx / 2;
|
||||
dst[i] = idx;
|
||||
}
|
||||
indexMin = 0;
|
||||
indexMax = std::max(count, 1u) - 1;
|
||||
}
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
ATTRIBUTE_AVX2
|
||||
void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
|
||||
|
@ -684,6 +737,29 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32
|
|||
cemu_assert_debug(false);
|
||||
outputCount = count + 1;
|
||||
}
|
||||
else if (primitiveMode == LattePrimitiveMode::TRIANGLE_FAN && g_renderer->GetType() == RendererAPI::Metal)
|
||||
{
|
||||
if (indexType == LatteIndexType::AUTO)
|
||||
{
|
||||
if (count <= 0xFFFF)
|
||||
{
|
||||
LatteIndices_generateAutoTriangleFanIndices<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
renderIndexType = Renderer::INDEX_TYPE::U16;
|
||||
}
|
||||
else
|
||||
{
|
||||
LatteIndices_generateAutoTriangleFanIndices<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
renderIndexType = Renderer::INDEX_TYPE::U32;
|
||||
}
|
||||
}
|
||||
else if (indexType == LatteIndexType::U16_BE)
|
||||
LatteIndices_unpackTriangleFanAndConvert<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
else if (indexType == LatteIndexType::U32_BE)
|
||||
LatteIndices_unpackTriangleFanAndConvert<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
else
|
||||
cemu_assert_debug(false);
|
||||
outputCount = count;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (indexType == LatteIndexType::U16_BE)
|
||||
|
|
|
@ -449,14 +449,6 @@ bool LatteMRT::UpdateCurrentFBO()
|
|||
uint8 colorBufferMask = GetActiveColorBufferMask(pixelShader, LatteGPUState.contextNew);
|
||||
bool depthBufferMask = GetActiveDepthBufferMask(LatteGPUState.contextNew);
|
||||
|
||||
// if depth test is not used then detach the depth buffer
|
||||
bool depthEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_ENABLE();
|
||||
bool stencilTestEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ENABLE();
|
||||
bool backStencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_BACK_STENCIL_ENABLE();
|
||||
|
||||
if (!depthEnable && !stencilTestEnable && !backStencilEnable)
|
||||
depthBufferMask = false;
|
||||
|
||||
bool hasResizedTexture = false; // set to true if any of the color buffers or the depth buffer reference a resized texture (via graphic pack texture rules)
|
||||
sLatteRenderTargetState.renderTargetIsResized = false;
|
||||
// real size
|
||||
|
|
|
@ -9,10 +9,15 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
|
||||
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
|
||||
#include "Cafe/GraphicPack/GraphicPack2.h"
|
||||
#include "HW/Latte/Core/Latte.h"
|
||||
#include "HW/Latte/Renderer/Renderer.h"
|
||||
#include "util/helpers/StringParser.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "Cafe/GameProfile/GameProfile.h"
|
||||
#include "util/containers/flat_hash_map.hpp"
|
||||
#if ENABLE_METAL
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#endif
|
||||
#include <cinttypes>
|
||||
|
||||
// experimental new decompiler (WIP)
|
||||
|
@ -205,9 +210,7 @@ void LatteShader_free(LatteDecompilerShader* shader)
|
|||
delete shader;
|
||||
}
|
||||
|
||||
// both vertex and geometry/pixel shader depend on PS inputs
|
||||
// we prepare the PS import info in advance
|
||||
void LatteShader_UpdatePSInputs(uint32* contextRegisters)
|
||||
void LatteShader_CreatePSInputTable(LatteShaderPSInputTable* psInputTable, uint32* contextRegisters)
|
||||
{
|
||||
// PS control
|
||||
uint32 psControl0 = contextRegisters[mmSPI_PS_IN_CONTROL_0];
|
||||
|
@ -238,12 +241,12 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
|
|||
{
|
||||
key += std::rotr<uint64>(spi0_paramGen, 7);
|
||||
key += std::rotr<uint64>(spi0_paramGenAddr, 3);
|
||||
_activePSImportTable.paramGen = spi0_paramGen;
|
||||
_activePSImportTable.paramGenGPR = spi0_paramGenAddr;
|
||||
psInputTable->paramGen = spi0_paramGen;
|
||||
psInputTable->paramGenGPR = spi0_paramGenAddr;
|
||||
}
|
||||
else
|
||||
{
|
||||
_activePSImportTable.paramGen = 0;
|
||||
psInputTable->paramGen = 0;
|
||||
}
|
||||
|
||||
// semantic imports from vertex shader
|
||||
|
@ -277,9 +280,9 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
|
|||
key = std::rotl<uint64>(key, 7);
|
||||
if (spi0_positionEnable && f == spi0_positionAddr)
|
||||
{
|
||||
_activePSImportTable.import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION;
|
||||
_activePSImportTable.import[f].isFlat = false;
|
||||
_activePSImportTable.import[f].isNoPerspective = false;
|
||||
psInputTable->import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION;
|
||||
psInputTable->import[f].isFlat = false;
|
||||
psInputTable->import[f].isNoPerspective = false;
|
||||
key += (uint64)0x33;
|
||||
}
|
||||
else
|
||||
|
@ -292,13 +295,20 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
|
|||
semanticMask[psSemanticId >> 3] |= (1 << (psSemanticId & 7));
|
||||
#endif
|
||||
|
||||
_activePSImportTable.import[f].semanticId = psSemanticId;
|
||||
_activePSImportTable.import[f].isFlat = (psInputControl&(1 << 10)) != 0;
|
||||
_activePSImportTable.import[f].isNoPerspective = (psInputControl&(1 << 12)) != 0;
|
||||
psInputTable->import[f].semanticId = psSemanticId;
|
||||
psInputTable->import[f].isFlat = (psInputControl&(1 << 10)) != 0;
|
||||
psInputTable->import[f].isNoPerspective = (psInputControl&(1 << 12)) != 0;
|
||||
}
|
||||
}
|
||||
_activePSImportTable.key = key;
|
||||
_activePSImportTable.count = numPSInputs;
|
||||
psInputTable->key = key;
|
||||
psInputTable->count = numPSInputs;
|
||||
}
|
||||
|
||||
// both vertex and geometry/pixel shader depend on PS inputs
|
||||
// we prepare the PS import info in advance
|
||||
void LatteShader_UpdatePSInputs(uint32* contextRegisters)
|
||||
{
|
||||
LatteShader_CreatePSInputTable(&_activePSImportTable, contextRegisters);
|
||||
}
|
||||
|
||||
void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync)
|
||||
|
@ -330,7 +340,7 @@ void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compil
|
|||
// check if a custom shader is present
|
||||
std::string shaderSrc;
|
||||
|
||||
const std::string* customShaderSrc = GraphicPack2::FindCustomShaderSource(shader->baseHash, shader->auxHash, gpShaderType, g_renderer->GetType() == RendererAPI::Vulkan);
|
||||
const std::string* customShaderSrc = GraphicPack2::FindCustomShaderSource(shader->baseHash, shader->auxHash, gpShaderType, g_renderer->GetType() == RendererAPI::Vulkan, g_renderer->GetType() == RendererAPI::Metal);
|
||||
if (customShaderSrc)
|
||||
{
|
||||
shaderSrc.assign(*customShaderSrc);
|
||||
|
@ -500,6 +510,7 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
|
|||
vsHash += tmp;
|
||||
|
||||
auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
|
||||
// TODO: include always in the hash in case of geometry shader or rect shader on Metal
|
||||
if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS)
|
||||
{
|
||||
vsHash += 13ULL;
|
||||
|
@ -514,6 +525,37 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
|
|||
if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
|
||||
vsHash += 0x1537;
|
||||
|
||||
#if ENABLE_METAL
|
||||
if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
{
|
||||
bool isRectVertexShader = (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);
|
||||
|
||||
if ((usesGeometryShader || isRectVertexShader) || _activeFetchShader->mtlFetchVertexManually)
|
||||
{
|
||||
for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++)
|
||||
{
|
||||
LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g];
|
||||
uint32 bufferIndex = group.attributeBufferIndex;
|
||||
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
|
||||
uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
|
||||
|
||||
vsHash += (uint64)bufferStride;
|
||||
vsHash = std::rotl<uint64>(vsHash, 7);
|
||||
}
|
||||
}
|
||||
|
||||
if (!(usesGeometryShader || isRectVertexShader))
|
||||
{
|
||||
if (LatteGPUState.contextNew.IsRasterizationEnabled())
|
||||
vsHash += 51ULL;
|
||||
|
||||
// Vertex fetch
|
||||
if (_activeFetchShader->mtlFetchVertexManually)
|
||||
vsHash += 349ULL;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
_shaderBaseHash_vs = vsHash;
|
||||
}
|
||||
|
||||
|
@ -539,6 +581,7 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b
|
|||
_calculateShaderProgramHash(psProgramCode, pixelShaderSize, &hashCachePS, &psHash1, &psHash2);
|
||||
// get vertex shader
|
||||
uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL);
|
||||
|
||||
_shaderBaseHash_ps = psHash;
|
||||
}
|
||||
|
||||
|
@ -572,6 +615,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
|
|||
auxHashTex += 0x333;
|
||||
}
|
||||
}
|
||||
|
||||
return auxHash + auxHashTex;
|
||||
}
|
||||
|
||||
|
@ -605,6 +649,35 @@ uint64 LatteSHRC_CalcPSAuxHash(LatteDecompilerShader* pixelShader, uint32* conte
|
|||
auxHash = (auxHash << 3) | (auxHash >> 61);
|
||||
auxHash += (uint64)dim;
|
||||
}
|
||||
|
||||
// Textures as render targets
|
||||
for (uint32 i = 0; i < pixelShader->textureUnitListCount; i++)
|
||||
{
|
||||
uint8 t = pixelShader->textureUnitList[i];
|
||||
auxHash = std::rotl<uint64>(auxHash, 11);
|
||||
auxHash += (uint64)pixelShader->textureRenderTargetIndex[t];
|
||||
}
|
||||
|
||||
#if ENABLE_METAL
|
||||
if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
{
|
||||
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
||||
{
|
||||
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
|
||||
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
|
||||
auxHash = std::rotl<uint64>(auxHash, 7);
|
||||
auxHash += (uint64)dataType;
|
||||
}
|
||||
|
||||
bool hasDepthBuffer = LatteMRT::GetActiveDepthBufferMask(LatteGPUState.contextNew);
|
||||
if (hasDepthBuffer)
|
||||
{
|
||||
auxHash = std::rotl<uint64>(auxHash, 5);
|
||||
auxHash += 13u;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return auxHash;
|
||||
}
|
||||
|
||||
|
@ -613,10 +686,13 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
|
|||
LatteDecompilerShader* shader = decompilerOutput.shader;
|
||||
shader->baseHash = baseHash;
|
||||
// copy resource mapping
|
||||
// HACK
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
shader->resourceMapping = decompilerOutput.resourceMappingVK;
|
||||
else
|
||||
else if (g_renderer->GetType() == RendererAPI::OpenGL)
|
||||
shader->resourceMapping = decompilerOutput.resourceMappingGL;
|
||||
else
|
||||
shader->resourceMapping = decompilerOutput.resourceMappingMTL;
|
||||
// copy texture info
|
||||
shader->textureUnitMask2 = decompilerOutput.textureUnitMask;
|
||||
// copy streamout info
|
||||
|
@ -624,7 +700,8 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
|
|||
shader->hasStreamoutBufferWrite = decompilerOutput.streamoutBufferWriteMask.any();
|
||||
// copy uniform offsets
|
||||
// for OpenGL these are retrieved in _prepareSeparableUniforms()
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
// HACK
|
||||
if (g_renderer->GetType() != RendererAPI::OpenGL)
|
||||
{
|
||||
shader->uniform.loc_remapped = decompilerOutput.uniformOffsetsVK.offset_remapped;
|
||||
shader->uniform.loc_uniformRegister = decompilerOutput.uniformOffsetsVK.offset_uniformRegister;
|
||||
|
@ -684,9 +761,9 @@ void LatteShader_GetDecompilerOptions(LatteDecompilerOptions& options, LatteCons
|
|||
{
|
||||
options.usesGeometryShader = geometryShaderEnabled;
|
||||
options.spirvInstrinsics.hasRoundingModeRTEFloat32 = false;
|
||||
options.useTFViaSSBO = g_renderer->UseTFViaSSBO();
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
{
|
||||
options.useTFViaSSBO = VulkanRenderer::GetInstance()->UseTFViaSSBO();
|
||||
options.spirvInstrinsics.hasRoundingModeRTEFloat32 = VulkanRenderer::GetInstance()->HasSPRIVRoundingModeRTE32();
|
||||
}
|
||||
options.strictMul = g_current_game_profile->GetAccurateShaderMul() != AccurateShaderMulOption::False;
|
||||
|
|
|
@ -84,6 +84,7 @@ struct LatteShaderPSInputTable
|
|||
}
|
||||
};
|
||||
|
||||
void LatteShader_CreatePSInputTable(LatteShaderPSInputTable* psInputTable, uint32* contextRegisters);
|
||||
void LatteShader_UpdatePSInputs(uint32* contextRegisters);
|
||||
LatteShaderPSInputTable* LatteSHRC_GetPSInputTable();
|
||||
|
||||
|
|
|
@ -11,6 +11,10 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||
#include "Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
|
||||
#if ENABLE_METAL
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
|
||||
#endif
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.h"
|
||||
|
||||
#include <imgui.h>
|
||||
|
@ -65,7 +69,7 @@ FileCache* s_shaderCacheGeneric = nullptr; // contains hardware and version inde
|
|||
#define SHADER_CACHE_TYPE_PIXEL (2)
|
||||
|
||||
bool LatteShaderCache_readSeparableShader(uint8* shaderInfoData, sint32 shaderInfoSize);
|
||||
void LatteShaderCache_LoadVulkanPipelineCache(uint64 cacheTitleId);
|
||||
void LatteShaderCache_LoadPipelineCache(uint64 cacheTitleId);
|
||||
bool LatteShaderCache_updatePipelineLoadingProgress();
|
||||
void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateFunc, bool isPipelines);
|
||||
|
||||
|
@ -276,6 +280,10 @@ void LatteShaderCache_finish()
|
|||
RendererShaderVk::ShaderCacheLoading_end();
|
||||
else if (g_renderer->GetType() == RendererAPI::OpenGL)
|
||||
RendererShaderGL::ShaderCacheLoading_end();
|
||||
#if ENABLE_METAL
|
||||
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
RendererShaderMtl::ShaderCacheLoading_end();
|
||||
#endif
|
||||
}
|
||||
|
||||
uint32 LatteShaderCache_getShaderCacheExtraVersion(uint64 titleId)
|
||||
|
@ -358,8 +366,17 @@ void LatteShaderCache_Load()
|
|||
RendererShaderVk::ShaderCacheLoading_begin(cacheTitleId);
|
||||
else if (g_renderer->GetType() == RendererAPI::OpenGL)
|
||||
RendererShaderGL::ShaderCacheLoading_begin(cacheTitleId);
|
||||
#if ENABLE_METAL
|
||||
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
RendererShaderMtl::ShaderCacheLoading_begin(cacheTitleId);
|
||||
#endif
|
||||
|
||||
// get cache file name
|
||||
const auto pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId);
|
||||
fs::path pathGeneric;
|
||||
if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_mtlshaders.bin", cacheTitleId);
|
||||
else
|
||||
pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId);
|
||||
const auto pathGenericPre1_25_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}.bin", cacheTitleId); // before 1.25.0
|
||||
const auto pathGenericPre1_16_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:08x}.bin", CafeSystem::GetRPXHashBase()); // before 1.16.0
|
||||
|
||||
|
@ -459,9 +476,9 @@ void LatteShaderCache_Load()
|
|||
cemuLog_log(LogType::Force, "Shader cache loaded with {} shaders. Commited mem {}MB. Took {}ms", numLoadedShaders, (sint32)(memCommited/1024/1024), timeLoad);
|
||||
#endif
|
||||
LatteShaderCache_finish();
|
||||
// if Vulkan then also load pipeline cache
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
LatteShaderCache_LoadVulkanPipelineCache(cacheTitleId);
|
||||
// if Vulkan or Metal then also load pipeline cache
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan || g_renderer->GetType() == RendererAPI::Metal)
|
||||
LatteShaderCache_LoadPipelineCache(cacheTitleId);
|
||||
|
||||
|
||||
g_renderer->BeginFrame(true);
|
||||
|
@ -547,7 +564,7 @@ void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateF
|
|||
std::string text;
|
||||
if (isPipelines)
|
||||
{
|
||||
text = "Loading cached Vulkan pipelines...";
|
||||
text = "Loading cached pipelines...";
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -621,19 +638,35 @@ void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateF
|
|||
}
|
||||
}
|
||||
|
||||
void LatteShaderCache_LoadVulkanPipelineCache(uint64 cacheTitleId)
|
||||
void LatteShaderCache_LoadPipelineCache(uint64 cacheTitleId)
|
||||
{
|
||||
auto& pipelineCache = VulkanPipelineStableCache::GetInstance();
|
||||
g_shaderCacheLoaderState.pipelineFileCount = pipelineCache.BeginLoading(cacheTitleId);
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
g_shaderCacheLoaderState.pipelineFileCount = VulkanPipelineStableCache::GetInstance().BeginLoading(cacheTitleId);
|
||||
#if ENABLE_METAL
|
||||
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
g_shaderCacheLoaderState.pipelineFileCount = MetalPipelineCache::GetInstance().BeginLoading(cacheTitleId);
|
||||
#endif
|
||||
g_shaderCacheLoaderState.loadedPipelines = 0;
|
||||
LatteShaderCache_ShowProgress(LatteShaderCache_updatePipelineLoadingProgress, true);
|
||||
pipelineCache.EndLoading();
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
VulkanPipelineStableCache::GetInstance().EndLoading();
|
||||
#if ENABLE_METAL
|
||||
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
MetalPipelineCache::GetInstance().EndLoading();
|
||||
#endif
|
||||
}
|
||||
|
||||
bool LatteShaderCache_updatePipelineLoadingProgress()
|
||||
{
|
||||
uint32 pipelinesMissingShaders = 0;
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
return VulkanPipelineStableCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders);
|
||||
#if ENABLE_METAL
|
||||
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
return MetalPipelineCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders);
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64 LatteShaderCache_getShaderNameInTransferableCache(uint64 baseHash, uint32 shaderType)
|
||||
|
@ -895,10 +928,18 @@ void LatteShaderCache_Close()
|
|||
RendererShaderVk::ShaderCacheLoading_Close();
|
||||
else if (g_renderer->GetType() == RendererAPI::OpenGL)
|
||||
RendererShaderGL::ShaderCacheLoading_Close();
|
||||
#if ENABLE_METAL
|
||||
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
RendererShaderMtl::ShaderCacheLoading_Close();
|
||||
#endif
|
||||
|
||||
// if Vulkan then also close pipeline cache
|
||||
// if Vulkan or Metal then also close pipeline cache
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
VulkanPipelineStableCache::GetInstance().Close();
|
||||
#if ENABLE_METAL
|
||||
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
MetalPipelineCache::GetInstance().Close();
|
||||
#endif
|
||||
}
|
||||
|
||||
#include <wx/msgdlg.h>
|
||||
|
|
|
@ -26,7 +26,7 @@ bool gxShader_checkIfSuccessfullyLinked(GLuint glProgram)
|
|||
|
||||
void LatteShader_prepareSeparableUniforms(LatteDecompilerShader* shader)
|
||||
{
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
if (g_renderer->GetType() != RendererAPI::OpenGL)
|
||||
return;
|
||||
|
||||
auto shaderGL = (RendererShaderGL*)shader->shader;
|
||||
|
|
|
@ -1308,6 +1308,40 @@ LatteTexture::LatteTexture(Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddre
|
|||
{
|
||||
this->enableReadback = true;
|
||||
}
|
||||
|
||||
// calculate number of potential mip levels (from effective size)
|
||||
sint32 effectiveWidth = width;
|
||||
sint32 effectiveHeight = height;
|
||||
sint32 effectiveDepth = depth;
|
||||
if (this->overwriteInfo.hasResolutionOverwrite)
|
||||
{
|
||||
effectiveWidth = this->overwriteInfo.width;
|
||||
effectiveHeight = this->overwriteInfo.height;
|
||||
effectiveDepth = this->overwriteInfo.depth;
|
||||
}
|
||||
this->maxPossibleMipLevels = 1;
|
||||
if (dim != Latte::E_DIM::DIM_3D)
|
||||
{
|
||||
for (sint32 i = 0; i < 20; i++)
|
||||
{
|
||||
if ((effectiveWidth >> i) <= 1 && (effectiveHeight >> i) <= 1)
|
||||
{
|
||||
this->maxPossibleMipLevels = i + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (sint32 i = 0; i < 20; i++)
|
||||
{
|
||||
if ((effectiveWidth >> i) <= 1 && (effectiveHeight >> i) <= 1 && (effectiveDepth >> i) <= 1)
|
||||
{
|
||||
this->maxPossibleMipLevels = i + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LatteTexture::~LatteTexture()
|
||||
|
|
|
@ -73,46 +73,16 @@ void LatteTexture_ReloadData(LatteTexture* tex)
|
|||
LatteTextureView* LatteTexture_CreateTexture(Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle, Latte::E_HWTILEMODE tileMode, bool isDepth)
|
||||
{
|
||||
const auto tex = g_renderer->texture_createTextureEx(dim, physAddress, physMipAddress, format, width, height, depth, pitch, mipLevels, swizzle, tileMode, isDepth);
|
||||
|
||||
// init slice/mip info array
|
||||
LatteTexture_InitSliceAndMipInfo(tex);
|
||||
LatteTexture_RegisterTextureMemoryOccupancy(tex);
|
||||
cemu_assert_debug(mipLevels != 0);
|
||||
// calculate number of potential mip levels (from effective size)
|
||||
sint32 effectiveWidth = width;
|
||||
sint32 effectiveHeight = height;
|
||||
sint32 effectiveDepth = depth;
|
||||
if (tex->overwriteInfo.hasResolutionOverwrite)
|
||||
{
|
||||
effectiveWidth = tex->overwriteInfo.width;
|
||||
effectiveHeight = tex->overwriteInfo.height;
|
||||
effectiveDepth = tex->overwriteInfo.depth;
|
||||
}
|
||||
tex->maxPossibleMipLevels = 1;
|
||||
if (dim != Latte::E_DIM::DIM_3D)
|
||||
{
|
||||
for (sint32 i = 0; i < 20; i++)
|
||||
{
|
||||
if ((effectiveWidth >> i) <= 1 && (effectiveHeight >> i) <= 1)
|
||||
{
|
||||
tex->maxPossibleMipLevels = i + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (sint32 i = 0; i < 20; i++)
|
||||
{
|
||||
if ((effectiveWidth >> i) <= 1 && (effectiveHeight >> i) <= 1 && (effectiveDepth >> i) <= 1)
|
||||
{
|
||||
tex->maxPossibleMipLevels = i + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LatteTexture_ReloadData(tex);
|
||||
LatteTC_MarkTextureStillInUse(tex);
|
||||
LatteTC_RegisterTexture(tex);
|
||||
|
||||
// create initial view that maps to the whole texture
|
||||
tex->baseView = tex->GetOrCreateView(0, tex->mipLevels, 0, tex->depth);
|
||||
return tex->baseView;
|
||||
|
|
|
@ -594,7 +594,7 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class TextureDecoder_R4_G4_UNORM_To_RGBA4_vk : public TextureDecoder, public SingletonClass<TextureDecoder_R4_G4_UNORM_To_RGBA4_vk>
|
||||
class TextureDecoder_R4_G4_UNORM_To_ABGR4 : public TextureDecoder, public SingletonClass<TextureDecoder_R4_G4_UNORM_To_ABGR4>
|
||||
{
|
||||
public:
|
||||
sint32 getBytesPerTexel(LatteTextureLoaderCtx* textureLoader) override
|
||||
|
@ -679,6 +679,51 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
// Texture decoder that unpacks R4G4 texels (one source byte, red in the high
// nibble, green in the low nibble) into two 8-bit channels per texel.
class TextureDecoder_R4G4_UNORM_To_RG8 : public TextureDecoder, public SingletonClass<TextureDecoder_R4G4_UNORM_To_RG8>
{
public:
	// Each decoded texel occupies two bytes (8-bit red followed by 8-bit green).
	sint32 getBytesPerTexel(LatteTextureLoaderCtx* textureLoader) override
	{
		return 2;
	}

	// Walks the texture in steps of stepX/stepY and writes the expanded
	// red/green bytes of every visited texel into outputData.
	void decode(LatteTextureLoaderCtx* textureLoader, uint8* outputData) override
	{
		for (sint32 y = 0; y < textureLoader->height; y += textureLoader->stepY)
		{
			for (sint32 x = 0; x < textureLoader->width; x += textureLoader->stepX)
			{
				const uint8* srcTexel = LatteTextureLoader_GetInput(textureLoader, x, y);
				const uint8 packed = srcTexel[0];

				// two output bytes per texel, rows are tightly packed
				sint32 pixelOffset = (x + y * textureLoader->width) * 2;
				uint8* dstTexel = outputData + pixelOffset;

				dstTexel[0] = _expandNibble((packed >> 4) & 0xF);
				dstTexel[1] = _expandNibble(packed & 0xF);
			}
		}
	}

	// Converts a single source texel to RGBA8: expanded red/green, blue = 0, alpha = 255.
	void decodePixelToRGBA(uint8* blockData, uint8* outputPixel, uint8 blockOffsetX, uint8 blockOffsetY) override
	{
		const uint8 packed = blockData[0];
		outputPixel[0] = _expandNibble((packed >> 4) & 0xF);
		outputPixel[1] = _expandNibble(packed & 0xF);
		outputPixel[2] = 0;
		outputPixel[3] = 255;
	}

private:
	// Widens a 4-bit value to 8 bits by replicating the nibble (0xN -> 0xNN).
	static uint8 _expandNibble(uint8 nibble)
	{
		return (uint8)((nibble << 4) | nibble);
	}
};
|
||||
|
||||
class TextureDecoder_R4_G4_B4_A4_UNORM : public TextureDecoder, public SingletonClass<TextureDecoder_R4_G4_B4_A4_UNORM>
|
||||
{
|
||||
public:
|
||||
|
@ -723,7 +768,6 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
class TextureDecoder_R4G4B4A4_UNORM_To_RGBA8 : public TextureDecoder, public SingletonClass<TextureDecoder_R4G4B4A4_UNORM_To_RGBA8>
|
||||
{
|
||||
public:
|
||||
|
|
|
@ -1598,6 +1598,24 @@ struct LatteContextRegister
|
|||
{
|
||||
return (uint32*)hleSpecialState;
|
||||
}
|
||||
|
||||
bool IsRasterizationEnabled() const
|
||||
{
|
||||
bool rasterizationEnabled = !PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
|
||||
|
||||
// GX2SetSpecialState(0, true) enables DX_RASTERIZATION_KILL, but still expects depth writes to happen? -> Research which stages are disabled by DX_RASTERIZATION_KILL exactly
|
||||
// for now we use a workaround:
|
||||
if (!PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
|
||||
rasterizationEnabled = true;
|
||||
|
||||
// Culling both front and back faces effectively disables rasterization
|
||||
uint32 cullFront = PA_SU_SC_MODE_CNTL.get_CULL_FRONT();
|
||||
uint32 cullBack = PA_SU_SC_MODE_CNTL.get_CULL_BACK();
|
||||
if (cullFront && cullBack)
|
||||
rasterizationEnabled = false;
|
||||
|
||||
return rasterizationEnabled;
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(sizeof(LatteContextRegister) == 0x10000 * 4 + 9 * 4);
|
||||
|
|
|
@ -1068,7 +1068,14 @@ void _LatteDecompiler_Process(LatteDecompilerShaderContext* shaderContext, uint8
|
|||
LatteDecompiler_analyzeDataTypes(shaderContext);
|
||||
// emit code
|
||||
if (shaderContext->shader->hasError == false)
|
||||
{
|
||||
if (g_renderer->GetType() == RendererAPI::OpenGL || g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
LatteDecompiler_emitGLSLShader(shaderContext, shaderContext->shader);
|
||||
#if ENABLE_METAL
|
||||
else
|
||||
LatteDecompiler_emitMSLShader(shaderContext, shaderContext->shader);
|
||||
#endif
|
||||
}
|
||||
LatteDecompiler_cleanup(shaderContext);
|
||||
// fast access
|
||||
_LatteDecompiler_GenerateDataForFastAccess(shaderContext->shader);
|
||||
|
|
|
@ -57,12 +57,16 @@ struct LatteDecompilerShaderResourceMapping
|
|||
// texture
|
||||
sint8 textureUnitToBindingPoint[LATTE_NUM_MAX_TEX_UNITS];
|
||||
// uniform buffer
|
||||
sint8 uniformVarsBufferBindingPoint{}; // special block for uniform registers/remapped array/custom variables
|
||||
sint8 uniformVarsBufferBindingPoint{-1}; // special block for uniform registers/remapped array/custom variables
|
||||
sint8 uniformBuffersBindingPoint[LATTE_NUM_MAX_UNIFORM_BUFFERS];
|
||||
// shader storage buffer for transform feedback (if alternative mode is used)
|
||||
sint8 tfStorageBindingPoint{-1};
|
||||
// attributes (vertex shader only)
|
||||
sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS];
|
||||
// Metal exclusive
|
||||
sint8 verticesPerInstanceBinding{-1};
|
||||
sint8 indexBufferBinding{-1};
|
||||
sint8 indexTypeBinding{-1};
|
||||
|
||||
sint32 getTextureCount()
|
||||
{
|
||||
|
@ -179,9 +183,12 @@ struct LatteDecompilerShader
|
|||
std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2;
|
||||
uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined
|
||||
bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{};
|
||||
uint8 textureRenderTargetIndex[LATTE_NUM_MAX_TEX_UNITS];
|
||||
|
||||
// analyzer stage (pixel outputs)
|
||||
uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments)
|
||||
// analyzer stage (depth output)
|
||||
bool depthMask{ false };
|
||||
// analyzer stage (geometry shader parameters/inputs)
|
||||
uint32 ringParameterCount{ 0 };
|
||||
uint32 ringParameterCountFromPrevStage{ 0 }; // used in geometry shader to hold VS ringParameterCount
|
||||
|
@ -286,6 +293,7 @@ struct LatteDecompilerOutput_t
|
|||
// mapping and binding information
|
||||
LatteDecompilerShaderResourceMapping resourceMappingGL;
|
||||
LatteDecompilerShaderResourceMapping resourceMappingVK;
|
||||
LatteDecompilerShaderResourceMapping resourceMappingMTL;
|
||||
};
|
||||
|
||||
struct LatteDecompilerSubroutineInfo;
|
||||
|
|
|
@ -8,6 +8,14 @@
|
|||
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||
#include "Common/MemPtr.h"
|
||||
#include "HW/Latte/ISA/LatteReg.h"
|
||||
#if ENABLE_METAL
|
||||
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
#endif
|
||||
|
||||
// Defined in LatteTextureLegacy.cpp
|
||||
Latte::E_GX2SURFFMT LatteTexture_ReconstructGX2Format(const Latte::LATTE_SQ_TEX_RESOURCE_WORD1_N& texUnitWord1, const Latte::LATTE_SQ_TEX_RESOURCE_WORD4_N& texUnitWord4);
|
||||
|
||||
/*
|
||||
* Return index of used color attachment based on shader pixel export index (0-7)
|
||||
|
@ -395,7 +403,8 @@ void LatteDecompiler_analyzeExport(LatteDecompilerShaderContext* shaderContext,
|
|||
}
|
||||
else if (cfInstruction->exportType == 0 && cfInstruction->exportArrayBase == 61)
|
||||
{
|
||||
// writes pixel depth
|
||||
if (LatteMRT::GetActiveDepthBufferMask(*shaderContext->contextRegistersNew))
|
||||
shader->depthMask = true;
|
||||
}
|
||||
else
|
||||
debugBreakpoint();
|
||||
|
@ -500,6 +509,18 @@ namespace LatteDecompiler
|
|||
}
|
||||
}
|
||||
|
||||
// Assigns Metal texture binding points to the texture units used by the shader.
// Binding indices are handed out consecutively from currentTextureBindingPointMTL.
// Units that are not set in the texture unit mask, and units whose
// textureRenderTargetIndex is not 255 (i.e. the texture was matched to a color
// buffer), do not receive a binding point.
void _initTextureBindingPointsMTL(LatteDecompilerShaderContext* decompilerContext)
{
	auto& bindingPoints = decompilerContext->output->resourceMappingMTL.textureUnitToBindingPoint;
	for (sint32 texUnit = 0; texUnit < LATTE_NUM_MAX_TEX_UNITS; texUnit++)
	{
		// unit not referenced by the shader
		if (!decompilerContext->output->textureUnitMask[texUnit])
			continue;
		// unit refers to a render target and therefore needs no texture binding
		if (decompilerContext->shader->textureRenderTargetIndex[texUnit] != 255)
			continue;

		bindingPoints[texUnit] = decompilerContext->currentTextureBindingPointMTL;
		decompilerContext->currentTextureBindingPointMTL++;
	}
}
|
||||
|
||||
void _initHasUniformVarBlock(LatteDecompilerShaderContext* decompilerContext)
|
||||
{
|
||||
decompilerContext->hasUniformVarBlock = false;
|
||||
|
@ -537,6 +558,13 @@ namespace LatteDecompiler
|
|||
{
|
||||
decompilerContext->hasUniformVarBlock = true; // uf_verticesPerInstance and uf_streamoutBufferBase*
|
||||
}
|
||||
if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
{
|
||||
bool usesGeometryShader = UseGeometryShader(*decompilerContext->contextRegistersNew, decompilerContext->options->usesGeometryShader);
|
||||
|
||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && usesGeometryShader)
|
||||
decompilerContext->hasUniformVarBlock = true; // uf_verticesPerInstance
|
||||
}
|
||||
}
|
||||
|
||||
void _initUniformBindingPoints(LatteDecompilerShaderContext* decompilerContext)
|
||||
|
@ -554,14 +582,13 @@ namespace LatteDecompiler
|
|||
}
|
||||
}
|
||||
// assign binding point to uniform var block
|
||||
decompilerContext->output->resourceMappingGL.uniformVarsBufferBindingPoint = -1; // OpenGL currently doesnt use a uniform block
|
||||
if (decompilerContext->hasUniformVarBlock)
|
||||
{
|
||||
decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = decompilerContext->currentBindingPointVK;
|
||||
decompilerContext->currentBindingPointVK++;
|
||||
decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint = decompilerContext->currentBufferBindingPointMTL;
|
||||
decompilerContext->currentBufferBindingPointMTL++;
|
||||
}
|
||||
else
|
||||
decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = -1;
|
||||
// assign binding points to uniform buffers
|
||||
if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
|
||||
{
|
||||
|
@ -580,6 +607,8 @@ namespace LatteDecompiler
|
|||
|
||||
decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] = decompilerContext->currentBindingPointVK;
|
||||
decompilerContext->currentBindingPointVK++;
|
||||
decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i] = decompilerContext->currentBufferBindingPointMTL;
|
||||
decompilerContext->currentBufferBindingPointMTL++;
|
||||
}
|
||||
// for OpenGL we use the relative buffer index
|
||||
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
|
||||
|
@ -601,6 +630,8 @@ namespace LatteDecompiler
|
|||
{
|
||||
decompilerContext->output->resourceMappingVK.tfStorageBindingPoint = decompilerContext->currentBindingPointVK;
|
||||
decompilerContext->currentBindingPointVK++;
|
||||
decompilerContext->output->resourceMappingMTL.tfStorageBindingPoint = decompilerContext->currentBufferBindingPointMTL;
|
||||
decompilerContext->currentBufferBindingPointMTL++;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -617,6 +648,7 @@ namespace LatteDecompiler
|
|||
{
|
||||
decompilerContext->output->resourceMappingGL.attributeMapping[i] = bindingIndex;
|
||||
decompilerContext->output->resourceMappingVK.attributeMapping[i] = bindingIndex;
|
||||
decompilerContext->output->resourceMappingMTL.attributeMapping[i] = bindingIndex;
|
||||
bindingIndex++;
|
||||
}
|
||||
}
|
||||
|
@ -827,6 +859,81 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
|||
shader->textureUnitList[shader->textureUnitListCount] = i;
|
||||
shader->textureUnitListCount++;
|
||||
}
|
||||
shader->textureRenderTargetIndex[i] = 255;
|
||||
}
|
||||
// check if textures are used as render targets
|
||||
if (shader->shaderType == LatteConst::ShaderType::Pixel)
|
||||
{
|
||||
struct {
|
||||
sint32 index;
|
||||
MPTR physAddr;
|
||||
Latte::E_GX2SURFFMT format;
|
||||
Latte::E_HWTILEMODE tileMode;
|
||||
} colorBuffers[LATTE_NUM_COLOR_TARGET]{};
|
||||
|
||||
uint8 colorBufferMask = LatteMRT::GetActiveColorBufferMask(shader, *shaderContext->contextRegistersNew);
|
||||
sint32 colorBufferCount = 0;
|
||||
for (sint32 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
||||
{
|
||||
auto& colorBuffer = colorBuffers[colorBufferCount];
|
||||
if (((colorBufferMask) & (1 << i)) == 0)
|
||||
continue; // color buffer not enabled
|
||||
|
||||
uint32* colorBufferRegBase = shaderContext->contextRegisters + (mmCB_COLOR0_BASE + i);
|
||||
uint32 regColorBufferBase = colorBufferRegBase[mmCB_COLOR0_BASE - mmCB_COLOR0_BASE] & 0xFFFFFF00; // the low 8 bits are ignored? How to Survive seems to rely on this
|
||||
|
||||
uint32 regColorInfo = colorBufferRegBase[mmCB_COLOR0_INFO - mmCB_COLOR0_BASE];
|
||||
|
||||
MPTR colorBufferPhysMem = regColorBufferBase;
|
||||
Latte::E_HWTILEMODE colorBufferTileMode = (Latte::E_HWTILEMODE)((regColorInfo >> 8) & 0xF);
|
||||
|
||||
Latte::E_GX2SURFFMT colorBufferFormat = LatteMRT::GetColorBufferFormat(i, *shaderContext->contextRegistersNew);
|
||||
|
||||
colorBuffer = {i, colorBufferPhysMem, colorBufferFormat, colorBufferTileMode};
|
||||
colorBufferCount++;
|
||||
}
|
||||
|
||||
for (sint32 i = 0; i < shader->textureUnitListCount; i++)
|
||||
{
|
||||
sint32 textureIndex = shader->textureUnitList[i];
|
||||
const auto& texRegister = texRegs[textureIndex];
|
||||
|
||||
// get physical address of texture data
|
||||
MPTR physAddr = (texRegister.word2.get_BASE_ADDRESS() << 8);
|
||||
if (physAddr == MPTR_NULL)
|
||||
continue; // invalid data
|
||||
|
||||
auto tileMode = texRegister.word0.get_TILE_MODE();
|
||||
|
||||
// Check for dimension
|
||||
auto dim = shader->textureUnitDim[textureIndex];
|
||||
// TODO: 2D arrays could technically be supported as well
|
||||
if (dim != Latte::E_DIM::DIM_2D)
|
||||
continue;
|
||||
|
||||
// Check for mip level
|
||||
// TODO: uncomment?
|
||||
/*
|
||||
auto lastMip = texRegister.word5.get_LAST_LEVEL();
|
||||
// TODO: multiple mip levels could technically be supported as well
|
||||
if (lastMip != 0)
|
||||
continue;
|
||||
*/
|
||||
|
||||
Latte::E_GX2SURFFMT format = LatteTexture_ReconstructGX2Format(texRegister.word1, texRegister.word4);
|
||||
|
||||
// Check if the texture is used as render target
|
||||
for (sint32 j = 0; j < colorBufferCount; j++)
|
||||
{
|
||||
const auto& colorBuffer = colorBuffers[j];
|
||||
|
||||
if (physAddr == colorBuffer.physAddr && format == colorBuffer.format && tileMode == colorBuffer.tileMode)
|
||||
{
|
||||
shader->textureRenderTargetIndex[textureIndex] = colorBuffer.index;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// for geometry shaders check the copy shader for stream writes
|
||||
if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false)
|
||||
|
@ -1002,6 +1109,10 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
|||
shaderContext->output->resourceMappingVK.setIndex = 2;
|
||||
LatteDecompiler::_initTextureBindingPointsGL(shaderContext);
|
||||
LatteDecompiler::_initTextureBindingPointsVK(shaderContext);
|
||||
LatteDecompiler::_initTextureBindingPointsMTL(shaderContext);
|
||||
LatteDecompiler::_initUniformBindingPoints(shaderContext);
|
||||
LatteDecompiler::_initAttributeBindingPoints(shaderContext);
|
||||
shaderContext->output->resourceMappingMTL.verticesPerInstanceBinding = shaderContext->currentBufferBindingPointMTL++;
|
||||
shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++;
|
||||
shaderContext->output->resourceMappingMTL.indexTypeBinding = shaderContext->currentBufferBindingPointMTL++;
|
||||
}
|
||||
|
|
4448
src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
Normal file
4448
src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,511 @@
|
|||
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteShaderAssembly.h"
|
||||
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
||||
#include "Cafe/HW/Latte/Core/Latte.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteDraw.h"
|
||||
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
|
||||
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||
#include "util/helpers/StringBuf.h"
|
||||
|
||||
#define _CRLF "\r\n"
|
||||
|
||||
// Emits shader code that copies all four components of input attribute
// `attributeInputIndex` (in.attrDataSem<N>) into attrDecoder unchanged.
// `shaderContext` is not used.
static void _readLittleEndianAttributeU32x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = in.attrDataSem{};" _CRLF,
		attributeInputIndex);
}
|
||||
|
||||
// Emits shader code that loads the xyz components of input attribute
// `attributeInputIndex` into attrDecoder and sets w to 0.
// `shaderContext` is not used.
static void _readLittleEndianAttributeU32x3(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = uint4(in.attrDataSem{}.xyz,0);" _CRLF,
		attributeInputIndex);
}
|
||||
|
||||
// Emits shader code that loads the xy components of input attribute
// `attributeInputIndex` into attrDecoder and sets z and w to 0.
// `shaderContext` is not used.
static void _readLittleEndianAttributeU32x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = uint4(in.attrDataSem{}.xy,0,0);" _CRLF,
		attributeInputIndex);
}
|
||||
|
||||
// Emits shader code that loads the x component of input attribute
// `attributeInputIndex` into attrDecoder and sets y, z and w to 0.
// `shaderContext` is not used.
static void _readLittleEndianAttributeU32x1(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = uint4(in.attrDataSem{}.x,0,0,0);" _CRLF,
		attributeInputIndex);
}
|
||||
|
||||
// Emits shader code for a two-component 16-bit attribute: the xy components of
// input attribute `attributeInputIndex` go into attrDecoder, z and w are set to 0.
// The emitted statement is the same as for the U32x2 reader.
// `shaderContext` is not used.
static void _readLittleEndianAttributeU16x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = uint4(in.attrDataSem{}.xy,0,0);" _CRLF,
		attributeInputIndex);
}
|
||||
|
||||
// Emits shader code for a four-component 16-bit attribute: all components of
// input attribute `attributeInputIndex` are copied into attrDecoder unchanged.
// The emitted statement is the same as for the U32x4 reader.
// `shaderContext` is not used.
static void _readLittleEndianAttributeU16x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = in.attrDataSem{};" _CRLF,
		attributeInputIndex);
}
|
||||
|
||||
// Emits MSL that loads `in.attrDataSem<attributeInputIndex>` into `attrDecoder` and then
// reverses the byte order of each 32-bit component (shift/mask byte swap on the whole vector).
// shaderContext is not used.
static void _readBigEndianAttributeU32x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	StringBuf& out = *src;
	// load all four components
	out.addFmt("attrDecoder = in.attrDataSem{};" _CRLF, attributeInputIndex);
	// 32-bit byte swap applied to all components at once
	out.add("attrDecoder = (attrDecoder>>24)|((attrDecoder>>8)&0xFF00)|((attrDecoder<<8)&0xFF0000)|((attrDecoder<<24));" _CRLF);
}
|
||||
|
||||
// Emits MSL that loads the xyz components of `in.attrDataSem<attributeInputIndex>` into
// `attrDecoder`, reverses the byte order of each of those 32-bit components and sets w to 0.
// shaderContext is not used.
static void _readBigEndianAttributeU32x3(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder.xyz = in.attrDataSem{}.xyz;" _CRLF, attributeInputIndex);
	// byte swap of xyz followed by zero-fill of the unused component
	static const char* const kTrailingLines[] = {
		"attrDecoder.xyz = (attrDecoder.xyz>>24)|((attrDecoder.xyz>>8)&0xFF00)|((attrDecoder.xyz<<8)&0xFF0000)|((attrDecoder.xyz<<24));" _CRLF,
		"attrDecoder.w = 0;" _CRLF,
	};
	for (const char* line : kTrailingLines)
		src->add(line);
}
|
||||
|
||||
// Emits MSL that loads the xy components of `in.attrDataSem<attributeInputIndex>` into
// `attrDecoder`, reverses the byte order of each of those 32-bit components and sets
// z and w to 0.
// shaderContext is not used.
static void _readBigEndianAttributeU32x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex);
	// byte swap of xy followed by zero-fill of the unused components
	static const char* const kTrailingLines[] = {
		"attrDecoder.xy = (attrDecoder.xy>>24)|((attrDecoder.xy>>8)&0xFF00)|((attrDecoder.xy<<8)&0xFF0000)|((attrDecoder.xy<<24));" _CRLF,
		"attrDecoder.z = 0;" _CRLF,
		"attrDecoder.w = 0;" _CRLF,
	};
	for (const char* line : kTrailingLines)
		src->add(line);
}
|
||||
|
||||
// Emits MSL that loads the x component of `in.attrDataSem<attributeInputIndex>` into
// `attrDecoder.x`, reverses its byte order (32-bit swap) and sets y, z and w to 0.
// shaderContext is not used.
static void _readBigEndianAttributeU32x1(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder.x = in.attrDataSem{}.x;" _CRLF, attributeInputIndex);
	// byte swap of x followed by zero-fill of the unused components
	static const char* const kTrailingLines[] = {
		"attrDecoder.x = (attrDecoder.x>>24)|((attrDecoder.x>>8)&0xFF00)|((attrDecoder.x<<8)&0xFF0000)|((attrDecoder.x<<24));" _CRLF,
		"attrDecoder.y = 0;" _CRLF,
		"attrDecoder.z = 0;" _CRLF,
		"attrDecoder.w = 0;" _CRLF,
	};
	for (const char* line : kTrailingLines)
		src->add(line);
}
|
||||
|
||||
// Emits MSL that loads the xy components of `in.attrDataSem<attributeInputIndex>` into
// `attrDecoder.xy`, swaps the two low bytes of x (16-bit swap) and then sets y, z and w to 0.
// Note that the emitted load reads .xy even though only x is kept afterwards.
// shaderContext is not used.
static void _readBigEndianAttributeU16x1(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex);
	// 16-bit byte swap of x followed by zero-fill of the remaining components
	static const char* const kTrailingLines[] = {
		"attrDecoder.x = ((attrDecoder.x>>8)&0xFF)|((attrDecoder.x<<8)&0xFF00);" _CRLF,
		"attrDecoder.y = 0;" _CRLF,
		"attrDecoder.z = 0;" _CRLF,
		"attrDecoder.w = 0;" _CRLF,
	};
	for (const char* line : kTrailingLines)
		src->add(line);
}
|
||||
|
||||
// Emits MSL that loads the xy components of `in.attrDataSem<attributeInputIndex>` into
// `attrDecoder.xy`, swaps the two low bytes of each (16-bit swap) and sets z and w to 0.
// shaderContext is not used.
static void _readBigEndianAttributeU16x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex);
	// 16-bit byte swap of xy followed by zero-fill of the unused components
	static const char* const kTrailingLines[] = {
		"attrDecoder.xy = ((attrDecoder.xy>>8)&0xFF)|((attrDecoder.xy<<8)&0xFF00);" _CRLF,
		"attrDecoder.z = 0;" _CRLF,
		"attrDecoder.w = 0;" _CRLF,
	};
	for (const char* line : kTrailingLines)
		src->add(line);
}
|
||||
|
||||
// Emits MSL that loads all four components of `in.attrDataSem<attributeInputIndex>` into
// `attrDecoder` and swaps the two low bytes of each component (16-bit swap).
// shaderContext is not used.
static void _readBigEndianAttributeU16x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	StringBuf& out = *src;
	// load all four components
	out.addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
	// 16-bit byte swap applied to all components at once
	out.add("attrDecoder = ((attrDecoder>>8)&0xFF)|((attrDecoder<<8)&0xFF00);" _CRLF);
}
|
||||
|
||||
void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext, StringBuf* src, LatteParsedFetchShaderAttribute_t* attrib)
|
||||
{
|
||||
if (attrib->attributeBufferIndex >= Latte::GPU_LIMITS::NUM_VERTEX_BUFFERS)
|
||||
{
|
||||
src->add("attrDecoder = int4(0);" _CRLF);
|
||||
return;
|
||||
}
|
||||
|
||||
uint32 attributeInputIndex = attrib->semanticId;
|
||||
if( attrib->endianSwap == LatteConst::VertexFetchEndianMode::SWAP_U32 )
|
||||
{
|
||||
if( attrib->format == FMT_32_32_32_32_FLOAT && attrib->nfa == 2 )
|
||||
{
|
||||
_readBigEndianAttributeU32x4(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if( attrib->format == FMT_32_32_32_FLOAT && attrib->nfa == 2 )
|
||||
{
|
||||
_readBigEndianAttributeU32x3(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if( attrib->format == FMT_32_32_FLOAT && attrib->nfa == 2 )
|
||||
{
|
||||
_readBigEndianAttributeU32x2(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if( attrib->format == FMT_32_FLOAT && attrib->nfa == 2 )
|
||||
{
|
||||
_readBigEndianAttributeU32x1(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if( attrib->format == FMT_2_10_10_10 && attrib->nfa == 0 )
|
||||
{
|
||||
_readBigEndianAttributeU32x1(shaderContext, src, attributeInputIndex);
|
||||
// Bayonetta 2 uses this format to store normals
|
||||
src->add("attrDecoder.xyzw = uint4((attrDecoder.x>>0)&0x3FF,(attrDecoder.x>>10)&0x3FF,(attrDecoder.x>>20)&0x3FF,(attrDecoder.x>>30)&0x3);" _CRLF);
|
||||
if (attrib->isSigned != 0)
|
||||
{
|
||||
src->add("if( (attrDecoder.x&0x200) != 0 ) attrDecoder.x |= 0xFFFFFC00;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x200) != 0 ) attrDecoder.y |= 0xFFFFFC00;" _CRLF);
|
||||
src->add("if( (attrDecoder.z&0x200) != 0 ) attrDecoder.z |= 0xFFFFFC00;" _CRLF);
|
||||
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/511.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/511.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/511.0,-1.0));" _CRLF);
|
||||
}
|
||||
else
|
||||
{
|
||||
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/1023.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/1023.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/1023.0,-1.0));" _CRLF);
|
||||
}
|
||||
src->add("attrDecoder.w = as_type<uint>(float(attrDecoder.w));" _CRLF); // unsure?
|
||||
|
||||
}
|
||||
else if( attrib->format == FMT_32_32_32_32 && attrib->nfa == 1 && attrib->isSigned == 0 )
|
||||
{
|
||||
_readBigEndianAttributeU32x4(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if( attrib->format == FMT_32_32_32 && attrib->nfa == 1 && attrib->isSigned == 0 )
|
||||
{
|
||||
_readBigEndianAttributeU32x3(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if( attrib->format == FMT_32_32 && attrib->nfa == 1 && attrib->isSigned == 0 )
|
||||
{
|
||||
_readBigEndianAttributeU32x2(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if (attrib->format == FMT_32 && attrib->nfa == 1 && attrib->isSigned == 0)
|
||||
{
|
||||
_readBigEndianAttributeU32x1(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if (attrib->format == FMT_32 && attrib->nfa == 1 && attrib->isSigned == 1)
|
||||
{
|
||||
// we can just read the signed s32 as a u32 since no sign-extension is necessary
|
||||
_readBigEndianAttributeU32x1(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned == 0 )
|
||||
{
|
||||
// seen in Minecraft Wii U Edition
|
||||
src->addFmt("attrDecoder.xyzw = as_type<uint4>(float4(in.attrDataSem{}.wzyx)/255.0);" _CRLF, attributeInputIndex);
|
||||
}
|
||||
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned != 0 )
|
||||
{
|
||||
// seen in Minecraft Wii U Edition
|
||||
src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.wzyx;" _CRLF, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("if( (attrDecoder.z&0x80) != 0 ) attrDecoder.z |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("if( (attrDecoder.w&0x80) != 0 ) attrDecoder.w |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/127.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/127.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/127.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.w = as_type<uint>(max(float(int(attrDecoder.w))/127.0,-1.0));" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 1 && attrib->isSigned == 0 )
|
||||
{
|
||||
// seen in Minecraft Wii U Edition
|
||||
src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.wzyx;" _CRLF, attributeInputIndex);
|
||||
}
|
||||
else if (attrib->format == FMT_8_8_8_8 && attrib->nfa == 2 && attrib->isSigned == 0)
|
||||
{
|
||||
// seen in Ben 10 Omniverse
|
||||
src->addFmt("attrDecoder.xyzw = as_type<uint4>(float4(in.attrDataSem{}.wzyx));" _CRLF, attributeInputIndex);
|
||||
}
|
||||
else
|
||||
{
|
||||
cemuLog_log(LogType::Force, "_emitAttributeDecode(): Unsupported fmt {:02x} nfa {} signed {} endian {}\n", attrib->format, attrib->nfa, attrib->isSigned, attrib->endianSwap);
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
}
|
||||
else if( attrib->endianSwap == LatteConst::VertexFetchEndianMode::SWAP_NONE )
|
||||
{
|
||||
if( attrib->format == FMT_32_32_32_32_FLOAT && attrib->nfa == 2 )
|
||||
{
|
||||
_readLittleEndianAttributeU32x4(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if (attrib->format == FMT_32_32_32_FLOAT && attrib->nfa == 2)
|
||||
{
|
||||
_readLittleEndianAttributeU32x3(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if (attrib->format == FMT_32_32_FLOAT && attrib->nfa == 2)
|
||||
{
|
||||
// seen in Cities of Gold
|
||||
_readLittleEndianAttributeU32x2(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if (attrib->format == FMT_32 && attrib->nfa == 1 && attrib->isSigned == 0)
|
||||
{
|
||||
// seen in Nano Assault Neo
|
||||
_readLittleEndianAttributeU32x1(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if (attrib->format == FMT_2_10_10_10 && attrib->nfa == 0 && attrib->isSigned == 0)
|
||||
{
|
||||
// seen in Fast Racing Neo
|
||||
_readLittleEndianAttributeU32x1(shaderContext, src, attributeInputIndex);
|
||||
src->add("attrDecoder.xyzw = uint4((attrDecoder.x>>0)&0x3FF,(attrDecoder.x>>10)&0x3FF,(attrDecoder.x>>20)&0x3FF,(attrDecoder.x>>30)&0x3);" _CRLF);
|
||||
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/1023.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/1023.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/1023.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.w = as_type<uint>(float(attrDecoder.w));" _CRLF); // todo - is this correct?
|
||||
}
|
||||
else if (attrib->format == FMT_16_16_16_16 && attrib->nfa == 0 && attrib->isSigned != 0)
|
||||
{
|
||||
// seen in CoD ghosts
|
||||
_readLittleEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.z&0x8000) != 0 ) attrDecoder.z |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.w&0x8000) != 0 ) attrDecoder.w |= 0xFFFF0000;" _CRLF);
|
||||
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/32767.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/32767.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/32767.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.w = as_type<uint>(max(float(int(attrDecoder.w))/32767.0,-1.0));" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_16_16_16_16 && attrib->nfa == 2 && attrib->isSigned == 1 )
|
||||
{
|
||||
// seen in Rabbids Land
|
||||
_readLittleEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.z&0x8000) != 0 ) attrDecoder.z |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.w&0x8000) != 0 ) attrDecoder.w |= 0xFFFF0000;" _CRLF);
|
||||
src->add("attrDecoder.xyzw = as_type<uint4>(float4(int4(attrDecoder)));" _CRLF);
|
||||
}
|
||||
else if (attrib->format == FMT_16_16_16_16_FLOAT && attrib->nfa == 2)
|
||||
{
|
||||
// seen in Giana Sisters: Twisted Dreams
|
||||
_readLittleEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
|
||||
// TODO: uint4?
|
||||
src->add("attrDecoder.xyzw = as_type<uint4>(float4(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))),float2(as_type<half2>(attrDecoder.z|(attrDecoder.w<<16)))));" _CRLF);
|
||||
}
|
||||
else if (attrib->format == FMT_16_16 && attrib->nfa == 0 && attrib->isSigned != 0)
|
||||
{
|
||||
// seen in Nano Assault Neo
|
||||
_readLittleEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
|
||||
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/32767.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/32767.0,-1.0));" _CRLF);
|
||||
}
|
||||
else if (attrib->format == FMT_16_16_FLOAT && attrib->nfa == 2)
|
||||
{
|
||||
// seen in Giana Sisters: Twisted Dreams
|
||||
_readLittleEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
|
||||
src->add("attrDecoder.xy = as_type<uint2>(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))));" _CRLF);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned == 0 )
|
||||
{
|
||||
src->addFmt("attrDecoder.xyzw = as_type<uint4>(float4(in.attrDataSem{}.xyzw)/255.0);" _CRLF, attributeInputIndex);
|
||||
}
|
||||
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned != 0 )
|
||||
{
|
||||
src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("if( (attrDecoder.z&0x80) != 0 ) attrDecoder.z |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("if( (attrDecoder.w&0x80) != 0 ) attrDecoder.w |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/127.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/127.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/127.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.w = as_type<uint>(max(float(int(attrDecoder.w))/127.0,-1.0));" _CRLF);
|
||||
}
|
||||
else if (attrib->format == FMT_8_8_8_8 && attrib->nfa == 1 && attrib->isSigned == 0)
|
||||
{
|
||||
src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
|
||||
}
|
||||
else if (attrib->format == FMT_8_8_8_8 && attrib->nfa == 1 && attrib->isSigned != 0)
|
||||
{
|
||||
// seen in Sonic Lost World
|
||||
src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("if( (attrDecoder.z&0x80) != 0 ) attrDecoder.z |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("if( (attrDecoder.w&0x80) != 0 ) attrDecoder.w |= 0xFFFFFF00;" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 2 && attrib->isSigned == 0 )
|
||||
{
|
||||
// seen in One Piece
|
||||
// TODO: uint4?
|
||||
src->addFmt("attrDecoder.xyzw = as_type<uint4>(float4(in.attrDataSem{}.xyzw));" _CRLF, attributeInputIndex);
|
||||
}
|
||||
else if (attrib->format == FMT_8_8 && attrib->nfa == 0 && attrib->isSigned == 0)
|
||||
{
|
||||
if( (attrib->offset&3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL )
|
||||
{
|
||||
// AMD workaround
|
||||
src->addFmt("attrDecoder.xy = as_type<uint2>(float2(in.attrDataSem{}.zw)/255.0);" _CRLF, attributeInputIndex);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
else
|
||||
{
|
||||
src->addFmt("attrDecoder.xy = as_type<uint2>(float2(in.attrDataSem{}.xy)/255.0);" _CRLF, attributeInputIndex);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
}
|
||||
else if (attrib->format == FMT_8_8 && attrib->nfa == 2 && attrib->isSigned == 0)
|
||||
{
|
||||
// seen in BotW
|
||||
if ((attrib->offset & 3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL)
|
||||
{
|
||||
// AMD workaround
|
||||
src->addFmt("attrDecoder.xy = as_type<uint2>(float2(in.attrDataSem{}.zw));" _CRLF, attributeInputIndex);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
else
|
||||
{
|
||||
src->addFmt("attrDecoder.xy = as_type<uint2>(float2(in.attrDataSem{}.xy));" _CRLF, attributeInputIndex);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
}
|
||||
else if (attrib->format == FMT_8_8 && attrib->nfa == 0 && attrib->isSigned != 0)
|
||||
{
|
||||
if ((attrib->offset & 3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL)
|
||||
{
|
||||
// AMD workaround
|
||||
src->addFmt("attrDecoder.xy = in.attrDataSem{}.zw;" _CRLF, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/127.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/127.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
else
|
||||
{
|
||||
src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
|
||||
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/127.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/127.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
}
|
||||
else if (attrib->format == FMT_8_8 && attrib->nfa == 1 && attrib->isSigned == 0)
|
||||
{
|
||||
if ((attrib->offset & 3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL)
|
||||
{
|
||||
// AMD workaround
|
||||
src->addFmt("attrDecoder.xyzw = uint4(in.attrDataSem{}.zw,0,0);" _CRLF, attributeInputIndex);
|
||||
}
|
||||
else
|
||||
{
|
||||
src->addFmt("attrDecoder.xyzw = uint4(in.attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex);
|
||||
}
|
||||
}
|
||||
else if( attrib->format == FMT_8 && attrib->nfa == 0 && attrib->isSigned == 0 )
|
||||
{
|
||||
// seen in Pikmin 3
|
||||
src->addFmt("attrDecoder.x = as_type<uint>(float(in.attrDataSem{}.x)/255.0);" _CRLF, attributeInputIndex);
|
||||
src->add("attrDecoder.yzw = uint3(0);" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_8 && attrib->nfa == 1 && attrib->isSigned == 0 )
|
||||
{
|
||||
src->addFmt("attrDecoder.xyzw = uint4(in.attrDataSem{}.x,0,0,0);" _CRLF, attributeInputIndex);
|
||||
}
|
||||
else
|
||||
{
|
||||
cemuLog_log(LogType::Force, "_emitAttributeDecode(): Unsupported fmt {:02x} nfa {} signed {} endian {}\n", attrib->format, attrib->nfa, attrib->isSigned, attrib->endianSwap);
|
||||
cemu_assert_debug(false);
|
||||
}
|
||||
}
|
||||
else if( attrib->endianSwap == LatteConst::VertexFetchEndianMode::SWAP_U16 )
|
||||
{
|
||||
if( attrib->format == FMT_16_16_16_16_FLOAT && attrib->nfa == 2 )
|
||||
{
|
||||
_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
|
||||
// TODO: uint4?
|
||||
src->add("attrDecoder.xyzw = as_type<uint4>(float4(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))),float2(as_type<half2>(attrDecoder.z|(attrDecoder.w<<16)))));" _CRLF);
|
||||
}
|
||||
else if (attrib->format == FMT_16_16_16_16 && attrib->nfa == 0 && attrib->isSigned != 0)
|
||||
{
|
||||
_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.z&0x8000) != 0 ) attrDecoder.z |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.w&0x8000) != 0 ) attrDecoder.w |= 0xFFFF0000;" _CRLF);
|
||||
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/32767.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/32767.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/32767.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.w = as_type<uint>(max(float(int(attrDecoder.w))/32767.0,-1.0));" _CRLF);
|
||||
}
|
||||
else if (attrib->format == FMT_16_16_16_16 && attrib->nfa == 0 && attrib->isSigned == 0)
|
||||
{
|
||||
// seen in BotW
|
||||
_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
|
||||
src->add("attrDecoder.x = as_type<uint>(float(int(attrDecoder.x))/65535.0);" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(float(int(attrDecoder.y))/65535.0);" _CRLF);
|
||||
src->add("attrDecoder.z = as_type<uint>(float(int(attrDecoder.z))/65535.0);" _CRLF);
|
||||
src->add("attrDecoder.w = as_type<uint>(float(int(attrDecoder.w))/65535.0);" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_16_16_16_16 && attrib->nfa == 2 && attrib->isSigned != 0 )
|
||||
{
|
||||
// seen in Minecraft Wii U Edition
|
||||
_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.z&0x8000) != 0 ) attrDecoder.z |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.w&0x8000) != 0 ) attrDecoder.w |= 0xFFFF0000;" _CRLF);
|
||||
src->add("attrDecoder.x = as_type<uint>(float(int(attrDecoder.x)));" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(float(int(attrDecoder.y)));" _CRLF);
|
||||
src->add("attrDecoder.z = as_type<uint>(float(int(attrDecoder.z)));" _CRLF);
|
||||
src->add("attrDecoder.w = as_type<uint>(float(int(attrDecoder.w)));" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_16_16_16_16 && attrib->nfa == 1 && attrib->isSigned != 0 )
|
||||
{
|
||||
// seen in Minecraft Wii U Edition
|
||||
_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.z&0x8000) != 0 ) attrDecoder.z |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.w&0x8000) != 0 ) attrDecoder.w |= 0xFFFF0000;" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_16_16_16_16 && attrib->nfa == 1 && attrib->isSigned == 0 )
|
||||
{
|
||||
_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if( attrib->format == FMT_16_16_FLOAT && attrib->nfa == 2 )
|
||||
{
|
||||
_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
|
||||
src->add("attrDecoder.xy = as_type<uint2>(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))));" _CRLF);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_16_16 && attrib->nfa == 0 && attrib->isSigned == 0 )
|
||||
{
|
||||
_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
|
||||
src->add("attrDecoder.xy = as_type<uint2>(float2(float(attrDecoder.x), float(attrDecoder.y))/65535.0);" _CRLF);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_16_16 && attrib->nfa == 0 && attrib->isSigned != 0 )
|
||||
{
|
||||
_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
|
||||
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/32767.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/32767.0,-1.0));" _CRLF);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_16_16 && attrib->nfa == 1 && attrib->isSigned == 0 )
|
||||
{
|
||||
_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if( attrib->format == FMT_16_16 && attrib->nfa == 1 && attrib->isSigned != 0 )
|
||||
{
|
||||
_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_16_16 && attrib->nfa == 2 && attrib->isSigned == 0 )
|
||||
{
|
||||
_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
|
||||
src->add("attrDecoder.xy = as_type<uint2>(float2(float(attrDecoder.x), float(attrDecoder.y)));" _CRLF);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
else if( attrib->format == FMT_16_16 && attrib->nfa == 2 && attrib->isSigned != 0 )
|
||||
{
|
||||
_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
|
||||
src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
|
||||
src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
|
||||
src->add("attrDecoder.xy = as_type<uint2>(float2(float(int(attrDecoder.x)), float(int(attrDecoder.y))));" _CRLF);
|
||||
src->add("attrDecoder.zw = uint2(0);" _CRLF);
|
||||
}
|
||||
else if (attrib->format == FMT_16 && attrib->nfa == 1 && attrib->isSigned == 0)
|
||||
{
|
||||
_readBigEndianAttributeU16x1(shaderContext, src, attributeInputIndex);
|
||||
}
|
||||
else if (attrib->format == FMT_16 && attrib->nfa == 0 && attrib->isSigned == 0)
|
||||
{
|
||||
// seen in CoD ghosts
|
||||
_readBigEndianAttributeU16x1(shaderContext, src, attributeInputIndex);
|
||||
src->add("attrDecoder.x = as_type<uint>(float(int(attrDecoder.x))/65535.0);" _CRLF);
|
||||
}
|
||||
else
|
||||
{
|
||||
cemuLog_logDebug(LogType::Force, "_emitAttributeDecode(): Unsupported fmt {:02x} nfa {} signed {} endian {}", attrib->format, attrib->nfa, attrib->isSigned, attrib->endianSwap);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cemu_assert_debug(false);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,554 @@
|
|||
#pragma once
|
||||
|
||||
#include "Common/precompiled.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||
|
||||
namespace LatteDecompiler
|
||||
{
|
||||
// Emits the MSL declaration of `struct SupportBuffer` into the shader source and records the
// byte offset of every emitted member in decompilerContext->output->uniformOffsetsVK.
//
// decompilerContext   decompiler state; shader, analyzer, options, context registers and output
//                     are read from it
// usesGeometryShader  when true, a vertex shader also receives the verticesPerInstance /
//                     streamoutBufferBase members
//
// Members are emitted in a fixed order: remapped[] or uniformRegister[] (depending on the
// uniform mode), windowSpaceToClipSpaceTransform, alphaTestRef, pointSize, fragCoordScale,
// tex<N>Scale, verticesPerInstance, streamoutBufferBase<N>. Each one is only present when its
// condition below holds. The running offset is aligned to 8 before float2 members and to 4
// before float members; offset_endOfBlock receives the final offset.
static void _emitUniformVariables(LatteDecompilerShaderContext* decompilerContext, bool usesGeometryShader)
{
	auto src = decompilerContext->shaderSource;
	auto& uniformOffsets = decompilerContext->output->uniformOffsetsVK;
	auto shader = decompilerContext->shader;
	const auto stage = shader->shaderType;

	sint32 uniformCurrentOffset = 0;

	// Aligns the running offset up to `alignment` (a power of two), reserves `size` bytes
	// and returns the offset at which the reserved member starts.
	auto reserveAligned = [&uniformCurrentOffset](sint32 alignment, sint32 size) -> sint32 {
		uniformCurrentOffset = (uniformCurrentOffset + (alignment - 1)) & ~(alignment - 1);
		const sint32 memberOffset = uniformCurrentOffset;
		uniformCurrentOffset += size;
		return memberOffset;
	};

	src->add("struct SupportBuffer {" _CRLF);

	switch (shader->uniformMode)
	{
	case LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED:
		// uniform registers or buffers are accessed statically with predictable offsets
		// this allows us to remap the used entries into a more compact array
		src->addFmt("int4 remapped[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size());
		uniformOffsets.offset_remapped = uniformCurrentOffset;
		uniformCurrentOffset += 16 * shader->list_remappedUniformEntries.size();
		break;
	case LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE:
	{
		// full or partial uniform register file has to be present
		uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(decompilerContext->shaderBaseHash, 256);
		src->addFmt("int4 uniformRegister[{}];" _CRLF, cfileSize);
		uniformOffsets.offset_uniformRegister = uniformCurrentOffset;
		uniformOffsets.count_uniformRegister = cfileSize;
		uniformCurrentOffset += 16 * cfileSize;
		break;
	}
	default:
		break;
	}

	// special uniforms
	const bool allViewportScalesEnabled =
		decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_SCALE_ENA() &&
		decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Y_SCALE_ENA() &&
		decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Z_SCALE_ENA();
	if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !allViewportScalesEnabled)
	{
		// aka GX2 special state 0
		uniformOffsets.offset_windowSpaceToClipSpaceTransform = reserveAligned(8, 8);
		src->add("float2 windowSpaceToClipSpaceTransform;" _CRLF);
	}

	const bool alphaTestEnabled = decompilerContext->contextRegistersNew->SX_ALPHA_TEST_CONTROL.get_ALPHA_TEST_ENABLE();
	if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel && alphaTestEnabled)
	{
		uniformOffsets.offset_alphaTestRef = reserveAligned(4, 4);
		src->add("float alphaTestRef;" _CRLF);
	}

	// pointSize is only supplied as a uniform when the shader does not write it itself
	if (decompilerContext->analyzer.outputPointSize && decompilerContext->analyzer.writesPointSize == false)
	{
		const bool isVertexWithoutGeometry = decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader;
		if (isVertexWithoutGeometry || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
		{
			uniformOffsets.offset_pointSize = reserveAligned(4, 4);
			src->add("float pointSize;" _CRLF);
		}
	}

	// define fragCoordScale which holds the xy scale for render target resolution vs effective resolution
	if (stage == LatteConst::ShaderType::Pixel)
	{
		uniformOffsets.offset_fragCoordScale = reserveAligned(8, 8);
		src->add("float2 fragCoordScale;" _CRLF);
	}

	// provide scale factor for every texture that is accessed via texel coordinates (texelFetch)
	for (sint32 texUnit = 0; texUnit < LATTE_NUM_MAX_TEX_UNITS; texUnit++)
	{
		if (decompilerContext->analyzer.texUnitUsesTexelCoordinates.test(texUnit))
		{
			uniformOffsets.offset_texScale[texUnit] = reserveAligned(8, 8);
			src->addFmt("float2 tex{}Scale;" _CRLF, texUnit);
		}
	}

	// define verticesPerInstance + streamoutBufferBaseX
	// NOTE(review): && binds tighter than ||, so a geometry shader takes this branch regardless
	// of useSSBOForStreamout. The grouping is kept as written; confirm that this is intended.
	if ((stage == LatteConst::ShaderType::Vertex && usesGeometryShader) ||
		(decompilerContext->analyzer.useSSBOForStreamout && stage == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) ||
		stage == LatteConst::ShaderType::Geometry)
	{
		src->add("int verticesPerInstance;" _CRLF);
		uniformOffsets.offset_verticesPerInstance = uniformCurrentOffset;
		uniformCurrentOffset += 4;

		for (uint32 bufferIndex = 0; bufferIndex < LATTE_NUM_STREAMOUT_BUFFER; bufferIndex++)
		{
			if (!decompilerContext->output->streamoutBufferWriteMask[bufferIndex])
				continue;
			src->addFmt("int streamoutBufferBase{};" _CRLF, bufferIndex);
			uniformOffsets.offset_streamoutBufferBase[bufferIndex] = uniformCurrentOffset;
			uniformCurrentOffset += 4;
		}
	}

	src->add("};" _CRLF _CRLF);

	uniformOffsets.offset_endOfBlock = uniformCurrentOffset;
}
|
||||
|
||||
// Emits a `struct UBuff<N>` declaration for every uniform buffer the shader accesses.
// Only the FULL_CBANK uniform mode declares anything here: the REMAPPED and FULL_CFILE
// layouts are generated in _emitUniformVariables, and NONE means no uniforms are used.
static void _emitUniformBuffers(LatteDecompilerShaderContext* decompilerContext)
{
	const auto uniformMode = decompilerContext->shader->uniformMode;
	if (uniformMode != LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
	{
		// the remaining known modes emit nothing from here; any other value is unexpected
		cemu_assert_debug(uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED ||
			uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE ||
			uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_NONE);
		return;
	}

	auto out = decompilerContext->shaderSource;
	for (uint32 bufferIndex = 0; bufferIndex < LATTE_NUM_MAX_UNIFORM_BUFFERS; bufferIndex++)
	{
		auto& accessTracker = decompilerContext->analyzer.uniformBufferAccessTracker[bufferIndex];
		if (!accessTracker.HasAccess())
			continue;

		// an accessed buffer is expected to have been assigned a Metal binding point already
		cemu_assert_debug(decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[bufferIndex] >= 0);

		// the buffer is exposed as an array of float4; its length comes from the access tracker
		out->addFmt("struct UBuff{} {{" _CRLF, bufferIndex);
		out->addFmt("float4 d[{}];" _CRLF, accessTracker.DetermineSize(decompilerContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE));
		out->add("};" _CRLF _CRLF);
	}
}
|
||||
|
||||
// Emits the `VertexIn` struct holding one `uint4 attrDataSem<N>` member per used attribute semantic.
// With fetchVertexManually the members carry no [[attribute(n)]] qualifier; instead a
// `#define ATTRIBUTE_NAME<location> attrDataSem<N>` line per attribute is appended after the struct.
// For non-vertex shaders nothing but an empty string is appended.
static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext, bool fetchVertexManually)
{
	auto src = decompilerContext->shaderSource;
	std::string attributeDefines;

	if (decompilerContext->shader->shaderType == LatteConst::ShaderType::Vertex)
	{
		src->add("struct VertexIn {" _CRLF);
		for (uint32 semanticIndex = 0; semanticIndex < LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS; semanticIndex++)
		{
			if (!decompilerContext->analyzer.inputAttributSemanticMask[semanticIndex])
				continue;

			// every used semantic is expected to be mapped to a Metal attribute location
			cemu_assert_debug(decompilerContext->output->resourceMappingMTL.attributeMapping[semanticIndex] >= 0);
			const sint32 mappedLocation = (sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[semanticIndex];

			src->addFmt("uint4 attrDataSem{}", semanticIndex);
			if (fetchVertexManually)
			{
				// collected here and written out after the struct has been closed
				attributeDefines += "#define ATTRIBUTE_NAME" + std::to_string(mappedLocation) + " attrDataSem" + std::to_string(semanticIndex) + "\n";
			}
			else
			{
				src->addFmt(" [[attribute({})]]", mappedLocation);
			}
			src->add(";" _CRLF);
		}
		src->add("};" _CRLF _CRLF);
	}

	src->addFmt("{}", attributeDefines);
}
|
||||
|
||||
// Emits the `VertexOut` struct produced by the vertex stage.
// Every enabled vertex output parameter that has a matching pixel shader import becomes a
// `passParameterSem<semanticId>` member; pixel shader imports that the vertex shader does not
// write are declared as `unknown<semanticId>` members so that their locations still exist.
// For rect vertex shaders the written parameters get no interpolation/location qualifiers and an
// additional `ObjectPayload` struct wrapping an array of VertexOut is emitted.
static void _emitVSOutputs(LatteDecompilerShaderContext* shaderContext, bool isRectVertexShader)
{
	auto* src = shaderContext->shaderSource;

	src->add("struct VertexOut {" _CRLF);
	src->add("float4 position [[position]] [[invariant]];" _CRLF);
	if (shaderContext->analyzer.outputPointSize)
		src->add("float pointSize [[point_size]];" _CRLF);

	LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();

	// returns the slot of the first pixel shader import using the given semantic id, or -1 if there is none
	auto findPSInputSlot = [psInputTable](uint32 semanticId) -> sint32 {
		for (sint32 slot = 0; slot < psInputTable->count; slot++)
		{
			if (psInputTable->import[slot].semanticId == semanticId)
				return slot;
		}
		return -1;
	};

	auto parameterMask = shaderContext->shader->outputParameterMask;
	bool psInputsWritten[GPU7_PS_MAX_INPUTS] = {false};
	for (uint32 paramIndex = 0; paramIndex < 32; paramIndex++)
	{
		if ((parameterMask & (1 << paramIndex)) == 0)
			continue; // parameter not exported by this shader

		const uint32 vsSemanticId = _getVertexShaderOutParamSemanticId(shaderContext->contextRegisters, paramIndex);
		if (vsSemanticId > LATTE_ANALYZER_IMPORT_INDEX_PARAM_MAX)
			continue;

		const sint32 psInputIndex = findPSInputSlot(vsSemanticId);
		if (psInputIndex == -1)
			continue; // the pixel shader does not read this parameter

		psInputsWritten[psInputIndex] = true;

		auto& psImport = psInputTable->import[psInputIndex];
		src->addFmt("float4 passParameterSem{}", psImport.semanticId);
		if (!isRectVertexShader)
		{
			src->addFmt(" [[user(locn{})]]", psInputIndex);
			if (psImport.isFlat)
				src->add(" [[flat]]");
			if (psImport.isNoPerspective)
				src->add(" [[center_no_perspective]]");
		}
		src->add(";" _CRLF);
	}

	// TODO: handle this in the fragment shader instead?
	// Declare every pixel shader input that the vertex shader did not write
	for (uint32 slot = 0; slot < psInputTable->count; slot++)
	{
		if (psInputsWritten[slot])
			continue;
		if (psInputTable->import[slot].semanticId > LATTE_ANALYZER_IMPORT_INDEX_PARAM_MAX)
			continue;

		src->addFmt("float4 unknown{} [[user(locn{})]];" _CRLF, psInputTable->import[slot].semanticId, slot);
	}

	src->add("};" _CRLF _CRLF);

	if (isRectVertexShader)
	{
		// payload struct carrying the vertices of one input primitive
		src->add("struct ObjectPayload {" _CRLF);
		src->add("VertexOut vertexOut[VERTICES_PER_VERTEX_PRIMITIVE];" _CRLF);
		src->add("};" _CRLF _CRLF);
	}
}
|
||||
|
||||
// Emits the GET_FRAGCOORD() helper macro and the `FragmentIn` struct of a pixel shader.
// FragmentIn holds the fragment position plus one `passParameterSem<semanticId>` member for each
// entry of the pixel shader input table whose semantic id is within the parameter range.
static void _emitPSInputs(LatteDecompilerShaderContext* shaderContext)
{
	auto* src = shaderContext->shaderSource;

	src->add("#define GET_FRAGCOORD() float4(in.position.xy * supportBuffer.fragCoordScale.xy, in.position.z, 1.0 / in.position.w)" _CRLF);

	src->add("struct FragmentIn {" _CRLF);
	src->add("float4 position [[position]];" _CRLF);

	LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();
	for (sint32 slot = 0; slot < psInputTable->count; slot++)
	{
		auto& psImport = psInputTable->import[slot];
		if (psImport.semanticId > LATTE_ANALYZER_IMPORT_INDEX_PARAM_MAX)
			continue;

		// the location is the table slot, the member name is derived from the semantic id
		src->addFmt("float4 passParameterSem{}", psImport.semanticId);
		src->addFmt(" [[user(locn{})]]", slot);
		if (psImport.isFlat)
			src->add(" [[flat]]");
		if (psImport.isNoPerspective)
			src->add(" [[center_no_perspective]]");
		src->add(";" _CRLF);
	}

	src->add("};" _CRLF _CRLF);
}
|
||||
|
||||
// Emits the input/output struct declarations of the shader stage being generated:
//  - vertex shaders: the attribute inputs (see _emitAttributes)
//  - pixel shaders: FragmentIn (see _emitPSInputs) and the FragmentOut struct
//  - without a geometry shader, or for rect vertex shaders: VertexOut via _emitVSOutputs
//    (only for vertex shaders and only when rasterization is enabled)
//  - with a geometry shader: the ring-parameter VertexOut/ObjectPayload structs shared by the
//    vertex and geometry stage, and for the geometry stage GeometryOut plus the MeshType alias
static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool usesGeometryShader, bool fetchVertexManually, bool rasterizationEnabled)
{
	auto src = decompilerContext->shaderSource;
	const auto shaderType = decompilerContext->shaderType;

	if (shaderType == LatteConst::ShaderType::Vertex)
	{
		_emitAttributes(decompilerContext, fetchVertexManually);
	}
	else if (shaderType == LatteConst::ShaderType::Pixel)
	{
		_emitPSInputs(decompilerContext);

		src->add("struct FragmentOut {" _CRLF);

		// one color output per render target that the shader writes and that has a known data type
		for (uint32 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
		{
			if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) == 0)
				continue;

			auto dataType = GetColorBufferDataType(i, *decompilerContext->contextRegistersNew);
			if (dataType != MetalDataType::NONE)
				src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetDataTypeStr(dataType), i, i);
		}

		// depth output, only when the shader writes depth
		if (decompilerContext->shader->depthMask)
			src->add("float passDepth [[depth(any)]];" _CRLF);

		src->add("};" _CRLF _CRLF);
	}

	if (!usesGeometryShader || isRectVertexShader)
	{
		if (shaderType == LatteConst::ShaderType::Vertex && rasterizationEnabled)
			_emitVSOutputs(decompilerContext, isRectVertexShader);
		return;
	}

	// geometry shader path
	if (shaderType == LatteConst::ShaderType::Vertex || shaderType == LatteConst::ShaderType::Geometry)
	{
		// the vertex stage declares what it writes to the ring, the geometry stage what it reads from it
		const uint32 ringParameterCountVS2GS = (shaderType == LatteConst::ShaderType::Vertex)
			? decompilerContext->shader->ringParameterCount
			: decompilerContext->shader->ringParameterCountFromPrevStage;

		src->add("struct VertexOut {" _CRLF);
		for (uint32 f = 0; f < ringParameterCountVS2GS; f++)
			src->addFmt("int4 passParameterSem{};" _CRLF, f);
		src->add("};" _CRLF _CRLF);

		src->add("struct ObjectPayload {" _CRLF);
		src->add("VertexOut vertexOut[VERTICES_PER_VERTEX_PRIMITIVE];" _CRLF);
		src->add("};" _CRLF _CRLF);
	}

	if (shaderType == LatteConst::ShaderType::Geometry)
	{
		// parameters shared between geometry and pixel shader
		// the ring item size is expected to be a multiple of 16 (this check used to be written twice)
		const uint32 ringItemSize = decompilerContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF;
		if ((ringItemSize & 0xF) != 0)
			debugBreakpoint();

		src->add("struct GeometryOut {" _CRLF);
		src->add("float4 position [[position]];" _CRLF);
		for (sint32 p = 0; p < decompilerContext->parsedGSCopyShader->numParam; p++)
		{
			auto& paramMapping = decompilerContext->parsedGSCopyShader->paramMapping[p];
			// only mappings with export type 2 are declared as parameters
			if (paramMapping.exportType != 2)
				continue;
			src->addFmt("float4 passParameterSem{} [[user(locn{})]];" _CRLF, (sint32)paramMapping.exportParam, paramMapping.exportParam & 0x7F);
		}
		src->add("};" _CRLF _CRLF);

		const uint32 MAX_VERTEX_COUNT = 32;

		// Define the mesh shader output type
		src->addFmt("using MeshType = mesh<GeometryOut, void, {}, GET_PRIMITIVE_COUNT({}), topology::MTL_PRIMITIVE_TYPE>;" _CRLF, MAX_VERTEX_COUNT, MAX_VERTEX_COUNT);
	}
}
|
||||
|
||||
// Emits everything that precedes the shader entry point: the helper macros
// (VERTICES_PER_VERTEX_PRIMITIVE, MTL_PRIMITIVE_TYPE, GET_PRIMITIVE_COUNT, SET_POSITION),
// the uniform variable block, the uniform buffer structs and the input/output structs.
static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool usesGeometryShader, bool fetchVertexManually, bool rasterizationEnabled)
{
	auto src = decompilerContext->shaderSource;

	const bool isVertexStage = decompilerContext->shaderType == LatteConst::ShaderType::Vertex;
	const bool isGeometryStage = decompilerContext->shaderType == LatteConst::ShaderType::Geometry;

	if (usesGeometryShader && (isVertexStage || isGeometryStage))
	{
		LattePrimitiveMode vsOutPrimType = decompilerContext->contextRegistersNew->VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
		src->addFmt("#define VERTICES_PER_VERTEX_PRIMITIVE {}" _CRLF, GetVerticesPerPrimitive(vsOutPrimType));

		if (isGeometryStage)
		{
			// topology and primitive count formula depend on the geometry shader output primitive type
			const uint32 gsOutPrimType = decompilerContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE];
			switch (gsOutPrimType)
			{
			case 0: // Point
				src->add("#define MTL_PRIMITIVE_TYPE point" _CRLF);
				src->add("#define GET_PRIMITIVE_COUNT(vertexCount) (vertexCount / 1)" _CRLF);
				break;
			case 1: // Line strip
				src->add("#define MTL_PRIMITIVE_TYPE line" _CRLF);
				src->add("#define GET_PRIMITIVE_COUNT(vertexCount) (vertexCount - 1)" _CRLF);
				break;
			case 2: // Triangle strip
				src->add("#define MTL_PRIMITIVE_TYPE triangle" _CRLF);
				src->add("#define GET_PRIMITIVE_COUNT(vertexCount) (vertexCount - 2)" _CRLF);
				break;
			default:
				// NOTE(review): neither macro gets defined in this case although the MeshType alias
				// emitted later references both - confirm whether a fallback is wanted
				cemuLog_log(LogType::Force, "Unknown geometry out primitive type {}", gsOutPrimType);
				break;
			}
		}
	}

	// without the DX clip space definition, z is remapped via (z + w) / 2 when the position is written
	const bool usesDXClipSpace = decompilerContext->contextRegistersNew->PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF();
	if (usesDXClipSpace)
		src->add("#define SET_POSITION(_v) out.position = _v" _CRLF);
	else
		src->add("#define SET_POSITION(_v) out.position = _v; out.position.z = (out.position.z + out.position.w) / 2.0" _CRLF);

	// the marker comments are only written when shader dumping is enabled
	const bool dumpShaders = ActiveSettings::DumpShadersEnabled();
	if (dumpShaders)
		src->add("// start of shader inputs/outputs, predetermined by Cemu. Do not touch" _CRLF);

	_emitUniformVariables(decompilerContext, usesGeometryShader); // uniform variables
	_emitUniformBuffers(decompilerContext); // uniform buffers
	_emitInputsAndOutputs(decompilerContext, isRectVertexShader, usesGeometryShader, fetchVertexManually, rasterizationEnabled); // inputs and outputs

	if (dumpShaders)
		src->add("// end of shader inputs/outputs" _CRLF);
}
|
||||
|
||||
// Appends one `, constant UBuff<N>& ubuff<N> [[buffer(binding)]]` parameter for every
// accessed uniform buffer. Nothing is emitted unless the shader uses the FULL_CBANK uniform mode.
static void _emitUniformBufferDefinitions(LatteDecompilerShaderContext* decompilerContext)
{
	if (decompilerContext->shader->uniformMode != LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
		return;

	auto src = decompilerContext->shaderSource;
	for (uint32 bufferIndex = 0; bufferIndex < LATTE_NUM_MAX_UNIFORM_BUFFERS; bufferIndex++)
	{
		if (!decompilerContext->analyzer.uniformBufferAccessTracker[bufferIndex].HasAccess())
			continue;

		// an accessed buffer is expected to have a valid binding point
		cemu_assert_debug(decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[bufferIndex] >= 0);
		const sint32 bindingPoint = (sint32)decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[bufferIndex];

		src->addFmt(", constant UBuff{}& ubuff{} [[buffer({})]]", bufferIndex, bufferIndex, bindingPoint);
	}
}
|
||||
|
||||
// Appends the texture related parameters of the shader entry point.
// For each used texture unit either
//  - a `[[color(n)]]` input is emitted (once per render target) when the renderer supports
//    framebuffer fetch and the unit is associated with a render target index, or
//  - a texture/depth parameter plus a sampler parameter, both using the unit's binding point.
static void _emitTextureDefinitions(LatteDecompilerShaderContext* shaderContext)
{
	bool renderTargetIndexUsed[LATTE_NUM_COLOR_TARGET] = {false};

	auto src = shaderContext->shaderSource;
	for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
	{
		if (!shaderContext->output->textureUnitMask[i])
			continue;

		// 255 marks a texture unit without an associated render target
		const uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[i];
		if (static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && renderTargetIndex != 255)
		{
			// several units may refer to the same render target, declare the color input only once
			if (renderTargetIndexUsed[renderTargetIndex])
				continue;
			src->addFmt(", {} col{} [[color({})]]", GetDataTypeStr(GetColorBufferDataType(renderTargetIndex, *shaderContext->contextRegistersNew)), renderTargetIndex, renderTargetIndex);
			renderTargetIndexUsed[renderTargetIndex] = true;
			continue;
		}

		const auto dim = shaderContext->shader->textureUnitDim[i];
		const bool isIntegerFormat = shaderContext->shader->textureIsIntegerFormat[i];

		src->add(", ");

		// Only certain texture dimensions can be used with comparison samplers
		const bool isDepthTexture = shaderContext->shader->textureUsesDepthCompare[i] && IsValidDepthTextureType(dim);
		if (isDepthTexture)
			src->add("depth");
		else
			src->add("texture");

		// dimension + component type; integer formats are only handled for 1D and 2D textures
		const char* typeSuffix = nullptr;
		switch (dim)
		{
		case Latte::E_DIM::DIM_1D:
			typeSuffix = isIntegerFormat ? "1d<uint>" : "1d<float>";
			break;
		case Latte::E_DIM::DIM_2D:
		case Latte::E_DIM::DIM_2D_MSAA:
			typeSuffix = isIntegerFormat ? "2d<uint>" : "2d<float>";
			break;
		case Latte::E_DIM::DIM_2D_ARRAY:
			if (!isIntegerFormat)
				typeSuffix = "2d_array<float>";
			break;
		case Latte::E_DIM::DIM_CUBEMAP:
			if (!isIntegerFormat)
				typeSuffix = "cube_array<float>";
			break;
		case Latte::E_DIM::DIM_3D:
			if (!isIntegerFormat)
				typeSuffix = "3d<float>";
			break;
		default:
			break;
		}
		if (typeSuffix)
		{
			src->add(typeSuffix);
		}
		else
		{
			cemu_assert_unimplemented();
		}

		// texture and sampler share the same binding index
		const uint32 binding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i];
		src->addFmt(" tex{} [[texture({})]]", i, binding);
		src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding);
	}
}
|
||||
|
||||
// Emits the parameter list of the shader entry point: the stage specific built-in inputs first,
// followed by the support buffer, the streamout buffer, the uniform buffers and the textures.
// isRectVertexShader is accepted but not read by this function.
static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool usesGeometryShader, bool fetchVertexManually)
{
	auto src = decompilerContext->shaderSource;
	auto& mtlMapping = decompilerContext->output->resourceMappingMTL;
	const auto shaderType = decompilerContext->shaderType;

	switch (shaderType)
	{
	case LatteConst::ShaderType::Vertex:
		if (!usesGeometryShader)
		{
			// TODO: only include these if needed?
			src->add("uint vid [[vertex_id]]");
			src->add(", uint iid [[instance_id]]");
		}
		else
		{
			// with a geometry shader the vertex stage receives payload, grid properties and thread ids
			src->add("object_data ObjectPayload& objectPayload [[payload]]");
			src->add(", mesh_grid_properties meshGridProperties");
			src->add(", uint tig [[threadgroup_position_in_grid]]");
			src->add(", uint tid [[thread_index_in_threadgroup]]");
			// TODO: only include index buffer if needed
			src->addFmt(", device uint* indexBuffer [[buffer({})]]", mtlMapping.indexBufferBinding);
			// TODO: put into the support buffer?
			src->addFmt(", constant uchar& indexType [[buffer({})]]", mtlMapping.indexTypeBinding);
		}

		// NOTE(review): VERTEX_BUFFER_DEFINITIONS is not defined in this function; presumably it is a
		// macro supplied elsewhere that expands to the vertex buffer parameters - confirm
		if (fetchVertexManually)
			src->add(" VERTEX_BUFFER_DEFINITIONS");
		else
			src->add(", VertexIn in [[stage_in]]");
		break;
	case LatteConst::ShaderType::Geometry:
		src->add("MeshType mesh");
		src->add(", const object_data ObjectPayload& objectPayload [[payload]]");
		break;
	case LatteConst::ShaderType::Pixel:
		src->add("FragmentIn in [[stage_in]]");
		// TODO: only include these if needed?
		src->add(", float2 pointCoord [[point_coord]]");
		src->add(", bool frontFacing [[front_facing]]");
		break;
	default:
		break;
	}

	// support buffer, only when a binding point was assigned
	if (mtlMapping.uniformVarsBufferBindingPoint >= 0)
		src->addFmt(", constant SupportBuffer& supportBuffer [[buffer({})]]", mtlMapping.uniformVarsBufferBindingPoint);

	// streamout buffer (transform feedback), declared on the last stage before rasterization
	const bool isLastGeometryProcessingStage =
		(shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) ||
		shaderType == LatteConst::ShaderType::Geometry;
	if (isLastGeometryProcessingStage && decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite)
		src->addFmt(", device int* sb [[buffer({})]]" _CRLF, mtlMapping.tfStorageBindingPoint);

	// uniform buffers
	_emitUniformBufferDefinitions(decompilerContext);
	// textures
	_emitTextureDefinitions(decompilerContext);
}
|
||||
}
|
|
@ -260,6 +260,8 @@ struct LatteDecompilerShaderContext
|
|||
// emitter
|
||||
bool hasUniformVarBlock;
|
||||
sint32 currentBindingPointVK{};
|
||||
sint32 currentBufferBindingPointMTL{};
|
||||
sint32 currentTextureBindingPointMTL{};
|
||||
struct ALUClauseTemporariesState* aluPVPSState{nullptr};
|
||||
// misc
|
||||
std::vector<LatteDecompilerSubroutineInfo> list_subroutines;
|
||||
|
@ -268,6 +270,7 @@ struct LatteDecompilerShaderContext
|
|||
void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader);
|
||||
void LatteDecompiler_analyzeDataTypes(LatteDecompilerShaderContext* shaderContext);
|
||||
void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader);
|
||||
void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader);
|
||||
|
||||
void LatteDecompiler_cleanup(LatteDecompilerShaderContext* shaderContext);
|
||||
|
||||
|
|
64
src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp
Normal file
64
src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp
Normal file
|
@ -0,0 +1,64 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
|
||||
// Builds the Metal render pass descriptor for this framebuffer configuration.
// Every bound color buffer and the depth (and optionally stencil) buffer is attached with
// load action "load" and store action "store". The occlusion query result buffer of the
// renderer is set as the visibility result buffer.
CachedFBOMtl::CachedFBOMtl(class MetalRenderer* metalRenderer, uint64 key) : LatteCachedFBO(key)
{
	m_renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();

	bool anyAttachmentBound = false;

	// color attachments
	for (int slot = 0; slot < 8; ++slot)
	{
		auto colorView = static_cast<LatteTextureViewMtl*>(colorBuffer[slot].texture);
		if (colorView == nullptr)
			continue;

		auto attachment = m_renderPassDescriptor->colorAttachments()->object(slot);
		attachment->setTexture(colorView->GetRGBAView());
		attachment->setLoadAction(MTL::LoadActionLoad);
		attachment->setStoreAction(MTL::StoreActionStore);

		anyAttachmentBound = true;
	}

	// depth attachment
	if (depthBuffer.texture)
	{
		auto depthView = static_cast<LatteTextureViewMtl*>(depthBuffer.texture);

		auto depthAttachment = m_renderPassDescriptor->depthAttachment();
		depthAttachment->setTexture(depthView->GetRGBAView());
		depthAttachment->setLoadAction(MTL::LoadActionLoad);
		depthAttachment->setStoreAction(MTL::StoreActionStore);

		// stencil attachment, only when both the buffer and the Metal pixel format have stencil
		const bool formatHasStencil = depthBuffer.hasStencil && GetMtlPixelFormatInfo(depthBuffer.texture->format, true).hasStencil;
		if (formatHasStencil)
		{
			auto stencilAttachment = m_renderPassDescriptor->stencilAttachment();
			stencilAttachment->setTexture(depthView->GetRGBAView());
			stencilAttachment->setLoadAction(MTL::LoadActionLoad);
			stencilAttachment->setStoreAction(MTL::StoreActionStore);
		}

		anyAttachmentBound = true;
	}

	// HACK: setup a dummy color attachment to prevent Metal from discarding draws for streamout draws in Super Smash Bros. for Wii U (works fine on MoltenVK without this hack though)
	if (!anyAttachmentBound)
	{
		auto dummyAttachment = m_renderPassDescriptor->colorAttachments()->object(0);
		dummyAttachment->setTexture(metalRenderer->GetNullTexture2D());
		dummyAttachment->setLoadAction(MTL::LoadActionDontCare);
		dummyAttachment->setStoreAction(MTL::StoreActionDontCare);
	}

	// Visibility buffer
	m_renderPassDescriptor->setVisibilityResultBuffer(metalRenderer->GetOcclusionQueryResultBuffer());
}
|
||||
|
||||
// Drops the reference to the render pass descriptor allocated in the constructor.
CachedFBOMtl::~CachedFBOMtl()
{
	m_renderPassDescriptor->release();
}
|
22
src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h
Normal file
22
src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h
Normal file
|
@ -0,0 +1,22 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteCachedFBO.h"
|
||||
|
||||
class CachedFBOMtl : public LatteCachedFBO
|
||||
{
|
||||
public:
|
||||
CachedFBOMtl(class MetalRenderer* metalRenderer, uint64 key);
|
||||
|
||||
~CachedFBOMtl();
|
||||
|
||||
MTL::RenderPassDescriptor* GetRenderPassDescriptor()
|
||||
{
|
||||
return m_renderPassDescriptor;
|
||||
}
|
||||
|
||||
private:
|
||||
MTL::RenderPassDescriptor* m_renderPassDescriptor = nullptr;
|
||||
};
|
107
src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp
Normal file
107
src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp
Normal file
|
@ -0,0 +1,107 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
|
||||
// Creates the Metal texture backing a Latte texture.
// The texture uses private storage and the effective size (the resolution overwrite if one is
// set, otherwise the guest size), clamped to at least 1 in every dimension. Depending on the
// resulting texture type the depth value is used as 3D depth or as array length.
LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle,
	Latte::E_HWTILEMODE tileMode, bool isDepth)
	: LatteTexture(dim, physAddress, physMipAddress, format, width, height, depth, pitch, mipLevels, swizzle, tileMode, isDepth), m_mtlr(mtlRenderer)
{
	NS_STACK_SCOPED MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init();
	desc->setStorageMode(MTL::StorageModePrivate);

	// effective dimensions
	sint32 effectiveBaseWidth = width;
	sint32 effectiveBaseHeight = height;
	sint32 effectiveBaseDepth = depth;
	if (overwriteInfo.hasResolutionOverwrite)
	{
		effectiveBaseWidth = overwriteInfo.width;
		effectiveBaseHeight = overwriteInfo.height;
		effectiveBaseDepth = overwriteInfo.depth;
	}
	effectiveBaseWidth = std::max(1, effectiveBaseWidth);
	effectiveBaseHeight = std::max(1, effectiveBaseHeight);
	effectiveBaseDepth = std::max(1, effectiveBaseDepth);

	// translate the Latte dimension to a Metal texture type
	MTL::TextureType textureType;
	switch (dim)
	{
	case Latte::E_DIM::DIM_1D:
		textureType = MTL::TextureType1D;
		effectiveBaseHeight = 1; // 1D textures always get a height of 1
		break;
	case Latte::E_DIM::DIM_2D:
	case Latte::E_DIM::DIM_2D_MSAA:
		textureType = MTL::TextureType2D;
		break;
	case Latte::E_DIM::DIM_2D_ARRAY:
		textureType = MTL::TextureType2DArray;
		break;
	case Latte::E_DIM::DIM_3D:
		textureType = MTL::TextureType3D;
		break;
	case Latte::E_DIM::DIM_CUBEMAP:
		cemu_assert_debug(effectiveBaseDepth % 6 == 0 && "cubemaps must have an array length multiple of 6");
		textureType = MTL::TextureTypeCubeArray;
		break;
	default:
		cemu_assert_unimplemented();
		textureType = MTL::TextureType2D;
		break;
	}
	desc->setTextureType(textureType);

	// Clamp mip levels to [1, maxPossibleMipLevels], with 1 taking precedence
	mipLevels = std::max(std::min(mipLevels, (uint32)maxPossibleMipLevels), (uint32)1);

	desc->setWidth(effectiveBaseWidth);
	desc->setHeight(effectiveBaseHeight);
	desc->setMipmapLevelCount(mipLevels);

	// the depth value is interpreted according to the texture type
	switch (textureType)
	{
	case MTL::TextureType3D:
		desc->setDepth(effectiveBaseDepth);
		break;
	case MTL::TextureTypeCubeArray:
		desc->setArrayLength(effectiveBaseDepth / 6); // six faces per cube
		break;
	case MTL::TextureType2DArray:
		desc->setArrayLength(effectiveBaseDepth);
		break;
	default:
		break;
	}

	desc->setPixelFormat(GetMtlPixelFormat(format, isDepth));

	// every texture can be sampled and reinterpreted; renderable formats can also be render targets
	MTL::TextureUsage usage = MTL::TextureUsageShaderRead | MTL::TextureUsagePixelFormatView;
	if (FormatIsRenderable(format))
		usage |= MTL::TextureUsageRenderTarget;
	desc->setUsage(usage);

	m_texture = mtlRenderer->GetDevice()->newTexture(desc);
}
|
||||
|
||||
// Drops the reference to the Metal texture created in the constructor.
LatteTextureMtl::~LatteTextureMtl()
{
	m_texture->release();
}
|
||||
|
||||
// Creates a Metal texture view over a mip/slice sub-range of this texture.
// In debug builds the range is checked to be non-empty and to lie within the texture.
// Returns a newly allocated LatteTextureViewMtl.
LatteTextureView* LatteTextureMtl::CreateView(Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount)
{
	// mip range
	cemu_assert_debug(mipCount > 0);
	cemu_assert_debug((firstMip + mipCount) <= this->mipLevels);
	// slice range
	cemu_assert_debug(sliceCount > 0);
	cemu_assert_debug((firstSlice + sliceCount) <= this->depth);

	auto* view = new LatteTextureViewMtl(m_mtlr, this, dim, format, firstMip, mipCount, firstSlice, sliceCount);
	return view;
}
|
||||
|
||||
// TODO: lazy allocation?
|
||||
// Intentionally empty: the Metal texture is already created by the constructor.
void LatteTextureMtl::AllocateOnHost()
{
}
|
29
src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h
Normal file
29
src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
#include "Cafe/HW/Latte/Core/LatteTexture.h"
|
||||
#include "HW/Latte/ISA/LatteReg.h"
|
||||
#include "util/ChunkedHeap/ChunkedHeap.h"
|
||||
|
||||
class LatteTextureMtl : public LatteTexture
|
||||
{
|
||||
public:
|
||||
LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels,
|
||||
uint32 swizzle, Latte::E_HWTILEMODE tileMode, bool isDepth);
|
||||
~LatteTextureMtl();
|
||||
|
||||
MTL::Texture* GetTexture() const {
|
||||
return m_texture;
|
||||
}
|
||||
|
||||
void AllocateOnHost() override;
|
||||
|
||||
protected:
|
||||
LatteTextureView* CreateView(Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount) override;
|
||||
|
||||
private:
|
||||
class MetalRenderer* m_mtlr;
|
||||
|
||||
MTL::Texture* m_texture;
|
||||
};
|
52
src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp
Normal file
52
src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp
Normal file
|
@ -0,0 +1,52 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
|
||||
// Releases the command buffer retained by StartTransfer, if a transfer was ever started.
LatteTextureReadbackInfoMtl::~LatteTextureReadbackInfoMtl()
{
	if (m_commandBuffer == nullptr)
		return;
	m_commandBuffer->release();
}
|
||||
|
||||
void LatteTextureReadbackInfoMtl::StartTransfer()
|
||||
{
|
||||
cemu_assert(m_textureView);
|
||||
|
||||
auto* baseTexture = (LatteTextureMtl*)m_textureView->baseTexture;
|
||||
|
||||
cemu_assert_debug(m_textureView->firstSlice == 0);
|
||||
cemu_assert_debug(m_textureView->firstMip == 0);
|
||||
cemu_assert_debug(m_textureView->baseTexture->dim != Latte::E_DIM::DIM_3D);
|
||||
|
||||
size_t bytesPerRow = GetMtlTextureBytesPerRow(baseTexture->format, baseTexture->isDepth, baseTexture->width);
|
||||
size_t bytesPerImage = GetMtlTextureBytesPerImage(baseTexture->format, baseTexture->isDepth, baseTexture->height, bytesPerRow);
|
||||
|
||||
auto blitCommandEncoder = m_mtlr->GetBlitCommandEncoder();
|
||||
|
||||
blitCommandEncoder->copyFromTexture(baseTexture->GetTexture(), 0, 0, MTL::Origin{0, 0, 0}, MTL::Size{(uint32)baseTexture->width, (uint32)baseTexture->height, 1}, m_mtlr->GetTextureReadbackBuffer(), m_bufferOffset, bytesPerRow, bytesPerImage);
|
||||
|
||||
m_commandBuffer = m_mtlr->GetCurrentCommandBuffer()->retain();
|
||||
// TODO: uncomment?
|
||||
//m_mtlr->RequestSoonCommit();
|
||||
m_mtlr->CommitCommandBuffer();
|
||||
}
|
||||
|
||||
bool LatteTextureReadbackInfoMtl::IsFinished()
|
||||
{
|
||||
// Command buffer wasn't even comitted, let's commit immediately
|
||||
//if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer)
|
||||
// m_mtlr->CommitCommandBuffer();
|
||||
|
||||
return CommandBufferCompleted(m_commandBuffer);
|
||||
}
|
||||
|
||||
void LatteTextureReadbackInfoMtl::ForceFinish()
|
||||
{
|
||||
m_commandBuffer->waitUntilCompleted();
|
||||
}
|
||||
|
||||
// Returns a pointer into the renderer's texture readback buffer at this readback's offset.
uint8* LatteTextureReadbackInfoMtl::GetData()
{
	auto* bufferStart = (uint8*)m_mtlr->GetTextureReadbackBuffer()->contents();
	return bufferStart + m_bufferOffset;
}
|
25
src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h
Normal file
25
src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h
Normal file
|
@ -0,0 +1,25 @@
|
|||
#pragma once
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteTextureReadbackInfo.h"
|
||||
|
||||
class LatteTextureReadbackInfoMtl : public LatteTextureReadbackInfo
|
||||
{
|
||||
public:
|
||||
LatteTextureReadbackInfoMtl(class MetalRenderer* mtlRenderer, LatteTextureView* textureView, uint32 bufferOffset) : LatteTextureReadbackInfo(textureView), m_mtlr{mtlRenderer}, m_bufferOffset{bufferOffset} {}
|
||||
~LatteTextureReadbackInfoMtl();
|
||||
|
||||
void StartTransfer() override;
|
||||
|
||||
bool IsFinished() override;
|
||||
void ForceFinish() override;
|
||||
|
||||
uint8* GetData() override;
|
||||
|
||||
private:
|
||||
class MetalRenderer* m_mtlr;
|
||||
|
||||
MTL::CommandBuffer* m_commandBuffer = nullptr;
|
||||
|
||||
uint32 m_bufferOffset = 0;
|
||||
};
|
191
src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp
Normal file
191
src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp
Normal file
|
@ -0,0 +1,191 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#include "Metal/MTLTexture.hpp"
|
||||
|
||||
// Remaps a single sampler component selector so that it matches the channel layout
// of the Metal format chosen for the given Latte format.
// Selectors 0..3 pick R, G, B, A; larger values are returned unchanged.
uint32 LatteTextureMtl_AdjustTextureCompSel(Latte::E_GX2SURFFMT format, uint32 compSel)
{
	// compSel is unsigned, so "is a channel selector" reduces to an upper bound check
	const bool picksChannel = compSel <= 3;

	switch (format)
	{
	// Single-channel formats: G, B and A all read the red channel.
	// R8 is replicated on all channels (while OpenGL would return 1.0 for BGA instead);
	// R8_SNORM is probably the same as _UNORM, but needs testing.
	case Latte::E_GX2SURFFMT::R8_UNORM:
	case Latte::E_GX2SURFFMT::R8_SNORM:
	case Latte::E_GX2SURFFMT::BC4_UNORM:
	case Latte::E_GX2SURFFMT::BC4_SNORM:
		return (picksChannel && compSel != 0) ? 0 : compSel;

	// Component order is reversed (RGBA -> ABGR).
	// A2_B10_G10_R10 is used in Resident Evil Revelations.
	case Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM:
	case Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM:
		return picksChannel ? 3 - compSel : compSel;

	// RG maps to RG, B maps to ?, A maps to G (guessed)
	case Latte::E_GX2SURFFMT::BC5_UNORM:
	case Latte::E_GX2SURFFMT::BC5_SNORM:
		return (compSel == 3) ? 1 : compSel; // read Alpha as Green

	// Map everything to alpha?
	case Latte::E_GX2SURFFMT::X24_G8_UINT:
		return picksChannel ? 3 : compSel;

	// Red and green are swapped
	case Latte::E_GX2SURFFMT::R4_G4_UNORM:
		if (compSel == 0)
			return 1;
		if (compSel == 1)
			return 0;
		return compSel;

	default:
		return compSel;
	}
}
|
||||
|
||||
// Builds the view description and eagerly creates the unswizzled (RGBA) Metal view.
LatteTextureViewMtl::LatteTextureViewMtl(MetalRenderer* mtlRenderer, LatteTextureMtl* texture, Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount)
	: LatteTextureView(texture, firstMip, mipCount, firstSlice, sliceCount, dim, format),
	  m_mtlr(mtlRenderer),
	  m_baseTexture(texture)
{
	// GetSwizzledView() returns this view directly for the RGBA swizzle, bypassing the caches
	m_rgbaView = CreateSwizzledView(RGBA_SWIZZLE);
}
|
||||
|
||||
// Releases every Metal texture view owned by this object: the RGBA view, the
// occupied slots of the fast cache and all views stored in the fallback cache.
LatteTextureViewMtl::~LatteTextureViewMtl()
{
	if (m_rgbaView)
		m_rgbaView->release();

	// Fast cache: a slot is occupied once its key differs from INVALID_SWIZZLE
	for (auto& entry : m_viewCache)
	{
		if (entry.key != INVALID_SWIZZLE && entry.texture)
			entry.texture->release();
	}

	// Fallback cache: values can be nullptr (for example when a lookup through
	// operator[] default-inserted an entry that was never filled), so never
	// call release() through them unchecked.
	for (auto& [key, texture] : m_fallbackViewCache)
	{
		if (texture)
			texture->release();
	}
}
|
||||
|
||||
// Returns a Metal texture view of this view's range with the given sampler swizzle
// applied, creating and caching it on first use.
//
// Only bits 16..27 of gpuSamplerSwizzle (the four component selectors) are considered.
// Lookup order: the RGBA view, then the fixed-size fast cache, then the fallback map.
// The returned pointer stays owned by this object.
MTL::Texture* LatteTextureViewMtl::GetSwizzledView(uint32 gpuSamplerSwizzle)
{
	// Mask out everything except the component selectors
	gpuSamplerSwizzle &= 0x0FFF0000;

	// RGBA swizzle == no swizzle
	if (gpuSamplerSwizzle == RGBA_SWIZZLE)
		return m_rgbaView;

	// Fast cache: linear scan, remembering the first unused slot on the way
	sint32 freeIndex = -1;
	for (sint32 i = 0; i < (sint32)std::size(m_viewCache); i++)
	{
		const auto& entry = m_viewCache[i];
		if (entry.key == gpuSamplerSwizzle)
			return entry.texture;
		if (entry.key == INVALID_SWIZZLE && freeIndex == -1)
			freeIndex = i;
	}

	// Fallback cache: use find() so that a miss does not insert a nullptr entry.
	// (operator[] would leave a null value in the map whenever the new view ends
	// up in the fast cache, and the destructor releases every map value.)
	auto fallbackIt = m_fallbackViewCache.find(gpuSamplerSwizzle);
	if (fallbackIt != m_fallbackViewCache.end() && fallbackIt->second)
		return fallbackIt->second;

	// Not cached yet: create the view and store it in the fast cache if there is
	// room, otherwise in the fallback map
	MTL::Texture* texture = CreateSwizzledView(gpuSamplerSwizzle);
	if (freeIndex != -1)
		m_viewCache[freeIndex] = {gpuSamplerSwizzle, texture};
	else
		m_fallbackViewCache[gpuSamplerSwizzle] = texture;

	return texture;
}
|
||||
|
||||
// Creates a new Metal texture view over the base texture, covering this view's mip
// and slice range and applying the given sampler swizzle.
// Bits 16..27 of gpuSamplerSwizzle hold four 3-bit component selectors (R, G, B, A).
// The result comes from newTextureView(); the views this class stores are released
// in the destructor.
MTL::Texture* LatteTextureViewMtl::CreateSwizzledView(uint32 gpuSamplerSwizzle)
{
	// Extract one selector from the swizzle word and apply the per-format fixups
	const auto adjustedSelector = [this, gpuSamplerSwizzle](uint32 bitOffset) -> uint32 {
		return LatteTextureMtl_AdjustTextureCompSel(format, (gpuSamplerSwizzle >> bitOffset) & 0x7);
	};
	const uint32 selR = adjustedSelector(16);
	const uint32 selG = adjustedSelector(19);
	const uint32 selB = adjustedSelector(22);
	const uint32 selA = adjustedSelector(25);

	// Translate the Latte dimension into a Metal texture type
	MTL::TextureType textureType;
	switch (dim)
	{
	case Latte::E_DIM::DIM_1D:
		textureType = MTL::TextureType1D;
		break;
	case Latte::E_DIM::DIM_2D:
	case Latte::E_DIM::DIM_2D_MSAA:
		textureType = MTL::TextureType2D;
		break;
	case Latte::E_DIM::DIM_2D_ARRAY:
		textureType = MTL::TextureType2DArray;
		break;
	case Latte::E_DIM::DIM_3D:
		textureType = MTL::TextureType3D;
		break;
	case Latte::E_DIM::DIM_CUBEMAP:
		cemu_assert_debug(this->numSlice % 6 == 0 && "cubemaps must have an array length multiple of 6");

		textureType = MTL::TextureTypeCubeArray;
		break;
	default:
		cemu_assert_unimplemented();
		textureType = MTL::TextureType2D;
		break;
	}

	// TODO: check if base texture is 3D texture as well
	const bool is3D = (textureType == MTL::TextureType3D);
	if (is3D)
	{
		cemu_assert_debug(firstMip == 0);
		cemu_assert_debug(this->numSlice == baseTexture->depth);
	}

	// 3D views always start at layer 0 with a single layer; only array-like
	// types span more than one layer
	const bool isLayered = (textureType == MTL::TextureTypeCubeArray || textureType == MTL::TextureType2DArray);
	const uint32 baseLayer = is3D ? 0 : firstSlice;
	const uint32 layerCount = isLayered ? this->numSlice : 1;

	MTL::TextureSwizzleChannels swizzle;
	swizzle.red = GetMtlTextureSwizzle(selR);
	swizzle.green = GetMtlTextureSwizzle(selG);
	swizzle.blue = GetMtlTextureSwizzle(selB);
	swizzle.alpha = GetMtlTextureSwizzle(selA);

	// Clamp the mip count to what the base texture can provide, but keep at least one level
	const uint32 baseLevel = firstMip;
	const uint32 remainingLevels = m_baseTexture->maxPossibleMipLevels - baseLevel;
	uint32 levelCount = std::min<uint32>(this->numMip, remainingLevels);
	levelCount = std::max<uint32>(levelCount, 1);

	const auto pixelFormat = GetMtlPixelFormat(format, m_baseTexture->isDepth);
	const NS::Range levelRange = NS::Range::Make(baseLevel, levelCount);
	const NS::Range layerRange = NS::Range::Make(baseLayer, layerCount);

	return m_baseTexture->GetTexture()->newTextureView(pixelFormat, textureType, levelRange, layerRange, swizzle);
}
|
37
src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h
Normal file
37
src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "Cafe/HW/Latte/Core/LatteTexture.h"
|
||||
|
||||
#define RGBA_SWIZZLE 0x06880000
|
||||
#define INVALID_SWIZZLE 0xFFFFFFFF
|
||||
|
||||
class LatteTextureViewMtl : public LatteTextureView
|
||||
{
|
||||
public:
|
||||
LatteTextureViewMtl(class MetalRenderer* mtlRenderer, class LatteTextureMtl* texture, Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount);
|
||||
~LatteTextureViewMtl();
|
||||
|
||||
MTL::Texture* GetSwizzledView(uint32 gpuSamplerSwizzle);
|
||||
|
||||
MTL::Texture* GetRGBAView()
|
||||
{
|
||||
return GetSwizzledView(RGBA_SWIZZLE);
|
||||
}
|
||||
|
||||
private:
|
||||
class MetalRenderer* m_mtlr;
|
||||
|
||||
class LatteTextureMtl* m_baseTexture;
|
||||
|
||||
MTL::Texture* m_rgbaView;
|
||||
struct {
|
||||
uint32 key;
|
||||
MTL::Texture* texture;
|
||||
} m_viewCache[4] = {{INVALID_SWIZZLE, nullptr}, {INVALID_SWIZZLE, nullptr}, {INVALID_SWIZZLE, nullptr}, {INVALID_SWIZZLE, nullptr}};
|
||||
std::unordered_map<uint32, MTL::Texture*> m_fallbackViewCache;
|
||||
|
||||
MTL::Texture* CreateSwizzledView(uint32 gpuSamplerSwizzle);
|
||||
};
|
511
src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp
Normal file
511
src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp
Normal file
|
@ -0,0 +1,511 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#include "Cemu/Logging/CemuLogging.h"
|
||||
#include "HW/Latte/Core/LatteTextureLoader.h"
|
||||
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
|
||||
std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_COLOR_FORMAT_TABLE = {
|
||||
{Latte::E_GX2SURFFMT::INVALID_FORMAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}},
|
||||
|
||||
{Latte::E_GX2SURFFMT::R4_G4_UNORM, {MTL::PixelFormatABGR4Unorm, MetalDataType::FLOAT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R5_G6_B5_UNORM, {MTL::PixelFormatB5G6R5Unorm, MetalDataType::FLOAT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM, {MTL::PixelFormatBGR5A1Unorm, MetalDataType::FLOAT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM, {MTL::PixelFormatABGR4Unorm, MetalDataType::FLOAT, 2}},
|
||||
{Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM, {MTL::PixelFormatA1BGR5Unorm, MetalDataType::FLOAT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R8_UNORM, {MTL::PixelFormatR8Unorm, MetalDataType::FLOAT, 1}},
|
||||
{Latte::E_GX2SURFFMT::R8_SNORM, {MTL::PixelFormatR8Snorm, MetalDataType::FLOAT, 1}},
|
||||
{Latte::E_GX2SURFFMT::R8_UINT, {MTL::PixelFormatR8Uint, MetalDataType::UINT, 1}},
|
||||
{Latte::E_GX2SURFFMT::R8_SINT, {MTL::PixelFormatR8Sint, MetalDataType::INT, 1}},
|
||||
{Latte::E_GX2SURFFMT::R8_G8_UNORM, {MTL::PixelFormatRG8Unorm, MetalDataType::FLOAT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R8_G8_SNORM, {MTL::PixelFormatRG8Snorm, MetalDataType::FLOAT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R8_G8_UINT, {MTL::PixelFormatRG8Uint, MetalDataType::UINT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R8_G8_SINT, {MTL::PixelFormatRG8Sint, MetalDataType::INT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM, {MTL::PixelFormatRGBA8Unorm, MetalDataType::FLOAT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM, {MTL::PixelFormatRGBA8Snorm, MetalDataType::FLOAT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT, {MTL::PixelFormatRGBA8Uint, MetalDataType::UINT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT, {MTL::PixelFormatRGBA8Sint, MetalDataType::INT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, MetalDataType::FLOAT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM, {MTL::PixelFormatRGB10A2Unorm, MetalDataType::FLOAT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, MetalDataType::FLOAT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGB10A2Unorm, MetalDataType::FLOAT, 4}}, // TODO: sRGB?
|
||||
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, MetalDataType::FLOAT, 4}},
|
||||
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, MetalDataType::FLOAT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R16_SNORM, {MTL::PixelFormatR16Snorm, MetalDataType::FLOAT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R16_UINT, {MTL::PixelFormatR16Uint, MetalDataType::UINT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R16_SINT, {MTL::PixelFormatR16Sint, MetalDataType::INT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R16_FLOAT, {MTL::PixelFormatR16Float, MetalDataType::FLOAT, 2}},
|
||||
{Latte::E_GX2SURFFMT::R16_G16_UNORM, {MTL::PixelFormatRG16Unorm, MetalDataType::FLOAT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R16_G16_SNORM, {MTL::PixelFormatRG16Snorm, MetalDataType::FLOAT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R16_G16_UINT, {MTL::PixelFormatRG16Uint, MetalDataType::UINT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R16_G16_SINT, {MTL::PixelFormatRG16Sint, MetalDataType::INT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R16_G16_FLOAT, {MTL::PixelFormatRG16Float, MetalDataType::FLOAT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM, {MTL::PixelFormatRGBA16Unorm, MetalDataType::FLOAT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM, {MTL::PixelFormatRGBA16Snorm, MetalDataType::FLOAT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, MetalDataType::UINT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, MetalDataType::FLOAT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatRGBA8Uint, MetalDataType::UINT, 4}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatRGBA16Uint, MetalDataType::UINT, 8}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, MetalDataType::FLOAT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, MetalDataType::UINT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R32_SINT, {MTL::PixelFormatR32Sint, MetalDataType::INT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R32_FLOAT, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R32_G32_UINT, {MTL::PixelFormatRG32Uint, MetalDataType::UINT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R32_G32_SINT, {MTL::PixelFormatRG32Sint, MetalDataType::INT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R32_G32_FLOAT, {MTL::PixelFormatRG32Float, MetalDataType::FLOAT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT, {MTL::PixelFormatRGBA32Uint, MetalDataType::UINT, 16}},
|
||||
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_SINT, {MTL::PixelFormatRGBA32Sint, MetalDataType::INT, 16}},
|
||||
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT, {MTL::PixelFormatRGBA32Float, MetalDataType::FLOAT, 16}},
|
||||
{Latte::E_GX2SURFFMT::BC1_UNORM, {MTL::PixelFormatBC1_RGBA, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::BC1_SRGB, {MTL::PixelFormatBC1_RGBA_sRGB, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::BC2_UNORM, {MTL::PixelFormatBC2_RGBA, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::BC2_SRGB, {MTL::PixelFormatBC2_RGBA_sRGB, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::BC3_UNORM, {MTL::PixelFormatBC3_RGBA, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::BC3_SRGB, {MTL::PixelFormatBC3_RGBA_sRGB, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::BC4_UNORM, {MTL::PixelFormatBC4_RUnorm, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::BC4_SNORM, {MTL::PixelFormatBC4_RSnorm, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::BC5_UNORM, {MTL::PixelFormatBC5_RGUnorm, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::BC5_SNORM, {MTL::PixelFormatBC5_RGSnorm, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
|
||||
};
|
||||
|
||||
std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_DEPTH_FORMAT_TABLE = {
|
||||
{Latte::E_GX2SURFFMT::INVALID_FORMAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}},
|
||||
|
||||
{Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, MetalDataType::NONE, 4, {1, 1}, true}},
|
||||
{Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 4, {1, 1}, true}},
|
||||
{Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 5, {1, 1}, true}},
|
||||
{Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2, {1, 1}}},
|
||||
{Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, MetalDataType::NONE, 4, {1, 1}}},
|
||||
};
|
||||
|
||||
// TODO: R10_G10_B10_A2_UINT and R10_G10_B10_A2_SINT
|
||||
// TODO: A2_B10_G10_R10_UNORM and A2_B10_G10_R10_UINT
|
||||
void CheckForPixelFormatSupport(const MetalPixelFormatSupport& support)
|
||||
{
|
||||
// Texture decoders
|
||||
|
||||
// Color
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT].textureDecoder = TextureDecoder_R32_G32_B32_A32_FLOAT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT].textureDecoder = TextureDecoder_R32_G32_B32_A32_UINT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT].textureDecoder = TextureDecoder_R16_G16_B16_A16_FLOAT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT].textureDecoder = TextureDecoder_R16_G16_B16_A16_UINT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM].textureDecoder = TextureDecoder_R16_G16_B16_A16::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM].textureDecoder = TextureDecoder_R16_G16_B16_A16::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM].textureDecoder = TextureDecoder_R8_G8_B8_A8::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM].textureDecoder = TextureDecoder_R8_G8_B8_A8::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB].textureDecoder = TextureDecoder_R8_G8_B8_A8::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT].textureDecoder = TextureDecoder_R8_G8_B8_A8::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT].textureDecoder = TextureDecoder_R8_G8_B8_A8::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R32_G32_FLOAT].textureDecoder = TextureDecoder_R32_G32_FLOAT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R32_G32_UINT].textureDecoder = TextureDecoder_R32_G32_UINT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_G16_UNORM].textureDecoder = TextureDecoder_R16_G16::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_G16_FLOAT].textureDecoder = TextureDecoder_R16_G16_FLOAT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_UNORM].textureDecoder = TextureDecoder_R8_G8::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_SNORM].textureDecoder = TextureDecoder_R8_G8::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_UNORM].textureDecoder = TextureDecoder_R4_G4_UNORM_To_ABGR4::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R32_FLOAT].textureDecoder = TextureDecoder_R32_FLOAT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R32_UINT].textureDecoder = TextureDecoder_R32_UINT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_FLOAT].textureDecoder = TextureDecoder_R16_FLOAT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_UNORM].textureDecoder = TextureDecoder_R16_UNORM::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_SNORM].textureDecoder = TextureDecoder_R16_SNORM::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_UINT].textureDecoder = TextureDecoder_R16_UINT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_UNORM].textureDecoder = TextureDecoder_R8::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_SNORM].textureDecoder = TextureDecoder_R8::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_UINT].textureDecoder = TextureDecoder_R8_UINT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G6_B5_UNORM].textureDecoder = TextureDecoder_R5_G6_B5_swappedRB::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM].textureDecoder = TextureDecoder_R5_G5_B5_A1_UNORM_swappedRB::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM].textureDecoder = TextureDecoder_A1_B5_G5_R5_UNORM::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT].textureDecoder = TextureDecoder_R11_G11_B10_FLOAT::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM].textureDecoder = TextureDecoder_R4_G4_B4_A4_UNORM::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM].textureDecoder = TextureDecoder_R10_G10_B10_A2_UNORM::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM].textureDecoder = TextureDecoder_R10_G10_B10_A2_SNORM_To_RGBA16::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB].textureDecoder = TextureDecoder_R10_G10_B10_A2_UNORM::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC1_SRGB].textureDecoder = TextureDecoder_BC1::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC1_UNORM].textureDecoder = TextureDecoder_BC1::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC2_UNORM].textureDecoder = TextureDecoder_BC2::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC2_SRGB].textureDecoder = TextureDecoder_BC2::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC3_UNORM].textureDecoder = TextureDecoder_BC3::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC3_SRGB].textureDecoder = TextureDecoder_BC3::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC4_UNORM].textureDecoder = TextureDecoder_BC4::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC4_SNORM].textureDecoder = TextureDecoder_BC4::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC5_UNORM].textureDecoder = TextureDecoder_BC5::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC5_SNORM].textureDecoder = TextureDecoder_BC5::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R24_X8_UNORM].textureDecoder = TextureDecoder_R24_X8::getInstance();
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::X24_G8_UINT].textureDecoder = TextureDecoder_X24_G8_UINT::getInstance();
|
||||
|
||||
if (!support.m_supportsPacked16BitFormats)
|
||||
{
|
||||
// B5G6R5Unorm
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G6_B5_UNORM].pixelFormat = MTL::PixelFormatRGBA8Unorm;
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G6_B5_UNORM].bytesPerBlock = 4;
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G6_B5_UNORM].textureDecoder = TextureDecoder_R5G6B5_UNORM_To_RGBA8::getInstance();
|
||||
|
||||
// A1BGR5Unorm
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM].pixelFormat = MTL::PixelFormatRGBA8Unorm;
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM].textureDecoder = TextureDecoder_A1_B5_G5_R5_UNORM_vulkan_To_RGBA8::getInstance();
|
||||
|
||||
// ABGR4Unorm
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_UNORM].pixelFormat = MTL::PixelFormatRG8Unorm;
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_UNORM].bytesPerBlock = 2;
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_UNORM].textureDecoder = TextureDecoder_R4G4_UNORM_To_RG8::getInstance();
|
||||
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM].pixelFormat = MTL::PixelFormatRGBA8Unorm;
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM].bytesPerBlock = 4;
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM].textureDecoder = TextureDecoder_R4G4B4A4_UNORM_To_RGBA8::getInstance();
|
||||
|
||||
// BGR5A1Unorm
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM].pixelFormat = MTL::PixelFormatRGBA8Unorm;
|
||||
MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM].textureDecoder = TextureDecoder_R5_G5_B5_A1_UNORM_swappedRB_To_RGBA8::getInstance();
|
||||
}
|
||||
|
||||
// Depth
|
||||
MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D24_S8_UNORM].textureDecoder = TextureDecoder_D24_S8::getInstance();
|
||||
MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D24_S8_FLOAT].textureDecoder = TextureDecoder_NullData64::getInstance(); // TODO: why?
|
||||
MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D32_FLOAT].textureDecoder = TextureDecoder_R32_FLOAT::getInstance();
|
||||
MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D16_UNORM].textureDecoder = TextureDecoder_R16_UNORM::getInstance();
|
||||
MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D32_S8_FLOAT].textureDecoder = TextureDecoder_D32_S8_UINT_X24::getInstance();
|
||||
|
||||
if (!support.m_supportsDepth24Unorm_Stencil8)
|
||||
{
|
||||
// Depth24Unorm_Stencil8
|
||||
MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D24_S8_UNORM].pixelFormat = MTL::PixelFormatDepth32Float_Stencil8;
|
||||
// TODO: implement the decoder
|
||||
//MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D24_S8_UNORM].textureDecoder = TextureDecoder_D24_S8_To_D32_S8::getInstance();
|
||||
}
|
||||
}
|
||||
|
||||
// Looks up the Metal format description for a Latte format in the depth or color table.
// Formats missing from the selected table yield a fixed fallback description
// (Depth16Unorm for depth, R8Unorm for color).
const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth)
{
	if (!isDepth)
	{
		const auto colorEntry = MTL_COLOR_FORMAT_TABLE.find(format);
		if (colorEntry != MTL_COLOR_FORMAT_TABLE.end())
			return colorEntry->second;

		return {MTL::PixelFormatR8Unorm, MetalDataType::FLOAT, 1}; // Fallback
	}

	const auto depthEntry = MTL_DEPTH_FORMAT_TABLE.find(format);
	if (depthEntry != MTL_DEPTH_FORMAT_TABLE.end())
		return depthEntry->second;

	return {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2}; // Fallback
}
|
||||
|
||||
// Returns only the Metal pixel format for a Latte format and logs when the
// lookup resolves to MTL::PixelFormatInvalid.
MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth)
{
	const MTL::PixelFormat resolved = GetMtlPixelFormatInfo(format, isDepth).pixelFormat;

	const bool isInvalid = (resolved == MTL::PixelFormatInvalid);
	if (isInvalid)
	{
		cemuLog_log(LogType::Force, "invalid pixel format 0x{:x}, is depth: {}\n", format, isDepth);
	}

	return resolved;
}
|
||||
|
||||
// Integer division rounded up: the smallest q with q * b >= a.
// b must be non-zero.
inline uint32 CeilDivide(uint32 a, uint32 b) {
	// Written as quotient + "has remainder" instead of (a + b - 1) / b, because the
	// sum wraps around for values of a close to the uint32 maximum.
	return a / b + (a % b != 0 ? 1 : 0);
}
|
||||
|
||||
// Bytes needed for one row of blocks: the width is rounded up to whole blocks
// and multiplied by the format's bytes per block.
size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, bool isDepth, uint32 width)
{
	const MetalPixelFormatInfo info = GetMtlPixelFormatInfo(format, isDepth);
	const uint32 blocksPerRow = CeilDivide(width, info.blockTexelSize.x);

	return blocksPerRow * info.bytesPerBlock;
}
|
||||
|
||||
// Bytes needed for one image: the height is rounded up to whole block rows
// and multiplied by the given row pitch.
size_t GetMtlTextureBytesPerImage(Latte::E_GX2SURFFMT format, bool isDepth, uint32 height, size_t bytesPerRow)
{
	const MetalPixelFormatInfo info = GetMtlPixelFormatInfo(format, isDepth);
	const uint32 blockRows = CeilDivide(height, info.blockTexelSize.y);

	return blockRows * bytesPerRow;
}
|
||||
|
||||
// Translates a Latte primitive mode into the Metal primitive type used to draw it.
// Several Latte modes have no direct Metal counterpart; the notes on the individual
// cases describe the emulation each mapping relies on.
MTL::PrimitiveType GetMtlPrimitiveType(LattePrimitiveMode primitiveMode)
{
	using LattePrim = Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE;

	switch (primitiveMode)
	{
	case LattePrim::POINTS:
		return MTL::PrimitiveTypePoint;

	case LattePrim::LINES:
		return MTL::PrimitiveTypeLine;

	case LattePrim::LINE_STRIP_ADJACENT: // Tropical Freeze level 3-6
		cemuLog_logOnce(LogType::Force, "Metal doesn't support line strip adjacent primitive, using line strip instead");
		return MTL::PrimitiveTypeLineStrip;

	case LattePrim::LINE_STRIP:
	case LattePrim::LINE_LOOP: // line loops are emulated as line strips with an extra connecting strip at the end
		return MTL::PrimitiveTypeLineStrip;

	case LattePrim::TRIANGLE_FAN:
	case LattePrim::TRIANGLE_STRIP:
		return MTL::PrimitiveTypeTriangleStrip;

	case LattePrim::TRIANGLES:
	case LattePrim::QUADS:      // quads are emulated as 2 triangles
	case LattePrim::QUAD_STRIP: // quad strips are emulated as (count-2)/2 triangles
	case LattePrim::RECTS:      // rects are emulated as 2 triangles
		return MTL::PrimitiveTypeTriangle;

	default:
		cemuLog_log(LogType::Force, "Unsupported primitive mode {}", primitiveMode);
		cemu_assert_debug(false);
		return MTL::PrimitiveTypeTriangle;
	}
}
|
||||
|
||||
// Maps a Latte vertex attribute format to the Metal vertex format used to fetch it.
// Every format is fetched as unsigned integer data of matching width and component
// count; the FLOAT variants map to the same Metal format as their integer counterparts.
// Unknown formats are logged and yield MTL::VertexFormatInvalid.
MTL::VertexFormat GetMtlVertexFormat(uint8 format)
{
	switch (format)
	{
	// 32 bits per component
	case FMT_32_32_32_32_FLOAT:
	case FMT_32_32_32_32:
		return MTL::VertexFormatUInt4;
	case FMT_32_32_32_FLOAT:
	case FMT_32_32_32:
		return MTL::VertexFormatUInt3;
	case FMT_32_32_FLOAT:
	case FMT_32_32:
		return MTL::VertexFormatUInt2;
	case FMT_32_FLOAT:
	case FMT_32:
		return MTL::VertexFormatUInt;

	// 16 bits per component
	case FMT_16_16_16_16:       // verified to match OpenGL
	case FMT_16_16_16_16_FLOAT: // verified to match OpenGL
		return MTL::VertexFormatUShort4;
	case FMT_16_16_16:
	case FMT_16_16_16_FLOAT:
		return MTL::VertexFormatUShort3;
	case FMT_16_16:
	case FMT_16_16_FLOAT:
		return MTL::VertexFormatUShort2;
	case FMT_16:
	case FMT_16_FLOAT:
		return MTL::VertexFormatUShort;

	// 8 bits per component
	case FMT_8_8_8_8:
		return MTL::VertexFormatUChar4;
	case FMT_8_8_8:
		return MTL::VertexFormatUChar3;
	case FMT_8_8:
		return MTL::VertexFormatUChar2;
	case FMT_8:
		return MTL::VertexFormatUChar;

	// packed
	case FMT_2_10_10_10:
		return MTL::VertexFormatUInt; // verified to match OpenGL

	default:
		cemuLog_log(LogType::Force, "unsupported vertex format {}", (uint32)format);
		assert_dbg();
		return MTL::VertexFormatInvalid;
	}
}
|
||||
|
||||
// Returns the size in bytes of one vertex attribute of the given Latte format,
// or 0 when the format is not recognised.
uint32 GetMtlVertexFormatSize(uint8 format)
{
	switch (format)
	{
	case FMT_32_32_32_32_FLOAT:
	case FMT_32_32_32_32:
		return 16;

	case FMT_32_32_32_FLOAT:
	case FMT_32_32_32:
		return 12;

	case FMT_32_32_FLOAT:
	case FMT_32_32:
	case FMT_16_16_16_16:
	case FMT_16_16_16_16_FLOAT:
		return 8;

	case FMT_16_16_16:
	case FMT_16_16_16_FLOAT:
		return 6;

	case FMT_32_FLOAT:
	case FMT_32:
	case FMT_8_8_8_8:
	case FMT_16_16:
	case FMT_16_16_FLOAT:
	case FMT_2_10_10_10:
		return 4;

	case FMT_8_8_8:
		return 3;

	case FMT_8_8:
	case FMT_16:
	case FMT_16_FLOAT:
		return 2;

	case FMT_8:
		return 1;

	default:
		return 0;
	}
}
|
||||
|
||||
// Converts the renderer's index type into the Metal index type.
// Anything other than U16/U32 is flagged as suspicious and treated as 32-bit.
MTL::IndexType GetMtlIndexType(Renderer::INDEX_TYPE indexType)
{
	if (indexType == Renderer::INDEX_TYPE::U16)
		return MTL::IndexTypeUInt16;

	if (indexType != Renderer::INDEX_TYPE::U32)
		cemu_assert_suspicious();

	return MTL::IndexTypeUInt32;
}
|
||||
|
||||
// Converts a Latte blend combine function into the Metal blend operation.
// Unknown values are flagged as suspicious and fall back to additive blending.
MTL::BlendOperation GetMtlBlendOp(Latte::LATTE_CB_BLENDN_CONTROL::E_COMBINEFUNC combineFunc)
{
	using CombineFunc = Latte::LATTE_CB_BLENDN_CONTROL::E_COMBINEFUNC;

	MTL::BlendOperation blendOp = MTL::BlendOperationAdd;
	switch (combineFunc)
	{
	case CombineFunc::DST_PLUS_SRC:
		break;
	case CombineFunc::SRC_MINUS_DST:
		blendOp = MTL::BlendOperationSubtract;
		break;
	case CombineFunc::MIN_DST_SRC:
		blendOp = MTL::BlendOperationMin;
		break;
	case CombineFunc::MAX_DST_SRC:
		blendOp = MTL::BlendOperationMax;
		break;
	case CombineFunc::DST_MINUS_SRC:
		blendOp = MTL::BlendOperationReverseSubtract;
		break;
	default:
		cemu_assert_suspicious();
		break;
	}

	return blendOp;
}
|
||||
|
||||
// Metal blend factor for each Latte blend factor value; the array index is the
// numeric Latte value.
const MTL::BlendFactor MTL_BLEND_FACTORS[] =
{
	/* 0x00 */ MTL::BlendFactorZero,
	/* 0x01 */ MTL::BlendFactorOne,
	/* 0x02 */ MTL::BlendFactorSourceColor,
	/* 0x03 */ MTL::BlendFactorOneMinusSourceColor,
	/* 0x04 */ MTL::BlendFactorSourceAlpha,
	/* 0x05 */ MTL::BlendFactorOneMinusSourceAlpha,
	/* 0x06 */ MTL::BlendFactorDestinationAlpha,
	/* 0x07 */ MTL::BlendFactorOneMinusDestinationAlpha,
	/* 0x08 */ MTL::BlendFactorDestinationColor,
	/* 0x09 */ MTL::BlendFactorOneMinusDestinationColor,
	/* 0x0A */ MTL::BlendFactorSourceAlphaSaturated,
	/* 0x0B */ MTL::BlendFactorZero, // TODO
	/* 0x0C */ MTL::BlendFactorZero, // TODO
	/* 0x0D */ MTL::BlendFactorBlendColor,
	/* 0x0E */ MTL::BlendFactorOneMinusBlendColor,
	/* 0x0F */ MTL::BlendFactorSource1Color,
	/* 0x10 */ MTL::BlendFactorOneMinusSource1Color,
	/* 0x11 */ MTL::BlendFactorSource1Alpha,
	/* 0x12 */ MTL::BlendFactorOneMinusSource1Alpha,
	/* 0x13 */ MTL::BlendFactorBlendAlpha,
	/* 0x14 */ MTL::BlendFactorOneMinusBlendAlpha
};

// Converts a Latte blend factor into the Metal blend factor.
// A value beyond the end of the table trips the debug assert and, in builds where
// the assert is compiled out, falls back to MTL::BlendFactorZero instead of reading
// past the end of the array.
MTL::BlendFactor GetMtlBlendFactor(Latte::LATTE_CB_BLENDN_CONTROL::E_BLENDFACTOR factor)
{
	const uint32 index = (uint32)factor;
	if (index >= std::size(MTL_BLEND_FACTORS))
	{
		cemu_assert_debug(false);
		return MTL::BlendFactorZero;
	}

	return MTL_BLEND_FACTORS[index];
}
|
||||
|
||||
// Metal compare function for each Latte compare function value (array index is
// the numeric Latte value).
const MTL::CompareFunction MTL_COMPARE_FUNCTIONS[8] =
{
	/* 0 */ MTL::CompareFunctionNever,
	/* 1 */ MTL::CompareFunctionLess,
	/* 2 */ MTL::CompareFunctionEqual,
	/* 3 */ MTL::CompareFunctionLessEqual,
	/* 4 */ MTL::CompareFunctionGreater,
	/* 5 */ MTL::CompareFunctionNotEqual,
	/* 6 */ MTL::CompareFunctionGreaterEqual,
	/* 7 */ MTL::CompareFunctionAlways
};

// Converts a Latte compare function into the Metal compare function by table lookup.
// The index is only range-checked by the debug assert.
MTL::CompareFunction GetMtlCompareFunc(Latte::E_COMPAREFUNC func)
{
	const uint32 tableIndex = (uint32)func;
	cemu_assert_debug(tableIndex < std::size(MTL_COMPARE_FUNCTIONS));

	return MTL_COMPARE_FUNCTIONS[tableIndex];
}
|
||||
|
||||
// TODO: clamp to border color? (should be fine though)
|
||||
// Metal sampler address mode for each Latte clamp mode (array index is the numeric
// Latte value). The two HALF_BORDER modes have no Metal equivalent and are mapped
// to the substitutes noted on their entries.
const MTL::SamplerAddressMode MTL_SAMPLER_ADDRESS_MODES[] =
{
	/* 0 */ MTL::SamplerAddressModeRepeat,             // WRAP
	/* 1 */ MTL::SamplerAddressModeMirrorRepeat,       // MIRROR
	/* 2 */ MTL::SamplerAddressModeClampToEdge,        // CLAMP_LAST_TEXEL
	/* 3 */ MTL::SamplerAddressModeMirrorClampToEdge,  // MIRROR_ONCE_LAST_TEXEL
	/* 4 */ MTL::SamplerAddressModeClampToEdge,        // unsupported HALF_BORDER
	/* 5 */ MTL::SamplerAddressModeClampToBorderColor, // unsupported MIRROR_ONCE_HALF_BORDER
	/* 6 */ MTL::SamplerAddressModeClampToBorderColor, // CLAMP_BORDER
	/* 7 */ MTL::SamplerAddressModeClampToBorderColor  // MIRROR_ONCE_BORDER
};

// Converts a Latte sampler clamp mode into the Metal address mode by table lookup.
// The index is only range-checked by the debug assert.
MTL::SamplerAddressMode GetMtlSamplerAddressMode(Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_CLAMP clamp)
{
	const uint32 tableIndex = (uint32)clamp;
	cemu_assert_debug(tableIndex < std::size(MTL_SAMPLER_ADDRESS_MODES));

	return MTL_SAMPLER_ADDRESS_MODES[tableIndex];
}
|
||||
|
||||
// Metal texture swizzle for each Latte component select value (array index is the
// numeric Latte value). Values 6 and 7 are mapped to zero.
const MTL::TextureSwizzle MTL_TEXTURE_SWIZZLES[] = {
	/* 0 */ MTL::TextureSwizzleRed,
	/* 1 */ MTL::TextureSwizzleGreen,
	/* 2 */ MTL::TextureSwizzleBlue,
	/* 3 */ MTL::TextureSwizzleAlpha,
	/* 4 */ MTL::TextureSwizzleZero,
	/* 5 */ MTL::TextureSwizzleOne,
	/* 6 */ MTL::TextureSwizzleZero,
	/* 7 */ MTL::TextureSwizzleZero
};

// Converts a Latte component select value into the Metal texture swizzle.
// The parameter is a raw integer, so a value beyond the table trips the debug
// assert and, in builds where the assert is compiled out, falls back to
// MTL::TextureSwizzleZero instead of reading past the end of the array.
MTL::TextureSwizzle GetMtlTextureSwizzle(uint32 swizzle)
{
	if (swizzle >= std::size(MTL_TEXTURE_SWIZZLES))
	{
		cemu_assert_debug(false);
		return MTL::TextureSwizzleZero;
	}

	return MTL_TEXTURE_SWIZZLES[swizzle];
}
|
||||
|
||||
// Metal stencil operation for each Latte stencil action (array index is the
// numeric Latte value).
const MTL::StencilOperation MTL_STENCIL_OPERATIONS[8] =
{
	/* 0 */ MTL::StencilOperationKeep,
	/* 1 */ MTL::StencilOperationZero,
	/* 2 */ MTL::StencilOperationReplace,
	/* 3 */ MTL::StencilOperationIncrementClamp,
	/* 4 */ MTL::StencilOperationDecrementClamp,
	/* 5 */ MTL::StencilOperationInvert,
	/* 6 */ MTL::StencilOperationIncrementWrap,
	/* 7 */ MTL::StencilOperationDecrementWrap
};

// Converts a Latte stencil action into the Metal stencil operation by table lookup.
// The index is only range-checked by the debug assert.
MTL::StencilOperation GetMtlStencilOp(Latte::LATTE_DB_DEPTH_CONTROL::E_STENCILACTION action)
{
	const uint32 tableIndex = (uint32)action;
	cemu_assert_debug(tableIndex < std::size(MTL_STENCIL_OPERATIONS));

	return MTL_STENCIL_OPERATIONS[tableIndex];
}
|
||||
|
||||
// Builds a Metal color write mask from a 4-bit channel mask:
// bit 0 = red, bit 1 = green, bit 2 = blue, bit 3 = alpha. Higher bits are ignored.
MTL::ColorWriteMask GetMtlColorWriteMask(uint8 mask)
{
	static const struct
	{
		uint8 bit;
		MTL::ColorWriteMask channel;
	} channelBits[] = {
		{0x1, MTL::ColorWriteMaskRed},
		{0x2, MTL::ColorWriteMaskGreen},
		{0x4, MTL::ColorWriteMaskBlue},
		{0x8, MTL::ColorWriteMaskAlpha},
	};

	MTL::ColorWriteMask result = MTL::ColorWriteMaskNone;
	for (const auto& entry : channelBits)
	{
		if (mask & entry.bit)
			result |= entry.channel;
	}

	return result;
}
|
86
src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h
Normal file
86
src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h
Normal file
|
@ -0,0 +1,86 @@
|
|||
#pragma once
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
|
||||
#include "Cafe/HW/Latte/ISA/LatteReg.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
||||
//#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||
#include "Common/precompiled.h"
|
||||
#include "HW/Latte/Core/LatteTextureLoader.h"
|
||||
|
||||
struct Uvec2 {
|
||||
uint32 x;
|
||||
uint32 y;
|
||||
};
|
||||
|
||||
// Component data type associated with a pixel format.
// GetDataTypeStr() maps INT/UINT/FLOAT to the type names "int4"/"uint4"/"float4";
// NONE has no such name.
enum class MetalDataType
{
	NONE,  // no data type
	INT,   // signed integer
	UINT,  // unsigned integer
	FLOAT, // floating point
};
|
||||
|
||||
struct MetalPixelFormatInfo {
|
||||
MTL::PixelFormat pixelFormat;
|
||||
MetalDataType dataType;
|
||||
size_t bytesPerBlock;
|
||||
Uvec2 blockTexelSize = {1, 1};
|
||||
bool hasStencil = false;
|
||||
TextureDecoder* textureDecoder = nullptr;
|
||||
};
|
||||
|
||||
void CheckForPixelFormatSupport(const MetalPixelFormatSupport& support);
|
||||
|
||||
const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth);
|
||||
|
||||
MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth);
|
||||
|
||||
// Returns the component data type of the color buffer bound at `index`, derived from
// the color buffer format stored in the given context registers.
inline MetalDataType GetColorBufferDataType(const uint32 index, const LatteContextRegister& lcr)
{
	const Latte::E_GX2SURFFMT colorFormat = LatteMRT::GetColorBufferFormat(index, lcr);
	const MetalPixelFormatInfo info = GetMtlPixelFormatInfo(colorFormat, false);

	return info.dataType;
}
|
||||
|
||||
// Returns the four-component type name for a data type ("int4", "uint4" or "float4").
// Any other value, including NONE, is flagged as suspicious and yields "INVALID".
inline const char* GetDataTypeStr(MetalDataType dataType)
{
	if (dataType == MetalDataType::INT)
		return "int4";
	if (dataType == MetalDataType::UINT)
		return "uint4";
	if (dataType == MetalDataType::FLOAT)
		return "float4";

	cemu_assert_suspicious();
	return "INVALID";
}
|
||||
|
||||
size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, bool isDepth, uint32 width);
|
||||
|
||||
size_t GetMtlTextureBytesPerImage(Latte::E_GX2SURFFMT format, bool isDepth, uint32 height, size_t bytesPerRow);
|
||||
|
||||
MTL::PrimitiveType GetMtlPrimitiveType(LattePrimitiveMode primitiveMode);
|
||||
|
||||
MTL::VertexFormat GetMtlVertexFormat(uint8 format);
|
||||
|
||||
uint32 GetMtlVertexFormatSize(uint8 format);
|
||||
|
||||
MTL::IndexType GetMtlIndexType(Renderer::INDEX_TYPE indexType);
|
||||
|
||||
MTL::BlendOperation GetMtlBlendOp(Latte::LATTE_CB_BLENDN_CONTROL::E_COMBINEFUNC combineFunc);
|
||||
|
||||
MTL::BlendFactor GetMtlBlendFactor(Latte::LATTE_CB_BLENDN_CONTROL::E_BLENDFACTOR factor);
|
||||
|
||||
MTL::CompareFunction GetMtlCompareFunc(Latte::E_COMPAREFUNC func);
|
||||
|
||||
MTL::SamplerAddressMode GetMtlSamplerAddressMode(Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_CLAMP clamp);
|
||||
|
||||
MTL::TextureSwizzle GetMtlTextureSwizzle(uint32 swizzle);
|
||||
|
||||
MTL::StencilOperation GetMtlStencilOp(Latte::LATTE_DB_DEPTH_CONTROL::E_STENCILACTION action);
|
||||
|
||||
MTL::ColorWriteMask GetMtlColorWriteMask(uint8 mask);
|
48
src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp
Normal file
48
src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp
Normal file
|
@ -0,0 +1,48 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
|
||||
// Captures the attachment formats of an existing framebuffer object.
// Color slots without a bound texture keep their default (invalid) format.
MetalAttachmentsInfo::MetalAttachmentsInfo(class CachedFBOMtl* fbo)
{
	// Color attachments
	for (uint8 slot = 0; slot < LATTE_NUM_COLOR_TARGET; slot++)
	{
		auto* colorView = static_cast<LatteTextureViewMtl*>(fbo->colorBuffer[slot].texture);
		if (colorView)
			colorFormats[slot] = colorView->format;
	}

	// Depth stencil attachment
	if (auto* depthView = static_cast<LatteTextureViewMtl*>(fbo->depthBuffer.texture))
	{
		depthFormat = depthView->format;
		hasStencil = fbo->depthBuffer.hasStencil;
	}
}
|
||||
|
||||
// Derives the attachment formats from the current context registers and pixel shader.
// Only color buffers flagged in the active color buffer mask are recorded; inactive
// slots keep their default (invalid) format. The depth format and stencil flag are
// filled in only when the depth buffer is active.
MetalAttachmentsInfo::MetalAttachmentsInfo(const LatteContextRegister& lcr, const LatteDecompilerShader* pixelShader)
{
	uint8 cbMask = LatteMRT::GetActiveColorBufferMask(pixelShader, lcr);
	bool dbMask = LatteMRT::GetActiveDepthBufferMask(lcr);

	// Color attachments
	// Bounded by the size of colorFormats rather than a literal, so the loop cannot
	// drift out of sync with the array if the number of color targets ever changes.
	for (int i = 0; i < LATTE_NUM_COLOR_TARGET; ++i)
	{
		if ((cbMask & (1 << i)) == 0)
			continue;

		colorFormats[i] = LatteMRT::GetColorBufferFormat(i, lcr);
	}

	// Depth stencil attachment
	if (dbMask)
	{
		Latte::E_GX2SURFFMT format = LatteMRT::GetDepthBufferFormat(lcr);
		depthFormat = format;
		hasStencil = GetMtlPixelFormatInfo(format, true).hasStencil;
	}
}
|
15
src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h
Normal file
15
src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#pragma once
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
|
||||
class MetalAttachmentsInfo
|
||||
{
|
||||
public:
|
||||
MetalAttachmentsInfo() = default;
|
||||
MetalAttachmentsInfo(class CachedFBOMtl* fbo);
|
||||
MetalAttachmentsInfo(const LatteContextRegister& lcr, const class LatteDecompilerShader* pixelShader);
|
||||
|
||||
Latte::E_GX2SURFFMT colorFormats[LATTE_NUM_COLOR_TARGET] = {Latte::E_GX2SURFFMT::INVALID_FORMAT};
|
||||
Latte::E_GX2SURFFMT depthFormat = Latte::E_GX2SURFFMT::INVALID_FORMAT;
|
||||
bool hasStencil = false;
|
||||
};
|
217
src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp
Normal file
217
src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp
Normal file
|
@ -0,0 +1,217 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h"
|
||||
|
||||
// Releases every Metal buffer that was created as a heap chunk.
MetalBufferChunkedHeap::~MetalBufferChunkedHeap()
{
	for (MTL::Buffer* chunkBuffer : m_chunkBuffers)
	{
		chunkBuffer->release();
	}
}
|
||||
|
||||
// Creates the Metal buffer backing a new heap chunk and returns its size in bytes.
// The chunk is at least m_minimumBufferAllocationSize bytes large and is appended to
// m_chunkBuffers, so `chunkIndex` is expected to equal the current chunk count.
uint32 MetalBufferChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize)
{
	const size_t chunkSize = std::max<size_t>(m_minimumBufferAllocationSize, minimumAllocationSize);

	MTL::Buffer* chunkBuffer = m_mtlr->GetDevice()->newBuffer(chunkSize, m_options);
	cemu_assert_debug(chunkBuffer);
	cemu_assert_debug(m_chunkBuffers.size() == chunkIndex);
	m_chunkBuffers.emplace_back(chunkBuffer);

	return (uint32)chunkSize;
}
|
||||
|
||||
// Records a sync point at `offset` for the current command buffer.
// At most one sync point is queued per command buffer: nothing is added while the
// current command buffer is still the one the last sync point was recorded for.
void MetalSynchronizedRingAllocator::addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset)
{
	auto currentCommandBuffer = m_mtlr->GetCurrentCommandBuffer();
	if (currentCommandBuffer != buffer.lastSyncpointCommandBuffer)
	{
		buffer.lastSyncpointCommandBuffer = currentCommandBuffer;
		buffer.queue_syncPoints.emplace(currentCommandBuffer, offset);
	}
}
|
||||
|
||||
// Creates one more backing buffer and appends it to m_buffers.
// Its size is the smallest multiple of m_minimumBufferAllocSize that is at least
// `sizeRequiredForAlloc` (and never less than one multiple).
void MetalSynchronizedRingAllocator::allocateAdditionalUploadBuffer(uint32 sizeRequiredForAlloc)
{
	uint32 newBufferSize = m_minimumBufferAllocSize;
	for (; newBufferSize < sizeRequiredForAlloc; newBufferSize += m_minimumBufferAllocSize)
	{
		// grow in steps of the minimum allocation size
	}

	AllocatorBuffer_t buffer{};
	buffer.mtlBuffer = m_mtlr->GetDevice()->newBuffer(newBufferSize, m_options);
	buffer.basePtr = (uint8*)buffer.mtlBuffer->contents();
	buffer.size = newBufferSize;
	buffer.writeIndex = 0;
	buffer.index = (uint32)m_buffers.size();
	m_buffers.push_back(buffer);
}
|
||||
|
||||
// Reserves `size` bytes from the first ring buffer that has room and returns the
// reservation. The alignment is raised to at least 128 and the size is rounded up
// to a multiple of 128. When no existing buffer can hold the request, a new buffer
// is created and the allocation is retried.
MetalSynchronizedRingAllocator::AllocatorReservation_t MetalSynchronizedRingAllocator::AllocateBufferMemory(uint32 size, uint32 alignment)
{
	// marks "no sync point ahead of the write cursor"
	constexpr uint32 kUnlimited = 0xFFFFFFFF;

	alignment = std::max<uint32>(alignment, 128);
	size = (size + 127) & ~127;

	for (auto& buffer : m_buffers)
	{
		// bytes to skip so that the write cursor becomes aligned
		uint32 padding = (alignment - (buffer.writeIndex % alignment)) % alignment;

		// distance from the write cursor to the oldest pending sync point; a sync
		// point behind the cursor does not limit the space up to the buffer end
		uint32 spaceUntilSyncPoint = kUnlimited;
		if (!buffer.queue_syncPoints.empty() && buffer.queue_syncPoints.front().offset >= buffer.writeIndex)
			spaceUntilSyncPoint = buffer.queue_syncPoints.front().offset - buffer.writeIndex;

		uint32 required = padding + size;
		if (required > spaceUntilSyncPoint)
			continue; // not enough space in current buffer

		const bool exceedsBufferEnd = (buffer.writeIndex + required) > buffer.size;
		if (exceedsBufferEnd)
		{
			// wrap-around: restart at offset 0, which needs no padding
			required = size;
			padding = 0;

			// the space at the start of the buffer ends at the oldest sync point,
			// or at the end of the buffer when nothing is pending
			if (!buffer.queue_syncPoints.empty())
			{
				if (required > buffer.queue_syncPoints.front().offset)
					continue;
			}
			else if (required > buffer.size)
			{
				continue;
			}

			buffer.writeIndex = 0;
		}

		addUploadBufferSyncPoint(buffer, buffer.writeIndex);

		buffer.writeIndex += padding;
		const uint32 reservedOffset = buffer.writeIndex;
		buffer.writeIndex += size;
		buffer.cleanupCounter = 0;

		MetalSynchronizedRingAllocator::AllocatorReservation_t reservation;
		reservation.mtlBuffer = buffer.mtlBuffer;
		reservation.memPtr = buffer.basePtr + reservedOffset;
		reservation.bufferOffset = reservedOffset;
		reservation.size = size;
		reservation.bufferIndex = buffer.index;
		return reservation;
	}

	// no existing buffer had room: allocate new buffer and retry
	allocateAdditionalUploadBuffer(size);
	return AllocateBufferMemory(size, alignment);
}
|
||||
|
||||
// Notifies Metal that the CPU modified the reserved range.
// Does nothing unless RequiresFlush() reports that the buffers need it.
void MetalSynchronizedRingAllocator::FlushReservation(AllocatorReservation_t& uploadReservation)
{
	if (!RequiresFlush())
		return;

	const NS::Range modifiedRange(uploadReservation.bufferOffset, uploadReservation.size);
	uploadReservation.mtlBuffer->didModifyRange(modifiedRange);
}
|
||||
|
||||
// Retires the sync points that belong to a finished command buffer and releases the
// most recently added buffer once it has been idle for long enough.
// NOTE(review): sync points are matched by command buffer pointer, and
// lastSyncpointCommandBuffer is not cleared here - confirm that the address of a
// finished command buffer cannot be handed out again for a new one.
void MetalSynchronizedRingAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer)
{
	for (auto& buffer : m_buffers)
	{
		auto& syncPoints = buffer.queue_syncPoints;
		while (!syncPoints.empty() && syncPoints.front().commandBuffer == latestFinishedCommandBuffer)
			syncPoints.pop();

		// count the calls during which the buffer had nothing pending
		if (syncPoints.empty())
			buffer.cleanupCounter++;
	}

	// check if last buffer is available for deletion; the first buffer is always kept
	if (m_buffers.size() < 2)
		return;

	auto& newestBuffer = m_buffers.back();
	if (newestBuffer.cleanupCounter < 1000)
		return;

	// release buffer
	newestBuffer.mtlBuffer->release();
	m_buffers.pop_back();
}
|
||||
|
||||
// Returns the Metal buffer of the ring buffer at `index`.
// The index must refer to an existing buffer; this is checked in debug builds,
// matching MetalBufferChunkedHeap::GetBufferByIndex.
MTL::Buffer* MetalSynchronizedRingAllocator::GetBufferByIndex(uint32 index) const
{
	cemu_assert_debug(index < m_buffers.size());

	return m_buffers[index].mtlBuffer;
}
|
||||
|
||||
// Reports the number of ring buffers, their combined size and the combined number of
// bytes between each write cursor and the oldest pending sync point (the whole
// buffer when nothing is pending).
void MetalSynchronizedRingAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
{
	numBuffers = (uint32)m_buffers.size();

	size_t totalBytes = 0;
	size_t freeBytes = 0;
	for (const auto& buffer : m_buffers)
	{
		totalBytes += buffer.size;

		// calculate free space in buffer
		uint32 freeInBuffer = buffer.size;
		if (!buffer.queue_syncPoints.empty())
		{
			const uint32 syncOffset = buffer.queue_syncPoints.front().offset;
			if (syncOffset < buffer.writeIndex)
				freeInBuffer = (buffer.size - buffer.writeIndex) + syncOffset; // size with wrap-around
			else
				freeInBuffer = syncOffset - buffer.writeIndex;
		}
		freeBytes += freeInBuffer;
	}

	totalBufferSize = totalBytes;
	freeBufferSize = freeBytes;
}
|
||||
|
||||
/* MetalSynchronizedHeapAllocator */
|
||||
|
||||
// Allocates `size` bytes from the chunked heap, tracks the allocation as active and
// returns a pooled reservation describing it. The reservation must be handed back
// through FreeReservation().
MetalSynchronizedHeapAllocator::AllocatorReservation* MetalSynchronizedHeapAllocator::AllocateBufferMemory(uint32 size, uint32 alignment)
{
	const CHAddr heapAddr = m_chunkedHeap.alloc(size, alignment);
	m_activeAllocations.emplace_back(heapAddr);

	AllocatorReservation* reservation = m_poolAllocatorReservation.allocObj();
	reservation->mtlBuffer = m_chunkedHeap.GetBufferByIndex(heapAddr.chunkIndex);
	reservation->memPtr = m_chunkedHeap.GetChunkPtr(heapAddr.chunkIndex) + heapAddr.offset;
	reservation->bufferOffset = heapAddr.offset;
	reservation->size = size;
	reservation->bufferIndex = heapAddr.chunkIndex;

	return reservation;
}
|
||||
|
||||
// Returns a reservation to the allocator. The heap memory is not freed immediately:
// it is put on a delayed release queue for the current command buffer and freed by
// CleanupBuffer() once that command buffer has finished.
// A reservation that is not tracked as active (for example one freed twice) trips
// the debug assert and is otherwise ignored, instead of dereferencing and erasing
// the end iterator.
void MetalSynchronizedHeapAllocator::FreeReservation(AllocatorReservation* uploadReservation)
{
	// put the allocation on a delayed release queue for the current command buffer
	MTL::CommandBuffer* currentCommandBuffer = m_mtlr->GetCurrentCommandBuffer();
	auto it = std::find_if(m_activeAllocations.begin(), m_activeAllocations.end(), [uploadReservation](const TrackedAllocation& allocation) {
		return allocation.allocation.chunkIndex == uploadReservation->bufferIndex && allocation.allocation.offset == uploadReservation->bufferOffset;
	});
	if (it == m_activeAllocations.end())
	{
		cemu_assert_debug(false);
		return;
	}

	m_releaseQueue[currentCommandBuffer].emplace_back(it->allocation);
	m_activeAllocations.erase(it);
	m_poolAllocatorReservation.freeObj(uploadReservation);
}
|
||||
|
||||
// Notifies Metal that the CPU modified the reserved range.
// Does nothing unless the chunked heap reports that its buffers need it.
void MetalSynchronizedHeapAllocator::FlushReservation(AllocatorReservation* uploadReservation)
{
	if (!m_chunkedHeap.RequiresFlush())
		return;

	const NS::Range modifiedRange(uploadReservation->bufferOffset, uploadReservation->size);
	uploadReservation->mtlBuffer->didModifyRange(modifiedRange);
}
|
||||
|
||||
// Frees all heap allocations whose release was deferred until the given command
// buffer finished. Does nothing when no releases are queued for it.
void MetalSynchronizedHeapAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer)
{
	const auto pendingReleases = m_releaseQueue.find(latestFinishedCommandBuffer);
	if (pendingReleases != m_releaseQueue.end())
	{
		// release allocations
		for (auto& heapAddr : pendingReleases->second)
		{
			m_chunkedHeap.free(heapAddr);
		}
		m_releaseQueue.erase(pendingReleases);
	}
}
|
||||
|
||||
// Reports buffer count, total size and free size; all three values come straight
// from the underlying chunked heap.
void MetalSynchronizedHeapAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
{
	const MetalBufferChunkedHeap& heap = m_chunkedHeap;
	heap.GetStats(numBuffers, totalBufferSize, freeBufferSize);
}
|
163
src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
Normal file
163
src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
Normal file
|
@ -0,0 +1,163 @@
|
|||
#pragma once
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Metal/MTLResource.hpp"
|
||||
#include "util/ChunkedHeap/ChunkedHeap.h"
|
||||
#include "util/helpers/MemoryPool.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
// Adds the write-combined CPU cache mode to resource options whose storage mode is
// shared or managed, and returns all other options unchanged.
//
// The storage mode is a multi-bit field inside the options value, not a set of
// independent flags, and shared storage is encoded as the value 0. Testing it with
// `options & MTL::ResourceStorageModeShared` is therefore always false, so the
// field is extracted and compared instead.
inline MTL::ResourceOptions GetResourceOptions(MTL::ResourceOptions options)
{
	// bits 4-7 of the options hold the storage mode (MTLResourceStorageModeShift is 4)
	constexpr NS::UInteger storageModeMask = NS::UInteger(0xF) << 4;

	const NS::UInteger storageMode = options & storageModeMask;
	if (storageMode == MTL::ResourceStorageModeShared || storageMode == MTL::ResourceStorageModeManaged)
		options |= MTL::ResourceCPUCacheModeWriteCombined;

	return options;
}
|
||||
|
||||
// Chunked heap whose chunks are backed by Metal buffers. Every chunk is one
// MTL::Buffer created with the resource options given at construction; the buffers
// are released when the heap is destroyed.
class MetalBufferChunkedHeap : private ChunkedHeap<>
{
public:
	// `options` are passed through GetResourceOptions() before being stored;
	// `minimumBufferAllocationSize` is the smallest size a new chunk may have.
	MetalBufferChunkedHeap(const class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocationSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocationSize(minimumBufferAllocationSize) {}
	~MetalBufferChunkedHeap();

	using ChunkedHeap::alloc;
	using ChunkedHeap::free;

	// CPU pointer to the start of the chunk, or nullptr when the index is out of range
	uint8* GetChunkPtr(uint32 index) const
	{
		if (index >= m_chunkBuffers.size())
			return nullptr;

		return (uint8*)m_chunkBuffers[index]->contents();
	}

	// Metal buffer backing the chunk; the index must be valid (checked in debug builds)
	MTL::Buffer* GetBufferByIndex(uint32 index) const
	{
		cemu_assert_debug(index < m_chunkBuffers.size());

		return m_chunkBuffers[index];
	}

	// True when the buffers use managed storage, in which case CPU writes have to be
	// reported with didModifyRange(). The storage mode is a multi-bit field, so it is
	// extracted and compared; a plain bit test would also match other storage modes
	// that share the managed bit.
	bool RequiresFlush() const
	{
		constexpr NS::UInteger storageModeMask = NS::UInteger(0xF) << 4;

		return (m_options & storageModeMask) == MTL::ResourceStorageModeManaged;
	}

	// Reports the number of chunks, the total heap size and the unallocated size
	void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
	{
		numBuffers = (uint32)m_chunkBuffers.size();
		totalBufferSize = m_numHeapBytes;
		freeBufferSize = m_numHeapBytes - m_numAllocatedBytes;
	}

private:
	uint32 allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) override;

	const class MetalRenderer* m_mtlr;

	MTL::ResourceOptions m_options;
	size_t m_minimumBufferAllocationSize;

	// one Metal buffer per chunk, indexed by chunk index
	std::vector<MTL::Buffer*> m_chunkBuffers;
};
|
||||
|
||||
// a circular ring-buffer which tracks and releases memory per command-buffer
|
||||
class MetalSynchronizedRingAllocator
|
||||
{
|
||||
public:
|
||||
MetalSynchronizedRingAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, uint32 minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocSize(minimumBufferAllocSize) {};
|
||||
MetalSynchronizedRingAllocator(const MetalSynchronizedRingAllocator&) = delete; // disallow copy
|
||||
|
||||
struct BufferSyncPoint_t
|
||||
{
|
||||
// todo - modularize sync point
|
||||
MTL::CommandBuffer* commandBuffer;
|
||||
uint32 offset;
|
||||
|
||||
BufferSyncPoint_t(MTL::CommandBuffer* _commandBuffer, uint32 _offset) : commandBuffer(_commandBuffer), offset(_offset) {};
|
||||
};
|
||||
|
||||
struct AllocatorBuffer_t
|
||||
{
|
||||
MTL::Buffer* mtlBuffer;
|
||||
uint8* basePtr;
|
||||
uint32 size;
|
||||
uint32 writeIndex;
|
||||
std::queue<BufferSyncPoint_t> queue_syncPoints;
|
||||
MTL::CommandBuffer* lastSyncpointCommandBuffer{ nullptr };
|
||||
uint32 index;
|
||||
uint32 cleanupCounter{ 0 }; // increased by one every time CleanupBuffer() is called if there is no sync point. If it reaches 300 then the buffer is released
|
||||
};
|
||||
|
||||
struct AllocatorReservation_t
|
||||
{
|
||||
MTL::Buffer* mtlBuffer;
|
||||
uint8* memPtr;
|
||||
uint32 bufferOffset;
|
||||
uint32 size;
|
||||
uint32 bufferIndex;
|
||||
};
|
||||
|
||||
AllocatorReservation_t AllocateBufferMemory(uint32 size, uint32 alignment);
|
||||
void FlushReservation(AllocatorReservation_t& uploadReservation);
|
||||
void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer);
|
||||
MTL::Buffer* GetBufferByIndex(uint32 index) const;
|
||||
|
||||
bool RequiresFlush() const
|
||||
{
|
||||
return m_options & MTL::ResourceStorageModeManaged;
|
||||
}
|
||||
|
||||
void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const;
|
||||
|
||||
private:
|
||||
void allocateAdditionalUploadBuffer(uint32 sizeRequiredForAlloc);
|
||||
void addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset);
|
||||
|
||||
const class MetalRenderer* m_mtlr;
|
||||
|
||||
MTL::ResourceOptions m_options;
|
||||
const uint32 m_minimumBufferAllocSize;
|
||||
|
||||
std::vector<AllocatorBuffer_t> m_buffers;
|
||||
};
|
||||
|
||||
// heap style allocator with released memory being freed after the current command buffer finishes
|
||||
class MetalSynchronizedHeapAllocator
|
||||
{
|
||||
struct TrackedAllocation
|
||||
{
|
||||
TrackedAllocation(CHAddr allocation) : allocation(allocation) {};
|
||||
CHAddr allocation;
|
||||
};
|
||||
|
||||
public:
|
||||
MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_chunkedHeap(m_mtlr, options, minimumBufferAllocSize) {}
|
||||
MetalSynchronizedHeapAllocator(const MetalSynchronizedHeapAllocator&) = delete; // disallow copy
|
||||
|
||||
struct AllocatorReservation
|
||||
{
|
||||
MTL::Buffer* mtlBuffer;
|
||||
uint8* memPtr;
|
||||
uint32 bufferOffset;
|
||||
uint32 size;
|
||||
uint32 bufferIndex;
|
||||
};
|
||||
|
||||
AllocatorReservation* AllocateBufferMemory(uint32 size, uint32 alignment);
|
||||
void FreeReservation(AllocatorReservation* uploadReservation);
|
||||
void FlushReservation(AllocatorReservation* uploadReservation);
|
||||
|
||||
void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer);
|
||||
|
||||
void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const;
|
||||
private:
|
||||
const class MetalRenderer* m_mtlr;
|
||||
MetalBufferChunkedHeap m_chunkedHeap;
|
||||
// allocations
|
||||
std::vector<TrackedAllocation> m_activeAllocations;
|
||||
MemoryPool<AllocatorReservation> m_poolAllocatorReservation{32};
|
||||
// release queue
|
||||
std::unordered_map<MTL::CommandBuffer*, std::vector<CHAddr>> m_releaseQueue;
|
||||
};
|
221
src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h
Normal file
221
src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h
Normal file
|
@ -0,0 +1,221 @@
|
|||
#pragma once
|
||||
|
||||
#include <Foundation/Foundation.hpp>
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
||||
|
||||
struct MetalPixelFormatSupport
|
||||
{
|
||||
bool m_supportsR8Unorm_sRGB;
|
||||
bool m_supportsRG8Unorm_sRGB;
|
||||
bool m_supportsPacked16BitFormats;
|
||||
bool m_supportsDepth24Unorm_Stencil8;
|
||||
|
||||
MetalPixelFormatSupport() = default;
|
||||
MetalPixelFormatSupport(MTL::Device* device)
|
||||
{
|
||||
m_supportsR8Unorm_sRGB = device->supportsFamily(MTL::GPUFamilyApple1);
|
||||
m_supportsRG8Unorm_sRGB = device->supportsFamily(MTL::GPUFamilyApple1);
|
||||
m_supportsPacked16BitFormats = device->supportsFamily(MTL::GPUFamilyApple1);
|
||||
m_supportsDepth24Unorm_Stencil8 = device->depth24Stencil8PixelFormatSupported();
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: don't define a new struct for this
|
||||
struct MetalQueryRange
|
||||
{
|
||||
uint32 begin;
|
||||
uint32 end;
|
||||
};
|
||||
|
||||
#define MAX_MTL_BUFFERS 31
// Buffer indices 28-30 are reserved for the helper shaders
#define MTL_RESERVED_BUFFERS 3
#define MAX_MTL_VERTEX_BUFFERS (MAX_MTL_BUFFERS - MTL_RESERVED_BUFFERS)
// Maps a vertex buffer slot to a Metal buffer index, counting down from the top of the non-reserved range.
// The argument is parenthesized so that compound expressions (e.g. `a + b`) expand correctly.
#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_VERTEX_BUFFERS - (index) - 1)

#define MAX_MTL_TEXTURES 31
#define MAX_MTL_SAMPLERS 16

// Binding slots used by the helper shaders; arguments are parenthesized for the same reason as above.
#define GET_HELPER_BUFFER_BINDING(index) (28 + (index))
#define GET_HELPER_TEXTURE_BINDING(index) (29 + (index))
#define GET_HELPER_SAMPLER_BINDING(index) (14 + (index))
|
||||
|
||||
constexpr uint32 INVALID_UINT32 = std::numeric_limits<uint32>::max();
|
||||
constexpr size_t INVALID_OFFSET = std::numeric_limits<size_t>::max();
|
||||
|
||||
// Rounds `size` up to the next multiple of `alignment`.
// The mask arithmetic only yields a multiple when `alignment` is a power of two.
inline size_t Align(size_t size, size_t alignment)
{
	const size_t mask = alignment - 1;
	const size_t bumped = size + mask;
	return bumped & ~mask;
}
|
||||
|
||||
__attribute__((unused)) static inline void ETStackAutoRelease(void* object)
|
||||
{
|
||||
(*(NS::Object**)object)->release();
|
||||
}
|
||||
|
||||
#define NS_STACK_SCOPED __attribute__((cleanup(ETStackAutoRelease))) __attribute__((unused))
|
||||
|
||||
// Cast from const char* to NS::String*
|
||||
inline NS::String* ToNSString(const char* str)
|
||||
{
|
||||
return NS::String::string(str, NS::ASCIIStringEncoding);
|
||||
}
|
||||
|
||||
// Cast from std::string to NS::String*
|
||||
// std::string overload: forwards the string's C representation to the const char* overload.
inline NS::String* ToNSString(const std::string& str)
{
	const char* rawChars = str.c_str();
	return ToNSString(rawChars);
}
|
||||
|
||||
// Cast from const char* to NS::URL*
|
||||
// Builds a file URL from a path given as a C string.
inline NS::URL* ToNSURL(const char* str)
{
	NS::String* path = ToNSString(str);
	return NS::URL::fileURLWithPath(path);
}
|
||||
|
||||
// Cast from std::string to NS::URL*
|
||||
// std::string overload: forwards the string's C representation to the const char* overload.
inline NS::URL* ToNSURL(const std::string& str)
{
	const char* rawPath = str.c_str();
	return ToNSURL(rawPath);
}
|
||||
|
||||
// Produces a label of the form "<label> (<identifier as decimal integer>)".
inline NS::String* GetLabel(const std::string& label, const void* identifier)
{
	std::string text = label;
	text += " (";
	text += std::to_string(reinterpret_cast<uintptr_t>(identifier));
	text += ")";
	return ToNSString(text);
}
|
||||
|
||||
constexpr MTL::RenderStages ALL_MTL_RENDER_STAGES = MTL::RenderStageVertex | MTL::RenderStageObject | MTL::RenderStageMesh | MTL::RenderStageFragment;
|
||||
|
||||
// Returns true for the texture dimensions accepted for depth textures:
// 2D, 2D MSAA, 2D array, 2D array MSAA and cubemap.
inline bool IsValidDepthTextureType(Latte::E_DIM dim)
{
	switch (dim)
	{
	case Latte::E_DIM::DIM_2D:
	case Latte::E_DIM::DIM_2D_MSAA:
	case Latte::E_DIM::DIM_2D_ARRAY:
	case Latte::E_DIM::DIM_2D_ARRAY_MSAA:
	case Latte::E_DIM::DIM_CUBEMAP:
		return true;
	default:
		return false;
	}
}
|
||||
|
||||
// A command buffer counts as completed once its status is either Completed or Error.
inline bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer)
{
	const auto currentStatus = commandBuffer->status();
	if (currentStatus == MTL::CommandBufferStatusCompleted)
		return true;
	return currentStatus == MTL::CommandBufferStatusError;
}
|
||||
|
||||
// Every format that is not block-compressed is treated as renderable.
inline bool FormatIsRenderable(Latte::E_GX2SURFFMT format)
{
	const bool isCompressed = Latte::IsCompressedFormat(format);
	return !isCompressed;
}
|
||||
|
||||
template <typename... T>
|
||||
inline bool executeCommand(fmt::format_string<T...> fmt, T&&... args) {
|
||||
std::string command = fmt::format(fmt, std::forward<T>(args)...);
|
||||
int res = system(command.c_str());
|
||||
if (res != 0)
|
||||
{
|
||||
cemuLog_log(LogType::Force, "command \"{}\" failed with exit code {}", command, res);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
class MemoryMappedFile
|
||||
{
|
||||
public:
|
||||
MemoryMappedFile(const std::string& filePath)
|
||||
{
|
||||
// Open the file
|
||||
m_fd = open(filePath.c_str(), O_RDONLY);
|
||||
if (m_fd == -1) {
|
||||
cemuLog_log(LogType::Force, "failed to open file: {}", filePath);
|
||||
return;
|
||||
}
|
||||
|
||||
// Get the file size
|
||||
// Use a loop to handle the case where the file size is 0 (more of a safety net)
|
||||
struct stat fileStat;
|
||||
while (true)
|
||||
{
|
||||
if (fstat(m_fd, &fileStat) == -1)
|
||||
{
|
||||
close(m_fd);
|
||||
cemuLog_log(LogType::Force, "failed to get file size: {}", filePath);
|
||||
return;
|
||||
}
|
||||
m_fileSize = fileStat.st_size;
|
||||
|
||||
if (m_fileSize == 0)
|
||||
{
|
||||
cemuLog_logOnce(LogType::Force, "file size is 0: {}", filePath);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// Memory map the file
|
||||
m_data = mmap(nullptr, m_fileSize, PROT_READ, MAP_PRIVATE, m_fd, 0);
|
||||
if (m_data == MAP_FAILED)
|
||||
{
|
||||
close(m_fd);
|
||||
cemuLog_log(LogType::Force, "failed to memory map file: {}", filePath);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
~MemoryMappedFile()
|
||||
{
|
||||
if (m_data && m_data != MAP_FAILED)
|
||||
munmap(m_data, m_fileSize);
|
||||
|
||||
if (m_fd != -1)
|
||||
close(m_fd);
|
||||
}
|
||||
|
||||
uint8* data() const { return static_cast<uint8*>(m_data); }
|
||||
size_t size() const { return m_fileSize; }
|
||||
|
||||
private:
|
||||
int m_fd = -1;
|
||||
void* m_data = nullptr;
|
||||
size_t m_fileSize = 0;
|
||||
};
|
||||
*/
|
||||
|
||||
// Number of vertices that make up one primitive of the given mode.
// Unhandled modes are logged and reported as 0.
inline uint32 GetVerticesPerPrimitive(LattePrimitiveMode primitiveMode)
{
	switch (primitiveMode)
	{
	case LattePrimitiveMode::POINTS:
		return 1;
	case LattePrimitiveMode::LINES:
	case LattePrimitiveMode::LINE_STRIP: // same as lines, but requires connection
		return 2;
	case LattePrimitiveMode::TRIANGLES:
	case LattePrimitiveMode::RECTS:
		return 3;
	default:
		break;
	}

	cemuLog_log(LogType::Force, "Unimplemented primitive type {}", primitiveMode);
	return 0;
}
|
||||
|
||||
// Only line strips are reported as requiring connection between consecutive primitives.
inline bool PrimitiveRequiresConnection(LattePrimitiveMode primitiveMode)
{
	return primitiveMode == LattePrimitiveMode::LINE_STRIP;
}
|
||||
|
||||
inline bool UseRectEmulation(const LatteContextRegister& lcr) {
|
||||
const LattePrimitiveMode primitiveMode = lcr.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
|
||||
return (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);
|
||||
}
|
||||
|
||||
inline bool UseGeometryShader(const LatteContextRegister& lcr, bool hasGeometryShader) {
|
||||
return hasGeometryShader || UseRectEmulation(lcr);
|
||||
}
|
6
src/Cafe/HW/Latte/Renderer/Metal/MetalCppImpl.cpp
Normal file
6
src/Cafe/HW/Latte/Renderer/Metal/MetalCppImpl.cpp
Normal file
|
@ -0,0 +1,6 @@
|
|||
#define NS_PRIVATE_IMPLEMENTATION
|
||||
#define CA_PRIVATE_IMPLEMENTATION
|
||||
#define MTL_PRIVATE_IMPLEMENTATION
|
||||
#include <Foundation/Foundation.hpp>
|
||||
#include <QuartzCore/QuartzCore.hpp>
|
||||
#include <Metal/Metal.hpp>
|
119
src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
Normal file
119
src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
Normal file
|
@ -0,0 +1,119 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "HW/Latte/ISA/RegDefines.h"
|
||||
#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#include "Metal/MTLDepthStencil.hpp"
|
||||
|
||||
// Releases every cached depth-stencil state and empties the cache.
MetalDepthStencilCache::~MetalDepthStencilCache()
{
	for (auto& [stateHash, state] : m_depthStencilCache)
		state->release();

	m_depthStencilCache.clear();
}
|
||||
|
||||
// Returns the depth-stencil state matching the current register values,
// creating and caching it on first use.
MTL::DepthStencilState* MetalDepthStencilCache::GetDepthStencilState(const LatteContextRegister& lcr)
{
	// Cache lookup; operator[] creates a null slot that is filled in below on a miss
	const uint64 cacheKey = CalculateDepthStencilHash(lcr);
	auto& cachedState = m_depthStencilCache[cacheKey];
	if (cachedState)
		return cachedState;

	// Depth state
	const bool depthTestOn = lcr.DB_DEPTH_CONTROL.get_Z_ENABLE();
	const auto depthCompare = lcr.DB_DEPTH_CONTROL.get_Z_FUNC();
	const bool depthWriteOn = lcr.DB_DEPTH_CONTROL.get_Z_WRITE_ENABLE();

	NS_STACK_SCOPED MTL::DepthStencilDescriptor* descriptor = MTL::DepthStencilDescriptor::alloc()->init();
	if (depthTestOn)
	{
		descriptor->setDepthWriteEnabled(depthWriteOn);
		descriptor->setDepthCompareFunction(GetMtlCompareFunc(depthCompare));
	}

	// Stencil state
	if (lcr.DB_DEPTH_CONTROL.get_STENCIL_ENABLE())
	{
		// Applies one face's masks, compare function and operations to a stencil descriptor
		auto configureFace = [](MTL::StencilDescriptor* face, uint32 readMask, uint32 writeMask, auto compareFunc, auto zFailOp, auto failOp, auto zPassOp)
		{
			face->setReadMask(readMask);
			face->setWriteMask(writeMask);
			face->setStencilCompareFunction(GetMtlCompareFunc(compareFunc));
			face->setDepthFailureOperation(GetMtlStencilOp(zFailOp));
			face->setStencilFailureOperation(GetMtlStencilOp(failOp));
			face->setDepthStencilPassOperation(GetMtlStencilOp(zPassOp));
		};

		// Front face parameters
		const auto funcFront = lcr.DB_DEPTH_CONTROL.get_STENCIL_FUNC_F();
		const auto zPassFront = lcr.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_F();
		const auto zFailFront = lcr.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_F();
		const auto failFront = lcr.DB_DEPTH_CONTROL.get_STENCIL_FAIL_F();
		const uint32 readMaskFront = lcr.DB_STENCILREFMASK.get_STENCILMASK_F();
		const uint32 writeMaskFront = lcr.DB_STENCILREFMASK.get_STENCILWRITEMASK_F();

		NS_STACK_SCOPED MTL::StencilDescriptor* frontFace = MTL::StencilDescriptor::alloc()->init();
		configureFace(frontFace, readMaskFront, writeMaskFront, funcFront, zFailFront, failFront, zPassFront);
		descriptor->setFrontFaceStencil(frontFace);

		NS_STACK_SCOPED MTL::StencilDescriptor* backFace = MTL::StencilDescriptor::alloc()->init();
		if (lcr.DB_DEPTH_CONTROL.get_BACK_STENCIL_ENABLE())
		{
			// Back face has its own parameters
			const auto funcBack = lcr.DB_DEPTH_CONTROL.get_STENCIL_FUNC_B();
			const auto zPassBack = lcr.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_B();
			const auto zFailBack = lcr.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_B();
			const auto failBack = lcr.DB_DEPTH_CONTROL.get_STENCIL_FAIL_B();
			const uint32 readMaskBack = lcr.DB_STENCILREFMASK_BF.get_STENCILMASK_B();
			const uint32 writeMaskBack = lcr.DB_STENCILREFMASK_BF.get_STENCILWRITEMASK_B();
			configureFace(backFace, readMaskBack, writeMaskBack, funcBack, zFailBack, failBack, zPassBack);
		}
		else
		{
			// Back-face stencil disabled: mirror the front face settings
			configureFace(backFace, readMaskFront, writeMaskFront, funcFront, zFailFront, failFront, zPassFront);
		}
		descriptor->setBackFaceStencil(backFace);
	}

	cachedState = m_mtlr->GetDevice()->newDepthStencilState(descriptor);

	return cachedState;
}
|
||||
|
||||
// Folds the depth/stencil related registers into a 64-bit cache key.
// Stencil mask registers only contribute while the stencil test bit (bit 0 of
// DB_DEPTH_CONTROL) is set; otherwise bits 8-31 of DB_DEPTH_CONTROL are ignored.
uint64 MetalDepthStencilCache::CalculateDepthStencilHash(const LatteContextRegister& lcr)
{
	const uint32* regs = lcr.GetRawView();

	uint32 dbDepthControl = regs[Latte::REGADDR::DB_DEPTH_CONTROL];
	const bool stencilEnabled = (dbDepthControl & 1) != 0;

	uint64 hash = 0;
	if (!stencilEnabled)
	{
		// zero out stencil related bits (8-31)
		dbDepthControl &= 0xFF;
	}
	else
	{
		hash += regs[mmDB_STENCILREFMASK];
		hash = std::rotl<uint64>(hash, 17);

		const bool backStencilEnabled = (dbDepthControl & (1 << 7)) != 0;
		if (backStencilEnabled)
		{
			hash += regs[mmDB_STENCILREFMASK_BF];
			hash = std::rotl<uint64>(hash, 13);
		}
	}

	hash = std::rotl<uint64>(hash, 17);
	hash += dbDepthControl;

	return hash;
}
|
21
src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h
Normal file
21
src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
#include "HW/Latte/ISA/LatteReg.h"
|
||||
|
||||
class MetalDepthStencilCache
|
||||
{
|
||||
public:
|
||||
MetalDepthStencilCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
|
||||
~MetalDepthStencilCache();
|
||||
|
||||
MTL::DepthStencilState* GetDepthStencilState(const LatteContextRegister& lcr);
|
||||
|
||||
private:
|
||||
class MetalRenderer* m_mtlr;
|
||||
|
||||
std::map<uint64, MTL::DepthStencilState*> m_depthStencilCache;
|
||||
|
||||
uint64 CalculateDepthStencilHash(const LatteContextRegister& lcr);
|
||||
};
|
3
src/Cafe/HW/Latte/Renderer/Metal/MetalLayer.h
Normal file
3
src/Cafe/HW/Latte/Renderer/Metal/MetalLayer.h
Normal file
|
@ -0,0 +1,3 @@
|
|||
#pragma once
|
||||
|
||||
// Adds a MetalView child to the native view passed in `handle` and returns that child's layer.
// `scaleX` / `scaleY` receive the ratio between the child's backing-store size and its size in points.
void* CreateMetalLayer(void* handle, float& scaleX, float& scaleY);
|
22
src/Cafe/HW/Latte/Renderer/Metal/MetalLayer.mm
Normal file
22
src/Cafe/HW/Latte/Renderer/Metal/MetalLayer.mm
Normal file
|
@ -0,0 +1,22 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayer.h"
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/MetalView.h"
|
||||
|
||||
// Adds a MetalView child that tracks the size of the given NSView and returns the child's layer.
// scaleX / scaleY receive the points-to-pixels scale of the new view.
// NOTE(review): ownership of the allocated child view and of the returned layer is not
// visible from this function - confirm against the caller that releases the layer.
void* CreateMetalLayer(void* handle, float& scaleX, float& scaleY)
{
	NSView* view = (NSView*)handle;

	MetalView* childView = [[MetalView alloc] initWithFrame:view.bounds];
	childView.autoresizingMask = NSViewWidthSizable | NSViewHeightSizable;
	childView.wantsLayer = YES;

	[view addSubview:childView];

	const NSRect points = [childView frame];
	const NSRect pixels = [childView convertRectToBacking:points];

	// A zero-sized frame would make the ratios below 0/0 (NaN). In that case fall back to the
	// window's backing scale factor, or 1.0 when the view is not in a window
	// (messaging a nil window yields 0).
	CGFloat fallbackScale = childView.window.backingScaleFactor;
	if (!(fallbackScale > 0.0))
		fallbackScale = 1.0;

	scaleX = (points.size.width > 0.0) ? (float)(pixels.size.width / points.size.width) : (float)fallbackScale;
	scaleY = (points.size.height > 0.0) ? (float)(pixels.size.height / points.size.height) : (float)fallbackScale;

	return childView.layer;
}
|
46
src/Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.cpp
Normal file
46
src/Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.cpp
Normal file
|
@ -0,0 +1,46 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayer.h"
|
||||
|
||||
#include "gui/guiWrapper.h"
|
||||
|
||||
// Creates the Metal layer for either the main window or the pad window and
// configures it for the given device and size (scaled to backing pixels).
MetalLayerHandle::MetalLayerHandle(MTL::Device* device, const Vector2i& size, bool mainWindow)
{
	const auto& windows = gui_getWindowInfo();
	const auto& targetWindow = mainWindow ? windows.window_main : windows.window_pad;

	void* rawLayer = CreateMetalLayer(targetWindow.handle, m_layerScaleX, m_layerScaleY);
	m_layer = (CA::MetalLayer*)rawLayer;

	m_layer->setDevice(device);
	m_layer->setDrawableSize(CGSize{(float)size.x * m_layerScaleX, (float)size.y * m_layerScaleY});
	m_layer->setFramebufferOnly(true);
}
|
||||
|
||||
// Releases the layer if one was created.
MetalLayerHandle::~MetalLayerHandle()
{
	if (m_layer == nullptr)
		return;

	m_layer->release();
}
|
||||
|
||||
void MetalLayerHandle::Resize(const Vector2i& size)
|
||||
{
|
||||
m_layer->setDrawableSize(CGSize{(float)size.x * m_layerScaleX, (float)size.y * m_layerScaleY});
|
||||
}
|
||||
|
||||
bool MetalLayerHandle::AcquireDrawable()
|
||||
{
|
||||
if (m_drawable)
|
||||
return true;
|
||||
|
||||
m_drawable = m_layer->nextDrawable();
|
||||
if (!m_drawable)
|
||||
{
|
||||
cemuLog_log(LogType::Force, "layer {} failed to acquire next drawable", (void*)this);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Schedules the held drawable for presentation on the given command buffer
// and forgets it, so the next AcquireDrawable() requests a new one.
void MetalLayerHandle::PresentDrawable(MTL::CommandBuffer* commandBuffer)
{
	CA::MetalDrawable* pending = m_drawable;
	commandBuffer->presentDrawable(pending);
	m_drawable = nullptr;
}
|
31
src/Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h
Normal file
31
src/Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h
Normal file
|
@ -0,0 +1,31 @@
|
|||
#pragma once
|
||||
|
||||
#include <QuartzCore/QuartzCore.hpp>
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
#include "util/math/vector2.h"
|
||||
|
||||
class MetalLayerHandle
|
||||
{
|
||||
public:
|
||||
MetalLayerHandle() = default;
|
||||
MetalLayerHandle(MTL::Device* device, const Vector2i& size, bool mainWindow);
|
||||
|
||||
~MetalLayerHandle();
|
||||
|
||||
void Resize(const Vector2i& size);
|
||||
|
||||
bool AcquireDrawable();
|
||||
|
||||
void PresentDrawable(MTL::CommandBuffer* commandBuffer);
|
||||
|
||||
CA::MetalLayer* GetLayer() const { return m_layer; }
|
||||
|
||||
CA::MetalDrawable* GetDrawable() const { return m_drawable; }
|
||||
|
||||
private:
|
||||
CA::MetalLayer* m_layer = nullptr;
|
||||
float m_layerScaleX, m_layerScaleY;
|
||||
|
||||
CA::MetalDrawable* m_drawable = nullptr;
|
||||
};
|
128
src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
Normal file
128
src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
Normal file
|
@ -0,0 +1,128 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.h"
|
||||
|
||||
#include "CafeSystem.h"
|
||||
#include "Cemu/Logging/CemuLogging.h"
|
||||
#include "Common/precompiled.h"
|
||||
#include "HW/MMU/MMU.h"
|
||||
#include "config/CemuConfig.h"
|
||||
|
||||
// Releases the buffer cache if it was created.
MetalMemoryManager::~MetalMemoryManager()
{
	if (m_bufferCache == nullptr)
		return;

	m_bufferCache->release();
}
|
||||
|
||||
void* MetalMemoryManager::AcquireTextureUploadBuffer(size_t size)
|
||||
{
|
||||
if (m_textureUploadBuffer.size() < size)
|
||||
{
|
||||
m_textureUploadBuffer.resize(size);
|
||||
}
|
||||
|
||||
return m_textureUploadBuffer.data();
|
||||
}
|
||||
|
||||
// Ends use of the scratch memory handed out by AcquireTextureUploadBuffer().
// `mem` is only checked (debug assert) to be the pointer that was handed out.
void MetalMemoryManager::ReleaseTextureUploadBuffer(uint8* mem)
{
	const uint8* expected = m_textureUploadBuffer.data();
	cemu_assert_debug(expected == mem);

	m_textureUploadBuffer.clear();
}
|
||||
|
||||
void MetalMemoryManager::InitBufferCache(size_t size)
|
||||
{
|
||||
cemu_assert_debug(!m_bufferCache);
|
||||
|
||||
m_bufferCacheMode = g_current_game_profile->GetBufferCacheMode();
|
||||
|
||||
if (m_bufferCacheMode == BufferCacheMode::Auto)
|
||||
{
|
||||
// TODO: do this for all unified memory systems?
|
||||
if (m_mtlr->IsAppleGPU())
|
||||
{
|
||||
switch (CafeSystem::GetForegroundTitleId())
|
||||
{
|
||||
// The Legend of Zelda: Wind Waker HD
|
||||
case 0x0005000010143600: // EUR
|
||||
case 0x0005000010143500: // USA
|
||||
case 0x0005000010143400: // JPN
|
||||
// TODO: use host instead?
|
||||
m_bufferCacheMode = BufferCacheMode::DeviceShared;
|
||||
break;
|
||||
default:
|
||||
m_bufferCacheMode = BufferCacheMode::DevicePrivate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_bufferCacheMode = BufferCacheMode::DevicePrivate;
|
||||
}
|
||||
}
|
||||
|
||||
// First, try to import the host memory as a buffer
|
||||
if (m_bufferCacheMode == BufferCacheMode::Host)
|
||||
{
|
||||
if (m_mtlr->HasUnifiedMemory())
|
||||
{
|
||||
m_importedMemBaseAddress = mmuRange_MEM2.getBase();
|
||||
m_hostAllocationSize = mmuRange_MEM2.getSize();
|
||||
m_bufferCache = m_mtlr->GetDevice()->newBuffer(memory_getPointerFromVirtualOffset(m_importedMemBaseAddress), m_hostAllocationSize, MTL::ResourceStorageModeShared, nullptr);
|
||||
if (!m_bufferCache)
|
||||
{
|
||||
cemuLog_log(LogType::Force, "Failed to import host memory as a buffer, using device shared mode instead");
|
||||
m_bufferCacheMode = BufferCacheMode::DeviceShared;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cemuLog_log(LogType::Force, "Host buffer cache mode is only available on unified memory systems, using device shared mode instead");
|
||||
m_bufferCacheMode = BufferCacheMode::DeviceShared;
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_bufferCache)
|
||||
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, (m_bufferCacheMode == BufferCacheMode::DevicePrivate ? MTL::ResourceStorageModePrivate : MTL::ResourceStorageModeShared));
|
||||
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache));
|
||||
#endif
|
||||
}
|
||||
|
||||
void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, size_t size)
|
||||
{
|
||||
cemu_assert_debug(m_bufferCacheMode != BufferCacheMode::Host);
|
||||
cemu_assert_debug(m_bufferCache);
|
||||
cemu_assert_debug((offset + size) <= m_bufferCache->length());
|
||||
|
||||
if (m_bufferCacheMode == BufferCacheMode::DevicePrivate)
|
||||
{
|
||||
auto blitCommandEncoder = m_mtlr->GetBlitCommandEncoder();
|
||||
|
||||
auto allocation = m_stagingAllocator.AllocateBufferMemory(size, 1);
|
||||
memcpy(allocation.memPtr, data, size);
|
||||
m_stagingAllocator.FlushReservation(allocation);
|
||||
|
||||
blitCommandEncoder->copyFromBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size);
|
||||
|
||||
//m_mtlr->CopyBufferToBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy((uint8*)m_bufferCache->contents() + offset, data, size);
|
||||
}
|
||||
}
|
||||
|
||||
void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size)
|
||||
{
|
||||
cemu_assert_debug(m_bufferCacheMode != BufferCacheMode::Host);
|
||||
cemu_assert_debug(m_bufferCache);
|
||||
|
||||
if (m_bufferCacheMode == BufferCacheMode::DevicePrivate)
|
||||
m_mtlr->CopyBufferToBuffer(m_bufferCache, srcOffset, m_bufferCache, dstOffset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES);
|
||||
else
|
||||
memcpy((uint8*)m_bufferCache->contents() + dstOffset, (uint8*)m_bufferCache->contents() + srcOffset, size);
|
||||
}
|
76
src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
Normal file
76
src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
Normal file
|
@ -0,0 +1,76 @@
|
|||
#pragma once
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h"
|
||||
|
||||
#include "GameProfile/GameProfile.h"
|
||||
|
||||
class MetalMemoryManager
|
||||
{
|
||||
public:
|
||||
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_stagingAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 32u * 1024 * 1024), m_indexAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 4u * 1024 * 1024) {}
|
||||
~MetalMemoryManager();
|
||||
|
||||
MetalSynchronizedRingAllocator& GetStagingAllocator()
|
||||
{
|
||||
return m_stagingAllocator;
|
||||
}
|
||||
|
||||
MetalSynchronizedHeapAllocator& GetIndexAllocator()
|
||||
{
|
||||
return m_indexAllocator;
|
||||
}
|
||||
|
||||
MTL::Buffer* GetBufferCache()
|
||||
{
|
||||
return m_bufferCache;
|
||||
}
|
||||
|
||||
void CleanupBuffers(MTL::CommandBuffer* latestFinishedCommandBuffer)
|
||||
{
|
||||
m_stagingAllocator.CleanupBuffer(latestFinishedCommandBuffer);
|
||||
m_indexAllocator.CleanupBuffer(latestFinishedCommandBuffer);
|
||||
}
|
||||
|
||||
// Texture upload buffer
|
||||
void* AcquireTextureUploadBuffer(size_t size);
|
||||
void ReleaseTextureUploadBuffer(uint8* mem);
|
||||
|
||||
// Buffer cache
|
||||
void InitBufferCache(size_t size);
|
||||
void UploadToBufferCache(const void* data, size_t offset, size_t size);
|
||||
void CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size);
|
||||
|
||||
// Getters
|
||||
bool UseHostMemoryForCache() const
|
||||
{
|
||||
return (m_bufferCacheMode == BufferCacheMode::Host);
|
||||
}
|
||||
|
||||
bool NeedsReducedLatency() const
|
||||
{
|
||||
return (m_bufferCacheMode == BufferCacheMode::DeviceShared || m_bufferCacheMode == BufferCacheMode::Host);
|
||||
}
|
||||
|
||||
MPTR GetImportedMemBaseAddress() const
|
||||
{
|
||||
return m_importedMemBaseAddress;
|
||||
}
|
||||
|
||||
size_t GetHostAllocationSize() const
|
||||
{
|
||||
return m_hostAllocationSize;
|
||||
}
|
||||
|
||||
private:
|
||||
class MetalRenderer* m_mtlr;
|
||||
|
||||
std::vector<uint8> m_textureUploadBuffer;
|
||||
|
||||
MetalSynchronizedRingAllocator m_stagingAllocator;
|
||||
MetalSynchronizedHeapAllocator m_indexAllocator;
|
||||
|
||||
MTL::Buffer* m_bufferCache = nullptr;
|
||||
BufferCacheMode m_bufferCacheMode;
|
||||
MPTR m_importedMemBaseAddress;
|
||||
size_t m_hostAllocationSize = 0;
|
||||
};
|
37
src/Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.cpp
Normal file
37
src/Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.cpp
Normal file
|
@ -0,0 +1,37 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
|
||||
|
||||
// Releases every pipeline state that was created.
MetalOutputShaderCache::~MetalOutputShaderCache()
{
	for (MTL::RenderPipelineState* pipeline : m_cache)
	{
		if (pipeline)
			pipeline->release();
	}
}
|
||||
|
||||
// Returns the render pipeline state for an output shader, creating it on first use.
// The cache slot is shaderIndex, offset by METAL_SHADER_TYPE_COUNT for the sRGB variant.
// Returns nullptr (after logging) when pipeline creation fails; the next call retries.
// NOTE(review): shaderIndex is not range-checked here - callers presumably pass values below METAL_SHADER_TYPE_COUNT.
MTL::RenderPipelineState* MetalOutputShaderCache::GetPipeline(RendererOutputShader* shader, uint8 shaderIndex, bool usesSRGB)
{
	uint8 cacheIndex = (usesSRGB ? METAL_SHADER_TYPE_COUNT : 0) + shaderIndex;
	auto& renderPipelineState = m_cache[cacheIndex];
	if (renderPipelineState)
		return renderPipelineState;

	// Create a new render pipeline state
	auto vertexShaderMtl = static_cast<RendererShaderMtl*>(shader->GetVertexShader())->GetFunction();
	auto fragmentShaderMtl = static_cast<RendererShaderMtl*>(shader->GetFragmentShader())->GetFunction();

	NS_STACK_SCOPED auto renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
	renderPipelineDescriptor->setVertexFunction(vertexShaderMtl);
	renderPipelineDescriptor->setFragmentFunction(fragmentShaderMtl);
	renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(usesSRGB ? MTL::PixelFormatBGRA8Unorm_sRGB : MTL::PixelFormatBGRA8Unorm);

	NS::Error* error = nullptr;
	renderPipelineState = m_mtlr->GetDevice()->newRenderPipelineState(renderPipelineDescriptor, &error);
	// Failure is signalled by the returned object, not by the error pointer
	if (!renderPipelineState)
	{
		// Never hand a null C string to the logger
		const char* reason = "unknown error";
		if (error)
		{
			NS::String* description = error->localizedDescription();
			const char* descriptionChars = description ? description->utf8String() : nullptr;
			if (descriptionChars)
				reason = descriptionChars;
		}
		cemuLog_log(LogType::Force, "error creating output render pipeline state: {}", reason);
	}

	return renderPipelineState;
}
|
20
src/Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h
Normal file
20
src/Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h
Normal file
|
@ -0,0 +1,20 @@
|
|||
#pragma once
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
|
||||
constexpr uint8 METAL_SHADER_TYPE_COUNT = 6;
|
||||
constexpr uint8 METAL_OUTPUT_SHADER_CACHE_SIZE = 2 * METAL_SHADER_TYPE_COUNT;
|
||||
|
||||
class MetalOutputShaderCache
|
||||
{
|
||||
public:
|
||||
MetalOutputShaderCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
|
||||
~MetalOutputShaderCache();
|
||||
|
||||
MTL::RenderPipelineState* GetPipeline(RendererOutputShader* shader, uint8 shaderIndex, bool usesSRGB);
|
||||
|
||||
private:
|
||||
class MetalRenderer* m_mtlr;
|
||||
|
||||
MTL::RenderPipelineState* m_cache[METAL_OUTPUT_SHADER_CACHE_SIZE] = {nullptr};
|
||||
};
|
26
src/Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h
Normal file
26
src/Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h
Normal file
|
@ -0,0 +1,26 @@
|
|||
#pragma once
|
||||
|
||||
class MetalPerformanceMonitor
|
||||
{
|
||||
public:
|
||||
// Per frame data
|
||||
uint32 m_commandBuffers = 0;
|
||||
uint32 m_renderPasses = 0;
|
||||
uint32 m_clears = 0;
|
||||
uint32 m_manualVertexFetchDraws = 0;
|
||||
uint32 m_meshDraws = 0;
|
||||
uint32 m_triangleFans = 0;
|
||||
|
||||
MetalPerformanceMonitor() = default;
|
||||
~MetalPerformanceMonitor() = default;
|
||||
|
||||
void ResetPerFrameData()
|
||||
{
|
||||
m_commandBuffers = 0;
|
||||
m_renderPasses = 0;
|
||||
m_clears = 0;
|
||||
m_manualVertexFetchDraws = 0;
|
||||
m_meshDraws = 0;
|
||||
m_triangleFans = 0;
|
||||
}
|
||||
};
|
621
src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
Normal file
621
src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
Normal file
|
@ -0,0 +1,621 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h"
|
||||
|
||||
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
||||
#include "Cafe/HW/Latte/Common/RegisterSerializer.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteShaderCache.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||
#include "Cafe/HW/Latte/ISA/LatteReg.h"
|
||||
#include "Cemu/FileCache/FileCache.h"
|
||||
#include "Common/precompiled.h"
|
||||
#include "util/helpers/helpers.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
|
||||
#include <openssl/sha.h>
|
||||
|
||||
static bool g_compilePipelineThreadInit{false};
|
||||
static std::mutex g_compilePipelineMutex;
|
||||
static std::condition_variable g_compilePipelineCondVar;
|
||||
static std::queue<MetalPipelineCompiler*> g_compilePipelineRequests;
|
||||
|
||||
// Worker loop: waits for queued pipeline compilers, compiles each one and deletes it.
// The loop never exits.
static void compileThreadFunc(sint32 threadIndex)
{
	SetThreadName("compilePl");

	// one thread runs at normal priority while the others run at lower priority
	if (threadIndex != 0)
	{
		// TODO: set thread priority
	}

	for (;;)
	{
		MetalPipelineCompiler* request = nullptr;
		{
			// Hold the lock only while taking a request off the queue
			std::unique_lock lock(g_compilePipelineMutex);
			g_compilePipelineCondVar.wait(lock, [] { return !g_compilePipelineRequests.empty(); });

			request = g_compilePipelineRequests.front();
			g_compilePipelineRequests.pop();
		}

		request->Compile(true, false, true);
		delete request;
	}
}
|
||||
|
||||
static void initCompileThread()
|
||||
{
|
||||
uint32 numCompileThreads;
|
||||
|
||||
uint32 cpuCoreCount = GetPhysicalCoreCount();
|
||||
if (cpuCoreCount <= 2)
|
||||
numCompileThreads = 1;
|
||||
else
|
||||
numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3
|
||||
|
||||
numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8
|
||||
|
||||
for (uint32 i = 0; i < numCompileThreads; i++)
|
||||
{
|
||||
std::thread compileThread(compileThreadFunc, i);
|
||||
compileThread.detach();
|
||||
}
|
||||
}
|
||||
|
||||
// Appends a compiler to the request queue and wakes one compile thread.
static void queuePipeline(MetalPipelineCompiler* v)
{
	{
		std::lock_guard guard(g_compilePipelineMutex);
		g_compilePipelineRequests.push(v);
	}
	// Notify after the lock has been dropped
	g_compilePipelineCondVar.notify_one();
}
|
||||
|
||||
// make a guess if a pipeline is not essential
|
||||
// non-essential means that skipping these drawcalls shouldn't lead to permanently corrupted graphics
|
||||
bool IsAsyncPipelineAllowed(const MetalAttachmentsInfo& attachmentsInfo, Vector2i extend, uint32 indexCount)
|
||||
{
|
||||
if (extend.x == 1600 && extend.y == 1600)
|
||||
return false; // Splatoon ink mechanics use 1600x1600 R8 and R8G8 framebuffers, this resolution is rare enough that we can just blacklist it globally
|
||||
|
||||
if (attachmentsInfo.depthFormat != Latte::E_GX2SURFFMT::INVALID_FORMAT)
|
||||
return true; // aggressive filter but seems to work well so far
|
||||
|
||||
// small index count (3,4,5,6) is often associated with full-viewport quads (which are considered essential due to often being used to generate persistent textures)
|
||||
if (indexCount <= 6)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Pointer to the pipeline cache instance; assigned by the MetalPipelineCache constructor.
MetalPipelineCache* g_mtlPipelineCache = nullptr;

// Returns the instance stored in g_mtlPipelineCache.
// Must only be called after a MetalPipelineCache has been constructed.
MetalPipelineCache& MetalPipelineCache::GetInstance()
{
	// catch use-before-construction in debug builds instead of silently dereferencing null
	cemu_assert_debug(g_mtlPipelineCache != nullptr);
	return *g_mtlPipelineCache;
}
|
||||
|
||||
MetalPipelineCache::MetalPipelineCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}
|
||||
{
|
||||
g_mtlPipelineCache = this;
|
||||
}
|
||||
|
||||
// Releases every cached Metal pipeline state and frees the owning PipelineObject.
MetalPipelineCache::~MetalPipelineCache()
{
	for (auto& [key, pipelineObj] : m_pipelineCache)
	{
		if (!pipelineObj)
			continue;

		// m_pipeline can still be null if compilation failed or never completed
		if (pipelineObj->m_pipeline)
			pipelineObj->m_pipeline->release();
		delete pipelineObj;
	}
	m_pipelineCache.clear();

	// do not leave GetInstance() pointing at a destroyed object
	if (g_mtlPipelineCache == this)
		g_mtlPipelineCache = nullptr;
}
|
||||
|
||||
// Returns the pipeline object for the given shader/attachment/register state, creating it on a cache miss.
// On a miss the pipeline is either queued for asynchronous compilation (when enabled in the config and
// IsAsyncPipelineAllowed() agrees) or compiled synchronously before returning. Newly created pipelines
// are also handed to AddCurrentStateToCache() so they get written to the on-disk cache.
// Note: for async pipelines the returned object may not be compiled yet.
PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, Vector2i extend, uint32 indexCount, const LatteContextRegister& lcr)
{
	uint64 hash = CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);
	PipelineObject*& pipelineObj = m_pipelineCache[hash];
	if (pipelineObj)
		return pipelineObj;

	pipelineObj = new PipelineObject();

	MetalPipelineCompiler* compiler = new MetalPipelineCompiler(m_mtlr, *pipelineObj);
	compiler->InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);

	bool allowAsyncCompile = false;
	if (GetConfig().async_compile)
		allowAsyncCompile = IsAsyncPipelineAllowed(activeAttachmentsInfo, extend, indexCount);

	if (allowAsyncCompile)
	{
		// worker threads are started lazily on the first async request
		if (!g_compilePipelineThreadInit)
		{
			initCompileThread();
			g_compilePipelineThreadInit = true;
		}

		// ownership of the compiler passes to the worker thread, which deletes it
		queuePipeline(compiler);
	}
	else
	{
		// Also force compile to ensure that the pipeline is ready.
		// The call must live outside the assertion macro: a debug-only assert may not
		// evaluate its argument in release builds, which would skip compilation entirely.
		[[maybe_unused]] const bool compiled = compiler->Compile(true, true, true);
		cemu_assert_debug(compiled);
		delete compiler;
	}

	// Save to cache
	AddCurrentStateToCache(hash, lastUsedAttachmentsInfo);

	return pipelineObj;
}
|
||||
|
||||
// Computes the in-memory lookup key for a render pipeline from the attachment formats, vertex buffer
// strides, the fetch/vertex/pixel shader hashes and the relevant GPU registers.
// All register values are taken from `lcr` so that the result is the same whether the function is
// called for the live GPU state or for a register set restored from the on-disk cache.
uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
{
	// Hash
	uint64 stateHash = 0;

	// color attachments: format of the last used FBO, plus a marker when the slot is absent from the active FBO
	for (int i = 0; i < Latte::GPU_LIMITS::NUM_COLOR_ATTACHMENTS; ++i)
	{
		Latte::E_GX2SURFFMT format = lastUsedAttachmentsInfo.colorFormats[i];
		if (format == Latte::E_GX2SURFFMT::INVALID_FORMAT)
			continue;

		stateHash += GetMtlPixelFormat(format, false) + i * 31;
		stateHash = std::rotl<uint64>(stateHash, 7);

		if (activeAttachmentsInfo.colorFormats[i] == Latte::E_GX2SURFFMT::INVALID_FORMAT)
		{
			stateHash += 1;
			stateHash = std::rotl<uint64>(stateHash, 1);
		}
	}

	// depth attachment, same scheme as for color
	if (lastUsedAttachmentsInfo.depthFormat != Latte::E_GX2SURFFMT::INVALID_FORMAT)
	{
		stateHash += GetMtlPixelFormat(lastUsedAttachmentsInfo.depthFormat, true);
		stateHash = std::rotl<uint64>(stateHash, 7);

		if (activeAttachmentsInfo.depthFormat == Latte::E_GX2SURFFMT::INVALID_FORMAT)
		{
			stateHash += 1;
			stateHash = std::rotl<uint64>(stateHash, 1);
		}
	}

	// vertex buffer strides
	for (auto& group : fetchShader->bufferGroups)
	{
		uint32 bufferStride = group.getCurrentBufferStride(lcr.GetRawView());
		stateHash = std::rotl<uint64>(stateHash, 7);
		stateHash += bufferStride * 3;
	}

	stateHash += fetchShader->getVkPipelineHashFragment();
	stateHash = std::rotl<uint64>(stateHash, 7);

	stateHash += lcr.GetRawView()[mmVGT_STRMOUT_EN];
	stateHash = std::rotl<uint64>(stateHash, 7);

	if (lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL())
		stateHash += 0x333333;

	stateHash = (stateHash >> 8) + (stateHash * 0x370531ull) % 0x7F980D3BF9B4639Dull;

	uint32* ctxRegister = lcr.GetRawView();

	// shader hashes
	if (vertexShader)
		stateHash += vertexShader->baseHash;

	stateHash = std::rotl<uint64>(stateHash, 13);

	if (pixelShader)
		stateHash += pixelShader->baseHash + pixelShader->auxHash;

	stateHash = std::rotl<uint64>(stateHash, 13);

	// rasterizer / clip / blend registers
	uint32 polygonCtrl = lcr.PA_SU_SC_MODE_CNTL.getRawValue();
	stateHash += polygonCtrl;
	stateHash = std::rotl<uint64>(stateHash, 7);

	stateHash += ctxRegister[Latte::REGADDR::PA_CL_CLIP_CNTL];
	stateHash = std::rotl<uint64>(stateHash, 7);

	const auto colorControlReg = ctxRegister[Latte::REGADDR::CB_COLOR_CONTROL];
	stateHash += colorControlReg;

	stateHash += ctxRegister[Latte::REGADDR::CB_TARGET_MASK];

	// per-target blend control only matters for targets that have blending enabled
	const uint32 blendEnableMask = (colorControlReg >> 8) & 0xFF;
	if (blendEnableMask)
	{
		for (auto i = 0; i < 8; ++i)
		{
			if (((blendEnableMask & (1 << i))) == 0)
				continue;
			stateHash = std::rotl<uint64>(stateHash, 7);
			stateHash += ctxRegister[Latte::REGADDR::CB_BLEND0_CONTROL + i];
		}
	}

	// Mesh pipeline
	// Read the primitive type from `lcr` rather than the global LatteGPUState: when this function is
	// called from LoadPipelineFromCache the global state does not describe the pipeline being hashed.
	const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE]);
	bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);

	bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);

	if (usesGeometryShader)
	{
		// NOTE(review): the geometry shader's own hash is not mixed in here — confirm whether that is intended
		stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE];
		stateHash = std::rotl<uint64>(stateHash, 7);
	}

	return stateHash;
}
|
||||
|
||||
struct
|
||||
{
|
||||
uint32 pipelineLoadIndex;
|
||||
uint32 pipelineMaxFileIndex;
|
||||
|
||||
std::atomic_uint32_t pipelinesQueued;
|
||||
std::atomic_uint32_t pipelinesLoaded;
|
||||
} g_mtlCacheState;
|
||||
|
||||
// Prepares loading of the per-title pipeline cache: resets the loader state, starts the
// compiler threads and opens (or creates) the cache file.
// Returns the number of pipelines stored in the cache file, or 0 if the file could not be opened.
uint32 MetalPipelineCache::BeginLoading(uint64 cacheTitleId)
{
	// make sure the target directory exists; the error code is intentionally not inspected
	std::error_code dirError;
	fs::create_directories(ActiveSettings::GetCachePath("shaderCache/transferable"), dirError);
	const auto cacheFilePath = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_mtlpipeline.bin", cacheTitleId);

	// reset cache loader state
	g_mtlCacheState.pipelineLoadIndex = 0;
	g_mtlCacheState.pipelineMaxFileIndex = 0;
	g_mtlCacheState.pipelinesLoaded = 0;
	g_mtlCacheState.pipelinesQueued = 0;

	m_compilationCount.store(0);
	m_compilationQueue.clear();

	// one compiler thread per physical core, at least 1 and at most 8
	const uint32 physicalCores = GetPhysicalCoreCount();
	const uint32 workerCount = std::clamp(physicalCores, 1u, 8u);
	m_numCompilationThreads = workerCount;
	// TODO: uncomment?
	//if (VulkanRenderer::GetInstance()->GetDisableMultithreadedCompilation())
	//	m_numCompilationThreads = 1;

	for (uint32 workerIndex = 0; workerIndex < workerCount; ++workerIndex)
		std::thread(&MetalPipelineCache::CompilerThread, this).detach();

	// open cache file or create it
	cemu_assert_debug(s_cache == nullptr);
	s_cache = FileCache::Open(cacheFilePath, true, LatteShaderCache_getPipelineCacheExtraVersion(cacheTitleId));
	if (s_cache == nullptr)
	{
		cemuLog_log(LogType::Force, "Failed to open or create Metal pipeline cache file: {}", _pathToUtf8(cacheFilePath));
		return 0;
	}

	s_cache->UseCompression(false);
	g_mtlCacheState.pipelineMaxFileIndex = s_cache->GetMaximumFileIndex();
	return s_cache->GetFileCount();
}
|
||||
|
||||
// Advances cache loading by at most one file per call.
// pipelinesLoadedTotal receives the number of pipelines processed so far; pipelinesMissingShaders is
// always set to 0 by this implementation.
// Returns true while loading is still in progress and false once everything queued has been processed.
bool MetalPipelineCache::UpdateLoading(uint32& pipelinesLoadedTotal, uint32& pipelinesMissingShaders)
{
	pipelinesLoadedTotal = g_mtlCacheState.pipelinesLoaded;
	pipelinesMissingShaders = 0;

	// s_cache is null when BeginLoading failed to open the cache file; there is nothing to read then
	while (s_cache && g_mtlCacheState.pipelineLoadIndex <= g_mtlCacheState.pipelineMaxFileIndex)
	{
		if (m_compilationQueue.size() >= 50)
		{
			std::this_thread::sleep_for(std::chrono::milliseconds(10));
			return true; // queue up to 50 entries at a time
		}

		uint64 fileNameA, fileNameB;
		std::vector<uint8> fileData;
		if (s_cache->GetFileByIndex(g_mtlCacheState.pipelineLoadIndex, &fileNameA, &fileNameB, fileData))
		{
			// queue for async compilation
			g_mtlCacheState.pipelinesQueued++;
			m_compilationQueue.push(std::move(fileData));
			g_mtlCacheState.pipelineLoadIndex++;
			return true;
		}

		// no file stored at this index, try the next one
		g_mtlCacheState.pipelineLoadIndex++;
	}

	if (g_mtlCacheState.pipelinesLoaded != g_mtlCacheState.pipelinesQueued)
	{
		std::this_thread::sleep_for(std::chrono::milliseconds(10));
		return true; // pipelines still compiling
	}

	return false; // done
}
|
||||
|
||||
void MetalPipelineCache::EndLoading()
|
||||
{
|
||||
// shut down compilation threads
|
||||
uint32 threadCount = m_numCompilationThreads;
|
||||
m_numCompilationThreads = 0; // signal thread shutdown
|
||||
for (uint32 i = 0; i < threadCount; i++)
|
||||
{
|
||||
m_compilationQueue.push({}); // push empty workload for every thread. Threads then will shutdown after checking for m_numCompilationThreads == 0
|
||||
}
|
||||
// keep cache file open for writing of new pipelines
|
||||
}
|
||||
|
||||
void MetalPipelineCache::Close()
|
||||
{
|
||||
if(s_cache)
|
||||
{
|
||||
delete s_cache;
|
||||
s_cache = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
struct CachedPipeline
|
||||
{
|
||||
struct ShaderHash
|
||||
{
|
||||
uint64 baseHash;
|
||||
uint64 auxHash;
|
||||
bool isPresent{};
|
||||
|
||||
void set(uint64 baseHash, uint64 auxHash)
|
||||
{
|
||||
this->baseHash = baseHash;
|
||||
this->auxHash = auxHash;
|
||||
this->isPresent = true;
|
||||
}
|
||||
};
|
||||
|
||||
ShaderHash vsHash; // includes fetch shader
|
||||
ShaderHash gsHash;
|
||||
ShaderHash psHash;
|
||||
|
||||
MetalAttachmentsInfo lastUsedAttachmentsInfo;
|
||||
|
||||
Latte::GPUCompactedRegisterState gpuState;
|
||||
};
|
||||
|
||||
// Rebuilds one pipeline from a serialized cache entry: deserializes the entry, looks up the referenced
// shaders, compiles the pipeline and stores it in m_pipelineCache.
// Entries that fail to deserialize or reference shaders that cannot be found are skipped.
void MetalPipelineCache::LoadPipelineFromCache(std::span<uint8> fileData)
{
	static FSpinlock s_spinlockSharedInternal;

	// Owns the two heap temporaries and frees them on every exit path (the early returns used to leak them).
	// NOTE(review): the original code takes the spinlock around these allocations/frees; the reason is
	// not visible here, so the locking is preserved as-is.
	struct Temporaries
	{
		LatteContextRegister* lcr{};
		CachedPipeline* cachedPipeline{};

		~Temporaries()
		{
			s_spinlockSharedInternal.lock();
			delete lcr;
			delete cachedPipeline;
			s_spinlockSharedInternal.unlock();
		}
	} temporaries;

	// deserialize file
	temporaries.lcr = new LatteContextRegister();
	s_spinlockSharedInternal.lock();
	temporaries.cachedPipeline = new CachedPipeline();
	s_spinlockSharedInternal.unlock();

	LatteContextRegister* lcr = temporaries.lcr;
	CachedPipeline* cachedPipeline = temporaries.cachedPipeline;

	MemStreamReader streamReader(fileData.data(), fileData.size());
	if (!DeserializePipeline(streamReader, *cachedPipeline))
	{
		// failed to deserialize
		return;
	}
	// restored register view from compacted state
	Latte::LoadGPURegisterState(*lcr, cachedPipeline->gpuState);

	LatteDecompilerShader* vertexShader = nullptr;
	LatteDecompilerShader* geometryShader = nullptr;
	LatteDecompilerShader* pixelShader = nullptr;
	// find vertex shader
	if (cachedPipeline->vsHash.isPresent)
	{
		vertexShader = LatteSHRC_FindVertexShader(cachedPipeline->vsHash.baseHash, cachedPipeline->vsHash.auxHash);
		if (!vertexShader)
		{
			cemuLog_log(LogType::Force, "Vertex shader not found in cache");
			return;
		}
	}
	// find geometry shader
	if (cachedPipeline->gsHash.isPresent)
	{
		geometryShader = LatteSHRC_FindGeometryShader(cachedPipeline->gsHash.baseHash, cachedPipeline->gsHash.auxHash);
		if (!geometryShader)
		{
			cemuLog_log(LogType::Force, "Geometry shader not found in cache");
			return;
		}
	}
	// find pixel shader
	if (cachedPipeline->psHash.isPresent)
	{
		pixelShader = LatteSHRC_FindPixelShader(cachedPipeline->psHash.baseHash, cachedPipeline->psHash.auxHash);
		if (!pixelShader)
		{
			cemuLog_log(LogType::Force, "Pixel shader not found in cache");
			return;
		}
	}

	// both are dereferenced below (vertexShader->compatibleFetchShader, attachments from pixelShader),
	// so an entry that lacks either cannot be compiled
	if (!vertexShader || !pixelShader)
	{
		cemu_assert_debug(false);
		return;
	}

	MetalAttachmentsInfo attachmentsInfo(*lcr, pixelShader);

	PipelineObject* pipelineObject = new PipelineObject();

	// compile
	{
		MetalPipelineCompiler pp(m_mtlr, *pipelineObject);
		pp.InitFromState(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, cachedPipeline->lastUsedAttachmentsInfo, attachmentsInfo, *lcr);
		pp.Compile(true, true, false);
		// destroy pp early
	}

	// Cache the pipeline
	uint64 pipelineStateHash = CalculatePipelineHash(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, cachedPipeline->lastUsedAttachmentsInfo, attachmentsInfo, *lcr);
	m_pipelineCacheLock.lock();
	m_pipelineCache[pipelineStateHash] = pipelineObject;
	m_pipelineCacheLock.unlock();

	// lcr and cachedPipeline are freed by ~Temporaries
}
|
||||
|
||||
ConcurrentQueue<CachedPipeline*> g_mtlPipelineCachingQueue;
|
||||
|
||||
void MetalPipelineCache::AddCurrentStateToCache(uint64 pipelineStateHash, const MetalAttachmentsInfo& lastUsedAttachmentsInfo)
|
||||
{
|
||||
if (!m_pipelineCacheStoreThread)
|
||||
{
|
||||
m_pipelineCacheStoreThread = new std::thread(&MetalPipelineCache::WorkerThread, this);
|
||||
m_pipelineCacheStoreThread->detach();
|
||||
}
|
||||
// fill job structure with cached GPU state
|
||||
// for each cached pipeline we store:
|
||||
// - Active shaders (referenced by hash)
|
||||
// - An almost-complete register state of the GPU (minus some ALU uniform constants which aren't relevant)
|
||||
CachedPipeline* job = new CachedPipeline();
|
||||
auto vs = LatteSHRC_GetActiveVertexShader();
|
||||
auto gs = LatteSHRC_GetActiveGeometryShader();
|
||||
auto ps = LatteSHRC_GetActivePixelShader();
|
||||
if (vs)
|
||||
job->vsHash.set(vs->baseHash, vs->auxHash);
|
||||
if (gs)
|
||||
job->gsHash.set(gs->baseHash, gs->auxHash);
|
||||
if (ps)
|
||||
job->psHash.set(ps->baseHash, ps->auxHash);
|
||||
job->lastUsedAttachmentsInfo = lastUsedAttachmentsInfo;
|
||||
Latte::StoreGPURegisterState(LatteGPUState.contextNew, job->gpuState);
|
||||
// queue job
|
||||
g_mtlPipelineCachingQueue.push(job);
|
||||
}
|
||||
|
||||
// Writes a cache entry to memWriter. Layout (all big-endian): version byte, shader presence mask,
// base/aux hash pair for each present stage (VS, GS, PS), the color and depth attachment formats
// as uint16, followed by the serialized register state. Always returns true.
bool MetalPipelineCache::SerializePipeline(MemStreamWriter& memWriter, CachedPipeline& cachedPipeline)
{
	// stage order defines both the mask bit (1, 2, 4) and the order of the hash pairs
	const CachedPipeline::ShaderHash* stageHashes[3] = { &cachedPipeline.vsHash, &cachedPipeline.gsHash, &cachedPipeline.psHash };

	memWriter.writeBE<uint8>(0x01); // version

	uint8 presentMask = 0;
	for (uint32 stage = 0; stage < 3; ++stage)
	{
		if (stageHashes[stage]->isPresent)
			presentMask |= (uint8)(1u << stage);
	}
	memWriter.writeBE<uint8>(presentMask);

	for (const CachedPipeline::ShaderHash* stageHash : stageHashes)
	{
		if (!stageHash->isPresent)
			continue;
		memWriter.writeBE<uint64>(stageHash->baseHash);
		memWriter.writeBE<uint64>(stageHash->auxHash);
	}

	const MetalAttachmentsInfo& attachments = cachedPipeline.lastUsedAttachmentsInfo;
	for (uint8 target = 0; target < LATTE_NUM_COLOR_TARGET; ++target)
		memWriter.writeBE<uint16>((uint16)attachments.colorFormats[target]);
	memWriter.writeBE<uint16>((uint16)attachments.depthFormat);

	Latte::SerializeRegisterState(cachedPipeline.gpuState, memWriter);

	return true;
}
|
||||
|
||||
// Reads a cache entry written by SerializePipeline into cachedPipeline.
// Returns false if the version byte is unknown, the register state cannot be deserialized, or the
// reader reports an error (for example truncated data); returns true otherwise.
bool MetalPipelineCache::DeserializePipeline(MemStreamReader& memReader, CachedPipeline& cachedPipeline)
{
	// version
	if (memReader.readBE<uint8>() != 1)
	{
		cemuLog_log(LogType::Force, "Cached Metal pipeline corrupted or has unknown version");
		return false;
	}
	// shader hashes
	uint8 presentMask = memReader.readBE<uint8>();
	if (presentMask & 1)
	{
		uint64 baseHash = memReader.readBE<uint64>();
		uint64 auxHash = memReader.readBE<uint64>();
		cachedPipeline.vsHash.set(baseHash, auxHash);
	}
	if (presentMask & 2)
	{
		uint64 baseHash = memReader.readBE<uint64>();
		uint64 auxHash = memReader.readBE<uint64>();
		cachedPipeline.gsHash.set(baseHash, auxHash);
	}
	if (presentMask & 4)
	{
		uint64 baseHash = memReader.readBE<uint64>();
		uint64 auxHash = memReader.readBE<uint64>();
		cachedPipeline.psHash.set(baseHash, auxHash);
	}

	// attachment formats
	for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
		cachedPipeline.lastUsedAttachmentsInfo.colorFormats[i] = (Latte::E_GX2SURFFMT)memReader.readBE<uint16>();
	cachedPipeline.lastUsedAttachmentsInfo.depthFormat = (Latte::E_GX2SURFFMT)memReader.readBE<uint16>();

	// deserialize GPU state
	if (!Latte::DeserializeRegisterState(cachedPipeline.gpuState, memReader))
	{
		return false;
	}

	// A read error means the data was incomplete. This used to be checked only by a debug
	// assertion, so release builds would accept a partially read entry.
	if (memReader.hasError())
	{
		cemuLog_log(LogType::Force, "Cached Metal pipeline corrupted or has unknown version");
		return false;
	}

	return true;
}
|
||||
|
||||
int MetalPipelineCache::CompilerThread()
|
||||
{
|
||||
SetThreadName("plCacheCompiler");
|
||||
while (m_numCompilationThreads != 0)
|
||||
{
|
||||
std::vector<uint8> pipelineData = m_compilationQueue.pop();
|
||||
if(pipelineData.empty())
|
||||
continue;
|
||||
LoadPipelineFromCache(pipelineData);
|
||||
++g_mtlCacheState.pipelinesLoaded;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void MetalPipelineCache::WorkerThread()
|
||||
{
|
||||
SetThreadName("plCacheWriter");
|
||||
while (true)
|
||||
{
|
||||
CachedPipeline* job;
|
||||
g_mtlPipelineCachingQueue.pop(job);
|
||||
if (!s_cache)
|
||||
{
|
||||
delete job;
|
||||
continue;
|
||||
}
|
||||
// serialize
|
||||
MemStreamWriter memWriter(1024 * 4);
|
||||
SerializePipeline(memWriter, *job);
|
||||
auto blob = memWriter.getResult();
|
||||
// file name is derived from data hash
|
||||
uint8 hash[SHA256_DIGEST_LENGTH];
|
||||
SHA256(blob.data(), blob.size(), hash);
|
||||
uint64 nameA = *(uint64be*)(hash + 0);
|
||||
uint64 nameB = *(uint64be*)(hash + 8);
|
||||
s_cache->AddFileAsync({ nameA, nameB }, blob.data(), blob.size());
|
||||
delete job;
|
||||
}
|
||||
}
|
52
src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h
Normal file
52
src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h
Normal file
|
@ -0,0 +1,52 @@
|
|||
#pragma once
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h"
|
||||
#include "util/helpers/ConcurrentQueue.h"
|
||||
#include "util/helpers/fspinlock.h"
|
||||
#include "util/math/vector2.h"
|
||||
|
||||
class MetalPipelineCache
|
||||
{
|
||||
public:
|
||||
static MetalPipelineCache& GetInstance();
|
||||
|
||||
MetalPipelineCache(class MetalRenderer* metalRenderer);
|
||||
~MetalPipelineCache();
|
||||
|
||||
PipelineObject* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, Vector2i extend, uint32 indexCount, const LatteContextRegister& lcr);
|
||||
|
||||
// Cache loading
|
||||
uint32 BeginLoading(uint64 cacheTitleId); // returns count of pipelines stored in cache
|
||||
bool UpdateLoading(uint32& pipelinesLoadedTotal, uint32& pipelinesMissingShaders);
|
||||
void EndLoading();
|
||||
void LoadPipelineFromCache(std::span<uint8> fileData);
|
||||
void Close(); // called on title exit
|
||||
|
||||
// Debug
|
||||
size_t GetPipelineCacheSize() const { return m_pipelineCache.size(); }
|
||||
|
||||
private:
|
||||
class MetalRenderer* m_mtlr;
|
||||
|
||||
std::map<uint64, PipelineObject*> m_pipelineCache;
|
||||
FSpinlock m_pipelineCacheLock;
|
||||
|
||||
std::thread* m_pipelineCacheStoreThread;
|
||||
|
||||
class FileCache* s_cache;
|
||||
|
||||
std::atomic_uint32_t m_numCompilationThreads{ 0 };
|
||||
ConcurrentQueue<std::vector<uint8>> m_compilationQueue;
|
||||
std::atomic_uint32_t m_compilationCount;
|
||||
|
||||
static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
|
||||
|
||||
void AddCurrentStateToCache(uint64 pipelineStateHash, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo);
|
||||
|
||||
// pipeline serialization for file
|
||||
bool SerializePipeline(class MemStreamWriter& memWriter, struct CachedPipeline& cachedPipeline);
|
||||
bool DeserializePipeline(class MemStreamReader& memReader, struct CachedPipeline& cachedPipeline);
|
||||
|
||||
int CompilerThread();
|
||||
void WorkerThread();
|
||||
};
|
484
src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp
Normal file
484
src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp
Normal file
|
@ -0,0 +1,484 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
|
||||
|
||||
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||
|
||||
#include <chrono>
|
||||
|
||||
extern std::atomic_int g_compiling_pipelines;
|
||||
extern std::atomic_int g_compiling_pipelines_async;
|
||||
extern std::atomic_uint64_t g_compiling_pipelines_syncTimeSum;
|
||||
|
||||
static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable& psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
|
||||
{
|
||||
auto parameterMask = vertexShader->outputParameterMask;
|
||||
for (uint32 i = 0; i < 32; i++)
|
||||
{
|
||||
if ((parameterMask & (1 << i)) == 0)
|
||||
continue;
|
||||
sint32 vsSemanticId = psInputTable.getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
|
||||
if (vsSemanticId < 0)
|
||||
continue;
|
||||
// make sure PS has matching input
|
||||
if (!psInputTable.hasPSImportForSemanticId(vsSemanticId))
|
||||
continue;
|
||||
gsSrc.append(fmt::format("out.passParameterSem{} = objectPayload.vertexOut[{}].passParameterSem{};\r\n", vsSemanticId, vIdx, vsSemanticId));
|
||||
}
|
||||
gsSrc.append(fmt::format("out.position = objectPayload.vertexOut[{}].position;\r\n", vIdx));
|
||||
gsSrc.append(fmt::format("mesh.set_vertex({}, out);\r\n", vIdx));
|
||||
}
|
||||
|
||||
static void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable& psInputTable, const char* variant, const LatteContextRegister& latteRegister)
|
||||
{
|
||||
auto parameterMask = vertexShader->outputParameterMask;
|
||||
for (uint32 i = 0; i < 32; i++)
|
||||
{
|
||||
if ((parameterMask & (1 << i)) == 0)
|
||||
continue;
|
||||
sint32 vsSemanticId = psInputTable.getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
|
||||
if (vsSemanticId < 0)
|
||||
continue;
|
||||
// make sure PS has matching input
|
||||
if (!psInputTable.hasPSImportForSemanticId(vsSemanticId))
|
||||
continue;
|
||||
gsSrc.append(fmt::format("out.passParameterSem{} = gen4thVertex{}(objectPayload.vertexOut[0].passParameterSem{}, objectPayload.vertexOut[1].passParameterSem{}, objectPayload.vertexOut[2].passParameterSem{});\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId));
|
||||
}
|
||||
gsSrc.append(fmt::format("out.position = gen4thVertex{}(objectPayload.vertexOut[0].position, objectPayload.vertexOut[1].position, objectPayload.vertexOut[2].position);\r\n", variant));
|
||||
gsSrc.append(fmt::format("mesh.set_vertex(3, out);\r\n"));
|
||||
}
|
||||
|
||||
static void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable& psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister)
|
||||
{
|
||||
sint32 pList[4] = { p0, p1, p2, p3 };
|
||||
for (sint32 i = 0; i < 4; i++)
|
||||
{
|
||||
if (pList[i] == 3)
|
||||
rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister);
|
||||
else
|
||||
rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister);
|
||||
}
|
||||
gsSrc.append(fmt::format("mesh.set_index(0, {});\r\n", pList[0]));
|
||||
gsSrc.append(fmt::format("mesh.set_index(1, {});\r\n", pList[1]));
|
||||
gsSrc.append(fmt::format("mesh.set_index(2, {});\r\n", pList[2]));
|
||||
gsSrc.append(fmt::format("mesh.set_index(3, {});\r\n", pList[1]));
|
||||
gsSrc.append(fmt::format("mesh.set_index(4, {});\r\n", pList[2]));
|
||||
gsSrc.append(fmt::format("mesh.set_index(5, {});\r\n", pList[3]));
|
||||
}
|
||||
|
||||
// Generates the Metal mesh shader that emulates the RECTS primitive type: it receives three vertices
// through the object payload, derives the fourth one and outputs two triangles.
// Returns a newly allocated RendererShaderMtl built from the generated source; the caller owns it.
static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer, const LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister)
{
	LatteShaderPSInputTable psInputTable;
	LatteShader_CreatePSInputTable(&psInputTable, latteRegister.GetRawView());

	// inputs & outputs: both structs get one float4 per parameter passed from VS to PS
	std::string vertexOutStruct = "struct VertexOut {\r\n";
	vertexOutStruct += "float4 position;\r\n";
	std::string geometryOutStruct = "struct GeometryOut {\r\n";
	geometryOutStruct += "float4 position [[position]];\r\n";

	const auto exportMask = vertexShader->outputParameterMask;
	for (uint32 paramIndex = 0; paramIndex < 32; ++paramIndex)
	{
		const bool isExported = (exportMask & (1 << paramIndex)) != 0;
		if (!isExported)
			continue;
		const sint32 semanticId = psInputTable.getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), paramIndex);
		if (semanticId < 0)
			continue;
		const auto psImport = psInputTable.getPSImportBySemanticId(semanticId);
		if (psImport == nullptr)
			continue;

		// VertexOut
		vertexOutStruct += fmt::format("float4 passParameterSem{};\r\n", semanticId);

		// GeometryOut: same member plus location and interpolation attributes
		geometryOutStruct += fmt::format("float4 passParameterSem{}", semanticId);
		geometryOutStruct += fmt::format(" [[user(locn{})]]", psInputTable.getPSImportLocationBySemanticId(semanticId));
		if (psImport->isFlat)
			geometryOutStruct += " [[flat]]";
		if (psImport->isNoPerspective)
			geometryOutStruct += " [[center_no_perspective]]";
		geometryOutStruct += ";\r\n";
	}
	vertexOutStruct += "};\r\n";
	geometryOutStruct += "};\r\n";

	// assemble the shader source
	std::string gsSrc;
	gsSrc += "#include <metal_stdlib>\r\n";
	gsSrc += "using namespace metal;\r\n";

	gsSrc += vertexOutStruct;
	gsSrc += geometryOutStruct;

	gsSrc += "struct ObjectPayload {\r\n";
	gsSrc += "VertexOut vertexOut[3];\r\n";
	gsSrc += "};\r\n";

	// gen function: one variant per possible diagonal
	gsSrc += "float4 gen4thVertexA(float4 a, float4 b, float4 c)\r\n";
	gsSrc += "{\r\n";
	gsSrc += "return b - (c - a);\r\n";
	gsSrc += "}\r\n";

	gsSrc += "float4 gen4thVertexB(float4 a, float4 b, float4 c)\r\n";
	gsSrc += "{\r\n";
	gsSrc += "return c - (b - a);\r\n";
	gsSrc += "}\r\n";

	gsSrc += "float4 gen4thVertexC(float4 a, float4 b, float4 c)\r\n";
	gsSrc += "{\r\n";
	gsSrc += "return c + (b - a);\r\n";
	gsSrc += "}\r\n";

	// main
	gsSrc += "using MeshType = mesh<GeometryOut, void, 4, 2, topology::triangle>;\r\n";
	gsSrc += "[[mesh, max_total_threads_per_threadgroup(1)]]\r\n";
	gsSrc += "void main0(MeshType mesh, const object_data ObjectPayload& objectPayload [[payload]])\r\n";
	gsSrc += "{\r\n";
	gsSrc += "GeometryOut out;\r\n";

	// there are two possible winding orders that need different triangle generation:
	// 0 1
	// 2 3
	// and
	// 0 1
	// 3 2
	// all others are just symmetries of these cases

	// the generated shader picks the case by comparing the pairwise distances of the three input vertices;
	// the longest one is the diagonal
	gsSrc += "float dist0_1 = length(objectPayload.vertexOut[1].position.xy - objectPayload.vertexOut[0].position.xy);\r\n";
	gsSrc += "float dist0_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[0].position.xy);\r\n";
	gsSrc += "float dist1_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[1].position.xy);\r\n";

	// emit vertices
	gsSrc += "if(dist0_1 > dist0_2 && dist0_1 > dist1_2)\r\n";
	gsSrc += "{\r\n";
	// p0 to p1 is diagonal
	rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 2, 1, 0, 3, "A", latteRegister);
	gsSrc += "} else if ( dist0_2 > dist0_1 && dist0_2 > dist1_2 ) {\r\n";
	// p0 to p2 is diagonal
	rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 1, 2, 0, 3, "B", latteRegister);
	gsSrc += "} else {\r\n";
	// p1 to p2 is diagonal
	rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 0, 1, 2, 3, "C", latteRegister);
	gsSrc += "}\r\n";

	gsSrc += "mesh.set_primitive_count(2);\r\n";

	gsSrc += "}\r\n";

	RendererShaderMtl* generatedShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc);
	generatedShader->PreponeCompilation(true);

	return generatedShader;
}
|
||||
|
||||
#define INVALID_TITLE_ID 0xFFFFFFFFFFFFFFFF
|
||||
|
||||
uint64 s_cacheTitleId = INVALID_TITLE_ID;
|
||||
|
||||
extern std::atomic_int g_compiled_shaders_total;
|
||||
extern std::atomic_int g_compiled_shaders_async;
|
||||
|
||||
template<typename T>
|
||||
void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, bool rasterizationEnabled, const LatteContextRegister& lcr)
|
||||
{
|
||||
// TODO: check if the pixel shader is valid as well?
|
||||
if (!rasterizationEnabled/* || !pixelShaderMtl*/)
|
||||
{
|
||||
desc->setRasterizationEnabled(false);
|
||||
return;
|
||||
}
|
||||
|
||||
// Color attachments
|
||||
const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL;
|
||||
uint32 blendEnableMask = colorControlReg.get_BLEND_MASK();
|
||||
uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK();
|
||||
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
||||
{
|
||||
Latte::E_GX2SURFFMT format = lastUsedAttachmentsInfo.colorFormats[i];
|
||||
if (format == Latte::E_GX2SURFFMT::INVALID_FORMAT)
|
||||
continue;
|
||||
|
||||
MTL::PixelFormat pixelFormat = GetMtlPixelFormat(format, false);
|
||||
auto colorAttachment = desc->colorAttachments()->object(i);
|
||||
colorAttachment->setPixelFormat(pixelFormat);
|
||||
|
||||
// Disable writes if not in the active FBO
|
||||
if (activeAttachmentsInfo.colorFormats[i] == Latte::E_GX2SURFFMT::INVALID_FORMAT)
|
||||
{
|
||||
colorAttachment->setWriteMask(MTL::ColorWriteMaskNone);
|
||||
continue;
|
||||
}
|
||||
|
||||
colorAttachment->setWriteMask(GetMtlColorWriteMask((renderTargetMask >> (i * 4)) & 0xF));
|
||||
|
||||
// Blending
|
||||
bool blendEnabled = ((blendEnableMask & (1 << i))) != 0;
|
||||
// Only float data type is blendable
|
||||
if (blendEnabled && GetMtlPixelFormatInfo(format, false).dataType == MetalDataType::FLOAT)
|
||||
{
|
||||
colorAttachment->setBlendingEnabled(true);
|
||||
|
||||
const auto& blendControlReg = lcr.CB_BLENDN_CONTROL[i];
|
||||
|
||||
auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN());
|
||||
auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND());
|
||||
auto dstRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND());
|
||||
|
||||
colorAttachment->setRgbBlendOperation(rgbBlendOp);
|
||||
colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor);
|
||||
colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor);
|
||||
if (blendControlReg.get_SEPARATE_ALPHA_BLEND())
|
||||
{
|
||||
colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN()));
|
||||
colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND()));
|
||||
colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND()));
|
||||
}
|
||||
else
|
||||
{
|
||||
colorAttachment->setAlphaBlendOperation(rgbBlendOp);
|
||||
colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor);
|
||||
colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Depth stencil attachment
|
||||
if (lastUsedAttachmentsInfo.depthFormat != Latte::E_GX2SURFFMT::INVALID_FORMAT)
|
||||
{
|
||||
MTL::PixelFormat pixelFormat = GetMtlPixelFormat(lastUsedAttachmentsInfo.depthFormat, true);
|
||||
desc->setDepthAttachmentPixelFormat(pixelFormat);
|
||||
if (lastUsedAttachmentsInfo.hasStencil)
|
||||
desc->setStencilAttachmentPixelFormat(pixelFormat);
|
||||
}
|
||||
}
|
||||
|
||||
// Releases the pipeline descriptor created by InitFromStateRender / InitFromStateMesh, if any.
MetalPipelineCompiler::~MetalPipelineCompiler()
{
	// Teardown of the pipeline cache and binary archive is currently disabled; kept for reference.
	/*
	for (auto& pair : m_pipelineCache)
	{
		pair.second->release();
	}
	m_pipelineCache.clear();

	NS::Error* error = nullptr;
	m_binaryArchive->serializeToURL(m_binaryArchiveURL, &error);
	if (error)
	{
		cemuLog_log(LogType::Force, "error serializing binary archive: {}", error->localizedDescription()->utf8String());
		error->release();
	}
	m_binaryArchive->release();

	m_binaryArchiveURL->release();
	*/

	// No descriptor was ever created (e.g. InitFromState bailed out early)
	if (m_pipelineDescriptor == nullptr)
		return;

	m_pipelineDescriptor->release();
}
|
||||
|
||||
void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
|
||||
{
|
||||
m_usesGeometryShader = UseGeometryShader(lcr, geometryShader != nullptr);
|
||||
if (m_usesGeometryShader && !m_mtlr->SupportsMeshShaders())
|
||||
return;
|
||||
|
||||
// Rasterization
|
||||
m_rasterizationEnabled = lcr.IsRasterizationEnabled();
|
||||
|
||||
// Shaders
|
||||
m_vertexShaderMtl = static_cast<RendererShaderMtl*>(vertexShader->shader);
|
||||
if (geometryShader)
|
||||
m_geometryShaderMtl = static_cast<RendererShaderMtl*>(geometryShader->shader);
|
||||
else if (UseRectEmulation(lcr))
|
||||
m_geometryShaderMtl = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr);
|
||||
else
|
||||
m_geometryShaderMtl = nullptr;
|
||||
m_pixelShaderMtl = static_cast<RendererShaderMtl*>(pixelShader->shader);
|
||||
|
||||
if (m_usesGeometryShader)
|
||||
InitFromStateMesh(fetchShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);
|
||||
else
|
||||
InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);
|
||||
}
|
||||
|
||||
bool MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay)
|
||||
{
|
||||
if (m_usesGeometryShader && !m_mtlr->SupportsMeshShaders())
|
||||
return false;
|
||||
|
||||
if (forceCompile)
|
||||
{
|
||||
// if some shader stages are not compiled yet, compile them now
|
||||
if (m_vertexShaderMtl && !m_vertexShaderMtl->IsCompiled())
|
||||
m_vertexShaderMtl->PreponeCompilation(isRenderThread);
|
||||
if (m_geometryShaderMtl && !m_geometryShaderMtl->IsCompiled())
|
||||
m_geometryShaderMtl->PreponeCompilation(isRenderThread);
|
||||
if (m_pixelShaderMtl && !m_pixelShaderMtl->IsCompiled())
|
||||
m_pixelShaderMtl->PreponeCompilation(isRenderThread);
|
||||
}
|
||||
else
|
||||
{
|
||||
// fail early if some shader stages are not compiled
|
||||
if (m_vertexShaderMtl && !m_vertexShaderMtl->IsCompiled())
|
||||
return false;
|
||||
if (m_geometryShaderMtl && !m_geometryShaderMtl->IsCompiled())
|
||||
return false;
|
||||
if (m_pixelShaderMtl && !m_pixelShaderMtl->IsCompiled())
|
||||
return false;
|
||||
}
|
||||
|
||||
// Compile
|
||||
MTL::RenderPipelineState* pipeline = nullptr;
|
||||
NS::Error* error = nullptr;
|
||||
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
if (m_usesGeometryShader)
|
||||
{
|
||||
auto desc = static_cast<MTL::MeshRenderPipelineDescriptor*>(m_pipelineDescriptor);
|
||||
|
||||
// Shaders
|
||||
desc->setObjectFunction(m_vertexShaderMtl->GetFunction());
|
||||
desc->setMeshFunction(m_geometryShaderMtl->GetFunction());
|
||||
if (m_rasterizationEnabled)
|
||||
desc->setFragmentFunction(m_pixelShaderMtl->GetFunction());
|
||||
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
desc->setLabel(GetLabel("Mesh render pipeline state", desc));
|
||||
#endif
|
||||
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto desc = static_cast<MTL::RenderPipelineDescriptor*>(m_pipelineDescriptor);
|
||||
|
||||
// Shaders
|
||||
desc->setVertexFunction(m_vertexShaderMtl->GetFunction());
|
||||
if (m_rasterizationEnabled)
|
||||
desc->setFragmentFunction(m_pixelShaderMtl->GetFunction());
|
||||
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
desc->setLabel(GetLabel("Render pipeline state", desc));
|
||||
#endif
|
||||
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error);
|
||||
}
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto creationDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
|
||||
|
||||
if (error)
|
||||
{
|
||||
cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String());
|
||||
}
|
||||
|
||||
if (showInOverlay)
|
||||
{
|
||||
if (isRenderThread)
|
||||
g_compiling_pipelines_syncTimeSum += creationDuration;
|
||||
else
|
||||
g_compiling_pipelines_async++;
|
||||
g_compiling_pipelines++;
|
||||
}
|
||||
|
||||
m_pipelineObj.m_pipeline = pipeline;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
|
||||
{
|
||||
// Render pipeline state
|
||||
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
|
||||
|
||||
// Vertex descriptor
|
||||
if (!fetchShader->mtlFetchVertexManually)
|
||||
{
|
||||
NS_STACK_SCOPED MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();
|
||||
for (auto& bufferGroup : fetchShader->bufferGroups)
|
||||
{
|
||||
std::optional<LatteConst::VertexFetchType2> fetchType;
|
||||
|
||||
uint32 minBufferStride = 0;
|
||||
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
|
||||
{
|
||||
auto& attr = bufferGroup.attrib[j];
|
||||
|
||||
uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
|
||||
if (semanticId == (uint32)-1)
|
||||
continue; // attribute not used?
|
||||
|
||||
auto attribute = vertexDescriptor->attributes()->object(semanticId);
|
||||
attribute->setOffset(attr.offset);
|
||||
attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex));
|
||||
attribute->setFormat(GetMtlVertexFormat(attr.format));
|
||||
|
||||
minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format));
|
||||
|
||||
if (fetchType.has_value())
|
||||
cemu_assert_debug(fetchType == attr.fetchType);
|
||||
else
|
||||
fetchType = attr.fetchType;
|
||||
|
||||
if (attr.fetchType == LatteConst::INSTANCE_DATA)
|
||||
{
|
||||
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
|
||||
}
|
||||
}
|
||||
|
||||
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
|
||||
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
|
||||
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
|
||||
|
||||
auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
|
||||
if (bufferStride == 0)
|
||||
{
|
||||
// Buffer stride cannot be zero, let's use the minimum stride
|
||||
bufferStride = minBufferStride;
|
||||
|
||||
// Additionally, constant vertex function must be used
|
||||
layout->setStepFunction(MTL::VertexStepFunctionConstant);
|
||||
layout->setStepRate(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA)
|
||||
layout->setStepFunction(MTL::VertexStepFunctionPerVertex);
|
||||
else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA)
|
||||
layout->setStepFunction(MTL::VertexStepFunctionPerInstance);
|
||||
else
|
||||
{
|
||||
cemuLog_log(LogType::Force, "unimplemented vertex fetch type {}", (uint32)fetchType.value());
|
||||
cemu_assert(false);
|
||||
}
|
||||
}
|
||||
bufferStride = Align(bufferStride, 4);
|
||||
layout->setStride(bufferStride);
|
||||
}
|
||||
|
||||
desc->setVertexDescriptor(vertexDescriptor);
|
||||
}
|
||||
|
||||
SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr);
|
||||
|
||||
m_pipelineDescriptor = desc;
|
||||
}
|
||||
|
||||
void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
|
||||
{
|
||||
// Render pipeline state
|
||||
MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init();
|
||||
|
||||
SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr);
|
||||
|
||||
m_pipelineDescriptor = desc;
|
||||
}
|
38
src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h
Normal file
38
src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h
Normal file
|
@ -0,0 +1,38 @@
|
|||
#pragma once
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
|
||||
|
||||
#include "Cafe/HW/Latte/ISA/LatteReg.h"
|
||||
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
|
||||
|
||||
struct PipelineObject
|
||||
{
|
||||
MTL::RenderPipelineState* m_pipeline = nullptr;
|
||||
};
|
||||
|
||||
class MetalPipelineCompiler
|
||||
{
|
||||
public:
|
||||
MetalPipelineCompiler(class MetalRenderer* metalRenderer, PipelineObject& pipelineObj) : m_mtlr{metalRenderer}, m_pipelineObj{pipelineObj} {}
|
||||
~MetalPipelineCompiler();
|
||||
|
||||
void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
|
||||
|
||||
bool Compile(bool forceCompile, bool isRenderThread, bool showInOverlay);
|
||||
|
||||
private:
|
||||
class MetalRenderer* m_mtlr;
|
||||
PipelineObject& m_pipelineObj;
|
||||
|
||||
class RendererShaderMtl* m_vertexShaderMtl;
|
||||
class RendererShaderMtl* m_geometryShaderMtl;
|
||||
class RendererShaderMtl* m_pixelShaderMtl;
|
||||
bool m_usesGeometryShader;
|
||||
bool m_rasterizationEnabled;
|
||||
|
||||
NS::Object* m_pipelineDescriptor = nullptr;
|
||||
|
||||
void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
|
||||
|
||||
void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
|
||||
};
|
38
src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp
Normal file
38
src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp
Normal file
|
@ -0,0 +1,38 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalQuery.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
|
||||
// Sums the occlusion results of every pool slot covered by this query.
//
// numSamplesPassed - receives the accumulated sample count
//
// Returns false while the command buffer captured in end() has not completed, true once the
// result has been written to numSamplesPassed.
//
// The slots are walked by count rather than by comparing against m_range.end: GrowRange()
// increments the end index without wrapping it, and both bounds default to INVALID_UINT32, so an
// "index != end" loop over wrapped indices is not guaranteed to terminate.
bool LatteQueryObjectMtl::getResult(uint64& numSamplesPassed)
{
	if (m_commandBuffer && !CommandBufferCompleted(m_commandBuffer))
		return false;

	numSamplesPassed = 0;

	// A query that was never begun or never ended covers no slots
	if (m_range.begin == INVALID_UINT32 || m_range.end == INVALID_UINT32)
		return true;

	const uint32 poolSize = MetalRenderer::OCCLUSION_QUERY_POOL_SIZE;

	// Number of slots between begin (inclusive) and end (exclusive). An end index below begin
	// means the range wrapped around the end of the pool.
	uint32 slotCount;
	if (m_range.end >= m_range.begin)
		slotCount = m_range.end - m_range.begin;
	else
		slotCount = m_range.end + poolSize - m_range.begin;

	// Never visit a slot twice
	if (slotCount > poolSize)
		slotCount = poolSize;

	uint64* resultPtr = m_mtlr->GetOcclusionQueryResultsPtr();
	for (uint32 n = 0; n < slotCount; n++)
		numSamplesPassed += resultPtr[(m_range.begin + n) % poolSize];

	return true;
}
|
||||
|
||||
// Drops the reference to the command buffer retained in end(), if one is held.
LatteQueryObjectMtl::~LatteQueryObjectMtl()
{
	if (m_commandBuffer == nullptr)
		return;

	m_commandBuffer->release();
}
|
||||
|
||||
void LatteQueryObjectMtl::begin()
|
||||
{
|
||||
m_range.begin = m_mtlr->GetOcclusionQueryIndex();
|
||||
m_mtlr->BeginOcclusionQuery();
|
||||
}
|
||||
|
||||
void LatteQueryObjectMtl::end()
|
||||
{
|
||||
m_range.end = m_mtlr->GetOcclusionQueryIndex();
|
||||
m_mtlr->EndOcclusionQuery();
|
||||
|
||||
m_commandBuffer = m_mtlr->GetAndRetainCurrentCommandBufferIfNotCompleted();
|
||||
if (m_commandBuffer)
|
||||
m_mtlr->RequestSoonCommit();
|
||||
}
|
28
src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h
Normal file
28
src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h
Normal file
|
@ -0,0 +1,28 @@
|
|||
#pragma once
|
||||
|
||||
#include "Cafe/HW/Latte/Core/LatteQueryObject.h"
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
|
||||
class LatteQueryObjectMtl : public LatteQueryObject
|
||||
{
|
||||
public:
|
||||
LatteQueryObjectMtl(class MetalRenderer* mtlRenderer) : m_mtlr{mtlRenderer} {}
|
||||
~LatteQueryObjectMtl();
|
||||
|
||||
bool getResult(uint64& numSamplesPassed) override;
|
||||
void begin() override;
|
||||
void end() override;
|
||||
|
||||
void GrowRange()
|
||||
{
|
||||
m_range.end++;
|
||||
}
|
||||
|
||||
private:
|
||||
class MetalRenderer* m_mtlr;
|
||||
|
||||
MetalQueryRange m_range = {INVALID_UINT32, INVALID_UINT32};
|
||||
// TODO: make this a list of command buffers?
|
||||
MTL::CommandBuffer* m_commandBuffer = nullptr;
|
||||
};
|
2323
src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
Normal file
2323
src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
Normal file
File diff suppressed because it is too large
Load diff
570
src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
Normal file
570
src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
Normal file
|
@ -0,0 +1,570 @@
|
|||
#pragma once
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
|
||||
|
||||
// Shader stages as seen by the emulated GPU. METAL_GENERAL_SHADER_TYPE_TOTAL is the number of
// stages and doubles as the "no matching stage" value returned by GetMtlGeneralShaderType.
enum MetalGeneralShaderType
{
	METAL_GENERAL_SHADER_TYPE_VERTEX = 0,
	METAL_GENERAL_SHADER_TYPE_GEOMETRY = 1,
	METAL_GENERAL_SHADER_TYPE_FRAGMENT = 2,

	METAL_GENERAL_SHADER_TYPE_TOTAL = 3
};
|
||||
|
||||
// Maps a Latte shader type to the matching MetalGeneralShaderType.
// Any type other than vertex, geometry or pixel yields METAL_GENERAL_SHADER_TYPE_TOTAL.
inline MetalGeneralShaderType GetMtlGeneralShaderType(LatteConst::ShaderType shaderType)
{
	if (shaderType == LatteConst::ShaderType::Vertex)
		return METAL_GENERAL_SHADER_TYPE_VERTEX;
	if (shaderType == LatteConst::ShaderType::Geometry)
		return METAL_GENERAL_SHADER_TYPE_GEOMETRY;
	if (shaderType == LatteConst::ShaderType::Pixel)
		return METAL_GENERAL_SHADER_TYPE_FRAGMENT;

	return METAL_GENERAL_SHADER_TYPE_TOTAL;
}
|
||||
|
||||
// Shader stages as seen by Metal. METAL_SHADER_TYPE_TOTAL is the number of stages and doubles
// as the "no matching stage" value returned by GetMtlShaderType.
enum MetalShaderType
{
	METAL_SHADER_TYPE_VERTEX = 0,
	METAL_SHADER_TYPE_OBJECT = 1,
	METAL_SHADER_TYPE_MESH = 2,
	METAL_SHADER_TYPE_FRAGMENT = 3,

	METAL_SHADER_TYPE_TOTAL = 4
};
|
||||
|
||||
// Maps a Latte shader type to the Metal stage it runs as.
//
// shaderType         - Latte shader type
// usesGeometryShader - whether the mesh pipeline is in use; in that case the vertex shader runs
//                      as the object stage instead of the vertex stage
//
// Geometry shaders always map to the mesh stage and pixel shaders to the fragment stage.
// Any other type yields METAL_SHADER_TYPE_TOTAL.
inline MetalShaderType GetMtlShaderType(LatteConst::ShaderType shaderType, bool usesGeometryShader)
{
	if (shaderType == LatteConst::ShaderType::Vertex)
		return usesGeometryShader ? METAL_SHADER_TYPE_OBJECT : METAL_SHADER_TYPE_VERTEX;
	if (shaderType == LatteConst::ShaderType::Geometry)
		return METAL_SHADER_TYPE_MESH;
	if (shaderType == LatteConst::ShaderType::Pixel)
		return METAL_SHADER_TYPE_FRAGMENT;

	return METAL_SHADER_TYPE_TOTAL;
}
|
||||
|
||||
struct MetalEncoderState
|
||||
{
|
||||
MTL::RenderPipelineState* m_renderPipelineState = nullptr;
|
||||
MTL::DepthStencilState* m_depthStencilState = nullptr;
|
||||
MTL::CullMode m_cullMode = MTL::CullModeNone;
|
||||
MTL::Winding m_frontFaceWinding = MTL::WindingClockwise;
|
||||
MTL::Viewport m_viewport;
|
||||
MTL::ScissorRect m_scissor;
|
||||
uint32 m_stencilRefFront = 0;
|
||||
uint32 m_stencilRefBack = 0;
|
||||
uint32 m_blendColor[4] = {0};
|
||||
uint32 m_depthBias = 0;
|
||||
uint32 m_depthSlope = 0;
|
||||
uint32 m_depthClamp = 0;
|
||||
bool m_depthClipEnable = true;
|
||||
struct {
|
||||
MTL::Buffer* m_buffer;
|
||||
size_t m_offset;
|
||||
} m_buffers[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
|
||||
MTL::Texture* m_textures[METAL_SHADER_TYPE_TOTAL][MAX_MTL_TEXTURES];
|
||||
MTL::SamplerState* m_samplers[METAL_SHADER_TYPE_TOTAL][MAX_MTL_SAMPLERS];
|
||||
};
|
||||
|
||||
struct MetalStreamoutState
|
||||
{
|
||||
struct
|
||||
{
|
||||
bool enabled;
|
||||
uint32 ringBufferOffset;
|
||||
} buffers[LATTE_NUM_STREAMOUT_BUFFER];
|
||||
sint32 verticesPerInstance;
|
||||
};
|
||||
|
||||
struct MetalActiveFBOState
|
||||
{
|
||||
class CachedFBOMtl* m_fbo = nullptr;
|
||||
MetalAttachmentsInfo m_attachmentsInfo;
|
||||
};
|
||||
|
||||
struct MetalState
|
||||
{
|
||||
MetalEncoderState m_encoderState{};
|
||||
|
||||
bool m_usesSRGB = false;
|
||||
|
||||
bool m_skipDrawSequence = false;
|
||||
bool m_isFirstDrawInRenderPass = true;
|
||||
|
||||
MetalActiveFBOState m_activeFBO;
|
||||
// If the FBO changes, but it's the same FBO as the last one with some omitted attachments, this FBO doesn't change
|
||||
MetalActiveFBOState m_lastUsedFBO;
|
||||
bool m_fboChanged = false;
|
||||
|
||||
size_t m_vertexBufferOffsets[MAX_MTL_VERTEX_BUFFERS];
|
||||
class LatteTextureViewMtl* m_textures[LATTE_NUM_MAX_TEX_UNITS * 3] = {nullptr};
|
||||
size_t m_uniformBufferOffsets[METAL_GENERAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
|
||||
|
||||
MTL::Viewport m_viewport;
|
||||
MTL::ScissorRect m_scissor;
|
||||
|
||||
MetalStreamoutState m_streamoutState;
|
||||
};
|
||||
|
||||
struct MetalCommandBuffer
|
||||
{
|
||||
MTL::CommandBuffer* m_commandBuffer = nullptr;
|
||||
bool m_commited = false;
|
||||
};
|
||||
|
||||
// Kind of command encoder; None stands for the absence of an encoder.
enum class MetalEncoderType
{
	None = 0,
	Render = 1,
	Compute = 2,
	Blit = 3,
};
|
||||
|
||||
class MetalRenderer : public Renderer
|
||||
{
|
||||
public:
|
||||
static constexpr uint32 OCCLUSION_QUERY_POOL_SIZE = 1024;
|
||||
static constexpr uint32 TEXTURE_READBACK_SIZE = 32 * 1024 * 1024; // 32 MB
|
||||
|
||||
struct DeviceInfo
|
||||
{
|
||||
std::string name;
|
||||
uint64 uuid;
|
||||
};
|
||||
|
||||
static std::vector<DeviceInfo> GetDevices();
|
||||
|
||||
MetalRenderer();
|
||||
~MetalRenderer() override;
|
||||
|
||||
RendererAPI GetType() override
|
||||
{
|
||||
return RendererAPI::Metal;
|
||||
}
|
||||
|
||||
static MetalRenderer* GetInstance() {
|
||||
return static_cast<MetalRenderer*>(g_renderer.get());
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
MTL::Device* GetDevice() const {
|
||||
return m_device;
|
||||
}
|
||||
|
||||
void InitializeLayer(const Vector2i& size, bool mainWindow);
|
||||
void ShutdownLayer(bool mainWindow);
|
||||
void ResizeLayer(const Vector2i& size, bool mainWindow);
|
||||
|
||||
void Initialize() override;
|
||||
void Shutdown() override;
|
||||
bool IsPadWindowActive() override;
|
||||
|
||||
bool GetVRAMInfo(int& usageInMB, int& totalInMB) const override;
|
||||
|
||||
void ClearColorbuffer(bool padView) override;
|
||||
void DrawEmptyFrame(bool mainWindow) override;
|
||||
void SwapBuffers(bool swapTV, bool swapDRC) override;
|
||||
|
||||
void HandleScreenshotRequest(LatteTextureView* texView, bool padView) override;
|
||||
|
||||
void DrawBackbufferQuad(LatteTextureView* texView, RendererOutputShader* shader, bool useLinearTexFilter,
|
||||
sint32 imageX, sint32 imageY, sint32 imageWidth, sint32 imageHeight,
|
||||
bool padView, bool clearBackground) override;
|
||||
bool BeginFrame(bool mainWindow) override;
|
||||
|
||||
// flush control
|
||||
void Flush(bool waitIdle = false) override; // called when explicit flush is required (e.g. by imgui)
|
||||
void NotifyLatteCommandProcessorIdle() override; // called when command processor has no more commands available or when stalled
|
||||
|
||||
// imgui
|
||||
bool ImguiBegin(bool mainWindow) override;
|
||||
void ImguiEnd() override;
|
||||
ImTextureID GenerateTexture(const std::vector<uint8>& data, const Vector2i& size) override;
|
||||
void DeleteTexture(ImTextureID id) override;
|
||||
void DeleteFontTextures() override;
|
||||
|
||||
bool UseTFViaSSBO() const override { return true; }
|
||||
void AppendOverlayDebugInfo() override;
|
||||
|
||||
// rendertarget
|
||||
void renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ = false) override;
|
||||
void renderTarget_setScissor(sint32 scissorX, sint32 scissorY, sint32 scissorWidth, sint32 scissorHeight) override;
|
||||
|
||||
LatteCachedFBO* rendertarget_createCachedFBO(uint64 key) override;
|
||||
void rendertarget_deleteCachedFBO(LatteCachedFBO* fbo) override;
|
||||
void rendertarget_bindFramebufferObject(LatteCachedFBO* cfbo) override;
|
||||
|
||||
// texture functions
|
||||
void* texture_acquireTextureUploadBuffer(uint32 size) override;
|
||||
void texture_releaseTextureUploadBuffer(uint8* mem) override;
|
||||
|
||||
TextureDecoder* texture_chooseDecodedFormat(Latte::E_GX2SURFFMT format, bool isDepth, Latte::E_DIM dim, uint32 width, uint32 height) override;
|
||||
|
||||
void texture_clearSlice(LatteTexture* hostTexture, sint32 sliceIndex, sint32 mipIndex) override;
|
||||
void texture_loadSlice(LatteTexture* hostTexture, sint32 width, sint32 height, sint32 depth, void* pixelData, sint32 sliceIndex, sint32 mipIndex, uint32 compressedImageSize) override;
|
||||
void texture_clearColorSlice(LatteTexture* hostTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a) override;
|
||||
void texture_clearDepthSlice(LatteTexture* hostTexture, uint32 sliceIndex, sint32 mipIndex, bool clearDepth, bool clearStencil, float depthValue, uint32 stencilValue) override;
|
||||
|
||||
LatteTexture* texture_createTextureEx(Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle, Latte::E_HWTILEMODE tileMode, bool isDepth) override;
|
||||
|
||||
void texture_setLatteTexture(LatteTextureView* textureView, uint32 textureUnit) override;
|
||||
void texture_copyImageSubData(LatteTexture* src, sint32 srcMip, sint32 effectiveSrcX, sint32 effectiveSrcY, sint32 srcSlice, LatteTexture* dst, sint32 dstMip, sint32 effectiveDstX, sint32 effectiveDstY, sint32 dstSlice, sint32 effectiveCopyWidth, sint32 effectiveCopyHeight, sint32 srcDepth) override;
|
||||
|
||||
LatteTextureReadbackInfo* texture_createReadback(LatteTextureView* textureView) override;
|
||||
|
||||
// surface copy
|
||||
void surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* sourceTexture, sint32 srcMip, sint32 srcSlice, LatteTexture* destinationTexture, sint32 dstMip, sint32 dstSlice, sint32 width, sint32 height) override;
|
||||
|
||||
// buffer cache
|
||||
void bufferCache_init(const sint32 bufferSize) override;
|
||||
void bufferCache_upload(uint8* buffer, sint32 size, uint32 bufferOffset) override;
|
||||
void bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 size) override;
|
||||
void bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size) override;
|
||||
|
||||
void buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size) override;
|
||||
void buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) override;
|
||||
|
||||
// shader
|
||||
RendererShader* shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool compileAsync, bool isGfxPackSource) override;
|
||||
|
||||
// streamout
|
||||
void streamout_setupXfbBuffer(uint32 bufferIndex, sint32 ringBufferOffset, uint32 rangeAddr, uint32 rangeSize) override;
|
||||
void streamout_begin() override;
|
||||
void streamout_rendererFinishDrawcall() override;
|
||||
|
||||
// core drawing logic
|
||||
void draw_beginSequence() override;
|
||||
void draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst) override;
|
||||
void draw_endSequence() override;
|
||||
|
||||
void draw_updateVertexBuffersDirectAccess();
|
||||
void draw_updateUniformBuffersDirectAccess(LatteDecompilerShader* shader, const uint32 uniformBufferRegOffset);
|
||||
|
||||
void draw_handleSpecialState5();
|
||||
|
||||
// index
|
||||
IndexAllocation indexData_reserveIndexMemory(uint32 size) override;
|
||||
void indexData_releaseIndexMemory(IndexAllocation& allocation) override;
|
||||
void indexData_uploadIndexMemory(IndexAllocation& allocation) override;
|
||||
|
||||
// occlusion queries
|
||||
LatteQueryObject* occlusionQuery_create() override;
|
||||
void occlusionQuery_destroy(LatteQueryObject* queryObj) override;
|
||||
void occlusionQuery_flush() override;
|
||||
void occlusionQuery_updateState() override;
|
||||
|
||||
// Helpers
|
||||
MetalPerformanceMonitor& GetPerformanceMonitor() { return m_performanceMonitor; }
|
||||
|
||||
void SetShouldMaximizeConcurrentCompilation(bool shouldMaximizeConcurrentCompilation)
|
||||
{
|
||||
if (m_supportsMetal3)
|
||||
m_device->setShouldMaximizeConcurrentCompilation(shouldMaximizeConcurrentCompilation);
|
||||
}
|
||||
|
||||
bool IsCommandBufferActive() const
|
||||
{
|
||||
return (m_currentCommandBuffer.m_commandBuffer && !m_currentCommandBuffer.m_commited);
|
||||
}
|
||||
|
||||
MTL::CommandBuffer* GetCurrentCommandBuffer() const
|
||||
{
|
||||
cemu_assert_debug(m_currentCommandBuffer.m_commandBuffer);
|
||||
|
||||
return m_currentCommandBuffer.m_commandBuffer;
|
||||
}
|
||||
|
||||
// Returns the current command buffer with an extra retain, or nullptr when it has already been
// committed and no command buffers are executing anymore. A non-null result must be released
// by the caller.
MTL::CommandBuffer* GetAndRetainCurrentCommandBufferIfNotCompleted() const
{
	// The command buffer has been commited and has finished execution
	const bool alreadyFinished = m_currentCommandBuffer.m_commited && m_executingCommandBuffers.size() == 0;

	return alreadyFinished ? nullptr : GetCurrentCommandBuffer()->retain();
}
|
||||
|
||||
void RequestSoonCommit()
|
||||
{
|
||||
m_commitTreshold = m_recordedDrawcalls + 8;
|
||||
}
|
||||
|
||||
MTL::CommandEncoder* GetCommandEncoder()
|
||||
{
|
||||
return m_commandEncoder;
|
||||
}
|
||||
|
||||
MetalEncoderType GetEncoderType()
|
||||
{
|
||||
return m_encoderType;
|
||||
}
|
||||
|
||||
void ResetEncoderState()
|
||||
{
|
||||
m_state.m_encoderState = {};
|
||||
|
||||
// TODO: set viewport and scissor to render target dimensions if render commands
|
||||
|
||||
for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++)
|
||||
{
|
||||
for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++)
|
||||
m_state.m_encoderState.m_buffers[i][j] = {nullptr};
|
||||
for (uint32 j = 0; j < MAX_MTL_TEXTURES; j++)
|
||||
m_state.m_encoderState.m_textures[i][j] = nullptr;
|
||||
for (uint32 j = 0; j < MAX_MTL_SAMPLERS; j++)
|
||||
m_state.m_encoderState.m_samplers[i][j] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
MetalEncoderState& GetEncoderState()
|
||||
{
|
||||
return m_state.m_encoderState;
|
||||
}
|
||||
|
||||
void SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index);
|
||||
void SetTexture(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Texture* texture, uint32 index);
|
||||
void SetSamplerState(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::SamplerState* samplerState, uint32 index);
|
||||
|
||||
MTL::CommandBuffer* GetCommandBuffer();
|
||||
MTL::RenderCommandEncoder* GetTemporaryRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor);
|
||||
MTL::RenderCommandEncoder* GetRenderCommandEncoder(bool forceRecreate = false);
|
||||
MTL::ComputeCommandEncoder* GetComputeCommandEncoder();
|
||||
MTL::BlitCommandEncoder* GetBlitCommandEncoder();
|
||||
void EndEncoding();
|
||||
void CommitCommandBuffer();
|
||||
void ProcessFinishedCommandBuffers();
|
||||
|
||||
bool AcquireDrawable(bool mainWindow);
|
||||
|
||||
//bool CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader);
|
||||
void BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader);
|
||||
|
||||
void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a);
|
||||
|
||||
void CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size, MTL::RenderStages after, MTL::RenderStages before);
|
||||
|
||||
// Getters
|
||||
bool GetPositionInvariance() const
|
||||
{
|
||||
return m_positionInvariance;
|
||||
}
|
||||
|
||||
bool IsAppleGPU() const
|
||||
{
|
||||
return m_isAppleGPU;
|
||||
}
|
||||
|
||||
bool SupportsFramebufferFetch() const
|
||||
{
|
||||
return m_supportsFramebufferFetch;
|
||||
}
|
||||
|
||||
bool HasUnifiedMemory() const
|
||||
{
|
||||
return m_hasUnifiedMemory;
|
||||
}
|
||||
|
||||
bool SupportsMetal3() const
|
||||
{
|
||||
return m_supportsMetal3;
|
||||
}
|
||||
|
||||
bool SupportsMeshShaders() const
|
||||
{
|
||||
return m_supportsMeshShaders;
|
||||
}
|
||||
|
||||
//MTL::StorageMode GetOptimalTextureStorageMode() const
|
||||
//{
|
||||
// return (m_isAppleGPU ? MTL::StorageModeShared : MTL::StorageModePrivate);
|
||||
//}
|
||||
|
||||
// Picks the buffer storage mode for this device: shared storage when the
// device reports unified memory, managed storage otherwise.
MTL::ResourceOptions GetOptimalBufferStorageMode() const
{
	if (m_hasUnifiedMemory)
		return MTL::ResourceStorageModeShared;

	return MTL::ResourceStorageModeManaged;
}
|
||||
|
||||
MTL::Texture* GetNullTexture2D() const
|
||||
{
|
||||
return m_nullTexture2D;
|
||||
}
|
||||
|
||||
// Returns the texture readback buffer, creating it on first use.
// The buffer is TEXTURE_READBACK_SIZE bytes and uses shared storage.
MTL::Buffer* GetTextureReadbackBuffer()
{
	// Already created by an earlier call
	if (m_readbackBuffer)
		return m_readbackBuffer;

	m_readbackBuffer = m_device->newBuffer(TEXTURE_READBACK_SIZE, MTL::ResourceStorageModeShared);
#ifdef CEMU_DEBUG_ASSERT
	m_readbackBuffer->setLabel(GetLabel("Texture readback buffer", m_readbackBuffer));
#endif

	return m_readbackBuffer;
}
|
||||
|
||||
// Returns the transform feedback ring buffer, creating it on first use.
// The buffer uses private storage.
MTL::Buffer* GetXfbRingBuffer()
{
	// Already created by an earlier call
	if (m_xfbRingBuffer)
		return m_xfbRingBuffer;

	// HACK: using just LatteStreamout_GetRingBufferSize will cause page faults,
	// so the allocation is four times that size
	m_xfbRingBuffer = m_device->newBuffer(LatteStreamout_GetRingBufferSize() * 4, MTL::ResourceStorageModePrivate);
#ifdef CEMU_DEBUG_ASSERT
	m_xfbRingBuffer->setLabel(GetLabel("Transform feedback buffer", m_xfbRingBuffer));
#endif

	return m_xfbRingBuffer;
}
|
||||
|
||||
MTL::Buffer* GetOcclusionQueryResultBuffer() const
|
||||
{
|
||||
return m_occlusionQuery.m_resultBuffer;
|
||||
}
|
||||
|
||||
uint64* GetOcclusionQueryResultsPtr()
|
||||
{
|
||||
return m_occlusionQuery.m_resultsPtr;
|
||||
}
|
||||
|
||||
uint32 GetOcclusionQueryIndex()
|
||||
{
|
||||
return m_occlusionQuery.m_currentIndex;
|
||||
}
|
||||
|
||||
void BeginOcclusionQuery()
|
||||
{
|
||||
m_occlusionQuery.m_active = true;
|
||||
}
|
||||
|
||||
void EndOcclusionQuery()
|
||||
{
|
||||
m_occlusionQuery.m_active = false;
|
||||
|
||||
// Release the old command buffer
|
||||
if (m_occlusionQuery.m_lastCommandBuffer)
|
||||
m_occlusionQuery.m_lastCommandBuffer->release();
|
||||
|
||||
// Get and retain the current command buffer
|
||||
m_occlusionQuery.m_lastCommandBuffer = GetAndRetainCurrentCommandBufferIfNotCompleted();
|
||||
}
|
||||
|
||||
// GPU capture
|
||||
void CaptureFrame()
|
||||
{
|
||||
m_captureFrame = true;
|
||||
}
|
||||
|
||||
private:
|
||||
MetalLayerHandle m_mainLayer;
|
||||
MetalLayerHandle m_padLayer;
|
||||
|
||||
MetalPerformanceMonitor m_performanceMonitor;
|
||||
|
||||
// Options
|
||||
bool m_positionInvariance;
|
||||
|
||||
// Metal objects
|
||||
MTL::Device* m_device = nullptr;
|
||||
MTL::CommandQueue* m_commandQueue;
|
||||
|
||||
// Feature support
|
||||
bool m_isAppleGPU;
|
||||
bool m_supportsFramebufferFetch;
|
||||
bool m_hasUnifiedMemory;
|
||||
bool m_supportsMetal3;
|
||||
bool m_supportsMeshShaders;
|
||||
uint32 m_recommendedMaxVRAMUsage;
|
||||
MetalPixelFormatSupport m_pixelFormatSupport;
|
||||
|
||||
// Managers and caches
|
||||
class MetalMemoryManager* m_memoryManager;
|
||||
class MetalOutputShaderCache* m_outputShaderCache;
|
||||
class MetalPipelineCache* m_pipelineCache;
|
||||
class MetalDepthStencilCache* m_depthStencilCache;
|
||||
class MetalSamplerCache* m_samplerCache;
|
||||
|
||||
// Pipelines
|
||||
MTL::RenderPipelineDescriptor* m_copyDepthToColorDesc;
|
||||
std::map<MTL::PixelFormat, MTL::RenderPipelineState*> m_copyDepthToColorPipelines;
|
||||
|
||||
// Void vertex pipelines
|
||||
class MetalVoidVertexPipeline* m_copyBufferToBufferPipeline;
|
||||
|
||||
// Synchronization resources
|
||||
MTL::Event* m_event;
|
||||
int32_t m_eventValue = -1;
|
||||
|
||||
// Resources
|
||||
MTL::SamplerState* m_nearestSampler;
|
||||
MTL::SamplerState* m_linearSampler;
|
||||
|
||||
// Null resources
|
||||
MTL::Texture* m_nullTexture1D;
|
||||
MTL::Texture* m_nullTexture2D;
|
||||
|
||||
// Texture readback
|
||||
MTL::Buffer* m_readbackBuffer = nullptr;
|
||||
uint32 m_readbackBufferWriteOffset = 0;
|
||||
|
||||
// Transform feedback
|
||||
MTL::Buffer* m_xfbRingBuffer = nullptr;
|
||||
|
||||
// Occlusion queries
|
||||
struct
|
||||
{
|
||||
MTL::Buffer* m_resultBuffer;
|
||||
uint64* m_resultsPtr;
|
||||
uint32 m_currentIndex = 0;
|
||||
bool m_active = false;
|
||||
MTL::CommandBuffer* m_lastCommandBuffer = nullptr;
|
||||
} m_occlusionQuery;
|
||||
|
||||
// Autorelease pool
|
||||
NS::AutoreleasePool* m_autoreleasePool;
|
||||
|
||||
// Active objects
|
||||
MetalCommandBuffer m_currentCommandBuffer{};
|
||||
std::vector<MTL::CommandBuffer*> m_executingCommandBuffers;
|
||||
MetalEncoderType m_encoderType = MetalEncoderType::None;
|
||||
MTL::CommandEncoder* m_commandEncoder = nullptr;
|
||||
|
||||
uint32 m_recordedDrawcalls;
|
||||
uint32 m_defaultCommitTreshlod;
|
||||
uint32 m_commitTreshold;
|
||||
|
||||
// State
|
||||
MetalState m_state;
|
||||
|
||||
// GPU capture
|
||||
bool m_captureFrame = false;
|
||||
bool m_capturing = false;
|
||||
|
||||
// Helpers
|
||||
// Returns the layer handle for the requested window:
// the main layer when mainWindow is true, the pad layer otherwise.
MetalLayerHandle& GetLayer(bool mainWindow)
{
	if (mainWindow)
		return m_mainLayer;

	return m_padLayer;
}
|
||||
|
||||
void SwapBuffer(bool mainWindow);
|
||||
|
||||
void EnsureImGuiBackend();
|
||||
|
||||
// GPU capture
|
||||
void StartCapture();
|
||||
void EndCapture();
|
||||
};
|
190
src/Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.cpp
Normal file
190
src/Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.cpp
Normal file
|
@ -0,0 +1,190 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
|
||||
// Maps the border color configured in the sampler registers to one of the
// three fixed border colors Metal offers.
//
// The fixed border color types map directly. For any other type the color is
// read from the per-stage border color registers and approximated:
//   alpha == 1 and red == 1 -> opaque white
//   alpha == 1 otherwise    -> opaque black
//   alpha != 1              -> transparent black
// Note that only the red channel decides between white and black.
//
// When logWorkaround is set, a message is logged whenever the chosen Metal
// color differs from the color stored in the registers.
MTL::SamplerBorderColor GetBorderColor(LatteConst::ShaderType shaderType, uint32 stageSamplerIndex, const _LatteRegisterSetSampler* samplerWords, bool logWorkaround = false)
{
	using BorderColorType = Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE;

	// Fixed colors have an exact Metal equivalent
	switch (samplerWords->WORD0.get_BORDER_COLOR_TYPE())
	{
	case BorderColorType::TRANSPARENT_BLACK:
		return MTL::SamplerBorderColorTransparentBlack;
	case BorderColorType::OPAQUE_BLACK:
		return MTL::SamplerBorderColorOpaqueBlack;
	case BorderColorType::OPAQUE_WHITE:
		return MTL::SamplerBorderColorOpaqueWhite;
	default:
		break; // color comes from the border color registers, handled below
	}

	// Select the border color register set of the shader stage
	_LatteRegisterSetSamplerBorderColor* stageBorderColor;
	switch (shaderType)
	{
	case LatteConst::ShaderType::Vertex:
		stageBorderColor = LatteGPUState.contextNew.TD_VS_SAMPLER_BORDER_COLOR + stageSamplerIndex;
		break;
	case LatteConst::ShaderType::Pixel:
		stageBorderColor = LatteGPUState.contextNew.TD_PS_SAMPLER_BORDER_COLOR + stageSamplerIndex;
		break;
	default: // geometry
		stageBorderColor = LatteGPUState.contextNew.TD_GS_SAMPLER_BORDER_COLOR + stageSamplerIndex;
		break;
	}

	const float r = stageBorderColor->red.get_channelValue();
	const float g = stageBorderColor->green.get_channelValue();
	const float b = stageBorderColor->blue.get_channelValue();
	const float a = stageBorderColor->alpha.get_channelValue();

	// Metal doesn't support custom border colors, so pick the best match
	MTL::SamplerBorderColor matchedColor = MTL::SamplerBorderColorTransparentBlack;
	if (a == 1.0f)
		matchedColor = (r == 1.0f) ? MTL::SamplerBorderColorOpaqueWhite : MTL::SamplerBorderColorOpaqueBlack;

	if (logWorkaround)
	{
		// Component values of the Metal color that was selected
		const float matchedRGB = (matchedColor == MTL::SamplerBorderColorOpaqueWhite) ? 1.0f : 0.0f;
		const float matchedA = (matchedColor == MTL::SamplerBorderColorTransparentBlack) ? 0.0f : 1.0f;

		const bool isExactMatch = (r == matchedRGB && g == matchedRGB && b == matchedRGB && a == matchedA);
		if (!isExactMatch)
			cemuLog_log(LogType::Force, "Custom border color ({}, {}, {}, {}) is not supported on Metal, using ({}, {}, {}, {}) instead", r, g, b, a, matchedRGB, matchedRGB, matchedRGB, matchedA);
	}

	return matchedColor;
}
|
||||
|
||||
// Releases every cached sampler state and empties the cache.
MetalSamplerCache::~MetalSamplerCache()
{
	for (auto it = m_samplerCache.begin(); it != m_samplerCache.end(); ++it)
	{
		MTL::SamplerState* cachedSampler = it->second;
		cachedSampler->release();
	}
	m_samplerCache.clear();
}
|
||||
|
||||
// Returns the Metal sampler state matching the given Latte sampler registers.
//
// Sampler states are cached by the hash from CalculateSamplerHash(); a state
// is created only the first time a given hash is requested.
//
// lcr               - forwarded to the hash calculation
// shaderType        - shader stage the sampler belongs to (selects the border color registers)
// stageSamplerIndex - index of the sampler within its stage
// samplerWords      - raw sampler register words to translate
MTL::SamplerState* MetalSamplerCache::GetSamplerState(const LatteContextRegister& lcr, LatteConst::ShaderType shaderType, uint32 stageSamplerIndex, const _LatteRegisterSetSampler* samplerWords)
{
	uint64 stateHash = CalculateSamplerHash(lcr, shaderType, stageSamplerIndex, samplerWords);
	auto& samplerState = m_samplerCache[stateHash];
	if (samplerState)
		return samplerState;

	// Sampler state
	NS_STACK_SCOPED MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();

	// lod (register values are divided by 64 to obtain the float clamp values)
	uint32 iMinLOD = samplerWords->WORD1.get_MIN_LOD();
	uint32 iMaxLOD = samplerWords->WORD1.get_MAX_LOD();
	//sint32 iLodBias = samplerWords->WORD1.get_LOD_BIAS();

	auto filterMip = samplerWords->WORD0.get_MIP_FILTER();
	if (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::NONE)
	{
		// No mip filtering: clamp the LOD range to [0, 0.25]
		samplerDescriptor->setMipFilter(MTL::SamplerMipFilterNearest);
		samplerDescriptor->setLodMinClamp(0.0f);
		samplerDescriptor->setLodMaxClamp(0.25f);
	}
	else
	{
		// POINT uses nearest; LINEAR and invalid constants (fallback) use linear
		const bool isPointMip = (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::POINT);
		samplerDescriptor->setMipFilter(isPointMip ? MTL::SamplerMipFilterNearest : MTL::SamplerMipFilterLinear);
		samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f);
		samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f);
	}

	auto filterMin = samplerWords->WORD0.get_XY_MIN_FILTER();
	cemu_assert_debug(filterMin != Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::BICUBIC); // todo
	samplerDescriptor->setMinFilter((filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::POINT || filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::ANISO_POINT) ? MTL::SamplerMinMagFilterNearest : MTL::SamplerMinMagFilterLinear);

	// The magnification filter is derived from the MAG register only
	// (previously the ANISO_POINT check accidentally tested the min filter)
	auto filterMag = samplerWords->WORD0.get_XY_MAG_FILTER();
	samplerDescriptor->setMagFilter((filterMag == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::POINT || filterMag == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::ANISO_POINT) ? MTL::SamplerMinMagFilterNearest : MTL::SamplerMinMagFilterLinear);

	// todo: z-filter for texture array samplers is customizable for GPU7 but OpenGL/Vulkan doesn't expose this functionality?

	auto clampX = samplerWords->WORD0.get_CLAMP_X();
	auto clampY = samplerWords->WORD0.get_CLAMP_Y();
	auto clampZ = samplerWords->WORD0.get_CLAMP_Z();

	samplerDescriptor->setSAddressMode(GetMtlSamplerAddressMode(clampX));
	samplerDescriptor->setTAddressMode(GetMtlSamplerAddressMode(clampY));
	samplerDescriptor->setRAddressMode(GetMtlSamplerAddressMode(clampZ));

	auto maxAniso = samplerWords->WORD0.get_MAX_ANISO_RATIO();
	if (maxAniso > 0)
	{
		// Metal only accepts anisotropy values in the range [1, 16];
		// clamp so that large register values cannot produce an invalid descriptor
		const uint32 maxSupportedAnisotropy = 16;
		uint32 anisotropy = maxSupportedAnisotropy;
		if (maxAniso < 4)
			anisotropy = 1u << maxAniso;
		samplerDescriptor->setMaxAnisotropy(anisotropy);
	}

	// TODO: set lod bias
	//samplerInfo.mipLodBias = (float)iLodBias / 64.0f;

	// depth compare
	//uint8 depthCompareMode = shader->textureUsesDepthCompare[relative_textureUnit] ? 1 : 0;
	// TODO: is it okay to just cast?
	samplerDescriptor->setCompareFunction(GetMtlCompareFunc((Latte::E_COMPAREFUNC)samplerWords->WORD0.get_DEPTH_COMPARE_FUNCTION()));

	// Border color (logs when a custom color had to be approximated)
	auto borderColor = GetBorderColor(shaderType, stageSamplerIndex, samplerWords, true);
	samplerDescriptor->setBorderColor(borderColor);

	// Stored directly into the cache entry through the reference taken above
	samplerState = m_mtlr->GetDevice()->newSamplerState(samplerDescriptor);

	return samplerState;
}
|
||||
|
||||
// Computes the cache key for a sampler configuration.
//
// The hash mixes the three raw sampler register words and the Metal border
// color the sampler resolves to (see GetBorderColor). Each step rotates the
// running hash left and adds the next value.
uint64 MetalSamplerCache::CalculateSamplerHash(const LatteContextRegister& lcr, LatteConst::ShaderType shaderType, uint32 stageSamplerIndex, const _LatteRegisterSetSampler* samplerWords)
{
	uint64 hash = 0;
	const auto mixIn = [&hash](int rotation, uint64 value) {
		hash = std::rotl<uint64>(hash, rotation);
		hash += value;
	};

	// Raw sampler words
	mixIn(17, (uint64)samplerWords->WORD0.getRawValue());
	mixIn(17, (uint64)samplerWords->WORD1.getRawValue());
	mixIn(17, (uint64)samplerWords->WORD2.getRawValue());

	// Resolved border color
	const auto resolvedBorderColor = GetBorderColor(shaderType, stageSamplerIndex, samplerWords);
	mixIn(5, (uint64)resolvedBorderColor);

	// TODO: check this
	return hash;
}
|
22
src/Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h
Normal file
22
src/Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h
Normal file
|
@ -0,0 +1,22 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
#include "HW/Latte/Core/LatteConst.h"
|
||||
#include "HW/Latte/ISA/LatteReg.h"
|
||||
|
||||
class MetalSamplerCache
|
||||
{
|
||||
public:
|
||||
MetalSamplerCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
|
||||
~MetalSamplerCache();
|
||||
|
||||
MTL::SamplerState* GetSamplerState(const LatteContextRegister& lcr, LatteConst::ShaderType shaderType, uint32 stageSamplerIndex, const _LatteRegisterSetSampler* samplerWords);
|
||||
|
||||
private:
|
||||
class MetalRenderer* m_mtlr;
|
||||
|
||||
std::map<uint64, MTL::SamplerState*> m_samplerCache;
|
||||
|
||||
uint64 CalculateSamplerHash(const LatteContextRegister& lcr, LatteConst::ShaderType shaderType, uint32 stageSamplerIndex, const _LatteRegisterSetSampler* samplerWords);
|
||||
};
|
23
src/Cafe/HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.cpp
Normal file
23
src/Cafe/HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.cpp
Normal file
|
@ -0,0 +1,23 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.h"
|
||||
|
||||
// Creates a render pipeline that consists only of a vertex function
// (rasterization is disabled).
//
// mtlRenderer        - renderer whose device creates the pipeline state
// library            - library to look up the vertex function in
// vertexFunctionName - name of the vertex function inside the library
//
// On failure an error is logged and the pipeline state is left null.
MetalVoidVertexPipeline::MetalVoidVertexPipeline(class MetalRenderer* mtlRenderer, MTL::Library* library, const std::string& vertexFunctionName)
{
	// Make sure the member has a defined value on every exit path
	m_renderPipelineState = nullptr;

	// Render pipeline state
	NS_STACK_SCOPED MTL::Function* vertexFunction = library->newFunction(ToNSString(vertexFunctionName));
	if (!vertexFunction)
	{
		cemuLog_log(LogType::Force, "failed to find vertex function {} for void vertex pipeline", vertexFunctionName);
		return;
	}

	NS_STACK_SCOPED MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
	renderPipelineDescriptor->setVertexFunction(vertexFunction);
	renderPipelineDescriptor->setRasterizationEnabled(false);

	NS::Error* error = nullptr;
	m_renderPipelineState = mtlRenderer->GetDevice()->newRenderPipelineState(renderPipelineDescriptor, &error);
	// Failure is signalled by the returned object, not by the error pointer
	if (!m_renderPipelineState)
	{
		cemuLog_log(LogType::Force, "error creating void vertex render pipeline state: {}", error ? error->localizedDescription()->utf8String() : "unknown error");
	}
}
|
||||
|
||||
// Releases the render pipeline state, if one was created.
MetalVoidVertexPipeline::~MetalVoidVertexPipeline()
{
	// Pipeline creation can fail and leave the state null
	if (m_renderPipelineState)
		m_renderPipelineState->release();
}
|
16
src/Cafe/HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.h
Normal file
16
src/Cafe/HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.h
Normal file
|
@ -0,0 +1,16 @@
|
|||
#pragma once

#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Metal/MTLLibrary.hpp"
#include "Metal/MTLRenderPipeline.hpp"

// Render pipeline that consists only of a vertex function.
// Owns the pipeline state and releases it on destruction.
class MetalVoidVertexPipeline
{
public:
	// Builds the pipeline from the function named vertexFunctionName in library
	MetalVoidVertexPipeline(class MetalRenderer* mtlRenderer, MTL::Library* library, const std::string& vertexFunctionName);
	~MetalVoidVertexPipeline();

	// The destructor releases the pipeline state, so copies would release it twice
	MetalVoidVertexPipeline(const MetalVoidVertexPipeline&) = delete;
	MetalVoidVertexPipeline& operator=(const MetalVoidVertexPipeline&) = delete;

	MTL::RenderPipelineState* GetRenderPipelineState() const { return m_renderPipelineState; }

private:
	MTL::RenderPipelineState* m_renderPipelineState = nullptr;
};
|
407
src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
Normal file
407
src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
Normal file
|
@ -0,0 +1,407 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
|
||||
//#include "Cemu/FileCache/FileCache.h"
|
||||
//#include "config/ActiveSettings.h"
|
||||
#include "Cemu/Logging/CemuLogging.h"
|
||||
#include "Common/precompiled.h"
|
||||
#include "GameProfile/GameProfile.h"
|
||||
#include "util/helpers/helpers.h"
|
||||
|
||||
#define METAL_AIR_CACHE_NAME "Cemu_AIR_cache"
|
||||
#define METAL_AIR_CACHE_PATH "/Volumes/" METAL_AIR_CACHE_NAME
|
||||
#define METAL_AIR_CACHE_SIZE (16 * 1024 * 1024)
|
||||
#define METAL_AIR_CACHE_BLOCK_COUNT (METAL_AIR_CACHE_SIZE / 512)
|
||||
|
||||
static bool s_isLoadingShadersMtl{false};
|
||||
//static bool s_hasRAMFilesystem{false};
|
||||
//class FileCache* s_airCache{nullptr};
|
||||
|
||||
extern std::atomic_int g_compiled_shaders_total;
|
||||
extern std::atomic_int g_compiled_shaders_async;
|
||||
|
||||
class ShaderMtlThreadPool
|
||||
{
|
||||
public:
|
||||
void StartThreads()
|
||||
{
|
||||
if (m_threadsActive.exchange(true))
|
||||
return;
|
||||
|
||||
// Create thread pool
|
||||
const uint32 threadCount = 2;
|
||||
for (uint32 i = 0; i < threadCount; ++i)
|
||||
s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this);
|
||||
|
||||
// Create AIR cache thread
|
||||
/*
|
||||
s_airCacheThread = new std::thread(&ShaderMtlThreadPool::AIRCacheThreadFunc, this);
|
||||
|
||||
// Set priority
|
||||
sched_param schedParam;
|
||||
schedParam.sched_priority = 20;
|
||||
if (pthread_setschedparam(s_airCacheThread->native_handle(), SCHED_FIFO, &schedParam) != 0) {
|
||||
cemuLog_log(LogType::Force, "failed to set FIFO thread priority");
|
||||
}
|
||||
|
||||
if (pthread_setschedparam(s_airCacheThread->native_handle(), SCHED_RR, &schedParam) != 0) {
|
||||
cemuLog_log(LogType::Force, "failed to set RR thread priority");
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
void StopThreads()
|
||||
{
|
||||
if (!m_threadsActive.exchange(false))
|
||||
return;
|
||||
for (uint32 i = 0; i < s_threads.size(); ++i)
|
||||
s_compilationQueueCount.increment();
|
||||
for (auto& it : s_threads)
|
||||
it.join();
|
||||
s_threads.clear();
|
||||
|
||||
/*
|
||||
if (s_airCacheThread)
|
||||
{
|
||||
s_airCacheQueueCount.increment();
|
||||
s_airCacheThread->join();
|
||||
delete s_airCacheThread;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
~ShaderMtlThreadPool()
|
||||
{
|
||||
StopThreads();
|
||||
}
|
||||
|
||||
void CompilerThreadFunc()
|
||||
{
|
||||
SetThreadName("mtlShaderComp");
|
||||
while (m_threadsActive.load(std::memory_order::relaxed))
|
||||
{
|
||||
s_compilationQueueCount.decrementWithWait();
|
||||
s_compilationQueueMutex.lock();
|
||||
if (s_compilationQueue.empty())
|
||||
{
|
||||
// queue empty again, shaders compiled synchronously via PreponeCompilation()
|
||||
s_compilationQueueMutex.unlock();
|
||||
continue;
|
||||
}
|
||||
RendererShaderMtl* job = s_compilationQueue.front();
|
||||
s_compilationQueue.pop_front();
|
||||
// set compilation state
|
||||
cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::QUEUED);
|
||||
job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::COMPILING);
|
||||
s_compilationQueueMutex.unlock();
|
||||
// compile
|
||||
job->CompileInternal();
|
||||
if (job->ShouldCountCompilation())
|
||||
++g_compiled_shaders_async;
|
||||
// mark as compiled
|
||||
cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::COMPILING);
|
||||
job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::DONE);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
void AIRCacheThreadFunc()
|
||||
{
|
||||
SetThreadName("mtlAIRCache");
|
||||
while (m_threadsActive.load(std::memory_order::relaxed))
|
||||
{
|
||||
s_airCacheQueueCount.decrementWithWait();
|
||||
s_airCacheQueueMutex.lock();
|
||||
if (s_airCacheQueue.empty())
|
||||
{
|
||||
s_airCacheQueueMutex.unlock();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Create RAM filesystem
|
||||
if (!s_hasRAMFilesystem)
|
||||
{
|
||||
executeCommand("diskutil erasevolume HFS+ {} $(hdiutil attach -nomount ram://{})", METAL_AIR_CACHE_NAME, METAL_AIR_CACHE_BLOCK_COUNT);
|
||||
s_hasRAMFilesystem = true;
|
||||
}
|
||||
|
||||
RendererShaderMtl* job = s_airCacheQueue.front();
|
||||
s_airCacheQueue.pop_front();
|
||||
s_airCacheQueueMutex.unlock();
|
||||
// compile
|
||||
job->CompileToAIR();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
bool HasThreadsRunning() const { return m_threadsActive; }
|
||||
|
||||
public:
|
||||
std::vector<std::thread> s_threads;
|
||||
//std::thread* s_airCacheThread{nullptr};
|
||||
|
||||
std::deque<RendererShaderMtl*> s_compilationQueue;
|
||||
CounterSemaphore s_compilationQueueCount;
|
||||
std::mutex s_compilationQueueMutex;
|
||||
|
||||
/*
|
||||
std::deque<RendererShaderMtl*> s_airCacheQueue;
|
||||
CounterSemaphore s_airCacheQueueCount;
|
||||
std::mutex s_airCacheQueueMutex;
|
||||
*/
|
||||
|
||||
private:
|
||||
std::atomic<bool> m_threadsActive;
|
||||
} shaderMtlThreadPool;
|
||||
|
||||
// TODO: find out if it would be possible to cache compiled Metal shaders
|
||||
// Marks the start of shader cache loading and asks the renderer to maximize
// concurrent shader compilation. cacheTitleId is only referenced by the
// currently disabled AIR cache code.
void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId)
{
	s_isLoadingShadersMtl = true;

	// Open AIR cache
	/*
	if (s_airCache)
	{
		delete s_airCache;
		s_airCache = nullptr;
	}
	uint32 airCacheMagic = GeneratePrecompiledCacheId();
	const std::string cacheFilename = fmt::format("{:016x}_air.bin", cacheTitleId);
	const fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}", cacheFilename);
	s_airCache = FileCache::Open(cachePath, true, airCacheMagic);
	if (!s_airCache)
		cemuLog_log(LogType::Force, "Unable to open AIR cache {}", cacheFilename);
	*/

	// Maximize shader compilation speed
	auto* mtlRenderer = static_cast<MetalRenderer*>(g_renderer.get());
	mtlRenderer->SetShouldMaximizeConcurrentCompilation(true);
}
|
||||
|
||||
void RendererShaderMtl::ShaderCacheLoading_end()
|
||||
{
|
||||
s_isLoadingShadersMtl = false;
|
||||
|
||||
// Reset shader compilation speed
|
||||
static_cast<MetalRenderer*>(g_renderer.get())->SetShouldMaximizeConcurrentCompilation(false);
|
||||
}
|
||||
|
||||
// Currently a no-op: the AIR cache and RAM filesystem teardown below is disabled.
void RendererShaderMtl::ShaderCacheLoading_Close()
{
	// Close the AIR cache
	/*
	if (s_airCache)
	{
		delete s_airCache;
		s_airCache = nullptr;
	}

	// Close RAM filesystem
	if (s_hasRAMFilesystem)
		executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH);
	*/
}
|
||||
|
||||
void RendererShaderMtl::Initialize()
|
||||
{
|
||||
shaderMtlThreadPool.StartThreads();
|
||||
}
|
||||
|
||||
void RendererShaderMtl::Shutdown()
|
||||
{
|
||||
shaderMtlThreadPool.StopThreads();
|
||||
}
|
||||
|
||||
RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
|
||||
: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}, m_mslCode{mslCode}
|
||||
{
|
||||
// start async compilation
|
||||
shaderMtlThreadPool.s_compilationQueueMutex.lock();
|
||||
m_compilationState.setValue(COMPILATION_STATE::QUEUED);
|
||||
shaderMtlThreadPool.s_compilationQueue.push_back(this);
|
||||
shaderMtlThreadPool.s_compilationQueueCount.increment();
|
||||
shaderMtlThreadPool.s_compilationQueueMutex.unlock();
|
||||
cemu_assert_debug(shaderMtlThreadPool.HasThreadsRunning()); // make sure .StartThreads() was called
|
||||
}
|
||||
|
||||
// Detaches the shader from the asynchronous compiler and releases its function.
//
// The constructor hands `this` to the compilation queue, so before the object
// goes away it must be removed from the queue or, if a compiler thread already
// picked it up, that compilation must be allowed to finish. Otherwise the
// compiler thread would operate on a destroyed object.
RendererShaderMtl::~RendererShaderMtl()
{
	bool isBeingCompiled = false;
	{
		std::lock_guard<std::mutex> queueLock(shaderMtlThreadPool.s_compilationQueueMutex);
		// Drop a still-pending job. The semaphore count stays as it is; the
		// compiler threads already cope with finding the queue empty.
		auto& queue = shaderMtlThreadPool.s_compilationQueue;
		queue.erase(std::remove(queue.begin(), queue.end(), this), queue.end());
		// The COMPILING state is set while holding the queue mutex, so this check is reliable
		isBeingCompiled = m_compilationState.hasState(COMPILATION_STATE::COMPILING);
	}
	if (isBeingCompiled)
		m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);

	if (m_function)
		m_function->release();
}
|
||||
|
||||
void RendererShaderMtl::PreponeCompilation(bool isRenderThread)
|
||||
{
|
||||
shaderMtlThreadPool.s_compilationQueueMutex.lock();
|
||||
bool isStillQueued = m_compilationState.hasState(COMPILATION_STATE::QUEUED);
|
||||
if (isStillQueued)
|
||||
{
|
||||
// remove from queue
|
||||
shaderMtlThreadPool.s_compilationQueue.erase(std::remove(shaderMtlThreadPool.s_compilationQueue.begin(), shaderMtlThreadPool.s_compilationQueue.end(), this), shaderMtlThreadPool.s_compilationQueue.end());
|
||||
m_compilationState.setValue(COMPILATION_STATE::COMPILING);
|
||||
}
|
||||
shaderMtlThreadPool.s_compilationQueueMutex.unlock();
|
||||
if (!isStillQueued)
|
||||
{
|
||||
m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);
|
||||
if (ShouldCountCompilation())
|
||||
--g_compiled_shaders_async; // compilation caused a stall so we don't consider this one async
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// compile synchronously
|
||||
CompileInternal();
|
||||
m_compilationState.setValue(COMPILATION_STATE::DONE);
|
||||
}
|
||||
}
|
||||
|
||||
bool RendererShaderMtl::IsCompiled()
|
||||
{
|
||||
return m_compilationState.hasState(COMPILATION_STATE::DONE);
|
||||
};
|
||||
|
||||
bool RendererShaderMtl::WaitForCompiled()
|
||||
{
|
||||
m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RendererShaderMtl::ShouldCountCompilation() const
|
||||
{
|
||||
return !s_isLoadingShadersMtl && m_isGameShader;
|
||||
}
|
||||
|
||||
// Compiles m_mslCode into a Metal library.
//
// Returns the new library (owned by the caller), or nullptr if compilation
// failed. Failures are logged together with the shader source.
MTL::Library* RendererShaderMtl::LibraryFromSource()
{
	// Compile from source
	NS_STACK_SCOPED MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init();

	// Fast math is enabled by default on MTLCompileOptions, so the game profile
	// setting has to be applied in both directions
	options->setFastMathEnabled(g_current_game_profile->GetFastMath());

	if (m_mtlr->GetPositionInvariance())
	{
		// TODO: filter out based on GPU state
		options->setPreserveInvariance(true);
	}

	NS::Error* error = nullptr;
	MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(m_mslCode), options, &error);

	// Failure is signalled by a null library. The error object can also be set
	// when compilation succeeded with warnings.
	if (!library)
	{
		cemuLog_log(LogType::Force, "failed to create library from source: {} -> {}", error ? error->localizedDescription()->utf8String() : "unknown error", m_mslCode.c_str());
		return nullptr;
	}

	if (error)
		cemuLog_log(LogType::Force, "library created from source with warnings: {}", error->localizedDescription()->utf8String());

	return library;
}
|
||||
|
||||
/*
|
||||
MTL::Library* RendererShaderMtl::LibraryFromAIR(std::span<uint8> data)
|
||||
{
|
||||
dispatch_data_t dispatchData = dispatch_data_create(data.data(), data.size(), nullptr, DISPATCH_DATA_DESTRUCTOR_DEFAULT);
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(dispatchData, &error);
|
||||
if (error)
|
||||
{
|
||||
cemuLog_log(LogType::Force, "failed to create library from AIR: {}", error->localizedDescription()->utf8String());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return library;
|
||||
}
|
||||
*/
|
||||
|
||||
void RendererShaderMtl::CompileInternal()
|
||||
{
|
||||
MTL::Library* library = nullptr;
|
||||
|
||||
// First, try to retrieve the compiled shader from the AIR cache
|
||||
/*
|
||||
if (s_isLoadingShadersMtl && (m_isGameShader && !m_isGfxPackShader) && s_airCache)
|
||||
{
|
||||
cemu_assert_debug(m_baseHash != 0);
|
||||
uint64 h1, h2;
|
||||
GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2);
|
||||
std::vector<uint8> cacheFileData;
|
||||
if (s_airCache->GetFile({ h1, h2 }, cacheFileData))
|
||||
{
|
||||
library = LibraryFromAIR(std::span<uint8>(cacheFileData.data(), cacheFileData.size()));
|
||||
FinishCompilation();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Not in the cache, compile from source
|
||||
if (!library)
|
||||
{
|
||||
// Compile from source
|
||||
library = LibraryFromSource();
|
||||
FinishCompilation();
|
||||
if (!library)
|
||||
return;
|
||||
|
||||
// Store in the AIR cache
|
||||
/*
|
||||
shaderMtlThreadPool.s_airCacheQueueMutex.lock();
|
||||
shaderMtlThreadPool.s_airCacheQueue.push_back(this);
|
||||
shaderMtlThreadPool.s_airCacheQueueCount.increment();
|
||||
shaderMtlThreadPool.s_airCacheQueueMutex.unlock();
|
||||
*/
|
||||
}
|
||||
|
||||
m_function = library->newFunction(ToNSString("main0"));
|
||||
library->release();
|
||||
|
||||
// Count shader compilation
|
||||
if (ShouldCountCompilation())
|
||||
g_compiled_shaders_total++;
|
||||
}
|
||||
|
||||
/*
|
||||
void RendererShaderMtl::CompileToAIR()
|
||||
{
|
||||
uint64 h1, h2;
|
||||
GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2);
|
||||
|
||||
// The shader is not in the cache, compile it
|
||||
std::string baseFilename = fmt::format("{}/{}_{}", METAL_AIR_CACHE_PATH, h1, h2);
|
||||
|
||||
// Source
|
||||
std::ofstream mslFile;
|
||||
mslFile.open(fmt::format("{}.metal", baseFilename));
|
||||
mslFile << m_mslCode;
|
||||
mslFile.close();
|
||||
|
||||
// Compile
|
||||
if (!executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal -w", baseFilename, baseFilename))
|
||||
return;
|
||||
if (!executeCommand("xcrun -sdk macosx metallib -o {}.metallib {}.ir", baseFilename, baseFilename))
|
||||
return;
|
||||
|
||||
// Clean up
|
||||
executeCommand("rm {}.metal", baseFilename);
|
||||
executeCommand("rm {}.ir", baseFilename);
|
||||
|
||||
// Load from the newly generated AIR
|
||||
MemoryMappedFile airFile(fmt::format("{}.metallib", baseFilename));
|
||||
std::span<uint8> airData = std::span<uint8>(airFile.data(), airFile.size());
|
||||
//library = LibraryFromAIR(std::span<uint8>(airData.data(), airData.size()));
|
||||
|
||||
// Store in the cache
|
||||
s_airCache->AddFile({ h1, h2 }, airData.data(), airData.size());
|
||||
|
||||
// Clean up
|
||||
executeCommand("rm {}.metallib", baseFilename);
|
||||
|
||||
FinishCompilation();
|
||||
}
|
||||
*/
|
||||
|
||||
void RendererShaderMtl::FinishCompilation()
|
||||
{
|
||||
m_mslCode.clear();
|
||||
m_mslCode.shrink_to_fit();
|
||||
}
|
79
src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h
Normal file
79
src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h
Normal file
|
@ -0,0 +1,79 @@
|
|||
#pragma once
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/RendererShader.h"
|
||||
#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h"
|
||||
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "util/helpers/ConcurrentQueue.h"
|
||||
#include "util/helpers/Semaphore.h"
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
class RendererShaderMtl : public RendererShader
|
||||
{
|
||||
friend class ShaderMtlThreadPool;
|
||||
|
||||
enum class COMPILATION_STATE : uint32
|
||||
{
|
||||
NONE,
|
||||
QUEUED,
|
||||
COMPILING,
|
||||
DONE
|
||||
};
|
||||
|
||||
public:
|
||||
static void ShaderCacheLoading_begin(uint64 cacheTitleId);
|
||||
static void ShaderCacheLoading_end();
|
||||
static void ShaderCacheLoading_Close();
|
||||
|
||||
static void Initialize();
|
||||
static void Shutdown();
|
||||
|
||||
RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
|
||||
virtual ~RendererShaderMtl();
|
||||
|
||||
MTL::Function* GetFunction() const
|
||||
{
|
||||
return m_function;
|
||||
}
|
||||
|
||||
sint32 GetUniformLocation(const char* name) override
|
||||
{
|
||||
cemu_assert_suspicious();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void SetUniform2fv(sint32 location, void* data, sint32 count) override
|
||||
{
|
||||
cemu_assert_suspicious();
|
||||
}
|
||||
|
||||
void SetUniform4iv(sint32 location, void* data, sint32 count) override
|
||||
{
|
||||
cemu_assert_suspicious();
|
||||
}
|
||||
|
||||
void PreponeCompilation(bool isRenderThread) override;
|
||||
bool IsCompiled() override;
|
||||
bool WaitForCompiled() override;
|
||||
|
||||
private:
|
||||
class MetalRenderer* m_mtlr;
|
||||
|
||||
MTL::Function* m_function = nullptr;
|
||||
|
||||
StateSemaphore<COMPILATION_STATE> m_compilationState{ COMPILATION_STATE::NONE };
|
||||
|
||||
std::string m_mslCode;
|
||||
|
||||
bool ShouldCountCompilation() const;
|
||||
|
||||
MTL::Library* LibraryFromSource();
|
||||
|
||||
//MTL::Library* LibraryFromAIR(std::span<uint8> data);
|
||||
|
||||
void CompileInternal();
|
||||
|
||||
//void CompileToAIR();
|
||||
|
||||
void FinishCompilation();
|
||||
};
|
51
src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h
Normal file
51
src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h
Normal file
|
@ -0,0 +1,51 @@
|
|||
#pragma once
|
||||
|
||||
#define __STRINGIFY(x) #x
|
||||
#define _STRINGIFY(x) __STRINGIFY(x)
|
||||
|
||||
constexpr const char* utilityShaderSource = R"(#include <metal_stdlib>
|
||||
using namespace metal;
|
||||
|
||||
#define GET_BUFFER_BINDING(index) (28 + index)
|
||||
#define GET_TEXTURE_BINDING(index) (29 + index)
|
||||
#define GET_SAMPLER_BINDING(index) (14 + index)
|
||||
|
||||
constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};
|
||||
|
||||
struct VertexOut {
|
||||
float4 position [[position]];
|
||||
float2 texCoord;
|
||||
};
|
||||
|
||||
vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) {
|
||||
VertexOut out;
|
||||
out.position = float4(positions[vid], 0.0, 1.0);
|
||||
out.texCoord = positions[vid] * 0.5 + 0.5;
|
||||
out.texCoord.y = 1.0 - out.texCoord.y;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
//fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], //sampler samplr [[sampler(0)]]) {
|
||||
// return tex.sample(samplr, in.texCoord);
|
||||
//}
|
||||
|
||||
vertex void vertexCopyBufferToBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]]) {
|
||||
dst[vid] = src[vid];
|
||||
}
|
||||
|
||||
fragment float4 fragmentCopyDepthToColor(VertexOut in [[stage_in]], texture2d<float, access::read> src [[texture(GET_TEXTURE_BINDING(0))]]) {
|
||||
return float4(src.read(uint2(in.position.xy)).r, 0.0, 0.0, 0.0);
|
||||
}
|
||||
|
||||
//struct RestrideParams {
|
||||
// uint oldStride;
|
||||
// uint newStride;
|
||||
//};
|
||||
|
||||
//vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer//(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]], constant //RestrideParams& params [[buffer(GET_BUFFER_BINDING(2))]]) {
|
||||
// for (uint32_t i = 0; i < params.oldStride; i++) {
|
||||
// dst[vid * params.newStride + i] = src[vid * params.oldStride + i];
|
||||
// }
|
||||
//}
|
||||
)";
|
7
src/Cafe/HW/Latte/Renderer/MetalView.h
Normal file
7
src/Cafe/HW/Latte/Renderer/MetalView.h
Normal file
|
@ -0,0 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#import <Cocoa/Cocoa.h>
|
||||
#import <QuartzCore/CAMetalLayer.h>
|
||||
|
||||
/// NSView subclass shared by the renderer backends on macOS.
/// The implementation in MetalView.mm reports CAMetalLayer as its layer class.
@interface MetalView : NSView
|
||||
@end
|
26
src/Cafe/HW/Latte/Renderer/MetalView.mm
Normal file
26
src/Cafe/HW/Latte/Renderer/MetalView.mm
Normal file
|
@ -0,0 +1,26 @@
|
|||
#include "Cafe/HW/Latte/Renderer/MetalView.h"
|
||||
|
||||
@implementation MetalView

/// Always YES for this view.
- (BOOL)wantsUpdateLayer
{
	return YES;
}

/// The backing layer created by this view is a CAMetalLayer.
+ (Class)layerClass
{
	return [CAMetalLayer class];
}

// copied from https://github.com/KhronosGroup/MoltenVK/blob/master/Demos/Cube/macOS/DemoViewController.m

/// Creates the backing layer from +layerClass and initialises its contentsScale
/// with the smaller of the view's horizontal/vertical backing scale factors.
- (CALayer *)makeBackingLayer
{
	CALayer *backingLayer = [[[self class] layerClass] layer];
	// Converting a 1x1 size into backing coordinates yields the per-axis scale factor.
	const CGSize backingScale = [self convertSizeToBacking:CGSizeMake(1.0, 1.0)];
	backingLayer.contentsScale = MIN(backingScale.width, backingScale.height);
	return backingLayer;
}

/// Adopts the proposed scale and returns YES when it differs from the layer's
/// current contentsScale; returns NO (leaving the layer untouched) otherwise.
- (BOOL)layer:(CALayer *)layer shouldInheritContentsScale:(CGFloat)newScale fromWindow:(NSWindow *)window
{
	if (newScale != layer.contentsScale)
	{
		layer.contentsScale = newScale;
		return YES;
	}
	return NO;
}

@end
|
|
@ -33,6 +33,7 @@ enum class RendererAPI
|
|||
{
|
||||
OpenGL,
|
||||
Vulkan,
|
||||
Metal,
|
||||
|
||||
MAX
|
||||
};
|
||||
|
@ -84,6 +85,7 @@ public:
|
|||
virtual void DeleteFontTextures() = 0;
|
||||
|
||||
GfxVendor GetVendor() const { return m_vendor; }
|
||||
virtual bool UseTFViaSSBO() const { return false; }
|
||||
virtual void AppendOverlayDebugInfo() = 0;
|
||||
|
||||
// rendertarget
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include "Cafe/HW/Latte/Renderer/RendererOuputShader.h"
|
||||
#include "Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h"
|
||||
#include "HW/Latte/Renderer/Renderer.h"
|
||||
|
||||
const std::string RendererOutputShader::s_copy_shader_source =
|
||||
R"(
|
||||
|
@ -9,6 +10,19 @@ void main()
|
|||
}
|
||||
)";
|
||||
|
||||
const std::string RendererOutputShader::s_copy_shader_source_mtl =
|
||||
R"(#include <metal_stdlib>
|
||||
using namespace metal;
|
||||
|
||||
struct VertexOut {
|
||||
float2 uv;
|
||||
};
|
||||
|
||||
fragment float4 main0(VertexOut in [[stage_in]], texture2d<float> textureSrc [[texture(0)]], sampler samplr [[sampler(0)]]) {
|
||||
return float4(textureSrc.sample(samplr, in.uv).rgb, 1.0);
|
||||
}
|
||||
)";
|
||||
|
||||
const std::string RendererOutputShader::s_bicubic_shader_source =
|
||||
R"(
|
||||
vec4 cubic(float x)
|
||||
|
@ -55,6 +69,57 @@ void main(){
|
|||
}
|
||||
)";
|
||||
|
||||
const std::string RendererOutputShader::s_bicubic_shader_source_mtl =
|
||||
R"(#include <metal_stdlib>
|
||||
using namespace metal;
|
||||
|
||||
float4 cubic(float x) {
|
||||
float x2 = x * x;
|
||||
float x3 = x2 * x;
|
||||
float4 w;
|
||||
w.x = -x3 + 3 * x2 - 3 * x + 1;
|
||||
w.y = 3 * x3 - 6 * x2 + 4;
|
||||
w.z = -3 * x3 + 3 * x2 + 3 * x + 1;
|
||||
w.w = x3;
|
||||
return w / 6.0;
|
||||
}
|
||||
|
||||
float4 bcFilter(texture2d<float> textureSrc, sampler samplr, float2 texcoord, float2 texscale) {
|
||||
float fx = fract(texcoord.x);
|
||||
float fy = fract(texcoord.y);
|
||||
texcoord.x -= fx;
|
||||
texcoord.y -= fy;
|
||||
|
||||
float4 xcubic = cubic(fx);
|
||||
float4 ycubic = cubic(fy);
|
||||
|
||||
float4 c = float4(texcoord.x - 0.5, texcoord.x + 1.5, texcoord.y - 0.5, texcoord.y + 1.5);
|
||||
float4 s = float4(xcubic.x + xcubic.y, xcubic.z + xcubic.w, ycubic.x + ycubic.y, ycubic.z + ycubic.w);
|
||||
float4 offset = c + float4(xcubic.y, xcubic.w, ycubic.y, ycubic.w) / s;
|
||||
|
||||
float4 sample0 = textureSrc.sample(samplr, float2(offset.x, offset.z) * texscale);
|
||||
float4 sample1 = textureSrc.sample(samplr, float2(offset.y, offset.z) * texscale);
|
||||
float4 sample2 = textureSrc.sample(samplr, float2(offset.x, offset.w) * texscale);
|
||||
float4 sample3 = textureSrc.sample(samplr, float2(offset.y, offset.w) * texscale);
|
||||
|
||||
float sx = s.x / (s.x + s.y);
|
||||
float sy = s.z / (s.z + s.w);
|
||||
|
||||
return mix(
|
||||
mix(sample3, sample2, sx),
|
||||
mix(sample1, sample0, sx), sy);
|
||||
}
|
||||
|
||||
struct VertexOut {
|
||||
float2 uv;
|
||||
};
|
||||
|
||||
fragment float4 main0(VertexOut in [[stage_in]], texture2d<float> textureSrc [[texture(0)]], sampler samplr [[sampler(0)]]) {
|
||||
float2 textureSrcResolution = float2(textureSrc.get_width(), textureSrc.get_height());
|
||||
return float4(bcFilter(textureSrc, samplr, in.uv * textureSrcResolution, float2(1.0, 1.0) / textureSrcResolution).rgb, 1.0);
|
||||
}
|
||||
)";
|
||||
|
||||
const std::string RendererOutputShader::s_hermite_shader_source =
|
||||
R"(
|
||||
// https://www.shadertoy.com/view/MllSzX
|
||||
|
@ -114,9 +179,77 @@ void main(){
|
|||
}
|
||||
)";
|
||||
|
||||
const std::string RendererOutputShader::s_hermite_shader_source_mtl =
|
||||
R"(#include <metal_stdlib>
|
||||
using namespace metal;
|
||||
|
||||
// https://www.shadertoy.com/view/MllSzX
|
||||
|
||||
float3 CubicHermite(float3 A, float3 B, float3 C, float3 D, float t) {
|
||||
float t2 = t*t;
|
||||
float t3 = t*t*t;
|
||||
float3 a = -A/2.0 + (3.0*B)/2.0 - (3.0*C)/2.0 + D/2.0;
|
||||
float3 b = A - (5.0*B)/2.0 + 2.0*C - D / 2.0;
|
||||
float3 c = -A/2.0 + C/2.0;
|
||||
float3 d = B;
|
||||
|
||||
return a*t3 + b*t2 + c*t + d;
|
||||
}
|
||||
|
||||
|
||||
float3 BicubicHermiteTexture(texture2d<float> textureSrc, sampler samplr, float2 uv, float4 texelSize) {
|
||||
float2 pixel = uv*texelSize.zw + 0.5;
|
||||
float2 frac = fract(pixel);
|
||||
pixel = floor(pixel) / texelSize.zw - float2(texelSize.xy/2.0);
|
||||
|
||||
float4 doubleSize = texelSize*texelSize;
|
||||
|
||||
float3 C00 = textureSrc.sample(samplr, pixel + float2(-texelSize.x ,-texelSize.y)).rgb;
|
||||
float3 C10 = textureSrc.sample(samplr, pixel + float2( 0.0 ,-texelSize.y)).rgb;
|
||||
float3 C20 = textureSrc.sample(samplr, pixel + float2( texelSize.x ,-texelSize.y)).rgb;
|
||||
float3 C30 = textureSrc.sample(samplr, pixel + float2( doubleSize.x,-texelSize.y)).rgb;
|
||||
|
||||
float3 C01 = textureSrc.sample(samplr, pixel + float2(-texelSize.x , 0.0)).rgb;
|
||||
float3 C11 = textureSrc.sample(samplr, pixel + float2( 0.0 , 0.0)).rgb;
|
||||
float3 C21 = textureSrc.sample(samplr, pixel + float2( texelSize.x , 0.0)).rgb;
|
||||
float3 C31 = textureSrc.sample(samplr, pixel + float2( doubleSize.x, 0.0)).rgb;
|
||||
|
||||
float3 C02 = textureSrc.sample(samplr, pixel + float2(-texelSize.x , texelSize.y)).rgb;
|
||||
float3 C12 = textureSrc.sample(samplr, pixel + float2( 0.0 , texelSize.y)).rgb;
|
||||
float3 C22 = textureSrc.sample(samplr, pixel + float2( texelSize.x , texelSize.y)).rgb;
|
||||
float3 C32 = textureSrc.sample(samplr, pixel + float2( doubleSize.x, texelSize.y)).rgb;
|
||||
|
||||
float3 C03 = textureSrc.sample(samplr, pixel + float2(-texelSize.x , doubleSize.y)).rgb;
|
||||
float3 C13 = textureSrc.sample(samplr, pixel + float2( 0.0 , doubleSize.y)).rgb;
|
||||
float3 C23 = textureSrc.sample(samplr, pixel + float2( texelSize.x , doubleSize.y)).rgb;
|
||||
float3 C33 = textureSrc.sample(samplr, pixel + float2( doubleSize.x, doubleSize.y)).rgb;
|
||||
|
||||
float3 CP0X = CubicHermite(C00, C10, C20, C30, frac.x);
|
||||
float3 CP1X = CubicHermite(C01, C11, C21, C31, frac.x);
|
||||
float3 CP2X = CubicHermite(C02, C12, C22, C32, frac.x);
|
||||
float3 CP3X = CubicHermite(C03, C13, C23, C33, frac.x);
|
||||
|
||||
return CubicHermite(CP0X, CP1X, CP2X, CP3X, frac.y);
|
||||
}
|
||||
|
||||
struct VertexOut {
|
||||
float4 position [[position]];
|
||||
float2 uv;
|
||||
};
|
||||
|
||||
fragment float4 main0(VertexOut in [[stage_in]], texture2d<float> textureSrc [[texture(0)]], sampler samplr [[sampler(0)]], constant float2& outputResolution [[buffer(0)]]) {
|
||||
float4 texelSize = float4(1.0 / outputResolution.xy, outputResolution.xy);
|
||||
return float4(BicubicHermiteTexture(textureSrc, samplr, in.uv, texelSize), 1.0);
|
||||
}
|
||||
)";
|
||||
|
||||
RendererOutputShader::RendererOutputShader(const std::string& vertex_source, const std::string& fragment_source)
|
||||
{
|
||||
auto finalFragmentSrc = PrependFragmentPreamble(fragment_source);
|
||||
std::string finalFragmentSrc;
|
||||
if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
finalFragmentSrc = fragment_source;
|
||||
else
|
||||
finalFragmentSrc = PrependFragmentPreamble(fragment_source);
|
||||
|
||||
m_vertex_shader.reset(g_renderer->shader_create(RendererShader::ShaderType::kVertex, 0, 0, vertex_source, false, false));
|
||||
m_fragment_shader.reset(g_renderer->shader_create(RendererShader::ShaderType::kFragment, 0, 0, finalFragmentSrc, false, false));
|
||||
|
@ -282,6 +415,44 @@ void main(){
|
|||
return vertex_source.str();
|
||||
}
|
||||
|
||||
// Builds the Metal Shading Language source of the output vertex shader (entry point main0).
// The generated shader picks one of three hard-coded positions from the vertex id
// ((-1,-3), (-1,1), (3,1) - a single oversized triangle) and derives uv from it as
// pos * 0.5 + 0.5 with the y component inverted.
// render_upside_down: when true, a statement negating pos.y is emitted before the
// position is written. uv is computed earlier in the shader and is not affected.
// Returns the assembled shader source text.
std::string RendererOutputShader::GetMetalVertexSource(bool render_upside_down)
|
||||
{
|
||||
// vertex shader
|
||||
std::ostringstream vertex_source;
|
||||
vertex_source <<
|
||||
R"(#include <metal_stdlib>
|
||||
using namespace metal;
|
||||
|
||||
struct VertexOut {
|
||||
float4 position [[position]];
|
||||
float2 uv;
|
||||
};
|
||||
|
||||
vertex VertexOut main0(ushort vid [[vertex_id]]) {
|
||||
VertexOut out;
|
||||
float2 pos;
|
||||
if (vid == 0) pos = float2(-1.0, -3.0);
|
||||
else if (vid == 1) pos = float2(-1.0, 1.0);
|
||||
else if (vid == 2) pos = float2(3.0, 1.0);
|
||||
out.uv = pos * 0.5 + 0.5;
|
||||
out.uv.y = 1.0 - out.uv.y;
|
||||
)";
|
||||
|
||||
// optional vertical flip of the output position only (uv has already been computed)
if (render_upside_down)
|
||||
{
|
||||
vertex_source <<
|
||||
R"( pos.y = -pos.y;
|
||||
)";
|
||||
}
|
||||
|
||||
// common tail: write the final position and close the shader function
vertex_source <<
|
||||
R"( out.position = float4(pos, 0.0, 1.0);
|
||||
return out;
|
||||
}
|
||||
)";
|
||||
return vertex_source.str();
|
||||
}
|
||||
|
||||
std::string RendererOutputShader::PrependFragmentPreamble(const std::string& shaderSrc)
|
||||
{
|
||||
return R"(#version 430
|
||||
|
@ -303,6 +474,22 @@ layout(location = 0) out vec4 colorOut0;
|
|||
)" + shaderSrc;
|
||||
}
|
||||
void RendererOutputShader::InitializeStatic()
|
||||
{
|
||||
if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
{
|
||||
std::string vertex_source = GetMetalVertexSource(false);
|
||||
std::string vertex_source_ud = GetMetalVertexSource(true);
|
||||
|
||||
s_copy_shader = new RendererOutputShader(vertex_source, s_copy_shader_source_mtl);
|
||||
s_copy_shader_ud = new RendererOutputShader(vertex_source_ud, s_copy_shader_source_mtl);
|
||||
|
||||
s_bicubic_shader = new RendererOutputShader(vertex_source, s_bicubic_shader_source_mtl);
|
||||
s_bicubic_shader_ud = new RendererOutputShader(vertex_source_ud, s_bicubic_shader_source_mtl);
|
||||
|
||||
s_hermit_shader = new RendererOutputShader(vertex_source, s_hermite_shader_source_mtl);
|
||||
s_hermit_shader_ud = new RendererOutputShader(vertex_source_ud, s_hermite_shader_source_mtl);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string vertex_source, vertex_source_ud;
|
||||
// vertex shader
|
||||
|
@ -311,7 +498,7 @@ void RendererOutputShader::InitializeStatic()
|
|||
vertex_source = GetOpenGlVertexSource(false);
|
||||
vertex_source_ud = GetOpenGlVertexSource(true);
|
||||
}
|
||||
else
|
||||
else if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
{
|
||||
vertex_source = GetVulkanVertexSource(false);
|
||||
vertex_source_ud = GetVulkanVertexSource(true);
|
||||
|
@ -325,6 +512,7 @@ void RendererOutputShader::InitializeStatic()
|
|||
s_hermit_shader = new RendererOutputShader(vertex_source, s_hermite_shader_source);
|
||||
s_hermit_shader_ud = new RendererOutputShader(vertex_source_ud, s_hermite_shader_source);
|
||||
}
|
||||
}
|
||||
|
||||
void RendererOutputShader::ShutdownStatic()
|
||||
{
|
||||
|
|
|
@ -41,8 +41,9 @@ public:
|
|||
static RendererOutputShader* s_hermit_shader;
|
||||
static RendererOutputShader* s_hermit_shader_ud;
|
||||
|
||||
static std::string GetVulkanVertexSource(bool render_upside_down);
|
||||
static std::string GetOpenGlVertexSource(bool render_upside_down);
|
||||
static std::string GetVulkanVertexSource(bool render_upside_down);
|
||||
static std::string GetMetalVertexSource(bool render_upside_down);
|
||||
|
||||
static std::string PrependFragmentPreamble(const std::string& shaderSrc);
|
||||
|
||||
|
@ -64,4 +65,8 @@ private:
|
|||
|
||||
static const std::string s_bicubic_shader_source_vk;
|
||||
static const std::string s_hermite_shader_source_vk;
|
||||
|
||||
static const std::string s_copy_shader_source_mtl;
|
||||
static const std::string s_bicubic_shader_source_mtl;
|
||||
static const std::string s_hermite_shader_source_mtl;
|
||||
};
|
||||
|
|
|
@ -1,36 +1,7 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Vulkan/CocoaSurface.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h"
|
||||
|
||||
#import <Cocoa/Cocoa.h>
|
||||
#import <QuartzCore/CAMetalLayer.h>
|
||||
|
||||
@interface MetalView : NSView
|
||||
@end
|
||||
|
||||
@implementation MetalView
|
||||
|
||||
-(BOOL) wantsUpdateLayer { return YES; }
|
||||
|
||||
+(Class) layerClass { return [CAMetalLayer class]; }
|
||||
|
||||
// copied from https://github.com/KhronosGroup/MoltenVK/blob/master/Demos/Cube/macOS/DemoViewController.m
|
||||
|
||||
-(CALayer*) makeBackingLayer
|
||||
{
|
||||
CALayer* layer = [self.class.layerClass layer];
|
||||
CGSize viewScale = [self convertSizeToBacking: CGSizeMake(1.0, 1.0)];
|
||||
layer.contentsScale = MIN(viewScale.width, viewScale.height);
|
||||
return layer;
|
||||
}
|
||||
|
||||
-(BOOL) layer: (CALayer *)layer shouldInheritContentsScale: (CGFloat)newScale fromWindow: (NSWindow *)window
|
||||
{
|
||||
if (newScale == layer.contentsScale) { return NO; }
|
||||
|
||||
layer.contentsScale = newScale;
|
||||
return YES;
|
||||
}
|
||||
@end
|
||||
#include "Cafe/HW/Latte/Renderer/MetalView.h"
|
||||
|
||||
VkSurfaceKHR CreateCocoaSurface(VkInstance instance, void* handle)
|
||||
{
|
||||
|
|
|
@ -391,8 +391,8 @@ VulkanRenderer::VulkanRenderer()
|
|||
auto surface = CreateFramebufferSurface(m_instance, gui_getWindowInfo().window_main);
|
||||
|
||||
auto& config = GetConfig();
|
||||
decltype(config.graphic_device_uuid) zero{};
|
||||
const bool has_device_set = config.graphic_device_uuid != zero;
|
||||
decltype(config.vk_graphic_device_uuid) zero{};
|
||||
const bool has_device_set = config.vk_graphic_device_uuid != zero;
|
||||
|
||||
VkPhysicalDevice fallbackDevice = VK_NULL_HANDLE;
|
||||
|
||||
|
@ -412,7 +412,7 @@ VulkanRenderer::VulkanRenderer()
|
|||
physDeviceProps.pNext = &physDeviceIDProps;
|
||||
vkGetPhysicalDeviceProperties2(device, &physDeviceProps);
|
||||
|
||||
if (memcmp(config.graphic_device_uuid.data(), physDeviceIDProps.deviceUUID, VK_UUID_SIZE) != 0)
|
||||
if (memcmp(config.vk_graphic_device_uuid.data(), physDeviceIDProps.deviceUUID, VK_UUID_SIZE) != 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -425,7 +425,7 @@ VulkanRenderer::VulkanRenderer()
|
|||
{
|
||||
cemuLog_log(LogType::Force, "The selected GPU could not be found or is not suitable. Falling back to first available device instead");
|
||||
m_physicalDevice = fallbackDevice;
|
||||
config.graphic_device_uuid = {}; // resetting device selection
|
||||
config.vk_graphic_device_uuid = {}; // resetting device selection
|
||||
}
|
||||
else if (m_physicalDevice == VK_NULL_HANDLE)
|
||||
{
|
||||
|
@ -2369,7 +2369,7 @@ void VulkanRenderer::GetTextureFormatInfoVK(Latte::E_GX2SURFFMT format, bool isD
|
|||
}
|
||||
else {
|
||||
formatInfoOut->vkImageFormat = VK_FORMAT_R4G4B4A4_UNORM_PACK16;
|
||||
formatInfoOut->decoder = TextureDecoder_R4_G4_UNORM_To_RGBA4_vk::getInstance();
|
||||
formatInfoOut->decoder = TextureDecoder_R4_G4_UNORM_To_ABGR4::getInstance();
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
@ -278,7 +278,6 @@ public:
|
|||
void* texture_acquireTextureUploadBuffer(uint32 size) override;
|
||||
void texture_releaseTextureUploadBuffer(uint8* mem) override;
|
||||
|
||||
|
||||
TextureDecoder* texture_chooseDecodedFormat(Latte::E_GX2SURFFMT format, bool isDepth, Latte::E_DIM dim, uint32 width, uint32 height) override;
|
||||
|
||||
void texture_clearSlice(LatteTexture* hostTexture, sint32 sliceIndex, sint32 mipIndex) override;
|
||||
|
@ -515,6 +514,8 @@ private:
|
|||
void DeleteFontTextures() override;
|
||||
bool BeginFrame(bool mainWindow) override;
|
||||
|
||||
bool UseTFViaSSBO() const override { return m_featureControl.mode.useTFEmulationViaSSBO; }
|
||||
|
||||
// drawcall emulation
|
||||
PipelineInfo* draw_createGraphicsPipeline(uint32 indexCount);
|
||||
PipelineInfo* draw_getOrCreateGraphicsPipeline(uint32 indexCount);
|
||||
|
@ -929,7 +930,6 @@ private:
|
|||
|
||||
public:
|
||||
bool GetDisableMultithreadedCompilation() const { return m_featureControl.disableMultithreadedCompilation; }
|
||||
bool UseTFViaSSBO() const { return m_featureControl.mode.useTFEmulationViaSSBO; }
|
||||
bool HasSPRIVRoundingModeRTE32() const { return m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32; }
|
||||
bool IsDebugUtilsEnabled() const { return m_featureControl.debugMarkersSupported && m_featureControl.instanceExtensions.debug_utils; }
|
||||
|
||||
|
|
|
@ -462,9 +462,15 @@ void FileCache::_addFileInternal(uint64 name1, uint64 name2, const uint8* fileDa
|
|||
// write file data
|
||||
fileStream->SetPosition(this->dataOffset + currentStartOffset);
|
||||
fileStream->writeData(rawData, rawSize);
|
||||
#ifdef __APPLE__
|
||||
fileStream->Flush();
|
||||
#endif
|
||||
// write file table entry
|
||||
fileStream->SetPosition(this->dataOffset + this->fileTableOffset + (uint64)(sizeof(FileTableEntry)*entryIndex));
|
||||
fileStream->writeData(this->fileTableEntries + entryIndex, sizeof(FileTableEntry));
|
||||
#ifdef __APPLE__
|
||||
fileStream->Flush();
|
||||
#endif
|
||||
if (isCompressed)
|
||||
free(rawData);
|
||||
}
|
||||
|
|
|
@ -116,6 +116,11 @@ void FileStream::extract(std::vector<uint8>& data)
|
|||
readData(data.data(), fileSize);
|
||||
}
|
||||
|
||||
// Forwards to flush() on the underlying m_fileStream so that pending writes are pushed out.
// NOTE(review): m_fileStream is presumably a std::fstream - confirm in FileStream.h.
void FileStream::Flush()
|
||||
{
|
||||
m_fileStream.flush();
|
||||
}
|
||||
|
||||
uint32 FileStream::readData(void* data, uint32 length)
|
||||
{
|
||||
SyncReadWriteSeek(false);
|
||||
|
|
|
@ -22,6 +22,8 @@ class FileStream
|
|||
bool SetEndOfFile();
|
||||
void extract(std::vector<uint8>& data);
|
||||
|
||||
void Flush();
|
||||
|
||||
// reading
|
||||
uint32 readData(void* data, uint32 length);
|
||||
bool readU64(uint64& v);
|
||||
|
|
|
@ -213,7 +213,8 @@ void CemuConfig::Load(XMLConfigParser& parser)
|
|||
// graphics
|
||||
auto graphic = parser.get("Graphic");
|
||||
graphic_api = graphic.get("api", kOpenGL);
|
||||
graphic.get("device", graphic_device_uuid);
|
||||
graphic.get("vkDevice", vk_graphic_device_uuid);
|
||||
mtl_graphic_device_uuid = graphic.get("mtlDevice", 0);
|
||||
vsync = graphic.get("VSync", 0);
|
||||
gx2drawdone_sync = graphic.get("GX2DrawdoneSync", true);
|
||||
upscale_filter = graphic.get("UpscaleFilter", kBicubicHermiteFilter);
|
||||
|
@ -221,6 +222,7 @@ void CemuConfig::Load(XMLConfigParser& parser)
|
|||
fullscreen_scaling = graphic.get("FullscreenScaling", kKeepAspectRatio);
|
||||
async_compile = graphic.get("AsyncCompile", async_compile);
|
||||
vk_accurate_barriers = graphic.get("vkAccurateBarriers", true); // this used to be "VulkanAccurateBarriers" but because we changed the default to true in 1.27.1 the option name had to be changed
|
||||
force_mesh_shaders = graphic.get("ForceMeshShaders", false);
|
||||
|
||||
auto overlay_node = graphic.get("Overlay");
|
||||
if(overlay_node.valid())
|
||||
|
@ -336,6 +338,8 @@ void CemuConfig::Load(XMLConfigParser& parser)
|
|||
crash_dump = debug.get("CrashDumpUnix", crash_dump);
|
||||
#endif
|
||||
gdb_port = debug.get("GDBPort", 1337);
|
||||
gpu_capture_dir = debug.get("GPUCaptureDir", "");
|
||||
framebuffer_fetch = debug.get("FramebufferFetch", true);
|
||||
|
||||
// input
|
||||
auto input = parser.get("Input");
|
||||
|
@ -470,9 +474,11 @@ void CemuConfig::Save(XMLConfigParser& parser)
|
|||
// graphics
|
||||
auto graphic = config.set("Graphic");
|
||||
graphic.set("api", graphic_api);
|
||||
graphic.set("device", graphic_device_uuid);
|
||||
graphic.set("vkDevice", vk_graphic_device_uuid);
|
||||
graphic.set("mtlDevice", mtl_graphic_device_uuid);
|
||||
graphic.set("VSync", vsync);
|
||||
graphic.set("GX2DrawdoneSync", gx2drawdone_sync);
|
||||
graphic.set("ForceMeshShaders", force_mesh_shaders);
|
||||
//graphic.set("PrecompiledShaders", precompiled_shaders.GetValue());
|
||||
graphic.set("UpscaleFilter", upscale_filter);
|
||||
graphic.set("DownscaleFilter", downscale_filter);
|
||||
|
@ -537,6 +543,8 @@ void CemuConfig::Save(XMLConfigParser& parser)
|
|||
debug.set("CrashDumpUnix", crash_dump.GetValue());
|
||||
#endif
|
||||
debug.set("GDBPort", gdb_port);
|
||||
debug.set("GPUCaptureDir", gpu_capture_dir);
|
||||
debug.set("FramebufferFetch", framebuffer_fetch);
|
||||
|
||||
// input
|
||||
auto input = config.set("Input");
|
||||
|
|
|
@ -74,6 +74,7 @@ enum GraphicAPI
|
|||
{
|
||||
kOpenGL = 0,
|
||||
kVulkan,
|
||||
kMetal,
|
||||
};
|
||||
|
||||
enum AudioChannels
|
||||
|
@ -123,6 +124,23 @@ enum class AccurateShaderMulOption
|
|||
};
|
||||
ENABLE_ENUM_ITERATORS(AccurateShaderMulOption, AccurateShaderMulOption::False, AccurateShaderMulOption::True);
|
||||
|
||||
enum class BufferCacheMode
|
||||
{
|
||||
Auto,
|
||||
DevicePrivate,
|
||||
DeviceShared,
|
||||
Host,
|
||||
};
|
||||
ENABLE_ENUM_ITERATORS(BufferCacheMode, BufferCacheMode::Auto, BufferCacheMode::Host);
|
||||
|
||||
enum class PositionInvariance
|
||||
{
|
||||
Auto,
|
||||
False,
|
||||
True,
|
||||
};
|
||||
ENABLE_ENUM_ITERATORS(PositionInvariance, PositionInvariance::False, PositionInvariance::True);
|
||||
|
||||
enum class CPUMode
|
||||
{
|
||||
SinglecoreInterpreter = 0,
|
||||
|
@ -221,6 +239,37 @@ struct fmt::formatter<const AccurateShaderMulOption> : formatter<string_view> {
|
|||
}
|
||||
};
|
||||
template <>
|
||||
struct fmt::formatter<const BufferCacheMode> : formatter<string_view> {
|
||||
template <typename FormatContext>
|
||||
auto format(const BufferCacheMode c, FormatContext &ctx) const {
|
||||
string_view name;
|
||||
switch (c)
|
||||
{
|
||||
case BufferCacheMode::Auto: name = "auto"; break;
|
||||
case BufferCacheMode::DevicePrivate: name = "device private"; break;
|
||||
case BufferCacheMode::DeviceShared: name = "device shared"; break;
|
||||
case BufferCacheMode::Host: name = "host"; break;
|
||||
default: name = "unknown"; break;
|
||||
}
|
||||
return formatter<string_view>::format(name, ctx);
|
||||
}
|
||||
};
|
||||
template <>
|
||||
struct fmt::formatter<const PositionInvariance> : formatter<string_view> {
|
||||
template <typename FormatContext>
|
||||
auto format(const PositionInvariance c, FormatContext &ctx) const {
|
||||
string_view name;
|
||||
switch (c)
|
||||
{
|
||||
case PositionInvariance::Auto: name = "auto"; break;
|
||||
case PositionInvariance::False: name = "false"; break;
|
||||
case PositionInvariance::True: name = "true"; break;
|
||||
default: name = "unknown"; break;
|
||||
}
|
||||
return formatter<string_view>::format(name, ctx);
|
||||
}
|
||||
};
|
||||
template <>
|
||||
struct fmt::formatter<const CPUMode> : formatter<string_view> {
|
||||
template <typename FormatContext>
|
||||
auto format(const CPUMode c, FormatContext &ctx) const {
|
||||
|
@ -439,11 +488,13 @@ struct CemuConfig
|
|||
|
||||
// graphics
|
||||
ConfigValue<GraphicAPI> graphic_api{ kVulkan };
|
||||
std::array<uint8, 16> graphic_device_uuid;
|
||||
ConfigValue<int> vsync{ 0 }; // 0 = off, 1+ = on depending on render backend
|
||||
std::array<uint8, 16> vk_graphic_device_uuid;
|
||||
uint64 mtl_graphic_device_uuid{ 0 };
|
||||
ConfigValue<int> vsync{ 0 }; // 0 = off, 1+ = depending on render backend
|
||||
ConfigValue<bool> gx2drawdone_sync { true };
|
||||
ConfigValue<bool> render_upside_down{ false };
|
||||
ConfigValue<bool> async_compile{ true };
|
||||
ConfigValue<bool> force_mesh_shaders{ false };
|
||||
|
||||
ConfigValue<bool> vk_accurate_barriers{ true };
|
||||
|
||||
|
@ -502,6 +553,8 @@ struct CemuConfig
|
|||
// debug
|
||||
ConfigValueBounds<CrashDump> crash_dump{ CrashDump::Disabled };
|
||||
ConfigValue<uint16> gdb_port{ 1337 };
|
||||
ConfigValue<std::string> gpu_capture_dir{ "" };
|
||||
ConfigValue<bool> framebuffer_fetch{ true };
|
||||
|
||||
void Load(XMLConfigParser& parser);
|
||||
void Save(XMLConfigParser& parser);
|
||||
|
@ -546,5 +599,3 @@ struct CemuConfig
|
|||
typedef XMLDataConfig<CemuConfig, &CemuConfig::Load, &CemuConfig::Save> XMLCemuConfig_t;
|
||||
extern XMLCemuConfig_t g_config;
|
||||
inline CemuConfig& GetConfig() { return g_config.data(); }
|
||||
|
||||
|
||||
|
|
|
@ -129,6 +129,13 @@ add_library(CemuGui
|
|||
wxHelper.h
|
||||
)
|
||||
|
||||
if(ENABLE_METAL)
|
||||
target_sources(CemuGui PRIVATE
|
||||
canvas/MetalCanvas.cpp
|
||||
canvas/MetalCanvas.h
|
||||
)
|
||||
endif()
|
||||
|
||||
set_property(TARGET CemuGui PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
||||
|
||||
|
||||
|
|
|
@ -383,7 +383,8 @@ void CemuApp::OnAssertFailure(const wxChar* file, int line, const wxChar* func,
|
|||
#if BOOST_OS_WINDOWS
|
||||
DumpThreadStackTrace();
|
||||
#endif
|
||||
cemu_assert_debug(false);
|
||||
// HACK
|
||||
//cemu_assert_debug(false);
|
||||
}
|
||||
|
||||
int CemuApp::FilterEvent(wxEvent& event)
|
||||
|
@ -567,5 +568,3 @@ void CemuApp::ActivateApp(wxActivateEvent& event)
|
|||
g_window_info.app_active = event.GetActive();
|
||||
event.Skip();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <wx/wupdlock.h>
|
||||
#include <wx/slider.h>
|
||||
|
||||
#include "config/CemuConfig.h"
|
||||
#include "gui/helpers/wxHelpers.h"
|
||||
#include "input/InputManager.h"
|
||||
|
||||
|
@ -112,7 +113,11 @@ GameProfileWindow::GameProfileWindow(wxWindow* parent, uint64_t title_id)
|
|||
|
||||
first_row->Add(new wxStaticText(panel, wxID_ANY, _("Graphics API")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
|
||||
|
||||
wxString gapi_values[] = { "", "OpenGL", "Vulkan" };
|
||||
wxString gapi_values[] = { "", "OpenGL", "Vulkan",
|
||||
#if ENABLE_METAL
|
||||
"Metal"
|
||||
#endif
|
||||
};
|
||||
m_graphic_api = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(gapi_values), gapi_values);
|
||||
first_row->Add(m_graphic_api, 0, wxALL, 5);
|
||||
|
||||
|
@ -123,6 +128,27 @@ GameProfileWindow::GameProfileWindow(wxWindow* parent, uint64_t title_id)
|
|||
m_shader_mul_accuracy->SetToolTip(_("EXPERT OPTION\nControls the accuracy of floating point multiplication in shaders.\n\nRecommended: true"));
|
||||
first_row->Add(m_shader_mul_accuracy, 0, wxALL, 5);
|
||||
|
||||
first_row->Add(new wxStaticText(panel, wxID_ANY, _("Fast math")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
|
||||
|
||||
wxString math_values[] = { _("false"), _("true") };
|
||||
m_fast_math = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(math_values), math_values);
|
||||
m_fast_math->SetToolTip(_("EXPERT OPTION\nEnables fast math for all shaders. May (rarely) cause graphical bugs.\n\nMetal only\n\nRecommended: true"));
|
||||
first_row->Add(m_fast_math, 0, wxALL, 5);
|
||||
|
||||
first_row->Add(new wxStaticText(panel, wxID_ANY, _("Buffer cache mode")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
|
||||
|
||||
wxString cache_values[] = { _("auto"), _("device private"), _("device shared"), _("host") };
|
||||
m_buffer_cache_mode = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(cache_values), cache_values);
|
||||
m_buffer_cache_mode->SetToolTip(_("EXPERT OPTION\nDecides how the buffer cache memory will be managed.\n\nMetal only\n\nRecommended: auto"));
|
||||
first_row->Add(m_buffer_cache_mode, 0, wxALL, 5);
|
||||
|
||||
first_row->Add(new wxStaticText(panel, wxID_ANY, _("Position invariance")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
|
||||
|
||||
wxString pos_values[] = { _("auto"), _("false"), _("true") };
|
||||
m_position_invariance = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(pos_values), pos_values);
|
||||
m_position_invariance->SetToolTip(_("EXPERT OPTION\nDisables most optimizations for vertex positions. May fix polygon cutouts or flickering in some games.\n\nMetal only\n\nRecommended: auto"));
|
||||
first_row->Add(m_position_invariance, 0, wxALL, 5);
|
||||
|
||||
/*first_row->Add(new wxStaticText(panel, wxID_ANY, _("GPU buffer cache accuracy")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
|
||||
wxString accuarcy_values[] = { _("high"), _("medium"), _("low") };
|
||||
m_cache_accuracy = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(accuarcy_values), accuarcy_values);
|
||||
|
@ -267,8 +293,11 @@ void GameProfileWindow::ApplyProfile()
|
|||
if (!m_game_profile.m_graphics_api.has_value())
|
||||
m_graphic_api->SetSelection(0); // selecting ""
|
||||
else
|
||||
m_graphic_api->SetSelection(1 + m_game_profile.m_graphics_api.value()); // "", OpenGL, Vulkan
|
||||
m_graphic_api->SetSelection(1 + m_game_profile.m_graphics_api.value()); // "", OpenGL, Vulkan, Metal
|
||||
m_shader_mul_accuracy->SetSelection((int)m_game_profile.m_accurateShaderMul);
|
||||
m_fast_math->SetSelection((int)m_game_profile.m_fastMath);
|
||||
m_buffer_cache_mode->SetSelection((int)m_game_profile.m_bufferCacheMode);
|
||||
m_position_invariance->SetSelection((int)m_game_profile.m_positionInvariance);
|
||||
|
||||
//// audio
|
||||
//m_disable_audio->Set3StateValue(GetCheckboxState(m_game_profile.disableAudio));
|
||||
|
@ -330,11 +359,14 @@ void GameProfileWindow::SaveProfile()
|
|||
m_game_profile.m_accurateShaderMul = (AccurateShaderMulOption)m_shader_mul_accuracy->GetSelection();
|
||||
if (m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::False && m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::True)
|
||||
m_game_profile.m_accurateShaderMul = AccurateShaderMulOption::True; // force a legal value
|
||||
m_game_profile.m_fastMath = (bool)m_fast_math->GetSelection();
|
||||
m_game_profile.m_bufferCacheMode = (BufferCacheMode)m_buffer_cache_mode->GetSelection();
|
||||
m_game_profile.m_positionInvariance = (PositionInvariance)m_position_invariance->GetSelection();
|
||||
|
||||
if (m_graphic_api->GetSelection() == 0)
|
||||
m_game_profile.m_graphics_api = {};
|
||||
else
|
||||
m_game_profile.m_graphics_api = (GraphicAPI)(m_graphic_api->GetSelection() - 1); // "", OpenGL, Vulkan
|
||||
m_game_profile.m_graphics_api = (GraphicAPI)(m_graphic_api->GetSelection() - 1); // "", OpenGL, Vulkan, Metal
|
||||
|
||||
// controller
|
||||
for (int i = 0; i < 8; ++i)
|
||||
|
|
|
@ -40,6 +40,9 @@ private:
|
|||
wxChoice* m_graphic_api;
|
||||
|
||||
wxChoice* m_shader_mul_accuracy;
|
||||
wxChoice* m_fast_math;
|
||||
wxChoice* m_buffer_cache_mode;
|
||||
wxChoice* m_position_invariance;
|
||||
//wxChoice* m_cache_accuracy;
|
||||
|
||||
// audio
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <wx/collpane.h>
|
||||
#include <wx/clrpicker.h>
|
||||
#include <wx/cshelp.h>
|
||||
#include <wx/textctrl.h>
|
||||
#include <wx/textdlg.h>
|
||||
#include <wx/hyperlink.h>
|
||||
|
||||
|
@ -27,6 +28,9 @@
|
|||
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
|
||||
#if ENABLE_METAL
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#endif
|
||||
#include "Cafe/Account/Account.h"
|
||||
|
||||
#include <boost/tokenizer.hpp>
|
||||
|
@ -93,6 +97,19 @@ private:
|
|||
VulkanRenderer::DeviceInfo m_device_info;
|
||||
};
|
||||
|
||||
#if ENABLE_METAL
|
||||
class wxMetalUUID : public wxClientData
|
||||
{
|
||||
public:
|
||||
wxMetalUUID(const MetalRenderer::DeviceInfo& info)
|
||||
: m_device_info(info) {}
|
||||
const MetalRenderer::DeviceInfo& GetDeviceInfo() const { return m_device_info; }
|
||||
|
||||
private:
|
||||
MetalRenderer::DeviceInfo m_device_info;
|
||||
};
|
||||
#endif
|
||||
|
||||
class wxAccountData : public wxClientData
|
||||
{
|
||||
public:
|
||||
|
@ -311,12 +328,14 @@ wxPanel* GeneralSettings2::AddGraphicsPage(wxNotebook* notebook)
|
|||
row->Add(new wxStaticText(box, wxID_ANY, _("Graphics API")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
|
||||
|
||||
sint32 api_size = 1;
|
||||
wxString choices[2] = { "OpenGL" };
|
||||
wxString choices[3] = { "OpenGL" };
|
||||
if (g_vulkan_available)
|
||||
{
|
||||
choices[1] = "Vulkan";
|
||||
api_size = 2;
|
||||
choices[api_size++] = "Vulkan";
|
||||
}
|
||||
#if ENABLE_METAL
|
||||
choices[api_size++] = "Metal";
|
||||
#endif
|
||||
|
||||
m_graphic_api = new wxChoice(box, wxID_ANY, wxDefaultPosition, wxDefaultSize, api_size, choices);
|
||||
m_graphic_api->SetSelection(0);
|
||||
|
@ -348,6 +367,10 @@ wxPanel* GeneralSettings2::AddGraphicsPage(wxNotebook* notebook)
|
|||
m_gx2drawdone_sync->SetToolTip(_("If synchronization is requested by the game, the emulated CPU will wait for the GPU to finish all operations.\nThis is more accurate behavior, but may cause lower performance"));
|
||||
graphic_misc_row->Add(m_gx2drawdone_sync, 0, wxALL, 5);
|
||||
|
||||
m_force_mesh_shaders = new wxCheckBox(box, wxID_ANY, _("Force mesh shaders"));
|
||||
m_force_mesh_shaders->SetToolTip(_("Force mesh shaders on all GPUs that support them. Mesh shaders are disabled by default on Intel GPUs due to potential stability issues"));
|
||||
graphic_misc_row->Add(m_force_mesh_shaders, 0, wxALL, 5);
|
||||
|
||||
box_sizer->Add(graphic_misc_row, 1, wxEXPAND, 5);
|
||||
graphics_panel_sizer->Add(box_sizer, 0, wxEXPAND | wxALL, 5);
|
||||
}
|
||||
|
@ -876,6 +899,33 @@ wxPanel* GeneralSettings2::AddDebugPage(wxNotebook* notebook)
|
|||
debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5);
|
||||
}
|
||||
|
||||
{
|
||||
auto* debug_row = new wxFlexGridSizer(0, 2, 0, 0);
|
||||
debug_row->SetFlexibleDirection(wxBOTH);
|
||||
debug_row->SetNonFlexibleGrowMode(wxFLEX_GROWMODE_SPECIFIED);
|
||||
|
||||
debug_row->Add(new wxStaticText(panel, wxID_ANY, _("GPU capture save directory"), wxDefaultPosition, wxDefaultSize, 0), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
|
||||
|
||||
m_gpu_capture_dir = new wxTextCtrl(panel, wxID_ANY, wxEmptyString, wxDefaultPosition, wxDefaultSize, wxTE_DONTWRAP);
|
||||
m_gpu_capture_dir->SetMinSize(wxSize(150, -1));
|
||||
m_gpu_capture_dir->SetToolTip(_("Cemu will save the GPU captures done by selecting Debug -> GPU capture in the menu bar in this directory. If a debugger with support for GPU captures (like Xcode) is attached, the capture will be opened in that debugger instead. If such debugger is not attached, METAL_CAPTURE_ENABLED must be set to 1 as an environment variable."));
|
||||
|
||||
debug_row->Add(m_gpu_capture_dir, 0, wxALL | wxEXPAND, 5);
|
||||
debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5);
|
||||
}
|
||||
|
||||
{
|
||||
auto* debug_row = new wxFlexGridSizer(0, 2, 0, 0);
|
||||
debug_row->SetFlexibleDirection(wxBOTH);
|
||||
debug_row->SetNonFlexibleGrowMode(wxFLEX_GROWMODE_SPECIFIED);
|
||||
|
||||
m_framebuffer_fetch = new wxCheckBox(panel, wxID_ANY, _("Framebuffer fetch"));
|
||||
m_framebuffer_fetch->SetToolTip(_("Enable framebuffer fetch for eligible textures on supported devices."));
|
||||
|
||||
debug_row->Add(m_framebuffer_fetch, 0, wxALL | wxEXPAND, 5);
|
||||
debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5);
|
||||
}
|
||||
|
||||
panel->SetSizerAndFit(debug_panel_sizer);
|
||||
|
||||
return panel;
|
||||
|
@ -946,7 +996,6 @@ void GeneralSettings2::StoreConfig()
|
|||
ScreenSaver::SetInhibit(config.disable_screensaver);
|
||||
}
|
||||
|
||||
|
||||
// -1 is default wx widget value -> set to dummy 0 so mainwindow and padwindow will update it
|
||||
config.window_position = m_save_window_position_size->IsChecked() ? Vector2i{ 0,0 } : Vector2i{-1,-1};
|
||||
config.window_size = m_save_window_position_size->IsChecked() ? Vector2i{ 0,0 } : Vector2i{-1,-1};
|
||||
|
@ -1025,20 +1074,37 @@ void GeneralSettings2::StoreConfig()
|
|||
config.graphic_api = (GraphicAPI)m_graphic_api->GetSelection();
|
||||
|
||||
selection = m_graphic_device->GetSelection();
|
||||
if (config.graphic_api == GraphicAPI::kVulkan)
|
||||
{
|
||||
if (selection != wxNOT_FOUND)
|
||||
{
|
||||
const auto* info = (wxVulkanUUID*)m_graphic_device->GetClientObject(selection);
|
||||
if (info)
|
||||
config.graphic_device_uuid = info->GetDeviceInfo().uuid;
|
||||
config.vk_graphic_device_uuid = info->GetDeviceInfo().uuid;
|
||||
else
|
||||
config.graphic_device_uuid = {};
|
||||
config.vk_graphic_device_uuid = {};
|
||||
}
|
||||
else
|
||||
config.graphic_device_uuid = {};
|
||||
config.vk_graphic_device_uuid = {};
|
||||
}
|
||||
else if (config.graphic_api == GraphicAPI::kMetal)
|
||||
{
|
||||
if (selection != wxNOT_FOUND)
|
||||
{
|
||||
const auto* info = (wxMetalUUID*)m_graphic_device->GetClientObject(selection);
|
||||
if (info)
|
||||
config.mtl_graphic_device_uuid = info->GetDeviceInfo().uuid;
|
||||
else
|
||||
config.mtl_graphic_device_uuid = {};
|
||||
}
|
||||
else
|
||||
config.mtl_graphic_device_uuid = {};
|
||||
}
|
||||
|
||||
|
||||
config.vsync = m_vsync->GetSelection();
|
||||
config.gx2drawdone_sync = m_gx2drawdone_sync->IsChecked();
|
||||
config.force_mesh_shaders = m_force_mesh_shaders->IsChecked();
|
||||
config.async_compile = m_async_compile->IsChecked();
|
||||
|
||||
config.upscale_filter = m_upscale_filter->GetSelection();
|
||||
|
@ -1071,6 +1137,8 @@ void GeneralSettings2::StoreConfig()
|
|||
// debug
|
||||
config.crash_dump = (CrashDump)m_crash_dump->GetSelection();
|
||||
config.gdb_port = m_gdb_port->GetValue();
|
||||
config.gpu_capture_dir = m_gpu_capture_dir->GetValue().utf8_string();
|
||||
config.framebuffer_fetch = m_framebuffer_fetch->IsChecked();
|
||||
|
||||
g_config.Save();
|
||||
}
|
||||
|
@ -1517,12 +1585,14 @@ void GeneralSettings2::HandleGraphicsApiSelection()
|
|||
|
||||
m_gx2drawdone_sync->Enable();
|
||||
m_async_compile->Disable();
|
||||
m_force_mesh_shaders->Disable();
|
||||
}
|
||||
else
|
||||
else if (m_graphic_api->GetSelection() == 1)
|
||||
{
|
||||
// Vulkan
|
||||
m_gx2drawdone_sync->Disable();
|
||||
m_async_compile->Enable();
|
||||
m_force_mesh_shaders->Disable();
|
||||
|
||||
m_vsync->AppendString(_("Off"));
|
||||
m_vsync->AppendString(_("Double buffering"));
|
||||
|
@ -1547,7 +1617,7 @@ void GeneralSettings2::HandleGraphicsApiSelection()
|
|||
const auto& config = GetConfig();
|
||||
for(size_t i = 0; i < devices.size(); ++i)
|
||||
{
|
||||
if(config.graphic_device_uuid == devices[i].uuid)
|
||||
if(config.vk_graphic_device_uuid == devices[i].uuid)
|
||||
{
|
||||
m_graphic_device->SetSelection(i);
|
||||
break;
|
||||
|
@ -1555,6 +1625,42 @@ void GeneralSettings2::HandleGraphicsApiSelection()
|
|||
}
|
||||
}
|
||||
}
|
||||
else
{
	// Metal
	m_gx2drawdone_sync->Disable();
	m_async_compile->Enable();
	m_force_mesh_shaders->Enable();

	m_vsync->AppendString(_("Off"));
	m_vsync->AppendString(_("On"));

	m_vsync->Select(selection);

	m_graphic_device->Enable();
	m_graphic_device->Clear();
#if ENABLE_METAL
	// Everything that names MetalRenderer / wxMetalUUID must stay inside this
	// guard: both are only declared when the Metal backend is compiled in.
	auto devices = MetalRenderer::GetDevices();
	if (!devices.empty())
	{
		for (const auto& device : devices)
		{
			m_graphic_device->Append(device.name, new wxMetalUUID(device));
		}
		// Default to the first device, then prefer the one saved in the config.
		m_graphic_device->SetSelection(0);

		const auto& config = GetConfig();
		for (size_t i = 0; i < devices.size(); ++i)
		{
			if (config.mtl_graphic_device_uuid == devices[i].uuid)
			{
				m_graphic_device->SetSelection(i);
				break;
			}
		}
	}
#endif
}
|
||||
}
|
||||
|
||||
void GeneralSettings2::ApplyConfig()
|
||||
|
@ -1608,6 +1714,7 @@ void GeneralSettings2::ApplyConfig()
|
|||
m_vsync->SetSelection(config.vsync);
|
||||
m_async_compile->SetValue(config.async_compile);
|
||||
m_gx2drawdone_sync->SetValue(config.gx2drawdone_sync);
|
||||
m_force_mesh_shaders->SetValue(config.force_mesh_shaders);
|
||||
m_upscale_filter->SetSelection(config.upscale_filter);
|
||||
m_downscale_filter->SetSelection(config.downscale_filter);
|
||||
m_fullscreen_scaling->SetSelection(config.fullscreen_scaling);
|
||||
|
@ -1728,6 +1835,8 @@ void GeneralSettings2::ApplyConfig()
|
|||
// debug
|
||||
m_crash_dump->SetSelection((int)config.crash_dump.GetValue());
|
||||
m_gdb_port->SetValue(config.gdb_port.GetValue());
|
||||
m_gpu_capture_dir->SetValue(wxHelper::FromUtf8(config.gpu_capture_dir.GetValue()));
|
||||
m_framebuffer_fetch->SetValue(config.framebuffer_fetch);
|
||||
}
|
||||
|
||||
void GeneralSettings2::OnAudioAPISelected(wxCommandEvent& event)
|
||||
|
|
|
@ -53,7 +53,7 @@ private:
|
|||
// Graphics
|
||||
wxChoice* m_graphic_api, * m_graphic_device;
|
||||
wxChoice* m_vsync;
|
||||
wxCheckBox *m_async_compile, *m_gx2drawdone_sync;
|
||||
wxCheckBox *m_async_compile, *m_gx2drawdone_sync, *m_force_mesh_shaders;
|
||||
wxRadioBox* m_upscale_filter, *m_downscale_filter, *m_fullscreen_scaling;
|
||||
wxChoice* m_overlay_position, *m_notification_position, *m_overlay_scale, *m_notification_scale;
|
||||
wxCheckBox* m_controller_profile_name, *m_controller_low_battery, *m_shader_compiling, *m_friends_data;
|
||||
|
@ -79,6 +79,8 @@ private:
|
|||
// Debug
|
||||
wxChoice* m_crash_dump;
|
||||
wxSpinCtrl* m_gdb_port;
|
||||
wxTextCtrl* m_gpu_capture_dir;
|
||||
wxCheckBox* m_framebuffer_fetch;
|
||||
|
||||
void OnAccountCreate(wxCommandEvent& event);
|
||||
void OnAccountDelete(wxCommandEvent& event);
|
||||
|
@ -114,4 +116,3 @@ private:
|
|||
void HandleGraphicsApiSelection();
|
||||
void ApplyConfig();
|
||||
};
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ LoggingWindow::LoggingWindow(wxFrame* parent)
|
|||
|
||||
filter_row->Add(new wxStaticText( this, wxID_ANY, _("Filter")), 0, wxALIGN_CENTER_VERTICAL|wxALL, 5 );
|
||||
|
||||
wxString choices[] = {"Unsupported APIs calls", "Coreinit Logging", "Coreinit File-Access", "Coreinit Thread-Synchronization", "Coreinit Memory", "Coreinit MP", "Coreinit Thread", "nn::nfp", "GX2", "Audio", "Input", "Socket", "Save", "H264", "Graphic pack patches", "Texture cache", "Texture readback", "OpenGL debug output", "Vulkan validation layer"};
|
||||
wxString choices[] = {"Unsupported APIs calls", "Coreinit Logging", "Coreinit File-Access", "Coreinit Thread-Synchronization", "Coreinit Memory", "Coreinit MP", "Coreinit Thread", "nn::nfp", "GX2", "Audio", "Input", "Socket", "Save", "H264", "Graphic pack patches", "Texture cache", "Texture readback", "OpenGL debug output", "Vulkan validation layer", "Metal debug output"};
|
||||
m_filter = new wxComboBox( this, wxID_ANY, wxEmptyString, wxDefaultPosition, wxDefaultSize, std::size(choices), choices, 0 );
|
||||
m_filter->Bind(wxEVT_COMBOBOX, &LoggingWindow::OnFilterChange, this);
|
||||
m_filter->Bind(wxEVT_TEXT, &LoggingWindow::OnFilterChange, this);
|
||||
|
@ -97,4 +97,3 @@ void LoggingWindow::OnFilterMessageChange(wxCommandEvent& event)
|
|||
m_log_list->SetFilterMessage(m_filter_message->GetValue());
|
||||
event.Skip();
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||
#include "gui/wxgui.h"
|
||||
#include "gui/MainWindow.h"
|
||||
#include "gui/guiWrapper.h"
|
||||
|
@ -12,6 +14,7 @@
|
|||
#include "audio/audioDebuggerWindow.h"
|
||||
#include "gui/canvas/OpenGLCanvas.h"
|
||||
#include "gui/canvas/VulkanCanvas.h"
|
||||
#include "gui/canvas/MetalCanvas.h"
|
||||
#include "Cafe/OS/libs/nfc/nfc.h"
|
||||
#include "Cafe/OS/libs/swkbd/swkbd.h"
|
||||
#include "gui/debugger/DebuggerWindow2.h"
|
||||
|
@ -137,6 +140,7 @@ enum
|
|||
MAINFRAME_MENU_ID_DEBUG_VIEW_TEXTURE_RELATIONS,
|
||||
MAINFRAME_MENU_ID_DEBUG_AUDIO_AUX_ONLY,
|
||||
MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS,
|
||||
MAINFRAME_MENU_ID_DEBUG_GPU_CAPTURE,
|
||||
|
||||
// debug->logging
|
||||
MAINFRAME_MENU_ID_DEBUG_LOGGING0 = 21500,
|
||||
|
@ -215,6 +219,7 @@ EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_CURL_REQUESTS, MainWindow::OnDebugSetting)
|
|||
EVT_MENU(MAINFRAME_MENU_ID_DEBUG_RENDER_UPSIDE_DOWN, MainWindow::OnDebugSetting)
|
||||
EVT_MENU(MAINFRAME_MENU_ID_DEBUG_AUDIO_AUX_ONLY, MainWindow::OnDebugSetting)
|
||||
EVT_MENU(MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS, MainWindow::OnDebugSetting)
|
||||
EVT_MENU(MAINFRAME_MENU_ID_DEBUG_GPU_CAPTURE, MainWindow::OnDebugSetting)
|
||||
EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_RAM, MainWindow::OnDebugSetting)
|
||||
EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_FST, MainWindow::OnDebugSetting)
|
||||
// debug -> View ...
|
||||
|
@ -1014,6 +1019,14 @@ void MainWindow::OnDebugSetting(wxCommandEvent& event)
|
|||
GetConfig().vk_accurate_barriers = event.IsChecked();
|
||||
if(!GetConfig().vk_accurate_barriers)
|
||||
wxMessageBox(_("Warning: Disabling the accurate barriers option will lead to flickering graphics but may improve performance. It is highly recommended to leave it turned on."), _("Accurate barriers are off"), wxOK);
|
||||
}
|
||||
else if (event.GetId() == MAINFRAME_MENU_ID_DEBUG_GPU_CAPTURE)
|
||||
{
|
||||
cemu_assert_debug(g_renderer->GetType() == RendererAPI::Metal);
|
||||
|
||||
#if ENABLE_METAL
|
||||
static_cast<MetalRenderer*>(g_renderer.get())->CaptureFrame();
|
||||
#endif
|
||||
}
|
||||
else if (event.GetId() == MAINFRAME_MENU_ID_DEBUG_AUDIO_AUX_ONLY)
|
||||
ActiveSettings::EnableAudioOnlyAux(event.IsChecked());
|
||||
|
@ -1572,8 +1585,12 @@ void MainWindow::CreateCanvas()
|
|||
// create canvas
|
||||
if (ActiveSettings::GetGraphicsAPI() == kVulkan)
|
||||
m_render_canvas = new VulkanCanvas(m_game_panel, wxSize(1280, 720), true);
|
||||
else
|
||||
else if (ActiveSettings::GetGraphicsAPI() == kOpenGL)
|
||||
m_render_canvas = GLCanvas_Create(m_game_panel, wxSize(1280, 720), true);
|
||||
#if ENABLE_METAL
|
||||
else
|
||||
m_render_canvas = new MetalCanvas(m_game_panel, wxSize(1280, 720), true);
|
||||
#endif
|
||||
|
||||
// mouse events
|
||||
m_render_canvas->Bind(wxEVT_MOTION, &MainWindow::OnMouseMove, this);
|
||||
|
@ -2256,6 +2273,9 @@ void MainWindow::RecreateMenu()
|
|||
auto accurateBarriers = debugMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS, _("&Accurate barriers (Vulkan)"), wxEmptyString);
|
||||
accurateBarriers->Check(GetConfig().vk_accurate_barriers);
|
||||
|
||||
auto gpuCapture = debugMenu->Append(MAINFRAME_MENU_ID_DEBUG_GPU_CAPTURE, _("&GPU capture (Metal)"));
|
||||
gpuCapture->Enable(m_game_launched && g_renderer->GetType() == RendererAPI::Metal);
|
||||
|
||||
debugMenu->AppendSeparator();
|
||||
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "Cafe/OS/libs/swkbd/swkbd.h"
|
||||
#include "gui/canvas/OpenGLCanvas.h"
|
||||
#include "gui/canvas/VulkanCanvas.h"
|
||||
#include "gui/canvas/MetalCanvas.h"
|
||||
#include "config/CemuConfig.h"
|
||||
#include "gui/MainWindow.h"
|
||||
#include "gui/helpers/wxHelpers.h"
|
||||
|
@ -74,8 +75,12 @@ void PadViewFrame::InitializeRenderCanvas()
|
|||
{
|
||||
if (ActiveSettings::GetGraphicsAPI() == kVulkan)
|
||||
m_render_canvas = new VulkanCanvas(this, wxSize(854, 480), false);
|
||||
else
|
||||
else if (ActiveSettings::GetGraphicsAPI() == kOpenGL)
|
||||
m_render_canvas = GLCanvas_Create(this, wxSize(854, 480), false);
|
||||
#if ENABLE_METAL
|
||||
else
|
||||
m_render_canvas = new MetalCanvas(this, wxSize(854, 480), false);
|
||||
#endif
|
||||
sizer->Add(m_render_canvas, 1, wxEXPAND, 0, nullptr);
|
||||
}
|
||||
SetSizer(sizer);
|
||||
|
|
62
src/gui/canvas/MetalCanvas.cpp
Normal file
62
src/gui/canvas/MetalCanvas.cpp
Normal file
|
@ -0,0 +1,62 @@
|
|||
#include "gui/canvas/MetalCanvas.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "gui/guiWrapper.h"
|
||||
|
||||
#include <wx/msgdlg.h>
|
||||
#include <helpers/wxHelpers.h>
|
||||
|
||||
// Creates the wx window that hosts a Metal-rendered surface.
//
// parent         - owning wx window.
// size           - initial window size; also passed to the renderer as the
//                  initial layer size.
// is_main_window - true for the main (TV) canvas, false for the pad canvas.
//                  Only the main canvas creates the global MetalRenderer.
//
// If renderer or layer initialization throws, the error is logged, shown to
// the user, and the process terminates with a failure exit status.
MetalCanvas::MetalCanvas(wxWindow* parent, const wxSize& size, bool is_main_window)
	: IRenderCanvas(is_main_window), wxWindow(parent, wxID_ANY, wxDefaultPosition, size, wxNO_FULL_REPAINT_ON_RESIZE | wxWANTS_CHARS)
{
	Bind(wxEVT_PAINT, &MetalCanvas::OnPaint, this);
	Bind(wxEVT_SIZE, &MetalCanvas::OnResize, this);

	// Publish the native window handle for this canvas slot.
	WindowHandleInfo& canvas = is_main_window ? gui_getWindowInfo().canvas_main : gui_getWindowInfo().canvas_pad;
	gui_initHandleContextFromWxWidgetsWindow(canvas, this);

	try
	{
		if (is_main_window)
			g_renderer = std::make_unique<MetalRenderer>();

		auto metal_renderer = MetalRenderer::GetInstance();
		metal_renderer->InitializeLayer({size.x, size.y}, is_main_window);
	}
	catch(const std::exception& ex)
	{
		cemuLog_log(LogType::Force, "Error when initializing Metal renderer: {}", ex.what());
		auto msg = formatWxString(_("Error when initializing Metal renderer:\n{}"), ex.what());
		wxMessageDialog dialog(this, msg, _("Error"), wxOK | wxCENTRE | wxICON_ERROR);
		dialog.ShowModal();
		// This is a fatal error: report failure to the OS/launcher instead of
		// pretending the run succeeded.
		exit(EXIT_FAILURE);
	}

	wxWindow::EnableTouchEvents(wxTOUCH_PAN_GESTURES);
}
|
||||
|
||||
// Detaches the event handlers installed by the constructor and, if a renderer
// still exists, asks it to shut down the layer belonging to this canvas.
MetalCanvas::~MetalCanvas()
{
	Unbind(wxEVT_PAINT, &MetalCanvas::OnPaint, this);
	Unbind(wxEVT_SIZE, &MetalCanvas::OnResize, this);

	// g_renderer may already have been released; only touch it when present.
	if (auto* renderer = static_cast<MetalRenderer*>(g_renderer.get()))
	{
		renderer->ShutdownLayer(m_is_main_window);
	}
}
|
||||
|
||||
// wxEVT_PAINT handler bound in the constructor. The body is empty: no drawing
// is done through wx here.
// NOTE(review): presumably the Metal renderer presents to the layer on its own - confirm.
void MetalCanvas::OnPaint(wxPaintEvent& event)
|
||||
{
|
||||
}
|
||||
|
||||
// wxEVT_SIZE handler: invalidates the window and forwards the new size to the
// renderer so the layer for this canvas can be resized.
// Degenerate sizes (zero width or height) are ignored.
void MetalCanvas::OnResize(wxSizeEvent& event)
{
	const wxSize size = GetSize();
	if (size.GetWidth() == 0 || size.GetHeight() == 0)
		return;

	const wxRect refreshRect(size);
	RefreshRect(refreshRect, false);

	// The destructor already treats the renderer as possibly absent; do the
	// same here so a late size event cannot dereference a null renderer.
	auto metal_renderer = MetalRenderer::GetInstance();
	if (!metal_renderer)
		return;

	metal_renderer->ResizeLayer({size.x, size.y}, m_is_main_window);
}
|
19
src/gui/canvas/MetalCanvas.h
Normal file
19
src/gui/canvas/MetalCanvas.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
#pragma once
|
||||
|
||||
#include "gui/canvas/IRenderCanvas.h"
|
||||
|
||||
#include <wx/frame.h>
|
||||
|
||||
#include <set>
|
||||
|
||||
class MetalCanvas : public IRenderCanvas, public wxWindow
|
||||
{
|
||||
public:
|
||||
MetalCanvas(wxWindow* parent, const wxSize& size, bool is_main_window);
|
||||
~MetalCanvas();
|
||||
|
||||
private:
|
||||
|
||||
void OnPaint(wxPaintEvent& event);
|
||||
void OnResize(wxSizeEvent& event);
|
||||
};
|
|
@ -69,8 +69,11 @@ std::list<fs::path> _getCachesPaths(const TitleId& titleId)
|
|||
ActiveSettings::GetCachePath(L"shaderCache/driver/vk/{:016x}.bin", titleId),
|
||||
ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_spirv.bin", titleId),
|
||||
ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_gl.bin", titleId),
|
||||
ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_air.bin", titleId),
|
||||
ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_shaders.bin", titleId),
|
||||
ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_vkpipeline.bin", titleId)};
|
||||
ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_mtlshaders.bin", titleId),
|
||||
ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_vkpipeline.bin", titleId),
|
||||
ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_mtlpipeline.bin", titleId)};
|
||||
|
||||
cachePaths.remove_if(
|
||||
[](const fs::path& cachePath)
|
||||
|
|
|
@ -85,6 +85,9 @@ void gui_updateWindowTitles(bool isIdle, bool isLoading, double fps)
|
|||
case RendererAPI::Vulkan:
|
||||
renderer = "[Vulkan]";
|
||||
break;
|
||||
case RendererAPI::Metal:
|
||||
renderer = "[Metal]";
|
||||
break;
|
||||
default: ;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,6 +7,15 @@ add_library(imguiImpl
|
|||
imgui_extension.h
|
||||
)
|
||||
|
||||
if (ENABLE_METAL)
|
||||
target_sources(imguiImpl PRIVATE
|
||||
imgui_impl_metal.mm
|
||||
imgui_impl_metal.h
|
||||
)
|
||||
|
||||
target_compile_definitions(imguiImpl PRIVATE IMGUI_IMPL_METAL_CPP)
|
||||
endif ()
|
||||
|
||||
set_property(TARGET imguiImpl PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
||||
|
||||
target_include_directories(imguiImpl PUBLIC "../")
|
||||
|
|
64
src/imgui/imgui_impl_metal.h
Normal file
64
src/imgui/imgui_impl_metal.h
Normal file
|
@ -0,0 +1,64 @@
|
|||
// dear imgui: Renderer Backend for Metal
|
||||
// This needs to be used along with a Platform Backend (e.g. OSX)
|
||||
|
||||
// Implemented features:
|
||||
// [X] Renderer: User texture binding. Use 'MTLTexture' as ImTextureID. Read the FAQ about ImTextureID!
|
||||
// [X] Renderer: Large meshes support (64k+ vertices) with 16-bit indices.
|
||||
|
||||
// You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this.
|
||||
// Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need.
|
||||
// If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp.
|
||||
// Read online: https://github.com/ocornut/imgui/tree/master/docs
|
||||
|
||||
#include "imgui.h" // IMGUI_IMPL_API
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// ObjC API
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#ifdef __OBJC__
|
||||
|
||||
@class MTLRenderPassDescriptor;
|
||||
@protocol MTLDevice, MTLCommandBuffer, MTLRenderCommandEncoder;
|
||||
|
||||
IMGUI_IMPL_API bool ImGui_ImplMetal_Init(id<MTLDevice> device);
|
||||
IMGUI_IMPL_API void ImGui_ImplMetal_Shutdown();
|
||||
IMGUI_IMPL_API void ImGui_ImplMetal_NewFrame(MTLRenderPassDescriptor* renderPassDescriptor);
|
||||
IMGUI_IMPL_API void ImGui_ImplMetal_RenderDrawData(ImDrawData* drawData,
|
||||
id<MTLCommandBuffer> commandBuffer,
|
||||
id<MTLRenderCommandEncoder> commandEncoder);
|
||||
|
||||
// Called by Init/NewFrame/Shutdown
|
||||
IMGUI_IMPL_API bool ImGui_ImplMetal_CreateFontsTexture(id<MTLDevice> device);
|
||||
IMGUI_IMPL_API void ImGui_ImplMetal_DestroyFontsTexture();
|
||||
IMGUI_IMPL_API bool ImGui_ImplMetal_CreateDeviceObjects(id<MTLDevice> device);
|
||||
IMGUI_IMPL_API void ImGui_ImplMetal_DestroyDeviceObjects();
|
||||
|
||||
#endif
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// C++ API
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// Enable Metal C++ binding support with '#define IMGUI_IMPL_METAL_CPP' in your imconfig.h file
|
||||
// More info about using Metal from C++: https://developer.apple.com/metal/cpp/
|
||||
|
||||
#ifdef IMGUI_IMPL_METAL_CPP
|
||||
#include <Metal/Metal.hpp>
|
||||
#ifndef __OBJC__
|
||||
|
||||
IMGUI_IMPL_API bool ImGui_ImplMetal_Init(MTL::Device* device);
|
||||
IMGUI_IMPL_API void ImGui_ImplMetal_Shutdown();
|
||||
IMGUI_IMPL_API void ImGui_ImplMetal_NewFrame(MTL::RenderPassDescriptor* renderPassDescriptor);
|
||||
IMGUI_IMPL_API void ImGui_ImplMetal_RenderDrawData(ImDrawData* draw_data,
|
||||
MTL::CommandBuffer* commandBuffer,
|
||||
MTL::RenderCommandEncoder* commandEncoder);
|
||||
|
||||
// Called by Init/NewFrame/Shutdown
|
||||
IMGUI_IMPL_API bool ImGui_ImplMetal_CreateFontsTexture(MTL::Device* device);
|
||||
IMGUI_IMPL_API void ImGui_ImplMetal_DestroyFontsTexture();
|
||||
IMGUI_IMPL_API bool ImGui_ImplMetal_CreateDeviceObjects(MTL::Device* device);
|
||||
IMGUI_IMPL_API void ImGui_ImplMetal_DestroyDeviceObjects();
|
||||
|
||||
#endif
|
||||
#endif
|
575
src/imgui/imgui_impl_metal.mm
Normal file
575
src/imgui/imgui_impl_metal.mm
Normal file
|
@ -0,0 +1,575 @@
|
|||
// dear imgui: Renderer Backend for Metal
|
||||
// This needs to be used along with a Platform Backend (e.g. OSX)
|
||||
|
||||
// Implemented features:
|
||||
// [X] Renderer: User texture binding. Use 'MTLTexture' as ImTextureID. Read the FAQ about ImTextureID!
|
||||
// [X] Renderer: Large meshes support (64k+ vertices) with 16-bit indices.
|
||||
|
||||
// You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this.
|
||||
// Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need.
|
||||
// If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp.
|
||||
// Read online: https://github.com/ocornut/imgui/tree/master/docs
|
||||
|
||||
// CHANGELOG
|
||||
// (minor and older changes stripped away, please see git history for details)
|
||||
// 2022-08-23: Metal: Update deprecated property 'sampleCount'->'rasterSampleCount'.
|
||||
// 2022-07-05: Metal: Add dispatch synchronization.
|
||||
// 2022-06-30: Metal: Use __bridge for ARC based systems.
|
||||
// 2022-06-01: Metal: Fixed null dereference on exit inside command buffer completion handler.
|
||||
// 2022-04-27: Misc: Store backend data in a per-context struct, allowing to use this backend with multiple contexts.
|
||||
// 2022-01-03: Metal: Ignore ImDrawCmd where ElemCount == 0 (very rare but can technically be manufactured by user code).
|
||||
// 2021-12-30: Metal: Added Metal C++ support. Enable with '#define IMGUI_IMPL_METAL_CPP' in your imconfig.h file.
|
||||
// 2021-08-24: Metal: Fixed a crash when clipping rect larger than framebuffer is submitted. (#4464)
|
||||
// 2021-05-19: Metal: Replaced direct access to ImDrawCmd::TextureId with a call to ImDrawCmd::GetTexID(). (will become a requirement)
|
||||
// 2021-02-18: Metal: Change blending equation to preserve alpha in output buffer.
|
||||
// 2021-01-25: Metal: Fixed texture storage mode when building on Mac Catalyst.
|
||||
// 2019-05-29: Metal: Added support for large mesh (64K+ vertices), enable ImGuiBackendFlags_RendererHasVtxOffset flag.
|
||||
// 2019-04-30: Metal: Added support for special ImDrawCallback_ResetRenderState callback to reset render state.
|
||||
// 2019-02-11: Metal: Projecting clipping rectangles correctly using draw_data->FramebufferScale to allow multi-viewports for retina display.
|
||||
// 2018-11-30: Misc: Setting up io.BackendRendererName so it can be displayed in the About Window.
|
||||
// 2018-07-05: Metal: Added new Metal backend implementation.
|
||||
|
||||
#include "imgui.h"
|
||||
#include "imgui_impl_metal.h"
|
||||
#import <time.h>
|
||||
#import <Metal/Metal.h>
|
||||
|
||||
#pragma mark - Support classes
|
||||
|
||||
// A wrapper around a MTLBuffer object that knows the last time it was reused
|
||||
@interface MetalBuffer : NSObject
|
||||
@property (nonatomic, strong) id<MTLBuffer> buffer;
|
||||
@property (nonatomic, assign) double lastReuseTime;
|
||||
- (instancetype)initWithBuffer:(id<MTLBuffer>)buffer;
|
||||
@end
|
||||
|
||||
// An object that encapsulates the data necessary to uniquely identify a
|
||||
// render pipeline state. These are used as cache keys.
|
||||
@interface FramebufferDescriptor : NSObject<NSCopying>
|
||||
@property (nonatomic, assign) unsigned long sampleCount;
|
||||
@property (nonatomic, assign) MTLPixelFormat colorPixelFormat;
|
||||
@property (nonatomic, assign) MTLPixelFormat depthPixelFormat;
|
||||
@property (nonatomic, assign) MTLPixelFormat stencilPixelFormat;
|
||||
- (instancetype)initWithRenderPassDescriptor:(MTLRenderPassDescriptor*)renderPassDescriptor;
|
||||
@end
|
||||
|
||||
// A singleton that stores long-lived objects that are needed by the Metal
|
||||
// renderer backend. Stores the render pipeline state cache and the default
|
||||
// font texture, and manages the reusable buffer cache.
|
||||
@interface MetalContext : NSObject
|
||||
@property (nonatomic, strong) id<MTLDevice> device;
|
||||
@property (nonatomic, strong) id<MTLDepthStencilState> depthStencilState;
|
||||
@property (nonatomic, strong) FramebufferDescriptor* framebufferDescriptor; // framebuffer descriptor for current frame; transient
|
||||
@property (nonatomic, strong) NSMutableDictionary* renderPipelineStateCache; // pipeline cache; keyed on framebuffer descriptors
|
||||
@property (nonatomic, strong, nullable) id<MTLTexture> fontTexture;
|
||||
@property (nonatomic, strong) NSMutableArray<MetalBuffer*>* bufferCache;
|
||||
@property (nonatomic, assign) double lastBufferCachePurge;
|
||||
- (MetalBuffer*)dequeueReusableBufferOfLength:(NSUInteger)length device:(id<MTLDevice>)device;
|
||||
- (id<MTLRenderPipelineState>)renderPipelineStateForFramebufferDescriptor:(FramebufferDescriptor*)descriptor device:(id<MTLDevice>)device;
|
||||
@end
|
||||
|
||||
// Renderer backend state; a pointer to it is stored in
// ImGuiIO::BackendRendererUserData by ImGui_ImplMetal_Init().
struct ImGui_ImplMetal_Data
{
    // Owning reference to the shared Metal state; nil until Init() assigns it.
    MetalContext* SharedMetalContext = nil;

    // The member initializer above replaces the former memset(this, 0, ...):
    // raw-zeroing a struct that holds an Objective-C object pointer writes
    // over an ownership-qualified field behind the compiler's back.
    ImGui_ImplMetal_Data() = default;
};
|
||||
|
||||
// Allocates and default-constructs the backend data object via IM_NEW.
static ImGui_ImplMetal_Data* ImGui_ImplMetal_CreateBackendData()
{
    ImGui_ImplMetal_Data* backendData = IM_NEW(ImGui_ImplMetal_Data)();
    return backendData;
}
|
||||
// Returns the backend data stored in the current ImGui context's IO block,
// or nullptr when there is no current ImGui context.
static ImGui_ImplMetal_Data* ImGui_ImplMetal_GetBackendData()
{
    if (ImGui::GetCurrentContext() == nullptr)
        return nullptr;
    return (ImGui_ImplMetal_Data*)ImGui::GetIO().BackendRendererUserData;
}
|
||||
// Destroys the backend data object of the current ImGui context via IM_DELETE.
// Does not reset the pointer stored in ImGuiIO.
static void ImGui_ImplMetal_DestroyBackendData()
{
    ImGui_ImplMetal_Data* backendData = ImGui_ImplMetal_GetBackendData();
    IM_DELETE(backendData);
}
|
||||
|
||||
// Reads CLOCK_UPTIME_RAW in nanoseconds and converts the value to seconds.
static inline CFTimeInterval GetMachAbsoluteTimeInSeconds()
{
    const double uptimeNanoseconds = (double)clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
    return (CFTimeInterval)(uptimeNanoseconds / 1e9);
}
|
||||
|
||||
#ifdef IMGUI_IMPL_METAL_CPP
|
||||
|
||||
#pragma mark - Dear ImGui Metal C++ Backend API
|
||||
|
||||
// metal-cpp overload: bridges the C++ device handle to its Objective-C
// counterpart and forwards to the Objective-C ImGui_ImplMetal_Init().
bool ImGui_ImplMetal_Init(MTL::Device* device)
{
    id<MTLDevice> objcDevice = (__bridge id<MTLDevice>)(device);
    return ImGui_ImplMetal_Init(objcDevice);
}
|
||||
|
||||
// metal-cpp overload: bridges the render pass descriptor and forwards to the
// Objective-C ImGui_ImplMetal_NewFrame().
void ImGui_ImplMetal_NewFrame(MTL::RenderPassDescriptor* renderPassDescriptor)
{
    MTLRenderPassDescriptor* objcDescriptor = (__bridge MTLRenderPassDescriptor*)(renderPassDescriptor);
    ImGui_ImplMetal_NewFrame(objcDescriptor);
}
|
||||
|
||||
// metal-cpp overload: bridges the command buffer and encoder and forwards to
// the Objective-C ImGui_ImplMetal_RenderDrawData().
void ImGui_ImplMetal_RenderDrawData(ImDrawData* draw_data,
                                    MTL::CommandBuffer* commandBuffer,
                                    MTL::RenderCommandEncoder* commandEncoder)
{
    id<MTLCommandBuffer> objcCommandBuffer = (__bridge id<MTLCommandBuffer>)(commandBuffer);
    id<MTLRenderCommandEncoder> objcCommandEncoder = (__bridge id<MTLRenderCommandEncoder>)(commandEncoder);
    ImGui_ImplMetal_RenderDrawData(draw_data, objcCommandBuffer, objcCommandEncoder);
}
|
||||
|
||||
// metal-cpp overload: bridges the device and forwards to the Objective-C
// ImGui_ImplMetal_CreateFontsTexture().
bool ImGui_ImplMetal_CreateFontsTexture(MTL::Device* device)
{
    id<MTLDevice> objcDevice = (__bridge id<MTLDevice>)(device);
    return ImGui_ImplMetal_CreateFontsTexture(objcDevice);
}
|
||||
|
||||
// metal-cpp overload: bridges the device and forwards to the Objective-C
// ImGui_ImplMetal_CreateDeviceObjects().
bool ImGui_ImplMetal_CreateDeviceObjects(MTL::Device* device)
{
    id<MTLDevice> objcDevice = (__bridge id<MTLDevice>)(device);
    return ImGui_ImplMetal_CreateDeviceObjects(objcDevice);
}
|
||||
|
||||
#endif // #ifdef IMGUI_IMPL_METAL_CPP
|
||||
|
||||
#pragma mark - Dear ImGui Metal Backend API
|
||||
|
||||
// Creates the backend data and its Metal context for `device` and registers
// the backend with the current ImGui context's IO block. Always returns true.
bool ImGui_ImplMetal_Init(id<MTLDevice> device)
{
    ImGui_ImplMetal_Data* backendData = ImGui_ImplMetal_CreateBackendData();

    // Publish the backend on the IO block.
    ImGuiIO& io = ImGui::GetIO();
    io.BackendRendererUserData = (void*)backendData;
    io.BackendRendererName = "imgui_impl_metal";
    // We can honor the ImDrawCmd::VtxOffset field, allowing for large meshes.
    io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset;

    // Create the shared Metal state and remember the device.
    MetalContext* context = [[MetalContext alloc] init];
    backendData->SharedMetalContext = context;
    context.device = device;

    return true;
}
|
||||
|
||||
// Tears down the backend: releases device objects, destroys the backend data
// and unregisters the backend from the ImGui IO block.
// Calling it when no backend is installed is a no-op.
void ImGui_ImplMetal_Shutdown()
{
    ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
    if (bd == nullptr)
        return; // never initialised, or already shut down

    ImGui_ImplMetal_DestroyDeviceObjects();
    ImGui_ImplMetal_DestroyBackendData();

    // Clear what Init() registered. Without this, BackendRendererUserData
    // keeps pointing at freed memory, and the buffer-recycling completion
    // handler (which checks the backend data against nullptr) would use it.
    ImGuiIO& io = ImGui::GetIO();
    io.BackendRendererUserData = nullptr;
    io.BackendRendererName = nullptr;
    io.BackendFlags &= ~ImGuiBackendFlags_RendererHasVtxOffset;
}
|
||||
|
||||
// Begins a frame: records the framebuffer configuration of
// `renderPassDescriptor` for later pipeline lookup and lazily creates the
// device objects the first time it runs.
void ImGui_ImplMetal_NewFrame(MTLRenderPassDescriptor* renderPassDescriptor)
{
    ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
    // Check bd itself first: it is nullptr when Init() was never called, and
    // dereferencing it inside the assert would crash before the message shows.
    IM_ASSERT(bd != nullptr && bd->SharedMetalContext != nil && "No Metal context. Did you call ImGui_ImplMetal_Init() ?");
    bd->SharedMetalContext.framebufferDescriptor = [[FramebufferDescriptor alloc] initWithRenderPassDescriptor:renderPassDescriptor];

    // A missing depth/stencil state is the marker for "device objects not created yet".
    if (bd->SharedMetalContext.depthStencilState == nil)
        ImGui_ImplMetal_CreateDeviceObjects(bd->SharedMetalContext.device);
}
|
||||
|
||||
// Applies the render state shared by all ImGui draw calls to `commandEncoder`:
// cull mode, depth/stencil state, viewport, orthographic projection (vertex
// bytes at index 1), pipeline state and the vertex buffer at index 0.
// `commandBuffer` is unused.
static void ImGui_ImplMetal_SetupRenderState(ImDrawData* drawData, id<MTLCommandBuffer> commandBuffer,
    id<MTLRenderCommandEncoder> commandEncoder, id<MTLRenderPipelineState> renderPipelineState,
    MetalBuffer* vertexBuffer, size_t vertexBufferOffset)
{
    IM_UNUSED(commandBuffer);
    ImGui_ImplMetal_Data* backendData = ImGui_ImplMetal_GetBackendData();

    [commandEncoder setCullMode:MTLCullModeNone];
    [commandEncoder setDepthStencilState:backendData->SharedMetalContext.depthStencilState];

    // Viewport covers the whole display area in framebuffer pixels.
    MTLViewport viewport;
    viewport.originX = 0.0;
    viewport.originY = 0.0;
    viewport.width   = (double)(drawData->DisplaySize.x * drawData->FramebufferScale.x);
    viewport.height  = (double)(drawData->DisplaySize.y * drawData->FramebufferScale.y);
    viewport.znear   = 0.0;
    viewport.zfar    = 1.0;
    [commandEncoder setViewport:viewport];

    // Orthographic projection over the visible ImGui space, which runs from
    // DisplayPos (top left) to DisplayPos + DisplaySize (bottom right).
    const float left   = drawData->DisplayPos.x;
    const float right  = drawData->DisplayPos.x + drawData->DisplaySize.x;
    const float top    = drawData->DisplayPos.y;
    const float bottom = drawData->DisplayPos.y + drawData->DisplaySize.y;
    const float zNear  = (float)viewport.znear;
    const float zFar   = (float)viewport.zfar;
    const float projection[4][4] =
    {
        { 2.0f/(right-left),          0.0f,                       0.0f,               0.0f },
        { 0.0f,                       2.0f/(top-bottom),          0.0f,               0.0f },
        { 0.0f,                       0.0f,                       1/(zFar-zNear),     0.0f },
        { (right+left)/(left-right),  (top+bottom)/(bottom-top),  zNear/(zFar-zNear), 1.0f },
    };
    [commandEncoder setVertexBytes:&projection length:sizeof(projection) atIndex:1];

    [commandEncoder setRenderPipelineState:renderPipelineState];

    // Bind the buffer at offset 0, then move to the requested offset.
    [commandEncoder setVertexBuffer:vertexBuffer.buffer offset:0 atIndex:0];
    [commandEncoder setVertexBufferOffset:vertexBufferOffset atIndex:0];
}
|
||||
|
||||
// Metal Render function.
|
||||
// Metal render function.
// Encodes all draw lists of `drawData` into `commandEncoder`. Vertex and index
// data are copied into buffers taken from the context's reuse pool; the
// buffers are handed back to the pool once `commandBuffer` has completed.
void ImGui_ImplMetal_RenderDrawData(ImDrawData* drawData, id<MTLCommandBuffer> commandBuffer, id<MTLRenderCommandEncoder> commandEncoder)
{
    ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
    MetalContext* ctx = bd->SharedMetalContext;

    // Avoid rendering when minimized, scale coordinates for retina displays (screen coordinates != framebuffer coordinates)
    int fb_width = (int)(drawData->DisplaySize.x * drawData->FramebufferScale.x);
    int fb_height = (int)(drawData->DisplaySize.y * drawData->FramebufferScale.y);
    if (fb_width <= 0 || fb_height <= 0 || drawData->CmdListsCount == 0)
        return;

    // Try to retrieve a render pipeline state that is compatible with the framebuffer config for this frame
    // The hit rate for this cache should be very near 100%.
    id<MTLRenderPipelineState> renderPipelineState = ctx.renderPipelineStateCache[ctx.framebufferDescriptor];
    if (renderPipelineState == nil)
    {
        // No luck; make a new render pipeline state
        renderPipelineState = [ctx renderPipelineStateForFramebufferDescriptor:ctx.framebufferDescriptor device:commandBuffer.device];

        // Creation can fail (the context logs the reason). Skip this frame's
        // UI instead of caching nil and binding a nil pipeline on the encoder.
        if (renderPipelineState == nil)
            return;

        // Cache render pipeline state for later reuse
        ctx.renderPipelineStateCache[ctx.framebufferDescriptor] = renderPipelineState;
    }

    size_t vertexBufferLength = (size_t)drawData->TotalVtxCount * sizeof(ImDrawVert);
    size_t indexBufferLength = (size_t)drawData->TotalIdxCount * sizeof(ImDrawIdx);
    MetalBuffer* vertexBuffer = [ctx dequeueReusableBufferOfLength:vertexBufferLength device:commandBuffer.device];
    MetalBuffer* indexBuffer = [ctx dequeueReusableBufferOfLength:indexBufferLength device:commandBuffer.device];

    ImGui_ImplMetal_SetupRenderState(drawData, commandBuffer, commandEncoder, renderPipelineState, vertexBuffer, 0);

    // Will project scissor/clipping rectangles into framebuffer space
    ImVec2 clip_off = drawData->DisplayPos;         // (0,0) unless using multi-viewports
    ImVec2 clip_scale = drawData->FramebufferScale; // (1,1) unless using retina display which are often (2,2)

    // Render command lists. The offsets track where the current list's data
    // starts inside the shared vertex/index buffers.
    size_t vertexBufferOffset = 0;
    size_t indexBufferOffset = 0;
    for (int n = 0; n < drawData->CmdListsCount; n++)
    {
        const ImDrawList* cmd_list = drawData->CmdLists[n];

        // Append this list's vertices and indices to the shared buffers.
        memcpy((char*)vertexBuffer.buffer.contents + vertexBufferOffset, cmd_list->VtxBuffer.Data, (size_t)cmd_list->VtxBuffer.Size * sizeof(ImDrawVert));
        memcpy((char*)indexBuffer.buffer.contents + indexBufferOffset, cmd_list->IdxBuffer.Data, (size_t)cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx));

        for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
        {
            const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i];
            if (pcmd->UserCallback)
            {
                // User callback, registered via ImDrawList::AddCallback()
                // (ImDrawCallback_ResetRenderState is a special callback value used by the user to request the renderer to reset render state.)
                if (pcmd->UserCallback == ImDrawCallback_ResetRenderState)
                    ImGui_ImplMetal_SetupRenderState(drawData, commandBuffer, commandEncoder, renderPipelineState, vertexBuffer, vertexBufferOffset);
                else
                    pcmd->UserCallback(cmd_list, pcmd);
            }
            else
            {
                // Project scissor/clipping rectangles into framebuffer space
                ImVec2 clip_min((pcmd->ClipRect.x - clip_off.x) * clip_scale.x, (pcmd->ClipRect.y - clip_off.y) * clip_scale.y);
                ImVec2 clip_max((pcmd->ClipRect.z - clip_off.x) * clip_scale.x, (pcmd->ClipRect.w - clip_off.y) * clip_scale.y);

                // Clamp to viewport as setScissorRect() won't accept values that are off bounds
                if (clip_min.x < 0.0f) { clip_min.x = 0.0f; }
                if (clip_min.y < 0.0f) { clip_min.y = 0.0f; }
                if (clip_max.x > fb_width) { clip_max.x = (float)fb_width; }
                if (clip_max.y > fb_height) { clip_max.y = (float)fb_height; }
                if (clip_max.x <= clip_min.x || clip_max.y <= clip_min.y)
                    continue;
                if (pcmd->ElemCount == 0) // drawIndexedPrimitives() validation doesn't accept this
                    continue;

                // Apply scissor/clipping rectangle
                MTLScissorRect scissorRect =
                {
                    .x = NSUInteger(clip_min.x),
                    .y = NSUInteger(clip_min.y),
                    .width = NSUInteger(clip_max.x - clip_min.x),
                    .height = NSUInteger(clip_max.y - clip_min.y)
                };
                [commandEncoder setScissorRect:scissorRect];

                // Bind texture, Draw
                if (ImTextureID tex_id = pcmd->GetTexID())
                    [commandEncoder setFragmentTexture:(__bridge id<MTLTexture>)(tex_id) atIndex:0];

                [commandEncoder setVertexBufferOffset:(vertexBufferOffset + pcmd->VtxOffset * sizeof(ImDrawVert)) atIndex:0];
                [commandEncoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
                                           indexCount:pcmd->ElemCount
                                            indexType:sizeof(ImDrawIdx) == 2 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32
                                          indexBuffer:indexBuffer.buffer
                                    indexBufferOffset:indexBufferOffset + pcmd->IdxOffset * sizeof(ImDrawIdx)];
            }
        }

        vertexBufferOffset += (size_t)cmd_list->VtxBuffer.Size * sizeof(ImDrawVert);
        indexBufferOffset += (size_t)cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx);
    }

    // Return both buffers to the reuse pool once the command buffer has
    // completed. The hand-back is bounced to the main queue and skipped when
    // no backend data is available any more.
    [commandBuffer addCompletedHandler:^(id<MTLCommandBuffer>)
    {
        dispatch_async(dispatch_get_main_queue(), ^{
            ImGui_ImplMetal_Data* bd = ImGui_ImplMetal_GetBackendData();
            if (bd != nullptr)
            {
                @synchronized(bd->SharedMetalContext.bufferCache)
                {
                    [bd->SharedMetalContext.bufferCache addObject:vertexBuffer];
                    [bd->SharedMetalContext.bufferCache addObject:indexBuffer];
                }
            }
        });
    }];
}
|
||||
|
||||
// Uploads the ImGui font atlas as an RGBA8 texture on `device`, stores it in
// the Metal context and registers it as the atlas texture ID.
// Returns true when the texture was created.
bool ImGui_ImplMetal_CreateFontsTexture(id<MTLDevice> device)
{
    ImGui_ImplMetal_Data* backendData = ImGui_ImplMetal_GetBackendData();
    ImGuiIO& io = ImGui::GetIO();

    // The atlas is fetched as 4-channel RGBA. GetTexDataAsAlpha8() with a
    // 1-channel texture would save bandwidth, but a 1-channel shader would make
    // it less obvious how to render user textures through ImTextureID.
    unsigned char* atlasPixels;
    int atlasWidth, atlasHeight;
    io.Fonts->GetTexDataAsRGBA32(&atlasPixels, &atlasWidth, &atlasHeight);
    const NSUInteger texWidth = (NSUInteger)atlasWidth;
    const NSUInteger texHeight = (NSUInteger)atlasHeight;

    MTLTextureDescriptor* descriptor = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
                                                                                          width:texWidth
                                                                                         height:texHeight
                                                                                      mipmapped:NO];
    descriptor.usage = MTLTextureUsageShaderRead;
#if TARGET_OS_OSX || TARGET_OS_MACCATALYST
    descriptor.storageMode = MTLStorageModeManaged;
#else
    descriptor.storageMode = MTLStorageModeShared;
#endif

    id<MTLTexture> atlasTexture = [device newTextureWithDescriptor:descriptor];
    [atlasTexture replaceRegion:MTLRegionMake2D(0, 0, texWidth, texHeight)
                    mipmapLevel:0
                      withBytes:atlasPixels
                    bytesPerRow:texWidth * 4];

    MetalContext* context = backendData->SharedMetalContext;
    context.fontTexture = atlasTexture;
    io.Fonts->SetTexID((__bridge void*)context.fontTexture); // ImTextureID == void*

    return (context.fontTexture != nil);
}
|
||||
|
||||
// Drops the context's reference to the font texture and clears the font
// atlas' texture ID.
void ImGui_ImplMetal_DestroyFontsTexture()
{
    ImGui_ImplMetal_Data* backendData = ImGui_ImplMetal_GetBackendData();
    backendData->SharedMetalContext.fontTexture = nil;

    ImGui::GetIO().Fonts->SetTexID(nullptr);
}
|
||||
|
||||
// Creates the depth/stencil state (depth writes off, compare always) and the
// font texture on `device`. Always returns true; the result of the font
// texture creation is not propagated.
bool ImGui_ImplMetal_CreateDeviceObjects(id<MTLDevice> device)
{
    ImGui_ImplMetal_Data* backendData = ImGui_ImplMetal_GetBackendData();

    MTLDepthStencilDescriptor* descriptor = [[MTLDepthStencilDescriptor alloc] init];
    descriptor.depthCompareFunction = MTLCompareFunctionAlways;
    descriptor.depthWriteEnabled = NO;

    id<MTLDepthStencilState> state = [device newDepthStencilStateWithDescriptor:descriptor];
    backendData->SharedMetalContext.depthStencilState = state;

    ImGui_ImplMetal_CreateFontsTexture(device);
    return true;
}
|
||||
|
||||
// Releases the font texture and empties the render pipeline state cache.
// The depth/stencil state is left in place.
void ImGui_ImplMetal_DestroyDeviceObjects()
{
    ImGui_ImplMetal_Data* backendData = ImGui_ImplMetal_GetBackendData();
    ImGui_ImplMetal_DestroyFontsTexture();

    NSMutableDictionary* pipelineCache = backendData->SharedMetalContext.renderPipelineStateCache;
    [pipelineCache removeAllObjects];
}
|
||||
|
||||
#pragma mark - MetalBuffer implementation
|
||||
|
||||
@implementation MetalBuffer

// Stores `buffer` and stamps the entry with the current time as its last use.
- (instancetype)initWithBuffer:(id<MTLBuffer>)buffer
{
    self = [super init];
    if (self == nil)
        return nil;

    _buffer = buffer;
    _lastReuseTime = GetMachAbsoluteTimeInSeconds();
    return self;
}

@end
|
||||
|
||||
#pragma mark - FramebufferDescriptor implementation
|
||||
|
||||
@implementation FramebufferDescriptor

// Reads sample count and colour format from the texture of colour attachment
// 0, and the depth/stencil formats from the respective attachment textures.
- (instancetype)initWithRenderPassDescriptor:(MTLRenderPassDescriptor*)renderPassDescriptor
{
    self = [super init];
    if (self == nil)
        return nil;

    id<MTLTexture> colorTexture = renderPassDescriptor.colorAttachments[0].texture;
    _sampleCount = colorTexture.sampleCount;
    _colorPixelFormat = colorTexture.pixelFormat;
    _depthPixelFormat = renderPassDescriptor.depthAttachment.texture.pixelFormat;
    _stencilPixelFormat = renderPassDescriptor.stencilAttachment.texture.pixelFormat;
    return self;
}

// NSCopying: produces a new descriptor carrying the same four values.
- (nonnull id)copyWithZone:(nullable NSZone*)zone
{
    FramebufferDescriptor* duplicate = [[FramebufferDescriptor allocWithZone:zone] init];
    duplicate.stencilPixelFormat = self.stencilPixelFormat;
    duplicate.depthPixelFormat = self.depthPixelFormat;
    duplicate.colorPixelFormat = self.colorPixelFormat;
    duplicate.sampleCount = self.sampleCount;
    return duplicate;
}

// Packs the low bits of each field into one word:
// stencil (10 bits) | depth (10 bits) | colour (10 bits) | sample count (2 bits).
- (NSUInteger)hash
{
    const NSUInteger sampleBits = _sampleCount & 0x3;
    const NSUInteger colorBits = _colorPixelFormat & 0x3FF;
    const NSUInteger depthBits = _depthPixelFormat & 0x3FF;
    const NSUInteger stencilBits = _stencilPixelFormat & 0x3FF;
    return sampleBits | (colorBits << 2) | (depthBits << 12) | (stencilBits << 22);
}

// Two descriptors are equal when all four fields match.
- (BOOL)isEqual:(id)object
{
    if (![object isKindOfClass:[FramebufferDescriptor class]])
        return NO;

    FramebufferDescriptor* rhs = (FramebufferDescriptor*)object;
    if (rhs.sampleCount != self.sampleCount)
        return NO;
    if (rhs.colorPixelFormat != self.colorPixelFormat)
        return NO;
    if (rhs.depthPixelFormat != self.depthPixelFormat)
        return NO;
    return rhs.stencilPixelFormat == self.stencilPixelFormat;
}

@end
|
||||
|
||||
#pragma mark - MetalContext implementation
|
||||
|
||||
@implementation MetalContext

// Creates empty pipeline and buffer caches and starts the purge timer.
- (instancetype)init
{
    if ((self = [super init]))
    {
        _renderPipelineStateCache = [NSMutableDictionary dictionary];
        _bufferCache = [NSMutableArray array];
        _lastBufferCachePurge = GetMachAbsoluteTimeInSeconds();
    }
    return self;
}

// Returns a buffer of at least `length` bytes. Prefers the least recently
// used cached buffer that is large enough; allocates a new shared-storage
// buffer on `device` when none fits. Roughly once per second, cached buffers
// that were not reused since the previous purge are dropped first.
- (MetalBuffer*)dequeueReusableBufferOfLength:(NSUInteger)length device:(id<MTLDevice>)device
{
    // Keep full sub-second precision: truncating to whole seconds made buffers
    // reused shortly after a purge look stale and got them evicted early.
    const double now = GetMachAbsoluteTimeInSeconds();

    // Lock on one stable array object and mutate it in place. Replacing the
    // array while it is the lock token would let other lockers synchronize on
    // a different object.
    NSMutableArray<MetalBuffer*>* cache = self.bufferCache;
    @synchronized(cache)
    {
        // Purge old buffers that haven't been useful for a while
        if (now - self.lastBufferCachePurge > 1.0)
        {
            const double lastPurge = self.lastBufferCachePurge;
            for (NSUInteger i = cache.count; i-- > 0; )
                if (cache[i].lastReuseTime <= lastPurge)
                    [cache removeObjectAtIndex:i];
            self.lastBufferCachePurge = now;
        }

        // See if we have a buffer we can reuse
        MetalBuffer* bestCandidate = nil;
        for (MetalBuffer* candidate in cache)
            if (candidate.buffer.length >= length && (bestCandidate == nil || bestCandidate.lastReuseTime > candidate.lastReuseTime))
                bestCandidate = candidate;

        if (bestCandidate != nil)
        {
            [cache removeObject:bestCandidate];
            bestCandidate.lastReuseTime = now;
            return bestCandidate;
        }
    }

    // No luck; make a new buffer
    id<MTLBuffer> backing = [device newBufferWithLength:length options:MTLResourceStorageModeShared];
    return [[MetalBuffer alloc] initWithBuffer:backing];
}

// Compiles the ImGui shaders and builds a render pipeline state matching
// `descriptor` (sample count and colour/depth/stencil pixel formats).
// Returns nil, after logging, when the library, the shader functions or the
// pipeline state cannot be created.
// Bilinear sampling is required by default. Set 'io.Fonts->Flags |= ImFontAtlasFlags_NoBakedLines' or 'style.AntiAliasedLinesUseTex = false' to allow point/nearest sampling.
- (id<MTLRenderPipelineState>)renderPipelineStateForFramebufferDescriptor:(FramebufferDescriptor*)descriptor device:(id<MTLDevice>)device
{
    NSError* error = nil;

    NSString* shaderSource = @""
    "#include <metal_stdlib>\n"
    "using namespace metal;\n"
    "\n"
    "struct Uniforms {\n"
    "    float4x4 projectionMatrix;\n"
    "};\n"
    "\n"
    "struct VertexIn {\n"
    "    float2 position  [[attribute(0)]];\n"
    "    float2 texCoords [[attribute(1)]];\n"
    "    uchar4 color     [[attribute(2)]];\n"
    "};\n"
    "\n"
    "struct VertexOut {\n"
    "    float4 position [[position]];\n"
    "    float2 texCoords;\n"
    "    float4 color;\n"
    "};\n"
    "\n"
    "vertex VertexOut vertex_main(VertexIn in                 [[stage_in]],\n"
    "                             constant Uniforms &uniforms [[buffer(1)]]) {\n"
    "    VertexOut out;\n"
    "    out.position = uniforms.projectionMatrix * float4(in.position, 0, 1);\n"
    "    out.texCoords = in.texCoords;\n"
    "    out.color = float4(in.color) / float4(255.0);\n"
    "    return out;\n"
    "}\n"
    "\n"
    "fragment half4 fragment_main(VertexOut in [[stage_in]],\n"
    "                             texture2d<half, access::sample> texture [[texture(0)]]) {\n"
    "    constexpr sampler linearSampler(coord::normalized, min_filter::linear, mag_filter::linear, mip_filter::linear);\n"
    "    half4 texColor = texture.sample(linearSampler, in.texCoords);\n"
    "    return half4(in.color) * texColor;\n"
    "}\n";

    id<MTLLibrary> library = [device newLibraryWithSource:shaderSource options:nil error:&error];
    if (library == nil)
    {
        NSLog(@"Error: failed to create Metal library: %@", error);
        return nil;
    }

    id<MTLFunction> vertexFunction = [library newFunctionWithName:@"vertex_main"];
    id<MTLFunction> fragmentFunction = [library newFunctionWithName:@"fragment_main"];

    if (vertexFunction == nil || fragmentFunction == nil)
    {
        NSLog(@"Error: failed to find Metal shader functions in library: %@", error);
        return nil;
    }

    // Vertex layout mirrors ImDrawVert: position, texture coordinates, colour,
    // all read from buffer index 0.
    MTLVertexDescriptor* vertexDescriptor = [MTLVertexDescriptor vertexDescriptor];
    vertexDescriptor.attributes[0].offset = IM_OFFSETOF(ImDrawVert, pos);
    vertexDescriptor.attributes[0].format = MTLVertexFormatFloat2; // position
    vertexDescriptor.attributes[0].bufferIndex = 0;
    vertexDescriptor.attributes[1].offset = IM_OFFSETOF(ImDrawVert, uv);
    vertexDescriptor.attributes[1].format = MTLVertexFormatFloat2; // texCoords
    vertexDescriptor.attributes[1].bufferIndex = 0;
    vertexDescriptor.attributes[2].offset = IM_OFFSETOF(ImDrawVert, col);
    vertexDescriptor.attributes[2].format = MTLVertexFormatUChar4; // color
    vertexDescriptor.attributes[2].bufferIndex = 0;
    vertexDescriptor.layouts[0].stepRate = 1;
    vertexDescriptor.layouts[0].stepFunction = MTLVertexStepFunctionPerVertex;
    vertexDescriptor.layouts[0].stride = sizeof(ImDrawVert);

    // Build the pipeline from the descriptor the caller asked for, not from
    // whatever happens to be the context's current frame descriptor.
    MTLRenderPipelineDescriptor* pipelineDescriptor = [[MTLRenderPipelineDescriptor alloc] init];
    pipelineDescriptor.vertexFunction = vertexFunction;
    pipelineDescriptor.fragmentFunction = fragmentFunction;
    pipelineDescriptor.vertexDescriptor = vertexDescriptor;
    pipelineDescriptor.rasterSampleCount = descriptor.sampleCount;
    pipelineDescriptor.colorAttachments[0].pixelFormat = descriptor.colorPixelFormat;
    pipelineDescriptor.colorAttachments[0].blendingEnabled = YES;
    pipelineDescriptor.colorAttachments[0].rgbBlendOperation = MTLBlendOperationAdd;
    pipelineDescriptor.colorAttachments[0].sourceRGBBlendFactor = MTLBlendFactorSourceAlpha;
    pipelineDescriptor.colorAttachments[0].destinationRGBBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
    pipelineDescriptor.colorAttachments[0].alphaBlendOperation = MTLBlendOperationAdd;
    pipelineDescriptor.colorAttachments[0].sourceAlphaBlendFactor = MTLBlendFactorOne;
    pipelineDescriptor.colorAttachments[0].destinationAlphaBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
    pipelineDescriptor.depthAttachmentPixelFormat = descriptor.depthPixelFormat;
    pipelineDescriptor.stencilAttachmentPixelFormat = descriptor.stencilPixelFormat;

    // Judge success by the returned object; the error out-parameter is only
    // meaningful when the call itself reports failure.
    id<MTLRenderPipelineState> renderPipelineState = [device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:&error];
    if (renderPipelineState == nil)
        NSLog(@"Error: failed to create Metal pipeline state: %@", error);

    return renderPipelineState;
}

@end
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue