This commit is contained in:
SamoZ256 2025-04-27 00:44:21 +08:00 committed by GitHub
commit f6bb885d00
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
108 changed files with 14995 additions and 480 deletions

4
.gitmodules vendored
View file

@ -18,3 +18,7 @@
path = dependencies/imgui path = dependencies/imgui
url = https://github.com/ocornut/imgui url = https://github.com/ocornut/imgui
shallow = true shallow = true
[submodule "dependencies/metal-cpp"]
path = dependencies/metal-cpp
url = https://github.com/bkaradzic/metal-cpp.git
shallow = true

View file

@ -25,7 +25,7 @@ if (ENABLE_VCPKG)
OUTPUT_VARIABLE is_vcpkg_shallow OUTPUT_VARIABLE is_vcpkg_shallow
OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_STRIP_TRAILING_WHITESPACE
) )
if(is_vcpkg_shallow STREQUAL "true") if(is_vcpkg_shallow STREQUAL "true")
message(STATUS "vcpkg is shallow. Unshallowing it now...") message(STATUS "vcpkg is shallow. Unshallowing it now...")
execute_process( execute_process(
@ -102,10 +102,20 @@ if (UNIX AND NOT APPLE)
option(ENABLE_BLUEZ "Build with Bluez support" ON) option(ENABLE_BLUEZ "Build with Bluez support" ON)
endif() endif()
# Pick the default for the ENABLE_METAL option: ON when configuring for an Apple platform, OFF otherwise.
set(ENABLE_METAL_DEFAULT OFF)
if (APPLE)
	set(ENABLE_METAL_DEFAULT ON)
endif()
option(ENABLE_OPENGL "Enables the OpenGL backend" ON) option(ENABLE_OPENGL "Enables the OpenGL backend" ON)
option(ENABLE_VULKAN "Enables the Vulkan backend" ON) option(ENABLE_VULKAN "Enables the Vulkan backend" ON)
option(ENABLE_METAL "Enables the Metal backend" ${ENABLE_METAL_DEFAULT})
option(ENABLE_DISCORD_RPC "Enables the Discord Rich Presence feature" ON) option(ENABLE_DISCORD_RPC "Enables the Discord Rich Presence feature" ON)
# Stop configuration with a fatal error when the Metal backend is enabled on a non-Apple platform.
if (NOT APPLE AND ENABLE_METAL)
	message(FATAL_ERROR "Metal backend is only supported on Apple platforms")
endif()
# input backends # input backends
if (WIN32) if (WIN32)
@ -180,6 +190,12 @@ if (ENABLE_OPENGL)
find_package(OpenGL REQUIRED) find_package(OpenGL REQUIRED)
endif() endif()
# Metal backend: add the bundled metal-cpp headers to the include path and define ENABLE_METAL=1.
if (ENABLE_METAL)
	include_directories(${CMAKE_SOURCE_DIR}/dependencies/metal-cpp)
	# add_compile_definitions() is the non-legacy form of add_definitions(-D...) and is what this
	# file already uses for ENABLE_DISCORD_RPC.
	add_compile_definitions(ENABLE_METAL=1)
endif()
if (ENABLE_DISCORD_RPC) if (ENABLE_DISCORD_RPC)
add_compile_definitions(ENABLE_DISCORD_RPC) add_compile_definitions(ENABLE_DISCORD_RPC)
add_subdirectory(dependencies/discord-rpc EXCLUDE_FROM_ALL) add_subdirectory(dependencies/discord-rpc EXCLUDE_FROM_ALL)
@ -206,7 +222,7 @@ endif()
if (ENABLE_CUBEB) if (ENABLE_CUBEB)
if (NOT ENABLE_VCPKG) if (NOT ENABLE_VCPKG)
find_package(cubeb) find_package(cubeb)
endif() endif()
if (NOT cubeb_FOUND) if (NOT cubeb_FOUND)
option(BUILD_TESTS "" OFF) option(BUILD_TESTS "" OFF)

1
dependencies/metal-cpp vendored Submodule

@ -0,0 +1 @@
Subproject commit a63bd172ddcba73a3d87ca32032b66ad41ddb9a6

View file

@ -530,7 +530,70 @@ add_library(CemuCafe
) )
if(APPLE) if(APPLE)
target_sources(CemuCafe PRIVATE "HW/Latte/Renderer/Vulkan/CocoaSurface.mm") target_sources(CemuCafe PRIVATE
HW/Latte/Renderer/Vulkan/CocoaSurface.mm
HW/Latte/Renderer/MetalView.mm
HW/Latte/Renderer/MetalView.h
)
endif()
# Sources added to the CemuCafe target only when ENABLE_METAL is set:
# the Metal renderer files and the MSL emitter of the legacy shader decompiler.
if(ENABLE_METAL)
# TODO: sort alphabetically
target_sources(CemuCafe PRIVATE
HW/Latte/Renderer/Metal/MetalRenderer.cpp
HW/Latte/Renderer/Metal/MetalRenderer.h
HW/Latte/Renderer/Metal/MetalCommon.h
HW/Latte/Renderer/Metal/MetalCppImpl.cpp
HW/Latte/Renderer/Metal/MetalLayer.mm
HW/Latte/Renderer/Metal/MetalLayer.h
HW/Latte/Renderer/Metal/MetalLayerHandle.cpp
HW/Latte/Renderer/Metal/MetalLayerHandle.h
HW/Latte/Renderer/Metal/LatteToMtl.cpp
HW/Latte/Renderer/Metal/LatteToMtl.h
HW/Latte/Renderer/Metal/LatteTextureMtl.cpp
HW/Latte/Renderer/Metal/LatteTextureMtl.h
HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp
HW/Latte/Renderer/Metal/LatteTextureViewMtl.h
HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp
HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h
HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
HW/Latte/Renderer/Metal/RendererShaderMtl.h
HW/Latte/Renderer/Metal/CachedFBOMtl.cpp
HW/Latte/Renderer/Metal/CachedFBOMtl.h
HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp
HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h
HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp
HW/Latte/Renderer/Metal/MetalBufferAllocator.h
HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
HW/Latte/Renderer/Metal/MetalMemoryManager.h
HW/Latte/Renderer/Metal/MetalOutputShaderCache.cpp
HW/Latte/Renderer/Metal/MetalOutputShaderCache.h
HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp
HW/Latte/Renderer/Metal/MetalPipelineCompiler.h
HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
HW/Latte/Renderer/Metal/MetalPipelineCache.h
HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
HW/Latte/Renderer/Metal/MetalDepthStencilCache.h
HW/Latte/Renderer/Metal/MetalSamplerCache.cpp
HW/Latte/Renderer/Metal/MetalSamplerCache.h
HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.cpp
HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.h
HW/Latte/Renderer/Metal/MetalQuery.cpp
HW/Latte/Renderer/Metal/MetalQuery.h
HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h
HW/Latte/Renderer/Metal/UtilityShaderSource.h
)
# Second group: the Metal Shading Language emitter files of the legacy shader decompiler.
target_sources(CemuCafe PRIVATE
HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLAttrDecoder.cpp
HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp
)
# NOTE(review): framework linking is left disabled here - presumably Metal/QuartzCore are linked elsewhere; confirm.
#target_link_libraries(CemuCafe PRIVATE
# "-framework Metal"
# "-framework QuartzCore"
#)
endif() endif()
set_property(TARGET CemuCafe PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") set_property(TARGET CemuCafe PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")

View file

@ -252,7 +252,17 @@ void InfoLog_PrintActiveSettings()
if (ActiveSettings::GetGraphicsAPI() == GraphicAPI::kVulkan) if (ActiveSettings::GetGraphicsAPI() == GraphicAPI::kVulkan)
{ {
cemuLog_log(LogType::Force, "Async compile: {}", GetConfig().async_compile.GetValue() ? "true" : "false"); cemuLog_log(LogType::Force, "Async compile: {}", GetConfig().async_compile.GetValue() ? "true" : "false");
if(!GetConfig().vk_accurate_barriers.GetValue()) if (!GetConfig().vk_accurate_barriers.GetValue())
cemuLog_log(LogType::Force, "Accurate barriers are disabled!");
}
else if (ActiveSettings::GetGraphicsAPI() == GraphicAPI::kMetal)
{
cemuLog_log(LogType::Force, "Async compile: {}", GetConfig().async_compile.GetValue() ? "true" : "false");
cemuLog_log(LogType::Force, "Force mesh shaders: {}", GetConfig().force_mesh_shaders.GetValue() ? "true" : "false");
cemuLog_log(LogType::Force, "Fast math: {}", g_current_game_profile->GetFastMath() ? "true" : "false");
cemuLog_log(LogType::Force, "Buffer cache type: {}", g_current_game_profile->GetBufferCacheMode());
cemuLog_log(LogType::Force, "Position invariance: {}", g_current_game_profile->GetPositionInvariance());
if (!GetConfig().vk_accurate_barriers.GetValue())
cemuLog_log(LogType::Force, "Accurate barriers are disabled!"); cemuLog_log(LogType::Force, "Accurate barriers are disabled!");
} }
cemuLog_log(LogType::Force, "Console language: {}", stdx::to_underlying(config.console_language.GetValue())); cemuLog_log(LogType::Force, "Console language: {}", stdx::to_underlying(config.console_language.GetValue()));
@ -1014,7 +1024,7 @@ namespace CafeSystem
{ {
// starting with Cemu 1.27.0 /vol/storage_mlc01/ is virtualized, meaning that it doesn't point to one singular host os folder anymore // starting with Cemu 1.27.0 /vol/storage_mlc01/ is virtualized, meaning that it doesn't point to one singular host os folder anymore
// instead it now uses a more complex solution to source titles with various formats (folder, wud, wua) from the game paths and host mlc path // instead it now uses a more complex solution to source titles with various formats (folder, wud, wua) from the game paths and host mlc path
// todo - mount /vol/storage_mlc01/ with base priority to the host mlc? // todo - mount /vol/storage_mlc01/ with base priority to the host mlc?
// since mounting titles is an expensive operation we have to avoid mounting all titles at once // since mounting titles is an expensive operation we have to avoid mounting all titles at once

View file

@ -127,7 +127,7 @@ bool gameProfile_loadIntegerOption(IniParser& iniParser, const char* optionName,
{ {
cemuLog_log(LogType::Force, "Value '{}' is out of range for option '{}' in game profile", *option_value, optionName); cemuLog_log(LogType::Force, "Value '{}' is out of range for option '{}' in game profile", *option_value, optionName);
return false; return false;
} }
} }
template<typename T> template<typename T>
@ -224,8 +224,11 @@ bool GameProfile::Load(uint64_t title_id)
gameProfile_loadIntegerOption(&iniParser, "graphics_api", &graphicsApi, -1, 0, 1); gameProfile_loadIntegerOption(&iniParser, "graphics_api", &graphicsApi, -1, 0, 1);
if (graphicsApi.value != -1) if (graphicsApi.value != -1)
m_graphics_api = (GraphicAPI)graphicsApi.value; m_graphics_api = (GraphicAPI)graphicsApi.value;
gameProfile_loadEnumOption(iniParser, "accurateShaderMul", m_accurateShaderMul); gameProfile_loadEnumOption(iniParser, "accurateShaderMul", m_accurateShaderMul);
gameProfile_loadBooleanOption2(iniParser, "fastMath", m_fastMath);
gameProfile_loadEnumOption(iniParser, "bufferCacheMode2", m_bufferCacheMode);
gameProfile_loadEnumOption(iniParser, "positionInvariance2", m_positionInvariance);
// legacy support // legacy support
auto option_precompiledShaders = iniParser.FindOption("precompiledShaders"); auto option_precompiledShaders = iniParser.FindOption("precompiledShaders");
@ -277,7 +280,7 @@ bool GameProfile::Load(uint64_t title_id)
void GameProfile::Save(uint64_t title_id) void GameProfile::Save(uint64_t title_id)
{ {
auto gameProfileDir = ActiveSettings::GetConfigPath("gameProfiles"); auto gameProfileDir = ActiveSettings::GetConfigPath("gameProfiles");
if (std::error_code ex_ec; !fs::exists(gameProfileDir, ex_ec)) if (std::error_code ex_ec; !fs::exists(gameProfileDir, ex_ec))
fs::create_directories(gameProfileDir, ex_ec); fs::create_directories(gameProfileDir, ex_ec);
auto gameProfilePath = gameProfileDir / fmt::format("{:016x}.ini", title_id); auto gameProfilePath = gameProfileDir / fmt::format("{:016x}.ini", title_id);
FileStream* fs = FileStream::createFile2(gameProfilePath); FileStream* fs = FileStream::createFile2(gameProfilePath);
@ -292,22 +295,23 @@ void GameProfile::Save(uint64_t title_id)
#define WRITE_OPTIONAL_ENTRY(__NAME) if (m_##__NAME) fs->writeLine(fmt::format("{} = {}", #__NAME, m_##__NAME.value()).c_str()); #define WRITE_OPTIONAL_ENTRY(__NAME) if (m_##__NAME) fs->writeLine(fmt::format("{} = {}", #__NAME, m_##__NAME.value()).c_str());
#define WRITE_ENTRY(__NAME) fs->writeLine(fmt::format("{} = {}", #__NAME, m_##__NAME).c_str()); #define WRITE_ENTRY(__NAME) fs->writeLine(fmt::format("{} = {}", #__NAME, m_##__NAME).c_str());
#define WRITE_ENTRY_NUMBERED(__NAME, __NUM) fs->writeLine(fmt::format("{} = {}", #__NAME #__NUM, m_##__NAME).c_str());
fs->writeLine("[General]"); fs->writeLine("[General]");
WRITE_OPTIONAL_ENTRY(loadSharedLibraries); WRITE_OPTIONAL_ENTRY(loadSharedLibraries);
WRITE_ENTRY(startWithPadView); WRITE_ENTRY(startWithPadView);
fs->writeLine(""); fs->writeLine("");
fs->writeLine("[CPU]"); fs->writeLine("[CPU]");
WRITE_OPTIONAL_ENTRY(cpuMode); WRITE_OPTIONAL_ENTRY(cpuMode);
WRITE_ENTRY(threadQuantum); WRITE_ENTRY(threadQuantum);
fs->writeLine(""); fs->writeLine("");
fs->writeLine("[Graphics]"); fs->writeLine("[Graphics]");
WRITE_ENTRY(accurateShaderMul); WRITE_ENTRY(accurateShaderMul);
WRITE_ENTRY(fastMath);
WRITE_ENTRY_NUMBERED(bufferCacheMode, 2);
WRITE_ENTRY_NUMBERED(positionInvariance, 2);
WRITE_OPTIONAL_ENTRY(precompiledShaders); WRITE_OPTIONAL_ENTRY(precompiledShaders);
WRITE_OPTIONAL_ENTRY(graphics_api); WRITE_OPTIONAL_ENTRY(graphics_api);
fs->writeLine(""); fs->writeLine("");
@ -323,6 +327,7 @@ void GameProfile::Save(uint64_t title_id)
#undef WRITE_OPTIONAL_ENTRY #undef WRITE_OPTIONAL_ENTRY
#undef WRITE_ENTRY #undef WRITE_ENTRY
#undef WRITE_ENTRY_NUMBERED
delete fs; delete fs;
} }
@ -337,6 +342,9 @@ void GameProfile::ResetOptional()
// graphic settings // graphic settings
m_accurateShaderMul = AccurateShaderMulOption::True; m_accurateShaderMul = AccurateShaderMulOption::True;
m_fastMath = true;
m_bufferCacheMode = BufferCacheMode::Auto;
m_positionInvariance = PositionInvariance::Auto;
// cpu settings // cpu settings
m_threadQuantum = kThreadQuantumDefault; m_threadQuantum = kThreadQuantumDefault;
m_cpuMode.reset(); // CPUModeOption::kSingleCoreRecompiler; m_cpuMode.reset(); // CPUModeOption::kSingleCoreRecompiler;
@ -354,9 +362,12 @@ void GameProfile::Reset()
// general settings // general settings
m_loadSharedLibraries = true; m_loadSharedLibraries = true;
m_startWithPadView = false; m_startWithPadView = false;
// graphic settings // graphic settings
m_accurateShaderMul = AccurateShaderMulOption::True; m_accurateShaderMul = AccurateShaderMulOption::True;
m_fastMath = true;
m_bufferCacheMode = BufferCacheMode::Auto;
m_positionInvariance = PositionInvariance::Auto;
m_precompiledShaders = PrecompiledShaderOption::Auto; m_precompiledShaders = PrecompiledShaderOption::Auto;
// cpu settings // cpu settings
m_threadQuantum = kThreadQuantumDefault; m_threadQuantum = kThreadQuantumDefault;
@ -366,4 +377,4 @@ void GameProfile::Reset()
// controller settings // controller settings
for (auto& profile : m_controllerProfile) for (auto& profile : m_controllerProfile)
profile.reset(); profile.reset();
} }

View file

@ -31,6 +31,9 @@ public:
[[nodiscard]] const std::optional<GraphicAPI>& GetGraphicsAPI() const { return m_graphics_api; } [[nodiscard]] const std::optional<GraphicAPI>& GetGraphicsAPI() const { return m_graphics_api; }
[[nodiscard]] const AccurateShaderMulOption& GetAccurateShaderMul() const { return m_accurateShaderMul; } [[nodiscard]] const AccurateShaderMulOption& GetAccurateShaderMul() const { return m_accurateShaderMul; }
[[nodiscard]] bool GetFastMath() const { return m_fastMath; }
[[nodiscard]] BufferCacheMode GetBufferCacheMode() const { return m_bufferCacheMode; }
[[nodiscard]] PositionInvariance GetPositionInvariance() const { return m_positionInvariance; }
[[nodiscard]] const std::optional<PrecompiledShaderOption>& GetPrecompiledShadersState() const { return m_precompiledShaders; } [[nodiscard]] const std::optional<PrecompiledShaderOption>& GetPrecompiledShadersState() const { return m_precompiledShaders; }
[[nodiscard]] uint32 GetThreadQuantum() const { return m_threadQuantum; } [[nodiscard]] uint32 GetThreadQuantum() const { return m_threadQuantum; }
@ -54,6 +57,9 @@ private:
// graphic settings // graphic settings
std::optional<GraphicAPI> m_graphics_api{}; std::optional<GraphicAPI> m_graphics_api{};
AccurateShaderMulOption m_accurateShaderMul = AccurateShaderMulOption::True; AccurateShaderMulOption m_accurateShaderMul = AccurateShaderMulOption::True;
bool m_fastMath = true;
BufferCacheMode m_bufferCacheMode = BufferCacheMode::Auto;
PositionInvariance m_positionInvariance = PositionInvariance::Auto;
std::optional<PrecompiledShaderOption> m_precompiledShaders{}; std::optional<PrecompiledShaderOption> m_precompiledShaders{};
// cpu settings // cpu settings
uint32 m_threadQuantum = kThreadQuantumDefault; // values: 20000 45000 60000 80000 100000 uint32 m_threadQuantum = kThreadQuantumDefault; // values: 20000 45000 60000 80000 100000

View file

@ -109,7 +109,7 @@ bool GraphicPack2::LoadGraphicPack(const fs::path& rulesPath, IniParser& rules)
gp->SetActivePreset(kv.first, kv.second, false); gp->SetActivePreset(kv.first, kv.second, false);
} }
gp->SetEnabled(enabled); gp->SetEnabled(enabled);
} }
@ -141,7 +141,7 @@ bool GraphicPack2::DeactivateGraphicPack(const std::shared_ptr<GraphicPack2>& gr
if (!graphic_pack->IsActivated()) if (!graphic_pack->IsActivated())
return false; return false;
const auto it = std::find_if(s_active_graphic_packs.begin(), s_active_graphic_packs.end(), const auto it = std::find_if(s_active_graphic_packs.begin(), s_active_graphic_packs.end(),
[graphic_pack](const GraphicPackPtr& gp) [graphic_pack](const GraphicPackPtr& gp)
{ {
return gp->GetNormalizedPathString() == graphic_pack->GetNormalizedPathString(); return gp->GetNormalizedPathString() == graphic_pack->GetNormalizedPathString();
@ -269,6 +269,8 @@ GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules)
m_renderer_api = RendererAPI::Vulkan; m_renderer_api = RendererAPI::Vulkan;
else if (boost::iequals(*option_rendererFilter, "opengl")) else if (boost::iequals(*option_rendererFilter, "opengl"))
m_renderer_api = RendererAPI::OpenGL; m_renderer_api = RendererAPI::OpenGL;
else if (boost::iequals(*option_rendererFilter, "metal"))
m_renderer_api = RendererAPI::Metal;
else else
cemuLog_log(LogType::Force, "Unknown value '{}' for rendererFilter option", *option_rendererFilter); cemuLog_log(LogType::Force, "Unknown value '{}' for rendererFilter option", *option_rendererFilter);
} }
@ -348,7 +350,7 @@ GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules)
cemuLog_log(LogType::Force, "Graphic pack \"{}\": Preset in line {} skipped because it has no name option defined", GetNormalizedPathString(), rules.GetCurrentSectionLineNumber()); cemuLog_log(LogType::Force, "Graphic pack \"{}\": Preset in line {} skipped because it has no name option defined", GetNormalizedPathString(), rules.GetCurrentSectionLineNumber());
continue; continue;
} }
const auto category = rules.FindOption("category"); const auto category = rules.FindOption("category");
const auto condition = rules.FindOption("condition"); const auto condition = rules.FindOption("condition");
const auto default_selected = rules.FindOption("default"); const auto default_selected = rules.FindOption("default");
@ -420,13 +422,13 @@ GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules)
{ {
// store by category // store by category
std::unordered_map<std::string, std::vector<PresetPtr>> tmp_map; std::unordered_map<std::string, std::vector<PresetPtr>> tmp_map;
// all vars must be defined in the default preset vars before // all vars must be defined in the default preset vars before
std::vector<std::pair<std::string, std::string>> mismatchingPresetVars; std::vector<std::pair<std::string, std::string>> mismatchingPresetVars;
for (const auto& presetEntry : m_presets) for (const auto& presetEntry : m_presets)
{ {
tmp_map[presetEntry->category].emplace_back(presetEntry); tmp_map[presetEntry->category].emplace_back(presetEntry);
for (auto& presetVar : presetEntry->variables) for (auto& presetVar : presetEntry->variables)
{ {
const auto it = m_preset_vars.find(presetVar.first); const auto it = m_preset_vars.find(presetVar.first);
@ -568,7 +570,7 @@ void GraphicPack2::ValidatePresetSelections()
// //
// example: a preset category might be hidden entirely (e.g. due to a separate advanced options dropdown) // example: a preset category might be hidden entirely (e.g. due to a separate advanced options dropdown)
// how to handle: leave the previously selected preset // how to handle: leave the previously selected preset
// //
// the logic is therefore as follows: // the logic is therefore as follows:
// if there is a preset category with at least 1 visible preset entry then make sure one of those is actually selected // if there is a preset category with at least 1 visible preset entry then make sure one of those is actually selected
// for completely hidden preset categories we leave the selection as-is // for completely hidden preset categories we leave the selection as-is
@ -632,17 +634,17 @@ bool GraphicPack2::SetActivePreset(std::string_view category, std::string_view n
// disable currently active preset // disable currently active preset
std::for_each(m_presets.begin(), m_presets.end(), [category](PresetPtr& p) std::for_each(m_presets.begin(), m_presets.end(), [category](PresetPtr& p)
{ {
if(p->category == category) if(p->category == category)
p->active = false; p->active = false;
}); });
if (name.empty()) if (name.empty())
return true; return true;
// enable new preset // enable new preset
const auto it = std::find_if(m_presets.cbegin(), m_presets.cend(), [category, name](const PresetPtr& preset) const auto it = std::find_if(m_presets.cbegin(), m_presets.cend(), [category, name](const PresetPtr& preset)
{ {
return preset->category == category && preset->name == name; return preset->category == category && preset->name == name;
}); });
bool result; bool result;
@ -681,12 +683,14 @@ void GraphicPack2::LoadShaders()
wchar_t shader_type[256]{}; wchar_t shader_type[256]{};
if (filename.size() < 256 && swscanf(filename.c_str(), L"%" SCNx64 "_%" SCNx64 "_%ls", &shader_base_hash, &shader_aux_hash, shader_type) == 3) if (filename.size() < 256 && swscanf(filename.c_str(), L"%" SCNx64 "_%" SCNx64 "_%ls", &shader_base_hash, &shader_aux_hash, shader_type) == 3)
{ {
bool isMetalShader = (shader_type[2] == '_' && shader_type[3] == 'm' && shader_type[4] == 's' && shader_type[5] == 'l');
if (shader_type[0] == 'p' && shader_type[1] == 's') if (shader_type[0] == 'p' && shader_type[1] == 's')
m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::PIXEL)); m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::PIXEL, isMetalShader));
else if (shader_type[0] == 'v' && shader_type[1] == 's') else if (shader_type[0] == 'v' && shader_type[1] == 's')
m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::VERTEX)); m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::VERTEX, isMetalShader));
else if (shader_type[0] == 'g' && shader_type[1] == 's') else if (shader_type[0] == 'g' && shader_type[1] == 's')
m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::GEOMETRY)); m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::GEOMETRY, isMetalShader));
} }
else if (filename == L"output.glsl") else if (filename == L"output.glsl")
{ {
@ -783,7 +787,7 @@ std::optional<GraphicPack2::PresetVar> GraphicPack2::GetPresetVariable(const std
return it->second; return it->second;
} }
} }
for (const auto& preset : presets) for (const auto& preset : presets)
{ {
if (!preset->visible) if (!preset->visible)
@ -793,7 +797,7 @@ std::optional<GraphicPack2::PresetVar> GraphicPack2::GetPresetVariable(const std
return it->second; return it->second;
} }
} }
const auto it = std::find_if(m_preset_vars.cbegin(), m_preset_vars.cend(), [&var_name](auto p) { return p.first == var_name; }); const auto it = std::find_if(m_preset_vars.cbegin(), m_preset_vars.cend(), [&var_name](auto p) { return p.first == var_name; });
if (it != m_preset_vars.cend()) if (it != m_preset_vars.cend())
{ {
@ -839,7 +843,7 @@ void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, bool isAOC
virtualMountPath = fs::path("vol/content/") / virtualMountPath; virtualMountPath = fs::path("vol/content/") / virtualMountPath;
} }
fscDeviceRedirect_add(virtualMountPath.generic_string(), it.file_size(), it.path().generic_string(), m_fs_priority); fscDeviceRedirect_add(virtualMountPath.generic_string(), it.file_size(), it.path().generic_string(), m_fs_priority);
} }
} }
} }
@ -859,7 +863,7 @@ void GraphicPack2::LoadReplacedFiles()
std::error_code ec; std::error_code ec;
if (fs::exists(contentPath, ec)) if (fs::exists(contentPath, ec))
{ {
// setup redirections // setup redirections
fscDeviceRedirect_map(); fscDeviceRedirect_map();
_iterateReplacedFiles(contentPath, false); _iterateReplacedFiles(contentPath, false);
} }
@ -872,7 +876,7 @@ void GraphicPack2::LoadReplacedFiles()
uint64 aocTitleId = CafeSystem::GetForegroundTitleId(); uint64 aocTitleId = CafeSystem::GetForegroundTitleId();
aocTitleId = aocTitleId & 0xFFFFFFFFULL; aocTitleId = aocTitleId & 0xFFFFFFFFULL;
aocTitleId |= 0x0005000c00000000ULL; aocTitleId |= 0x0005000c00000000ULL;
// setup redirections // setup redirections
fscDeviceRedirect_map(); fscDeviceRedirect_map();
_iterateReplacedFiles(aocPath, true); _iterateReplacedFiles(aocPath, true);
} }
@ -988,7 +992,7 @@ bool GraphicPack2::Activate()
// enable patch groups // enable patch groups
EnablePatches(); EnablePatches();
// load replaced files // load replaced files
LoadReplacedFiles(); LoadReplacedFiles();
@ -1034,7 +1038,7 @@ bool GraphicPack2::Deactivate()
m_output_shader_source.clear(); m_output_shader_source.clear();
m_upscaling_shader_source.clear(); m_upscaling_shader_source.clear();
m_downscaling_shader_source.clear(); m_downscaling_shader_source.clear();
if (HasCustomVSyncFrequency()) if (HasCustomVSyncFrequency())
{ {
m_vsync_frequency = -1; m_vsync_frequency = -1;
@ -1045,7 +1049,7 @@ bool GraphicPack2::Deactivate()
return true; return true;
} }
const std::string* GraphicPack2::FindCustomShaderSource(uint64 shaderBaseHash, uint64 shaderAuxHash, GP_SHADER_TYPE type, bool isVulkanRenderer) const std::string* GraphicPack2::FindCustomShaderSource(uint64 shaderBaseHash, uint64 shaderAuxHash, GP_SHADER_TYPE type, bool isVulkanRenderer, bool isMetalRenderer)
{ {
for (const auto& gp : GraphicPack2::GetActiveGraphicPacks()) for (const auto& gp : GraphicPack2::GetActiveGraphicPacks())
{ {
@ -1055,9 +1059,12 @@ const std::string* GraphicPack2::FindCustomShaderSource(uint64 shaderBaseHash, u
if (it == gp->m_custom_shaders.end()) if (it == gp->m_custom_shaders.end())
continue; continue;
if(isVulkanRenderer && (*it).isPreVulkanShader) if (isVulkanRenderer && (*it).isPreVulkanShader)
continue; continue;
if (isMetalRenderer != (*it).isMetalShader)
continue;
return &it->source; return &it->source;
} }
return nullptr; return nullptr;
@ -1066,7 +1073,7 @@ const std::string* GraphicPack2::FindCustomShaderSource(uint64 shaderBaseHash, u
std::unordered_map<std::string, std::vector<GraphicPack2::PresetPtr>> GraphicPack2::GetCategorizedPresets(std::vector<std::string>& order) const std::unordered_map<std::string, std::vector<GraphicPack2::PresetPtr>> GraphicPack2::GetCategorizedPresets(std::vector<std::string>& order) const
{ {
order.clear(); order.clear();
std::unordered_map<std::string, std::vector<PresetPtr>> result; std::unordered_map<std::string, std::vector<PresetPtr>> result;
for(const auto& entry : m_presets) for(const auto& entry : m_presets)
{ {
@ -1075,13 +1082,13 @@ std::unordered_map<std::string, std::vector<GraphicPack2::PresetPtr>> GraphicPac
if (it == order.cend()) if (it == order.cend())
order.emplace_back(entry->category); order.emplace_back(entry->category);
} }
return result; return result;
} }
bool GraphicPack2::HasShaders() const bool GraphicPack2::HasShaders() const
{ {
return !GetCustomShaders().empty() return !GetCustomShaders().empty()
|| !m_output_shader_source.empty() || !m_upscaling_shader_source.empty() || !m_downscaling_shader_source.empty(); || !m_output_shader_source.empty() || !m_upscaling_shader_source.empty() || !m_downscaling_shader_source.empty();
} }
@ -1215,7 +1222,7 @@ void GraphicPack2::ApplyShaderPresets(std::string& shader_source) const
} }
} }
GraphicPack2::CustomShader GraphicPack2::LoadShader(const fs::path& path, uint64 shader_base_hash, uint64 shader_aux_hash, GP_SHADER_TYPE shader_type) const GraphicPack2::CustomShader GraphicPack2::LoadShader(const fs::path& path, uint64 shader_base_hash, uint64 shader_aux_hash, GP_SHADER_TYPE shader_type, bool isMetalShader) const
{ {
CustomShader shader; CustomShader shader;
@ -1234,6 +1241,7 @@ GraphicPack2::CustomShader GraphicPack2::LoadShader(const fs::path& path, uint64
shader.shader_aux_hash = shader_aux_hash; shader.shader_aux_hash = shader_aux_hash;
shader.type = shader_type; shader.type = shader_type;
shader.isPreVulkanShader = this->m_version <= 3; shader.isPreVulkanShader = this->m_version <= 3;
shader.isMetalShader = isMetalShader;
return shader; return shader;
} }

View file

@ -57,7 +57,7 @@ public:
sint32 lod_bias = -1; // in 1/64th steps sint32 lod_bias = -1; // in 1/64th steps
sint32 relative_lod_bias = -1; // in 1/64th steps sint32 relative_lod_bias = -1; // in 1/64th steps
sint32 anistropic_value = -1; // 1<<n sint32 anistropic_value = -1; // 1<<n
} overwrite_settings; } overwrite_settings;
}; };
struct CustomShader struct CustomShader
@ -67,6 +67,7 @@ public:
uint64 shader_aux_hash; uint64 shader_aux_hash;
GP_SHADER_TYPE type; GP_SHADER_TYPE type;
bool isPreVulkanShader{}; // set to true for V3 packs since the shaders are not compatible with the Vulkan renderer bool isPreVulkanShader{}; // set to true for V3 packs since the shaders are not compatible with the Vulkan renderer
bool isMetalShader{}; // set to true if the shader is written in Metal Shading Language
}; };
enum VarType enum VarType
@ -85,13 +86,13 @@ public:
bool active = false; // selected/active preset bool active = false; // selected/active preset
bool visible = true; // set by condition or true bool visible = true; // set by condition or true
bool is_default = false; // selected by default bool is_default = false; // selected by default
Preset(std::string_view name, std::unordered_map<std::string, PresetVar> vars) Preset(std::string_view name, std::unordered_map<std::string, PresetVar> vars)
: name(name), variables(std::move(vars)) {} : name(name), variables(std::move(vars)) {}
Preset(std::string_view category, std::string_view name, std::unordered_map<std::string, PresetVar> vars) Preset(std::string_view category, std::string_view name, std::unordered_map<std::string, PresetVar> vars)
: category(category), name(name), variables(std::move(vars)) {} : category(category), name(name), variables(std::move(vars)) {}
Preset(std::string_view category, std::string_view name, std::string_view condition, std::unordered_map<std::string, PresetVar> vars) Preset(std::string_view category, std::string_view name, std::string_view condition, std::unordered_map<std::string, PresetVar> vars)
: category(category), name(name), condition(condition), variables(std::move(vars)) {} : category(category), name(name), condition(condition), variables(std::move(vars)) {}
}; };
@ -136,19 +137,19 @@ public:
bool SetActivePreset(std::string_view category, std::string_view name, bool update_visibility = true); bool SetActivePreset(std::string_view category, std::string_view name, bool update_visibility = true);
bool SetActivePreset(std::string_view name); bool SetActivePreset(std::string_view name);
void UpdatePresetVisibility(); void UpdatePresetVisibility();
void AddConstantsForCurrentPreset(ExpressionParser& ep); void AddConstantsForCurrentPreset(ExpressionParser& ep);
bool ResolvePresetConstant(const std::string& varname, double& value) const; bool ResolvePresetConstant(const std::string& varname, double& value) const;
[[nodiscard]] const std::vector<PresetPtr>& GetPresets() const { return m_presets; } [[nodiscard]] const std::vector<PresetPtr>& GetPresets() const { return m_presets; }
[[nodiscard]] std::unordered_map<std::string, std::vector<PresetPtr>> GetCategorizedPresets(std::vector<std::string>& order) const; [[nodiscard]] std::unordered_map<std::string, std::vector<PresetPtr>> GetCategorizedPresets(std::vector<std::string>& order) const;
// shaders // shaders
void LoadShaders(); void LoadShaders();
bool HasShaders() const; bool HasShaders() const;
const std::vector<CustomShader>& GetCustomShaders() const { return m_custom_shaders; } const std::vector<CustomShader>& GetCustomShaders() const { return m_custom_shaders; }
static const std::string* FindCustomShaderSource(uint64 shaderBaseHash, uint64 shaderAuxHash, GP_SHADER_TYPE type, bool isVulkanRenderer); static const std::string* FindCustomShaderSource(uint64 shaderBaseHash, uint64 shaderAuxHash, GP_SHADER_TYPE type, bool isVulkanRenderer, bool isMetalRenderer);
const std::string& GetOutputShaderSource() const { return m_output_shader_source; } const std::string& GetOutputShaderSource() const { return m_output_shader_source; }
const std::string& GetDownscalingShaderSource() const { return m_downscaling_shader_source; } const std::string& GetDownscalingShaderSource() const { return m_downscaling_shader_source; }
@ -194,7 +195,7 @@ private:
{ {
for (auto& var : preset->variables) for (auto& var : preset->variables)
parser.AddConstant(var.first, (TType)var.second.second); parser.AddConstant(var.first, (TType)var.second.second);
} }
} }
for(const auto& preset : active_presets) for(const auto& preset : active_presets)
{ {
@ -202,7 +203,7 @@ private:
{ {
for (auto& var : preset->variables) for (auto& var : preset->variables)
parser.TryAddConstant(var.first, (TType)var.second.second); parser.TryAddConstant(var.first, (TType)var.second.second);
} }
} }
for (auto& var : m_preset_vars) for (auto& var : m_preset_vars)
@ -228,7 +229,7 @@ private:
bool m_activated = false; // set if the graphic pack is currently used by the running game bool m_activated = false; // set if the graphic pack is currently used by the running game
std::vector<uint64_t> m_title_ids; std::vector<uint64_t> m_title_ids;
bool m_patchedFilesLoaded = false; // set to true once patched files are loaded bool m_patchedFilesLoaded = false; // set to true once patched files are loaded
sint32 m_vsync_frequency = -1; sint32 m_vsync_frequency = -1;
sint32 m_fs_priority = 100; sint32 m_fs_priority = 100;
@ -241,12 +242,12 @@ private:
std::vector<PresetPtr> m_presets; std::vector<PresetPtr> m_presets;
// default preset vars // default preset vars
std::unordered_map<std::string, PresetVar> m_preset_vars; std::unordered_map<std::string, PresetVar> m_preset_vars;
std::vector<CustomShader> m_custom_shaders; std::vector<CustomShader> m_custom_shaders;
std::vector<TextureRule> m_texture_rules; std::vector<TextureRule> m_texture_rules;
std::string m_output_shader_source, m_upscaling_shader_source, m_downscaling_shader_source; std::string m_output_shader_source, m_upscaling_shader_source, m_downscaling_shader_source;
std::unique_ptr<RendererOutputShader> m_output_shader, m_upscaling_shader, m_downscaling_shader, m_output_shader_ud, m_upscaling_shader_ud, m_downscaling_shader_ud; std::unique_ptr<RendererOutputShader> m_output_shader, m_upscaling_shader, m_downscaling_shader, m_output_shader_ud, m_upscaling_shader_ud, m_downscaling_shader_ud;
template<typename T> template<typename T>
bool ParseRule(const ExpressionParser& parser, IniParser& iniParser, const char* option_name, T* value_out) const; bool ParseRule(const ExpressionParser& parser, IniParser& iniParser, const char* option_name, T* value_out) const;
@ -257,7 +258,7 @@ private:
std::vector<uint64> ParseTitleIds(IniParser& rules, const char* option_name) const; std::vector<uint64> ParseTitleIds(IniParser& rules, const char* option_name) const;
CustomShader LoadShader(const fs::path& path, uint64 shader_base_hash, uint64 shader_aux_hash, GP_SHADER_TYPE shader_type) const; CustomShader LoadShader(const fs::path& path, uint64 shader_base_hash, uint64 shader_aux_hash, GP_SHADER_TYPE shader_type, bool isMetalShader) const;
void ApplyShaderPresets(std::string& shader_source) const; void ApplyShaderPresets(std::string& shader_source) const;
void LoadReplacedFiles(); void LoadReplacedFiles();
void _iterateReplacedFiles(const fs::path& currentPath, bool isAOC); void _iterateReplacedFiles(const fs::path& currentPath, bool isAOC);
@ -330,6 +331,6 @@ std::vector<T> GraphicPack2::ParseList(const ExpressionParser& parser, IniParser
} }
catch (const std::invalid_argument&) {} catch (const std::invalid_argument&) {}
} }
return result; return result;
} }

View file

@ -8,8 +8,12 @@
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h" #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
#include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/ISA/LatteInstructions.h" #include "Cafe/HW/Latte/ISA/LatteInstructions.h"
#include "HW/Latte/Renderer/Renderer.h"
#include "util/containers/LookupTableL3.h" #include "util/containers/LookupTableL3.h"
#include "util/helpers/fspinlock.h" #include "util/helpers/fspinlock.h"
#if ENABLE_METAL
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#endif
#include <openssl/sha.h> /* SHA1_DIGEST_LENGTH */ #include <openssl/sha.h> /* SHA1_DIGEST_LENGTH */
#include <openssl/evp.h> /* EVP_Digest */ #include <openssl/evp.h> /* EVP_Digest */
@ -71,7 +75,7 @@ uint32 LatteShaderRecompiler_getAttributeAlignment(LatteParsedFetchShaderAttribu
return 4; return 4;
} }
void LatteShader_calculateFSKey(LatteFetchShader* fetchShader) void LatteShader_calculateFSKey(LatteFetchShader* fetchShader, uint32* contextRegister)
{ {
uint64 key = 0; uint64 key = 0;
for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++) for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++)
@ -104,11 +108,25 @@ void LatteShader_calculateFSKey(LatteFetchShader* fetchShader)
key = std::rotl<uint64>(key, 8); key = std::rotl<uint64>(key, 8);
key += (uint64)attrib->semanticId; key += (uint64)attrib->semanticId;
key = std::rotl<uint64>(key, 8); key = std::rotl<uint64>(key, 8);
key += (uint64)(attrib->offset & 3); if (g_renderer->GetType() == RendererAPI::Metal)
key = std::rotl<uint64>(key, 2); key += (uint64)attrib->offset;
else
key += (uint64)(attrib->offset & 3);
key = std::rotl<uint64>(key, 7);
} }
} }
// todo - also hash invalid buffer groups? // todo - also hash invalid buffer groups?
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = fetchShader->bufferGroups[g];
key += (uint64)group.attributeBufferIndex;
key = std::rotl<uint64>(key, 5);
}
}
fetchShader->key = key; fetchShader->key = key;
} }
@ -146,6 +164,29 @@ void LatteFetchShader::CalculateFetchShaderVkHash()
this->vkPipelineHashFragment = h; this->vkPipelineHashFragment = h;
} }
// Inspects every buffer group of this fetch shader and raises mtlFetchVertexManually when
// either of the following holds for any group:
//   - the buffer stride is not a multiple of 4 bytes
//   - an attribute's offset plus its Metal vertex format size extends past the stride
// The flag is only ever set here, never cleared.
// contextRegister: register array from which the per-buffer stride is read
// The whole body is compiled out unless ENABLE_METAL is set.
// NOTE(review): presumably these are the layouts Metal's built-in vertex fetch cannot handle - confirm against the Metal renderer
void LatteFetchShader::CheckIfVerticesNeedManualFetchMtl(uint32* contextRegister)
{
#if ENABLE_METAL
	bool needsManualFetch = false;
	for (sint32 groupIdx = 0; groupIdx < bufferGroups.size(); groupIdx++)
	{
		LatteParsedFetchShaderBufferGroup_t& bufferGroup = bufferGroups[groupIdx];

		// each attribute buffer owns 7 consecutive registers; the stride sits in bits 11..26 of the third one
		const uint32 attributeBuffer = bufferGroup.attributeBufferIndex;
		const uint32 strideRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + attributeBuffer * 7 + 2;
		const uint32 stride = (contextRegister[strideRegisterIndex] >> 11) & 0xFFFF;

		// stride must be a multiple of 4
		if ((stride & 3) != 0)
			needsManualFetch = true;

		// no attribute may reach beyond the stride
		for (sint32 attribIdx = 0; attribIdx < bufferGroup.attribCount; attribIdx++)
		{
			auto& attribute = bufferGroup.attrib[attribIdx];
			if (attribute.offset + GetMtlVertexFormatSize(attribute.format) > stride)
				needsManualFetch = true;
		}
	}
	if (needsManualFetch)
		mtlFetchVertexManually = true;
#endif
}
void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* parsedFetchShader, uint32* contextRegister, const LatteClauseInstruction_VTX* instr) void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* parsedFetchShader, uint32* contextRegister, const LatteClauseInstruction_VTX* instr)
{ {
uint32 semanticId = instr->getFieldSEM_SEMANTIC_ID(); // location (attribute index inside shader) uint32 semanticId = instr->getFieldSEM_SEMANTIC_ID(); // location (attribute index inside shader)
@ -161,7 +202,7 @@ void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* pars
auto nfa = instr->getField_NUM_FORMAT_ALL(); auto nfa = instr->getField_NUM_FORMAT_ALL();
bool isSigned = instr->getField_FORMAT_COMP_ALL() == LatteClauseInstruction_VTX::FORMAT_COMP::COMP_SIGNED; bool isSigned = instr->getField_FORMAT_COMP_ALL() == LatteClauseInstruction_VTX::FORMAT_COMP::COMP_SIGNED;
auto endianSwap = instr->getField_ENDIAN_SWAP(); auto endianSwap = instr->getField_ENDIAN_SWAP();
// get buffer // get buffer
cemu_assert_debug(bufferId >= 0xA0 && bufferId < 0xB0); cemu_assert_debug(bufferId >= 0xA0 && bufferId < 0xB0);
uint32 bufferIndex = (bufferId - 0xA0); uint32 bufferIndex = (bufferId - 0xA0);
@ -316,7 +357,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
// {0x00000002, 0x01800c00, 0x00000000, 0x8a000000, 0x2c00a001, 0x2c151000, 0x000a0000, ...} // size 0x50 // {0x00000002, 0x01800c00, 0x00000000, 0x8a000000, 0x2c00a001, 0x2c151000, 0x000a0000, ...} // size 0x50
// {0x00000002, 0x01801000, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x60 // {0x00000002, 0x01801000, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x60
// {0x00000002, 0x01801c00, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x90 // {0x00000002, 0x01801c00, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x90
// our new implementation: // our new implementation:
// {0x00000002, 0x01800400, 0x00000000, 0x8a000000, 0x0000a001, 0x2c151000, 0x00020000, ...} // {0x00000002, 0x01800400, 0x00000000, 0x8a000000, 0x0000a001, 0x2c151000, 0x00020000, ...}
@ -326,8 +367,9 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
{ {
// empty fetch shader, seen in Minecraft // empty fetch shader, seen in Minecraft
// these only make sense when vertex shader does not call FS? // these only make sense when vertex shader does not call FS?
LatteShader_calculateFSKey(newFetchShader); LatteShader_calculateFSKey(newFetchShader, contextRegister);
newFetchShader->CalculateFetchShaderVkHash(); newFetchShader->CalculateFetchShaderVkHash();
newFetchShader->CheckIfVerticesNeedManualFetchMtl(contextRegister);
return newFetchShader; return newFetchShader;
} }
@ -385,8 +427,9 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
} }
bufferGroup.vboStride = vboOffset; bufferGroup.vboStride = vboOffset;
} }
LatteShader_calculateFSKey(newFetchShader); LatteShader_calculateFSKey(newFetchShader, contextRegister);
newFetchShader->CalculateFetchShaderVkHash(); newFetchShader->CalculateFetchShaderVkHash();
newFetchShader->CheckIfVerticesNeedManualFetchMtl(contextRegister);
// register in cache // register in cache
// its possible that during multi-threaded shader cache loading, two identical (same hash) fetch shaders get created simultaneously // its possible that during multi-threaded shader cache loading, two identical (same hash) fetch shaders get created simultaneously
@ -411,7 +454,7 @@ LatteFetchShader::~LatteFetchShader()
UnregisterInCache(); UnregisterInCache();
} }
struct FetchShaderLookupInfo struct FetchShaderLookupInfo
{ {
LatteFetchShader* fetchShader; LatteFetchShader* fetchShader;
uint32 programSize; uint32 programSize;

View file

@ -46,13 +46,17 @@ struct LatteFetchShader
// Vulkan // Vulkan
uint64 vkPipelineHashFragment{}; // hash of all fetch shader state that influences the Vulkan graphics pipeline uint64 vkPipelineHashFragment{}; // hash of all fetch shader state that influences the Vulkan graphics pipeline
// Metal
bool mtlFetchVertexManually{};
// cache info // cache info
CacheHash m_cacheHash{}; CacheHash m_cacheHash{};
bool m_isRegistered{}; // if true, fetch shader is referenced by cache (RegisterInCache() succeeded) bool m_isRegistered{}; // if true, fetch shader is referenced by cache (RegisterInCache() succeeded)
void CalculateFetchShaderVkHash(); void CalculateFetchShaderVkHash();
void CheckIfVerticesNeedManualFetchMtl(uint32* contextRegister);
uint64 getVkPipelineHashFragment() const { return vkPipelineHashFragment; }; uint64 getVkPipelineHashFragment() const { return vkPipelineHashFragment; };
static bool isValidBufferIndex(const uint32 index) { return index < 0x10; }; static bool isValidBufferIndex(const uint32 index) { return index < 0x10; };
@ -69,4 +73,4 @@ struct LatteFetchShader
static std::unordered_map<CacheHash, LatteFetchShader*> s_fetchShaderByHash; static std::unordered_map<CacheHash, LatteFetchShader*> s_fetchShaderByHash;
}; };
LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::CacheHash fsHash, uint32* contextRegister, uint32* fsProgramCode, uint32 fsProgramSize); LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::CacheHash fsHash, uint32* contextRegister, uint32* fsProgramCode, uint32 fsProgramSize);

View file

@ -441,7 +441,7 @@ public:
if (uploadBegin >= uploadEnd) if (uploadBegin >= uploadEnd)
return; // reserve range not within invalidation or range is zero sized return; // reserve range not within invalidation or range is zero sized
if (uploadBegin == m_invalidationRangeBegin) if (uploadBegin == m_invalidationRangeBegin)
{ {
m_invalidationRangeBegin = uploadEnd; m_invalidationRangeBegin = uploadEnd;
@ -536,7 +536,7 @@ private:
MPTR m_invalidationRangeBegin; MPTR m_invalidationRangeBegin;
MPTR m_invalidationRangeEnd; MPTR m_invalidationRangeEnd;
BufferCacheNode(MPTR rangeBegin, MPTR rangeEnd): m_rangeBegin(rangeBegin), m_rangeEnd(rangeEnd) BufferCacheNode(MPTR rangeBegin, MPTR rangeEnd): m_rangeBegin(rangeBegin), m_rangeEnd(rangeEnd)
{ {
flagInUse(); flagInUse();
cemu_assert_debug(rangeBegin < rangeEnd); cemu_assert_debug(rangeBegin < rangeEnd);
@ -740,7 +740,7 @@ private:
cemu_assert_debug(rangeEnd <= pageRangeEnd); cemu_assert_debug(rangeEnd <= pageRangeEnd);
cemu_assert_debug((rangeBegin & 0xF) == 0); cemu_assert_debug((rangeBegin & 0xF) == 0);
cemu_assert_debug((rangeEnd & 0xF) == 0); cemu_assert_debug((rangeEnd & 0xF) == 0);
auto pageInfo = m_pageInfo.data() + pageIndex; auto pageInfo = m_pageInfo.data() + pageIndex;
pageInfo->hasStreamoutData = true; pageInfo->hasStreamoutData = true;
@ -805,7 +805,7 @@ public:
s_allCacheNodes.clear(); s_allCacheNodes.clear();
g_deallocateQueue.clear(); g_deallocateQueue.clear();
} }
static void ProcessDeallocations() static void ProcessDeallocations()
{ {
for(auto& itr : g_deallocateQueue) for(auto& itr : g_deallocateQueue)

View file

@ -62,7 +62,7 @@ void rectGenerate4thVertex(uint32be* output, uint32be* input0, uint32be* input1,
// order of rectangle vertices is // order of rectangle vertices is
// v0 v1 // v0 v1
// v2 v3 // v2 v3
for (sint32 f = 0; f < vectorLen*4; f++) for (sint32 f = 0; f < vectorLen*4; f++)
output[f] = _swapEndianU32(output[f]); output[f] = _swapEndianU32(output[f]);
@ -199,11 +199,14 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance
#if BOOST_OS_MACOS #if BOOST_OS_MACOS
if(bufferStride % 4 != 0) if(bufferStride % 4 != 0)
{ {
if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance()) if (g_renderer->GetType() == RendererAPI::Vulkan)
{ {
auto fixedBuffer = vkRenderer->buffer_genStrideWorkaroundVertexBuffer(bufferAddress, fixedBufferSize, bufferStride); if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance())
vkRenderer->buffer_bindVertexStrideWorkaroundBuffer(fixedBuffer.first, fixedBuffer.second, bufferIndex, fixedBufferSize); {
continue; auto fixedBuffer = vkRenderer->buffer_genStrideWorkaroundVertexBuffer(bufferAddress, fixedBufferSize, bufferStride);
vkRenderer->buffer_bindVertexStrideWorkaroundBuffer(fixedBuffer.first, fixedBuffer.second, bufferIndex, fixedBufferSize);
continue;
}
} }
} }
#endif #endif
@ -222,4 +225,4 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance
if (pixelShader) if (pixelShader)
LatteBufferCache_syncGPUUniformBuffers(pixelShader, mmSQ_PS_UNIFORM_BLOCK_START, LatteConst::ShaderType::Pixel); LatteBufferCache_syncGPUUniformBuffers(pixelShader, mmSQ_PS_UNIFORM_BLOCK_START, LatteConst::ShaderType::Pixel);
return true; return true;
} }

View file

@ -8,7 +8,7 @@
#include <immintrin.h> #include <immintrin.h>
#endif #endif
struct struct
{ {
struct CacheEntry struct CacheEntry
{ {
@ -113,6 +113,21 @@ uint32 LatteIndices_calculateIndexOutputSize(LattePrimitiveMode primitiveMode, L
cemu_assert_suspicious(); cemu_assert_suspicious();
return 0; return 0;
} }
else if (primitiveMode == LattePrimitiveMode::TRIANGLE_FAN && g_renderer->GetType() == RendererAPI::Metal)
{
if (indexType == LatteIndexType::AUTO)
{
if (count <= 0xFFFF)
return count * sizeof(uint16);
return count * sizeof(uint32);
}
if (indexType == LatteIndexType::U16_BE || indexType == LatteIndexType::U16_LE)
return count * sizeof(uint16);
if (indexType == LatteIndexType::U32_BE || indexType == LatteIndexType::U32_LE)
return count * sizeof(uint32);
cemu_assert_suspicious();
return 0;
}
else if(indexType == LatteIndexType::AUTO) else if(indexType == LatteIndexType::AUTO)
return 0; return 0;
else if (indexType == LatteIndexType::U16_BE || indexType == LatteIndexType::U16_LE) else if (indexType == LatteIndexType::U16_BE || indexType == LatteIndexType::U16_LE)
@ -306,6 +321,44 @@ void LatteIndices_generateAutoLineLoopIndices(void* indexDataOutput, uint32 coun
indexMax = std::max(count, 1u) - 1; indexMax = std::max(count, 1u) - 1;
} }
template<typename T>
void LatteIndices_unpackTriangleFanAndConvert(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{
const betype<T>* src = (betype<T>*)indexDataInput;
T* dst = (T*)indexDataOutput;
// TODO: check this
for (sint32 i = 0; i < count; i++)
{
uint32 i0;
if (i % 2 == 0)
i0 = i / 2;
else
i0 = count - 1 - i / 2;
T idx = src[i0];
indexMin = std::min(indexMin, (uint32)idx);
indexMax = std::max(indexMax, (uint32)idx);
dst[i] = idx;
}
}
// Generates `count` indices of type T for a draw without an index buffer, using the interleaved
// order 0, count-1, 1, count-2, 2, ... (even output positions count up from 0, odd output
// positions count down from count-1).
// indexDataInput:  unused; kept so the signature matches the other index generators
// indexDataOutput: destination, must have room for `count` elements of T
// indexMin:        always set to 0
// indexMax:        set to count-1 (0 when count is 0)
// T must be able to hold count-1; the caller selects uint16 only for count <= 0xFFFF.
template<typename T>
void LatteIndices_generateAutoTriangleFanIndices(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{
	(void)indexDataInput; // there is no source index data to read in auto-index mode
	T* dst = static_cast<T*>(indexDataOutput);
	for (uint32 i = 0; i < count; i++)
	{
		const T pos = static_cast<T>(i);
		const T half = pos / 2;
		// even slots walk forward from the first vertex, odd slots walk backward from the last
		dst[i] = (pos % 2 == 0) ? half : static_cast<T>(count - 1 - half);
	}
	indexMin = 0;
	indexMax = std::max(count, 1u) - 1;
}
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
ATTRIBUTE_AVX2 ATTRIBUTE_AVX2
void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
@ -317,7 +370,7 @@ void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDat
sint32 countRemaining = count & 15; sint32 countRemaining = count & 15;
if (count16) if (count16)
{ {
__m256i mMin = _mm256_set_epi16((sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, __m256i mMin = _mm256_set_epi16((sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF,
(sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF); (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF);
__m256i mMax = _mm256_set_epi16(0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000); __m256i mMax = _mm256_set_epi16(0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000);
__m256i mShuffle16Swap = _mm256_set_epi8(30, 31, 28, 29, 26, 27, 24, 25, 22, 23, 20, 21, 18, 19, 16, 17, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); __m256i mShuffle16Swap = _mm256_set_epi8(30, 31, 28, 29, 26, 27, 24, 25, 22, 23, 20, 21, 18, 19, 16, 17, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
@ -684,6 +737,29 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32
cemu_assert_debug(false); cemu_assert_debug(false);
outputCount = count + 1; outputCount = count + 1;
} }
else if (primitiveMode == LattePrimitiveMode::TRIANGLE_FAN && g_renderer->GetType() == RendererAPI::Metal)
{
if (indexType == LatteIndexType::AUTO)
{
if (count <= 0xFFFF)
{
LatteIndices_generateAutoTriangleFanIndices<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
renderIndexType = Renderer::INDEX_TYPE::U16;
}
else
{
LatteIndices_generateAutoTriangleFanIndices<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
renderIndexType = Renderer::INDEX_TYPE::U32;
}
}
else if (indexType == LatteIndexType::U16_BE)
LatteIndices_unpackTriangleFanAndConvert<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
else if (indexType == LatteIndexType::U32_BE)
LatteIndices_unpackTriangleFanAndConvert<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
else
cemu_assert_debug(false);
outputCount = count;
}
else else
{ {
if (indexType == LatteIndexType::U16_BE) if (indexType == LatteIndexType::U16_BE)
@ -696,7 +772,7 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32
else else
LatteIndices_convertBE<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax); LatteIndices_convertBE<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
#else #else
LatteIndices_convertBE<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax); LatteIndices_convertBE<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
#endif #endif
} }
else if (indexType == LatteIndexType::U32_BE) else if (indexType == LatteIndexType::U32_BE)
@ -707,7 +783,7 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32
else else
LatteIndices_convertBE<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax); LatteIndices_convertBE<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
#else #else
LatteIndices_convertBE<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax); LatteIndices_convertBE<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
#endif #endif
} }
else if (indexType == LatteIndexType::U16_LE) else if (indexType == LatteIndexType::U16_LE)

View file

@ -449,14 +449,6 @@ bool LatteMRT::UpdateCurrentFBO()
uint8 colorBufferMask = GetActiveColorBufferMask(pixelShader, LatteGPUState.contextNew); uint8 colorBufferMask = GetActiveColorBufferMask(pixelShader, LatteGPUState.contextNew);
bool depthBufferMask = GetActiveDepthBufferMask(LatteGPUState.contextNew); bool depthBufferMask = GetActiveDepthBufferMask(LatteGPUState.contextNew);
// if depth test is not used then detach the depth buffer
bool depthEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_ENABLE();
bool stencilTestEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ENABLE();
bool backStencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_BACK_STENCIL_ENABLE();
if (!depthEnable && !stencilTestEnable && !backStencilEnable)
depthBufferMask = false;
bool hasResizedTexture = false; // set to true if any of the color buffers or the depth buffer reference a resized texture (via graphic pack texture rules) bool hasResizedTexture = false; // set to true if any of the color buffers or the depth buffer reference a resized texture (via graphic pack texture rules)
sLatteRenderTargetState.renderTargetIsResized = false; sLatteRenderTargetState.renderTargetIsResized = false;
// real size // real size
@ -723,8 +715,8 @@ void LatteRenderTarget_applyTextureColorClear(LatteTexture* texture, uint32 slic
void LatteRenderTarget_applyTextureDepthClear(LatteTexture* texture, uint32 sliceIndex, uint32 mipIndex, bool hasDepthClear, bool hasStencilClear, float depthValue, uint8 stencilValue, uint64 eventCounter) void LatteRenderTarget_applyTextureDepthClear(LatteTexture* texture, uint32 sliceIndex, uint32 mipIndex, bool hasDepthClear, bool hasStencilClear, float depthValue, uint8 stencilValue, uint64 eventCounter)
{ {
if(texture->isDepth) if(texture->isDepth)
{ {
g_renderer->texture_clearDepthSlice(texture, sliceIndex, mipIndex, hasDepthClear, hasStencilClear, depthValue, stencilValue); g_renderer->texture_clearDepthSlice(texture, sliceIndex, mipIndex, hasDepthClear, hasStencilClear, depthValue, stencilValue);
} }
else else
@ -883,7 +875,7 @@ void LatteRenderTarget_copyToBackbuffer(LatteTextureView* textureView, bool isPa
textureView->baseTexture->GetEffectiveSize(effectiveWidth, effectiveHeight, 0); textureView->baseTexture->GetEffectiveSize(effectiveWidth, effectiveHeight, 0);
_currentOutputImageWidth = effectiveWidth; _currentOutputImageWidth = effectiveWidth;
_currentOutputImageHeight = effectiveHeight; _currentOutputImageHeight = effectiveHeight;
sint32 imageX, imageY; sint32 imageX, imageY;
sint32 imageWidth, imageHeight; sint32 imageWidth, imageHeight;
sint32 fullscreenWidth, fullscreenHeight; sint32 fullscreenWidth, fullscreenHeight;
@ -1037,7 +1029,7 @@ void LatteRenderTarget_updateViewport()
float vpX = LatteGPUState.contextNew.PA_CL_VPORT_XOFFSET.get_OFFSET() - LatteGPUState.contextNew.PA_CL_VPORT_XSCALE.get_SCALE(); float vpX = LatteGPUState.contextNew.PA_CL_VPORT_XOFFSET.get_OFFSET() - LatteGPUState.contextNew.PA_CL_VPORT_XSCALE.get_SCALE();
float vpHeight = LatteGPUState.contextNew.PA_CL_VPORT_YSCALE.get_SCALE() / -0.5f; float vpHeight = LatteGPUState.contextNew.PA_CL_VPORT_YSCALE.get_SCALE() / -0.5f;
float vpY = LatteGPUState.contextNew.PA_CL_VPORT_YOFFSET.get_OFFSET() + LatteGPUState.contextNew.PA_CL_VPORT_YSCALE.get_SCALE(); float vpY = LatteGPUState.contextNew.PA_CL_VPORT_YOFFSET.get_OFFSET() + LatteGPUState.contextNew.PA_CL_VPORT_YSCALE.get_SCALE();
bool halfZ = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF(); bool halfZ = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF();
// calculate near/far // calculate near/far

View file

@ -9,10 +9,15 @@
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h" #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency #include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
#include "Cafe/GraphicPack/GraphicPack2.h" #include "Cafe/GraphicPack/GraphicPack2.h"
#include "HW/Latte/Core/Latte.h"
#include "HW/Latte/Renderer/Renderer.h"
#include "util/helpers/StringParser.h" #include "util/helpers/StringParser.h"
#include "config/ActiveSettings.h" #include "config/ActiveSettings.h"
#include "Cafe/GameProfile/GameProfile.h" #include "Cafe/GameProfile/GameProfile.h"
#include "util/containers/flat_hash_map.hpp" #include "util/containers/flat_hash_map.hpp"
#if ENABLE_METAL
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#endif
#include <cinttypes> #include <cinttypes>
// experimental new decompiler (WIP) // experimental new decompiler (WIP)
@ -77,7 +82,7 @@ inline ska::flat_hash_map<uint64, LatteDecompilerShader*>& LatteSHRC_GetCacheByT
if (shaderType == LatteConst::ShaderType::Vertex) if (shaderType == LatteConst::ShaderType::Vertex)
return sVertexShaders; return sVertexShaders;
else if (shaderType == LatteConst::ShaderType::Geometry) else if (shaderType == LatteConst::ShaderType::Geometry)
return sGeometryShaders; return sGeometryShaders;
cemu_assert_debug(shaderType == LatteConst::ShaderType::Pixel); cemu_assert_debug(shaderType == LatteConst::ShaderType::Pixel);
return sPixelShaders; return sPixelShaders;
} }
@ -205,11 +210,9 @@ void LatteShader_free(LatteDecompilerShader* shader)
delete shader; delete shader;
} }
// both vertex and geometry/pixel shader depend on PS inputs void LatteShader_CreatePSInputTable(LatteShaderPSInputTable* psInputTable, uint32* contextRegisters)
// we prepare the PS import info in advance
void LatteShader_UpdatePSInputs(uint32* contextRegisters)
{ {
// PS control // PS control
uint32 psControl0 = contextRegisters[mmSPI_PS_IN_CONTROL_0]; uint32 psControl0 = contextRegisters[mmSPI_PS_IN_CONTROL_0];
uint32 spi0_positionEnable = (psControl0 >> 8) & 1; uint32 spi0_positionEnable = (psControl0 >> 8) & 1;
uint32 spi0_positionCentroid = (psControl0 >> 9) & 1; uint32 spi0_positionCentroid = (psControl0 >> 9) & 1;
@ -238,12 +241,12 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
{ {
key += std::rotr<uint64>(spi0_paramGen, 7); key += std::rotr<uint64>(spi0_paramGen, 7);
key += std::rotr<uint64>(spi0_paramGenAddr, 3); key += std::rotr<uint64>(spi0_paramGenAddr, 3);
_activePSImportTable.paramGen = spi0_paramGen; psInputTable->paramGen = spi0_paramGen;
_activePSImportTable.paramGenGPR = spi0_paramGenAddr; psInputTable->paramGenGPR = spi0_paramGenAddr;
} }
else else
{ {
_activePSImportTable.paramGen = 0; psInputTable->paramGen = 0;
} }
// semantic imports from vertex shader // semantic imports from vertex shader
@ -277,9 +280,9 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
key = std::rotl<uint64>(key, 7); key = std::rotl<uint64>(key, 7);
if (spi0_positionEnable && f == spi0_positionAddr) if (spi0_positionEnable && f == spi0_positionAddr)
{ {
_activePSImportTable.import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION; psInputTable->import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION;
_activePSImportTable.import[f].isFlat = false; psInputTable->import[f].isFlat = false;
_activePSImportTable.import[f].isNoPerspective = false; psInputTable->import[f].isNoPerspective = false;
key += (uint64)0x33; key += (uint64)0x33;
} }
else else
@ -292,13 +295,20 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
semanticMask[psSemanticId >> 3] |= (1 << (psSemanticId & 7)); semanticMask[psSemanticId >> 3] |= (1 << (psSemanticId & 7));
#endif #endif
_activePSImportTable.import[f].semanticId = psSemanticId; psInputTable->import[f].semanticId = psSemanticId;
_activePSImportTable.import[f].isFlat = (psInputControl&(1 << 10)) != 0; psInputTable->import[f].isFlat = (psInputControl&(1 << 10)) != 0;
_activePSImportTable.import[f].isNoPerspective = (psInputControl&(1 << 12)) != 0; psInputTable->import[f].isNoPerspective = (psInputControl&(1 << 12)) != 0;
} }
} }
_activePSImportTable.key = key; psInputTable->key = key;
_activePSImportTable.count = numPSInputs; psInputTable->count = numPSInputs;
}
// both vertex and geometry/pixel shader depend on PS inputs
// we prepare the PS import info in advance
void LatteShader_UpdatePSInputs(uint32* contextRegisters)
{
LatteShader_CreatePSInputTable(&_activePSImportTable, contextRegisters);
} }
void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync) void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync)
@ -320,7 +330,7 @@ void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compil
{ {
shaderType = RendererShader::ShaderType::kGeometry; shaderType = RendererShader::ShaderType::kGeometry;
gpShaderType = GraphicPack2::GP_SHADER_TYPE::GEOMETRY; gpShaderType = GraphicPack2::GP_SHADER_TYPE::GEOMETRY;
} }
else if (shader->shaderType == LatteConst::ShaderType::Pixel) else if (shader->shaderType == LatteConst::ShaderType::Pixel)
{ {
shaderType = RendererShader::ShaderType::kFragment; shaderType = RendererShader::ShaderType::kFragment;
@ -330,7 +340,7 @@ void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compil
// check if a custom shader is present // check if a custom shader is present
std::string shaderSrc; std::string shaderSrc;
const std::string* customShaderSrc = GraphicPack2::FindCustomShaderSource(shader->baseHash, shader->auxHash, gpShaderType, g_renderer->GetType() == RendererAPI::Vulkan); const std::string* customShaderSrc = GraphicPack2::FindCustomShaderSource(shader->baseHash, shader->auxHash, gpShaderType, g_renderer->GetType() == RendererAPI::Vulkan, g_renderer->GetType() == RendererAPI::Metal);
if (customShaderSrc) if (customShaderSrc)
{ {
shaderSrc.assign(*customShaderSrc); shaderSrc.assign(*customShaderSrc);
@ -443,7 +453,7 @@ void LatteShader_DumpShader(uint64 baseHash, uint64 auxHash, LatteDecompilerShad
{ {
if (!ActiveSettings::DumpShadersEnabled()) if (!ActiveSettings::DumpShadersEnabled())
return; return;
const char* suffix = ""; const char* suffix = "";
if (shader->shaderType == LatteConst::ShaderType::Vertex) if (shader->shaderType == LatteConst::ShaderType::Vertex)
suffix = "vs"; suffix = "vs";
@ -500,6 +510,7 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
vsHash += tmp; vsHash += tmp;
auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE(); auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
// TODO: include always in the hash in case of geometry shader or rect shader on Metal
if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS) if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS)
{ {
vsHash += 13ULL; vsHash += 13ULL;
@ -514,6 +525,37 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF()) if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
vsHash += 0x1537; vsHash += 0x1537;
#if ENABLE_METAL
if (g_renderer->GetType() == RendererAPI::Metal)
{
bool isRectVertexShader = (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);
if ((usesGeometryShader || isRectVertexShader) || _activeFetchShader->mtlFetchVertexManually)
{
for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g];
uint32 bufferIndex = group.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
vsHash += (uint64)bufferStride;
vsHash = std::rotl<uint64>(vsHash, 7);
}
}
if (!(usesGeometryShader || isRectVertexShader))
{
if (LatteGPUState.contextNew.IsRasterizationEnabled())
vsHash += 51ULL;
// Vertex fetch
if (_activeFetchShader->mtlFetchVertexManually)
vsHash += 349ULL;
}
}
#endif
_shaderBaseHash_vs = vsHash; _shaderBaseHash_vs = vsHash;
} }
@ -539,6 +581,7 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b
_calculateShaderProgramHash(psProgramCode, pixelShaderSize, &hashCachePS, &psHash1, &psHash2); _calculateShaderProgramHash(psProgramCode, pixelShaderSize, &hashCachePS, &psHash1, &psHash2);
// get vertex shader // get vertex shader
uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL); uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL);
_shaderBaseHash_ps = psHash; _shaderBaseHash_ps = psHash;
} }
@ -572,6 +615,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
auxHashTex += 0x333; auxHashTex += 0x333;
} }
} }
return auxHash + auxHashTex; return auxHash + auxHashTex;
} }
@ -605,6 +649,35 @@ uint64 LatteSHRC_CalcPSAuxHash(LatteDecompilerShader* pixelShader, uint32* conte
auxHash = (auxHash << 3) | (auxHash >> 61); auxHash = (auxHash << 3) | (auxHash >> 61);
auxHash += (uint64)dim; auxHash += (uint64)dim;
} }
// Textures as render targets
for (uint32 i = 0; i < pixelShader->textureUnitListCount; i++)
{
uint8 t = pixelShader->textureUnitList[i];
auxHash = std::rotl<uint64>(auxHash, 11);
auxHash += (uint64)pixelShader->textureRenderTargetIndex[t];
}
#if ENABLE_METAL
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
auxHash = std::rotl<uint64>(auxHash, 7);
auxHash += (uint64)dataType;
}
bool hasDepthBuffer = LatteMRT::GetActiveDepthBufferMask(LatteGPUState.contextNew);
if (hasDepthBuffer)
{
auxHash = std::rotl<uint64>(auxHash, 5);
auxHash += 13u;
}
}
#endif
return auxHash; return auxHash;
} }
@ -613,10 +686,13 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
LatteDecompilerShader* shader = decompilerOutput.shader; LatteDecompilerShader* shader = decompilerOutput.shader;
shader->baseHash = baseHash; shader->baseHash = baseHash;
// copy resource mapping // copy resource mapping
if(g_renderer->GetType() == RendererAPI::Vulkan) // HACK
if (g_renderer->GetType() == RendererAPI::Vulkan)
shader->resourceMapping = decompilerOutput.resourceMappingVK; shader->resourceMapping = decompilerOutput.resourceMappingVK;
else else if (g_renderer->GetType() == RendererAPI::OpenGL)
shader->resourceMapping = decompilerOutput.resourceMappingGL; shader->resourceMapping = decompilerOutput.resourceMappingGL;
else
shader->resourceMapping = decompilerOutput.resourceMappingMTL;
// copy texture info // copy texture info
shader->textureUnitMask2 = decompilerOutput.textureUnitMask; shader->textureUnitMask2 = decompilerOutput.textureUnitMask;
// copy streamout info // copy streamout info
@ -624,7 +700,8 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
shader->hasStreamoutBufferWrite = decompilerOutput.streamoutBufferWriteMask.any(); shader->hasStreamoutBufferWrite = decompilerOutput.streamoutBufferWriteMask.any();
// copy uniform offsets // copy uniform offsets
// for OpenGL these are retrieved in _prepareSeparableUniforms() // for OpenGL these are retrieved in _prepareSeparableUniforms()
if (g_renderer->GetType() == RendererAPI::Vulkan) // HACK
if (g_renderer->GetType() != RendererAPI::OpenGL)
{ {
shader->uniform.loc_remapped = decompilerOutput.uniformOffsetsVK.offset_remapped; shader->uniform.loc_remapped = decompilerOutput.uniformOffsetsVK.offset_remapped;
shader->uniform.loc_uniformRegister = decompilerOutput.uniformOffsetsVK.offset_uniformRegister; shader->uniform.loc_uniformRegister = decompilerOutput.uniformOffsetsVK.offset_uniformRegister;
@ -684,9 +761,9 @@ void LatteShader_GetDecompilerOptions(LatteDecompilerOptions& options, LatteCons
{ {
options.usesGeometryShader = geometryShaderEnabled; options.usesGeometryShader = geometryShaderEnabled;
options.spirvInstrinsics.hasRoundingModeRTEFloat32 = false; options.spirvInstrinsics.hasRoundingModeRTEFloat32 = false;
options.useTFViaSSBO = g_renderer->UseTFViaSSBO();
if (g_renderer->GetType() == RendererAPI::Vulkan) if (g_renderer->GetType() == RendererAPI::Vulkan)
{ {
options.useTFViaSSBO = VulkanRenderer::GetInstance()->UseTFViaSSBO();
options.spirvInstrinsics.hasRoundingModeRTEFloat32 = VulkanRenderer::GetInstance()->HasSPRIVRoundingModeRTE32(); options.spirvInstrinsics.hasRoundingModeRTEFloat32 = VulkanRenderer::GetInstance()->HasSPRIVRoundingModeRTE32();
} }
options.strictMul = g_current_game_profile->GetAccurateShaderMul() != AccurateShaderMulOption::False; options.strictMul = g_current_game_profile->GetAccurateShaderMul() != AccurateShaderMulOption::False;
@ -1009,4 +1086,4 @@ void LatteSHRC_UnloadAll()
while(!sPixelShaders.empty()) while(!sPixelShaders.empty())
LatteShader_free(sPixelShaders.begin()->second); LatteShader_free(sPixelShaders.begin()->second);
cemu_assert_debug(sPixelShaders.empty()); cemu_assert_debug(sPixelShaders.empty());
} }

View file

@ -84,6 +84,7 @@ struct LatteShaderPSInputTable
} }
}; };
void LatteShader_CreatePSInputTable(LatteShaderPSInputTable* psInputTable, uint32* contextRegisters);
void LatteShader_UpdatePSInputs(uint32* contextRegisters); void LatteShader_UpdatePSInputs(uint32* contextRegisters);
LatteShaderPSInputTable* LatteSHRC_GetPSInputTable(); LatteShaderPSInputTable* LatteSHRC_GetPSInputTable();
@ -126,4 +127,4 @@ void LatteShaderCache_writeSeparableGeometryShader(uint64 shaderBaseHash, uint64
void LatteShaderCache_writeSeparablePixelShader(uint64 shaderBaseHash, uint64 shaderAuxHash, uint8* pixelShader, uint32 pixelShaderSize, uint32* contextRegisters, bool usesGeometryShader); void LatteShaderCache_writeSeparablePixelShader(uint64 shaderBaseHash, uint64 shaderAuxHash, uint8* pixelShader, uint32 pixelShaderSize, uint32* contextRegisters, bool usesGeometryShader);
// todo - refactor this // todo - refactor this
sint32 LatteDecompiler_getTextureSamplerBaseIndex(LatteConst::ShaderType shaderType); sint32 LatteDecompiler_getTextureSamplerBaseIndex(LatteConst::ShaderType shaderType);

View file

@ -11,6 +11,10 @@
#include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h" #include "Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h" #include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
#if ENABLE_METAL
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
#endif
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.h" #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.h"
#include <imgui.h> #include <imgui.h>
@ -44,7 +48,7 @@ struct
sint32 pixelShaderCount; sint32 pixelShaderCount;
}shaderCacheScreenStats; }shaderCacheScreenStats;
struct struct
{ {
ImTextureID textureTVId; ImTextureID textureTVId;
ImTextureID textureDRCId; ImTextureID textureDRCId;
@ -65,7 +69,7 @@ FileCache* s_shaderCacheGeneric = nullptr; // contains hardware and version inde
#define SHADER_CACHE_TYPE_PIXEL (2) #define SHADER_CACHE_TYPE_PIXEL (2)
bool LatteShaderCache_readSeparableShader(uint8* shaderInfoData, sint32 shaderInfoSize); bool LatteShaderCache_readSeparableShader(uint8* shaderInfoData, sint32 shaderInfoSize);
void LatteShaderCache_LoadVulkanPipelineCache(uint64 cacheTitleId); void LatteShaderCache_LoadPipelineCache(uint64 cacheTitleId);
bool LatteShaderCache_updatePipelineLoadingProgress(); bool LatteShaderCache_updatePipelineLoadingProgress();
void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateFunc, bool isPipelines); void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateFunc, bool isPipelines);
@ -272,10 +276,14 @@ static BootSoundPlayer g_bootSndPlayer;
void LatteShaderCache_finish() void LatteShaderCache_finish()
{ {
if (g_renderer->GetType() == RendererAPI::Vulkan) if (g_renderer->GetType() == RendererAPI::Vulkan)
RendererShaderVk::ShaderCacheLoading_end(); RendererShaderVk::ShaderCacheLoading_end();
else if (g_renderer->GetType() == RendererAPI::OpenGL) else if (g_renderer->GetType() == RendererAPI::OpenGL)
RendererShaderGL::ShaderCacheLoading_end(); RendererShaderGL::ShaderCacheLoading_end();
#if ENABLE_METAL
else if (g_renderer->GetType() == RendererAPI::Metal)
RendererShaderMtl::ShaderCacheLoading_end();
#endif
} }
uint32 LatteShaderCache_getShaderCacheExtraVersion(uint64 titleId) uint32 LatteShaderCache_getShaderCacheExtraVersion(uint64 titleId)
@ -358,8 +366,17 @@ void LatteShaderCache_Load()
RendererShaderVk::ShaderCacheLoading_begin(cacheTitleId); RendererShaderVk::ShaderCacheLoading_begin(cacheTitleId);
else if (g_renderer->GetType() == RendererAPI::OpenGL) else if (g_renderer->GetType() == RendererAPI::OpenGL)
RendererShaderGL::ShaderCacheLoading_begin(cacheTitleId); RendererShaderGL::ShaderCacheLoading_begin(cacheTitleId);
#if ENABLE_METAL
else if (g_renderer->GetType() == RendererAPI::Metal)
RendererShaderMtl::ShaderCacheLoading_begin(cacheTitleId);
#endif
// get cache file name // get cache file name
const auto pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId); fs::path pathGeneric;
if (g_renderer->GetType() == RendererAPI::Metal)
pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_mtlshaders.bin", cacheTitleId);
else
pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId);
const auto pathGenericPre1_25_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}.bin", cacheTitleId); // before 1.25.0 const auto pathGenericPre1_25_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}.bin", cacheTitleId); // before 1.25.0
const auto pathGenericPre1_16_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:08x}.bin", CafeSystem::GetRPXHashBase()); // before 1.16.0 const auto pathGenericPre1_16_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:08x}.bin", CafeSystem::GetRPXHashBase()); // before 1.16.0
@ -446,7 +463,7 @@ void LatteShaderCache_Load()
}; };
LatteShaderCache_ShowProgress(LoadShadersUpdate, false); LatteShaderCache_ShowProgress(LoadShadersUpdate, false);
LatteShaderCache_updateCompileQueue(0); LatteShaderCache_updateCompileQueue(0);
// write load time and RAM usage to log file (in dev build) // write load time and RAM usage to log file (in dev build)
#if BOOST_OS_WINDOWS #if BOOST_OS_WINDOWS
@ -459,9 +476,9 @@ void LatteShaderCache_Load()
cemuLog_log(LogType::Force, "Shader cache loaded with {} shaders. Commited mem {}MB. Took {}ms", numLoadedShaders, (sint32)(memCommited/1024/1024), timeLoad); cemuLog_log(LogType::Force, "Shader cache loaded with {} shaders. Commited mem {}MB. Took {}ms", numLoadedShaders, (sint32)(memCommited/1024/1024), timeLoad);
#endif #endif
LatteShaderCache_finish(); LatteShaderCache_finish();
// if Vulkan then also load pipeline cache // if Vulkan or Metal then also load pipeline cache
if (g_renderer->GetType() == RendererAPI::Vulkan) if (g_renderer->GetType() == RendererAPI::Vulkan || g_renderer->GetType() == RendererAPI::Metal)
LatteShaderCache_LoadVulkanPipelineCache(cacheTitleId); LatteShaderCache_LoadPipelineCache(cacheTitleId);
g_renderer->BeginFrame(true); g_renderer->BeginFrame(true);
@ -494,7 +511,7 @@ void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateF
{ {
const auto kPopupFlags = ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoFocusOnAppearing | ImGuiWindowFlags_NoNav | ImGuiWindowFlags_AlwaysAutoResize; const auto kPopupFlags = ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoFocusOnAppearing | ImGuiWindowFlags_NoNav | ImGuiWindowFlags_AlwaysAutoResize;
const auto textColor = 0xFF888888; const auto textColor = 0xFF888888;
auto lastFrameUpdate = tick_cached(); auto lastFrameUpdate = tick_cached();
while (true) while (true)
@ -547,7 +564,7 @@ void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateF
std::string text; std::string text;
if (isPipelines) if (isPipelines)
{ {
text = "Loading cached Vulkan pipelines..."; text = "Loading cached pipelines...";
} }
else else
{ {
@ -621,19 +638,35 @@ void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateF
} }
} }
void LatteShaderCache_LoadVulkanPipelineCache(uint64 cacheTitleId) void LatteShaderCache_LoadPipelineCache(uint64 cacheTitleId)
{ {
auto& pipelineCache = VulkanPipelineStableCache::GetInstance(); if (g_renderer->GetType() == RendererAPI::Vulkan)
g_shaderCacheLoaderState.pipelineFileCount = pipelineCache.BeginLoading(cacheTitleId); g_shaderCacheLoaderState.pipelineFileCount = VulkanPipelineStableCache::GetInstance().BeginLoading(cacheTitleId);
#if ENABLE_METAL
else if (g_renderer->GetType() == RendererAPI::Metal)
g_shaderCacheLoaderState.pipelineFileCount = MetalPipelineCache::GetInstance().BeginLoading(cacheTitleId);
#endif
g_shaderCacheLoaderState.loadedPipelines = 0; g_shaderCacheLoaderState.loadedPipelines = 0;
LatteShaderCache_ShowProgress(LatteShaderCache_updatePipelineLoadingProgress, true); LatteShaderCache_ShowProgress(LatteShaderCache_updatePipelineLoadingProgress, true);
pipelineCache.EndLoading(); if (g_renderer->GetType() == RendererAPI::Vulkan)
VulkanPipelineStableCache::GetInstance().EndLoading();
#if ENABLE_METAL
else if (g_renderer->GetType() == RendererAPI::Metal)
MetalPipelineCache::GetInstance().EndLoading();
#endif
} }
bool LatteShaderCache_updatePipelineLoadingProgress() bool LatteShaderCache_updatePipelineLoadingProgress()
{ {
uint32 pipelinesMissingShaders = 0; uint32 pipelinesMissingShaders = 0;
return VulkanPipelineStableCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders); if (g_renderer->GetType() == RendererAPI::Vulkan)
return VulkanPipelineStableCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders);
#if ENABLE_METAL
else if (g_renderer->GetType() == RendererAPI::Metal)
return MetalPipelineCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders);
#endif
return false;
} }
uint64 LatteShaderCache_getShaderNameInTransferableCache(uint64 baseHash, uint32 shaderType) uint64 LatteShaderCache_getShaderNameInTransferableCache(uint64 baseHash, uint32 shaderType)
@ -892,13 +925,21 @@ void LatteShaderCache_Close()
s_shaderCacheGeneric = nullptr; s_shaderCacheGeneric = nullptr;
} }
if (g_renderer->GetType() == RendererAPI::Vulkan) if (g_renderer->GetType() == RendererAPI::Vulkan)
RendererShaderVk::ShaderCacheLoading_Close(); RendererShaderVk::ShaderCacheLoading_Close();
else if (g_renderer->GetType() == RendererAPI::OpenGL) else if (g_renderer->GetType() == RendererAPI::OpenGL)
RendererShaderGL::ShaderCacheLoading_Close(); RendererShaderGL::ShaderCacheLoading_Close();
#if ENABLE_METAL
else if (g_renderer->GetType() == RendererAPI::Metal)
RendererShaderMtl::ShaderCacheLoading_Close();
#endif
// if Vulkan then also close pipeline cache // if Vulkan or Metal then also close pipeline cache
if (g_renderer->GetType() == RendererAPI::Vulkan) if (g_renderer->GetType() == RendererAPI::Vulkan)
VulkanPipelineStableCache::GetInstance().Close(); VulkanPipelineStableCache::GetInstance().Close();
#if ENABLE_METAL
else if (g_renderer->GetType() == RendererAPI::Metal)
MetalPipelineCache::GetInstance().Close();
#endif
} }
#include <wx/msgdlg.h> #include <wx/msgdlg.h>
@ -914,7 +955,7 @@ void LatteShaderCache_handleDeprecatedCacheFiles(fs::path pathGeneric, fs::path
{ {
// ask user if they want to delete or keep the old cache file // ask user if they want to delete or keep the old cache file
auto infoMsg = _("Cemu detected that the shader cache for this game is outdated.\nOnly shader caches generated with Cemu 1.25.0 or above are supported.\n\nWe recommend deleting the outdated cache file as it will no longer be used by Cemu."); auto infoMsg = _("Cemu detected that the shader cache for this game is outdated.\nOnly shader caches generated with Cemu 1.25.0 or above are supported.\n\nWe recommend deleting the outdated cache file as it will no longer be used by Cemu.");
wxMessageDialog dialog(nullptr, infoMsg, _("Outdated shader cache"), wxMessageDialog dialog(nullptr, infoMsg, _("Outdated shader cache"),
wxYES_NO | wxCENTRE | wxICON_EXCLAMATION); wxYES_NO | wxCENTRE | wxICON_EXCLAMATION);

View file

@ -26,7 +26,7 @@ bool gxShader_checkIfSuccessfullyLinked(GLuint glProgram)
void LatteShader_prepareSeparableUniforms(LatteDecompilerShader* shader) void LatteShader_prepareSeparableUniforms(LatteDecompilerShader* shader)
{ {
if (g_renderer->GetType() == RendererAPI::Vulkan) if (g_renderer->GetType() != RendererAPI::OpenGL)
return; return;
auto shaderGL = (RendererShaderGL*)shader->shader; auto shaderGL = (RendererShaderGL*)shader->shader;

View file

@ -170,7 +170,7 @@ void LatteTexture_UnregisterTextureMemoryOccupancy(LatteTexture* texture)
} }
// calculate the actually accessed data range // calculate the actually accessed data range
// the resulting range is an estimate and may be smaller than the actual slice size (but not larger) // the resulting range is an estimate and may be smaller than the actual slice size (but not larger)
void LatteTexture_EstimateMipSliceAccessedDataRange(LatteTexture* texture, sint32 sliceIndex, sint32 mipIndex, LatteTextureSliceMipInfo* sliceMipInfo) void LatteTexture_EstimateMipSliceAccessedDataRange(LatteTexture* texture, sint32 sliceIndex, sint32 mipIndex, LatteTextureSliceMipInfo* sliceMipInfo)
{ {
uint32 estAddrStart; uint32 estAddrStart;
@ -222,7 +222,7 @@ void LatteTexture_InitSliceAndMipInfo(LatteTexture* texture)
LatteAddrLib::AddrSurfaceInfo_OUT surfaceInfo; LatteAddrLib::AddrSurfaceInfo_OUT surfaceInfo;
LatteAddrLib::GX2CalculateSurfaceInfo(texture->format, texture->width, texture->height, texture->depth, texture->dim, Latte::MakeGX2TileMode(texture->tileMode), 0, mipIndex, &surfaceInfo); LatteAddrLib::GX2CalculateSurfaceInfo(texture->format, texture->width, texture->height, texture->depth, texture->dim, Latte::MakeGX2TileMode(texture->tileMode), 0, mipIndex, &surfaceInfo);
sliceMipInfo->tileMode = surfaceInfo.hwTileMode; sliceMipInfo->tileMode = surfaceInfo.hwTileMode;
if (mipIndex == 0) if (mipIndex == 0)
sliceMipInfo->pitch = texture->pitch; // for the base level, use the pitch value configured in hardware sliceMipInfo->pitch = texture->pitch; // for the base level, use the pitch value configured in hardware
else else
@ -877,7 +877,7 @@ VIEWCOMPATIBILITY LatteTexture_CanTextureBeRepresentedAsView(LatteTexture* baseT
// check pitch // check pitch
if(sliceMipInfo->pitch != pitch) if(sliceMipInfo->pitch != pitch)
continue; continue;
// check all slices // check all slices
if(LatteAddrLib::TM_IsThickAndMacroTiled(baseTexture->tileMode)) if(LatteAddrLib::TM_IsThickAndMacroTiled(baseTexture->tileMode))
continue; // todo - check only every 4th slice? continue; // todo - check only every 4th slice?
for (sint32 s=0; s<baseTexture->GetMipDepth(m); s++) for (sint32 s=0; s<baseTexture->GetMipDepth(m); s++)
@ -978,7 +978,7 @@ LatteTextureView* LatteTexture_CreateMapping(MPTR physAddr, MPTR physMipAddr, si
} }
// note: When creating an existing texture, we only allow mip and slice expansion at the end // note: When creating an existing texture, we only allow mip and slice expansion at the end
cemu_assert_debug(depth); cemu_assert_debug(depth);
cemu_assert_debug(!(depth > 1 && dimBase == Latte::E_DIM::DIM_2D)); cemu_assert_debug(!(depth > 1 && dimBase == Latte::E_DIM::DIM_2D));
cemu_assert_debug(!(numSlice > 1 && dimView == Latte::E_DIM::DIM_2D)); cemu_assert_debug(!(numSlice > 1 && dimView == Latte::E_DIM::DIM_2D));
// todo, depth and numSlice are redundant // todo, depth and numSlice are redundant
@ -1308,6 +1308,40 @@ LatteTexture::LatteTexture(Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddre
{ {
this->enableReadback = true; this->enableReadback = true;
} }
// calculate number of potential mip levels (from effective size)
sint32 effectiveWidth = width;
sint32 effectiveHeight = height;
sint32 effectiveDepth = depth;
if (this->overwriteInfo.hasResolutionOverwrite)
{
effectiveWidth = this->overwriteInfo.width;
effectiveHeight = this->overwriteInfo.height;
effectiveDepth = this->overwriteInfo.depth;
}
this->maxPossibleMipLevels = 1;
if (dim != Latte::E_DIM::DIM_3D)
{
for (sint32 i = 0; i < 20; i++)
{
if ((effectiveWidth >> i) <= 1 && (effectiveHeight >> i) <= 1)
{
this->maxPossibleMipLevels = i + 1;
break;
}
}
}
else
{
for (sint32 i = 0; i < 20; i++)
{
if ((effectiveWidth >> i) <= 1 && (effectiveHeight >> i) <= 1 && (effectiveDepth >> i) <= 1)
{
this->maxPossibleMipLevels = i + 1;
break;
}
}
}
} }
LatteTexture::~LatteTexture() LatteTexture::~LatteTexture()

View file

@ -13,7 +13,7 @@ struct TexScaleXY
float xy[2]; float xy[2];
}; };
struct struct
{ {
TexScaleXY perUnit[Latte::GPU_LIMITS::NUM_TEXTURES_PER_STAGE]; // stores actualResolution/effectiveResolution ratio for each texture TexScaleXY perUnit[Latte::GPU_LIMITS::NUM_TEXTURES_PER_STAGE]; // stores actualResolution/effectiveResolution ratio for each texture
}LatteTextureScale[static_cast<size_t>(LatteConst::ShaderType::TotalCount)] = { }; }LatteTextureScale[static_cast<size_t>(LatteConst::ShaderType::TotalCount)] = { };
@ -73,46 +73,16 @@ void LatteTexture_ReloadData(LatteTexture* tex)
LatteTextureView* LatteTexture_CreateTexture(Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle, Latte::E_HWTILEMODE tileMode, bool isDepth) LatteTextureView* LatteTexture_CreateTexture(Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle, Latte::E_HWTILEMODE tileMode, bool isDepth)
{ {
const auto tex = g_renderer->texture_createTextureEx(dim, physAddress, physMipAddress, format, width, height, depth, pitch, mipLevels, swizzle, tileMode, isDepth); const auto tex = g_renderer->texture_createTextureEx(dim, physAddress, physMipAddress, format, width, height, depth, pitch, mipLevels, swizzle, tileMode, isDepth);
// init slice/mip info array // init slice/mip info array
LatteTexture_InitSliceAndMipInfo(tex); LatteTexture_InitSliceAndMipInfo(tex);
LatteTexture_RegisterTextureMemoryOccupancy(tex); LatteTexture_RegisterTextureMemoryOccupancy(tex);
cemu_assert_debug(mipLevels != 0); cemu_assert_debug(mipLevels != 0);
// calculate number of potential mip levels (from effective size)
sint32 effectiveWidth = width;
sint32 effectiveHeight = height;
sint32 effectiveDepth = depth;
if (tex->overwriteInfo.hasResolutionOverwrite)
{
effectiveWidth = tex->overwriteInfo.width;
effectiveHeight = tex->overwriteInfo.height;
effectiveDepth = tex->overwriteInfo.depth;
}
tex->maxPossibleMipLevels = 1;
if (dim != Latte::E_DIM::DIM_3D)
{
for (sint32 i = 0; i < 20; i++)
{
if ((effectiveWidth >> i) <= 1 && (effectiveHeight >> i) <= 1)
{
tex->maxPossibleMipLevels = i + 1;
break;
}
}
}
else
{
for (sint32 i = 0; i < 20; i++)
{
if ((effectiveWidth >> i) <= 1 && (effectiveHeight >> i) <= 1 && (effectiveDepth >> i) <= 1)
{
tex->maxPossibleMipLevels = i + 1;
break;
}
}
}
LatteTexture_ReloadData(tex); LatteTexture_ReloadData(tex);
LatteTC_MarkTextureStillInUse(tex); LatteTC_MarkTextureStillInUse(tex);
LatteTC_RegisterTexture(tex); LatteTC_RegisterTexture(tex);
// create initial view that maps to the whole texture // create initial view that maps to the whole texture
tex->baseView = tex->GetOrCreateView(0, tex->mipLevels, 0, tex->depth); tex->baseView = tex->GetOrCreateView(0, tex->mipLevels, 0, tex->depth);
return tex->baseView; return tex->baseView;
@ -371,4 +341,4 @@ uint64 LatteTexture_getNextUpdateEventCounter()
void LatteTexture_init() void LatteTexture_init()
{ {
} }

View file

@ -602,7 +602,7 @@ void LatteTextureLoader_loadTextureDataIntoSlice(LatteTexture* hostTexture, sint
void LatteTextureLoader_UpdateTextureSliceData(LatteTexture* tex, uint32 sliceIndex, uint32 mipIndex, MPTR physImagePtr, MPTR physMipPtr, Latte::E_DIM dim, uint32 width, uint32 height, uint32 depth, uint32 mipLevels, uint32 pitch, Latte::E_HWTILEMODE tileMode, uint32 swizzle, bool dumpTex) void LatteTextureLoader_UpdateTextureSliceData(LatteTexture* tex, uint32 sliceIndex, uint32 mipIndex, MPTR physImagePtr, MPTR physMipPtr, Latte::E_DIM dim, uint32 width, uint32 height, uint32 depth, uint32 mipLevels, uint32 pitch, Latte::E_HWTILEMODE tileMode, uint32 swizzle, bool dumpTex)
{ {
LatteTextureLoaderCtx textureLoader = { 0 }; LatteTextureLoaderCtx textureLoader = { 0 };
Latte::E_GX2SURFFMT format = tex->format; Latte::E_GX2SURFFMT format = tex->format;
LatteTextureLoader_begin(&textureLoader, sliceIndex, mipIndex, physImagePtr, physMipPtr, format, dim, width, height, depth, mipLevels, pitch, tileMode, swizzle); LatteTextureLoader_begin(&textureLoader, sliceIndex, mipIndex, physImagePtr, physMipPtr, format, dim, width, height, depth, mipLevels, pitch, tileMode, swizzle);
@ -853,7 +853,7 @@ void LatteTextureLoader_writeReadbackTextureToMemory(LatteTextureDefinition* tex
pixelInput += 4; pixelInput += 4;
} }
} }
} }
else else
{ {
cemuLog_logDebug(LogType::Force, "Texture readback unsupported format {:04x} for tileMode 0x{:02x}", (uint32)textureData->format, textureData->tileMode); cemuLog_logDebug(LogType::Force, "Texture readback unsupported format {:04x} for tileMode 0x{:02x}", (uint32)textureData->format, textureData->tileMode);

View file

@ -594,7 +594,7 @@ public:
} }
}; };
class TextureDecoder_R4_G4_UNORM_To_RGBA4_vk : public TextureDecoder, public SingletonClass<TextureDecoder_R4_G4_UNORM_To_RGBA4_vk> class TextureDecoder_R4_G4_UNORM_To_ABGR4 : public TextureDecoder, public SingletonClass<TextureDecoder_R4_G4_UNORM_To_ABGR4>
{ {
public: public:
sint32 getBytesPerTexel(LatteTextureLoaderCtx* textureLoader) override sint32 getBytesPerTexel(LatteTextureLoaderCtx* textureLoader) override
@ -679,6 +679,51 @@ public:
} }
}; };
// Decodes packed 4:4 texels (one source byte per texel) into two 8-bit channels.
// The high nibble of the source byte becomes the first output channel (red),
// the low nibble becomes the second output channel (green).
class TextureDecoder_R4G4_UNORM_To_RG8 : public TextureDecoder, public SingletonClass<TextureDecoder_R4G4_UNORM_To_RG8>
{
	// widen a 4-bit value to 8 bits by replicating the nibble (0xN -> 0xNN)
	static uint8 _expand4To8(uint8 nibble)
	{
		return (uint8)((nibble << 4) | nibble);
	}

public:
	// every decoded texel occupies two bytes in the output buffer
	sint32 getBytesPerTexel(LatteTextureLoaderCtx* textureLoader) override
	{
		return 2;
	}

	// Walks the texture in stepX/stepY increments and writes the expanded
	// red/green pair for each visited texel into outputData.
	void decode(LatteTextureLoaderCtx* textureLoader, uint8* outputData) override
	{
		for (sint32 y = 0; y < textureLoader->height; y += textureLoader->stepY)
		{
			for (sint32 x = 0; x < textureLoader->width; x += textureLoader->stepX)
			{
				uint8* srcTexel = LatteTextureLoader_GetInput(textureLoader, x, y);
				// output rows are tightly packed, two bytes per texel
				sint32 dstOffset = (x + y * textureLoader->width) * 2;
				uint8 packed = srcTexel[0];
				uint8* dst = outputData + dstOffset;
				dst[0] = _expand4To8((packed >> 4) & 0xF);
				dst[1] = _expand4To8(packed & 0xF);
			}
		}
	}

	// Single-texel variant producing RGBA8: blue is forced to 0 and alpha to 255.
	void decodePixelToRGBA(uint8* blockData, uint8* outputPixel, uint8 blockOffsetX, uint8 blockOffsetY) override
	{
		uint8 packed = blockData[0];
		outputPixel[0] = _expand4To8((packed >> 4) & 0xF);
		outputPixel[1] = _expand4To8(packed & 0xF);
		outputPixel[2] = 0;
		outputPixel[3] = 255;
	}
};
class TextureDecoder_R4_G4_B4_A4_UNORM : public TextureDecoder, public SingletonClass<TextureDecoder_R4_G4_B4_A4_UNORM> class TextureDecoder_R4_G4_B4_A4_UNORM : public TextureDecoder, public SingletonClass<TextureDecoder_R4_G4_B4_A4_UNORM>
{ {
public: public:
@ -723,7 +768,6 @@ public:
} }
}; };
class TextureDecoder_R4G4B4A4_UNORM_To_RGBA8 : public TextureDecoder, public SingletonClass<TextureDecoder_R4G4B4A4_UNORM_To_RGBA8> class TextureDecoder_R4G4B4A4_UNORM_To_RGBA8 : public TextureDecoder, public SingletonClass<TextureDecoder_R4G4B4A4_UNORM_To_RGBA8>
{ {
public: public:
@ -2121,4 +2165,4 @@ public:
*(outputPixel + 2) = 0; *(outputPixel + 2) = 0;
*(outputPixel + 3) = 255; *(outputPixel + 3) = 255;
} }
}; };

View file

@ -52,20 +52,20 @@ namespace Latte
{ {
// same as E_TILEMODE but contains additional options with special meaning // same as E_TILEMODE but contains additional options with special meaning
TM_LINEAR_GENERAL = 0, TM_LINEAR_GENERAL = 0,
TM_LINEAR_ALIGNED = 1, TM_LINEAR_ALIGNED = 1,
// micro-tiled // micro-tiled
TM_1D_TILED_THIN1 = 2, TM_1D_TILED_THIN1 = 2,
TM_1D_TILED_THICK = 3, TM_1D_TILED_THICK = 3,
// macro-tiled // macro-tiled
TM_2D_TILED_THIN1 = 4, TM_2D_TILED_THIN1 = 4,
TM_2D_TILED_THIN2 = 5, TM_2D_TILED_THIN2 = 5,
TM_2D_TILED_THIN4 = 6, TM_2D_TILED_THIN4 = 6,
TM_2D_TILED_THICK = 7, TM_2D_TILED_THICK = 7,
TM_2B_TILED_THIN1 = 8, TM_2B_TILED_THIN1 = 8,
TM_2B_TILED_THIN2 = 9, TM_2B_TILED_THIN2 = 9,
TM_2B_TILED_THIN4 = 10, TM_2B_TILED_THIN4 = 10,
TM_2B_TILED_THICK = 11, TM_2B_TILED_THICK = 11,
@ -179,7 +179,7 @@ namespace Latte
HWFMT_4_4_4_4 = 0xB, HWFMT_4_4_4_4 = 0xB,
HWFMT_5_5_5_1 = 0xC, HWFMT_5_5_5_1 = 0xC,
HWFMT_32 = 0xD, HWFMT_32 = 0xD,
HWFMT_32_FLOAT = 0xE, HWFMT_32_FLOAT = 0xE,
HWFMT_16_16 = 0xF, HWFMT_16_16 = 0xF,
HWFMT_16_16_FLOAT = 0x10, HWFMT_16_16_FLOAT = 0x10,
HWFMT_8_24 = 0x11, HWFMT_8_24 = 0x11,
@ -284,7 +284,7 @@ namespace Latte
R32_G32_B32_A32_UINT = (HWFMT_32_32_32_32 | FMT_BIT_INT), R32_G32_B32_A32_UINT = (HWFMT_32_32_32_32 | FMT_BIT_INT),
R32_G32_B32_A32_SINT = (HWFMT_32_32_32_32 | FMT_BIT_INT | FMT_BIT_SIGNED), R32_G32_B32_A32_SINT = (HWFMT_32_32_32_32 | FMT_BIT_INT | FMT_BIT_SIGNED),
R32_G32_B32_A32_FLOAT = (HWFMT_32_32_32_32_FLOAT | FMT_BIT_FLOAT), R32_G32_B32_A32_FLOAT = (HWFMT_32_32_32_32_FLOAT | FMT_BIT_FLOAT),
// depth // depth
D24_S8_UNORM = (HWFMT_8_24), D24_S8_UNORM = (HWFMT_8_24),
D24_S8_FLOAT = (HWFMT_8_24 | FMT_BIT_FLOAT), D24_S8_FLOAT = (HWFMT_8_24 | FMT_BIT_FLOAT),
@ -353,7 +353,7 @@ namespace Latte
enum GPU_LIMITS enum GPU_LIMITS
{ {
NUM_VERTEX_BUFFERS = 16, NUM_VERTEX_BUFFERS = 16,
NUM_TEXTURES_PER_STAGE = 18, NUM_TEXTURES_PER_STAGE = 18,
NUM_SAMPLERS_PER_STAGE = 18, // is this 16 or 18? NUM_SAMPLERS_PER_STAGE = 18, // is this 16 or 18?
NUM_COLOR_ATTACHMENTS = 8, NUM_COLOR_ATTACHMENTS = 8,
}; };
@ -1579,7 +1579,7 @@ struct LatteContextRegister
/* +0x3A4C0 */ _LatteRegisterSetTextureUnit SQ_TEX_START_GS[Latte::GPU_LIMITS::NUM_TEXTURES_PER_STAGE]; /* +0x3A4C0 */ _LatteRegisterSetTextureUnit SQ_TEX_START_GS[Latte::GPU_LIMITS::NUM_TEXTURES_PER_STAGE];
uint8 padding_3A6B8[0x3C000 - 0x3A6B8]; uint8 padding_3A6B8[0x3C000 - 0x3A6B8];
/* +0x3C000 */ _LatteRegisterSetSampler SQ_TEX_SAMPLER[18 * 3]; /* +0x3C000 */ _LatteRegisterSetSampler SQ_TEX_SAMPLER[18 * 3];
/* +0x3C288 */ /* +0x3C288 */
@ -1598,6 +1598,24 @@ struct LatteContextRegister
{ {
return (uint32*)hleSpecialState; return (uint32*)hleSpecialState;
} }
bool IsRasterizationEnabled() const
{
bool rasterizationEnabled = !PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
// GX2SetSpecialState(0, true) enables DX_RASTERIZATION_KILL, but still expects depth writes to happen? -> Research which stages are disabled by DX_RASTERIZATION_KILL exactly
// for now we use a workaround:
if (!PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
rasterizationEnabled = true;
// Culling both front and back faces effectively disables rasterization
uint32 cullFront = PA_SU_SC_MODE_CNTL.get_CULL_FRONT();
uint32 cullBack = PA_SU_SC_MODE_CNTL.get_CULL_BACK();
if (cullFront && cullBack)
rasterizationEnabled = false;
return rasterizationEnabled;
}
}; };
static_assert(sizeof(LatteContextRegister) == 0x10000 * 4 + 9 * 4); static_assert(sizeof(LatteContextRegister) == 0x10000 * 4 + 9 * 4);
@ -1664,4 +1682,4 @@ static_assert(offsetof(LatteContextRegister, SQ_PGM_RESOURCES_ES) == Latte::REGA
static_assert(offsetof(LatteContextRegister, SQ_PGM_START_GS) == Latte::REGADDR::SQ_PGM_START_GS * 4); static_assert(offsetof(LatteContextRegister, SQ_PGM_START_GS) == Latte::REGADDR::SQ_PGM_START_GS * 4);
static_assert(offsetof(LatteContextRegister, SQ_PGM_RESOURCES_GS) == Latte::REGADDR::SQ_PGM_RESOURCES_GS * 4); static_assert(offsetof(LatteContextRegister, SQ_PGM_RESOURCES_GS) == Latte::REGADDR::SQ_PGM_RESOURCES_GS * 4);
static_assert(offsetof(LatteContextRegister, SPI_VS_OUT_CONFIG) == Latte::REGADDR::SPI_VS_OUT_CONFIG * 4); static_assert(offsetof(LatteContextRegister, SPI_VS_OUT_CONFIG) == Latte::REGADDR::SPI_VS_OUT_CONFIG * 4);
static_assert(offsetof(LatteContextRegister, LATTE_SPI_VS_OUT_ID_N) == Latte::REGADDR::SPI_VS_OUT_ID_0 * 4); static_assert(offsetof(LatteContextRegister, LATTE_SPI_VS_OUT_ID_N) == Latte::REGADDR::SPI_VS_OUT_ID_0 * 4);

View file

@ -381,4 +381,4 @@ void optimizedDecodeLoops(LatteTextureLoaderCtx* textureLoader, uint8* outputDat
} }
} }
} }
} }

View file

@ -323,8 +323,8 @@ bool LatteDecompiler_IsALUTransInstruction(bool isOP3, uint32 opcode)
} }
else if( opcode == ALU_OP2_INST_MOV || else if( opcode == ALU_OP2_INST_MOV ||
opcode == ALU_OP2_INST_ADD || opcode == ALU_OP2_INST_ADD ||
opcode == ALU_OP2_INST_NOP || opcode == ALU_OP2_INST_NOP ||
opcode == ALU_OP2_INST_MUL || opcode == ALU_OP2_INST_MUL ||
opcode == ALU_OP2_INST_DOT4 || opcode == ALU_OP2_INST_DOT4 ||
opcode == ALU_OP2_INST_DOT4_IEEE || opcode == ALU_OP2_INST_DOT4_IEEE ||
opcode == ALU_OP2_INST_MAX || // Not sure if MIN/MAX are non-transcendental? opcode == ALU_OP2_INST_MAX || // Not sure if MIN/MAX are non-transcendental?
@ -929,7 +929,7 @@ void LatteDecompiler_ParseTEXClause(LatteDecompilerShader* shaderContext, LatteD
texInstruction.memRead.format = dataFormat; texInstruction.memRead.format = dataFormat;
texInstruction.memRead.nfa = nfa; texInstruction.memRead.nfa = nfa;
texInstruction.memRead.isSigned = isSigned; texInstruction.memRead.isSigned = isSigned;
cfInstruction->instructionsTEX.emplace_back(texInstruction); cfInstruction->instructionsTEX.emplace_back(texInstruction);
} }
else else
@ -1068,9 +1068,16 @@ void _LatteDecompiler_Process(LatteDecompilerShaderContext* shaderContext, uint8
LatteDecompiler_analyzeDataTypes(shaderContext); LatteDecompiler_analyzeDataTypes(shaderContext);
// emit code // emit code
if (shaderContext->shader->hasError == false) if (shaderContext->shader->hasError == false)
LatteDecompiler_emitGLSLShader(shaderContext, shaderContext->shader); {
if (g_renderer->GetType() == RendererAPI::OpenGL || g_renderer->GetType() == RendererAPI::Vulkan)
LatteDecompiler_emitGLSLShader(shaderContext, shaderContext->shader);
#if ENABLE_METAL
else
LatteDecompiler_emitMSLShader(shaderContext, shaderContext->shader);
#endif
}
LatteDecompiler_cleanup(shaderContext); LatteDecompiler_cleanup(shaderContext);
// fast access // fast access
_LatteDecompiler_GenerateDataForFastAccess(shaderContext->shader); _LatteDecompiler_GenerateDataForFastAccess(shaderContext->shader);
} }

View file

@ -36,7 +36,7 @@ typedef struct
uint16 mappedIndexOffset; // index in remapped uniform array uint16 mappedIndexOffset; // index in remapped uniform array
}LatteFastAccessRemappedUniformEntry_buffer_t; }LatteFastAccessRemappedUniformEntry_buffer_t;
typedef struct typedef struct
{ {
uint32 texUnit; uint32 texUnit;
sint32 uniformLocation; sint32 uniformLocation;
@ -57,12 +57,16 @@ struct LatteDecompilerShaderResourceMapping
// texture // texture
sint8 textureUnitToBindingPoint[LATTE_NUM_MAX_TEX_UNITS]; sint8 textureUnitToBindingPoint[LATTE_NUM_MAX_TEX_UNITS];
// uniform buffer // uniform buffer
sint8 uniformVarsBufferBindingPoint{}; // special block for uniform registers/remapped array/custom variables sint8 uniformVarsBufferBindingPoint{-1}; // special block for uniform registers/remapped array/custom variables
sint8 uniformBuffersBindingPoint[LATTE_NUM_MAX_UNIFORM_BUFFERS]; sint8 uniformBuffersBindingPoint[LATTE_NUM_MAX_UNIFORM_BUFFERS];
// shader storage buffer for transform feedback (if alternative mode is used) // shader storage buffer for transform feedback (if alternative mode is used)
sint8 tfStorageBindingPoint{-1}; sint8 tfStorageBindingPoint{-1};
// attributes (vertex shader only) // attributes (vertex shader only)
sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS]; sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS];
// Metal exclusive
sint8 verticesPerInstanceBinding{-1};
sint8 indexBufferBinding{-1};
sint8 indexTypeBinding{-1};
sint32 getTextureCount() sint32 getTextureCount()
{ {
@ -179,9 +183,12 @@ struct LatteDecompilerShader
std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2; std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2;
uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined
bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{}; bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{};
uint8 textureRenderTargetIndex[LATTE_NUM_MAX_TEX_UNITS];
// analyzer stage (pixel outputs) // analyzer stage (pixel outputs)
uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments) uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments)
// analyzer stage (depth output)
bool depthMask{ false };
// analyzer stage (geometry shader parameters/inputs) // analyzer stage (geometry shader parameters/inputs)
uint32 ringParameterCount{ 0 }; uint32 ringParameterCount{ 0 };
uint32 ringParameterCountFromPrevStage{ 0 }; // used in geometry shader to hold VS ringParameterCount uint32 ringParameterCountFromPrevStage{ 0 }; // used in geometry shader to hold VS ringParameterCount
@ -198,7 +205,7 @@ struct LatteDecompilerShader
// resource mapping (binding points) // resource mapping (binding points)
LatteDecompilerShaderResourceMapping resourceMapping{}; LatteDecompilerShaderResourceMapping resourceMapping{};
// uniforms // uniforms
struct struct
{ {
sint32 loc_remapped; // uf_remappedVS/uf_remappedGS/uf_remappedPS sint32 loc_remapped; // uf_remappedVS/uf_remappedGS/uf_remappedPS
sint32 loc_uniformRegister; // uf_uniformRegisterVS/uf_uniformRegisterGS/uf_uniformRegisterPS sint32 loc_uniformRegister; // uf_uniformRegisterVS/uf_uniformRegisterGS/uf_uniformRegisterPS
@ -215,7 +222,7 @@ struct LatteDecompilerShader
sint32 uniformRangeSize; // entire size of uniform variable block sint32 uniformRangeSize; // entire size of uniform variable block
}uniform{ 0 }; }uniform{ 0 };
// fast access // fast access
struct _RemappedUniformBufferGroup struct _RemappedUniformBufferGroup
{ {
_RemappedUniformBufferGroup(uint32 _kcacheBankIdOffset) : kcacheBankIdOffset(_kcacheBankIdOffset) {}; _RemappedUniformBufferGroup(uint32 _kcacheBankIdOffset) : kcacheBankIdOffset(_kcacheBankIdOffset) {};
@ -255,14 +262,14 @@ struct LatteDecompilerOutputUniformOffsets
} }
}; };
struct LatteDecompilerOptions struct LatteDecompilerOptions
{ {
bool usesGeometryShader{ false }; bool usesGeometryShader{ false };
// floating point math // floating point math
bool strictMul{}; // if true, 0*anything=0 rule is emulated bool strictMul{}; // if true, 0*anything=0 rule is emulated
// Vulkan-specific // Vulkan-specific
bool useTFViaSSBO{ false }; bool useTFViaSSBO{ false };
struct struct
{ {
bool hasRoundingModeRTEFloat32{ false }; bool hasRoundingModeRTEFloat32{ false };
}spirvInstrinsics; }spirvInstrinsics;
@ -286,6 +293,7 @@ struct LatteDecompilerOutput_t
// mapping and binding information // mapping and binding information
LatteDecompilerShaderResourceMapping resourceMappingGL; LatteDecompilerShaderResourceMapping resourceMappingGL;
LatteDecompilerShaderResourceMapping resourceMappingVK; LatteDecompilerShaderResourceMapping resourceMappingVK;
LatteDecompilerShaderResourceMapping resourceMappingMTL;
}; };
struct LatteDecompilerSubroutineInfo; struct LatteDecompilerSubroutineInfo;
@ -322,4 +330,4 @@ struct LatteParsedGSCopyShader
}; };
LatteParsedGSCopyShader* LatteGSCopyShaderParser_parse(uint8* programData, uint32 programSize); LatteParsedGSCopyShader* LatteGSCopyShaderParser_parse(uint8* programData, uint32 programSize);
bool LatteGSCopyShaderParser_getExportTypeByOffset(LatteParsedGSCopyShader* shaderContext, uint32 offset, uint32* exportType, uint32* exportParam); bool LatteGSCopyShaderParser_getExportTypeByOffset(LatteParsedGSCopyShader* shaderContext, uint32 offset, uint32* exportType, uint32* exportParam);

View file

@ -8,6 +8,14 @@
#include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Common/MemPtr.h"
#include "HW/Latte/ISA/LatteReg.h"
#if ENABLE_METAL
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
#endif
// Defined in LatteTextureLegacy.cpp
Latte::E_GX2SURFFMT LatteTexture_ReconstructGX2Format(const Latte::LATTE_SQ_TEX_RESOURCE_WORD1_N& texUnitWord1, const Latte::LATTE_SQ_TEX_RESOURCE_WORD4_N& texUnitWord4);
/* /*
* Return index of used color attachment based on shader pixel export index (0-7) * Return index of used color attachment based on shader pixel export index (0-7)
@ -289,15 +297,15 @@ void LatteDecompiler_analyzeTEXClause(LatteDecompilerShaderContext* shaderContex
LatteDecompilerShader* shader = shaderContext->shader; LatteDecompilerShader* shader = shaderContext->shader;
for(auto& texInstruction : cfInstruction->instructionsTEX) for(auto& texInstruction : cfInstruction->instructionsTEX)
{ {
if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE || if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_L || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_L ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LB || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LB ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LZ || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LZ ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_L || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_L ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_LZ || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_LZ ||
texInstruction.opcode == GPU7_TEX_INST_FETCH4 || texInstruction.opcode == GPU7_TEX_INST_FETCH4 ||
texInstruction.opcode == GPU7_TEX_INST_SAMPLE_G || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_G ||
texInstruction.opcode == GPU7_TEX_INST_LD ) texInstruction.opcode == GPU7_TEX_INST_LD )
{ {
if (texInstruction.textureFetch.textureIndex < 0 || texInstruction.textureFetch.textureIndex >= LATTE_NUM_MAX_TEX_UNITS) if (texInstruction.textureFetch.textureIndex < 0 || texInstruction.textureFetch.textureIndex >= LATTE_NUM_MAX_TEX_UNITS)
@ -315,7 +323,7 @@ void LatteDecompiler_analyzeTEXClause(LatteDecompilerShaderContext* shaderContex
shader->textureUnitSamplerAssignment[texInstruction.textureFetch.textureIndex] = texInstruction.textureFetch.samplerIndex; shader->textureUnitSamplerAssignment[texInstruction.textureFetch.textureIndex] = texInstruction.textureFetch.samplerIndex;
if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_L || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_LZ) if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_L || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_LZ)
shader->textureUsesDepthCompare[texInstruction.textureFetch.textureIndex] = true; shader->textureUsesDepthCompare[texInstruction.textureFetch.textureIndex] = true;
bool useTexelCoords = false; bool useTexelCoords = false;
if (texInstruction.opcode == GPU7_TEX_INST_SAMPLE && (texInstruction.textureFetch.unnormalized[0] && texInstruction.textureFetch.unnormalized[1] && texInstruction.textureFetch.unnormalized[2] && texInstruction.textureFetch.unnormalized[3])) if (texInstruction.opcode == GPU7_TEX_INST_SAMPLE && (texInstruction.textureFetch.unnormalized[0] && texInstruction.textureFetch.unnormalized[1] && texInstruction.textureFetch.unnormalized[2] && texInstruction.textureFetch.unnormalized[3]))
useTexelCoords = true; useTexelCoords = true;
@ -384,7 +392,7 @@ void LatteDecompiler_analyzeExport(LatteDecompilerShaderContext* shaderContext,
LatteDecompilerShader* shader = shaderContext->shader; LatteDecompilerShader* shader = shaderContext->shader;
if( shader->shaderType == LatteConst::ShaderType::Pixel ) if( shader->shaderType == LatteConst::ShaderType::Pixel )
{ {
if( cfInstruction->exportType == 0 && cfInstruction->exportArrayBase < 8 ) if (cfInstruction->exportType == 0 && cfInstruction->exportArrayBase < 8)
{ {
// remember color outputs that are written // remember color outputs that are written
for(uint32 i=0; i<(cfInstruction->exportBurstCount+1); i++) for(uint32 i=0; i<(cfInstruction->exportBurstCount+1); i++)
@ -393,9 +401,10 @@ void LatteDecompiler_analyzeExport(LatteDecompilerShaderContext* shaderContext,
shader->pixelColorOutputMask |= (1<<colorOutputIndex); shader->pixelColorOutputMask |= (1<<colorOutputIndex);
} }
} }
else if( cfInstruction->exportType == 0 && cfInstruction->exportArrayBase == 61 ) else if (cfInstruction->exportType == 0 && cfInstruction->exportArrayBase == 61)
{ {
// writes pixel depth if (LatteMRT::GetActiveDepthBufferMask(*shaderContext->contextRegistersNew))
shader->depthMask = true;
} }
else else
debugBreakpoint(); debugBreakpoint();
@ -421,7 +430,7 @@ void LatteDecompiler_analyzeExport(LatteDecompilerShaderContext* shaderContext,
void LatteDecompiler_analyzeSubroutine(LatteDecompilerShaderContext* shaderContext, uint32 cfAddr) void LatteDecompiler_analyzeSubroutine(LatteDecompilerShaderContext* shaderContext, uint32 cfAddr)
{ {
// analyze CF and clauses up to RET statement // analyze CF and clauses up to RET statement
// todo - find cfInstruction index from cfAddr // todo - find cfInstruction index from cfAddr
cemu_assert_debug(false); cemu_assert_debug(false);
@ -500,6 +509,18 @@ namespace LatteDecompiler
} }
} }
// Assigns Metal texture binding points for the shader being decompiled.
// Each texture unit that is set in output->textureUnitMask and whose
// shader->textureRenderTargetIndex entry is still 255 receives the next
// consecutive index from currentTextureBindingPointMTL. All other units are
// left untouched in resourceMappingMTL.textureUnitToBindingPoint.
void _initTextureBindingPointsMTL(LatteDecompilerShaderContext* decompilerContext)
{
	// for Metal we use consecutive indices
	auto& mtlMapping = decompilerContext->output->resourceMappingMTL;
	for (sint32 texUnit = 0; texUnit < LATTE_NUM_MAX_TEX_UNITS; texUnit++)
	{
		// units with a render target index other than 255 do not get a texture binding point
		const bool needsBinding = decompilerContext->output->textureUnitMask[texUnit] && decompilerContext->shader->textureRenderTargetIndex[texUnit] == 255;
		if (needsBinding)
		{
			mtlMapping.textureUnitToBindingPoint[texUnit] = decompilerContext->currentTextureBindingPointMTL;
			decompilerContext->currentTextureBindingPointMTL++;
		}
	}
}
void _initHasUniformVarBlock(LatteDecompilerShaderContext* decompilerContext) void _initHasUniformVarBlock(LatteDecompilerShaderContext* decompilerContext)
{ {
decompilerContext->hasUniformVarBlock = false; decompilerContext->hasUniformVarBlock = false;
@ -507,9 +528,9 @@ namespace LatteDecompiler
decompilerContext->hasUniformVarBlock = true; decompilerContext->hasUniformVarBlock = true;
else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE) else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
decompilerContext->hasUniformVarBlock = true; decompilerContext->hasUniformVarBlock = true;
bool hasAnyViewportScaleDisabled = bool hasAnyViewportScaleDisabled =
!decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_SCALE_ENA() || !decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_SCALE_ENA() ||
!decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Y_SCALE_ENA() || !decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Y_SCALE_ENA() ||
!decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Z_SCALE_ENA(); !decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Z_SCALE_ENA();
// we currently only support all on/off. Individual component scaling is not supported // we currently only support all on/off. Individual component scaling is not supported
@ -537,6 +558,13 @@ namespace LatteDecompiler
{ {
decompilerContext->hasUniformVarBlock = true; // uf_verticesPerInstance and uf_streamoutBufferBase* decompilerContext->hasUniformVarBlock = true; // uf_verticesPerInstance and uf_streamoutBufferBase*
} }
if (g_renderer->GetType() == RendererAPI::Metal)
{
bool usesGeometryShader = UseGeometryShader(*decompilerContext->contextRegistersNew, decompilerContext->options->usesGeometryShader);
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && usesGeometryShader)
decompilerContext->hasUniformVarBlock = true; // uf_verticesPerInstance
}
} }
void _initUniformBindingPoints(LatteDecompilerShaderContext* decompilerContext) void _initUniformBindingPoints(LatteDecompilerShaderContext* decompilerContext)
@ -554,14 +582,13 @@ namespace LatteDecompiler
} }
} }
// assign binding point to uniform var block // assign binding point to uniform var block
decompilerContext->output->resourceMappingGL.uniformVarsBufferBindingPoint = -1; // OpenGL currently doesnt use a uniform block
if (decompilerContext->hasUniformVarBlock) if (decompilerContext->hasUniformVarBlock)
{ {
decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = decompilerContext->currentBindingPointVK; decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = decompilerContext->currentBindingPointVK;
decompilerContext->currentBindingPointVK++; decompilerContext->currentBindingPointVK++;
decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint = decompilerContext->currentBufferBindingPointMTL;
decompilerContext->currentBufferBindingPointMTL++;
} }
else
decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = -1;
// assign binding points to uniform buffers // assign binding points to uniform buffers
if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK) if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
{ {
@ -580,6 +607,8 @@ namespace LatteDecompiler
decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] = decompilerContext->currentBindingPointVK; decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] = decompilerContext->currentBindingPointVK;
decompilerContext->currentBindingPointVK++; decompilerContext->currentBindingPointVK++;
decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i] = decompilerContext->currentBufferBindingPointMTL;
decompilerContext->currentBufferBindingPointMTL++;
} }
// for OpenGL we use the relative buffer index // for OpenGL we use the relative buffer index
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++) for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
@ -601,6 +630,8 @@ namespace LatteDecompiler
{ {
decompilerContext->output->resourceMappingVK.tfStorageBindingPoint = decompilerContext->currentBindingPointVK; decompilerContext->output->resourceMappingVK.tfStorageBindingPoint = decompilerContext->currentBindingPointVK;
decompilerContext->currentBindingPointVK++; decompilerContext->currentBindingPointVK++;
decompilerContext->output->resourceMappingMTL.tfStorageBindingPoint = decompilerContext->currentBufferBindingPointMTL;
decompilerContext->currentBufferBindingPointMTL++;
} }
} }
@ -617,6 +648,7 @@ namespace LatteDecompiler
{ {
decompilerContext->output->resourceMappingGL.attributeMapping[i] = bindingIndex; decompilerContext->output->resourceMappingGL.attributeMapping[i] = bindingIndex;
decompilerContext->output->resourceMappingVK.attributeMapping[i] = bindingIndex; decompilerContext->output->resourceMappingVK.attributeMapping[i] = bindingIndex;
decompilerContext->output->resourceMappingMTL.attributeMapping[i] = bindingIndex;
bindingIndex++; bindingIndex++;
} }
} }
@ -805,7 +837,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
for(sint32 i=0; i<LATTE_NUM_MAX_TEX_UNITS; i++) for(sint32 i=0; i<LATTE_NUM_MAX_TEX_UNITS; i++)
{ {
if (!shaderContext->output->textureUnitMask[i]) if (!shaderContext->output->textureUnitMask[i])
{ {
// texture unit not used // texture unit not used
shader->textureUnitDim[i] = (Latte::E_DIM)0xFF; shader->textureUnitDim[i] = (Latte::E_DIM)0xFF;
@ -827,6 +859,81 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
shader->textureUnitList[shader->textureUnitListCount] = i; shader->textureUnitList[shader->textureUnitListCount] = i;
shader->textureUnitListCount++; shader->textureUnitListCount++;
} }
shader->textureRenderTargetIndex[i] = 255;
}
// check if textures are used as render targets
if (shader->shaderType == LatteConst::ShaderType::Pixel)
{
struct {
sint32 index;
MPTR physAddr;
Latte::E_GX2SURFFMT format;
Latte::E_HWTILEMODE tileMode;
} colorBuffers[LATTE_NUM_COLOR_TARGET]{};
uint8 colorBufferMask = LatteMRT::GetActiveColorBufferMask(shader, *shaderContext->contextRegistersNew);
sint32 colorBufferCount = 0;
for (sint32 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto& colorBuffer = colorBuffers[colorBufferCount];
if (((colorBufferMask) & (1 << i)) == 0)
continue; // color buffer not enabled
uint32* colorBufferRegBase = shaderContext->contextRegisters + (mmCB_COLOR0_BASE + i);
uint32 regColorBufferBase = colorBufferRegBase[mmCB_COLOR0_BASE - mmCB_COLOR0_BASE] & 0xFFFFFF00; // the low 8 bits are ignored? How to Survive seems to rely on this
uint32 regColorInfo = colorBufferRegBase[mmCB_COLOR0_INFO - mmCB_COLOR0_BASE];
MPTR colorBufferPhysMem = regColorBufferBase;
Latte::E_HWTILEMODE colorBufferTileMode = (Latte::E_HWTILEMODE)((regColorInfo >> 8) & 0xF);
Latte::E_GX2SURFFMT colorBufferFormat = LatteMRT::GetColorBufferFormat(i, *shaderContext->contextRegistersNew);
colorBuffer = {i, colorBufferPhysMem, colorBufferFormat, colorBufferTileMode};
colorBufferCount++;
}
for (sint32 i = 0; i < shader->textureUnitListCount; i++)
{
sint32 textureIndex = shader->textureUnitList[i];
const auto& texRegister = texRegs[textureIndex];
// get physical address of texture data
MPTR physAddr = (texRegister.word2.get_BASE_ADDRESS() << 8);
if (physAddr == MPTR_NULL)
continue; // invalid data
auto tileMode = texRegister.word0.get_TILE_MODE();
// Check for dimension
auto dim = shader->textureUnitDim[textureIndex];
// TODO: 2D arrays could technically be supported as well
if (dim != Latte::E_DIM::DIM_2D)
continue;
// Check for mip level
// TODO: uncomment?
/*
auto lastMip = texRegister.word5.get_LAST_LEVEL();
// TODO: multiple mip levels could technically be supported as well
if (lastMip != 0)
continue;
*/
Latte::E_GX2SURFFMT format = LatteTexture_ReconstructGX2Format(texRegister.word1, texRegister.word4);
// Check if the texture is used as render target
for (sint32 j = 0; j < colorBufferCount; j++)
{
const auto& colorBuffer = colorBuffers[j];
if (physAddr == colorBuffer.physAddr && format == colorBuffer.format && tileMode == colorBuffer.tileMode)
{
shader->textureRenderTargetIndex[textureIndex] = colorBuffer.index;
break;
}
}
}
} }
// for geometry shaders check the copy shader for stream writes // for geometry shaders check the copy shader for stream writes
if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false) if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false)
@ -1002,6 +1109,10 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
shaderContext->output->resourceMappingVK.setIndex = 2; shaderContext->output->resourceMappingVK.setIndex = 2;
LatteDecompiler::_initTextureBindingPointsGL(shaderContext); LatteDecompiler::_initTextureBindingPointsGL(shaderContext);
LatteDecompiler::_initTextureBindingPointsVK(shaderContext); LatteDecompiler::_initTextureBindingPointsVK(shaderContext);
LatteDecompiler::_initTextureBindingPointsMTL(shaderContext);
LatteDecompiler::_initUniformBindingPoints(shaderContext); LatteDecompiler::_initUniformBindingPoints(shaderContext);
LatteDecompiler::_initAttributeBindingPoints(shaderContext); LatteDecompiler::_initAttributeBindingPoints(shaderContext);
shaderContext->output->resourceMappingMTL.verticesPerInstanceBinding = shaderContext->currentBufferBindingPointMTL++;
shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++;
shaderContext->output->resourceMappingMTL.indexTypeBinding = shaderContext->currentBufferBindingPointMTL++;
} }

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,511 @@
#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Core/LatteShaderAssembly.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/HW/Latte/Core/Latte.h"
#include "Cafe/HW/Latte/Core/LatteDraw.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "util/helpers/StringBuf.h"
#define _CRLF "\r\n"
// Appends one line of shader source to src that assigns the full vertex input
// in.attrDataSem<attributeInputIndex> to attrDecoder without any byte swapping.
// shaderContext is not used by this helper.
static void _readLittleEndianAttributeU32x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
// "{}" is the format placeholder that receives attributeInputIndex
src->addFmt("attrDecoder = in.attrDataSem{};" _CRLF, attributeInputIndex);
}
// Appends one line of shader source to src that loads the xyz components of
// in.attrDataSem<attributeInputIndex> into attrDecoder and sets w to 0.
// No byte swapping is emitted. shaderContext is not used by this helper.
static void _readLittleEndianAttributeU32x3(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder = uint4(in.attrDataSem{}.xyz,0);" _CRLF, attributeInputIndex);
}
// Appends one line of shader source to src that loads the xy components of
// in.attrDataSem<attributeInputIndex> into attrDecoder and sets z and w to 0.
// No byte swapping is emitted. shaderContext is not used by this helper.
static void _readLittleEndianAttributeU32x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder = uint4(in.attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex);
}
// Appends one line of shader source to src that loads the x component of
// in.attrDataSem<attributeInputIndex> into attrDecoder and sets y, z and w to 0.
// No byte swapping is emitted. shaderContext is not used by this helper.
static void _readLittleEndianAttributeU32x1(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder = uint4(in.attrDataSem{}.x,0,0,0);" _CRLF, attributeInputIndex);
}
// Appends one line of shader source to src that loads the xy components of
// in.attrDataSem<attributeInputIndex> into attrDecoder and sets z and w to 0.
// The emitted text is the same as for the little-endian U32x2 variant; no
// byte swapping or masking is emitted. shaderContext is not used by this helper.
static void _readLittleEndianAttributeU16x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder = uint4(in.attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex);
}
// Appends one line of shader source to src that assigns the full vertex input
// in.attrDataSem<attributeInputIndex> to attrDecoder. The emitted text is the
// same as for the little-endian U32x4 variant; no byte swapping or masking is
// emitted. shaderContext is not used by this helper.
static void _readLittleEndianAttributeU16x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder = in.attrDataSem{};" _CRLF, attributeInputIndex);
}
// Appends shader source to src that loads in.attrDataSem<attributeInputIndex>
// into attrDecoder and then reverses the byte order of every 32-bit component.
// shaderContext is not used by this helper.
static void _readBigEndianAttributeU32x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder = in.attrDataSem{};" _CRLF, attributeInputIndex);
// 32-bit byte swap: byte3->byte0, byte2->byte1, byte1->byte2, byte0->byte3
// NOTE(review): relies on attrDecoder being an unsigned vector in the generated shader so that >>24 does not sign-extend - confirm at its declaration
src->add("attrDecoder = (attrDecoder>>24)|((attrDecoder>>8)&0xFF00)|((attrDecoder<<8)&0xFF0000)|((attrDecoder<<24));" _CRLF);
}
// Appends shader source to src that loads the xyz components of
// in.attrDataSem<attributeInputIndex> into attrDecoder, reverses the byte order
// of each of those 32-bit components and sets w to 0.
// shaderContext is not used by this helper.
static void _readBigEndianAttributeU32x3(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder.xyz = in.attrDataSem{}.xyz;" _CRLF, attributeInputIndex);
// 32-bit byte swap applied to x, y and z
src->add("attrDecoder.xyz = (attrDecoder.xyz>>24)|((attrDecoder.xyz>>8)&0xFF00)|((attrDecoder.xyz<<8)&0xFF0000)|((attrDecoder.xyz<<24));" _CRLF);
// unused component is zeroed
src->add("attrDecoder.w = 0;" _CRLF);
}
// Appends shader source to src that loads the xy components of
// in.attrDataSem<attributeInputIndex> into attrDecoder, reverses the byte order
// of each of those 32-bit components and sets z and w to 0.
// shaderContext is not used by this helper.
static void _readBigEndianAttributeU32x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex);
// 32-bit byte swap applied to x and y
src->add("attrDecoder.xy = (attrDecoder.xy>>24)|((attrDecoder.xy>>8)&0xFF00)|((attrDecoder.xy<<8)&0xFF0000)|((attrDecoder.xy<<24));" _CRLF);
// unused components are zeroed
src->add("attrDecoder.z = 0;" _CRLF);
src->add("attrDecoder.w = 0;" _CRLF);
}
// Appends shader source to src that loads the x component of
// in.attrDataSem<attributeInputIndex> into attrDecoder, reverses its byte order
// (32-bit) and sets y, z and w to 0.
// shaderContext is not used by this helper.
static void _readBigEndianAttributeU32x1(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder.x = in.attrDataSem{}.x;" _CRLF, attributeInputIndex);
// 32-bit byte swap applied to x only
src->add("attrDecoder.x = (attrDecoder.x>>24)|((attrDecoder.x>>8)&0xFF00)|((attrDecoder.x<<8)&0xFF0000)|((attrDecoder.x<<24));" _CRLF);
// unused components are zeroed
src->add("attrDecoder.y = 0;" _CRLF);
src->add("attrDecoder.z = 0;" _CRLF);
src->add("attrDecoder.w = 0;" _CRLF);
}
// Appends shader source to src that loads in.attrDataSem<attributeInputIndex>
// into attrDecoder, swaps the two low bytes of the x component (the result is
// limited to 16 bits) and sets y, z and w to 0.
// shaderContext is not used by this helper.
static void _readBigEndianAttributeU16x1(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
// both x and y are loaded here, but y is overwritten with 0 below; only x carries data
src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex);
// 16-bit byte swap: high byte -> low byte, low byte -> high byte, upper 16 bits cleared
src->add("attrDecoder.x = ((attrDecoder.x>>8)&0xFF)|((attrDecoder.x<<8)&0xFF00);" _CRLF);
src->add("attrDecoder.y = 0;" _CRLF);
src->add("attrDecoder.z = 0;" _CRLF);
src->add("attrDecoder.w = 0;" _CRLF);
}
// Appends shader source to src that loads the xy components of
// in.attrDataSem<attributeInputIndex> into attrDecoder, swaps the two low bytes
// of each (results are limited to 16 bits) and sets z and w to 0.
// shaderContext is not used by this helper.
static void _readBigEndianAttributeU16x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex);
// 16-bit byte swap applied to x and y, upper 16 bits cleared
src->add("attrDecoder.xy = ((attrDecoder.xy>>8)&0xFF)|((attrDecoder.xy<<8)&0xFF00);" _CRLF);
// unused components are zeroed
src->add("attrDecoder.z = 0;" _CRLF);
src->add("attrDecoder.w = 0;" _CRLF);
}
// Appends shader source to src that loads all four components of
// in.attrDataSem<attributeInputIndex> into attrDecoder and swaps the two low
// bytes of each component (results are limited to 16 bits).
// shaderContext is not used by this helper.
static void _readBigEndianAttributeU16x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
// 16-bit byte swap applied to every component, upper 16 bits cleared
src->add("attrDecoder = ((attrDecoder>>8)&0xFF)|((attrDecoder<<8)&0xFF00);" _CRLF);
}
// Emits the MSL statements that decode one fetched vertex attribute into `attrDecoder`.
//
// The decode sequence is chosen from the attribute's endian swap mode (attrib->endianSwap),
// data format (attrib->format), number format (attrib->nfa) and signedness (attrib->isSigned).
// As visible from the emitted code below: nfa 0 paths scale the raw integer into a float,
// nfa 1 paths keep the (optionally sign-extended) integer and nfa 2 paths convert the integer
// or float data to float. Float results are stored as their bit pattern via as_type<uint>.
//
// shaderContext: passed through to the _read*Attribute* helpers.
// src:           string buffer the MSL code is appended to.
// attrib:        parsed fetch shader attribute describing the input to decode.
//
// Unsupported combinations are logged; depending on the branch they additionally trigger an assert.
void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext, StringBuf* src, LatteParsedFetchShaderAttribute_t* attrib)
{
	if (attrib->attributeBufferIndex >= Latte::GPU_LIMITS::NUM_VERTEX_BUFFERS)
	{
		// attribute refers to a buffer index outside the supported range, decode to all zeros
		// attrDecoder is assigned uint vectors everywhere else in this function and MSL has no implicit
		// conversion between vector types, so the zero value has to be an uint4 as well
		src->add("attrDecoder = uint4(0);" _CRLF);
		return;
	}
	uint32 attributeInputIndex = attrib->semanticId;
	if( attrib->endianSwap == LatteConst::VertexFetchEndianMode::SWAP_U32 )
	{
		// data is stored as big-endian 32-bit words
		if( attrib->format == FMT_32_32_32_32_FLOAT && attrib->nfa == 2 )
		{
			_readBigEndianAttributeU32x4(shaderContext, src, attributeInputIndex);
		}
		else if( attrib->format == FMT_32_32_32_FLOAT && attrib->nfa == 2 )
		{
			_readBigEndianAttributeU32x3(shaderContext, src, attributeInputIndex);
		}
		else if( attrib->format == FMT_32_32_FLOAT && attrib->nfa == 2 )
		{
			_readBigEndianAttributeU32x2(shaderContext, src, attributeInputIndex);
		}
		else if( attrib->format == FMT_32_FLOAT && attrib->nfa == 2 )
		{
			_readBigEndianAttributeU32x1(shaderContext, src, attributeInputIndex);
		}
		else if( attrib->format == FMT_2_10_10_10 && attrib->nfa == 0 )
		{
			_readBigEndianAttributeU32x1(shaderContext, src, attributeInputIndex);
			// Bayonetta 2 uses this format to store normals
			// unpack the three 10-bit fields and the 2-bit field from the single 32-bit word
			src->add("attrDecoder.xyzw = uint4((attrDecoder.x>>0)&0x3FF,(attrDecoder.x>>10)&0x3FF,(attrDecoder.x>>20)&0x3FF,(attrDecoder.x>>30)&0x3);" _CRLF);
			if (attrib->isSigned != 0)
			{
				// sign-extend the 10-bit values, then normalize with a clamp at -1.0
				src->add("if( (attrDecoder.x&0x200) != 0 ) attrDecoder.x |= 0xFFFFFC00;" _CRLF);
				src->add("if( (attrDecoder.y&0x200) != 0 ) attrDecoder.y |= 0xFFFFFC00;" _CRLF);
				src->add("if( (attrDecoder.z&0x200) != 0 ) attrDecoder.z |= 0xFFFFFC00;" _CRLF);
				src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/511.0,-1.0));" _CRLF);
				src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/511.0,-1.0));" _CRLF);
				src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/511.0,-1.0));" _CRLF);
			}
			else
			{
				src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/1023.0,-1.0));" _CRLF);
				src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/1023.0,-1.0));" _CRLF);
				src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/1023.0,-1.0));" _CRLF);
			}
			src->add("attrDecoder.w = as_type<uint>(float(attrDecoder.w));" _CRLF); // unsure?
		}
		else if( attrib->format == FMT_32_32_32_32 && attrib->nfa == 1 && attrib->isSigned == 0 )
		{
			_readBigEndianAttributeU32x4(shaderContext, src, attributeInputIndex);
		}
		else if( attrib->format == FMT_32_32_32 && attrib->nfa == 1 && attrib->isSigned == 0 )
		{
			_readBigEndianAttributeU32x3(shaderContext, src, attributeInputIndex);
		}
		else if( attrib->format == FMT_32_32 && attrib->nfa == 1 && attrib->isSigned == 0 )
		{
			_readBigEndianAttributeU32x2(shaderContext, src, attributeInputIndex);
		}
		else if (attrib->format == FMT_32 && attrib->nfa == 1 && attrib->isSigned == 0)
		{
			_readBigEndianAttributeU32x1(shaderContext, src, attributeInputIndex);
		}
		else if (attrib->format == FMT_32 && attrib->nfa == 1 && attrib->isSigned == 1)
		{
			// we can just read the signed s32 as a u32 since no sign-extension is necessary
			_readBigEndianAttributeU32x1(shaderContext, src, attributeInputIndex);
		}
		else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned == 0 )
		{
			// seen in Minecraft Wii U Edition
			// the 32-bit swap is applied by reversing the component order (wzyx)
			src->addFmt("attrDecoder.xyzw = as_type<uint4>(float4(in.attrDataSem{}.wzyx)/255.0);" _CRLF, attributeInputIndex);
		}
		else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned != 0 )
		{
			// seen in Minecraft Wii U Edition
			src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.wzyx;" _CRLF, attributeInputIndex);
			src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
			src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
			src->add("if( (attrDecoder.z&0x80) != 0 ) attrDecoder.z |= 0xFFFFFF00;" _CRLF);
			src->add("if( (attrDecoder.w&0x80) != 0 ) attrDecoder.w |= 0xFFFFFF00;" _CRLF);
			src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/127.0,-1.0));" _CRLF);
			src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/127.0,-1.0));" _CRLF);
			src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/127.0,-1.0));" _CRLF);
			src->add("attrDecoder.w = as_type<uint>(max(float(int(attrDecoder.w))/127.0,-1.0));" _CRLF);
		}
		else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 1 && attrib->isSigned == 0 )
		{
			// seen in Minecraft Wii U Edition
			src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.wzyx;" _CRLF, attributeInputIndex);
		}
		else if (attrib->format == FMT_8_8_8_8 && attrib->nfa == 2 && attrib->isSigned == 0)
		{
			// seen in Ben 10 Omniverse
			src->addFmt("attrDecoder.xyzw = as_type<uint4>(float4(in.attrDataSem{}.wzyx));" _CRLF, attributeInputIndex);
		}
		else
		{
			cemuLog_log(LogType::Force, "_emitAttributeDecode(): Unsupported fmt {:02x} nfa {} signed {} endian {}\n", attrib->format, attrib->nfa, attrib->isSigned, attrib->endianSwap);
			cemu_assert_unimplemented();
		}
	}
	else if( attrib->endianSwap == LatteConst::VertexFetchEndianMode::SWAP_NONE )
	{
		// data is already in little-endian order, no byte swapping required
		if( attrib->format == FMT_32_32_32_32_FLOAT && attrib->nfa == 2 )
		{
			_readLittleEndianAttributeU32x4(shaderContext, src, attributeInputIndex);
		}
		else if (attrib->format == FMT_32_32_32_FLOAT && attrib->nfa == 2)
		{
			_readLittleEndianAttributeU32x3(shaderContext, src, attributeInputIndex);
		}
		else if (attrib->format == FMT_32_32_FLOAT && attrib->nfa == 2)
		{
			// seen in Cities of Gold
			_readLittleEndianAttributeU32x2(shaderContext, src, attributeInputIndex);
		}
		else if (attrib->format == FMT_32 && attrib->nfa == 1 && attrib->isSigned == 0)
		{
			// seen in Nano Assault Neo
			_readLittleEndianAttributeU32x1(shaderContext, src, attributeInputIndex);
		}
		else if (attrib->format == FMT_2_10_10_10 && attrib->nfa == 0 && attrib->isSigned == 0)
		{
			// seen in Fast Racing Neo
			_readLittleEndianAttributeU32x1(shaderContext, src, attributeInputIndex);
			src->add("attrDecoder.xyzw = uint4((attrDecoder.x>>0)&0x3FF,(attrDecoder.x>>10)&0x3FF,(attrDecoder.x>>20)&0x3FF,(attrDecoder.x>>30)&0x3);" _CRLF);
			src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/1023.0,-1.0));" _CRLF);
			src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/1023.0,-1.0));" _CRLF);
			src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/1023.0,-1.0));" _CRLF);
			src->add("attrDecoder.w = as_type<uint>(float(attrDecoder.w));" _CRLF); // todo - is this correct?
		}
		else if (attrib->format == FMT_16_16_16_16 && attrib->nfa == 0 && attrib->isSigned != 0)
		{
			// seen in CoD ghosts
			_readLittleEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
			src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.z&0x8000) != 0 ) attrDecoder.z |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.w&0x8000) != 0 ) attrDecoder.w |= 0xFFFF0000;" _CRLF);
			src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/32767.0,-1.0));" _CRLF);
			src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/32767.0,-1.0));" _CRLF);
			src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/32767.0,-1.0));" _CRLF);
			src->add("attrDecoder.w = as_type<uint>(max(float(int(attrDecoder.w))/32767.0,-1.0));" _CRLF);
		}
		else if( attrib->format == FMT_16_16_16_16 && attrib->nfa == 2 && attrib->isSigned == 1 )
		{
			// seen in Rabbids Land
			_readLittleEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
			src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.z&0x8000) != 0 ) attrDecoder.z |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.w&0x8000) != 0 ) attrDecoder.w |= 0xFFFF0000;" _CRLF);
			src->add("attrDecoder.xyzw = as_type<uint4>(float4(int4(attrDecoder)));" _CRLF);
		}
		else if (attrib->format == FMT_16_16_16_16_FLOAT && attrib->nfa == 2)
		{
			// seen in Giana Sisters: Twisted Dreams
			_readLittleEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
			// TODO: uint4?
			// two 16-bit values are packed into one 32-bit word and reinterpreted as half2
			src->add("attrDecoder.xyzw = as_type<uint4>(float4(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))),float2(as_type<half2>(attrDecoder.z|(attrDecoder.w<<16)))));" _CRLF);
		}
		else if (attrib->format == FMT_16_16 && attrib->nfa == 0 && attrib->isSigned != 0)
		{
			// seen in Nano Assault Neo
			_readLittleEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
			src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
			src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/32767.0,-1.0));" _CRLF);
			src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/32767.0,-1.0));" _CRLF);
		}
		else if (attrib->format == FMT_16_16_FLOAT && attrib->nfa == 2)
		{
			// seen in Giana Sisters: Twisted Dreams
			_readLittleEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
			src->add("attrDecoder.xy = as_type<uint2>(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))));" _CRLF);
			src->add("attrDecoder.zw = uint2(0);" _CRLF);
		}
		else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned == 0 )
		{
			src->addFmt("attrDecoder.xyzw = as_type<uint4>(float4(in.attrDataSem{}.xyzw)/255.0);" _CRLF, attributeInputIndex);
		}
		else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned != 0 )
		{
			src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
			src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
			src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
			src->add("if( (attrDecoder.z&0x80) != 0 ) attrDecoder.z |= 0xFFFFFF00;" _CRLF);
			src->add("if( (attrDecoder.w&0x80) != 0 ) attrDecoder.w |= 0xFFFFFF00;" _CRLF);
			src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/127.0,-1.0));" _CRLF);
			src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/127.0,-1.0));" _CRLF);
			src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/127.0,-1.0));" _CRLF);
			src->add("attrDecoder.w = as_type<uint>(max(float(int(attrDecoder.w))/127.0,-1.0));" _CRLF);
		}
		else if (attrib->format == FMT_8_8_8_8 && attrib->nfa == 1 && attrib->isSigned == 0)
		{
			src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
		}
		else if (attrib->format == FMT_8_8_8_8 && attrib->nfa == 1 && attrib->isSigned != 0)
		{
			// seen in Sonic Lost World
			src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
			src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
			src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
			src->add("if( (attrDecoder.z&0x80) != 0 ) attrDecoder.z |= 0xFFFFFF00;" _CRLF);
			src->add("if( (attrDecoder.w&0x80) != 0 ) attrDecoder.w |= 0xFFFFFF00;" _CRLF);
		}
		else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 2 && attrib->isSigned == 0 )
		{
			// seen in One Piece
			// TODO: uint4?
			src->addFmt("attrDecoder.xyzw = as_type<uint4>(float4(in.attrDataSem{}.xyzw));" _CRLF, attributeInputIndex);
		}
		else if (attrib->format == FMT_8_8 && attrib->nfa == 0 && attrib->isSigned == 0)
		{
			if( (attrib->offset&3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL )
			{
				// AMD workaround
				src->addFmt("attrDecoder.xy = as_type<uint2>(float2(in.attrDataSem{}.zw)/255.0);" _CRLF, attributeInputIndex);
				src->add("attrDecoder.zw = uint2(0);" _CRLF);
			}
			else
			{
				src->addFmt("attrDecoder.xy = as_type<uint2>(float2(in.attrDataSem{}.xy)/255.0);" _CRLF, attributeInputIndex);
				src->add("attrDecoder.zw = uint2(0);" _CRLF);
			}
		}
		else if (attrib->format == FMT_8_8 && attrib->nfa == 2 && attrib->isSigned == 0)
		{
			// seen in BotW
			if ((attrib->offset & 3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL)
			{
				// AMD workaround
				src->addFmt("attrDecoder.xy = as_type<uint2>(float2(in.attrDataSem{}.zw));" _CRLF, attributeInputIndex);
				src->add("attrDecoder.zw = uint2(0);" _CRLF);
			}
			else
			{
				src->addFmt("attrDecoder.xy = as_type<uint2>(float2(in.attrDataSem{}.xy));" _CRLF, attributeInputIndex);
				src->add("attrDecoder.zw = uint2(0);" _CRLF);
			}
		}
		else if (attrib->format == FMT_8_8 && attrib->nfa == 0 && attrib->isSigned != 0)
		{
			if ((attrib->offset & 3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL)
			{
				// AMD workaround
				src->addFmt("attrDecoder.xy = in.attrDataSem{}.zw;" _CRLF, attributeInputIndex);
				src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
				src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
				src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/127.0,-1.0));" _CRLF);
				src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/127.0,-1.0));" _CRLF);
				src->add("attrDecoder.zw = uint2(0);" _CRLF);
			}
			else
			{
				src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex);
				src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
				src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
				src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/127.0,-1.0));" _CRLF);
				src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/127.0,-1.0));" _CRLF);
				src->add("attrDecoder.zw = uint2(0);" _CRLF);
			}
		}
		else if (attrib->format == FMT_8_8 && attrib->nfa == 1 && attrib->isSigned == 0)
		{
			if ((attrib->offset & 3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL)
			{
				// AMD workaround
				src->addFmt("attrDecoder.xyzw = uint4(in.attrDataSem{}.zw,0,0);" _CRLF, attributeInputIndex);
			}
			else
			{
				src->addFmt("attrDecoder.xyzw = uint4(in.attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex);
			}
		}
		else if( attrib->format == FMT_8 && attrib->nfa == 0 && attrib->isSigned == 0 )
		{
			// seen in Pikmin 3
			src->addFmt("attrDecoder.x = as_type<uint>(float(in.attrDataSem{}.x)/255.0);" _CRLF, attributeInputIndex);
			src->add("attrDecoder.yzw = uint3(0);" _CRLF);
		}
		else if( attrib->format == FMT_8 && attrib->nfa == 1 && attrib->isSigned == 0 )
		{
			src->addFmt("attrDecoder.xyzw = uint4(in.attrDataSem{}.x,0,0,0);" _CRLF, attributeInputIndex);
		}
		else
		{
			cemuLog_log(LogType::Force, "_emitAttributeDecode(): Unsupported fmt {:02x} nfa {} signed {} endian {}\n", attrib->format, attrib->nfa, attrib->isSigned, attrib->endianSwap);
			cemu_assert_debug(false);
		}
	}
	else if( attrib->endianSwap == LatteConst::VertexFetchEndianMode::SWAP_U16 )
	{
		// data is stored as big-endian 16-bit values
		if( attrib->format == FMT_16_16_16_16_FLOAT && attrib->nfa == 2 )
		{
			_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
			// TODO: uint4?
			src->add("attrDecoder.xyzw = as_type<uint4>(float4(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))),float2(as_type<half2>(attrDecoder.z|(attrDecoder.w<<16)))));" _CRLF);
		}
		else if (attrib->format == FMT_16_16_16_16 && attrib->nfa == 0 && attrib->isSigned != 0)
		{
			_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
			src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.z&0x8000) != 0 ) attrDecoder.z |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.w&0x8000) != 0 ) attrDecoder.w |= 0xFFFF0000;" _CRLF);
			src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/32767.0,-1.0));" _CRLF);
			src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/32767.0,-1.0));" _CRLF);
			src->add("attrDecoder.z = as_type<uint>(max(float(int(attrDecoder.z))/32767.0,-1.0));" _CRLF);
			src->add("attrDecoder.w = as_type<uint>(max(float(int(attrDecoder.w))/32767.0,-1.0));" _CRLF);
		}
		else if (attrib->format == FMT_16_16_16_16 && attrib->nfa == 0 && attrib->isSigned == 0)
		{
			// seen in BotW
			_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
			src->add("attrDecoder.x = as_type<uint>(float(int(attrDecoder.x))/65535.0);" _CRLF);
			src->add("attrDecoder.y = as_type<uint>(float(int(attrDecoder.y))/65535.0);" _CRLF);
			src->add("attrDecoder.z = as_type<uint>(float(int(attrDecoder.z))/65535.0);" _CRLF);
			src->add("attrDecoder.w = as_type<uint>(float(int(attrDecoder.w))/65535.0);" _CRLF);
		}
		else if( attrib->format == FMT_16_16_16_16 && attrib->nfa == 2 && attrib->isSigned != 0 )
		{
			// seen in Minecraft Wii U Edition
			_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
			src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.z&0x8000) != 0 ) attrDecoder.z |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.w&0x8000) != 0 ) attrDecoder.w |= 0xFFFF0000;" _CRLF);
			src->add("attrDecoder.x = as_type<uint>(float(int(attrDecoder.x)));" _CRLF);
			src->add("attrDecoder.y = as_type<uint>(float(int(attrDecoder.y)));" _CRLF);
			src->add("attrDecoder.z = as_type<uint>(float(int(attrDecoder.z)));" _CRLF);
			src->add("attrDecoder.w = as_type<uint>(float(int(attrDecoder.w)));" _CRLF);
		}
		else if( attrib->format == FMT_16_16_16_16 && attrib->nfa == 1 && attrib->isSigned != 0 )
		{
			// seen in Minecraft Wii U Edition
			_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
			src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.z&0x8000) != 0 ) attrDecoder.z |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.w&0x8000) != 0 ) attrDecoder.w |= 0xFFFF0000;" _CRLF);
		}
		else if( attrib->format == FMT_16_16_16_16 && attrib->nfa == 1 && attrib->isSigned == 0 )
		{
			_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
		}
		else if( attrib->format == FMT_16_16_FLOAT && attrib->nfa == 2 )
		{
			_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
			src->add("attrDecoder.xy = as_type<uint2>(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))));" _CRLF);
			src->add("attrDecoder.zw = uint2(0);" _CRLF);
		}
		else if( attrib->format == FMT_16_16 && attrib->nfa == 0 && attrib->isSigned == 0 )
		{
			_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
			src->add("attrDecoder.xy = as_type<uint2>(float2(float(attrDecoder.x), float(attrDecoder.y))/65535.0);" _CRLF);
			src->add("attrDecoder.zw = uint2(0);" _CRLF);
		}
		else if( attrib->format == FMT_16_16 && attrib->nfa == 0 && attrib->isSigned != 0 )
		{
			_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
			src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
			src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/32767.0,-1.0));" _CRLF);
			src->add("attrDecoder.y = as_type<uint>(max(float(int(attrDecoder.y))/32767.0,-1.0));" _CRLF);
			src->add("attrDecoder.zw = uint2(0);" _CRLF);
		}
		else if( attrib->format == FMT_16_16 && attrib->nfa == 1 && attrib->isSigned == 0 )
		{
			_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
		}
		else if( attrib->format == FMT_16_16 && attrib->nfa == 1 && attrib->isSigned != 0 )
		{
			_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
			src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
			src->add("attrDecoder.zw = uint2(0);" _CRLF);
		}
		else if( attrib->format == FMT_16_16 && attrib->nfa == 2 && attrib->isSigned == 0 )
		{
			_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
			src->add("attrDecoder.xy = as_type<uint2>(float2(float(attrDecoder.x), float(attrDecoder.y)));" _CRLF);
			src->add("attrDecoder.zw = uint2(0);" _CRLF);
		}
		else if( attrib->format == FMT_16_16 && attrib->nfa == 2 && attrib->isSigned != 0 )
		{
			_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
			src->add("if( (attrDecoder.x&0x8000) != 0 ) attrDecoder.x |= 0xFFFF0000;" _CRLF);
			src->add("if( (attrDecoder.y&0x8000) != 0 ) attrDecoder.y |= 0xFFFF0000;" _CRLF);
			src->add("attrDecoder.xy = as_type<uint2>(float2(float(int(attrDecoder.x)), float(int(attrDecoder.y))));" _CRLF);
			src->add("attrDecoder.zw = uint2(0);" _CRLF);
		}
		else if (attrib->format == FMT_16 && attrib->nfa == 1 && attrib->isSigned == 0)
		{
			_readBigEndianAttributeU16x1(shaderContext, src, attributeInputIndex);
		}
		else if (attrib->format == FMT_16 && attrib->nfa == 0 && attrib->isSigned == 0)
		{
			// seen in CoD ghosts
			_readBigEndianAttributeU16x1(shaderContext, src, attributeInputIndex);
			src->add("attrDecoder.x = as_type<uint>(float(int(attrDecoder.x))/65535.0);" _CRLF);
		}
		else
		{
			// unlike the other endian modes this branch only logs and does not assert
			cemuLog_logDebug(LogType::Force, "_emitAttributeDecode(): Unsupported fmt {:02x} nfa {} signed {} endian {}", attrib->format, attrib->nfa, attrib->isSigned, attrib->endianSwap);
		}
	}
	else
	{
		// endian swap mode not handled by this function
		cemu_assert_debug(false);
	}
}

View file

@ -0,0 +1,554 @@
#pragma once
#include "Common/precompiled.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
namespace LatteDecompiler
{
// Emits the declaration of the `SupportBuffer` struct and records the byte offset of every
// emitted member in decompilerContext->output->uniformOffsetsVK.
// Which members are emitted depends on the uniform mode, the shader type and the analyzer results.
// usesGeometryShader is only consulted for the verticesPerInstance/streamoutBufferBase members;
// the point size check reads decompilerContext->options->usesGeometryShader instead.
static void _emitUniformVariables(LatteDecompilerShaderContext* decompilerContext, bool usesGeometryShader)
{
	auto out = decompilerContext->shaderSource;
	auto shader = decompilerContext->shader;
	auto& offsets = decompilerContext->output->uniformOffsetsVK;

	// running byte offset inside the struct
	sint32 cursor = 0;
	// rounds the cursor up to `alignment` (a power of two), returns the resulting offset and
	// advances the cursor by `size` bytes
	auto place = [&cursor](sint32 size, sint32 alignment) -> sint32
	{
		cursor = (cursor + (alignment - 1)) & ~(alignment - 1);
		const sint32 memberOffset = cursor;
		cursor += size;
		return memberOffset;
	};

	out->add("struct SupportBuffer {" _CRLF);

	if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED)
	{
		// uniform registers or buffers are accessed statically with predictable offsets
		// this allows us to remap the used entries into a more compact array
		const sint32 remappedCount = (sint32)shader->list_remappedUniformEntries.size();
		out->addFmt("int4 remapped[{}];" _CRLF, remappedCount);
		offsets.offset_remapped = place(16 * remappedCount, 1);
	}
	else if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
	{
		// full or partial uniform register file has to be present
		uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(decompilerContext->shaderBaseHash, 256);
		out->addFmt("int4 uniformRegister[{}];" _CRLF, cfileSize);
		offsets.offset_uniformRegister = place((sint32)(16 * cfileSize), 1);
		offsets.count_uniformRegister = cfileSize;
	}

	// special uniforms
	auto& vteCntl = decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL;
	const bool allViewportScalesEnabled =
		vteCntl.get_VPORT_X_SCALE_ENA() &&
		vteCntl.get_VPORT_Y_SCALE_ENA() &&
		vteCntl.get_VPORT_Z_SCALE_ENA();
	if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !allViewportScalesEnabled)
	{
		// aka GX2 special state 0
		out->add("float2 windowSpaceToClipSpaceTransform;" _CRLF);
		offsets.offset_windowSpaceToClipSpaceTransform = place(8, 8);
	}

	const bool alphaTestEnabled = decompilerContext->contextRegistersNew->SX_ALPHA_TEST_CONTROL.get_ALPHA_TEST_ENABLE();
	if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel && alphaTestEnabled)
	{
		out->add("float alphaTestRef;" _CRLF);
		offsets.offset_alphaTestRef = place(4, 4);
	}

	// point size is supplied through the buffer when the analyzer reports outputPointSize
	// without writesPointSize, for the vertex (without geometry shader) or geometry stage
	if (decompilerContext->analyzer.outputPointSize && !decompilerContext->analyzer.writesPointSize &&
		((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) ||
		 decompilerContext->shaderType == LatteConst::ShaderType::Geometry))
	{
		out->add("float pointSize;" _CRLF);
		offsets.offset_pointSize = place(4, 4);
	}

	// define fragCoordScale which holds the xy scale for render target resolution vs effective resolution
	if (shader->shaderType == LatteConst::ShaderType::Pixel)
	{
		out->add("float2 fragCoordScale;" _CRLF);
		offsets.offset_fragCoordScale = place(8, 8);
	}

	// provide scale factor for every texture that is accessed via texel coordinates (texelFetch)
	for (sint32 texUnit = 0; texUnit < LATTE_NUM_MAX_TEX_UNITS; texUnit++)
	{
		if (!decompilerContext->analyzer.texUnitUsesTexelCoordinates.test(texUnit))
			continue;
		out->addFmt("float2 tex{}Scale;" _CRLF, texUnit);
		offsets.offset_texScale[texUnit] = place(8, 8);
	}

	// define verticesPerInstance + streamoutBufferBaseX
	// note: the grouping below mirrors how && and || bind, which means geometry shaders
	// always take this path, independent of useSSBOForStreamout
	const bool vertexWithGeometryShader = shader->shaderType == LatteConst::ShaderType::Vertex && usesGeometryShader;
	if (vertexWithGeometryShader ||
		(decompilerContext->analyzer.useSSBOForStreamout &&
		 shader->shaderType == LatteConst::ShaderType::Vertex &&
		 !decompilerContext->options->usesGeometryShader) ||
		shader->shaderType == LatteConst::ShaderType::Geometry)
	{
		out->add("int verticesPerInstance;" _CRLF);
		offsets.offset_verticesPerInstance = place(4, 1);
		for (uint32 bufferIndex = 0; bufferIndex < LATTE_NUM_STREAMOUT_BUFFER; bufferIndex++)
		{
			if (!decompilerContext->output->streamoutBufferWriteMask[bufferIndex])
				continue;
			out->addFmt("int streamoutBufferBase{};" _CRLF, bufferIndex);
			offsets.offset_streamoutBufferBase[bufferIndex] = place(4, 1);
		}
	}

	out->add("};" _CRLF _CRLF);
	offsets.offset_endOfBlock = cursor;
}
// Emits one `UBuff<N>` struct declaration per accessed uniform buffer.
// Only the FULL_CBANK uniform mode produces output here; REMAPPED and FULL_CFILE are covered by
// _emitUniformVariables and NONE has nothing to declare. Any other mode triggers a debug assert.
static void _emitUniformBuffers(LatteDecompilerShaderContext* decompilerContext)
{
	const auto uniformMode = decompilerContext->shader->uniformMode;
	if (uniformMode != LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
	{
		// nothing to emit for the remaining known modes, unknown modes are a programming error
		cemu_assert_debug(uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED ||
						  uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE ||
						  uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_NONE);
		return;
	}

	auto out = decompilerContext->shaderSource;
	// uniform buffer definition
	for (uint32 bufferIndex = 0; bufferIndex < LATTE_NUM_MAX_UNIFORM_BUFFERS; bufferIndex++)
	{
		auto& accessTracker = decompilerContext->analyzer.uniformBufferAccessTracker[bufferIndex];
		if (!accessTracker.HasAccess())
			continue;

		cemu_assert_debug(decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[bufferIndex] >= 0);

		out->addFmt("struct UBuff{} {{" _CRLF, bufferIndex);
		out->addFmt("float4 d[{}];" _CRLF, accessTracker.DetermineSize(decompilerContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE));
		out->add("};" _CRLF _CRLF);
	}
}
// Emits the `VertexIn` struct for vertex shaders, with one `uint4 attrDataSem<N>` member per
// semantic flagged in the analyzer's input attribute mask.
// When fetchVertexManually is set, the members carry no [[attribute(n)]] qualifier; instead an
// `ATTRIBUTE_NAME<n>` define mapping the attribute location to the member name is appended
// after the struct. For other shader types nothing is emitted.
static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext, bool fetchVertexManually)
{
	auto out = decompilerContext->shaderSource;
	// defines collected while walking the attributes, written after the struct
	std::string manualFetchDefines;

	if (decompilerContext->shader->shaderType == LatteConst::ShaderType::Vertex)
	{
		auto& attributeMapping = decompilerContext->output->resourceMappingMTL.attributeMapping;

		out->add("struct VertexIn {" _CRLF);
		// attribute inputs
		for (uint32 semantic = 0; semantic < LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS; semantic++)
		{
			if (!decompilerContext->analyzer.inputAttributSemanticMask[semantic])
				continue;

			cemu_assert_debug(attributeMapping[semantic] >= 0);
			const sint32 location = (sint32)attributeMapping[semantic];

			out->addFmt("uint4 attrDataSem{}", semantic);
			if (fetchVertexManually)
			{
				manualFetchDefines += "#define ATTRIBUTE_NAME";
				manualFetchDefines += std::to_string(location);
				manualFetchDefines += " attrDataSem";
				manualFetchDefines += std::to_string(semantic);
				manualFetchDefines += "\n";
			}
			else
			{
				out->addFmt(" [[attribute({})]]", location);
			}
			out->add(";" _CRLF);
		}
		out->add("};" _CRLF _CRLF);
	}

	out->addFmt("{}", manualFetchDefines);
}
// Emits the `VertexOut` struct of a vertex shader.
// Every output parameter enabled in the shader's output parameter mask that has a matching pixel
// shader input (matched by semantic id) becomes a `passParameterSem<semanticId>` member.
// Pixel shader inputs without a matching vertex output are declared as `unknown<semanticId>`.
// For rect vertex shaders the matched members carry no user/interpolation qualifiers and an
// additional `ObjectPayload` struct wrapping an array of VertexOut is emitted.
static void _emitVSOutputs(LatteDecompilerShaderContext* shaderContext, bool isRectVertexShader)
{
	auto* out = shaderContext->shaderSource;
	LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();

	// returns the index of the pixel shader input with the given semantic id, or -1 if there is none
	auto findPSInputIndex = [psInputTable](uint32 semanticId) -> sint32
	{
		for (sint32 f = 0; f < psInputTable->count; f++)
		{
			if (psInputTable->import[f].semanticId == semanticId)
				return f;
		}
		return -1;
	};

	out->add("struct VertexOut {" _CRLF);
	out->add("float4 position [[position]] [[invariant]];" _CRLF);
	if (shaderContext->analyzer.outputPointSize)
		out->add("float pointSize [[point_size]];" _CRLF);

	auto outputMask = shaderContext->shader->outputParameterMask;
	// tracks which pixel shader inputs are fed by this vertex shader
	bool psInputHasSource[GPU7_PS_MAX_INPUTS] = {false};
	for (uint32 paramIndex = 0; paramIndex < 32; paramIndex++)
	{
		if ((outputMask&(1 << paramIndex)) == 0)
			continue;
		uint32 vsSemanticId = _getVertexShaderOutParamSemanticId(shaderContext->contextRegisters, paramIndex);
		if (vsSemanticId > LATTE_ANALYZER_IMPORT_INDEX_PARAM_MAX)
			continue;
		// get import based on semanticId
		const sint32 psInputIndex = findPSInputIndex(vsSemanticId);
		if (psInputIndex == -1)
			continue; // no ps input

		auto& psInput = psInputTable->import[psInputIndex];
		psInputHasSource[psInputIndex] = true;

		out->addFmt("float4 passParameterSem{}", psInput.semanticId);
		if (!isRectVertexShader)
		{
			out->addFmt(" [[user(locn{})]]", psInputIndex);
			if (psInput.isFlat)
				out->add(" [[flat]]");
			if (psInput.isNoPerspective)
				out->add(" [[center_no_perspective]]");
		}
		out->add(";" _CRLF);
	}

	// TODO: handle this in the fragment shader instead?
	// Declare all PS inputs that are not written by the VS
	for (uint32 i = 0; i < psInputTable->count; i++)
	{
		if (psInputHasSource[i] || psInputTable->import[i].semanticId > LATTE_ANALYZER_IMPORT_INDEX_PARAM_MAX)
			continue;
		out->addFmt("float4 unknown{} [[user(locn{})]];" _CRLF, psInputTable->import[i].semanticId, i);
	}
	out->add("};" _CRLF _CRLF);

	if (!isRectVertexShader)
		return;

	// rect vertex shaders hand their vertices over through an object payload
	out->add("struct ObjectPayload {" _CRLF);
	out->add("VertexOut vertexOut[VERTICES_PER_VERTEX_PRIMITIVE];" _CRLF);
	out->add("};" _CRLF _CRLF);
}
// Emits the GET_FRAGCOORD() helper macro and the `FragmentIn` struct of a pixel shader.
// Each entry of the pixel shader input table whose semantic id is within the parameter range
// becomes a `passParameterSem<semanticId>` member bound to user(locn<tableIndex>), with the
// flat / no-perspective qualifiers taken from the table entry.
static void _emitPSInputs(LatteDecompilerShaderContext* shaderContext)
{
	auto* out = shaderContext->shaderSource;
	LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();

	out->add("#define GET_FRAGCOORD() float4(in.position.xy * supportBuffer.fragCoordScale.xy, in.position.z, 1.0 / in.position.w)" _CRLF);

	out->add("struct FragmentIn {" _CRLF);
	out->add("float4 position [[position]];" _CRLF);
	for (sint32 inputIndex = 0; inputIndex < psInputTable->count; inputIndex++)
	{
		auto& psInput = psInputTable->import[inputIndex];
		// entries outside of the parameter range are not declared as struct members
		if (psInput.semanticId > LATTE_ANALYZER_IMPORT_INDEX_PARAM_MAX)
			continue;

		out->addFmt("float4 passParameterSem{} [[user(locn{})]]", psInput.semanticId, inputIndex);
		if (psInput.isFlat)
			out->add(" [[flat]]");
		if (psInput.isNoPerspective)
			out->add(" [[center_no_perspective]]");
		out->add(";" _CRLF);
	}
	out->add("};" _CRLF _CRLF);
}
// Emits the input and output struct declarations for the shader stage being decompiled.
//
// - Vertex shaders get their `VertexIn` struct (see _emitAttributes).
// - Pixel shaders get `FragmentIn` (see _emitPSInputs) and a `FragmentOut` struct with one member
//   per enabled color output plus an optional depth output.
// - Without a geometry shader, or for rect vertex shaders, vertex shaders additionally get their
//   regular `VertexOut` struct, but only when rasterizationEnabled is set.
// - With a geometry shader (and no rect vertex shader), vertex and geometry shaders get a
//   `VertexOut` struct holding the ring parameters plus an `ObjectPayload` wrapper, and geometry
//   shaders additionally get `GeometryOut` and the `MeshType` alias.
static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool usesGeometryShader, bool fetchVertexManually, bool rasterizationEnabled)
{
	auto src = decompilerContext->shaderSource;
	if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
	{
		_emitAttributes(decompilerContext, fetchVertexManually);
	}
	else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
	{
		_emitPSInputs(decompilerContext);
		src->add("struct FragmentOut {" _CRLF);
		// generate pixel outputs for pixel shader
		for (uint32 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
		{
			if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) != 0)
			{
				// outputs whose color buffer data type resolves to NONE are not declared
				auto dataType = GetColorBufferDataType(i, *decompilerContext->contextRegistersNew);
				if (dataType != MetalDataType::NONE)
				{
					src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetDataTypeStr(dataType), i, i);
				}
			}
		}
		// generate depth output for pixel shader
		if (decompilerContext->shader->depthMask)
			src->add("float passDepth [[depth(any)]];" _CRLF);
		src->add("};" _CRLF _CRLF);
	}
	if (!usesGeometryShader || isRectVertexShader)
	{
		if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && rasterizationEnabled)
			_emitVSOutputs(decompilerContext, isRectVertexShader);
	}
	else
	{
		if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
		{
			src->add("struct VertexOut {" _CRLF);
			// the vertex shader declares what it exports, the geometry shader what the previous stage exported
			uint32 ringParameterCountVS2GS = 0;
			if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
			{
				ringParameterCountVS2GS = decompilerContext->shader->ringParameterCount;
			}
			else
			{
				ringParameterCountVS2GS = decompilerContext->shader->ringParameterCountFromPrevStage;
			}
			for (uint32 f = 0; f < ringParameterCountVS2GS; f++)
				src->addFmt("int4 passParameterSem{};" _CRLF, f);
			src->add("};" _CRLF _CRLF);
			src->add("struct ObjectPayload {" _CRLF);
			src->add("VertexOut vertexOut[VERTICES_PER_VERTEX_PRIMITIVE];" _CRLF);
			src->add("};" _CRLF _CRLF);
		}
		if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
		{
			// parameters shared between geometry and pixel shader
			// the ring item size is expected to be a multiple of 16; the same check used to be
			// performed twice in a row, the redundant second copy has been dropped
			uint32 ringItemSize = decompilerContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF;
			if ((ringItemSize & 0xF) != 0)
				debugBreakpoint();
			src->add("struct GeometryOut {" _CRLF);
			src->add("float4 position [[position]];" _CRLF);
			for (sint32 p = 0; p < decompilerContext->parsedGSCopyShader->numParam; p++)
			{
				// only entries with export type 2 become GeometryOut members
				if (decompilerContext->parsedGSCopyShader->paramMapping[p].exportType != 2)
					continue;
				src->addFmt("float4 passParameterSem{} [[user(locn{})]];" _CRLF, (sint32)decompilerContext->parsedGSCopyShader->paramMapping[p].exportParam, decompilerContext->parsedGSCopyShader->paramMapping[p].exportParam & 0x7F);
			}
			src->add("};" _CRLF _CRLF);
			const uint32 MAX_VERTEX_COUNT = 32;
			// Define the mesh shader output type
			src->addFmt("using MeshType = mesh<GeometryOut, void, {}, GET_PRIMITIVE_COUNT({}), topology::MTL_PRIMITIVE_TYPE>;" _CRLF, MAX_VERTEX_COUNT, MAX_VERTEX_COUNT);
		}
	}
}
// Writes the preamble of the generated MSL source:
//  - primitive related macros, only when a geometry shader is in use and the current stage is vertex or geometry
//  - the SET_POSITION macro, whose form depends on PA_CL_CLIP_CNTL's DX_CLIP_SPACE_DEF bit
//  - uniform variables, uniform buffers and the input/output declarations
// When shader dumping is enabled the declarations are wrapped in marker comments.
static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool usesGeometryShader, bool fetchVertexManually, bool rasterizationEnabled)
{
	auto src = decompilerContext->shaderSource;
	const bool isVertexStage = decompilerContext->shaderType == LatteConst::ShaderType::Vertex;
	const bool isGeometryStage = decompilerContext->shaderType == LatteConst::ShaderType::Geometry;

	if (usesGeometryShader && (isVertexStage || isGeometryStage))
	{
		// number of vertices that make up one primitive of the draw's primitive mode
		LattePrimitiveMode inputPrimitiveMode = decompilerContext->contextRegistersNew->VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
		src->addFmt("#define VERTICES_PER_VERTEX_PRIMITIVE {}" _CRLF, GetVerticesPerPrimitive(inputPrimitiveMode));

		if (isGeometryStage)
		{
			// output topology of the geometry stage and the matching vertex count -> primitive count conversion
			const uint32 outputPrimitiveType = decompilerContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE];
			if (outputPrimitiveType == 0) // Point
			{
				src->add("#define MTL_PRIMITIVE_TYPE point" _CRLF);
				src->add("#define GET_PRIMITIVE_COUNT(vertexCount) (vertexCount / 1)" _CRLF);
			}
			else if (outputPrimitiveType == 1) // Line strip
			{
				src->add("#define MTL_PRIMITIVE_TYPE line" _CRLF);
				src->add("#define GET_PRIMITIVE_COUNT(vertexCount) (vertexCount - 1)" _CRLF);
			}
			else if (outputPrimitiveType == 2) // Triangle strip
			{
				src->add("#define MTL_PRIMITIVE_TYPE triangle" _CRLF);
				src->add("#define GET_PRIMITIVE_COUNT(vertexCount) (vertexCount - 2)" _CRLF);
			}
			else
			{
				// no macros are emitted for unknown types, only a log entry
				cemuLog_log(LogType::Force, "Unknown geometry out primitive type {}", outputPrimitiveType);
			}
		}
	}

	// without DX clip space the emitted macro additionally remaps z to (z + w) / 2
	if (decompilerContext->contextRegistersNew->PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
		src->add("#define SET_POSITION(_v) out.position = _v" _CRLF);
	else
		src->add("#define SET_POSITION(_v) out.position = _v; out.position.z = (out.position.z + out.position.w) / 2.0" _CRLF);

	const bool markDeclarations = ActiveSettings::DumpShadersEnabled();
	if (markDeclarations)
		src->add("// start of shader inputs/outputs, predetermined by Cemu. Do not touch" _CRLF);

	_emitUniformVariables(decompilerContext, usesGeometryShader);
	_emitUniformBuffers(decompilerContext);
	_emitInputsAndOutputs(decompilerContext, isRectVertexShader, usesGeometryShader, fetchVertexManually, rasterizationEnabled);

	if (markDeclarations)
		src->add("// end of shader inputs/outputs" _CRLF);
}
// Appends one ", constant UBuffN& ubuffN [[buffer(B)]]" argument for every uniform buffer the analyzer
// recorded an access for. Nothing is emitted unless the shader uses the FULL_CBANK uniform mode.
static void _emitUniformBufferDefinitions(LatteDecompilerShaderContext* decompilerContext)
{
	if (decompilerContext->shader->uniformMode != LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
		return;

	auto src = decompilerContext->shaderSource;
	const auto& bindingPoints = decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint;
	for (uint32 bufferIndex = 0; bufferIndex < LATTE_NUM_MAX_UNIFORM_BUFFERS; bufferIndex++)
	{
		if (decompilerContext->analyzer.uniformBufferAccessTracker[bufferIndex].HasAccess())
		{
			// an accessed buffer is expected to have a binding point assigned already
			cemu_assert_debug(bindingPoints[bufferIndex] >= 0);
			src->addFmt(", constant UBuff{}& ubuff{} [[buffer({})]]", bufferIndex, bufferIndex, (sint32)bindingPoints[bufferIndex]);
		}
	}
}
// Appends the texture related entry point arguments for every texture unit set in textureUnitMask.
// A unit whose textureRenderTargetIndex is not 255 is declared as a [[color(N)]] input (once per render target)
// when the renderer reports framebuffer fetch support; every other unit gets a texture plus a sampler argument
// that share the same binding index.
static void _emitTextureDefinitions(LatteDecompilerShaderContext* shaderContext)
{
	auto src = shaderContext->shaderSource;
	bool renderTargetIndexUsed[LATTE_NUM_COLOR_TARGET] = {false};

	for (sint32 texUnit = 0; texUnit < LATTE_NUM_MAX_TEX_UNITS; texUnit++)
	{
		if (!shaderContext->output->textureUnitMask[texUnit])
			continue;

		uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texUnit];
		if (static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && renderTargetIndex != 255)
		{
			// several units may refer to the same render target, declare it only once
			if (renderTargetIndexUsed[renderTargetIndex])
				continue;
			src->addFmt(", {} col{} [[color({})]]", GetDataTypeStr(GetColorBufferDataType(renderTargetIndex, *shaderContext->contextRegistersNew)), renderTargetIndex, renderTargetIndex);
			renderTargetIndexUsed[renderTargetIndex] = true;
			continue;
		}

		const auto dim = shaderContext->shader->textureUnitDim[texUnit];
		src->add(", ");
		// Only certain texture dimensions can be used with comparison samplers
		if (shaderContext->shader->textureUsesDepthCompare[texUnit] && IsValidDepthTextureType(dim))
			src->add("depth");
		else
			src->add("texture");

		if (shaderContext->shader->textureIsIntegerFormat[texUnit])
		{
			// integer formats: only 1D and 2D are handled
			switch (dim)
			{
			case Latte::E_DIM::DIM_1D:
				src->add("1d<uint>");
				break;
			case Latte::E_DIM::DIM_2D:
			case Latte::E_DIM::DIM_2D_MSAA:
				src->add("2d<uint>");
				break;
			default:
				cemu_assert_unimplemented();
				break;
			}
		}
		else
		{
			switch (dim)
			{
			case Latte::E_DIM::DIM_2D:
			case Latte::E_DIM::DIM_2D_MSAA:
				src->add("2d<float>");
				break;
			case Latte::E_DIM::DIM_1D:
				src->add("1d<float>");
				break;
			case Latte::E_DIM::DIM_2D_ARRAY:
				src->add("2d_array<float>");
				break;
			case Latte::E_DIM::DIM_CUBEMAP:
				src->add("cube_array<float>");
				break;
			case Latte::E_DIM::DIM_3D:
				src->add("3d<float>");
				break;
			default:
				cemu_assert_unimplemented();
				break;
			}
		}

		// texture and sampler use the same binding index
		uint32 bindingIndex = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[texUnit];
		src->addFmt(" tex{} [[texture({})]]", texUnit, bindingIndex);
		src->addFmt(", sampler samplr{} [[sampler({})]]", texUnit, bindingIndex);
	}
}
// Appends the comma separated argument declarations of the shader entry point for the current stage,
// followed by the support buffer, the streamout buffer, the uniform buffers and the textures.
// isRectVertexShader is not read by this function.
static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool usesGeometryShader, bool fetchVertexManually)
{
	auto src = decompilerContext->shaderSource;
	const auto stage = decompilerContext->shaderType;

	if (stage == LatteConst::ShaderType::Vertex)
	{
		if (!usesGeometryShader)
		{
			// TODO: only include these if needed?
			src->add("uint vid [[vertex_id]]");
			src->add(", uint iid [[instance_id]]");
		}
		else
		{
			// with a geometry shader the vertex stage takes a payload and mesh grid properties instead of vertex/instance ids
			src->add("object_data ObjectPayload& objectPayload [[payload]]");
			src->add(", mesh_grid_properties meshGridProperties");
			src->add(", uint tig [[threadgroup_position_in_grid]]");
			src->add(", uint tid [[thread_index_in_threadgroup]]");
			// TODO: only include index buffer if needed
			src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding);
			// TODO: put into the support buffer?
			src->addFmt(", constant uchar& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding);
		}

		// manual fetch emits the VERTEX_BUFFER_DEFINITIONS token (note: no leading comma), otherwise a stage_in struct is used
		if (fetchVertexManually)
			src->add(" VERTEX_BUFFER_DEFINITIONS");
		else
			src->add(", VertexIn in [[stage_in]]");
	}
	else if (stage == LatteConst::ShaderType::Geometry)
	{
		src->add("MeshType mesh");
		src->add(", const object_data ObjectPayload& objectPayload [[payload]]");
	}
	else if (stage == LatteConst::ShaderType::Pixel)
	{
		src->add("FragmentIn in [[stage_in]]");
		// TODO: only include these if needed?
		src->add(", float2 pointCoord [[point_coord]]");
		src->add(", bool frontFacing [[front_facing]]");
	}

	// support buffer, only when a binding point was assigned
	const auto supportBufferBinding = decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint;
	if (supportBufferBinding >= 0)
		src->addFmt(", constant SupportBuffer& supportBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint);

	// streamout buffer (transform feedback): vertex stage without geometry shader, or the geometry stage itself
	const bool stageCanStreamOut = (stage == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) || stage == LatteConst::ShaderType::Geometry;
	if (stageCanStreamOut && decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite)
		src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingMTL.tfStorageBindingPoint);

	_emitUniformBufferDefinitions(decompilerContext);
	_emitTextureDefinitions(decompilerContext);
}
}

View file

@ -47,7 +47,7 @@ struct LatteDecompilerTEXInstruction
sint32 dstGpr; sint32 dstGpr;
sint8 dstSel[4]; sint8 dstSel[4];
// texture fetch // texture fetch
struct struct
{ {
sint32 textureIndex{}; sint32 textureIndex{};
sint32 samplerIndex{}; sint32 samplerIndex{};
@ -216,7 +216,7 @@ struct LatteDecompilerShaderContext
bool genIntReg; // if set, generate R*i register variables bool genIntReg; // if set, generate R*i register variables
bool useArrayGPRs; // if set, an array is used to represent GPRs instead of individual variables bool useArrayGPRs; // if set, an array is used to represent GPRs instead of individual variables
}typeTracker; }typeTracker;
// analyzer // analyzer
struct struct
{ {
// general // general
@ -260,6 +260,8 @@ struct LatteDecompilerShaderContext
// emitter // emitter
bool hasUniformVarBlock; bool hasUniformVarBlock;
sint32 currentBindingPointVK{}; sint32 currentBindingPointVK{};
sint32 currentBufferBindingPointMTL{};
sint32 currentTextureBindingPointMTL{};
struct ALUClauseTemporariesState* aluPVPSState{nullptr}; struct ALUClauseTemporariesState* aluPVPSState{nullptr};
// misc // misc
std::vector<LatteDecompilerSubroutineInfo> list_subroutines; std::vector<LatteDecompilerSubroutineInfo> list_subroutines;
@ -268,9 +270,10 @@ struct LatteDecompilerShaderContext
void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader); void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader);
void LatteDecompiler_analyzeDataTypes(LatteDecompilerShaderContext* shaderContext); void LatteDecompiler_analyzeDataTypes(LatteDecompilerShaderContext* shaderContext);
void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader); void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader);
void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader);
void LatteDecompiler_cleanup(LatteDecompilerShaderContext* shaderContext); void LatteDecompiler_cleanup(LatteDecompilerShaderContext* shaderContext);
// helper functions // helper functions
sint32 LatteDecompiler_getColorOutputIndexFromExportIndex(LatteDecompilerShaderContext* shaderContext, sint32 exportIndex); sint32 LatteDecompiler_getColorOutputIndexFromExportIndex(LatteDecompilerShaderContext* shaderContext, sint32 exportIndex);

View file

@ -0,0 +1,64 @@
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
// Builds the render pass descriptor for this framebuffer: every bound color buffer and the depth
// (and optionally stencil) buffer become attachments with load/store actions set to Load/Store.
CachedFBOMtl::CachedFBOMtl(class MetalRenderer* metalRenderer, uint64 key) : LatteCachedFBO(key)
{
	m_renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();

	bool anyAttachmentBound = false;

	// color attachments
	for (int slot = 0; slot < 8; ++slot)
	{
		auto colorView = static_cast<LatteTextureViewMtl*>(colorBuffer[slot].texture);
		if (colorView)
		{
			auto attachment = m_renderPassDescriptor->colorAttachments()->object(slot);
			attachment->setTexture(colorView->GetRGBAView());
			attachment->setLoadAction(MTL::LoadActionLoad);
			attachment->setStoreAction(MTL::StoreActionStore);
			anyAttachmentBound = true;
		}
	}

	// depth attachment
	if (auto depthView = static_cast<LatteTextureViewMtl*>(depthBuffer.texture))
	{
		MTL::Texture* depthTexture = depthView->GetRGBAView();

		auto depthAttachment = m_renderPassDescriptor->depthAttachment();
		depthAttachment->setTexture(depthTexture);
		depthAttachment->setLoadAction(MTL::LoadActionLoad);
		depthAttachment->setStoreAction(MTL::StoreActionStore);

		// stencil attachment, only if both the buffer and the Metal format used for it have a stencil component
		if (depthBuffer.hasStencil && GetMtlPixelFormatInfo(depthBuffer.texture->format, true).hasStencil)
		{
			auto stencilAttachment = m_renderPassDescriptor->stencilAttachment();
			stencilAttachment->setTexture(depthTexture);
			stencilAttachment->setLoadAction(MTL::LoadActionLoad);
			stencilAttachment->setStoreAction(MTL::StoreActionStore);
		}
		anyAttachmentBound = true;
	}

	// HACK: setup a dummy color attachment to prevent Metal from discarding draws for streamout draws in Super Smash Bros. for Wii U (works fine on MoltenVK without this hack though)
	if (!anyAttachmentBound)
	{
		auto dummyAttachment = m_renderPassDescriptor->colorAttachments()->object(0);
		dummyAttachment->setTexture(metalRenderer->GetNullTexture2D());
		dummyAttachment->setLoadAction(MTL::LoadActionDontCare);
		dummyAttachment->setStoreAction(MTL::StoreActionDontCare);
	}

	// Visibility buffer
	m_renderPassDescriptor->setVisibilityResultBuffer(metalRenderer->GetOcclusionQueryResultBuffer());
}
// Releases the render pass descriptor that the constructor created with alloc()->init().
CachedFBOMtl::~CachedFBOMtl()
{
m_renderPassDescriptor->release();
}

View file

@ -0,0 +1,22 @@
#pragma once
#include <Metal/Metal.hpp>
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
#include "Cafe/HW/Latte/Core/LatteCachedFBO.h"
// Metal flavour of LatteCachedFBO. Holds a single MTL::RenderPassDescriptor which is
// created by the constructor and released by the destructor.
class CachedFBOMtl : public LatteCachedFBO
{
public:
CachedFBOMtl(class MetalRenderer* metalRenderer, uint64 key);
~CachedFBOMtl();
// Returns the descriptor held by this object. No retain is performed for the caller.
MTL::RenderPassDescriptor* GetRenderPassDescriptor()
{
return m_renderPassDescriptor;
}
private:
// Released in the destructor
MTL::RenderPassDescriptor* m_renderPassDescriptor = nullptr;
};

View file

@ -0,0 +1,107 @@
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
// Creates the backing MTL::Texture for a Latte texture.
// The size comes from overwriteInfo when a resolution overwrite is set, otherwise from the constructor arguments;
// every dimension is clamped to at least 1 and the mip count to [1, maxPossibleMipLevels].
// The texture is created immediately in private storage.
LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle,
	Latte::E_HWTILEMODE tileMode, bool isDepth)
	: LatteTexture(dim, physAddress, physMipAddress, format, width, height, depth, pitch, mipLevels, swizzle, tileMode, isDepth), m_mtlr(mtlRenderer)
{
	NS_STACK_SCOPED MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init();
	desc->setStorageMode(MTL::StorageModePrivate);
	//desc->setCpuCacheMode(MTL::CPUCacheModeWriteCombined);

	// effective base size, never smaller than 1 in any dimension
	const bool useOverwrite = overwriteInfo.hasResolutionOverwrite;
	sint32 effectiveBaseWidth = std::max(1, static_cast<sint32>(useOverwrite ? overwriteInfo.width : width));
	sint32 effectiveBaseHeight = std::max(1, static_cast<sint32>(useOverwrite ? overwriteInfo.height : height));
	sint32 effectiveBaseDepth = std::max(1, static_cast<sint32>(useOverwrite ? overwriteInfo.depth : depth));

	// translate the Latte dimension into a Metal texture type
	MTL::TextureType textureType = MTL::TextureType2D;
	switch (dim)
	{
	case Latte::E_DIM::DIM_1D:
		textureType = MTL::TextureType1D;
		break;
	case Latte::E_DIM::DIM_2D:
	case Latte::E_DIM::DIM_2D_MSAA:
		textureType = MTL::TextureType2D;
		break;
	case Latte::E_DIM::DIM_2D_ARRAY:
		textureType = MTL::TextureType2DArray;
		break;
	case Latte::E_DIM::DIM_3D:
		textureType = MTL::TextureType3D;
		break;
	case Latte::E_DIM::DIM_CUBEMAP:
		cemu_assert_debug(effectiveBaseDepth % 6 == 0 && "cubemaps must have an array length multiple of 6");
		textureType = MTL::TextureTypeCubeArray;
		break;
	default:
		// unknown dimensions fall back to a plain 2D texture
		cemu_assert_unimplemented();
		break;
	}
	// 1D textures always get a height of 1
	if (textureType == MTL::TextureType1D)
		effectiveBaseHeight = 1;
	desc->setTextureType(textureType);

	// Clamp mip levels
	const uint32 clampedMipLevels = std::max(std::min(mipLevels, (uint32)maxPossibleMipLevels), (uint32)1);

	desc->setWidth(effectiveBaseWidth);
	desc->setHeight(effectiveBaseHeight);
	desc->setMipmapLevelCount(clampedMipLevels);

	// the depth value is interpreted according to the texture type
	switch (textureType)
	{
	case MTL::TextureType3D:
		desc->setDepth(effectiveBaseDepth);
		break;
	case MTL::TextureTypeCubeArray:
		// six faces per cube
		desc->setArrayLength(effectiveBaseDepth / 6);
		break;
	case MTL::TextureType2DArray:
		desc->setArrayLength(effectiveBaseDepth);
		break;
	default:
		break;
	}

	desc->setPixelFormat(GetMtlPixelFormat(format, isDepth));

	// views with a different pixel format are created from this texture, hence PixelFormatView
	MTL::TextureUsage usage = MTL::TextureUsageShaderRead | MTL::TextureUsagePixelFormatView;
	if (FormatIsRenderable(format))
		usage |= MTL::TextureUsageRenderTarget;
	desc->setUsage(usage);

	m_texture = mtlRenderer->GetDevice()->newTexture(desc);
}
// Releases the MTL::Texture created by the constructor via newTexture().
LatteTextureMtl::~LatteTextureMtl()
{
m_texture->release();
}
// Allocates a new LatteTextureViewMtl for the given mip and slice range of this texture.
// The returned object is created with new; ownership handling is up to the caller.
LatteTextureView* LatteTextureMtl::CreateView(Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount)
{
// debug-only sanity checks: the range must be non-empty and lie within this texture's mip levels and depth
cemu_assert_debug(mipCount > 0);
cemu_assert_debug(sliceCount > 0);
cemu_assert_debug((firstMip + mipCount) <= this->mipLevels);
cemu_assert_debug((firstSlice + sliceCount) <= this->depth);
return new LatteTextureViewMtl(m_mtlr, this, dim, format, firstMip, mipCount, firstSlice, sliceCount);
}
// TODO: lazy allocation?
// Intentionally empty: the constructor already creates the MTL::Texture.
void LatteTextureMtl::AllocateOnHost()
{
// The texture is already allocated
}

View file

@ -0,0 +1,29 @@
#pragma once
#include <Metal/Metal.hpp>
#include "Cafe/HW/Latte/Core/LatteTexture.h"
#include "HW/Latte/ISA/LatteReg.h"
#include "util/ChunkedHeap/ChunkedHeap.h"
// Metal implementation of LatteTexture. Wraps one MTL::Texture that is created in the
// constructor and released in the destructor.
class LatteTextureMtl : public LatteTexture
{
public:
LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels,
uint32 swizzle, Latte::E_HWTILEMODE tileMode, bool isDepth);
~LatteTextureMtl();
// Returns the underlying Metal texture without retaining it for the caller.
MTL::Texture* GetTexture() const {
return m_texture;
}
void AllocateOnHost() override;
protected:
// Creates a LatteTextureViewMtl for the requested mip/slice range
LatteTextureView* CreateView(Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount) override;
private:
// Renderer passed to the constructor; forwarded to the views created by CreateView
class MetalRenderer* m_mtlr;
// Released in the destructor
MTL::Texture* m_texture;
};

View file

@ -0,0 +1,52 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
// Drops the reference on the command buffer that StartTransfer() retained.
// m_commandBuffer stays nullptr when StartTransfer() was never called, hence the check.
LatteTextureReadbackInfoMtl::~LatteTextureReadbackInfoMtl()
{
if (m_commandBuffer)
m_commandBuffer->release();
}
void LatteTextureReadbackInfoMtl::StartTransfer()
{
cemu_assert(m_textureView);
auto* baseTexture = (LatteTextureMtl*)m_textureView->baseTexture;
cemu_assert_debug(m_textureView->firstSlice == 0);
cemu_assert_debug(m_textureView->firstMip == 0);
cemu_assert_debug(m_textureView->baseTexture->dim != Latte::E_DIM::DIM_3D);
size_t bytesPerRow = GetMtlTextureBytesPerRow(baseTexture->format, baseTexture->isDepth, baseTexture->width);
size_t bytesPerImage = GetMtlTextureBytesPerImage(baseTexture->format, baseTexture->isDepth, baseTexture->height, bytesPerRow);
auto blitCommandEncoder = m_mtlr->GetBlitCommandEncoder();
blitCommandEncoder->copyFromTexture(baseTexture->GetTexture(), 0, 0, MTL::Origin{0, 0, 0}, MTL::Size{(uint32)baseTexture->width, (uint32)baseTexture->height, 1}, m_mtlr->GetTextureReadbackBuffer(), m_bufferOffset, bytesPerRow, bytesPerImage);
m_commandBuffer = m_mtlr->GetCurrentCommandBuffer()->retain();
// TODO: uncomment?
//m_mtlr->RequestSoonCommit();
m_mtlr->CommitCommandBuffer();
}
// Returns the result of CommandBufferCompleted() for the command buffer retained by StartTransfer().
bool LatteTextureReadbackInfoMtl::IsFinished()
{
// Command buffer wasn't even committed, let's commit immediately
//if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer)
// m_mtlr->CommitCommandBuffer();
return CommandBufferCompleted(m_commandBuffer);
}
// Blocks until the command buffer retained by StartTransfer() has completed.
// NOTE(review): m_commandBuffer is not checked for nullptr here, unlike in the destructor - presumably StartTransfer() always runs first; confirm against callers.
void LatteTextureReadbackInfoMtl::ForceFinish()
{
m_commandBuffer->waitUntilCompleted();
}
// Returns a pointer into the renderer's texture readback buffer, m_bufferOffset bytes past its start.
// This is the location StartTransfer() passes to the blit as destination offset.
uint8* LatteTextureReadbackInfoMtl::GetData()
{
return (uint8*)m_mtlr->GetTextureReadbackBuffer()->contents() + m_bufferOffset;
}

View file

@ -0,0 +1,25 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Core/LatteTextureReadbackInfo.h"
// Metal implementation of LatteTextureReadbackInfo. Represents one texture readback that copies
// into the renderer's shared texture readback buffer at a fixed offset.
class LatteTextureReadbackInfoMtl : public LatteTextureReadbackInfo
{
public:
// bufferOffset: byte offset inside the renderer's texture readback buffer used for this readback
LatteTextureReadbackInfoMtl(class MetalRenderer* mtlRenderer, LatteTextureView* textureView, uint32 bufferOffset) : LatteTextureReadbackInfo(textureView), m_mtlr{mtlRenderer}, m_bufferOffset{bufferOffset} {}
~LatteTextureReadbackInfoMtl();
void StartTransfer() override;
bool IsFinished() override;
void ForceFinish() override;
uint8* GetData() override;
private:
class MetalRenderer* m_mtlr;
// Retained by StartTransfer(), released in the destructor; nullptr until StartTransfer() ran
MTL::CommandBuffer* m_commandBuffer = nullptr;
uint32 m_bufferOffset = 0;
};

View file

@ -0,0 +1,191 @@
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Metal/MTLTexture.hpp"
// Remaps a single component selector for formats whose channel layout differs from what the
// selector assumes. Selectors 0..3 address the R, G, B and A channel; every other value is
// returned unchanged for all formats.
uint32 LatteTextureMtl_AdjustTextureCompSel(Latte::E_GX2SURFFMT format, uint32 compSel)
{
	// true if the selector addresses one of the four channels
	const bool selectsChannel = compSel <= 3;

	switch (format)
	{
	case Latte::E_GX2SURFFMT::R8_UNORM: // R8 is replicated on all channels (while OpenGL would return 1.0 for BGA instead)
	case Latte::E_GX2SURFFMT::R8_SNORM: // probably the same as _UNORM, but needs testing
	case Latte::E_GX2SURFFMT::BC4_UNORM:
	case Latte::E_GX2SURFFMT::BC4_SNORM:
		// single channel formats: G, B and A read from R
		if (selectsChannel && compSel != 0)
			return 0;
		break;
	case Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM: // order of components is reversed (RGBA -> ABGR)
	case Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM: // reversed as well (Wii U: ABGR, OpenGL: RGBA), used in Resident Evil Revelations
		if (selectsChannel)
			return 3 - compSel;
		break;
	case Latte::E_GX2SURFFMT::BC5_UNORM:
	case Latte::E_GX2SURFFMT::BC5_SNORM:
		// RG maps to RG
		// B maps to ?
		// A maps to G (guessed)
		if (compSel == 3)
			return 1;
		break;
	case Latte::E_GX2SURFFMT::X24_G8_UINT:
		// map everything to alpha?
		if (selectsChannel)
			return 3;
		break;
	case Latte::E_GX2SURFFMT::R4_G4_UNORM:
		// red and green swapped
		if (compSel == 0)
			return 1;
		if (compSel == 1)
			return 0;
		break;
	default:
		break;
	}
	return compSel;
}
// Creates the view and eagerly builds the RGBA_SWIZZLE texture view, which GetSwizzledView()
// returns directly for that swizzle. Views for other swizzles are created on demand.
LatteTextureViewMtl::LatteTextureViewMtl(MetalRenderer* mtlRenderer, LatteTextureMtl* texture, Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount)
: LatteTextureView(texture, firstMip, mipCount, firstSlice, sliceCount, dim, format), m_mtlr(mtlRenderer), m_baseTexture(texture)
{
m_rgbaView = CreateSwizzledView(RGBA_SWIZZLE);
}
// Releases every Metal texture view owned by this object: the default RGBA view, the entries of
// the fixed-size fast cache and the entries of the fallback map.
// Null pointers are skipped: the fallback map can contain null values (an operator[] lookup on
// the map default-inserts one), and calling release() through a null pointer must be avoided.
LatteTextureViewMtl::~LatteTextureViewMtl()
{
	if (m_rgbaView)
		m_rgbaView->release();

	for (const auto& entry : m_viewCache)
	{
		// unused slots are marked with INVALID_SWIZZLE
		if (entry.key != INVALID_SWIZZLE && entry.texture)
			entry.texture->release();
	}

	for (const auto& [key, texture] : m_fallbackViewCache)
	{
		if (texture)
			texture->release();
	}
}
// Returns a texture view with the component swizzle encoded in gpuSamplerSwizzle, creating and
// caching it on first use. Only the bits selected by the mask 0x0FFF0000 are taken into account.
// Lookup order: the default RGBA view, the fixed-size fast cache, then the fallback map.
// The returned texture stays owned by this object (it is released in the destructor).
MTL::Texture* LatteTextureViewMtl::GetSwizzledView(uint32 gpuSamplerSwizzle)
{
	// Mask out
	gpuSamplerSwizzle &= 0x0FFF0000;

	// RGBA swizzle == no swizzle
	if (gpuSamplerSwizzle == RGBA_SWIZZLE)
		return m_rgbaView;

	// Fast cache: look for a hit and remember the first unused slot on the way
	sint32 freeIndex = -1;
	for (sint32 i = 0; i < static_cast<sint32>(std::size(m_viewCache)); i++)
	{
		const auto& entry = m_viewCache[i];
		if (entry.key == gpuSamplerSwizzle)
			return entry.texture;
		if (entry.key == INVALID_SWIZZLE && freeIndex == -1)
			freeIndex = i;
	}

	// Fallback cache: use find() instead of operator[] so that a miss does not insert a null
	// entry into the map (the view may end up in the fast cache instead)
	if (const auto it = m_fallbackViewCache.find(gpuSamplerSwizzle); it != m_fallbackViewCache.end() && it->second)
		return it->second;

	MTL::Texture* texture = CreateSwizzledView(gpuSamplerSwizzle);
	if (freeIndex != -1)
		m_viewCache[freeIndex] = {gpuSamplerSwizzle, texture};
	else
		m_fallbackViewCache.insert_or_assign(gpuSamplerSwizzle, texture);

	return texture;
}
// Creates a new Metal texture view of the base texture covering this view's mip and slice range,
// with the component swizzle decoded from gpuSamplerSwizzle. The caller receives ownership of
// the returned texture.
MTL::Texture* LatteTextureViewMtl::CreateSwizzledView(uint32 gpuSamplerSwizzle)
{
	// four 3-bit selectors, starting at bit 16, in the order R, G, B, A
	uint32 compSel[4];
	for (uint32 channel = 0; channel < 4; channel++)
	{
		const uint32 rawSelector = (gpuSamplerSwizzle >> (16 + channel * 3)) & 0x7;
		compSel[channel] = LatteTextureMtl_AdjustTextureCompSel(format, rawSelector);
	}

	MTL::TextureType textureType = MTL::TextureType2D;
	switch (dim)
	{
	case Latte::E_DIM::DIM_1D:
		textureType = MTL::TextureType1D;
		break;
	case Latte::E_DIM::DIM_2D:
	case Latte::E_DIM::DIM_2D_MSAA:
		textureType = MTL::TextureType2D;
		break;
	case Latte::E_DIM::DIM_2D_ARRAY:
		textureType = MTL::TextureType2DArray;
		break;
	case Latte::E_DIM::DIM_3D:
		textureType = MTL::TextureType3D;
		break;
	case Latte::E_DIM::DIM_CUBEMAP:
		cemu_assert_debug(this->numSlice % 6 == 0 && "cubemaps must have an array length multiple of 6");
		textureType = MTL::TextureTypeCubeArray;
		break;
	default:
		// unknown dimensions fall back to 2D
		cemu_assert_unimplemented();
		break;
	}

	const uint32 baseLevel = firstMip;
	uint32 levelCount = this->numMip;
	uint32 baseLayer = 0;
	uint32 layerCount = 1;

	if (textureType != MTL::TextureType3D)
	{
		baseLayer = firstSlice;
		// only array types cover more than one layer
		const bool isArrayType = textureType == MTL::TextureTypeCubeArray || textureType == MTL::TextureType2DArray;
		if (isArrayType)
			layerCount = this->numSlice;
	}
	else
	{
		// TODO: check if base texture is 3D texture as well
		// 3D views are expected to start at mip 0 and span the full depth
		cemu_assert_debug(firstMip == 0);
		cemu_assert_debug(this->numSlice == baseTexture->depth);
	}

	// Clamp mip levels
	levelCount = std::max(std::min(levelCount, m_baseTexture->maxPossibleMipLevels - baseLevel), (uint32)1);

	MTL::TextureSwizzleChannels swizzle;
	swizzle.red = GetMtlTextureSwizzle(compSel[0]);
	swizzle.green = GetMtlTextureSwizzle(compSel[1]);
	swizzle.blue = GetMtlTextureSwizzle(compSel[2]);
	swizzle.alpha = GetMtlTextureSwizzle(compSel[3]);

	const auto pixelFormat = GetMtlPixelFormat(format, m_baseTexture->isDepth);
	return m_baseTexture->GetTexture()->newTextureView(pixelFormat, textureType, NS::Range::Make(baseLevel, levelCount), NS::Range::Make(baseLayer, layerCount), swizzle);
}

View file

@ -0,0 +1,37 @@
#pragma once
#include <Metal/Metal.hpp>
#include <unordered_map>
#include "Cafe/HW/Latte/Core/LatteTexture.h"
#define RGBA_SWIZZLE 0x06880000
#define INVALID_SWIZZLE 0xFFFFFFFF
// Metal implementation of LatteTextureView. Owns one Metal texture view per component swizzle
// that has been requested through GetSwizzledView().
class LatteTextureViewMtl : public LatteTextureView
{
public:
LatteTextureViewMtl(class MetalRenderer* mtlRenderer, class LatteTextureMtl* texture, Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount);
~LatteTextureViewMtl();
// Returns the (cached) view for the given sampler swizzle; the view stays owned by this object
MTL::Texture* GetSwizzledView(uint32 gpuSamplerSwizzle);
// Shortcut for the view with the default RGBA_SWIZZLE
MTL::Texture* GetRGBAView()
{
return GetSwizzledView(RGBA_SWIZZLE);
}
private:
class MetalRenderer* m_mtlr;
class LatteTextureMtl* m_baseTexture;
// View for RGBA_SWIZZLE, created in the constructor
MTL::Texture* m_rgbaView;
// Small fixed-size cache that GetSwizzledView() searches first; unused slots have key INVALID_SWIZZLE
struct {
uint32 key;
MTL::Texture* texture;
} m_viewCache[4] = {{INVALID_SWIZZLE, nullptr}, {INVALID_SWIZZLE, nullptr}, {INVALID_SWIZZLE, nullptr}, {INVALID_SWIZZLE, nullptr}};
// Used by GetSwizzledView() once all m_viewCache slots are taken; keyed by the masked swizzle value
std::unordered_map<uint32, MTL::Texture*> m_fallbackViewCache;
// Creates a new view of the base texture for the given swizzle; the result is owned by the caller
MTL::Texture* CreateSwizzledView(uint32 gpuSamplerSwizzle);
};

View file

@ -0,0 +1,511 @@
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cemu/Logging/CemuLogging.h"
#include "HW/Latte/Core/LatteTextureLoader.h"
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
// Lookup table from Latte color surface formats to the MetalPixelFormatInfo used for them.
// Each entry lists the MTL::PixelFormat, the MetalDataType and a size value; the BC formats additionally carry a {4, 4} pair.
// NOTE(review): the size value looks like bytes per texel (per block for BC formats) and the pair like the block dimensions - confirm against the MetalPixelFormatInfo declaration.
// The map is intentionally not const: CheckForPixelFormatSupport() assigns the textureDecoder member of its entries at runtime.
std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_COLOR_FORMAT_TABLE = {
{Latte::E_GX2SURFFMT::INVALID_FORMAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}},
{Latte::E_GX2SURFFMT::R4_G4_UNORM, {MTL::PixelFormatABGR4Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R5_G6_B5_UNORM, {MTL::PixelFormatB5G6R5Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM, {MTL::PixelFormatBGR5A1Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM, {MTL::PixelFormatABGR4Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM, {MTL::PixelFormatA1BGR5Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R8_UNORM, {MTL::PixelFormatR8Unorm, MetalDataType::FLOAT, 1}},
{Latte::E_GX2SURFFMT::R8_SNORM, {MTL::PixelFormatR8Snorm, MetalDataType::FLOAT, 1}},
{Latte::E_GX2SURFFMT::R8_UINT, {MTL::PixelFormatR8Uint, MetalDataType::UINT, 1}},
{Latte::E_GX2SURFFMT::R8_SINT, {MTL::PixelFormatR8Sint, MetalDataType::INT, 1}},
{Latte::E_GX2SURFFMT::R8_G8_UNORM, {MTL::PixelFormatRG8Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R8_G8_SNORM, {MTL::PixelFormatRG8Snorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R8_G8_UINT, {MTL::PixelFormatRG8Uint, MetalDataType::UINT, 2}},
{Latte::E_GX2SURFFMT::R8_G8_SINT, {MTL::PixelFormatRG8Sint, MetalDataType::INT, 2}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM, {MTL::PixelFormatRGBA8Unorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM, {MTL::PixelFormatRGBA8Snorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT, {MTL::PixelFormatRGBA8Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT, {MTL::PixelFormatRGBA8Sint, MetalDataType::INT, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM, {MTL::PixelFormatRGB10A2Unorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGB10A2Unorm, MetalDataType::FLOAT, 4}}, // TODO: sRGB?
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R16_SNORM, {MTL::PixelFormatR16Snorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R16_UINT, {MTL::PixelFormatR16Uint, MetalDataType::UINT, 2}},
{Latte::E_GX2SURFFMT::R16_SINT, {MTL::PixelFormatR16Sint, MetalDataType::INT, 2}},
{Latte::E_GX2SURFFMT::R16_FLOAT, {MTL::PixelFormatR16Float, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R16_G16_UNORM, {MTL::PixelFormatRG16Unorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_SNORM, {MTL::PixelFormatRG16Snorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_UINT, {MTL::PixelFormatRG16Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_SINT, {MTL::PixelFormatRG16Sint, MetalDataType::INT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_FLOAT, {MTL::PixelFormatRG16Float, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM, {MTL::PixelFormatRGBA16Unorm, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM, {MTL::PixelFormatRGBA16Snorm, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, MetalDataType::UINT, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatRGBA8Uint, MetalDataType::UINT, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatRGBA16Uint, MetalDataType::UINT, 8}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R32_SINT, {MTL::PixelFormatR32Sint, MetalDataType::INT, 4}},
{Latte::E_GX2SURFFMT::R32_FLOAT, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R32_G32_UINT, {MTL::PixelFormatRG32Uint, MetalDataType::UINT, 8}},
{Latte::E_GX2SURFFMT::R32_G32_SINT, {MTL::PixelFormatRG32Sint, MetalDataType::INT, 8}},
{Latte::E_GX2SURFFMT::R32_G32_FLOAT, {MTL::PixelFormatRG32Float, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT, {MTL::PixelFormatRGBA32Uint, MetalDataType::UINT, 16}},
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_SINT, {MTL::PixelFormatRGBA32Sint, MetalDataType::INT, 16}},
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT, {MTL::PixelFormatRGBA32Float, MetalDataType::FLOAT, 16}},
{Latte::E_GX2SURFFMT::BC1_UNORM, {MTL::PixelFormatBC1_RGBA, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC1_SRGB, {MTL::PixelFormatBC1_RGBA_sRGB, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC2_UNORM, {MTL::PixelFormatBC2_RGBA, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC2_SRGB, {MTL::PixelFormatBC2_RGBA_sRGB, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC3_UNORM, {MTL::PixelFormatBC3_RGBA, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC3_SRGB, {MTL::PixelFormatBC3_RGBA_sRGB, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC4_UNORM, {MTL::PixelFormatBC4_RUnorm, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC4_SNORM, {MTL::PixelFormatBC4_RSnorm, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC5_UNORM, {MTL::PixelFormatBC5_RGUnorm, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC5_SNORM, {MTL::PixelFormatBC5_RGSnorm, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
};
// Lookup table from Latte depth surface formats to the MetalPixelFormatInfo used for them.
// All depth entries use MetalDataType::NONE and a {1, 1} pair; the combined depth/stencil formats carry a trailing true.
// NOTE(review): the trailing flag presumably initializes the hasStencil member - confirm against the MetalPixelFormatInfo declaration.
// NOTE(review): D32_S8_FLOAT uses a size value of 5 while D24_S8_FLOAT maps to the same Metal format with 4 - confirm this is intended.
std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_DEPTH_FORMAT_TABLE = {
{Latte::E_GX2SURFFMT::INVALID_FORMAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}},
{Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, MetalDataType::NONE, 4, {1, 1}, true}},
{Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 4, {1, 1}, true}},
{Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 5, {1, 1}, true}},
{Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2, {1, 1}}},
{Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, MetalDataType::NONE, 4, {1, 1}}},
};
// TODO: R10_G10_B10_A2_UINT and R10_G10_B10_A2_SINT
// TODO: A2_B10_G10_R10_UNORM and A2_B10_G10_R10_UINT
// Finalizes the color and depth format tables for the current device.
//
// First assigns a texture decoder to every known format. Then, for formats the
// device cannot represent natively (as reported by `support`), the table entry is
// redirected to a wider Metal pixel format together with a matching decoder and
// an updated bytesPerBlock.
//
// support: device capability flags (packed 16-bit formats, Depth24Unorm_Stencil8).
//
// Note: entries are accessed through std::map::operator[], so a format that is
// not yet present in a table gets a default-constructed entry inserted.
void CheckForPixelFormatSupport(const MetalPixelFormatSupport& support)
{
    // Texture decoders
    // Color
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT].textureDecoder = TextureDecoder_R32_G32_B32_A32_FLOAT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT].textureDecoder = TextureDecoder_R32_G32_B32_A32_UINT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT].textureDecoder = TextureDecoder_R16_G16_B16_A16_FLOAT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT].textureDecoder = TextureDecoder_R16_G16_B16_A16_UINT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM].textureDecoder = TextureDecoder_R16_G16_B16_A16::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM].textureDecoder = TextureDecoder_R16_G16_B16_A16::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM].textureDecoder = TextureDecoder_R8_G8_B8_A8::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM].textureDecoder = TextureDecoder_R8_G8_B8_A8::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB].textureDecoder = TextureDecoder_R8_G8_B8_A8::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT].textureDecoder = TextureDecoder_R8_G8_B8_A8::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT].textureDecoder = TextureDecoder_R8_G8_B8_A8::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R32_G32_FLOAT].textureDecoder = TextureDecoder_R32_G32_FLOAT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R32_G32_UINT].textureDecoder = TextureDecoder_R32_G32_UINT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_G16_UNORM].textureDecoder = TextureDecoder_R16_G16::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_G16_FLOAT].textureDecoder = TextureDecoder_R16_G16_FLOAT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_UNORM].textureDecoder = TextureDecoder_R8_G8::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_G8_SNORM].textureDecoder = TextureDecoder_R8_G8::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_UNORM].textureDecoder = TextureDecoder_R4_G4_UNORM_To_ABGR4::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R32_FLOAT].textureDecoder = TextureDecoder_R32_FLOAT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R32_UINT].textureDecoder = TextureDecoder_R32_UINT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_FLOAT].textureDecoder = TextureDecoder_R16_FLOAT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_UNORM].textureDecoder = TextureDecoder_R16_UNORM::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_SNORM].textureDecoder = TextureDecoder_R16_SNORM::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R16_UINT].textureDecoder = TextureDecoder_R16_UINT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_UNORM].textureDecoder = TextureDecoder_R8::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_SNORM].textureDecoder = TextureDecoder_R8::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R8_UINT].textureDecoder = TextureDecoder_R8_UINT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G6_B5_UNORM].textureDecoder = TextureDecoder_R5_G6_B5_swappedRB::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM].textureDecoder = TextureDecoder_R5_G5_B5_A1_UNORM_swappedRB::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM].textureDecoder = TextureDecoder_A1_B5_G5_R5_UNORM::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT].textureDecoder = TextureDecoder_R11_G11_B10_FLOAT::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM].textureDecoder = TextureDecoder_R4_G4_B4_A4_UNORM::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM].textureDecoder = TextureDecoder_R10_G10_B10_A2_UNORM::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM].textureDecoder = TextureDecoder_R10_G10_B10_A2_SNORM_To_RGBA16::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB].textureDecoder = TextureDecoder_R10_G10_B10_A2_UNORM::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC1_SRGB].textureDecoder = TextureDecoder_BC1::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC1_UNORM].textureDecoder = TextureDecoder_BC1::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC2_UNORM].textureDecoder = TextureDecoder_BC2::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC2_SRGB].textureDecoder = TextureDecoder_BC2::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC3_UNORM].textureDecoder = TextureDecoder_BC3::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC3_SRGB].textureDecoder = TextureDecoder_BC3::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC4_UNORM].textureDecoder = TextureDecoder_BC4::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC4_SNORM].textureDecoder = TextureDecoder_BC4::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC5_UNORM].textureDecoder = TextureDecoder_BC5::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::BC5_SNORM].textureDecoder = TextureDecoder_BC5::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R24_X8_UNORM].textureDecoder = TextureDecoder_R24_X8::getInstance();
    MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::X24_G8_UINT].textureDecoder = TextureDecoder_X24_G8_UINT::getInstance();

    // Without packed 16-bit format support the affected formats are expanded on
    // upload. Every redirected entry must also update bytesPerBlock, because the
    // row/image pitch helpers derive their sizes from it.
    if (!support.m_supportsPacked16BitFormats)
    {
        // B5G6R5Unorm
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G6_B5_UNORM].pixelFormat = MTL::PixelFormatRGBA8Unorm;
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G6_B5_UNORM].bytesPerBlock = 4;
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G6_B5_UNORM].textureDecoder = TextureDecoder_R5G6B5_UNORM_To_RGBA8::getInstance();
        // A1BGR5Unorm
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM].pixelFormat = MTL::PixelFormatRGBA8Unorm;
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM].bytesPerBlock = 4; // RGBA8 texels are 4 bytes wide
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM].textureDecoder = TextureDecoder_A1_B5_G5_R5_UNORM_vulkan_To_RGBA8::getInstance();
        // ABGR4Unorm
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_UNORM].pixelFormat = MTL::PixelFormatRG8Unorm;
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_UNORM].bytesPerBlock = 2;
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_UNORM].textureDecoder = TextureDecoder_R4G4_UNORM_To_RG8::getInstance();
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM].pixelFormat = MTL::PixelFormatRGBA8Unorm;
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM].bytesPerBlock = 4;
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM].textureDecoder = TextureDecoder_R4G4B4A4_UNORM_To_RGBA8::getInstance();
        // BGR5A1Unorm
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM].pixelFormat = MTL::PixelFormatRGBA8Unorm;
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM].bytesPerBlock = 4; // RGBA8 texels are 4 bytes wide
        MTL_COLOR_FORMAT_TABLE[Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM].textureDecoder = TextureDecoder_R5_G5_B5_A1_UNORM_swappedRB_To_RGBA8::getInstance();
    }

    // Depth
    MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D24_S8_UNORM].textureDecoder = TextureDecoder_D24_S8::getInstance();
    MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D24_S8_FLOAT].textureDecoder = TextureDecoder_NullData64::getInstance(); // TODO: why?
    MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D32_FLOAT].textureDecoder = TextureDecoder_R32_FLOAT::getInstance();
    MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D16_UNORM].textureDecoder = TextureDecoder_R16_UNORM::getInstance();
    MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D32_S8_FLOAT].textureDecoder = TextureDecoder_D32_S8_UINT_X24::getInstance();

    if (!support.m_supportsDepth24Unorm_Stencil8)
    {
        // Depth24Unorm_Stencil8
        MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D24_S8_UNORM].pixelFormat = MTL::PixelFormatDepth32Float_Stencil8;
        // TODO: implement the decoder
        //MTL_DEPTH_FORMAT_TABLE[Latte::E_GX2SURFFMT::D24_S8_UNORM].textureDecoder = TextureDecoder_D24_S8_To_D32_S8::getInstance();
    }
}
// Returns a copy of the table entry describing `format`.
// `isDepth` selects the depth table instead of the color table. Unknown formats
// do not fail; a small fallback description is returned instead
// (R8Unorm for color, Depth16Unorm for depth).
const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth)
{
    if (!isDepth)
    {
        auto colorEntry = MTL_COLOR_FORMAT_TABLE.find(format);
        if (colorEntry != MTL_COLOR_FORMAT_TABLE.end())
            return colorEntry->second;

        // Fallback
        return {MTL::PixelFormatR8Unorm, MetalDataType::FLOAT, 1};
    }

    auto depthEntry = MTL_DEPTH_FORMAT_TABLE.find(format);
    if (depthEntry != MTL_DEPTH_FORMAT_TABLE.end())
        return depthEntry->second;

    // Fallback
    return {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2};
}
// Convenience wrapper around GetMtlPixelFormatInfo() that yields only the Metal
// pixel format. An invalid result is logged but still returned to the caller.
MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth)
{
    const MTL::PixelFormat mtlFormat = GetMtlPixelFormatInfo(format, isDepth).pixelFormat;
    if (mtlFormat != MTL::PixelFormatInvalid)
        return mtlFormat;

    cemuLog_log(LogType::Force, "invalid pixel format 0x{:x}, is depth: {}\n", format, isDepth);
    return mtlFormat;
}
// Integer division of a by b, rounded up. b must be non-zero.
// Written as quotient plus remainder check so that it cannot wrap around:
// the usual (a + b - 1) / b form overflows uint32 when a is close to the maximum.
inline uint32 CeilDivide(uint32 a, uint32 b) {
    return a / b + ((a % b) != 0 ? 1u : 0u);
}
// Number of bytes one row of `width` texels occupies for the given format:
// the width is converted to a block count (rounded up) and multiplied by the
// format's bytes per block.
size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, bool isDepth, uint32 width)
{
    const MetalPixelFormatInfo info = GetMtlPixelFormatInfo(format, isDepth);
    const uint32 blocksPerRow = CeilDivide(width, info.blockTexelSize.x);
    return blocksPerRow * info.bytesPerBlock;
}
// Number of bytes one image slice occupies: the height is converted to a count
// of block rows (rounded up) and multiplied by the caller-supplied row pitch.
size_t GetMtlTextureBytesPerImage(Latte::E_GX2SURFFMT format, bool isDepth, uint32 height, size_t bytesPerRow)
{
    const MetalPixelFormatInfo info = GetMtlPixelFormatInfo(format, isDepth);
    const uint32 blockRows = CeilDivide(height, info.blockTexelSize.y);
    return blockRows * bytesPerRow;
}
// Translates a Latte primitive mode into the Metal primitive type used to draw it.
// Several Latte modes have no direct Metal equivalent and are mapped onto the
// closest primitive; unsupported modes are logged and fall back to triangles.
MTL::PrimitiveType GetMtlPrimitiveType(LattePrimitiveMode primitiveMode)
{
    using Prim = Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE;

    switch (primitiveMode)
    {
    case Prim::POINTS:
        return MTL::PrimitiveTypePoint;

    case Prim::LINES:
        return MTL::PrimitiveTypeLine;

    case Prim::LINE_STRIP:
    case Prim::LINE_LOOP: // line loops are emulated as line strips with an extra connecting strip at the end
        return MTL::PrimitiveTypeLineStrip;

    case Prim::LINE_STRIP_ADJACENT: // Tropical Freeze level 3-6
        cemuLog_logOnce(LogType::Force, "Metal doesn't support line strip adjacent primitive, using line strip instead");
        return MTL::PrimitiveTypeLineStrip;

    case Prim::TRIANGLE_FAN: // NOTE(review): fans are drawn as strips here - confirm the indices are rewritten elsewhere
    case Prim::TRIANGLE_STRIP:
        return MTL::PrimitiveTypeTriangleStrip;

    case Prim::TRIANGLES:
    case Prim::QUADS:      // quads are emulated as 2 triangles
    case Prim::QUAD_STRIP: // quad strips are emulated as (count-2)/2 triangles
    case Prim::RECTS:      // rects are emulated as 2 triangles
        return MTL::PrimitiveTypeTriangle;

    default:
        break;
    }

    cemuLog_log(LogType::Force, "Unsupported primitive mode {}", primitiveMode);
    cemu_assert_debug(false);
    return MTL::PrimitiveTypeTriangle;
}
// Maps a Latte vertex attribute format (FMT_*) to the Metal vertex format used
// to fetch it. Float formats share the unsigned-integer Metal format of the same
// width and component count. Unknown formats are logged and yield
// MTL::VertexFormatInvalid.
MTL::VertexFormat GetMtlVertexFormat(uint8 format)
{
    switch (format)
    {
    // 32-bit components
    case FMT_32_32_32_32_FLOAT:
    case FMT_32_32_32_32:
        return MTL::VertexFormatUInt4;
    case FMT_32_32_32_FLOAT:
    case FMT_32_32_32:
        return MTL::VertexFormatUInt3;
    case FMT_32_32_FLOAT:
    case FMT_32_32:
        return MTL::VertexFormatUInt2;
    case FMT_32_FLOAT:
    case FMT_32:
    case FMT_2_10_10_10: // verified to match OpenGL
        return MTL::VertexFormatUInt;

    // 16-bit components
    case FMT_16_16_16_16:       // verified to match OpenGL
    case FMT_16_16_16_16_FLOAT: // verified to match OpenGL
        return MTL::VertexFormatUShort4;
    case FMT_16_16_16:
    case FMT_16_16_16_FLOAT:
        return MTL::VertexFormatUShort3;
    case FMT_16_16:
    case FMT_16_16_FLOAT:
        return MTL::VertexFormatUShort2;
    case FMT_16:
    case FMT_16_FLOAT:
        return MTL::VertexFormatUShort;

    // 8-bit components
    case FMT_8_8_8_8:
        return MTL::VertexFormatUChar4;
    case FMT_8_8_8:
        return MTL::VertexFormatUChar3;
    case FMT_8_8:
        return MTL::VertexFormatUChar2;
    case FMT_8:
        return MTL::VertexFormatUChar;

    default:
        break;
    }

    cemuLog_log(LogType::Force, "unsupported vertex format {}", (uint32)format);
    assert_dbg();
    return MTL::VertexFormatInvalid;
}
// Size in bytes of one vertex attribute of the given Latte format (FMT_*).
// Returns 0 for formats that are not handled.
uint32 GetMtlVertexFormatSize(uint8 format)
{
    switch (format)
    {
    case FMT_32_32_32_32_FLOAT:
    case FMT_32_32_32_32:
        return 16;

    case FMT_32_32_32_FLOAT:
    case FMT_32_32_32:
        return 12;

    case FMT_32_32_FLOAT:
    case FMT_32_32:
    case FMT_16_16_16_16:
    case FMT_16_16_16_16_FLOAT:
        return 8;

    case FMT_16_16_16:
    case FMT_16_16_16_FLOAT:
        return 6;

    case FMT_32_FLOAT:
    case FMT_32:
    case FMT_8_8_8_8:
    case FMT_16_16:
    case FMT_16_16_FLOAT:
    case FMT_2_10_10_10:
        return 4;

    case FMT_8_8_8:
        return 3;

    case FMT_8_8:
    case FMT_16:
    case FMT_16_FLOAT:
        return 2;

    case FMT_8:
        return 1;

    default:
        return 0;
    }
}
// Converts the renderer's index type to the Metal index type.
// Anything other than U16/U32 trips the "suspicious" assert and is treated as 32-bit.
MTL::IndexType GetMtlIndexType(Renderer::INDEX_TYPE indexType)
{
    if (indexType == Renderer::INDEX_TYPE::U16)
        return MTL::IndexTypeUInt16;

    if (indexType != Renderer::INDEX_TYPE::U32)
        cemu_assert_suspicious();

    return MTL::IndexTypeUInt32;
}
// Converts a Latte blend combine function to the Metal blend operation.
// Unknown values trip the "suspicious" assert and fall back to additive blending.
MTL::BlendOperation GetMtlBlendOp(Latte::LATTE_CB_BLENDN_CONTROL::E_COMBINEFUNC combineFunc)
{
    using CombineFunc = Latte::LATTE_CB_BLENDN_CONTROL::E_COMBINEFUNC;

    if (combineFunc == CombineFunc::DST_PLUS_SRC)
        return MTL::BlendOperationAdd;
    if (combineFunc == CombineFunc::SRC_MINUS_DST)
        return MTL::BlendOperationSubtract;
    if (combineFunc == CombineFunc::MIN_DST_SRC)
        return MTL::BlendOperationMin;
    if (combineFunc == CombineFunc::MAX_DST_SRC)
        return MTL::BlendOperationMax;
    if (combineFunc == CombineFunc::DST_MINUS_SRC)
        return MTL::BlendOperationReverseSubtract;

    cemu_assert_suspicious();
    return MTL::BlendOperationAdd;
}
// Metal blend factor for each Latte blend factor value; the array index is the
// numeric value of Latte::LATTE_CB_BLENDN_CONTROL::E_BLENDFACTOR (see GetMtlBlendFactor).
// Entries 0x0B and 0x0C are not implemented yet and currently resolve to Zero.
const MTL::BlendFactor MTL_BLEND_FACTORS[] =
{
/* 0x00 */ MTL::BlendFactorZero,
/* 0x01 */ MTL::BlendFactorOne,
/* 0x02 */ MTL::BlendFactorSourceColor,
/* 0x03 */ MTL::BlendFactorOneMinusSourceColor,
/* 0x04 */ MTL::BlendFactorSourceAlpha,
/* 0x05 */ MTL::BlendFactorOneMinusSourceAlpha,
/* 0x06 */ MTL::BlendFactorDestinationAlpha,
/* 0x07 */ MTL::BlendFactorOneMinusDestinationAlpha,
/* 0x08 */ MTL::BlendFactorDestinationColor,
/* 0x09 */ MTL::BlendFactorOneMinusDestinationColor,
/* 0x0A */ MTL::BlendFactorSourceAlphaSaturated,
/* 0x0B */ MTL::BlendFactorZero, // TODO
/* 0x0C */ MTL::BlendFactorZero, // TODO
/* 0x0D */ MTL::BlendFactorBlendColor,
/* 0x0E */ MTL::BlendFactorOneMinusBlendColor,
/* 0x0F */ MTL::BlendFactorSource1Color,
/* 0x10 */ MTL::BlendFactorOneMinusSource1Color,
/* 0x11 */ MTL::BlendFactorSource1Alpha,
/* 0x12 */ MTL::BlendFactorOneMinusSource1Alpha,
/* 0x13 */ MTL::BlendFactorBlendAlpha,
/* 0x14 */ MTL::BlendFactorOneMinusBlendAlpha
};
// Looks up the Metal blend factor for a Latte blend factor.
// The range check is a debug assertion only.
MTL::BlendFactor GetMtlBlendFactor(Latte::LATTE_CB_BLENDN_CONTROL::E_BLENDFACTOR factor)
{
    const uint32 tableIndex = static_cast<uint32>(factor);
    cemu_assert_debug(tableIndex < std::size(MTL_BLEND_FACTORS));
    return MTL_BLEND_FACTORS[tableIndex];
}
// Metal compare function for each Latte compare function; indexed by the numeric
// value of Latte::E_COMPAREFUNC (see GetMtlCompareFunc).
const MTL::CompareFunction MTL_COMPARE_FUNCTIONS[8] =
{
MTL::CompareFunctionNever,
MTL::CompareFunctionLess,
MTL::CompareFunctionEqual,
MTL::CompareFunctionLessEqual,
MTL::CompareFunctionGreater,
MTL::CompareFunctionNotEqual,
MTL::CompareFunctionGreaterEqual,
MTL::CompareFunctionAlways
};
// Looks up the Metal compare function for a Latte compare function.
// The range check is a debug assertion only.
MTL::CompareFunction GetMtlCompareFunc(Latte::E_COMPAREFUNC func)
{
    const uint32 tableIndex = static_cast<uint32>(func);
    cemu_assert_debug(tableIndex < std::size(MTL_COMPARE_FUNCTIONS));
    return MTL_COMPARE_FUNCTIONS[tableIndex];
}
// TODO: clamp to border color? (should be fine though)
// Metal sampler address mode for each Latte clamp mode; indexed by the numeric
// value of Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_CLAMP (see GetMtlSamplerAddressMode).
// The two HALF_BORDER modes are marked unsupported and are approximated.
const MTL::SamplerAddressMode MTL_SAMPLER_ADDRESS_MODES[] = {
MTL::SamplerAddressModeRepeat, // WRAP
MTL::SamplerAddressModeMirrorRepeat, // MIRROR
MTL::SamplerAddressModeClampToEdge, // CLAMP_LAST_TEXEL
MTL::SamplerAddressModeMirrorClampToEdge, // MIRROR_ONCE_LAST_TEXEL
MTL::SamplerAddressModeClampToEdge, // unsupported HALF_BORDER
MTL::SamplerAddressModeClampToBorderColor, // unsupported MIRROR_ONCE_HALF_BORDER
MTL::SamplerAddressModeClampToBorderColor, // CLAMP_BORDER
MTL::SamplerAddressModeClampToBorderColor // MIRROR_ONCE_BORDER
};
// Looks up the Metal sampler address mode for a Latte clamp mode.
// The range check is a debug assertion only.
MTL::SamplerAddressMode GetMtlSamplerAddressMode(Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_CLAMP clamp)
{
    const uint32 tableIndex = static_cast<uint32>(clamp);
    cemu_assert_debug(tableIndex < std::size(MTL_SAMPLER_ADDRESS_MODES));
    return MTL_SAMPLER_ADDRESS_MODES[tableIndex];
}
// Metal texture swizzle for each component selector value passed to
// GetMtlTextureSwizzle(); the array index is the selector.
// Selectors 6 and 7 resolve to Zero, the same as selector 4.
const MTL::TextureSwizzle MTL_TEXTURE_SWIZZLES[] = {
MTL::TextureSwizzleRed,
MTL::TextureSwizzleGreen,
MTL::TextureSwizzleBlue,
MTL::TextureSwizzleAlpha,
MTL::TextureSwizzleZero,
MTL::TextureSwizzleOne,
MTL::TextureSwizzleZero,
MTL::TextureSwizzleZero
};
// Looks up the Metal texture swizzle for a component selector.
// The range check is a debug assertion only.
MTL::TextureSwizzle GetMtlTextureSwizzle(uint32 swizzle)
{
    const auto swizzleCount = std::size(MTL_TEXTURE_SWIZZLES);
    cemu_assert_debug(swizzle < swizzleCount);
    return MTL_TEXTURE_SWIZZLES[swizzle];
}
// Metal stencil operation for each Latte stencil action; indexed by the numeric
// value of Latte::LATTE_DB_DEPTH_CONTROL::E_STENCILACTION (see GetMtlStencilOp).
const MTL::StencilOperation MTL_STENCIL_OPERATIONS[8] = {
MTL::StencilOperationKeep,
MTL::StencilOperationZero,
MTL::StencilOperationReplace,
MTL::StencilOperationIncrementClamp,
MTL::StencilOperationDecrementClamp,
MTL::StencilOperationInvert,
MTL::StencilOperationIncrementWrap,
MTL::StencilOperationDecrementWrap
};
// Looks up the Metal stencil operation for a Latte stencil action.
// The range check is a debug assertion only.
MTL::StencilOperation GetMtlStencilOp(Latte::LATTE_DB_DEPTH_CONTROL::E_STENCILACTION action)
{
    const uint32 tableIndex = static_cast<uint32>(action);
    cemu_assert_debug(tableIndex < std::size(MTL_STENCIL_OPERATIONS));
    return MTL_STENCIL_OPERATIONS[tableIndex];
}
// Converts a 4-bit channel write mask (bit 0 = red, bit 1 = green, bit 2 = blue,
// bit 3 = alpha) into the equivalent Metal color write mask.
MTL::ColorWriteMask GetMtlColorWriteMask(uint8 mask)
{
    MTL::ColorWriteMask result = MTL::ColorWriteMaskNone;

    if ((mask & 0x1) != 0)
        result |= MTL::ColorWriteMaskRed;
    if ((mask & 0x2) != 0)
        result |= MTL::ColorWriteMaskGreen;
    if ((mask & 0x4) != 0)
        result |= MTL::ColorWriteMaskBlue;
    if ((mask & 0x8) != 0)
        result |= MTL::ColorWriteMaskAlpha;

    return result;
}

View file

@ -0,0 +1,86 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/ISA/LatteReg.h"
#include "Cafe/HW/Latte/Core/LatteConst.h"
//#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Common/precompiled.h"
#include "HW/Latte/Core/LatteTextureLoader.h"
// Pair of unsigned 32-bit values; used by MetalPixelFormatInfo to hold the
// block size of a format in texels (x = width, y = height).
struct Uvec2 {
uint32 x;
uint32 y;
};
// Component data type associated with a pixel format.
// GetDataTypeStr() turns INT/UINT/FLOAT into "int4"/"uint4"/"float4";
// NONE has no string form (the depth format table uses it for every entry).
enum class MetalDataType
{
NONE,
INT,
UINT,
FLOAT,
};
// Describes how one Latte surface format is represented in Metal.
struct MetalPixelFormatInfo {
// Metal pixel format backing the surface
MTL::PixelFormat pixelFormat;
// Component data type of the format
MetalDataType dataType;
// Size in bytes of one block (one texel for formats with a 1x1 block)
size_t bytesPerBlock;
// Block dimensions in texels; row and image sizes are computed in units of blocks
Uvec2 blockTexelSize = {1, 1};
// True for depth formats that also carry a stencil component
bool hasStencil = false;
// Decoder for texture data of this format; assigned by CheckForPixelFormatSupport()
TextureDecoder* textureDecoder = nullptr;
};
void CheckForPixelFormatSupport(const MetalPixelFormatSupport& support);
const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth);
MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth);
// Returns the component data type of the color buffer bound at `index`,
// based on the surface format read from the context registers.
inline MetalDataType GetColorBufferDataType(const uint32 index, const LatteContextRegister& lcr)
{
    const auto surfaceFormat = LatteMRT::GetColorBufferFormat(index, lcr);
    const MetalPixelFormatInfo info = GetMtlPixelFormatInfo(surfaceFormat, false);
    return info.dataType;
}
// Returns the 4-component vector type name for a data type
// ("int4", "uint4" or "float4"). Any other value trips the "suspicious"
// assert and yields "INVALID".
inline const char* GetDataTypeStr(MetalDataType dataType)
{
    if (dataType == MetalDataType::INT)
        return "int4";
    if (dataType == MetalDataType::UINT)
        return "uint4";
    if (dataType == MetalDataType::FLOAT)
        return "float4";

    cemu_assert_suspicious();
    return "INVALID";
}
size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, bool isDepth, uint32 width);
size_t GetMtlTextureBytesPerImage(Latte::E_GX2SURFFMT format, bool isDepth, uint32 height, size_t bytesPerRow);
MTL::PrimitiveType GetMtlPrimitiveType(LattePrimitiveMode primitiveMode);
MTL::VertexFormat GetMtlVertexFormat(uint8 format);
uint32 GetMtlVertexFormatSize(uint8 format);
MTL::IndexType GetMtlIndexType(Renderer::INDEX_TYPE indexType);
MTL::BlendOperation GetMtlBlendOp(Latte::LATTE_CB_BLENDN_CONTROL::E_COMBINEFUNC combineFunc);
MTL::BlendFactor GetMtlBlendFactor(Latte::LATTE_CB_BLENDN_CONTROL::E_BLENDFACTOR factor);
MTL::CompareFunction GetMtlCompareFunc(Latte::E_COMPAREFUNC func);
MTL::SamplerAddressMode GetMtlSamplerAddressMode(Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_CLAMP clamp);
MTL::TextureSwizzle GetMtlTextureSwizzle(uint32 swizzle);
MTL::StencilOperation GetMtlStencilOp(Latte::LATTE_DB_DEPTH_CONTROL::E_STENCILACTION action);
MTL::ColorWriteMask GetMtlColorWriteMask(uint8 mask);

View file

@ -0,0 +1,48 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
// Builds the attachment description from an existing framebuffer object:
// copies the format of every bound color texture view and, if present,
// the format and stencil flag of the depth buffer.
MetalAttachmentsInfo::MetalAttachmentsInfo(class CachedFBOMtl* fbo)
{
    // Color attachments
    for (uint8 slot = 0; slot < LATTE_NUM_COLOR_TARGET; ++slot)
    {
        auto colorView = static_cast<LatteTextureViewMtl*>(fbo->colorBuffer[slot].texture);
        if (colorView)
            colorFormats[slot] = colorView->format;
    }

    // Depth stencil attachment
    if (!fbo->depthBuffer.texture)
        return;

    auto depthView = static_cast<LatteTextureViewMtl*>(fbo->depthBuffer.texture);
    depthFormat = depthView->format;
    hasStencil = fbo->depthBuffer.hasStencil;
}
// Builds the attachment description from the current register state.
//
// lcr:         context registers the color/depth buffer formats are read from.
// pixelShader: used to determine which color buffers are actually written.
//
// Only color buffers whose bit is set in the active color buffer mask get a
// format; the depth format and stencil flag are set only when the depth buffer
// is active.
MetalAttachmentsInfo::MetalAttachmentsInfo(const LatteContextRegister& lcr, const LatteDecompilerShader* pixelShader)
{
    uint8 cbMask = LatteMRT::GetActiveColorBufferMask(pixelShader, lcr);
    bool dbMask = LatteMRT::GetActiveDepthBufferMask(lcr);

    // Color attachments
    // Iterate over the actual size of colorFormats rather than a literal count,
    // so the loop cannot run past the array and matches the FBO-based constructor.
    for (int i = 0; i < LATTE_NUM_COLOR_TARGET; ++i)
    {
        if ((cbMask & (1 << i)) == 0)
            continue;
        colorFormats[i] = LatteMRT::GetColorBufferFormat(i, lcr);
    }

    // Depth stencil attachment
    if (dbMask)
    {
        Latte::E_GX2SURFFMT format = LatteMRT::GetDepthBufferFormat(lcr);
        depthFormat = format;
        hasStencil = GetMtlPixelFormatInfo(format, true).hasStencil;
    }
}

View file

@ -0,0 +1,15 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
// Describes the render target formats of a framebuffer configuration:
// one format per color target plus the depth format and a stencil flag.
class MetalAttachmentsInfo
{
public:
MetalAttachmentsInfo() = default;
// Derives the description from the textures bound to an existing FBO
MetalAttachmentsInfo(class CachedFBOMtl* fbo);
// Derives the description from the register state and the pixel shader's outputs
MetalAttachmentsInfo(const LatteContextRegister& lcr, const class LatteDecompilerShader* pixelShader);
// Format per color target; targets that are not bound keep their initial value.
// NOTE(review): only element 0 is explicitly INVALID_FORMAT, the remaining elements
// are zero-initialized - this is equivalent only if INVALID_FORMAT == 0; confirm
Latte::E_GX2SURFFMT colorFormats[LATTE_NUM_COLOR_TARGET] = {Latte::E_GX2SURFFMT::INVALID_FORMAT};
// Depth buffer format, INVALID_FORMAT when no depth buffer is used
Latte::E_GX2SURFFMT depthFormat = Latte::E_GX2SURFFMT::INVALID_FORMAT;
// Whether the depth buffer also has a stencil component
bool hasStencil = false;
};

View file

@ -0,0 +1,217 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h"
// Releases every Metal buffer that was created as a heap chunk.
MetalBufferChunkedHeap::~MetalBufferChunkedHeap()
{
    for (MTL::Buffer* chunkBuffer : m_chunkBuffers)
    {
        chunkBuffer->release();
    }
}
// Creates the Metal buffer backing a new heap chunk and returns its size.
// The chunk is at least m_minimumBufferAllocationSize bytes and at least
// `minimumAllocationSize` bytes. `chunkIndex` is expected to be the index the
// new buffer gets in m_chunkBuffers (checked in debug builds).
uint32 MetalBufferChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize)
{
    const size_t chunkSize = std::max<size_t>(m_minimumBufferAllocationSize, minimumAllocationSize);

    MTL::Buffer* chunkBuffer = m_mtlr->GetDevice()->newBuffer(chunkSize, m_options);
    cemu_assert_debug(chunkBuffer);

    cemu_assert_debug(m_chunkBuffers.size() == chunkIndex);
    m_chunkBuffers.push_back(chunkBuffer);

    return chunkSize;
}
// Records a sync point (current command buffer + offset) for `buffer`.
// At most one sync point is added per command buffer: if the current command
// buffer is the one that added the previous sync point, nothing happens.
void MetalSynchronizedRingAllocator::addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset)
{
    auto activeCommandBuffer = m_mtlr->GetCurrentCommandBuffer();
    if (activeCommandBuffer != buffer.lastSyncpointCommandBuffer)
    {
        buffer.lastSyncpointCommandBuffer = activeCommandBuffer;
        buffer.queue_syncPoints.emplace(activeCommandBuffer, offset);
    }
}
// Creates one more ring buffer and appends it to m_buffers.
// Its size is the smallest multiple of m_minimumBufferAllocSize (at least one)
// that can hold `sizeRequiredForAlloc` bytes.
void MetalSynchronizedRingAllocator::allocateAdditionalUploadBuffer(uint32 sizeRequiredForAlloc)
{
    // grow in steps of the minimum allocation size until the request fits
    uint32 newBufferSize = m_minimumBufferAllocSize;
    while (newBufferSize < sizeRequiredForAlloc)
    {
        newBufferSize += m_minimumBufferAllocSize;
    }

    AllocatorBuffer_t created{};
    created.mtlBuffer = m_mtlr->GetDevice()->newBuffer(newBufferSize, m_options);
    created.basePtr = (uint8*)created.mtlBuffer->contents();
    created.size = newBufferSize;
    created.writeIndex = 0;
    created.index = (uint32)m_buffers.size();

    m_buffers.push_back(created);
}
// Reserves `size` bytes from one of the ring buffers.
//
// The alignment is raised to at least 128 and the size is rounded up to a
// multiple of 128. Each existing buffer is tried in order; a buffer is skipped
// when the reservation would reach its oldest pending sync point. If no buffer
// has room, a new buffer is created and the allocation is retried recursively.
//
// Returns a reservation holding the Metal buffer, a CPU pointer to the reserved
// range, the offset within the buffer, the (rounded) size and the buffer index.
MetalSynchronizedRingAllocator::AllocatorReservation_t MetalSynchronizedRingAllocator::AllocateBufferMemory(uint32 size, uint32 alignment)
{
if (alignment < 128)
alignment = 128;
size = (size + 127) & ~127;
for (auto& itr : m_buffers)
{
// align pointer
uint32 alignmentPadding = (alignment - (itr.writeIndex % alignment)) % alignment;
// distance from the write position to the oldest pending sync point;
// 0xFFFFFFFF means there is no sync point ahead of the write position
uint32 distanceToSyncPoint;
if (!itr.queue_syncPoints.empty())
{
if (itr.queue_syncPoints.front().offset < itr.writeIndex)
distanceToSyncPoint = 0xFFFFFFFF;
else
distanceToSyncPoint = itr.queue_syncPoints.front().offset - itr.writeIndex;
}
else
distanceToSyncPoint = 0xFFFFFFFF;
uint32 spaceNeeded = alignmentPadding + size;
if (spaceNeeded > distanceToSyncPoint)
continue; // not enough space in current buffer
if ((itr.writeIndex + spaceNeeded) > itr.size)
{
// wrap-around
// restarting at offset 0 needs no padding
spaceNeeded = size;
alignmentPadding = 0;
// check if there is enough space in current buffer after wrap-around
if (!itr.queue_syncPoints.empty())
{
distanceToSyncPoint = itr.queue_syncPoints.front().offset - 0;
if (spaceNeeded > distanceToSyncPoint)
continue;
}
else if (spaceNeeded > itr.size)
continue;
itr.writeIndex = 0;
}
// the sync point is recorded at the write position before padding is applied
addUploadBufferSyncPoint(itr, itr.writeIndex);
itr.writeIndex += alignmentPadding;
uint32 offset = itr.writeIndex;
itr.writeIndex += size;
// the buffer is in use again, restart its idle counter
itr.cleanupCounter = 0;
MetalSynchronizedRingAllocator::AllocatorReservation_t res;
res.mtlBuffer = itr.mtlBuffer;
res.memPtr = itr.basePtr + offset;
res.bufferOffset = offset;
res.size = size;
res.bufferIndex = itr.index;
return res;
}
// allocate new buffer
allocateAdditionalUploadBuffer(size);
return AllocateBufferMemory(size, alignment);
}
// Tells Metal that the reserved range was modified by the CPU.
// Only done when the allocator's buffers require explicit flushing.
void MetalSynchronizedRingAllocator::FlushReservation(AllocatorReservation_t& uploadReservation)
{
    if (!RequiresFlush())
        return;

    uploadReservation.mtlBuffer->didModifyRange(NS::Range(uploadReservation.bufferOffset, uploadReservation.size));
}
// Called when a command buffer has finished.
// Drops the leading sync points that belong to that command buffer, bumps the
// idle counter of every buffer without pending sync points, and releases the
// last buffer once it has been idle for 1000 calls (the first buffer is always kept).
void MetalSynchronizedRingAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer)
{
    for (AllocatorBuffer_t& ringBuffer : m_buffers)
    {
        auto& pendingSyncPoints = ringBuffer.queue_syncPoints;
        while (!pendingSyncPoints.empty() && pendingSyncPoints.front().commandBuffer == latestFinishedCommandBuffer)
            pendingSyncPoints.pop();

        if (pendingSyncPoints.empty())
            ++ringBuffer.cleanupCounter;
    }

    // check if last buffer is available for deletion
    if (m_buffers.size() < 2)
        return;

    AllocatorBuffer_t& tail = m_buffers.back();
    if (tail.cleanupCounter < 1000)
        return;

    // release buffer
    tail.mtlBuffer->release();
    m_buffers.pop_back();
}
// Returns the Metal buffer of the ring buffer at `index`.
// The index is not range-checked.
MTL::Buffer* MetalSynchronizedRingAllocator::GetBufferByIndex(uint32 index) const
{
    const AllocatorBuffer_t& ringBuffer = m_buffers[index];
    return ringBuffer.mtlBuffer;
}
// Reports the number of ring buffers, their combined size and an estimate of
// the free space. Per buffer, free space is the distance from the write
// position to the oldest pending sync point (taking wrap-around into account),
// or the whole buffer when nothing is pending.
void MetalSynchronizedRingAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
{
    numBuffers = (uint32)m_buffers.size();
    totalBufferSize = 0;
    freeBufferSize = 0;

    for (const AllocatorBuffer_t& ringBuffer : m_buffers)
    {
        totalBufferSize += ringBuffer.size;

        // calculate free space in buffer
        uint32 freeInBuffer = ringBuffer.size;
        if (!ringBuffer.queue_syncPoints.empty())
        {
            const uint32 syncOffset = ringBuffer.queue_syncPoints.front().offset;
            if (syncOffset < ringBuffer.writeIndex)
                freeInBuffer = (ringBuffer.size - ringBuffer.writeIndex) + syncOffset; // size with wrap-around
            else
                freeInBuffer = syncOffset - ringBuffer.writeIndex;
        }
        freeBufferSize += freeInBuffer;
    }
}
/* MetalSynchronizedHeapAllocator */
// Allocates `size` bytes from the chunked heap and returns a pooled reservation
// object describing the allocation (Metal buffer, CPU pointer, offset, size and
// chunk index). The allocation is tracked as active until FreeReservation().
MetalSynchronizedHeapAllocator::AllocatorReservation* MetalSynchronizedHeapAllocator::AllocateBufferMemory(uint32 size, uint32 alignment)
{
    CHAddr heapAddr = m_chunkedHeap.alloc(size, alignment);
    m_activeAllocations.emplace_back(heapAddr);

    AllocatorReservation* reservation = m_poolAllocatorReservation.allocObj();
    reservation->mtlBuffer = m_chunkedHeap.GetBufferByIndex(heapAddr.chunkIndex);
    reservation->memPtr = m_chunkedHeap.GetChunkPtr(heapAddr.chunkIndex) + heapAddr.offset;
    reservation->bufferIndex = heapAddr.chunkIndex;
    reservation->bufferOffset = heapAddr.offset;
    reservation->size = size;
    return reservation;
}
// Frees a reservation. The heap memory is not returned immediately: it is
// queued under the current command buffer and released by CleanupBuffer() once
// that command buffer has finished. The reservation object itself goes back to
// the pool right away.
void MetalSynchronizedHeapAllocator::FreeReservation(AllocatorReservation* uploadReservation)
{
    // put the allocation on a delayed release queue for the current command buffer
    MTL::CommandBuffer* activeCommandBuffer = m_mtlr->GetCurrentCommandBuffer();

    // the tracked allocation is identified by chunk index and offset
    const auto belongsToReservation = [&uploadReservation](const TrackedAllocation& tracked) {
        return tracked.allocation.chunkIndex == uploadReservation->bufferIndex
            && tracked.allocation.offset == uploadReservation->bufferOffset;
    };
    auto tracked = std::find_if(m_activeAllocations.begin(), m_activeAllocations.end(), belongsToReservation);
    cemu_assert_debug(tracked != m_activeAllocations.end());

    m_releaseQueue[activeCommandBuffer].emplace_back(tracked->allocation);
    m_activeAllocations.erase(tracked);
    m_poolAllocatorReservation.freeObj(uploadReservation);
}
// Tells Metal that the reserved range was modified by the CPU.
// Only done when the underlying heap requires explicit flushing.
void MetalSynchronizedHeapAllocator::FlushReservation(AllocatorReservation* uploadReservation)
{
    if (!m_chunkedHeap.RequiresFlush())
        return;

    uploadReservation->mtlBuffer->didModifyRange(NS::Range(uploadReservation->bufferOffset, uploadReservation->size));
}
// Called when a command buffer has finished: returns every allocation that was
// queued for release under that command buffer to the heap and drops the queue entry.
void MetalSynchronizedHeapAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer)
{
    auto pending = m_releaseQueue.find(latestFinishedCommandBuffer);
    if (pending != m_releaseQueue.end())
    {
        // release allocations
        for (auto& heapAddr : pending->second)
        {
            m_chunkedHeap.free(heapAddr);
        }
        m_releaseQueue.erase(pending);
    }
}
// Reports buffer count, total size and free size; all three values are taken
// directly from the underlying chunked heap.
void MetalSynchronizedHeapAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
{
m_chunkedHeap.GetStats(numBuffers, totalBufferSize, freeBufferSize);
}

View file

@ -0,0 +1,163 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Metal/MTLResource.hpp"
#include "util/ChunkedHeap/ChunkedHeap.h"
#include "util/helpers/MemoryPool.h"
#include <utility>
// Adjusts the resource options used for allocator buffers: buffers in shared or
// managed storage mode additionally get the write-combined CPU cache mode.
//
// The storage mode is a multi-bit field inside the options, and the "shared"
// mode has the value 0, so it cannot be detected with a bitwise AND. The field
// is therefore masked out and compared against the mode constants.
//
// NOTE(review): with the corrected check, shared-mode buffers now actually
// become write-combined; CPU reads from such buffers are slow - confirm that
// none of the buffers created through these allocators are read back on the CPU.
inline MTL::ResourceOptions GetResourceOptions(MTL::ResourceOptions options)
{
    const MTL::ResourceOptions storageMode = options & MTL::ResourceStorageModeMask;
    if (storageMode == MTL::ResourceStorageModeShared || storageMode == MTL::ResourceStorageModeManaged)
        options |= MTL::ResourceCPUCacheModeWriteCombined;
    return options;
}
// Chunked heap whose chunks are backed by Metal buffers.
// Allocation bookkeeping comes from ChunkedHeap (inherited privately, with only
// alloc/free re-exported); this class creates one MTL::Buffer per chunk and
// releases them all in its destructor.
class MetalBufferChunkedHeap : private ChunkedHeap<>
{
public:
// `options` are passed through GetResourceOptions() before being stored.
// `minimumBufferAllocationSize` is the lower bound for the size of each chunk.
MetalBufferChunkedHeap(const class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocationSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocationSize(minimumBufferAllocationSize) { };
~MetalBufferChunkedHeap();
using ChunkedHeap::alloc;
using ChunkedHeap::free;
// CPU pointer to the start of the chunk's buffer, or nullptr if `index` is out of range
uint8* GetChunkPtr(uint32 index) const
{
if (index >= m_chunkBuffers.size())
return nullptr;
return (uint8*)m_chunkBuffers[index]->contents();
}
// Metal buffer backing the chunk; unlike GetChunkPtr() the index is only checked by a debug assertion
MTL::Buffer* GetBufferByIndex(uint32 index) const
{
cemu_assert_debug(index < m_chunkBuffers.size());
return m_chunkBuffers[index];
}
// True when the stored options have the managed storage mode bit set, in which
// case CPU writes have to be reported with didModifyRange()
bool RequiresFlush() const
{
return m_options & MTL::ResourceStorageModeManaged;
}
// Number of chunk buffers, total heap bytes and bytes not currently allocated
void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
{
numBuffers = m_chunkBuffers.size();
totalBufferSize = m_numHeapBytes;
freeBufferSize = m_numHeapBytes - m_numAllocatedBytes;
}
private:
// ChunkedHeap callback: creates the buffer for a new chunk and returns its size
uint32 allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) override;
const class MetalRenderer* m_mtlr;
MTL::ResourceOptions m_options;
size_t m_minimumBufferAllocationSize;
// one buffer per chunk, indexed by chunk index
std::vector<MTL::Buffer*> m_chunkBuffers;
};
// a circular ring-buffer which tracks and releases memory per command-buffer
// Ring allocator over one or more Metal buffers. Reservations are handed out
// sequentially; per-command-buffer sync points mark how far the write position
// may advance until the corresponding command buffer has finished.
class MetalSynchronizedRingAllocator
{
public:
// `options` are passed through GetResourceOptions() before being stored.
// `minimumBufferAllocSize` is the size granularity of each ring buffer.
MetalSynchronizedRingAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, uint32 minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocSize(minimumBufferAllocSize) {};
MetalSynchronizedRingAllocator(const MetalSynchronizedRingAllocator&) = delete; // disallow copy
// Marks the buffer offset at which a command buffer started allocating
struct BufferSyncPoint_t
{
// todo - modularize sync point
MTL::CommandBuffer* commandBuffer;
uint32 offset;
BufferSyncPoint_t(MTL::CommandBuffer* _commandBuffer, uint32 _offset) : commandBuffer(_commandBuffer), offset(_offset) {};
};
// State of one ring buffer
struct AllocatorBuffer_t
{
MTL::Buffer* mtlBuffer;
uint8* basePtr; // CPU pointer to the buffer contents
uint32 size;
uint32 writeIndex; // offset at which the next reservation starts
std::queue<BufferSyncPoint_t> queue_syncPoints; // pending sync points, oldest first
MTL::CommandBuffer* lastSyncpointCommandBuffer{ nullptr }; // command buffer of the most recently added sync point
uint32 index; // position of this buffer in m_buffers
uint32 cleanupCounter{ 0 }; // increased by one every time CleanupBuffer() is called if there is no sync point. If it reaches 1000 then the buffer is released
};
// Result of AllocateBufferMemory()
struct AllocatorReservation_t
{
MTL::Buffer* mtlBuffer;
uint8* memPtr; // CPU pointer to the reserved range
uint32 bufferOffset; // offset of the range within mtlBuffer
uint32 size;
uint32 bufferIndex; // index usable with GetBufferByIndex()
};
AllocatorReservation_t AllocateBufferMemory(uint32 size, uint32 alignment);
void FlushReservation(AllocatorReservation_t& uploadReservation);
void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer);
MTL::Buffer* GetBufferByIndex(uint32 index) const;
// True when the stored options have the managed storage mode bit set, in which
// case CPU writes have to be reported with didModifyRange()
bool RequiresFlush() const
{
return m_options & MTL::ResourceStorageModeManaged;
}
void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const;
private:
void allocateAdditionalUploadBuffer(uint32 sizeRequiredForAlloc);
void addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset);
const class MetalRenderer* m_mtlr;
MTL::ResourceOptions m_options;
const uint32 m_minimumBufferAllocSize;
std::vector<AllocatorBuffer_t> m_buffers;
};
// heap style allocator with released memory being freed after the current command buffer finishes
// Heap-style allocator built on top of a MetalBufferChunkedHeap.
// Only the interface is declared here; the member functions are implemented elsewhere.
class MetalSynchronizedHeapAllocator
{
// Wraps the chunked-heap address of one live allocation.
struct TrackedAllocation
{
TrackedAllocation(CHAddr allocation) : allocation(allocation) {};
CHAddr allocation;
};
public:
// Stores the renderer and constructs the underlying chunked heap with the given
// resource options and minimum chunk size.
MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_chunkedHeap(m_mtlr, options, minimumBufferAllocSize) {}
MetalSynchronizedHeapAllocator(const MetalSynchronizedHeapAllocator&) = delete; // disallow copy
// Describes one allocation handed out by AllocateBufferMemory().
struct AllocatorReservation
{
MTL::Buffer* mtlBuffer;
uint8* memPtr;
uint32 bufferOffset;
uint32 size;
uint32 bufferIndex;
};
// Returns a pointer to a reservation object; presumably taken from
// m_poolAllocatorReservation and returned through FreeReservation() - TODO confirm.
AllocatorReservation* AllocateBufferMemory(uint32 size, uint32 alignment);
void FreeReservation(AllocatorReservation* uploadReservation);
void FlushReservation(AllocatorReservation* uploadReservation);
// Takes the most recently finished command buffer; presumably processes m_releaseQueue - TODO confirm.
void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer);
void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const;
private:
const class MetalRenderer* m_mtlr;
MetalBufferChunkedHeap m_chunkedHeap;
// allocations
std::vector<TrackedAllocation> m_activeAllocations;
MemoryPool<AllocatorReservation> m_poolAllocatorReservation{32};
// release queue
// keyed by command buffer; each entry lists heap addresses associated with that command buffer
std::unordered_map<MTL::CommandBuffer*, std::vector<CHAddr>> m_releaseQueue;
};

View file

@ -0,0 +1,221 @@
#pragma once
#include <Foundation/Foundation.hpp>
#include <Metal/Metal.hpp>
#include "Cafe/HW/Latte/Core/LatteConst.h"
// Pixel-format capabilities queried from a Metal device.
struct MetalPixelFormatSupport
{
	bool m_supportsR8Unorm_sRGB;
	bool m_supportsRG8Unorm_sRGB;
	bool m_supportsPacked16BitFormats;
	bool m_supportsDepth24Unorm_Stencil8;

	// Leaves all flags unset; intended to be overwritten by the device-querying constructor.
	MetalPixelFormatSupport() = default;

	// The first three flags follow the Apple1 GPU family check, the last one
	// follows the device's depth24/stencil8 support query.
	MetalPixelFormatSupport(MTL::Device* device)
		: m_supportsR8Unorm_sRGB(device->supportsFamily(MTL::GPUFamilyApple1)),
		  m_supportsRG8Unorm_sRGB(device->supportsFamily(MTL::GPUFamilyApple1)),
		  m_supportsPacked16BitFormats(device->supportsFamily(MTL::GPUFamilyApple1)),
		  m_supportsDepth24Unorm_Stencil8(device->depth24Stencil8PixelFormatSupported())
	{
	}
};
// TODO: don't define a new struct for this
// A pair of 32-bit indices describing a range of queries.
// NOTE(review): whether `end` is inclusive or exclusive is not visible here - confirm at the use sites.
struct MetalQueryRange
{
uint32 begin;
uint32 end;
};
// Number of buffer binding slots used by the renderer
#define MAX_MTL_BUFFERS 31
// Buffer indices 28-30 are reserved for the helper shaders
#define MTL_RESERVED_BUFFERS 3
#define MAX_MTL_VERTEX_BUFFERS (MAX_MTL_BUFFERS - MTL_RESERVED_BUFFERS)
// Vertex buffers are bound from the top of the non-reserved range downwards (index 0 -> slot 27).
// The argument is parenthesized so that expressions such as `a - b` expand correctly.
#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_VERTEX_BUFFERS - (index) - 1)
#define MAX_MTL_TEXTURES 31
#define MAX_MTL_SAMPLERS 16
// Binding slots used by the helper shaders; `index` is relative to the first reserved slot.
#define GET_HELPER_BUFFER_BINDING(index) (28 + (index))
#define GET_HELPER_TEXTURE_BINDING(index) (29 + (index))
#define GET_HELPER_SAMPLER_BINDING(index) (14 + (index))
// Largest representable uint32 / size_t values; by their names these serve as
// "invalid" markers (presumably for indices and offsets - confirm at the use sites).
constexpr uint32 INVALID_UINT32 = std::numeric_limits<uint32>::max();
constexpr size_t INVALID_OFFSET = std::numeric_limits<size_t>::max();
// Rounds `size` up to the next multiple of `alignment`.
// The bit-mask approach only yields a multiple when `alignment` is a power of two.
inline size_t Align(size_t size, size_t alignment)
{
	const size_t mask = alignment - 1;
	return (size + mask) & ~mask;
}
__attribute__((unused)) static inline void ETStackAutoRelease(void* object)
{
(*(NS::Object**)object)->release();
}
#define NS_STACK_SCOPED __attribute__((cleanup(ETStackAutoRelease))) __attribute__((unused))
// Convert a null-terminated C string to an NS::String*.
// The bytes are interpreted as UTF-8 (a superset of ASCII) so that labels and
// file paths containing non-ASCII characters are converted correctly.
inline NS::String* ToNSString(const char* str)
{
	return NS::String::string(str, NS::UTF8StringEncoding);
}

// Convert a std::string to an NS::String*
inline NS::String* ToNSString(const std::string& str)
{
	return ToNSString(str.c_str());
}

// Convert a file system path given as a C string to a file NS::URL*
inline NS::URL* ToNSURL(const char* str)
{
	return NS::URL::fileURLWithPath(ToNSString(str));
}

// Convert a file system path given as a std::string to a file NS::URL*
inline NS::URL* ToNSURL(const std::string& str)
{
	return ToNSURL(str.c_str());
}
// Builds a debug label of the form "<label> (<identifier as decimal integer>)".
inline NS::String* GetLabel(const std::string& label, const void* identifier)
{
	std::string text = label;
	text += " (";
	text += std::to_string(reinterpret_cast<uintptr_t>(identifier));
	text += ")";
	return ToNSString(text);
}
// Bit mask combining the vertex, object, mesh and fragment render stages
constexpr MTL::RenderStages ALL_MTL_RENDER_STAGES = MTL::RenderStageVertex | MTL::RenderStageObject | MTL::RenderStageMesh | MTL::RenderStageFragment;
// Returns true for the texture dimensions accepted for depth textures:
// 2D, 2D MSAA, 2D array, 2D array MSAA and cubemap.
inline bool IsValidDepthTextureType(Latte::E_DIM dim)
{
	switch (dim)
	{
	case Latte::E_DIM::DIM_2D:
	case Latte::E_DIM::DIM_2D_MSAA:
	case Latte::E_DIM::DIM_2D_ARRAY:
	case Latte::E_DIM::DIM_2D_ARRAY_MSAA:
	case Latte::E_DIM::DIM_CUBEMAP:
		return true;
	default:
		return false;
	}
}
// A command buffer counts as finished once its status is either "completed" or "error".
inline bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer)
{
	switch (commandBuffer->status())
	{
	case MTL::CommandBufferStatusCompleted:
	case MTL::CommandBufferStatusError:
		return true;
	default:
		return false;
	}
}
// Every format that is not a compressed format is treated as renderable.
inline bool FormatIsRenderable(Latte::E_GX2SURFFMT format)
{
	const bool isCompressed = Latte::IsCompressedFormat(format);
	return !isCompressed;
}
// Formats a command line and runs it through system().
// Returns true when system() reports 0; otherwise logs the command together
// with the value returned by system() and returns false.
template <typename... T>
inline bool executeCommand(fmt::format_string<T...> fmt, T&&... args) {
	const std::string command = fmt::format(fmt, std::forward<T>(args)...);
	const int res = system(command.c_str());
	if (res == 0)
		return true;

	cemuLog_log(LogType::Force, "command \"{}\" failed with exit code {}", command, res);
	return false;
}
/*
class MemoryMappedFile
{
public:
MemoryMappedFile(const std::string& filePath)
{
// Open the file
m_fd = open(filePath.c_str(), O_RDONLY);
if (m_fd == -1) {
cemuLog_log(LogType::Force, "failed to open file: {}", filePath);
return;
}
// Get the file size
// Use a loop to handle the case where the file size is 0 (more of a safety net)
struct stat fileStat;
while (true)
{
if (fstat(m_fd, &fileStat) == -1)
{
close(m_fd);
cemuLog_log(LogType::Force, "failed to get file size: {}", filePath);
return;
}
m_fileSize = fileStat.st_size;
if (m_fileSize == 0)
{
cemuLog_logOnce(LogType::Force, "file size is 0: {}", filePath);
std::this_thread::sleep_for(std::chrono::milliseconds(10));
continue;
}
break;
}
// Memory map the file
m_data = mmap(nullptr, m_fileSize, PROT_READ, MAP_PRIVATE, m_fd, 0);
if (m_data == MAP_FAILED)
{
close(m_fd);
cemuLog_log(LogType::Force, "failed to memory map file: {}", filePath);
return;
}
}
~MemoryMappedFile()
{
if (m_data && m_data != MAP_FAILED)
munmap(m_data, m_fileSize);
if (m_fd != -1)
close(m_fd);
}
uint8* data() const { return static_cast<uint8*>(m_data); }
size_t size() const { return m_fileSize; }
private:
int m_fd = -1;
void* m_data = nullptr;
size_t m_fileSize = 0;
};
*/
// Number of vertices that make up one primitive of the given mode.
// Unhandled modes are logged and reported as 0.
inline uint32 GetVerticesPerPrimitive(LattePrimitiveMode primitiveMode)
{
	switch (primitiveMode)
	{
	case LattePrimitiveMode::POINTS:
		return 1;
	case LattePrimitiveMode::LINES:
	case LattePrimitiveMode::LINE_STRIP: // same as lines, but requires connection
		return 2;
	case LattePrimitiveMode::TRIANGLES:
	case LattePrimitiveMode::RECTS:
		return 3;
	default:
		break;
	}

	cemuLog_log(LogType::Force, "Unimplemented primitive type {}", primitiveMode);
	return 0;
}
// Only line strips are reported as requiring connection between consecutive primitives.
inline bool PrimitiveRequiresConnection(LattePrimitiveMode primitiveMode)
{
	return primitiveMode == LattePrimitiveMode::LINE_STRIP;
}
inline bool UseRectEmulation(const LatteContextRegister& lcr) {
const LattePrimitiveMode primitiveMode = lcr.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
return (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);
}
// The geometry path is taken when a geometry shader is present or when rects have to be emulated.
inline bool UseGeometryShader(const LatteContextRegister& lcr, bool hasGeometryShader) {
	if (hasGeometryShader)
		return true;
	return UseRectEmulation(lcr);
}

View file

@ -0,0 +1,6 @@
#define NS_PRIVATE_IMPLEMENTATION
#define CA_PRIVATE_IMPLEMENTATION
#define MTL_PRIVATE_IMPLEMENTATION
#include <Foundation/Foundation.hpp>
#include <QuartzCore/QuartzCore.hpp>
#include <Metal/Metal.hpp>

View file

@ -0,0 +1,119 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "HW/Latte/ISA/RegDefines.h"
#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Metal/MTLDepthStencil.hpp"
// Releases every cached depth-stencil state object and empties the cache.
MetalDepthStencilCache::~MetalDepthStencilCache()
{
	for (auto& [stateHash, depthStencilState] : m_depthStencilCache)
		depthStencilState->release();

	m_depthStencilCache.clear();
}
// Returns the depth-stencil state matching the given registers, creating and
// caching it on first use.
MTL::DepthStencilState* MetalDepthStencilCache::GetDepthStencilState(const LatteContextRegister& lcr)
{
	// operator[] creates a null entry on a miss, which is filled in at the end
	const uint64 cacheKey = CalculateDepthStencilHash(lcr);
	auto& cachedState = m_depthStencilCache[cacheKey];
	if (cachedState)
		return cachedState;

	// Depth state
	const bool depthTestOn = lcr.DB_DEPTH_CONTROL.get_Z_ENABLE();
	const auto depthCompare = lcr.DB_DEPTH_CONTROL.get_Z_FUNC();
	const bool depthWriteOn = lcr.DB_DEPTH_CONTROL.get_Z_WRITE_ENABLE();

	NS_STACK_SCOPED MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init();
	if (depthTestOn)
	{
		desc->setDepthWriteEnabled(depthWriteOn);
		desc->setDepthCompareFunction(GetMtlCompareFunc(depthCompare));
	}

	// Stencil state
	if (lcr.DB_DEPTH_CONTROL.get_STENCIL_ENABLE())
	{
		// Applies one set of stencil parameters to a stencil descriptor
		auto fillStencil = [](MTL::StencilDescriptor* stencil, uint32 readMask, uint32 writeMask, auto compareFunc, auto zFailOp, auto failOp, auto zPassOp) {
			stencil->setReadMask(readMask);
			stencil->setWriteMask(writeMask);
			stencil->setStencilCompareFunction(GetMtlCompareFunc(compareFunc));
			stencil->setDepthFailureOperation(GetMtlStencilOp(zFailOp));
			stencil->setStencilFailureOperation(GetMtlStencilOp(failOp));
			stencil->setDepthStencilPassOperation(GetMtlStencilOp(zPassOp));
		};

		// stencil functions and operations
		const bool separateBackFace = lcr.DB_DEPTH_CONTROL.get_BACK_STENCIL_ENABLE();
		const auto funcFront = lcr.DB_DEPTH_CONTROL.get_STENCIL_FUNC_F();
		const auto zPassFront = lcr.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_F();
		const auto zFailFront = lcr.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_F();
		const auto failFront = lcr.DB_DEPTH_CONTROL.get_STENCIL_FAIL_F();
		const auto funcBack = lcr.DB_DEPTH_CONTROL.get_STENCIL_FUNC_B();
		const auto zPassBack = lcr.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_B();
		const auto zFailBack = lcr.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_B();
		const auto failBack = lcr.DB_DEPTH_CONTROL.get_STENCIL_FAIL_B();

		// stencil masks
		const uint32 readMaskFront = lcr.DB_STENCILREFMASK.get_STENCILMASK_F();
		const uint32 writeMaskFront = lcr.DB_STENCILREFMASK.get_STENCILWRITEMASK_F();
		const uint32 readMaskBack = lcr.DB_STENCILREFMASK_BF.get_STENCILMASK_B();
		const uint32 writeMaskBack = lcr.DB_STENCILREFMASK_BF.get_STENCILWRITEMASK_B();

		NS_STACK_SCOPED MTL::StencilDescriptor* frontStencil = MTL::StencilDescriptor::alloc()->init();
		fillStencil(frontStencil, readMaskFront, writeMaskFront, funcFront, zFailFront, failFront, zPassFront);
		desc->setFrontFaceStencil(frontStencil);

		// Without separate back-face stencil the back face mirrors the front-face parameters
		NS_STACK_SCOPED MTL::StencilDescriptor* backStencil = MTL::StencilDescriptor::alloc()->init();
		if (separateBackFace)
			fillStencil(backStencil, readMaskBack, writeMaskBack, funcBack, zFailBack, failBack, zPassBack);
		else
			fillStencil(backStencil, readMaskFront, writeMaskFront, funcFront, zFailFront, failFront, zPassFront);
		desc->setBackFaceStencil(backStencil);
	}

	cachedState = m_mtlr->GetDevice()->newDepthStencilState(desc);
	return cachedState;
}
// Computes the cache key for the depth-stencil state described by the given registers.
// The stencil mask registers only contribute when the stencil test is enabled;
// otherwise the stencil-related bits of the depth control register are ignored.
uint64 MetalDepthStencilCache::CalculateDepthStencilHash(const LatteContextRegister& lcr)
{
	uint32* regs = lcr.GetRawView();
	uint32 dbDepthControl = regs[Latte::REGADDR::DB_DEPTH_CONTROL];
	const bool stencilOn = (dbDepthControl & 1) != 0;
	const bool backStencilOn = (dbDepthControl & (1<<7)) != 0;

	uint64 hash = 0;
	if (!stencilOn)
	{
		// zero out stencil related bits (8-31)
		dbDepthControl &= 0xFF;
	}
	else
	{
		hash += regs[mmDB_STENCILREFMASK];
		hash = std::rotl<uint64>(hash, 17);
		if (backStencilOn)
		{
			hash += regs[mmDB_STENCILREFMASK_BF];
			hash = std::rotl<uint64>(hash, 13);
		}
	}

	hash = std::rotl<uint64>(hash, 17);
	hash += dbDepthControl;
	return hash;
}

View file

@ -0,0 +1,21 @@
#pragma once
#include <Metal/Metal.hpp>
#include "HW/Latte/ISA/LatteReg.h"
// Cache of Metal depth-stencil state objects, keyed by a 64-bit hash of the
// depth/stencil registers.
class MetalDepthStencilCache
{
public:
MetalDepthStencilCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
~MetalDepthStencilCache();
// Returns the state object matching the given registers
MTL::DepthStencilState* GetDepthStencilState(const LatteContextRegister& lcr);
private:
class MetalRenderer* m_mtlr;
// hash -> state object
std::map<uint64, MTL::DepthStencilState*> m_depthStencilCache;
uint64 CalculateDepthStencilHash(const LatteContextRegister& lcr);
};

View file

@ -0,0 +1,3 @@
#pragma once
// Creates a Metal layer for the native view passed as `handle` and returns it as an opaque pointer.
// scaleX/scaleY are output parameters that receive the layer's scale factors.
void* CreateMetalLayer(void* handle, float& scaleX, float& scaleY);

View file

@ -0,0 +1,22 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayer.h"
#include "Cafe/HW/Latte/Renderer/MetalView.h"
// Adds a MetalView covering the bounds of the given NSView and returns the layer of that child view.
// scaleX/scaleY receive the ratio between the child view's backing size and its size in points.
void* CreateMetalLayer(void* handle, float& scaleX, float& scaleY)
{
	NSView* parentView = (NSView*)handle;

	MetalView* metalView = [[MetalView alloc] initWithFrame:parentView.bounds];
	[metalView setAutoresizingMask:(NSViewWidthSizable | NSViewHeightSizable)];
	[metalView setWantsLayer:YES];
	[parentView addSubview:metalView];

	const NSRect frameInPoints = metalView.frame;
	const NSRect frameInPixels = [metalView convertRectToBacking:frameInPoints];
	scaleX = (float)(frameInPixels.size.width / frameInPoints.size.width);
	scaleY = (float)(frameInPixels.size.height / frameInPoints.size.height);

	return metalView.layer;
}

View file

@ -0,0 +1,46 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayer.h"
#include "gui/guiWrapper.h"
// Creates the Metal layer for either the main window or the pad window and
// configures its device, drawable size (scaled by the layer scale factors) and framebuffer-only flag.
MetalLayerHandle::MetalLayerHandle(MTL::Device* device, const Vector2i& size, bool mainWindow)
{
	const auto& guiWindows = gui_getWindowInfo();
	const auto& windowInfo = mainWindow ? guiWindows.window_main : guiWindows.window_pad;

	m_layer = (CA::MetalLayer*)CreateMetalLayer(windowInfo.handle, m_layerScaleX, m_layerScaleY);
	m_layer->setDevice(device);

	const CGSize drawableSize{(float)size.x * m_layerScaleX, (float)size.y * m_layerScaleY};
	m_layer->setDrawableSize(drawableSize);
	m_layer->setFramebufferOnly(true);
}
// Releases the layer, if one was created.
MetalLayerHandle::~MetalLayerHandle()
{
	if (!m_layer)
		return;
	m_layer->release();
}
void MetalLayerHandle::Resize(const Vector2i& size)
{
m_layer->setDrawableSize(CGSize{(float)size.x * m_layerScaleX, (float)size.y * m_layerScaleY});
}
// Makes sure a drawable is held, requesting the next one from the layer if necessary.
// Returns false (after logging) when the layer does not provide a drawable.
bool MetalLayerHandle::AcquireDrawable()
{
	if (!m_drawable)
	{
		m_drawable = m_layer->nextDrawable();
		if (!m_drawable)
		{
			cemuLog_log(LogType::Force, "layer {} failed to acquire next drawable", (void*)this);
			return false;
		}
	}
	return true;
}
// Schedules presentation of the held drawable on the given command buffer and
// forgets it, so that the next AcquireDrawable() requests a new one.
void MetalLayerHandle::PresentDrawable(MTL::CommandBuffer* commandBuffer)
{
	CA::MetalDrawable* drawableToPresent = m_drawable;
	m_drawable = nullptr;
	commandBuffer->presentDrawable(drawableToPresent);
}

View file

@ -0,0 +1,31 @@
#pragma once
#include <QuartzCore/QuartzCore.hpp>
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "util/math/vector2.h"
// Wraps the CA::MetalLayer of one output window together with the drawable
// that is currently acquired from it.
class MetalLayerHandle
{
public:
	// Creates an empty handle without a layer
	MetalLayerHandle() = default;
	// Creates the layer for the main window (mainWindow == true) or the pad window
	MetalLayerHandle(MTL::Device* device, const Vector2i& size, bool mainWindow);
	~MetalLayerHandle();

	// Sets the drawable size; `size` is multiplied by the layer scale factors
	void Resize(const Vector2i& size);

	// Acquires the next drawable if none is held. Returns false on failure
	bool AcquireDrawable();
	// Presents the held drawable via the given command buffer and drops it
	void PresentDrawable(MTL::CommandBuffer* commandBuffer);

	CA::MetalLayer* GetLayer() const { return m_layer; }
	CA::MetalDrawable* GetDrawable() const { return m_drawable; }

private:
	CA::MetalLayer* m_layer = nullptr;
	// Scale factors reported when the layer is created. Initialized to a neutral
	// value so that a default-constructed handle never holds indeterminate floats.
	float m_layerScaleX = 1.0f, m_layerScaleY = 1.0f;
	CA::MetalDrawable* m_drawable = nullptr;
};

View file

@ -0,0 +1,128 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.h"
#include "CafeSystem.h"
#include "Cemu/Logging/CemuLogging.h"
#include "Common/precompiled.h"
#include "HW/MMU/MMU.h"
#include "config/CemuConfig.h"
// Releases the buffer cache, if it was created.
MetalMemoryManager::~MetalMemoryManager()
{
	if (!m_bufferCache)
		return;
	m_bufferCache->release();
}
// Returns a pointer to the shared upload scratch buffer, growing it first if it
// holds fewer than `size` bytes.
void* MetalMemoryManager::AcquireTextureUploadBuffer(size_t size)
{
	const bool tooSmall = m_textureUploadBuffer.size() < size;
	if (tooSmall)
		m_textureUploadBuffer.resize(size);

	return m_textureUploadBuffer.data();
}
// Ends the use of the scratch buffer obtained from AcquireTextureUploadBuffer().
// `mem` must be the pointer returned by that call (checked in debug builds only).
void MetalMemoryManager::ReleaseTextureUploadBuffer(uint8* mem)
{
	uint8* const scratchBase = m_textureUploadBuffer.data();
	cemu_assert_debug(scratchBase == mem);
	m_textureUploadBuffer.clear();
}
// Creates the buffer cache. Must only be called while no buffer cache exists.
// The cache mode comes from the current game profile; "Auto" is resolved here, and
// "Host" falls back to "DeviceShared" when host memory cannot be imported.
// `size` is the size of the buffer that is allocated when host memory is not imported.
void MetalMemoryManager::InitBufferCache(size_t size)
{
cemu_assert_debug(!m_bufferCache);
m_bufferCacheMode = g_current_game_profile->GetBufferCacheMode();
// Resolve "Auto": device private everywhere, except for a few titles on Apple GPUs
if (m_bufferCacheMode == BufferCacheMode::Auto)
{
// TODO: do this for all unified memory systems?
if (m_mtlr->IsAppleGPU())
{
switch (CafeSystem::GetForegroundTitleId())
{
// The Legend of Zelda: Wind Waker HD
case 0x0005000010143600: // EUR
case 0x0005000010143500: // USA
case 0x0005000010143400: // JPN
// TODO: use host instead?
m_bufferCacheMode = BufferCacheMode::DeviceShared;
break;
default:
m_bufferCacheMode = BufferCacheMode::DevicePrivate;
break;
}
}
else
{
m_bufferCacheMode = BufferCacheMode::DevicePrivate;
}
}
// First, try to import the host memory as a buffer
if (m_bufferCacheMode == BufferCacheMode::Host)
{
if (m_mtlr->HasUnifiedMemory())
{
// The imported range is the whole MEM2 range of the emulated MMU
m_importedMemBaseAddress = mmuRange_MEM2.getBase();
m_hostAllocationSize = mmuRange_MEM2.getSize();
m_bufferCache = m_mtlr->GetDevice()->newBuffer(memory_getPointerFromVirtualOffset(m_importedMemBaseAddress), m_hostAllocationSize, MTL::ResourceStorageModeShared, nullptr);
if (!m_bufferCache)
{
cemuLog_log(LogType::Force, "Failed to import host memory as a buffer, using device shared mode instead");
m_bufferCacheMode = BufferCacheMode::DeviceShared;
}
}
else
{
cemuLog_log(LogType::Force, "Host buffer cache mode is only available on unified memory systems, using device shared mode instead");
m_bufferCacheMode = BufferCacheMode::DeviceShared;
}
}
// No imported buffer: allocate a regular one, private for DevicePrivate mode and shared otherwise
if (!m_bufferCache)
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, (m_bufferCacheMode == BufferCacheMode::DevicePrivate ? MTL::ResourceStorageModePrivate : MTL::ResourceStorageModeShared));
#ifdef CEMU_DEBUG_ASSERT
m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache));
#endif
}
void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, size_t size)
{
cemu_assert_debug(m_bufferCacheMode != BufferCacheMode::Host);
cemu_assert_debug(m_bufferCache);
cemu_assert_debug((offset + size) <= m_bufferCache->length());
if (m_bufferCacheMode == BufferCacheMode::DevicePrivate)
{
auto blitCommandEncoder = m_mtlr->GetBlitCommandEncoder();
auto allocation = m_stagingAllocator.AllocateBufferMemory(size, 1);
memcpy(allocation.memPtr, data, size);
m_stagingAllocator.FlushReservation(allocation);
blitCommandEncoder->copyFromBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size);
//m_mtlr->CopyBufferToBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES);
}
else
{
memcpy((uint8*)m_bufferCache->contents() + offset, data, size);
}
}
void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size)
{
cemu_assert_debug(m_bufferCacheMode != BufferCacheMode::Host);
cemu_assert_debug(m_bufferCache);
if (m_bufferCacheMode == BufferCacheMode::DevicePrivate)
m_mtlr->CopyBufferToBuffer(m_bufferCache, srcOffset, m_bufferCache, dstOffset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES);
else
memcpy((uint8*)m_bufferCache->contents() + dstOffset, (uint8*)m_bufferCache->contents() + srcOffset, size);
}

View file

@ -0,0 +1,76 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h"
#include "GameProfile/GameProfile.h"
// Owns the buffer-related resources of the Metal renderer: the staging and index
// allocators, the texture upload scratch buffer and the buffer cache.
class MetalMemoryManager
{
public:
	// The staging allocator uses a minimum buffer size of 32 MiB, the index allocator 4 MiB.
	// Both use the renderer's optimal buffer storage mode.
	MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_stagingAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 32u * 1024 * 1024), m_indexAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 4u * 1024 * 1024) {}
	~MetalMemoryManager();

	MetalSynchronizedRingAllocator& GetStagingAllocator()
	{
		return m_stagingAllocator;
	}

	MetalSynchronizedHeapAllocator& GetIndexAllocator()
	{
		return m_indexAllocator;
	}

	// Returns the buffer cache, or nullptr before InitBufferCache() was called
	MTL::Buffer* GetBufferCache()
	{
		return m_bufferCache;
	}

	// Forwards the latest finished command buffer to both allocators
	void CleanupBuffers(MTL::CommandBuffer* latestFinishedCommandBuffer)
	{
		m_stagingAllocator.CleanupBuffer(latestFinishedCommandBuffer);
		m_indexAllocator.CleanupBuffer(latestFinishedCommandBuffer);
	}

	// Texture upload buffer
	void* AcquireTextureUploadBuffer(size_t size);
	void ReleaseTextureUploadBuffer(uint8* mem);

	// Buffer cache
	void InitBufferCache(size_t size);
	void UploadToBufferCache(const void* data, size_t offset, size_t size);
	void CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size);

	// Getters
	bool UseHostMemoryForCache() const
	{
		return (m_bufferCacheMode == BufferCacheMode::Host);
	}

	bool NeedsReducedLatency() const
	{
		return (m_bufferCacheMode == BufferCacheMode::DeviceShared || m_bufferCacheMode == BufferCacheMode::Host);
	}

	MPTR GetImportedMemBaseAddress() const
	{
		return m_importedMemBaseAddress;
	}

	size_t GetHostAllocationSize() const
	{
		return m_hostAllocationSize;
	}

private:
	class MetalRenderer* m_mtlr;

	std::vector<uint8> m_textureUploadBuffer;

	MetalSynchronizedRingAllocator m_stagingAllocator;
	MetalSynchronizedHeapAllocator m_indexAllocator;

	MTL::Buffer* m_bufferCache = nullptr;
	// Both members below are assigned in InitBufferCache(). They get defined values
	// here so that the getters above never read indeterminate data before that call.
	BufferCacheMode m_bufferCacheMode = BufferCacheMode::Auto;
	MPTR m_importedMemBaseAddress{};
	size_t m_hostAllocationSize = 0;
};

View file

@ -0,0 +1,37 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
// Releases every pipeline state that was created.
MetalOutputShaderCache::~MetalOutputShaderCache()
{
	for (MTL::RenderPipelineState* pipelineState : m_cache)
	{
		if (pipelineState)
			pipelineState->release();
	}
}
// Returns the render pipeline state for the given output shader, creating it on first use.
// The sRGB variants occupy the second half of the cache array.
// A creation error is logged; the (possibly null) result is returned and stored either way.
MTL::RenderPipelineState* MetalOutputShaderCache::GetPipeline(RendererOutputShader* shader, uint8 shaderIndex, bool usesSRGB)
{
	uint8 slot = shaderIndex;
	if (usesSRGB)
		slot += METAL_SHADER_TYPE_COUNT;

	MTL::RenderPipelineState*& cachedPipeline = m_cache[slot];
	if (cachedPipeline)
		return cachedPipeline;

	// Create a new render pipeline state
	auto vertexFunction = static_cast<RendererShaderMtl*>(shader->GetVertexShader())->GetFunction();
	auto fragmentFunction = static_cast<RendererShaderMtl*>(shader->GetFragmentShader())->GetFunction();
	const MTL::PixelFormat targetFormat = usesSRGB ? MTL::PixelFormatBGRA8Unorm_sRGB : MTL::PixelFormatBGRA8Unorm;

	NS_STACK_SCOPED auto descriptor = MTL::RenderPipelineDescriptor::alloc()->init();
	descriptor->setVertexFunction(vertexFunction);
	descriptor->setFragmentFunction(fragmentFunction);
	descriptor->colorAttachments()->object(0)->setPixelFormat(targetFormat);

	NS::Error* error = nullptr;
	cachedPipeline = m_mtlr->GetDevice()->newRenderPipelineState(descriptor, &error);
	if (error)
	{
		cemuLog_log(LogType::Force, "error creating output render pipeline state: {}", error->localizedDescription()->utf8String());
	}

	return cachedPipeline;
}

View file

@ -0,0 +1,20 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
// Number of distinct output shader indices
constexpr uint8 METAL_SHADER_TYPE_COUNT = 6;
// One cache slot per shader index, once for non-sRGB and once for sRGB output
constexpr uint8 METAL_OUTPUT_SHADER_CACHE_SIZE = 2 * METAL_SHADER_TYPE_COUNT;
// Caches the render pipeline states used with the output shaders
class MetalOutputShaderCache
{
public:
MetalOutputShaderCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
~MetalOutputShaderCache();
// shaderIndex must be below METAL_SHADER_TYPE_COUNT; it selects the slot within the sRGB / non-sRGB half
MTL::RenderPipelineState* GetPipeline(RendererOutputShader* shader, uint8 shaderIndex, bool usesSRGB);
private:
class MetalRenderer* m_mtlr;
// all slots start out as nullptr and are filled on demand
MTL::RenderPipelineState* m_cache[METAL_OUTPUT_SHADER_CACHE_SIZE] = {nullptr};
};

View file

@ -0,0 +1,26 @@
#pragma once
// Plain collection of per-frame renderer counters.
class MetalPerformanceMonitor
{
public:
	// Per frame data
	uint32 m_commandBuffers = 0;
	uint32 m_renderPasses = 0;
	uint32 m_clears = 0;
	uint32 m_manualVertexFetchDraws = 0;
	uint32 m_meshDraws = 0;
	uint32 m_triangleFans = 0;

	MetalPerformanceMonitor() = default;
	~MetalPerformanceMonitor() = default;

	// Sets every per-frame counter back to zero
	void ResetPerFrameData()
	{
		m_commandBuffers = m_renderPasses = m_clears = 0;
		m_manualVertexFetchDraws = m_meshDraws = m_triangleFans = 0;
	}
};

View file

@ -0,0 +1,621 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Common/RegisterSerializer.h"
#include "Cafe/HW/Latte/Core/LatteShaderCache.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/ISA/LatteReg.h"
#include "Cemu/FileCache/FileCache.h"
#include "Common/precompiled.h"
#include "util/helpers/helpers.h"
#include "config/ActiveSettings.h"
#include <openssl/sha.h>
static bool g_compilePipelineThreadInit{false};
static std::mutex g_compilePipelineMutex;
static std::condition_variable g_compilePipelineCondVar;
static std::queue<MetalPipelineCompiler*> g_compilePipelineRequests;
// Worker loop of an async pipeline compile thread: waits for a queued compiler,
// runs it outside the lock and deletes it afterwards. Never returns.
static void compileThreadFunc(sint32 threadIndex)
{
	SetThreadName("compilePl");

	// one thread runs at normal priority while the others run at lower priority
	if (threadIndex != 0)
		; // TODO: set thread priority

	for (;;)
	{
		MetalPipelineCompiler* job = nullptr;
		{
			std::unique_lock lock(g_compilePipelineMutex);
			g_compilePipelineCondVar.wait(lock, [] { return !g_compilePipelineRequests.empty(); });
			job = g_compilePipelineRequests.front();
			g_compilePipelineRequests.pop();
		}

		job->Compile(true, false, true);
		delete job;
	}
}
static void initCompileThread()
{
uint32 numCompileThreads;
uint32 cpuCoreCount = GetPhysicalCoreCount();
if (cpuCoreCount <= 2)
numCompileThreads = 1;
else
numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3
numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8
for (uint32 i = 0; i < numCompileThreads; i++)
{
std::thread compileThread(compileThreadFunc, i);
compileThread.detach();
}
}
// Hands a compiler over to the async compile threads and wakes one of them.
static void queuePipeline(MetalPipelineCompiler* v)
{
	{
		std::lock_guard lock(g_compilePipelineMutex);
		g_compilePipelineRequests.push(v);
	}
	// notify after the lock has been dropped
	g_compilePipelineCondVar.notify_one();
}
// make a guess if a pipeline is not essential
// non-essential means that skipping these drawcalls shouldn't lead to permanently corrupted graphics
// Returns true when the pipeline for a draw is considered non-essential, i.e. may be compiled asynchronously.
bool IsAsyncPipelineAllowed(const MetalAttachmentsInfo& attachmentsInfo, Vector2i extend, uint32 indexCount)
{
	// Splatoon ink mechanics use 1600x1600 R8 and R8G8 framebuffers, this resolution is rare enough that we can just blacklist it globally
	const bool isInkTargetSize = (extend.x == 1600 && extend.y == 1600);
	if (isInkTargetSize)
		return false;

	// aggressive filter but seems to work well so far
	const bool hasDepthAttachment = attachmentsInfo.depthFormat != Latte::E_GX2SURFFMT::INVALID_FORMAT;
	if (hasDepthAttachment)
		return true;

	// small index count (3,4,5,6) is often associated with full-viewport quads (which are considered essential due to often being used to generate persistent textures)
	return indexCount > 6;
}
// Pointer to the most recently constructed pipeline cache (set in the constructor below)
MetalPipelineCache* g_mtlPipelineCache = nullptr;
// Returns the registered instance. Dereferences the global pointer without a
// null check, so a MetalPipelineCache must have been constructed beforehand.
MetalPipelineCache& MetalPipelineCache::GetInstance()
{
return *g_mtlPipelineCache;
}
// Stores the renderer and registers this object as the global instance
MetalPipelineCache::MetalPipelineCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}
{
g_mtlPipelineCache = this;
}
// Releases the Metal pipeline of every cached entry and frees the entry itself.
MetalPipelineCache::~MetalPipelineCache()
{
	for (auto& cacheEntry : m_pipelineCache)
	{
		PipelineObject* entryObject = cacheEntry.second;
		entryObject->m_pipeline->release();
		delete entryObject;
	}
}
// Returns the pipeline object for the given shader/attachment/register state.
// On a cache miss a new object is created and compiled either asynchronously
// (when enabled in the config and the draw is considered non-essential) or
// synchronously, and the state is recorded via AddCurrentStateToCache().
// `extend` and `indexCount` are only used for the async-compile heuristic.
PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, Vector2i extend, uint32 indexCount, const LatteContextRegister& lcr)
{
	uint64 hash = CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);
	// operator[] creates a null entry on a miss, which is filled in right below
	PipelineObject*& pipelineObj = m_pipelineCache[hash];
	if (pipelineObj)
		return pipelineObj;

	pipelineObj = new PipelineObject();

	MetalPipelineCompiler* compiler = new MetalPipelineCompiler(m_mtlr, *pipelineObj);
	compiler->InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);

	bool allowAsyncCompile = false;
	if (GetConfig().async_compile)
		allowAsyncCompile = IsAsyncPipelineAllowed(activeAttachmentsInfo, extend, indexCount);

	if (allowAsyncCompile)
	{
		// The compile threads are started lazily on the first async request
		if (!g_compilePipelineThreadInit)
		{
			initCompileThread();
			g_compilePipelineThreadInit = true;
		}
		// Ownership of the compiler passes to the compile thread, which deletes it
		queuePipeline(compiler);
	}
	else
	{
		// Also force compile to ensure that the pipeline is ready.
		// The call is kept outside of the assertion so that the compile happens
		// regardless of how the debug assertion treats its argument.
		[[maybe_unused]] const bool compiled = compiler->Compile(true, true, true);
		cemu_assert_debug(compiled);
		delete compiler;
	}

	// Save to cache
	AddCurrentStateToCache(hash, lastUsedAttachmentsInfo);

	return pipelineObj;
}
// Computes the 64-bit key used to look up a pipeline in the cache.
// The key combines the attachment formats, the fetch shader layout, the shader
// hashes and the pipeline-relevant registers of `lcr`. All register state is read
// from `lcr` so that the result is correct for any register set passed in.
uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
{
	// Hash
	uint64 stateHash = 0;

	// Color attachments: format and slot, plus a marker when the attachment is not active
	for (int i = 0; i < Latte::GPU_LIMITS::NUM_COLOR_ATTACHMENTS; ++i)
	{
		Latte::E_GX2SURFFMT format = lastUsedAttachmentsInfo.colorFormats[i];
		if (format == Latte::E_GX2SURFFMT::INVALID_FORMAT)
			continue;

		stateHash += GetMtlPixelFormat(format, false) + i * 31;
		stateHash = std::rotl<uint64>(stateHash, 7);

		if (activeAttachmentsInfo.colorFormats[i] == Latte::E_GX2SURFFMT::INVALID_FORMAT)
		{
			stateHash += 1;
			stateHash = std::rotl<uint64>(stateHash, 1);
		}
	}

	// Depth attachment, handled the same way as the color attachments
	if (lastUsedAttachmentsInfo.depthFormat != Latte::E_GX2SURFFMT::INVALID_FORMAT)
	{
		stateHash += GetMtlPixelFormat(lastUsedAttachmentsInfo.depthFormat, true);
		stateHash = std::rotl<uint64>(stateHash, 7);

		if (activeAttachmentsInfo.depthFormat == Latte::E_GX2SURFFMT::INVALID_FORMAT)
		{
			stateHash += 1;
			stateHash = std::rotl<uint64>(stateHash, 1);
		}
	}

	// Vertex buffer strides
	for (auto& group : fetchShader->bufferGroups)
	{
		uint32 bufferStride = group.getCurrentBufferStride(lcr.GetRawView());
		stateHash = std::rotl<uint64>(stateHash, 7);
		stateHash += bufferStride * 3;
	}

	stateHash += fetchShader->getVkPipelineHashFragment();
	stateHash = std::rotl<uint64>(stateHash, 7);

	stateHash += lcr.GetRawView()[mmVGT_STRMOUT_EN];
	stateHash = std::rotl<uint64>(stateHash, 7);

	if(lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL())
		stateHash += 0x333333;

	stateHash = (stateHash >> 8) + (stateHash * 0x370531ull) % 0x7F980D3BF9B4639Dull;

	uint32* ctxRegister = lcr.GetRawView();

	// Shaders
	if (vertexShader)
		stateHash += vertexShader->baseHash;

	stateHash = std::rotl<uint64>(stateHash, 13);

	if (pixelShader)
		stateHash += pixelShader->baseHash + pixelShader->auxHash;

	stateHash = std::rotl<uint64>(stateHash, 13);

	// Rasterizer and blend registers
	uint32 polygonCtrl = lcr.PA_SU_SC_MODE_CNTL.getRawValue();
	stateHash += polygonCtrl;
	stateHash = std::rotl<uint64>(stateHash, 7);

	stateHash += ctxRegister[Latte::REGADDR::PA_CL_CLIP_CNTL];
	stateHash = std::rotl<uint64>(stateHash, 7);

	const auto colorControlReg = ctxRegister[Latte::REGADDR::CB_COLOR_CONTROL];
	stateHash += colorControlReg;

	stateHash += ctxRegister[Latte::REGADDR::CB_TARGET_MASK];

	// Only the blend control registers of targets with blending enabled contribute
	const uint32 blendEnableMask = (colorControlReg >> 8) & 0xFF;
	if (blendEnableMask)
	{
		for (auto i = 0; i < 8; ++i)
		{
			if (((blendEnableMask & (1 << i))) == 0)
				continue;
			stateHash = std::rotl<uint64>(stateHash, 7);
			stateHash += ctxRegister[Latte::REGADDR::CB_BLEND0_CONTROL + i];
		}
	}

	// Mesh pipeline
	// The primitive mode is taken from lcr (not from the global GPU state), matching every other register read in this function
	const LattePrimitiveMode primitiveMode = lcr.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
	bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);

	bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);

	if (usesGeometryShader)
	{
		stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE];
		stateHash = std::rotl<uint64>(stateHash, 7);
	}

	return stateHash;
}
// Progress bookkeeping for loading the on-disk pipeline cache.
// Reset by BeginLoading, advanced by UpdateLoading and CompilerThread.
struct
{
uint32 pipelineLoadIndex; // next file index UpdateLoading will request from the cache file
uint32 pipelineMaxFileIndex; // highest file index reported by the cache file (inclusive upper bound of the load loop)
std::atomic_uint32_t pipelinesQueued; // entries pushed to the compilation queue by UpdateLoading
std::atomic_uint32_t pipelinesLoaded; // entries the compiler threads have finished processing
} g_mtlCacheState;
// Prepares loading of the transferable Metal pipeline cache of a title: resets the loader
// state, starts the compiler threads and opens (or creates) the cache file.
// Returns the number of entries in the cache file, or 0 if the file could not be opened.
uint32 MetalPipelineCache::BeginLoading(uint64 cacheTitleId)
{
	// create the cache directory if needed; the resulting error code is not inspected
	std::error_code dirError;
	fs::create_directories(ActiveSettings::GetCachePath("shaderCache/transferable"), dirError);
	const auto cacheFilePath = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_mtlpipeline.bin", cacheTitleId);

	// reset loader progress
	g_mtlCacheState.pipelineLoadIndex = 0;
	g_mtlCacheState.pipelineMaxFileIndex = 0;
	g_mtlCacheState.pipelinesLoaded = 0;
	g_mtlCacheState.pipelinesQueued = 0;

	// reset async compilation state
	m_compilationCount.store(0);
	m_compilationQueue.clear();

	// one compiler thread per physical core, limited to the range [1, 8]
	const uint32 physicalCores = GetPhysicalCoreCount();
	m_numCompilationThreads = std::clamp(physicalCores, 1u, 8u);
	// TODO: uncomment?
	//if (VulkanRenderer::GetInstance()->GetDisableMultithreadedCompilation())
	//	m_numCompilationThreads = 1;
	for (uint32 threadIdx = 0; threadIdx < m_numCompilationThreads; threadIdx++)
		std::thread(&MetalPipelineCache::CompilerThread, this).detach();

	// open the cache file, creating it if it does not exist yet
	cemu_assert_debug(s_cache == nullptr);
	s_cache = FileCache::Open(cacheFilePath, true, LatteShaderCache_getPipelineCacheExtraVersion(cacheTitleId));
	if (!s_cache)
	{
		cemuLog_log(LogType::Force, "Failed to open or create Metal pipeline cache file: {}", _pathToUtf8(cacheFilePath));
		return 0;
	}
	s_cache->UseCompression(false);
	g_mtlCacheState.pipelineMaxFileIndex = s_cache->GetMaximumFileIndex();
	return s_cache->GetFileCount();
}
// Advances cache loading by at most one entry per call.
// pipelinesLoadedTotal receives the number of entries the compiler threads have finished so far,
// pipelinesMissingShaders is always set to 0.
// Returns true while there is still work pending (entries left to queue or still compiling),
// false once everything that was queued has been processed.
bool MetalPipelineCache::UpdateLoading(uint32& pipelinesLoadedTotal, uint32& pipelinesMissingShaders)
{
	pipelinesLoadedTotal = g_mtlCacheState.pipelinesLoaded;
	pipelinesMissingShaders = 0;
	// s_cache stays null when BeginLoading failed to open/create the cache file.
	// In that case there is nothing to read and dereferencing it would crash.
	while (s_cache && g_mtlCacheState.pipelineLoadIndex <= g_mtlCacheState.pipelineMaxFileIndex)
	{
		if (m_compilationQueue.size() >= 50)
		{
			std::this_thread::sleep_for(std::chrono::milliseconds(10));
			return true; // queue up to 50 entries at a time
		}
		uint64 fileNameA, fileNameB;
		std::vector<uint8> fileData;
		if (s_cache->GetFileByIndex(g_mtlCacheState.pipelineLoadIndex, &fileNameA, &fileNameB, fileData))
		{
			// queue for async compilation
			g_mtlCacheState.pipelinesQueued++;
			m_compilationQueue.push(std::move(fileData));
			g_mtlCacheState.pipelineLoadIndex++;
			return true;
		}
		// no file stored at this index, try the next one
		g_mtlCacheState.pipelineLoadIndex++;
	}
	if (g_mtlCacheState.pipelinesLoaded != g_mtlCacheState.pipelinesQueued)
	{
		std::this_thread::sleep_for(std::chrono::milliseconds(10));
		return true; // pipelines still compiling
	}
	return false; // done
}
void MetalPipelineCache::EndLoading()
{
// shut down compilation threads
uint32 threadCount = m_numCompilationThreads;
m_numCompilationThreads = 0; // signal thread shutdown
for (uint32 i = 0; i < threadCount; i++)
{
m_compilationQueue.push({}); // push empty workload for every thread. Threads then will shutdown after checking for m_numCompilationThreads == 0
}
// keep cache file open for writing of new pipelines
}
// Destroys the cache file object (if one is open) and clears the pointer.
void MetalPipelineCache::Close()
{
	if (!s_cache)
		return; // nothing open
	delete s_cache;
	s_cache = nullptr;
}
// In-memory form of one pipeline cache entry: the shaders (referenced by hash),
// the attachment formats and the compacted GPU register state.
// SerializePipeline / DeserializePipeline convert it to and from the file format.
struct CachedPipeline
{
// Identifies one shader stage by its base/aux hash pair.
struct ShaderHash
{
// baseHash and auxHash are only meaningful while isPresent is true (they have no initializer)
uint64 baseHash;
uint64 auxHash;
bool isPresent{};
// Stores the hash pair and marks the stage as present.
void set(uint64 baseHash, uint64 auxHash)
{
this->baseHash = baseHash;
this->auxHash = auxHash;
this->isPresent = true;
}
};
ShaderHash vsHash; // includes fetch shader
ShaderHash gsHash;
ShaderHash psHash;
MetalAttachmentsInfo lastUsedAttachmentsInfo;
Latte::GPUCompactedRegisterState gpuState;
};
// Rebuilds one pipeline from a serialized cache entry: deserializes the entry, looks up the
// referenced shaders, compiles the pipeline and stores it in m_pipelineCache under its state hash.
// Entries that fail to deserialize or reference shaders which are not available are skipped.
// Called concurrently from the compiler threads.
void MetalPipelineCache::LoadPipelineFromCache(std::span<uint8> fileData)
{
	static FSpinlock s_spinlockSharedInternal;
	// deserialize file
	LatteContextRegister* lcr = new LatteContextRegister();
	s_spinlockSharedInternal.lock();
	CachedPipeline* cachedPipeline = new CachedPipeline();
	s_spinlockSharedInternal.unlock();
	// Every exit path has to free the two temporaries above. Previously the
	// "shader not found" paths returned without doing so and leaked both objects.
	auto releaseTemporaries = [&]()
	{
		s_spinlockSharedInternal.lock();
		delete lcr;
		delete cachedPipeline;
		s_spinlockSharedInternal.unlock();
	};
	MemStreamReader streamReader(fileData.data(), fileData.size());
	if (!DeserializePipeline(streamReader, *cachedPipeline))
	{
		// failed to deserialize
		releaseTemporaries();
		return;
	}
	// restored register view from compacted state
	Latte::LoadGPURegisterState(*lcr, cachedPipeline->gpuState);
	LatteDecompilerShader* vertexShader = nullptr;
	LatteDecompilerShader* geometryShader = nullptr;
	LatteDecompilerShader* pixelShader = nullptr;
	// find vertex shader
	if (cachedPipeline->vsHash.isPresent)
	{
		vertexShader = LatteSHRC_FindVertexShader(cachedPipeline->vsHash.baseHash, cachedPipeline->vsHash.auxHash);
		if (!vertexShader)
		{
			cemuLog_log(LogType::Force, "Vertex shader not found in cache");
			releaseTemporaries();
			return;
		}
	}
	// find geometry shader
	if (cachedPipeline->gsHash.isPresent)
	{
		geometryShader = LatteSHRC_FindGeometryShader(cachedPipeline->gsHash.baseHash, cachedPipeline->gsHash.auxHash);
		if (!geometryShader)
		{
			cemuLog_log(LogType::Force, "Geometry shader not found in cache");
			releaseTemporaries();
			return;
		}
	}
	// find pixel shader
	if (cachedPipeline->psHash.isPresent)
	{
		pixelShader = LatteSHRC_FindPixelShader(cachedPipeline->psHash.baseHash, cachedPipeline->psHash.auxHash);
		if (!pixelShader)
		{
			cemuLog_log(LogType::Force, "Pixel shader not found in cache");
			releaseTemporaries();
			return;
		}
	}
	// Both stages are dereferenced unconditionally below (vertexShader->compatibleFetchShader,
	// attachment setup from the pixel shader), so an entry lacking either one cannot be compiled
	if (!vertexShader || !pixelShader)
	{
		cemu_assert_debug(false);
		releaseTemporaries();
		return;
	}
	MetalAttachmentsInfo attachmentsInfo(*lcr, pixelShader);
	PipelineObject* pipelineObject = new PipelineObject();
	// compile
	{
		MetalPipelineCompiler pp(m_mtlr, *pipelineObject);
		pp.InitFromState(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, cachedPipeline->lastUsedAttachmentsInfo, attachmentsInfo, *lcr);
		pp.Compile(true, true, false);
		// destroy pp early
	}
	// Cache the pipeline
	uint64 pipelineStateHash = CalculatePipelineHash(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, cachedPipeline->lastUsedAttachmentsInfo, attachmentsInfo, *lcr);
	m_pipelineCacheLock.lock();
	m_pipelineCache[pipelineStateHash] = pipelineObject;
	m_pipelineCacheLock.unlock();
	// clean up
	releaseTemporaries();
}
// Jobs produced by AddCurrentStateToCache and consumed (and deleted) by WorkerThread
ConcurrentQueue<CachedPipeline*> g_mtlPipelineCachingQueue;

// Captures the currently active shaders and GPU register state as a cache entry and hands it
// to the writer thread, which is started on first use.
void MetalPipelineCache::AddCurrentStateToCache(uint64 pipelineStateHash, const MetalAttachmentsInfo& lastUsedAttachmentsInfo)
{
	// lazily start the detached writer thread
	if (m_pipelineCacheStoreThread == nullptr)
	{
		m_pipelineCacheStoreThread = new std::thread(&MetalPipelineCache::WorkerThread, this);
		m_pipelineCacheStoreThread->detach();
	}
	// Each cached pipeline stores:
	// - the active shaders (referenced by hash)
	// - an almost-complete register state of the GPU (minus some ALU uniform constants which aren't relevant)
	auto activeVertexShader = LatteSHRC_GetActiveVertexShader();
	auto activeGeometryShader = LatteSHRC_GetActiveGeometryShader();
	auto activePixelShader = LatteSHRC_GetActivePixelShader();
	CachedPipeline* entry = new CachedPipeline();
	if (activeVertexShader != nullptr)
		entry->vsHash.set(activeVertexShader->baseHash, activeVertexShader->auxHash);
	if (activeGeometryShader != nullptr)
		entry->gsHash.set(activeGeometryShader->baseHash, activeGeometryShader->auxHash);
	if (activePixelShader != nullptr)
		entry->psHash.set(activePixelShader->baseHash, activePixelShader->auxHash);
	entry->lastUsedAttachmentsInfo = lastUsedAttachmentsInfo;
	Latte::StoreGPURegisterState(LatteGPUState.contextNew, entry->gpuState);
	// ownership of the entry passes to the writer thread
	g_mtlPipelineCachingQueue.push(entry);
}
// Writes a cache entry to memWriter. Layout (all values big-endian):
//   uint8  version (1)
//   uint8  present mask (bit 0 = VS, bit 1 = GS, bit 2 = PS)
//   2x uint64 (base hash, aux hash) for every present stage, in VS/GS/PS order
//   uint16 per color target format, followed by a uint16 depth format
//   serialized compacted register state
// Always returns true.
bool MetalPipelineCache::SerializePipeline(MemStreamWriter& memWriter, CachedPipeline& cachedPipeline)
{
	const CachedPipeline::ShaderHash& vs = cachedPipeline.vsHash;
	const CachedPipeline::ShaderHash& gs = cachedPipeline.gsHash;
	const CachedPipeline::ShaderHash& ps = cachedPipeline.psHash;

	memWriter.writeBE<uint8>(0x01); // version

	const uint8 presentMask = (vs.isPresent ? 1 : 0) | (gs.isPresent ? 2 : 0) | (ps.isPresent ? 4 : 0);
	memWriter.writeBE<uint8>(presentMask);

	// hash pairs are only stored for stages flagged in the mask
	auto writeHashIfPresent = [&memWriter](const CachedPipeline::ShaderHash& stageHash)
	{
		if (!stageHash.isPresent)
			return;
		memWriter.writeBE<uint64>(stageHash.baseHash);
		memWriter.writeBE<uint64>(stageHash.auxHash);
	};
	writeHashIfPresent(vs);
	writeHashIfPresent(gs);
	writeHashIfPresent(ps);

	// attachment formats
	const MetalAttachmentsInfo& attachments = cachedPipeline.lastUsedAttachmentsInfo;
	for (uint8 targetIdx = 0; targetIdx < LATTE_NUM_COLOR_TARGET; targetIdx++)
		memWriter.writeBE<uint16>((uint16)attachments.colorFormats[targetIdx]);
	memWriter.writeBE<uint16>((uint16)attachments.depthFormat);

	// GPU register state
	Latte::SerializeRegisterState(cachedPipeline.gpuState, memWriter);
	return true;
}
// Reads a cache entry written by SerializePipeline into cachedPipeline.
// Returns false if the version byte is unknown, the register state cannot be deserialized,
// or the reader reports an error (e.g. the entry is shorter than its contents claim).
bool MetalPipelineCache::DeserializePipeline(MemStreamReader& memReader, CachedPipeline& cachedPipeline)
{
	// version
	if (memReader.readBE<uint8>() != 1)
	{
		cemuLog_log(LogType::Force, "Cached Metal pipeline corrupted or has unknown version");
		return false;
	}
	// shader hashes, stored only for the stages flagged in the mask
	uint8 presentMask = memReader.readBE<uint8>();
	if (presentMask & 1)
	{
		uint64 baseHash = memReader.readBE<uint64>();
		uint64 auxHash = memReader.readBE<uint64>();
		cachedPipeline.vsHash.set(baseHash, auxHash);
	}
	if (presentMask & 2)
	{
		uint64 baseHash = memReader.readBE<uint64>();
		uint64 auxHash = memReader.readBE<uint64>();
		cachedPipeline.gsHash.set(baseHash, auxHash);
	}
	if (presentMask & 4)
	{
		uint64 baseHash = memReader.readBE<uint64>();
		uint64 auxHash = memReader.readBE<uint64>();
		cachedPipeline.psHash.set(baseHash, auxHash);
	}
	// attachment formats
	for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
		cachedPipeline.lastUsedAttachmentsInfo.colorFormats[i] = (Latte::E_GX2SURFFMT)memReader.readBE<uint16>();
	cachedPipeline.lastUsedAttachmentsInfo.depthFormat = (Latte::E_GX2SURFFMT)memReader.readBE<uint16>();
	// deserialize GPU state
	if (!Latte::DeserializeRegisterState(cachedPipeline.gpuState, memReader))
	{
		return false;
	}
	// A reader error means at least one of the values above was not actually read from the entry.
	// Reject the entry in all build types instead of only asserting in debug builds.
	if (memReader.hasError())
	{
		cemuLog_log(LogType::Force, "Cached Metal pipeline is truncated");
		return false;
	}
	return true;
}
int MetalPipelineCache::CompilerThread()
{
SetThreadName("plCacheCompiler");
while (m_numCompilationThreads != 0)
{
std::vector<uint8> pipelineData = m_compilationQueue.pop();
if(pipelineData.empty())
continue;
LoadPipelineFromCache(pipelineData);
++g_mtlCacheState.pipelinesLoaded;
}
return 0;
}
void MetalPipelineCache::WorkerThread()
{
SetThreadName("plCacheWriter");
while (true)
{
CachedPipeline* job;
g_mtlPipelineCachingQueue.pop(job);
if (!s_cache)
{
delete job;
continue;
}
// serialize
MemStreamWriter memWriter(1024 * 4);
SerializePipeline(memWriter, *job);
auto blob = memWriter.getResult();
// file name is derived from data hash
uint8 hash[SHA256_DIGEST_LENGTH];
SHA256(blob.data(), blob.size(), hash);
uint64 nameA = *(uint64be*)(hash + 0);
uint64 nameB = *(uint64be*)(hash + 8);
s_cache->AddFileAsync({ nameA, nameB }, blob.data(), blob.size());
delete job;
}
}

View file

@ -0,0 +1,52 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h"
#include "util/helpers/ConcurrentQueue.h"
#include "util/helpers/fspinlock.h"
#include "util/math/vector2.h"
// Cache of compiled Metal render pipelines, keyed by a hash of the pipeline state.
// Also owns the on-disk pipeline cache: loading it through a pool of compiler threads
// (BeginLoading / UpdateLoading / EndLoading) and appending new entries from a writer thread.
class MetalPipelineCache
{
public:
	static MetalPipelineCache& GetInstance();

	MetalPipelineCache(class MetalRenderer* metalRenderer);
	~MetalPipelineCache();

	PipelineObject* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, Vector2i extend, uint32 indexCount, const LatteContextRegister& lcr);

	// Cache loading
	uint32 BeginLoading(uint64 cacheTitleId); // returns count of pipelines stored in cache
	bool UpdateLoading(uint32& pipelinesLoadedTotal, uint32& pipelinesMissingShaders); // returns true while loading is still in progress
	void EndLoading(); // shuts down the compiler threads, the cache file stays open
	void LoadPipelineFromCache(std::span<uint8> fileData); // builds one pipeline from a serialized cache entry
	void Close(); // called on title exit

	// Debug
	// NOTE: reads the map without taking m_pipelineCacheLock
	size_t GetPipelineCacheSize() const { return m_pipelineCache.size(); }

private:
	class MetalRenderer* m_mtlr;

	std::map<uint64, PipelineObject*> m_pipelineCache; // pipeline state hash -> pipeline
	FSpinlock m_pipelineCacheLock; // taken by LoadPipelineFromCache when inserting into m_pipelineCache

	// The members below are tested against null / read before anything assigns them
	// (AddCurrentStateToCache, BeginLoading, Close, WorkerThread), so they need a defined initial value.
	std::thread* m_pipelineCacheStoreThread = nullptr; // writer thread, created on first AddCurrentStateToCache
	class FileCache* s_cache = nullptr; // open cache file, null while none is open

	std::atomic_uint32_t m_numCompilationThreads{ 0 }; // zero signals the compiler threads to exit
	ConcurrentQueue<std::vector<uint8>> m_compilationQueue; // serialized entries waiting for compilation
	std::atomic_uint32_t m_compilationCount{ 0 };

	static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
	void AddCurrentStateToCache(uint64 pipelineStateHash, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo);

	// pipeline serialization for file
	bool SerializePipeline(class MemStreamWriter& memWriter, struct CachedPipeline& cachedPipeline);
	bool DeserializePipeline(class MemStreamReader& memReader, struct CachedPipeline& cachedPipeline);

	int CompilerThread(); // entry point of the cache compiler threads
	void WorkerThread(); // entry point of the cache writer thread
};

View file

@ -0,0 +1,484 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include <chrono>
// Pipeline compile statistics, defined in another translation unit.
// MetalPipelineCompiler::Compile updates them when called with showInOverlay set.
extern std::atomic_int g_compiling_pipelines;
extern std::atomic_int g_compiling_pipelines_async;
extern std::atomic_uint64_t g_compiling_pipelines_syncTimeSum; // Compile adds the pipeline creation time in nanoseconds
static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable& psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
{
auto parameterMask = vertexShader->outputParameterMask;
for (uint32 i = 0; i < 32; i++)
{
if ((parameterMask & (1 << i)) == 0)
continue;
sint32 vsSemanticId = psInputTable.getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
if (vsSemanticId < 0)
continue;
// make sure PS has matching input
if (!psInputTable.hasPSImportForSemanticId(vsSemanticId))
continue;
gsSrc.append(fmt::format("out.passParameterSem{} = objectPayload.vertexOut[{}].passParameterSem{};\r\n", vsSemanticId, vIdx, vsSemanticId));
}
gsSrc.append(fmt::format("out.position = objectPayload.vertexOut[{}].position;\r\n", vIdx));
gsSrc.append(fmt::format("mesh.set_vertex({}, out);\r\n", vIdx));
}
static void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable& psInputTable, const char* variant, const LatteContextRegister& latteRegister)
{
auto parameterMask = vertexShader->outputParameterMask;
for (uint32 i = 0; i < 32; i++)
{
if ((parameterMask & (1 << i)) == 0)
continue;
sint32 vsSemanticId = psInputTable.getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
if (vsSemanticId < 0)
continue;
// make sure PS has matching input
if (!psInputTable.hasPSImportForSemanticId(vsSemanticId))
continue;
gsSrc.append(fmt::format("out.passParameterSem{} = gen4thVertex{}(objectPayload.vertexOut[0].passParameterSem{}, objectPayload.vertexOut[1].passParameterSem{}, objectPayload.vertexOut[2].passParameterSem{});\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId));
}
gsSrc.append(fmt::format("out.position = gen4thVertex{}(objectPayload.vertexOut[0].position, objectPayload.vertexOut[1].position, objectPayload.vertexOut[2].position);\r\n", variant));
gsSrc.append(fmt::format("mesh.set_vertex(3, out);\r\n"));
}
static void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable& psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister)
{
sint32 pList[4] = { p0, p1, p2, p3 };
for (sint32 i = 0; i < 4; i++)
{
if (pList[i] == 3)
rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister);
else
rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister);
}
gsSrc.append(fmt::format("mesh.set_index(0, {});\r\n", pList[0]));
gsSrc.append(fmt::format("mesh.set_index(1, {});\r\n", pList[1]));
gsSrc.append(fmt::format("mesh.set_index(2, {});\r\n", pList[2]));
gsSrc.append(fmt::format("mesh.set_index(3, {});\r\n", pList[1]));
gsSrc.append(fmt::format("mesh.set_index(4, {});\r\n", pList[2]));
gsSrc.append(fmt::format("mesh.set_index(5, {});\r\n", pList[3]));
}
// Generates and compiles the Metal mesh shader used to emulate the RECTS primitive:
// it receives three vertices via the object payload, derives the fourth one and emits
// the rectangle as two triangles. The caller receives a newly allocated shader object.
static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer, const LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister)
{
	LatteShaderPSInputTable psInputTable;
	LatteShader_CreatePSInputTable(&psInputTable, latteRegister.GetRawView());

	// inputs & outputs: one member per parameter that is exported by the VS and imported by the PS
	std::string payloadVertexStruct =
		"struct VertexOut {\r\n"
		"float4 position;\r\n";
	std::string meshVertexStruct =
		"struct GeometryOut {\r\n"
		"float4 position [[position]];\r\n";
	const auto exportedParams = vertexShader->outputParameterMask;
	for (uint32 paramIdx = 0; paramIdx < 32; paramIdx++)
	{
		if ((exportedParams & (1 << paramIdx)) == 0)
			continue;
		const sint32 semanticId = psInputTable.getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), paramIdx);
		if (semanticId < 0)
			continue;
		auto psImport = psInputTable.getPSImportBySemanticId(semanticId);
		if (psImport == nullptr)
			continue;
		// VertexOut
		payloadVertexStruct += fmt::format("float4 passParameterSem{};\r\n", semanticId);
		// GeometryOut, carrying the location and interpolation qualifiers of the PS input
		meshVertexStruct += fmt::format("float4 passParameterSem{} [[user(locn{})]]", semanticId, psInputTable.getPSImportLocationBySemanticId(semanticId));
		if (psImport->isFlat)
			meshVertexStruct += " [[flat]]";
		if (psImport->isNoPerspective)
			meshVertexStruct += " [[center_no_perspective]]";
		meshVertexStruct += ";\r\n";
	}
	payloadVertexStruct += "};\r\n";
	meshVertexStruct += "};\r\n";

	std::string gsSrc;
	gsSrc.append(
		"#include <metal_stdlib>\r\n"
		"using namespace metal;\r\n");
	gsSrc.append(payloadVertexStruct);
	gsSrc.append(meshVertexStruct);
	gsSrc.append(
		"struct ObjectPayload {\r\n"
		"VertexOut vertexOut[3];\r\n"
		"};\r\n");

	// gen function: one variant per possible position of the missing corner
	gsSrc.append(
		"float4 gen4thVertexA(float4 a, float4 b, float4 c)\r\n"
		"{\r\n"
		"return b - (c - a);\r\n"
		"}\r\n"
		"float4 gen4thVertexB(float4 a, float4 b, float4 c)\r\n"
		"{\r\n"
		"return c - (b - a);\r\n"
		"}\r\n"
		"float4 gen4thVertexC(float4 a, float4 b, float4 c)\r\n"
		"{\r\n"
		"return c + (b - a);\r\n"
		"}\r\n");

	// main
	gsSrc.append(
		"using MeshType = mesh<GeometryOut, void, 4, 2, topology::triangle>;\r\n"
		"[[mesh, max_total_threads_per_threadgroup(1)]]\r\n"
		"void main0(MeshType mesh, const object_data ObjectPayload& objectPayload [[payload]])\r\n"
		"{\r\n"
		"GeometryOut out;\r\n");

	// there are two possible winding orders that need different triangle generation:
	// 0 1
	// 2 3
	// and
	// 0 1
	// 3 2
	// all others are just symmetries of these cases
	// the generated code tells them apart by finding the longest of the three edges (the diagonal)
	gsSrc.append(
		"float dist0_1 = length(objectPayload.vertexOut[1].position.xy - objectPayload.vertexOut[0].position.xy);\r\n"
		"float dist0_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[0].position.xy);\r\n"
		"float dist1_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[1].position.xy);\r\n");

	// emit vertices
	gsSrc.append(
		"if(dist0_1 > dist0_2 && dist0_1 > dist1_2)\r\n"
		"{\r\n");
	// p0 to p1 is diagonal
	rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 2, 1, 0, 3, "A", latteRegister);
	gsSrc.append("} else if ( dist0_2 > dist0_1 && dist0_2 > dist1_2 ) {\r\n");
	// p0 to p2 is diagonal
	rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 1, 2, 0, 3, "B", latteRegister);
	gsSrc.append("} else {\r\n");
	// p1 to p2 is diagonal
	rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 0, 1, 2, 3, "C", latteRegister);
	gsSrc.append(
		"}\r\n"
		"mesh.set_primitive_count(2);\r\n"
		"}\r\n");

	RendererShaderMtl* generatedShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc);
	generatedShader->PreponeCompilation(true);
	return generatedShader;
}
// Sentinel title id with all 64 bits set
#define INVALID_TITLE_ID 0xFFFFFFFFFFFFFFFF
// NOTE(review): initialized to the sentinel; nothing in the visible part of this file reads or writes it - confirm where it is used
uint64 s_cacheTitleId = INVALID_TITLE_ID;
// Shader compile counters defined in another translation unit
extern std::atomic_int g_compiled_shaders_total;
extern std::atomic_int g_compiled_shaders_async;
template<typename T>
void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, bool rasterizationEnabled, const LatteContextRegister& lcr)
{
// TODO: check if the pixel shader is valid as well?
if (!rasterizationEnabled/* || !pixelShaderMtl*/)
{
desc->setRasterizationEnabled(false);
return;
}
// Color attachments
const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL;
uint32 blendEnableMask = colorControlReg.get_BLEND_MASK();
uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK();
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
Latte::E_GX2SURFFMT format = lastUsedAttachmentsInfo.colorFormats[i];
if (format == Latte::E_GX2SURFFMT::INVALID_FORMAT)
continue;
MTL::PixelFormat pixelFormat = GetMtlPixelFormat(format, false);
auto colorAttachment = desc->colorAttachments()->object(i);
colorAttachment->setPixelFormat(pixelFormat);
// Disable writes if not in the active FBO
if (activeAttachmentsInfo.colorFormats[i] == Latte::E_GX2SURFFMT::INVALID_FORMAT)
{
colorAttachment->setWriteMask(MTL::ColorWriteMaskNone);
continue;
}
colorAttachment->setWriteMask(GetMtlColorWriteMask((renderTargetMask >> (i * 4)) & 0xF));
// Blending
bool blendEnabled = ((blendEnableMask & (1 << i))) != 0;
// Only float data type is blendable
if (blendEnabled && GetMtlPixelFormatInfo(format, false).dataType == MetalDataType::FLOAT)
{
colorAttachment->setBlendingEnabled(true);
const auto& blendControlReg = lcr.CB_BLENDN_CONTROL[i];
auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN());
auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND());
auto dstRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND());
colorAttachment->setRgbBlendOperation(rgbBlendOp);
colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor);
colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor);
if (blendControlReg.get_SEPARATE_ALPHA_BLEND())
{
colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN()));
colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND()));
colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND()));
}
else
{
colorAttachment->setAlphaBlendOperation(rgbBlendOp);
colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor);
colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor);
}
}
}
// Depth stencil attachment
if (lastUsedAttachmentsInfo.depthFormat != Latte::E_GX2SURFFMT::INVALID_FORMAT)
{
MTL::PixelFormat pixelFormat = GetMtlPixelFormat(lastUsedAttachmentsInfo.depthFormat, true);
desc->setDepthAttachmentPixelFormat(pixelFormat);
if (lastUsedAttachmentsInfo.hasStencil)
desc->setStencilAttachmentPixelFormat(pixelFormat);
}
}
// Releases the pipeline descriptor created by InitFromState, if there is one.
MetalPipelineCompiler::~MetalPipelineCompiler()
{
	// Disabled draft of a binary archive based pipeline cache, kept for reference:
	/*
	for (auto& pair : m_pipelineCache)
	{
		pair.second->release();
	}
	m_pipelineCache.clear();
	NS::Error* error = nullptr;
	m_binaryArchive->serializeToURL(m_binaryArchiveURL, &error);
	if (error)
	{
		cemuLog_log(LogType::Force, "error serializing binary archive: {}", error->localizedDescription()->utf8String());
		error->release();
	}
	m_binaryArchive->release();
	m_binaryArchiveURL->release();
	*/
	if (m_pipelineDescriptor == nullptr)
		return; // InitFromState never created a descriptor
	m_pipelineDescriptor->release();
}
void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
{
m_usesGeometryShader = UseGeometryShader(lcr, geometryShader != nullptr);
if (m_usesGeometryShader && !m_mtlr->SupportsMeshShaders())
return;
// Rasterization
m_rasterizationEnabled = lcr.IsRasterizationEnabled();
// Shaders
m_vertexShaderMtl = static_cast<RendererShaderMtl*>(vertexShader->shader);
if (geometryShader)
m_geometryShaderMtl = static_cast<RendererShaderMtl*>(geometryShader->shader);
else if (UseRectEmulation(lcr))
m_geometryShaderMtl = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr);
else
m_geometryShaderMtl = nullptr;
m_pixelShaderMtl = static_cast<RendererShaderMtl*>(pixelShader->shader);
if (m_usesGeometryShader)
InitFromStateMesh(fetchShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);
else
InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);
}
bool MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay)
{
if (m_usesGeometryShader && !m_mtlr->SupportsMeshShaders())
return false;
if (forceCompile)
{
// if some shader stages are not compiled yet, compile them now
if (m_vertexShaderMtl && !m_vertexShaderMtl->IsCompiled())
m_vertexShaderMtl->PreponeCompilation(isRenderThread);
if (m_geometryShaderMtl && !m_geometryShaderMtl->IsCompiled())
m_geometryShaderMtl->PreponeCompilation(isRenderThread);
if (m_pixelShaderMtl && !m_pixelShaderMtl->IsCompiled())
m_pixelShaderMtl->PreponeCompilation(isRenderThread);
}
else
{
// fail early if some shader stages are not compiled
if (m_vertexShaderMtl && !m_vertexShaderMtl->IsCompiled())
return false;
if (m_geometryShaderMtl && !m_geometryShaderMtl->IsCompiled())
return false;
if (m_pixelShaderMtl && !m_pixelShaderMtl->IsCompiled())
return false;
}
// Compile
MTL::RenderPipelineState* pipeline = nullptr;
NS::Error* error = nullptr;
auto start = std::chrono::high_resolution_clock::now();
if (m_usesGeometryShader)
{
auto desc = static_cast<MTL::MeshRenderPipelineDescriptor*>(m_pipelineDescriptor);
// Shaders
desc->setObjectFunction(m_vertexShaderMtl->GetFunction());
desc->setMeshFunction(m_geometryShaderMtl->GetFunction());
if (m_rasterizationEnabled)
desc->setFragmentFunction(m_pixelShaderMtl->GetFunction());
#ifdef CEMU_DEBUG_ASSERT
desc->setLabel(GetLabel("Mesh render pipeline state", desc));
#endif
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error);
}
else
{
auto desc = static_cast<MTL::RenderPipelineDescriptor*>(m_pipelineDescriptor);
// Shaders
desc->setVertexFunction(m_vertexShaderMtl->GetFunction());
if (m_rasterizationEnabled)
desc->setFragmentFunction(m_pixelShaderMtl->GetFunction());
#ifdef CEMU_DEBUG_ASSERT
desc->setLabel(GetLabel("Render pipeline state", desc));
#endif
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error);
}
auto end = std::chrono::high_resolution_clock::now();
auto creationDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
if (error)
{
cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String());
}
if (showInOverlay)
{
if (isRenderThread)
g_compiling_pipelines_syncTimeSum += creationDuration;
else
g_compiling_pipelines_async++;
g_compiling_pipelines++;
}
m_pipelineObj.m_pipeline = pipeline;
return true;
}
// Creates the MTL::RenderPipelineDescriptor for the non-mesh path and stores it in
// m_pipelineDescriptor: a vertex descriptor built from the fetch shader's buffer groups
// (skipped when the fetch shader fetches vertices manually) plus the fragment/attachment state.
void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
{
// Render pipeline state
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
// Vertex descriptor
if (!fetchShader->mtlFetchVertexManually)
{
NS_STACK_SCOPED MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();
// one buffer layout per buffer group, one attribute entry per used attribute
for (auto& bufferGroup : fetchShader->bufferGroups)
{
// fetch type shared by the attributes of this group (asserted to be consistent in debug builds)
std::optional<LatteConst::VertexFetchType2> fetchType;
// largest (offset + size) over the used attributes, used as fallback stride below
uint32 minBufferStride = 0;
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
{
auto& attr = bufferGroup.attrib[j];
// translate the attribute's semantic id through the vertex shader's attribute mapping
uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
if (semanticId == (uint32)-1)
continue; // attribute not used?
auto attribute = vertexDescriptor->attributes()->object(semanticId);
attribute->setOffset(attr.offset);
attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex));
attribute->setFormat(GetMtlVertexFormat(attr.format));
minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format));
if (fetchType.has_value())
cemu_assert_debug(fetchType == attr.fetchType);
else
fetchType = attr.fetchType;
if (attr.fetchType == LatteConst::INSTANCE_DATA)
{
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
}
}
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
// each buffer occupies 7 registers; the stride is a 16 bit field starting at bit 11 of the third one
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
if (bufferStride == 0)
{
// Buffer stride cannot be zero, let's use the minimum stride
bufferStride = minBufferStride;
// Additionally, constant vertex function must be used
layout->setStepFunction(MTL::VertexStepFunctionConstant);
layout->setStepRate(0);
}
else
{
// groups without any used attribute (no fetch type recorded) are treated as per-vertex data
if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA)
layout->setStepFunction(MTL::VertexStepFunctionPerVertex);
else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA)
layout->setStepFunction(MTL::VertexStepFunctionPerInstance);
else
{
cemuLog_log(LogType::Force, "unimplemented vertex fetch type {}", (uint32)fetchType.value());
cemu_assert(false);
}
}
// NOTE(review): presumably rounds the stride up to a multiple of 4 - confirm against Align()
bufferStride = Align(bufferStride, 4);
layout->setStride(bufferStride);
}
desc->setVertexDescriptor(vertexDescriptor);
}
SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr);
// the descriptor is released by the destructor
m_pipelineDescriptor = desc;
}
// Creates the MTL::MeshRenderPipelineDescriptor for the mesh path and stores it in
// m_pipelineDescriptor. No vertex descriptor is set up on this path, only the
// fragment/attachment state; fetchShader is not used.
void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
{
	auto* meshDesc = MTL::MeshRenderPipelineDescriptor::alloc()->init();
	SetFragmentState(meshDesc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr);
	// ownership moves to the member, the destructor releases it
	m_pipelineDescriptor = meshDesc;
}

View file

@ -0,0 +1,38 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
#include "Cafe/HW/Latte/ISA/LatteReg.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
// Holder for a compiled render pipeline state.
// m_pipeline stays null until MetalPipelineCompiler::Compile assigns it.
struct PipelineObject
{
MTL::RenderPipelineState* m_pipeline = nullptr;
};
// Builds one Metal render pipeline state in two steps: InitFromState gathers the shader
// stages and creates the pipeline descriptor, Compile creates the pipeline state and stores
// it in the PipelineObject passed to the constructor.
class MetalPipelineCompiler
{
public:
	MetalPipelineCompiler(class MetalRenderer* metalRenderer, PipelineObject& pipelineObj) : m_mtlr{metalRenderer}, m_pipelineObj{pipelineObj} {}
	~MetalPipelineCompiler(); // releases the pipeline descriptor

	void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
	bool Compile(bool forceCompile, bool isRenderThread, bool showInOverlay);

private:
	class MetalRenderer* m_mtlr;
	PipelineObject& m_pipelineObj; // receives the compiled pipeline; must outlive this object

	// State gathered by InitFromState. The defaults keep it well defined when InitFromState
	// returns early (mesh shaders unsupported) or has not been called before Compile.
	class RendererShaderMtl* m_vertexShaderMtl = nullptr;
	class RendererShaderMtl* m_geometryShaderMtl = nullptr;
	class RendererShaderMtl* m_pixelShaderMtl = nullptr;
	bool m_usesGeometryShader = false;
	bool m_rasterizationEnabled = false;

	// MTL::RenderPipelineDescriptor or MTL::MeshRenderPipelineDescriptor, depending on m_usesGeometryShader
	NS::Object* m_pipelineDescriptor = nullptr;

	void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
	void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
};

View file

@ -0,0 +1,38 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalQuery.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
// Sums the occlusion results of every pool slot in [m_range.begin, m_range.end).
// Returns false (leaving numSamplesPassed untouched) while the associated command buffer has not completed.
bool LatteQueryObjectMtl::getResult(uint64& numSamplesPassed)
{
	const bool stillPending = m_commandBuffer && !CommandBufferCompleted(m_commandBuffer);
	if (stillPending)
		return false;

	const uint64* results = m_mtlr->GetOcclusionQueryResultsPtr();

	// The slot index wraps around at the end of the query pool
	uint64 sampleCount = 0;
	uint32 slot = m_range.begin;
	while (slot != m_range.end)
	{
		sampleCount += results[slot];
		slot = (slot + 1) % MetalRenderer::OCCLUSION_QUERY_POOL_SIZE;
	}

	numSamplesPassed = sampleCount;
	return true;
}
// Drops the reference taken on the command buffer in end(), if there is one.
LatteQueryObjectMtl::~LatteQueryObjectMtl()
{
	if (m_commandBuffer == nullptr)
		return;

	m_commandBuffer->release();
}
void LatteQueryObjectMtl::begin()
{
m_range.begin = m_mtlr->GetOcclusionQueryIndex();
m_mtlr->BeginOcclusionQuery();
}
// Closes the query range at the current pool index and remembers (retains) the command buffer
// whose completion getResult() has to wait for.
void LatteQueryObjectMtl::end()
{
	m_range.end = m_mtlr->GetOcclusionQueryIndex();
	m_mtlr->EndOcclusionQuery();

	// Drop the reference from an earlier end() call before it gets overwritten, otherwise the retained
	// command buffer would leak (same pattern as MetalRenderer::EndOcclusionQuery)
	if (m_commandBuffer)
		m_commandBuffer->release();

	// nullptr means the command buffer has already finished, so the results are available right away
	m_commandBuffer = m_mtlr->GetAndRetainCurrentCommandBufferIfNotCompleted();
	if (m_commandBuffer)
		m_mtlr->RequestSoonCommit();
}

View file

@ -0,0 +1,28 @@
#pragma once
#include "Cafe/HW/Latte/Core/LatteQueryObject.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
// Occlusion query object of the Metal backend.
// A query covers a range of slots in the renderer's occlusion query result buffer.
class LatteQueryObjectMtl : public LatteQueryObject
{
public:
LatteQueryObjectMtl(class MetalRenderer* mtlRenderer) : m_mtlr{mtlRenderer} {}
// Releases the retained command buffer, if any
~LatteQueryObjectMtl();
// Returns false while the associated command buffer has not completed; otherwise writes the summed results
bool getResult(uint64& numSamplesPassed) override;
void begin() override;
void end() override;
// Extends the query range by one slot.
// NOTE(review): the increment does not wrap at MetalRenderer::OCCLUSION_QUERY_POOL_SIZE, while getResult()
// iterates with a wrapping index and stops on equality with m_range.end - confirm the caller keeps end inside the pool
void GrowRange()
{
m_range.end++;
}
private:
class MetalRenderer* m_mtlr;
// Slot range of this query, invalid until begin()/end() have been called
MetalQueryRange m_range = {INVALID_UINT32, INVALID_UINT32};
// TODO: make this a list of command buffers?
// Retained in end(), released in the destructor
MTL::CommandBuffer* m_commandBuffer = nullptr;
};

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,570 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
// Shader stages as seen by the emulated GPU (one entry per Latte shader type).
enum MetalGeneralShaderType
{
METAL_GENERAL_SHADER_TYPE_VERTEX,
METAL_GENERAL_SHADER_TYPE_GEOMETRY,
METAL_GENERAL_SHADER_TYPE_FRAGMENT,
// Number of entries; used as an array dimension and as the "unknown type" return value of GetMtlGeneralShaderType
METAL_GENERAL_SHADER_TYPE_TOTAL
};
// Maps a Latte shader type to its MetalGeneralShaderType.
// Any type other than vertex, geometry or pixel yields METAL_GENERAL_SHADER_TYPE_TOTAL.
inline MetalGeneralShaderType GetMtlGeneralShaderType(LatteConst::ShaderType shaderType)
{
	if (shaderType == LatteConst::ShaderType::Vertex)
		return METAL_GENERAL_SHADER_TYPE_VERTEX;
	if (shaderType == LatteConst::ShaderType::Geometry)
		return METAL_GENERAL_SHADER_TYPE_GEOMETRY;
	if (shaderType == LatteConst::ShaderType::Pixel)
		return METAL_GENERAL_SHADER_TYPE_FRAGMENT;

	return METAL_GENERAL_SHADER_TYPE_TOTAL;
}
// Shader stages on the Metal side. See GetMtlShaderType for how Latte shader types map onto these.
enum MetalShaderType
{
METAL_SHADER_TYPE_VERTEX,
METAL_SHADER_TYPE_OBJECT,
METAL_SHADER_TYPE_MESH,
METAL_SHADER_TYPE_FRAGMENT,
// Number of entries; used as an array dimension and as the "unknown type" return value of GetMtlShaderType
METAL_SHADER_TYPE_TOTAL
};
// Maps a Latte shader type to the Metal stage it runs in.
// With a geometry shader in use the vertex shader maps to the object stage and the geometry shader to the
// mesh stage. Unknown types yield METAL_SHADER_TYPE_TOTAL.
inline MetalShaderType GetMtlShaderType(LatteConst::ShaderType shaderType, bool usesGeometryShader)
{
	if (shaderType == LatteConst::ShaderType::Vertex)
		return usesGeometryShader ? METAL_SHADER_TYPE_OBJECT : METAL_SHADER_TYPE_VERTEX;
	if (shaderType == LatteConst::ShaderType::Geometry)
		return METAL_SHADER_TYPE_MESH;
	if (shaderType == LatteConst::ShaderType::Pixel)
		return METAL_SHADER_TYPE_FRAGMENT;

	return METAL_SHADER_TYPE_TOTAL;
}
// Snapshot of the state that is tracked per command encoder.
// MetalRenderer::ResetEncoderState() resets it to the defaults below and nulls all resource bindings.
struct MetalEncoderState
{
MTL::RenderPipelineState* m_renderPipelineState = nullptr;
MTL::DepthStencilState* m_depthStencilState = nullptr;
MTL::CullMode m_cullMode = MTL::CullModeNone;
MTL::Winding m_frontFaceWinding = MTL::WindingClockwise;
MTL::Viewport m_viewport;
MTL::ScissorRect m_scissor;
uint32 m_stencilRefFront = 0;
uint32 m_stencilRefBack = 0;
uint32 m_blendColor[4] = {0};
// NOTE(review): the depth values are stored as uint32 - presumably raw register bits; confirm against the users
uint32 m_depthBias = 0;
uint32 m_depthSlope = 0;
uint32 m_depthClamp = 0;
bool m_depthClipEnable = true;
// Resource bindings, indexed by [Metal shader stage][binding index]
struct {
MTL::Buffer* m_buffer;
size_t m_offset;
} m_buffers[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
MTL::Texture* m_textures[METAL_SHADER_TYPE_TOTAL][MAX_MTL_TEXTURES];
MTL::SamplerState* m_samplers[METAL_SHADER_TYPE_TOTAL][MAX_MTL_SAMPLERS];
};
// Streamout (transform feedback) state, one entry per Latte streamout buffer.
struct MetalStreamoutState
{
struct
{
bool enabled;
// Offset of this buffer's data inside the ring buffer
uint32 ringBufferOffset;
} buffers[LATTE_NUM_STREAMOUT_BUFFER];
sint32 verticesPerInstance;
};
// A framebuffer object together with the description of its attachments.
struct MetalActiveFBOState
{
class CachedFBOMtl* m_fbo = nullptr;
MetalAttachmentsInfo m_attachmentsInfo;
};
// Renderer-wide state tracked by MetalRenderer (held in its m_state member).
struct MetalState
{
MetalEncoderState m_encoderState{};
bool m_usesSRGB = false;
bool m_skipDrawSequence = false;
bool m_isFirstDrawInRenderPass = true;
MetalActiveFBOState m_activeFBO;
// If the FBO changes, but it's the same FBO as the last one with some omitted attachments, this FBO doesn't change
MetalActiveFBOState m_lastUsedFBO;
bool m_fboChanged = false;
size_t m_vertexBufferOffsets[MAX_MTL_VERTEX_BUFFERS];
// NOTE(review): sized for three groups of LATTE_NUM_MAX_TEX_UNITS - presumably one group per shader stage; confirm
class LatteTextureViewMtl* m_textures[LATTE_NUM_MAX_TEX_UNITS * 3] = {nullptr};
// Indexed by [general shader type][buffer index]
size_t m_uniformBufferOffsets[METAL_GENERAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
MTL::Viewport m_viewport;
MTL::ScissorRect m_scissor;
MetalStreamoutState m_streamoutState;
};
// A command buffer together with a flag telling whether it has been committed already.
struct MetalCommandBuffer
{
MTL::CommandBuffer* m_commandBuffer = nullptr;
bool m_commited = false;
};
// Kind of command encoder that is currently open (None when no encoder is active).
enum class MetalEncoderType
{
None,
Render,
Compute,
Blit,
};
// Metal implementation of the Renderer interface.
// Besides the Renderer overrides this class exposes helpers for command buffer / encoder management,
// cached encoder state, lazily created buffers and occlusion query bookkeeping.
class MetalRenderer : public Renderer
{
public:
// Number of slots in the occlusion query result buffer (query slot indices wrap at this value)
static constexpr uint32 OCCLUSION_QUERY_POOL_SIZE = 1024;
static constexpr uint32 TEXTURE_READBACK_SIZE = 32 * 1024 * 1024; // 32 MB
struct DeviceInfo
{
std::string name;
uint64 uuid;
};
static std::vector<DeviceInfo> GetDevices();
MetalRenderer();
~MetalRenderer() override;
RendererAPI GetType() override
{
return RendererAPI::Metal;
}
// Returns the global renderer as a MetalRenderer.
// The cast is unchecked, so this is only valid while the active renderer is the Metal one
static MetalRenderer* GetInstance() {
return static_cast<MetalRenderer*>(g_renderer.get());
}
// Helper functions
MTL::Device* GetDevice() const {
return m_device;
}
void InitializeLayer(const Vector2i& size, bool mainWindow);
void ShutdownLayer(bool mainWindow);
void ResizeLayer(const Vector2i& size, bool mainWindow);
void Initialize() override;
void Shutdown() override;
bool IsPadWindowActive() override;
bool GetVRAMInfo(int& usageInMB, int& totalInMB) const override;
void ClearColorbuffer(bool padView) override;
void DrawEmptyFrame(bool mainWindow) override;
void SwapBuffers(bool swapTV, bool swapDRC) override;
void HandleScreenshotRequest(LatteTextureView* texView, bool padView) override;
void DrawBackbufferQuad(LatteTextureView* texView, RendererOutputShader* shader, bool useLinearTexFilter,
sint32 imageX, sint32 imageY, sint32 imageWidth, sint32 imageHeight,
bool padView, bool clearBackground) override;
bool BeginFrame(bool mainWindow) override;
// flush control
void Flush(bool waitIdle = false) override; // called when explicit flush is required (e.g. by imgui)
void NotifyLatteCommandProcessorIdle() override; // called when command processor has no more commands available or when stalled
// imgui
bool ImguiBegin(bool mainWindow) override;
void ImguiEnd() override;
ImTextureID GenerateTexture(const std::vector<uint8>& data, const Vector2i& size) override;
void DeleteTexture(ImTextureID id) override;
void DeleteFontTextures() override;
bool UseTFViaSSBO() const override { return true; }
void AppendOverlayDebugInfo() override;
// rendertarget
void renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ = false) override;
void renderTarget_setScissor(sint32 scissorX, sint32 scissorY, sint32 scissorWidth, sint32 scissorHeight) override;
LatteCachedFBO* rendertarget_createCachedFBO(uint64 key) override;
void rendertarget_deleteCachedFBO(LatteCachedFBO* fbo) override;
void rendertarget_bindFramebufferObject(LatteCachedFBO* cfbo) override;
// texture functions
void* texture_acquireTextureUploadBuffer(uint32 size) override;
void texture_releaseTextureUploadBuffer(uint8* mem) override;
TextureDecoder* texture_chooseDecodedFormat(Latte::E_GX2SURFFMT format, bool isDepth, Latte::E_DIM dim, uint32 width, uint32 height) override;
void texture_clearSlice(LatteTexture* hostTexture, sint32 sliceIndex, sint32 mipIndex) override;
void texture_loadSlice(LatteTexture* hostTexture, sint32 width, sint32 height, sint32 depth, void* pixelData, sint32 sliceIndex, sint32 mipIndex, uint32 compressedImageSize) override;
void texture_clearColorSlice(LatteTexture* hostTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a) override;
void texture_clearDepthSlice(LatteTexture* hostTexture, uint32 sliceIndex, sint32 mipIndex, bool clearDepth, bool clearStencil, float depthValue, uint32 stencilValue) override;
LatteTexture* texture_createTextureEx(Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle, Latte::E_HWTILEMODE tileMode, bool isDepth) override;
void texture_setLatteTexture(LatteTextureView* textureView, uint32 textureUnit) override;
void texture_copyImageSubData(LatteTexture* src, sint32 srcMip, sint32 effectiveSrcX, sint32 effectiveSrcY, sint32 srcSlice, LatteTexture* dst, sint32 dstMip, sint32 effectiveDstX, sint32 effectiveDstY, sint32 dstSlice, sint32 effectiveCopyWidth, sint32 effectiveCopyHeight, sint32 srcDepth) override;
LatteTextureReadbackInfo* texture_createReadback(LatteTextureView* textureView) override;
// surface copy
void surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* sourceTexture, sint32 srcMip, sint32 srcSlice, LatteTexture* destinationTexture, sint32 dstMip, sint32 dstSlice, sint32 width, sint32 height) override;
// buffer cache
void bufferCache_init(const sint32 bufferSize) override;
void bufferCache_upload(uint8* buffer, sint32 size, uint32 bufferOffset) override;
void bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 size) override;
void bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size) override;
void buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size) override;
void buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) override;
// shader
RendererShader* shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool compileAsync, bool isGfxPackSource) override;
// streamout
void streamout_setupXfbBuffer(uint32 bufferIndex, sint32 ringBufferOffset, uint32 rangeAddr, uint32 rangeSize) override;
void streamout_begin() override;
void streamout_rendererFinishDrawcall() override;
// core drawing logic
void draw_beginSequence() override;
void draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst) override;
void draw_endSequence() override;
void draw_updateVertexBuffersDirectAccess();
void draw_updateUniformBuffersDirectAccess(LatteDecompilerShader* shader, const uint32 uniformBufferRegOffset);
void draw_handleSpecialState5();
// index
IndexAllocation indexData_reserveIndexMemory(uint32 size) override;
void indexData_releaseIndexMemory(IndexAllocation& allocation) override;
void indexData_uploadIndexMemory(IndexAllocation& allocation) override;
// occlusion queries
LatteQueryObject* occlusionQuery_create() override;
void occlusionQuery_destroy(LatteQueryObject* queryObj) override;
void occlusionQuery_flush() override;
void occlusionQuery_updateState() override;
// Helpers
MetalPerformanceMonitor& GetPerformanceMonitor() { return m_performanceMonitor; }
// Forwards the setting to the device; ignored when Metal 3 is not supported
void SetShouldMaximizeConcurrentCompilation(bool shouldMaximizeConcurrentCompilation)
{
if (m_supportsMetal3)
m_device->setShouldMaximizeConcurrentCompilation(shouldMaximizeConcurrentCompilation);
}
// True when a command buffer exists that has not been committed yet
bool IsCommandBufferActive() const
{
return (m_currentCommandBuffer.m_commandBuffer && !m_currentCommandBuffer.m_commited);
}
// Returns the current command buffer; a missing command buffer is only caught by a debug assert
MTL::CommandBuffer* GetCurrentCommandBuffer() const
{
cemu_assert_debug(m_currentCommandBuffer.m_commandBuffer);
return m_currentCommandBuffer.m_commandBuffer;
}
// Returns the current command buffer with an additional retain, or nullptr when it has already completed.
// The caller owns the returned reference and has to release it
MTL::CommandBuffer* GetAndRetainCurrentCommandBufferIfNotCompleted() const
{
// The command buffer has been commited and has finished execution
if (m_currentCommandBuffer.m_commited && m_executingCommandBuffers.size() == 0)
return nullptr;
return GetCurrentCommandBuffer()->retain();
}
// Sets the commit threshold to 8 draw calls past the number recorded so far
void RequestSoonCommit()
{
m_commitTreshold = m_recordedDrawcalls + 8;
}
MTL::CommandEncoder* GetCommandEncoder()
{
return m_commandEncoder;
}
MetalEncoderType GetEncoderType()
{
return m_encoderType;
}
// Resets the tracked encoder state to its defaults and clears every cached buffer, texture and sampler binding
void ResetEncoderState()
{
m_state.m_encoderState = {};
// TODO: set viewport and scissor to render target dimensions if render commands
for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++)
{
for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++)
m_state.m_encoderState.m_buffers[i][j] = {nullptr};
for (uint32 j = 0; j < MAX_MTL_TEXTURES; j++)
m_state.m_encoderState.m_textures[i][j] = nullptr;
for (uint32 j = 0; j < MAX_MTL_SAMPLERS; j++)
m_state.m_encoderState.m_samplers[i][j] = nullptr;
}
}
MetalEncoderState& GetEncoderState()
{
return m_state.m_encoderState;
}
void SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index);
void SetTexture(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Texture* texture, uint32 index);
void SetSamplerState(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::SamplerState* samplerState, uint32 index);
MTL::CommandBuffer* GetCommandBuffer();
MTL::RenderCommandEncoder* GetTemporaryRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor);
MTL::RenderCommandEncoder* GetRenderCommandEncoder(bool forceRecreate = false);
MTL::ComputeCommandEncoder* GetComputeCommandEncoder();
MTL::BlitCommandEncoder* GetBlitCommandEncoder();
void EndEncoding();
void CommitCommandBuffer();
void ProcessFinishedCommandBuffers();
bool AcquireDrawable(bool mainWindow);
//bool CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader);
void BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader);
void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a);
void CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size, MTL::RenderStages after, MTL::RenderStages before);
// Getters
bool GetPositionInvariance() const
{
return m_positionInvariance;
}
bool IsAppleGPU() const
{
return m_isAppleGPU;
}
bool SupportsFramebufferFetch() const
{
return m_supportsFramebufferFetch;
}
bool HasUnifiedMemory() const
{
return m_hasUnifiedMemory;
}
bool SupportsMetal3() const
{
return m_supportsMetal3;
}
bool SupportsMeshShaders() const
{
return m_supportsMeshShaders;
}
//MTL::StorageMode GetOptimalTextureStorageMode() const
//{
// return (m_isAppleGPU ? MTL::StorageModeShared : MTL::StorageModePrivate);
//}
// Shared storage on devices with unified memory, managed storage otherwise
MTL::ResourceOptions GetOptimalBufferStorageMode() const
{
return (m_hasUnifiedMemory ? MTL::ResourceStorageModeShared : MTL::ResourceStorageModeManaged);
}
MTL::Texture* GetNullTexture2D() const
{
return m_nullTexture2D;
}
// Returns the texture readback buffer, creating it (shared storage, TEXTURE_READBACK_SIZE bytes) on first use
MTL::Buffer* GetTextureReadbackBuffer()
{
if (!m_readbackBuffer)
{
m_readbackBuffer = m_device->newBuffer(TEXTURE_READBACK_SIZE, MTL::ResourceStorageModeShared);
#ifdef CEMU_DEBUG_ASSERT
m_readbackBuffer->setLabel(GetLabel("Texture readback buffer", m_readbackBuffer));
#endif
}
return m_readbackBuffer;
}
// Returns the transform feedback ring buffer, creating it (private storage) on first use
MTL::Buffer* GetXfbRingBuffer()
{
if (!m_xfbRingBuffer)
{
// HACK: using just LatteStreamout_GetRingBufferSize will cause page faults
m_xfbRingBuffer = m_device->newBuffer(LatteStreamout_GetRingBufferSize() * 4, MTL::ResourceStorageModePrivate);
#ifdef CEMU_DEBUG_ASSERT
m_xfbRingBuffer->setLabel(GetLabel("Transform feedback buffer", m_xfbRingBuffer));
#endif
}
return m_xfbRingBuffer;
}
MTL::Buffer* GetOcclusionQueryResultBuffer() const
{
return m_occlusionQuery.m_resultBuffer;
}
uint64* GetOcclusionQueryResultsPtr()
{
return m_occlusionQuery.m_resultsPtr;
}
uint32 GetOcclusionQueryIndex()
{
return m_occlusionQuery.m_currentIndex;
}
void BeginOcclusionQuery()
{
m_occlusionQuery.m_active = true;
}
// Marks occlusion querying as inactive and remembers (retains) the command buffer that was current at this point
void EndOcclusionQuery()
{
m_occlusionQuery.m_active = false;
// Release the old command buffer
if (m_occlusionQuery.m_lastCommandBuffer)
m_occlusionQuery.m_lastCommandBuffer->release();
// Get and retain the current command buffer
m_occlusionQuery.m_lastCommandBuffer = GetAndRetainCurrentCommandBufferIfNotCompleted();
}
// GPU capture
// Only sets the request flag; the capture itself is handled by StartCapture/EndCapture
void CaptureFrame()
{
m_captureFrame = true;
}
private:
MetalLayerHandle m_mainLayer;
MetalLayerHandle m_padLayer;
MetalPerformanceMonitor m_performanceMonitor;
// Options
bool m_positionInvariance;
// Metal objects
MTL::Device* m_device = nullptr;
MTL::CommandQueue* m_commandQueue;
// Feature support
bool m_isAppleGPU;
bool m_supportsFramebufferFetch;
bool m_hasUnifiedMemory;
bool m_supportsMetal3;
bool m_supportsMeshShaders;
uint32 m_recommendedMaxVRAMUsage;
MetalPixelFormatSupport m_pixelFormatSupport;
// Managers and caches
class MetalMemoryManager* m_memoryManager;
class MetalOutputShaderCache* m_outputShaderCache;
class MetalPipelineCache* m_pipelineCache;
class MetalDepthStencilCache* m_depthStencilCache;
class MetalSamplerCache* m_samplerCache;
// Pipelines
MTL::RenderPipelineDescriptor* m_copyDepthToColorDesc;
std::map<MTL::PixelFormat, MTL::RenderPipelineState*> m_copyDepthToColorPipelines;
// Void vertex pipelines
class MetalVoidVertexPipeline* m_copyBufferToBufferPipeline;
// Synchronization resources
MTL::Event* m_event;
int32_t m_eventValue = -1;
// Resources
MTL::SamplerState* m_nearestSampler;
MTL::SamplerState* m_linearSampler;
// Null resources
MTL::Texture* m_nullTexture1D;
MTL::Texture* m_nullTexture2D;
// Texture readback
MTL::Buffer* m_readbackBuffer = nullptr;
uint32 m_readbackBufferWriteOffset = 0;
// Transform feedback
MTL::Buffer* m_xfbRingBuffer = nullptr;
// Occlusion queries
struct
{
MTL::Buffer* m_resultBuffer;
uint64* m_resultsPtr;
uint32 m_currentIndex = 0;
bool m_active = false;
MTL::CommandBuffer* m_lastCommandBuffer = nullptr;
} m_occlusionQuery;
// Autorelease pool
NS::AutoreleasePool* m_autoreleasePool;
// Active objects
MetalCommandBuffer m_currentCommandBuffer{};
std::vector<MTL::CommandBuffer*> m_executingCommandBuffers;
MetalEncoderType m_encoderType = MetalEncoderType::None;
MTL::CommandEncoder* m_commandEncoder = nullptr;
// NOTE(review): the three counters below have no initializer here - presumably set in the constructor; confirm
uint32 m_recordedDrawcalls;
uint32 m_defaultCommitTreshlod;
uint32 m_commitTreshold;
// State
MetalState m_state;
// GPU capture
bool m_captureFrame = false;
bool m_capturing = false;
// Helpers
// Selects the layer of the main window or of the pad window
MetalLayerHandle& GetLayer(bool mainWindow)
{
return (mainWindow ? m_mainLayer : m_padLayer);
}
void SwapBuffer(bool mainWindow);
void EnsureImGuiBackend();
// GPU capture
void StartCapture();
void EndCapture();
};

View file

@ -0,0 +1,190 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
// Translates the border color of a Latte sampler into one of the fixed border colors Metal offers.
// The three predefined Latte border types map directly. A register-defined (custom) color is approximated:
// alpha == 1 selects an opaque color (white when red == 1, black otherwise), anything else transparent black.
// With logWorkaround set, a message is logged whenever the approximation differs from the requested color.
MTL::SamplerBorderColor GetBorderColor(LatteConst::ShaderType shaderType, uint32 stageSamplerIndex, const _LatteRegisterSetSampler* samplerWords, bool logWorkaround = false)
{
	using BorderType = Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE;

	const auto borderType = samplerWords->WORD0.get_BORDER_COLOR_TYPE();
	if (borderType == BorderType::TRANSPARENT_BLACK)
		return MTL::SamplerBorderColorTransparentBlack;
	if (borderType == BorderType::OPAQUE_BLACK)
		return MTL::SamplerBorderColorOpaqueBlack;
	if (borderType == BorderType::OPAQUE_WHITE)
		return MTL::SamplerBorderColorOpaqueWhite;

	// Custom color: read it from the border color registers of the matching shader stage
	_LatteRegisterSetSamplerBorderColor* colorReg;
	if (shaderType == LatteConst::ShaderType::Vertex)
		colorReg = LatteGPUState.contextNew.TD_VS_SAMPLER_BORDER_COLOR + stageSamplerIndex;
	else if (shaderType == LatteConst::ShaderType::Pixel)
		colorReg = LatteGPUState.contextNew.TD_PS_SAMPLER_BORDER_COLOR + stageSamplerIndex;
	else // geometry
		colorReg = LatteGPUState.contextNew.TD_GS_SAMPLER_BORDER_COLOR + stageSamplerIndex;

	const float r = colorReg->red.get_channelValue();
	const float g = colorReg->green.get_channelValue();
	const float b = colorReg->blue.get_channelValue();
	const float a = colorReg->alpha.get_channelValue();

	// Metal doesn't support custom border color, so pick the closest predefined one
	const bool isOpaque = (a == 1.0f);
	const bool isWhite = (r == 1.0f);

	MTL::SamplerBorderColor approximated = MTL::SamplerBorderColorTransparentBlack;
	if (isOpaque)
		approximated = isWhite ? MTL::SamplerBorderColorOpaqueWhite : MTL::SamplerBorderColorOpaqueBlack;

	if (logWorkaround)
	{
		// Channel values of the color that was actually chosen
		const float usedRGB = (isOpaque && isWhite) ? 1.0f : 0.0f;
		const float usedA = isOpaque ? 1.0f : 0.0f;
		const float newR = usedRGB;
		const float newG = usedRGB;
		const float newB = usedRGB;
		const float newA = usedA;

		const bool matchesExactly = (r == newR && g == newG && b == newB && a == newA);
		if (!matchesExactly)
			cemuLog_log(LogType::Force, "Custom border color ({}, {}, {}, {}) is not supported on Metal, using ({}, {}, {}, {}) instead", r, g, b, a, newR, newG, newB, newA);
	}

	return approximated;
}
// Releases every cached sampler state and empties the cache.
MetalSamplerCache::~MetalSamplerCache()
{
	for (const auto& [stateHash, sampler] : m_samplerCache)
		sampler->release();

	m_samplerCache.clear();
}
// Returns the Metal sampler state matching the given Latte sampler registers.
// Sampler states are cached by a hash of the register words and the resolved border color,
// so a given configuration is only created once.
//
// lcr: context registers (only forwarded to the hash calculation)
// shaderType / stageSamplerIndex: identify the sampler slot, needed to look up custom border colors
// samplerWords: raw sampler register words to translate
MTL::SamplerState* MetalSamplerCache::GetSamplerState(const LatteContextRegister& lcr, LatteConst::ShaderType shaderType, uint32 stageSamplerIndex, const _LatteRegisterSetSampler* samplerWords)
{
	using MipFilter = Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER;
	using XYFilter = Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER;

	uint64 stateHash = CalculateSamplerHash(lcr, shaderType, stageSamplerIndex, samplerWords);
	// operator[] inserts an empty slot on a cache miss, which is filled in at the end of this function
	auto& samplerState = m_samplerCache[stateHash];
	if (samplerState)
		return samplerState;

	// Sampler state
	NS_STACK_SCOPED MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();

	// lod
	uint32 iMinLOD = samplerWords->WORD1.get_MIN_LOD();
	uint32 iMaxLOD = samplerWords->WORD1.get_MAX_LOD();
	//sint32 iLodBias = samplerWords->WORD1.get_LOD_BIAS();

	auto filterMip = samplerWords->WORD0.get_MIP_FILTER();
	if (filterMip == MipFilter::NONE)
	{
		// No mip filtering: restrict the lod range so that only the base level is sampled
		samplerDescriptor->setMipFilter(MTL::SamplerMipFilterNearest);
		samplerDescriptor->setLodMinClamp(0.0f);
		samplerDescriptor->setLodMaxClamp(0.25f);
	}
	else
	{
		// POINT selects nearest mip filtering; LINEAR and invalid constants fall back to linear
		samplerDescriptor->setMipFilter((filterMip == MipFilter::POINT) ? MTL::SamplerMipFilterNearest : MTL::SamplerMipFilterLinear);
		samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f);
		samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f);
	}

	// Both point variants map to nearest filtering, everything else to linear
	const auto toMinMagFilter = [](auto xyFilter) {
		return (xyFilter == XYFilter::POINT || xyFilter == XYFilter::ANISO_POINT) ? MTL::SamplerMinMagFilterNearest : MTL::SamplerMinMagFilterLinear;
	};

	auto filterMin = samplerWords->WORD0.get_XY_MIN_FILTER();
	cemu_assert_debug(filterMin != XYFilter::BICUBIC); // todo
	samplerDescriptor->setMinFilter(toMinMagFilter(filterMin));

	// The mag filter is derived from the mag filter field only (the ANISO_POINT check previously tested filterMin)
	auto filterMag = samplerWords->WORD0.get_XY_MAG_FILTER();
	samplerDescriptor->setMagFilter(toMinMagFilter(filterMag));

	// todo: z-filter for texture array samplers is customizable for GPU7 but OpenGL/Vulkan doesn't expose this functionality?

	auto clampX = samplerWords->WORD0.get_CLAMP_X();
	auto clampY = samplerWords->WORD0.get_CLAMP_Y();
	auto clampZ = samplerWords->WORD0.get_CLAMP_Z();
	samplerDescriptor->setSAddressMode(GetMtlSamplerAddressMode(clampX));
	samplerDescriptor->setTAddressMode(GetMtlSamplerAddressMode(clampY));
	samplerDescriptor->setRAddressMode(GetMtlSamplerAddressMode(clampZ));

	// The register holds the log2 of the anisotropy ratio; 0 leaves the descriptor default untouched
	auto maxAniso = samplerWords->WORD0.get_MAX_ANISO_RATIO();
	if (maxAniso > 0)
		samplerDescriptor->setMaxAnisotropy(1 << maxAniso);

	// TODO: set lod bias
	//samplerInfo.mipLodBias = (float)iLodBias / 64.0f;

	// depth compare
	//uint8 depthCompareMode = shader->textureUsesDepthCompare[relative_textureUnit] ? 1 : 0;
	// TODO: is it okay to just cast?
	samplerDescriptor->setCompareFunction(GetMtlCompareFunc((Latte::E_COMPAREFUNC)samplerWords->WORD0.get_DEPTH_COMPARE_FUNCTION()));

	// Border color
	auto borderColor = GetBorderColor(shaderType, stageSamplerIndex, samplerWords, true);
	samplerDescriptor->setBorderColor(borderColor);

	samplerState = m_mtlr->GetDevice()->newSamplerState(samplerDescriptor);
	return samplerState;
}
// Computes the cache key of a sampler configuration.
// The key mixes the three raw sampler register words and the resolved Metal border color;
// lcr is part of the signature but does not contribute to the hash.
uint64 MetalSamplerCache::CalculateSamplerHash(const LatteContextRegister& lcr, LatteConst::ShaderType shaderType, uint32 stageSamplerIndex, const _LatteRegisterSetSampler* samplerWords)
{
	const uint64 rawWords[] = {
		(uint64)samplerWords->WORD0.getRawValue(),
		(uint64)samplerWords->WORD1.getRawValue(),
		(uint64)samplerWords->WORD2.getRawValue(),
	};

	// Rotate-and-add over the register words
	uint64 stateHash = 0;
	for (const uint64 word : rawWords)
	{
		stateHash = std::rotl<uint64>(stateHash, 17);
		stateHash += word;
	}

	// The border color is resolved first because custom colors collapse onto a few Metal values
	const auto resolvedBorderColor = GetBorderColor(shaderType, stageSamplerIndex, samplerWords);
	stateHash = std::rotl<uint64>(stateHash, 5);
	stateHash += (uint64)resolvedBorderColor;

	// TODO: check this
	return stateHash;
}

View file

@ -0,0 +1,22 @@
#pragma once
#include <Metal/Metal.hpp>
#include "HW/Latte/Core/LatteConst.h"
#include "HW/Latte/ISA/LatteReg.h"
// Cache of Metal sampler states, keyed by a hash of the Latte sampler configuration.
// The cache owns the sampler states and releases them in its destructor.
class MetalSamplerCache
{
public:
MetalSamplerCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
~MetalSamplerCache();
// Returns the cached sampler state for the given configuration, creating it on first use
MTL::SamplerState* GetSamplerState(const LatteContextRegister& lcr, LatteConst::ShaderType shaderType, uint32 stageSamplerIndex, const _LatteRegisterSetSampler* samplerWords);
private:
class MetalRenderer* m_mtlr;
// hash (see CalculateSamplerHash) -> sampler state
std::map<uint64, MTL::SamplerState*> m_samplerCache;
uint64 CalculateSamplerHash(const LatteContextRegister& lcr, LatteConst::ShaderType shaderType, uint32 stageSamplerIndex, const _LatteRegisterSetSampler* samplerWords);
};

View file

@ -0,0 +1,23 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.h"
// Creates a render pipeline that consists of a vertex function only, with rasterization disabled.
//
// mtlRenderer: renderer providing the Metal device
// library: shader library that contains the vertex function
// vertexFunctionName: name of the vertex function inside the library
//
// On failure the error is logged and the stored pipeline state is whatever newRenderPipelineState returned.
MetalVoidVertexPipeline::MetalVoidVertexPipeline(class MetalRenderer* mtlRenderer, MTL::Library* library, const std::string& vertexFunctionName)
{
	// Render pipeline state
	NS_STACK_SCOPED MTL::Function* vertexFunction = library->newFunction(ToNSString(vertexFunctionName));

	NS_STACK_SCOPED MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
	renderPipelineDescriptor->setVertexFunction(vertexFunction);
	renderPipelineDescriptor->setRasterizationEnabled(false);

	NS::Error* error = nullptr;
	m_renderPipelineState = mtlRenderer->GetDevice()->newRenderPipelineState(renderPipelineDescriptor, &error);
	if (error)
	{
		// Name the pipeline kind and the function so the log entry can be traced back to its source
		cemuLog_log(LogType::Force, "error creating void vertex render pipeline state for function \"{}\": {}", vertexFunctionName, error->localizedDescription()->utf8String());
	}
}
// Releases the pipeline state created by the constructor.
MetalVoidVertexPipeline::~MetalVoidVertexPipeline()
{
	// The pipeline state is null when creation failed in the constructor
	if (m_renderPipelineState)
		m_renderPipelineState->release();
}

View file

@ -0,0 +1,16 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Metal/MTLLibrary.hpp"
#include "Metal/MTLRenderPipeline.hpp"
// Owns a render pipeline state that is built from a single vertex function with rasterization disabled.
// NOTE(review): this header has no `#pragma once` / include guard, unlike the other Metal headers - confirm and add one.
// NOTE(review): the class releases its pipeline state in the destructor but is still copyable; copying an instance
// would release the same object twice.
class MetalVoidVertexPipeline
{
public:
MetalVoidVertexPipeline(class MetalRenderer* mtlRenderer, MTL::Library* library, const std::string& vertexFunctionName);
~MetalVoidVertexPipeline();
// Non-owning access to the pipeline state
MTL::RenderPipelineState* GetRenderPipelineState() const { return m_renderPipelineState; }
private:
MTL::RenderPipelineState* m_renderPipelineState;
};

View file

@ -0,0 +1,407 @@
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
//#include "Cemu/FileCache/FileCache.h"
//#include "config/ActiveSettings.h"
#include "Cemu/Logging/CemuLogging.h"
#include "Common/precompiled.h"
#include "GameProfile/GameProfile.h"
#include "util/helpers/helpers.h"
// Settings of the AIR cache RAM volume. Within this part of the file they are only referenced from commented-out code.
#define METAL_AIR_CACHE_NAME "Cemu_AIR_cache"
#define METAL_AIR_CACHE_PATH "/Volumes/" METAL_AIR_CACHE_NAME
#define METAL_AIR_CACHE_SIZE (16 * 1024 * 1024)
// Volume size expressed in 512-byte blocks
#define METAL_AIR_CACHE_BLOCK_COUNT (METAL_AIR_CACHE_SIZE / 512)
// Set between ShaderCacheLoading_begin() and ShaderCacheLoading_end()
static bool s_isLoadingShadersMtl{false};
//static bool s_hasRAMFilesystem{false};
//class FileCache* s_airCache{nullptr};
// Shader compilation counters defined in another translation unit
extern std::atomic_int g_compiled_shaders_total;
extern std::atomic_int g_compiled_shaders_async;
class ShaderMtlThreadPool
{
public:
void StartThreads()
{
if (m_threadsActive.exchange(true))
return;
// Create thread pool
const uint32 threadCount = 2;
for (uint32 i = 0; i < threadCount; ++i)
s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this);
// Create AIR cache thread
/*
s_airCacheThread = new std::thread(&ShaderMtlThreadPool::AIRCacheThreadFunc, this);
// Set priority
sched_param schedParam;
schedParam.sched_priority = 20;
if (pthread_setschedparam(s_airCacheThread->native_handle(), SCHED_FIFO, &schedParam) != 0) {
cemuLog_log(LogType::Force, "failed to set FIFO thread priority");
}
if (pthread_setschedparam(s_airCacheThread->native_handle(), SCHED_RR, &schedParam) != 0) {
cemuLog_log(LogType::Force, "failed to set RR thread priority");
}
*/
}
void StopThreads()
{
if (!m_threadsActive.exchange(false))
return;
for (uint32 i = 0; i < s_threads.size(); ++i)
s_compilationQueueCount.increment();
for (auto& it : s_threads)
it.join();
s_threads.clear();
/*
if (s_airCacheThread)
{
s_airCacheQueueCount.increment();
s_airCacheThread->join();
delete s_airCacheThread;
}
*/
}
~ShaderMtlThreadPool()
{
StopThreads();
}
void CompilerThreadFunc()
{
SetThreadName("mtlShaderComp");
while (m_threadsActive.load(std::memory_order::relaxed))
{
s_compilationQueueCount.decrementWithWait();
s_compilationQueueMutex.lock();
if (s_compilationQueue.empty())
{
// queue empty again, shaders compiled synchronously via PreponeCompilation()
s_compilationQueueMutex.unlock();
continue;
}
RendererShaderMtl* job = s_compilationQueue.front();
s_compilationQueue.pop_front();
// set compilation state
cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::QUEUED);
job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::COMPILING);
s_compilationQueueMutex.unlock();
// compile
job->CompileInternal();
if (job->ShouldCountCompilation())
++g_compiled_shaders_async;
// mark as compiled
cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::COMPILING);
job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::DONE);
}
}
/*
void AIRCacheThreadFunc()
{
SetThreadName("mtlAIRCache");
while (m_threadsActive.load(std::memory_order::relaxed))
{
s_airCacheQueueCount.decrementWithWait();
s_airCacheQueueMutex.lock();
if (s_airCacheQueue.empty())
{
s_airCacheQueueMutex.unlock();
continue;
}
// Create RAM filesystem
if (!s_hasRAMFilesystem)
{
executeCommand("diskutil erasevolume HFS+ {} $(hdiutil attach -nomount ram://{})", METAL_AIR_CACHE_NAME, METAL_AIR_CACHE_BLOCK_COUNT);
s_hasRAMFilesystem = true;
}
RendererShaderMtl* job = s_airCacheQueue.front();
s_airCacheQueue.pop_front();
s_airCacheQueueMutex.unlock();
// compile
job->CompileToAIR();
}
}
*/
// Returns the current value of the m_threadsActive flag (StopThreads() clears it).
bool HasThreadsRunning() const { return m_threadsActive; }
public:
std::vector<std::thread> s_threads;
//std::thread* s_airCacheThread{nullptr};
std::deque<RendererShaderMtl*> s_compilationQueue;
CounterSemaphore s_compilationQueueCount;
std::mutex s_compilationQueueMutex;
/*
std::deque<RendererShaderMtl*> s_airCacheQueue;
CounterSemaphore s_airCacheQueueCount;
std::mutex s_airCacheQueueMutex;
*/
private:
std::atomic<bool> m_threadsActive;
} shaderMtlThreadPool;
// TODO: find out if it would be possible to cache compiled Metal shaders
// Marks the start of shader cache loading: sets the loading flag and asks the Metal renderer
// to maximize concurrent compilation. cacheTitleId is only referenced by the disabled AIR cache code.
void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId)
{
	s_isLoadingShadersMtl = true;
	// Open AIR cache
	/*
	if (s_airCache)
	{
	delete s_airCache;
	s_airCache = nullptr;
	}
	uint32 airCacheMagic = GeneratePrecompiledCacheId();
	const std::string cacheFilename = fmt::format("{:016x}_air.bin", cacheTitleId);
	const fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}", cacheFilename);
	s_airCache = FileCache::Open(cachePath, true, airCacheMagic);
	if (!s_airCache)
	cemuLog_log(LogType::Force, "Unable to open AIR cache {}", cacheFilename);
	*/
	// Maximize shader compilation speed
	MetalRenderer* const metalRenderer = static_cast<MetalRenderer*>(g_renderer.get());
	metalRenderer->SetShouldMaximizeConcurrentCompilation(true);
}
void RendererShaderMtl::ShaderCacheLoading_end()
{
s_isLoadingShadersMtl = false;
// Reset shader compilation speed
static_cast<MetalRenderer*>(g_renderer.get())->SetShouldMaximizeConcurrentCompilation(false);
}
// Currently a no-op: the AIR cache / RAM filesystem teardown below is commented out.
void RendererShaderMtl::ShaderCacheLoading_Close()
{
// Close the AIR cache
/*
if (s_airCache)
{
delete s_airCache;
s_airCache = nullptr;
}
// Close RAM filesystem
if (s_hasRAMFilesystem)
executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH);
*/
}
// Starts the shared shader compiler thread pool. The constructor asserts (debug) that this has happened.
void RendererShaderMtl::Initialize()
{
shaderMtlThreadPool.StartThreads();
}
// Stops and joins the shared shader compiler threads.
void RendererShaderMtl::Shutdown()
{
shaderMtlThreadPool.StopThreads();
}
// Stores the renderer and a copy of the MSL source, then enqueues this shader for
// asynchronous compilation on the shared thread pool.
RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
	: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}, m_mslCode{mslCode}
{
	auto& pool = shaderMtlThreadPool;
	{
		// state change, enqueue and semaphore post all happen under the queue mutex
		std::lock_guard<std::mutex> queueLock(pool.s_compilationQueueMutex);
		m_compilationState.setValue(COMPILATION_STATE::QUEUED);
		pool.s_compilationQueue.push_back(this);
		pool.s_compilationQueueCount.increment();
	}
	cemu_assert_debug(pool.HasThreadsRunning()); // make sure .StartThreads() was called
}
// Releases the compiled Metal function, if one was created.
RendererShaderMtl::~RendererShaderMtl()
{
	if (m_function == nullptr)
		return;
	m_function->release();
}
// Makes sure this shader is compiled when the call returns. If it is still waiting in the
// queue it is pulled out and compiled on the calling thread; otherwise the call blocks
// until the state reaches DONE. isRenderThread is not used by this implementation.
void RendererShaderMtl::PreponeCompilation(bool isRenderThread)
{
	auto& pool = shaderMtlThreadPool;
	bool claimedFromQueue = false;
	{
		std::lock_guard<std::mutex> queueLock(pool.s_compilationQueueMutex);
		claimedFromQueue = m_compilationState.hasState(COMPILATION_STATE::QUEUED);
		if (claimedFromQueue)
		{
			// remove from queue; the semaphore count is left untouched here, which is why
			// the worker loop tolerates waking up to an empty queue
			auto& queue = pool.s_compilationQueue;
			queue.erase(std::remove(queue.begin(), queue.end(), this), queue.end());
			m_compilationState.setValue(COMPILATION_STATE::COMPILING);
		}
	}
	if (claimedFromQueue)
	{
		// compile synchronously
		CompileInternal();
		m_compilationState.setValue(COMPILATION_STATE::DONE);
		return;
	}
	// a worker already owns (or finished) the job: wait for it
	m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);
	if (ShouldCountCompilation())
		--g_compiled_shaders_async; // compilation caused a stall so we don't consider this one async
}
// Non-blocking query: true once the compilation state is DONE.
bool RendererShaderMtl::IsCompiled()
{
	const bool isDone = m_compilationState.hasState(COMPILATION_STATE::DONE);
	return isDone;
}
// Blocks until the compilation state reaches DONE. Always returns true, even when
// compilation failed and no function was produced.
bool RendererShaderMtl::WaitForCompiled()
{
m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);
return true;
}
// Compilations feed the global counters only for game shaders and only while
// the shader cache is not being loaded.
bool RendererShaderMtl::ShouldCountCompilation() const
{
	if (s_isLoadingShadersMtl)
		return false;
	return m_isGameShader;
}
// Compiles m_mslCode into a Metal library using the current game profile / renderer options.
// Returns the new library (caller releases it) or nullptr if compilation failed.
//
// Failure is decided by the returned library, not by the error object: Metal may hand back
// a valid library together with an error object that only carries compiler warnings.
// Treating that as a failure would leak the library and drop a usable shader.
MTL::Library* RendererShaderMtl::LibraryFromSource()
{
	// Compile from source
	NS_STACK_SCOPED MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init();
	if (g_current_game_profile->GetFastMath())
		options->setFastMathEnabled(true);
	if (m_mtlr->GetPositionInvariance())
	{
		// TODO: filter out based on GPU state
		options->setPreserveInvariance(true);
	}
	NS::Error* error = nullptr;
	MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(m_mslCode), options, &error);
	if (!library)
	{
		// guard against a missing error object so the log call never dereferences null
		const char* reason = error ? error->localizedDescription()->utf8String() : "unknown error";
		cemuLog_log(LogType::Force, "failed to create library from source: {} -> {}", reason, m_mslCode.c_str());
		return nullptr;
	}
	if (error)
	{
		// library is valid; the error object only reports warnings
		cemuLog_log(LogType::Force, "library created from source with warnings: {}", error->localizedDescription()->utf8String());
	}
	return library;
}
/*
MTL::Library* RendererShaderMtl::LibraryFromAIR(std::span<uint8> data)
{
dispatch_data_t dispatchData = dispatch_data_create(data.data(), data.size(), nullptr, DISPATCH_DATA_DESTRUCTOR_DEFAULT);
NS::Error* error = nullptr;
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(dispatchData, &error);
if (error)
{
cemuLog_log(LogType::Force, "failed to create library from AIR: {}", error->localizedDescription()->utf8String());
return nullptr;
}
return library;
}
*/
// Compiles this shader and stores the resulting "main0" function in m_function.
// Called from the worker thread (CompilerThreadFunc) and from PreponeCompilation().
// If the library cannot be created the function returns early: m_function is left
// untouched and the compilation is not counted.
void RendererShaderMtl::CompileInternal()
{
MTL::Library* library = nullptr;
// First, try to retrieve the compiled shader from the AIR cache
/*
if (s_isLoadingShadersMtl && (m_isGameShader && !m_isGfxPackShader) && s_airCache)
{
cemu_assert_debug(m_baseHash != 0);
uint64 h1, h2;
GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2);
std::vector<uint8> cacheFileData;
if (s_airCache->GetFile({ h1, h2 }, cacheFileData))
{
library = LibraryFromAIR(std::span<uint8>(cacheFileData.data(), cacheFileData.size()));
FinishCompilation();
}
}
*/
// Not in the cache, compile from source
// (with the cache lookup disabled above, library is always null here)
if (!library)
{
// Compile from source
library = LibraryFromSource();
// the MSL text is released before the null check, so it is freed on failure too
FinishCompilation();
if (!library)
return;
// Store in the AIR cache
/*
shaderMtlThreadPool.s_airCacheQueueMutex.lock();
shaderMtlThreadPool.s_airCacheQueue.push_back(this);
shaderMtlThreadPool.s_airCacheQueueCount.increment();
shaderMtlThreadPool.s_airCacheQueueMutex.unlock();
*/
}
// look up the "main0" entry point, then drop our reference to the library
m_function = library->newFunction(ToNSString("main0"));
library->release();
// Count shader compilation
if (ShouldCountCompilation())
g_compiled_shaders_total++;
}
/*
void RendererShaderMtl::CompileToAIR()
{
uint64 h1, h2;
GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2);
// The shader is not in the cache, compile it
std::string baseFilename = fmt::format("{}/{}_{}", METAL_AIR_CACHE_PATH, h1, h2);
// Source
std::ofstream mslFile;
mslFile.open(fmt::format("{}.metal", baseFilename));
mslFile << m_mslCode;
mslFile.close();
// Compile
if (!executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal -w", baseFilename, baseFilename))
return;
if (!executeCommand("xcrun -sdk macosx metallib -o {}.metallib {}.ir", baseFilename, baseFilename))
return;
// Clean up
executeCommand("rm {}.metal", baseFilename);
executeCommand("rm {}.ir", baseFilename);
// Load from the newly generated AIR
MemoryMappedFile airFile(fmt::format("{}.metallib", baseFilename));
std::span<uint8> airData = std::span<uint8>(airFile.data(), airFile.size());
//library = LibraryFromAIR(std::span<uint8>(airData.data(), airData.size()));
// Store in the cache
s_airCache->AddFile({ h1, h2 }, airData.data(), airData.size());
// Clean up
executeCommand("rm {}.metallib", baseFilename);
FinishCompilation();
}
*/
// Drops the stored MSL source text: empties the string and requests that its capacity be released.
void RendererShaderMtl::FinishCompilation()
{
m_mslCode.clear();
m_mslCode.shrink_to_fit();
}

View file

@ -0,0 +1,79 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/RendererShader.h"
#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "util/helpers/ConcurrentQueue.h"
#include "util/helpers/Semaphore.h"
#include <Metal/Metal.hpp>
// Metal implementation of RendererShader. Holds the MSL source until it has been compiled
// and exposes the resulting MTL::Function. Compilation is queued on a shared thread pool
// by the constructor and can be forced onto the calling thread via PreponeCompilation().
class RendererShaderMtl : public RendererShader
{
// the thread pool reads/writes m_compilationState and calls the private compile helpers
friend class ShaderMtlThreadPool;
// Lifecycle of a shader's compilation, tracked in m_compilationState.
enum class COMPILATION_STATE : uint32
{
NONE, // initial value of m_compilationState
QUEUED, // set by the constructor when the shader is placed in the compile queue
COMPILING, // set by whichever thread claims the job
DONE // set after CompileInternal() returned
};
public:
// shader cache loading hooks
static void ShaderCacheLoading_begin(uint64 cacheTitleId);
static void ShaderCacheLoading_end();
static void ShaderCacheLoading_Close();
// start / stop the shared compiler thread pool
static void Initialize();
static void Shutdown();
RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
virtual ~RendererShaderMtl();
// Returns the compiled function; nullptr until compilation produced one.
MTL::Function* GetFunction() const
{
return m_function;
}
// The uniform accessors below are not implemented for this backend:
// each one only raises cemu_assert_suspicious().
sint32 GetUniformLocation(const char* name) override
{
cemu_assert_suspicious();
return 0;
}
void SetUniform2fv(sint32 location, void* data, sint32 count) override
{
cemu_assert_suspicious();
}
void SetUniform4iv(sint32 location, void* data, sint32 count) override
{
cemu_assert_suspicious();
}
void PreponeCompilation(bool isRenderThread) override;
bool IsCompiled() override;
bool WaitForCompiled() override;
private:
class MetalRenderer* m_mtlr;
MTL::Function* m_function = nullptr;
StateSemaphore<COMPILATION_STATE> m_compilationState{ COMPILATION_STATE::NONE };
// MSL source text; emptied by FinishCompilation()
std::string m_mslCode;
bool ShouldCountCompilation() const;
MTL::Library* LibraryFromSource();
//MTL::Library* LibraryFromAIR(std::span<uint8> data);
void CompileInternal();
//void CompileToAIR();
void FinishCompilation();
};

View file

@ -0,0 +1,51 @@
#pragma once
// Two-level stringification helpers (the second level expands its argument before stringifying).
// NOTE(review): identifiers starting with an underscore followed by an uppercase letter or a
// second underscore are reserved for the implementation; consider renaming if nothing external depends on them.
#define __STRINGIFY(x) #x
#define _STRINGIFY(x) __STRINGIFY(x)
// MSL source of the renderer's utility shaders: a fullscreen-triangle vertex shader, a
// byte-wise buffer-to-buffer copy written as a vertex shader, and a depth-to-color copy
// fragment shader. The text inside the raw string is passed to the Metal compiler as-is.
constexpr const char* utilityShaderSource = R"(#include <metal_stdlib>
using namespace metal;
#define GET_BUFFER_BINDING(index) (28 + index)
#define GET_TEXTURE_BINDING(index) (29 + index)
#define GET_SAMPLER_BINDING(index) (14 + index)
constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};
struct VertexOut {
float4 position [[position]];
float2 texCoord;
};
vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) {
VertexOut out;
out.position = float4(positions[vid], 0.0, 1.0);
out.texCoord = positions[vid] * 0.5 + 0.5;
out.texCoord.y = 1.0 - out.texCoord.y;
return out;
}
//fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], //sampler samplr [[sampler(0)]]) {
// return tex.sample(samplr, in.texCoord);
//}
vertex void vertexCopyBufferToBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]]) {
dst[vid] = src[vid];
}
fragment float4 fragmentCopyDepthToColor(VertexOut in [[stage_in]], texture2d<float, access::read> src [[texture(GET_TEXTURE_BINDING(0))]]) {
return float4(src.read(uint2(in.position.xy)).r, 0.0, 0.0, 0.0);
}
//struct RestrideParams {
// uint oldStride;
// uint newStride;
//};
//vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer//(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]], constant //RestrideParams& params [[buffer(GET_BUFFER_BINDING(2))]]) {
// for (uint32_t i = 0; i < params.oldStride; i++) {
// dst[vid * params.newStride + i] = src[vid * params.oldStride + i];
// }
//}
)";

View file

@ -0,0 +1,7 @@
#pragma once
#import <Cocoa/Cocoa.h>
#import <QuartzCore/CAMetalLayer.h>
// NSView subclass whose implementation backs the view with a CAMetalLayer.
@interface MetalView : NSView
@end

View file

@ -0,0 +1,26 @@
#include "Cafe/HW/Latte/Renderer/MetalView.h"
@implementation MetalView
// Opt in to layer-based updating.
-(BOOL) wantsUpdateLayer { return YES; }
// The backing layer class is CAMetalLayer.
+(Class) layerClass { return [CAMetalLayer class]; }
// copied from https://github.com/KhronosGroup/MoltenVK/blob/master/Demos/Cube/macOS/DemoViewController.m
// Creates the backing layer and sets its contentsScale from the view's
// point-to-backing conversion (the smaller of the two axis factors).
-(CALayer*) makeBackingLayer
{
CALayer* layer = [self.class.layerClass layer];
CGSize viewScale = [self convertSizeToBacking: CGSizeMake(1.0, 1.0)];
layer.contentsScale = MIN(viewScale.width, viewScale.height);
return layer;
}
// Layer delegate callback: adopt the window's new scale, returning NO when it is unchanged.
-(BOOL) layer: (CALayer *)layer shouldInheritContentsScale: (CGFloat)newScale fromWindow: (NSWindow *)window
{
if (newScale == layer.contentsScale) { return NO; }
layer.contentsScale = newScale;
return YES;
}
@end

View file

@ -248,11 +248,11 @@ private:
// occlusion queries // occlusion queries
std::vector<class LatteQueryObjectGL*> list_queryCacheOcclusion; // cache for unused queries std::vector<class LatteQueryObjectGL*> list_queryCacheOcclusion; // cache for unused queries
// resource garbage collection // resource garbage collection
struct BufferCacheReleaseQueueEntry struct BufferCacheReleaseQueueEntry
{ {
BufferCacheReleaseQueueEntry(VirtualBufferHeap_t* heap, VirtualBufferHeapEntry_t* entry) : m_heap(heap), m_entry(entry) {}; BufferCacheReleaseQueueEntry(VirtualBufferHeap_t* heap, VirtualBufferHeapEntry_t* entry) : m_heap(heap), m_entry(entry) {};
void free() void free()
{ {
virtualBufferHeap_free(m_heap, m_entry); virtualBufferHeap_free(m_heap, m_entry);

View file

@ -33,6 +33,7 @@ enum class RendererAPI
{ {
OpenGL, OpenGL,
Vulkan, Vulkan,
Metal,
MAX MAX
}; };
@ -66,9 +67,9 @@ public:
virtual void SwapBuffers(bool swapTV, bool swapDRC) = 0; virtual void SwapBuffers(bool swapTV, bool swapDRC) = 0;
virtual void HandleScreenshotRequest(LatteTextureView* texView, bool padView){} virtual void HandleScreenshotRequest(LatteTextureView* texView, bool padView){}
virtual void DrawBackbufferQuad(LatteTextureView* texView, RendererOutputShader* shader, bool useLinearTexFilter, virtual void DrawBackbufferQuad(LatteTextureView* texView, RendererOutputShader* shader, bool useLinearTexFilter,
sint32 imageX, sint32 imageY, sint32 imageWidth, sint32 imageHeight, sint32 imageX, sint32 imageY, sint32 imageWidth, sint32 imageHeight,
bool padView, bool clearBackground) = 0; bool padView, bool clearBackground) = 0;
virtual bool BeginFrame(bool mainWindow) = 0; virtual bool BeginFrame(bool mainWindow) = 0;
@ -84,6 +85,7 @@ public:
virtual void DeleteFontTextures() = 0; virtual void DeleteFontTextures() = 0;
GfxVendor GetVendor() const { return m_vendor; } GfxVendor GetVendor() const { return m_vendor; }
virtual bool UseTFViaSSBO() const { return false; }
virtual void AppendOverlayDebugInfo() = 0; virtual void AppendOverlayDebugInfo() = 0;
// rendertarget // rendertarget

View file

@ -1,5 +1,6 @@
#include "Cafe/HW/Latte/Renderer/RendererOuputShader.h" #include "Cafe/HW/Latte/Renderer/RendererOuputShader.h"
#include "Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h" #include "Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h"
#include "HW/Latte/Renderer/Renderer.h"
const std::string RendererOutputShader::s_copy_shader_source = const std::string RendererOutputShader::s_copy_shader_source =
R"( R"(
@ -9,6 +10,19 @@ void main()
} }
)"; )";
// MSL fragment shader for the plain copy output path: samples textureSrc at the
// interpolated uv and writes its rgb with alpha forced to 1.0.
const std::string RendererOutputShader::s_copy_shader_source_mtl =
R"(#include <metal_stdlib>
using namespace metal;
struct VertexOut {
float2 uv;
};
fragment float4 main0(VertexOut in [[stage_in]], texture2d<float> textureSrc [[texture(0)]], sampler samplr [[sampler(0)]]) {
return float4(textureSrc.sample(samplr, in.uv).rgb, 1.0);
}
)";
const std::string RendererOutputShader::s_bicubic_shader_source = const std::string RendererOutputShader::s_bicubic_shader_source =
R"( R"(
vec4 cubic(float x) vec4 cubic(float x)
@ -55,6 +69,57 @@ void main(){
} }
)"; )";
// MSL counterpart of s_bicubic_shader_source: bicubic filtering done with four texture
// samples whose positions and blend weights come from the cubic() weights. The source
// texture resolution is read from the texture itself in main0.
const std::string RendererOutputShader::s_bicubic_shader_source_mtl =
R"(#include <metal_stdlib>
using namespace metal;
float4 cubic(float x) {
float x2 = x * x;
float x3 = x2 * x;
float4 w;
w.x = -x3 + 3 * x2 - 3 * x + 1;
w.y = 3 * x3 - 6 * x2 + 4;
w.z = -3 * x3 + 3 * x2 + 3 * x + 1;
w.w = x3;
return w / 6.0;
}
float4 bcFilter(texture2d<float> textureSrc, sampler samplr, float2 texcoord, float2 texscale) {
float fx = fract(texcoord.x);
float fy = fract(texcoord.y);
texcoord.x -= fx;
texcoord.y -= fy;
float4 xcubic = cubic(fx);
float4 ycubic = cubic(fy);
float4 c = float4(texcoord.x - 0.5, texcoord.x + 1.5, texcoord.y - 0.5, texcoord.y + 1.5);
float4 s = float4(xcubic.x + xcubic.y, xcubic.z + xcubic.w, ycubic.x + ycubic.y, ycubic.z + ycubic.w);
float4 offset = c + float4(xcubic.y, xcubic.w, ycubic.y, ycubic.w) / s;
float4 sample0 = textureSrc.sample(samplr, float2(offset.x, offset.z) * texscale);
float4 sample1 = textureSrc.sample(samplr, float2(offset.y, offset.z) * texscale);
float4 sample2 = textureSrc.sample(samplr, float2(offset.x, offset.w) * texscale);
float4 sample3 = textureSrc.sample(samplr, float2(offset.y, offset.w) * texscale);
float sx = s.x / (s.x + s.y);
float sy = s.z / (s.z + s.w);
return mix(
mix(sample3, sample2, sx),
mix(sample1, sample0, sx), sy);
}
struct VertexOut {
float2 uv;
};
fragment float4 main0(VertexOut in [[stage_in]], texture2d<float> textureSrc [[texture(0)]], sampler samplr [[sampler(0)]]) {
float2 textureSrcResolution = float2(textureSrc.get_width(), textureSrc.get_height());
return float4(bcFilter(textureSrc, samplr, in.uv * textureSrcResolution, float2(1.0, 1.0) / textureSrcResolution).rgb, 1.0);
}
)";
const std::string RendererOutputShader::s_hermite_shader_source = const std::string RendererOutputShader::s_hermite_shader_source =
R"( R"(
// https://www.shadertoy.com/view/MllSzX // https://www.shadertoy.com/view/MllSzX
@ -67,7 +132,7 @@ vec3 CubicHermite (vec3 A, vec3 B, vec3 C, vec3 D, float t)
vec3 b = A - (5.0*B)/2.0 + 2.0*C - D / 2.0; vec3 b = A - (5.0*B)/2.0 + 2.0*C - D / 2.0;
vec3 c = -A/2.0 + C/2.0; vec3 c = -A/2.0 + C/2.0;
vec3 d = B; vec3 d = B;
return a*t3 + b*t2 + c*t + d; return a*t3 + b*t2 + c*t + d;
} }
@ -75,36 +140,36 @@ vec3 CubicHermite (vec3 A, vec3 B, vec3 C, vec3 D, float t)
vec3 BicubicHermiteTexture(vec2 uv, vec4 texelSize) vec3 BicubicHermiteTexture(vec2 uv, vec4 texelSize)
{ {
vec2 pixel = uv*texelSize.zw + 0.5; vec2 pixel = uv*texelSize.zw + 0.5;
vec2 frac = fract(pixel); vec2 frac = fract(pixel);
pixel = floor(pixel) / texelSize.zw - vec2(texelSize.xy/2.0); pixel = floor(pixel) / texelSize.zw - vec2(texelSize.xy/2.0);
vec4 doubleSize = texelSize*2.0; vec4 doubleSize = texelSize*2.0;
vec3 C00 = texture(textureSrc, pixel + vec2(-texelSize.x ,-texelSize.y)).rgb; vec3 C00 = texture(textureSrc, pixel + vec2(-texelSize.x ,-texelSize.y)).rgb;
vec3 C10 = texture(textureSrc, pixel + vec2( 0.0 ,-texelSize.y)).rgb; vec3 C10 = texture(textureSrc, pixel + vec2( 0.0 ,-texelSize.y)).rgb;
vec3 C20 = texture(textureSrc, pixel + vec2( texelSize.x ,-texelSize.y)).rgb; vec3 C20 = texture(textureSrc, pixel + vec2( texelSize.x ,-texelSize.y)).rgb;
vec3 C30 = texture(textureSrc, pixel + vec2( doubleSize.x,-texelSize.y)).rgb; vec3 C30 = texture(textureSrc, pixel + vec2( doubleSize.x,-texelSize.y)).rgb;
vec3 C01 = texture(textureSrc, pixel + vec2(-texelSize.x , 0.0)).rgb; vec3 C01 = texture(textureSrc, pixel + vec2(-texelSize.x , 0.0)).rgb;
vec3 C11 = texture(textureSrc, pixel + vec2( 0.0 , 0.0)).rgb; vec3 C11 = texture(textureSrc, pixel + vec2( 0.0 , 0.0)).rgb;
vec3 C21 = texture(textureSrc, pixel + vec2( texelSize.x , 0.0)).rgb; vec3 C21 = texture(textureSrc, pixel + vec2( texelSize.x , 0.0)).rgb;
vec3 C31 = texture(textureSrc, pixel + vec2( doubleSize.x, 0.0)).rgb; vec3 C31 = texture(textureSrc, pixel + vec2( doubleSize.x, 0.0)).rgb;
vec3 C02 = texture(textureSrc, pixel + vec2(-texelSize.x , texelSize.y)).rgb; vec3 C02 = texture(textureSrc, pixel + vec2(-texelSize.x , texelSize.y)).rgb;
vec3 C12 = texture(textureSrc, pixel + vec2( 0.0 , texelSize.y)).rgb; vec3 C12 = texture(textureSrc, pixel + vec2( 0.0 , texelSize.y)).rgb;
vec3 C22 = texture(textureSrc, pixel + vec2( texelSize.x , texelSize.y)).rgb; vec3 C22 = texture(textureSrc, pixel + vec2( texelSize.x , texelSize.y)).rgb;
vec3 C32 = texture(textureSrc, pixel + vec2( doubleSize.x, texelSize.y)).rgb; vec3 C32 = texture(textureSrc, pixel + vec2( doubleSize.x, texelSize.y)).rgb;
vec3 C03 = texture(textureSrc, pixel + vec2(-texelSize.x , doubleSize.y)).rgb; vec3 C03 = texture(textureSrc, pixel + vec2(-texelSize.x , doubleSize.y)).rgb;
vec3 C13 = texture(textureSrc, pixel + vec2( 0.0 , doubleSize.y)).rgb; vec3 C13 = texture(textureSrc, pixel + vec2( 0.0 , doubleSize.y)).rgb;
vec3 C23 = texture(textureSrc, pixel + vec2( texelSize.x , doubleSize.y)).rgb; vec3 C23 = texture(textureSrc, pixel + vec2( texelSize.x , doubleSize.y)).rgb;
vec3 C33 = texture(textureSrc, pixel + vec2( doubleSize.x, doubleSize.y)).rgb; vec3 C33 = texture(textureSrc, pixel + vec2( doubleSize.x, doubleSize.y)).rgb;
vec3 CP0X = CubicHermite(C00, C10, C20, C30, frac.x); vec3 CP0X = CubicHermite(C00, C10, C20, C30, frac.x);
vec3 CP1X = CubicHermite(C01, C11, C21, C31, frac.x); vec3 CP1X = CubicHermite(C01, C11, C21, C31, frac.x);
vec3 CP2X = CubicHermite(C02, C12, C22, C32, frac.x); vec3 CP2X = CubicHermite(C02, C12, C22, C32, frac.x);
vec3 CP3X = CubicHermite(C03, C13, C23, C33, frac.x); vec3 CP3X = CubicHermite(C03, C13, C23, C33, frac.x);
return CubicHermite(CP0X, CP1X, CP2X, CP3X, frac.y); return CubicHermite(CP0X, CP1X, CP2X, CP3X, frac.y);
} }
@ -114,9 +179,77 @@ void main(){
} }
)"; )";
// MSL counterpart of s_hermite_shader_source: bicubic Hermite filtering over a 4x4
// neighbourhood of texels. texelSize.xy is one texel in uv units and texelSize.zw is the
// resolution, both derived from the outputResolution constant bound at buffer(0).
// doubleSize is the offset of the +2 texel taps (column 3 / row 3), so it must be
// texelSize * 2.0 exactly as in the GLSL version. Squaring the texel size instead would
// make those taps sample almost the centre texel.
const std::string RendererOutputShader::s_hermite_shader_source_mtl =
R"(#include <metal_stdlib>
using namespace metal;
// https://www.shadertoy.com/view/MllSzX
float3 CubicHermite(float3 A, float3 B, float3 C, float3 D, float t) {
float t2 = t*t;
float t3 = t*t*t;
float3 a = -A/2.0 + (3.0*B)/2.0 - (3.0*C)/2.0 + D/2.0;
float3 b = A - (5.0*B)/2.0 + 2.0*C - D / 2.0;
float3 c = -A/2.0 + C/2.0;
float3 d = B;
return a*t3 + b*t2 + c*t + d;
}
float3 BicubicHermiteTexture(texture2d<float> textureSrc, sampler samplr, float2 uv, float4 texelSize) {
float2 pixel = uv*texelSize.zw + 0.5;
float2 frac = fract(pixel);
pixel = floor(pixel) / texelSize.zw - float2(texelSize.xy/2.0);
float4 doubleSize = texelSize*2.0;
float3 C00 = textureSrc.sample(samplr, pixel + float2(-texelSize.x ,-texelSize.y)).rgb;
float3 C10 = textureSrc.sample(samplr, pixel + float2( 0.0 ,-texelSize.y)).rgb;
float3 C20 = textureSrc.sample(samplr, pixel + float2( texelSize.x ,-texelSize.y)).rgb;
float3 C30 = textureSrc.sample(samplr, pixel + float2( doubleSize.x,-texelSize.y)).rgb;
float3 C01 = textureSrc.sample(samplr, pixel + float2(-texelSize.x , 0.0)).rgb;
float3 C11 = textureSrc.sample(samplr, pixel + float2( 0.0 , 0.0)).rgb;
float3 C21 = textureSrc.sample(samplr, pixel + float2( texelSize.x , 0.0)).rgb;
float3 C31 = textureSrc.sample(samplr, pixel + float2( doubleSize.x, 0.0)).rgb;
float3 C02 = textureSrc.sample(samplr, pixel + float2(-texelSize.x , texelSize.y)).rgb;
float3 C12 = textureSrc.sample(samplr, pixel + float2( 0.0 , texelSize.y)).rgb;
float3 C22 = textureSrc.sample(samplr, pixel + float2( texelSize.x , texelSize.y)).rgb;
float3 C32 = textureSrc.sample(samplr, pixel + float2( doubleSize.x, texelSize.y)).rgb;
float3 C03 = textureSrc.sample(samplr, pixel + float2(-texelSize.x , doubleSize.y)).rgb;
float3 C13 = textureSrc.sample(samplr, pixel + float2( 0.0 , doubleSize.y)).rgb;
float3 C23 = textureSrc.sample(samplr, pixel + float2( texelSize.x , doubleSize.y)).rgb;
float3 C33 = textureSrc.sample(samplr, pixel + float2( doubleSize.x, doubleSize.y)).rgb;
float3 CP0X = CubicHermite(C00, C10, C20, C30, frac.x);
float3 CP1X = CubicHermite(C01, C11, C21, C31, frac.x);
float3 CP2X = CubicHermite(C02, C12, C22, C32, frac.x);
float3 CP3X = CubicHermite(C03, C13, C23, C33, frac.x);
return CubicHermite(CP0X, CP1X, CP2X, CP3X, frac.y);
}
struct VertexOut {
float4 position [[position]];
float2 uv;
};
fragment float4 main0(VertexOut in [[stage_in]], texture2d<float> textureSrc [[texture(0)]], sampler samplr [[sampler(0)]], constant float2& outputResolution [[buffer(0)]]) {
float4 texelSize = float4(1.0 / outputResolution.xy, outputResolution.xy);
return float4(BicubicHermiteTexture(textureSrc, samplr, in.uv, texelSize), 1.0);
}
)";
RendererOutputShader::RendererOutputShader(const std::string& vertex_source, const std::string& fragment_source) RendererOutputShader::RendererOutputShader(const std::string& vertex_source, const std::string& fragment_source)
{ {
auto finalFragmentSrc = PrependFragmentPreamble(fragment_source); std::string finalFragmentSrc;
if (g_renderer->GetType() == RendererAPI::Metal)
finalFragmentSrc = fragment_source;
else
finalFragmentSrc = PrependFragmentPreamble(fragment_source);
m_vertex_shader.reset(g_renderer->shader_create(RendererShader::ShaderType::kVertex, 0, 0, vertex_source, false, false)); m_vertex_shader.reset(g_renderer->shader_create(RendererShader::ShaderType::kVertex, 0, 0, vertex_source, false, false));
m_fragment_shader.reset(g_renderer->shader_create(RendererShader::ShaderType::kFragment, 0, 0, finalFragmentSrc, false, false)); m_fragment_shader.reset(g_renderer->shader_create(RendererShader::ShaderType::kFragment, 0, 0, finalFragmentSrc, false, false));
@ -190,9 +323,9 @@ std::string RendererOutputShader::GetOpenGlVertexSource(bool render_upside_down)
R"(#version 420 R"(#version 420
layout(location = 0) smooth out vec2 passUV; layout(location = 0) smooth out vec2 passUV;
out gl_PerVertex out gl_PerVertex
{ {
vec4 gl_Position; vec4 gl_Position;
}; };
void main(){ void main(){
@ -226,7 +359,7 @@ void main(){
vertex_source << vertex_source <<
R"( passUV = vUV; R"( passUV = vUV;
gl_Position = vec4(vPos, 0.0, 1.0); gl_Position = vec4(vPos, 0.0, 1.0);
} }
)"; )";
return vertex_source.str(); return vertex_source.str();
@ -240,9 +373,9 @@ std::string RendererOutputShader::GetVulkanVertexSource(bool render_upside_down)
R"(#version 450 R"(#version 450
layout(location = 0) out vec2 passUV; layout(location = 0) out vec2 passUV;
out gl_PerVertex out gl_PerVertex
{ {
vec4 gl_Position; vec4 gl_Position;
}; };
void main(){ void main(){
@ -276,7 +409,45 @@ void main(){
vertex_source << vertex_source <<
R"( passUV = vUV; R"( passUV = vUV;
gl_Position = vec4(vPos, 0.0, 1.0); gl_Position = vec4(vPos, 0.0, 1.0);
}
)";
return vertex_source.str();
}
std::string RendererOutputShader::GetMetalVertexSource(bool render_upside_down)
{
// vertex shader
std::ostringstream vertex_source;
vertex_source <<
R"(#include <metal_stdlib>
using namespace metal;
struct VertexOut {
float4 position [[position]];
float2 uv;
};
vertex VertexOut main0(ushort vid [[vertex_id]]) {
VertexOut out;
float2 pos;
if (vid == 0) pos = float2(-1.0, -3.0);
else if (vid == 1) pos = float2(-1.0, 1.0);
else if (vid == 2) pos = float2(3.0, 1.0);
out.uv = pos * 0.5 + 0.5;
out.uv.y = 1.0 - out.uv.y;
)";
if (render_upside_down)
{
vertex_source <<
R"( pos.y = -pos.y;
)";
}
vertex_source <<
R"( out.position = float4(pos, 0.0, 1.0);
return out;
} }
)"; )";
return vertex_source.str(); return vertex_source.str();
@ -304,26 +475,43 @@ layout(location = 0) out vec4 colorOut0;
} }
void RendererOutputShader::InitializeStatic() void RendererOutputShader::InitializeStatic()
{ {
std::string vertex_source, vertex_source_ud; if (g_renderer->GetType() == RendererAPI::Metal)
// vertex shader {
if (g_renderer->GetType() == RendererAPI::OpenGL) std::string vertex_source = GetMetalVertexSource(false);
{ std::string vertex_source_ud = GetMetalVertexSource(true);
vertex_source = GetOpenGlVertexSource(false);
vertex_source_ud = GetOpenGlVertexSource(true);
}
else
{
vertex_source = GetVulkanVertexSource(false);
vertex_source_ud = GetVulkanVertexSource(true);
}
s_copy_shader = new RendererOutputShader(vertex_source, s_copy_shader_source);
s_copy_shader_ud = new RendererOutputShader(vertex_source_ud, s_copy_shader_source);
s_bicubic_shader = new RendererOutputShader(vertex_source, s_bicubic_shader_source); s_copy_shader = new RendererOutputShader(vertex_source, s_copy_shader_source_mtl);
s_bicubic_shader_ud = new RendererOutputShader(vertex_source_ud, s_bicubic_shader_source); s_copy_shader_ud = new RendererOutputShader(vertex_source_ud, s_copy_shader_source_mtl);
s_hermit_shader = new RendererOutputShader(vertex_source, s_hermite_shader_source); s_bicubic_shader = new RendererOutputShader(vertex_source, s_bicubic_shader_source_mtl);
s_hermit_shader_ud = new RendererOutputShader(vertex_source_ud, s_hermite_shader_source); s_bicubic_shader_ud = new RendererOutputShader(vertex_source_ud, s_bicubic_shader_source_mtl);
s_hermit_shader = new RendererOutputShader(vertex_source, s_hermite_shader_source_mtl);
s_hermit_shader_ud = new RendererOutputShader(vertex_source_ud, s_hermite_shader_source_mtl);
}
else
{
std::string vertex_source, vertex_source_ud;
// vertex shader
if (g_renderer->GetType() == RendererAPI::OpenGL)
{
vertex_source = GetOpenGlVertexSource(false);
vertex_source_ud = GetOpenGlVertexSource(true);
}
else if (g_renderer->GetType() == RendererAPI::Vulkan)
{
vertex_source = GetVulkanVertexSource(false);
vertex_source_ud = GetVulkanVertexSource(true);
}
s_copy_shader = new RendererOutputShader(vertex_source, s_copy_shader_source);
s_copy_shader_ud = new RendererOutputShader(vertex_source_ud, s_copy_shader_source);
s_bicubic_shader = new RendererOutputShader(vertex_source, s_bicubic_shader_source);
s_bicubic_shader_ud = new RendererOutputShader(vertex_source_ud, s_bicubic_shader_source);
s_hermit_shader = new RendererOutputShader(vertex_source, s_hermite_shader_source);
s_hermit_shader_ud = new RendererOutputShader(vertex_source_ud, s_hermite_shader_source);
}
} }
void RendererOutputShader::ShutdownStatic() void RendererOutputShader::ShutdownStatic()

View file

@ -41,8 +41,9 @@ public:
static RendererOutputShader* s_hermit_shader; static RendererOutputShader* s_hermit_shader;
static RendererOutputShader* s_hermit_shader_ud; static RendererOutputShader* s_hermit_shader_ud;
static std::string GetVulkanVertexSource(bool render_upside_down);
static std::string GetOpenGlVertexSource(bool render_upside_down); static std::string GetOpenGlVertexSource(bool render_upside_down);
static std::string GetVulkanVertexSource(bool render_upside_down);
static std::string GetMetalVertexSource(bool render_upside_down);
static std::string PrependFragmentPreamble(const std::string& shaderSrc); static std::string PrependFragmentPreamble(const std::string& shaderSrc);
@ -64,4 +65,8 @@ private:
static const std::string s_bicubic_shader_source_vk; static const std::string s_bicubic_shader_source_vk;
static const std::string s_hermite_shader_source_vk; static const std::string s_hermite_shader_source_vk;
static const std::string s_copy_shader_source_mtl;
static const std::string s_bicubic_shader_source_mtl;
static const std::string s_hermite_shader_source_mtl;
}; };

View file

@ -1,36 +1,7 @@
#include "Cafe/HW/Latte/Renderer/Vulkan/CocoaSurface.h" #include "Cafe/HW/Latte/Renderer/Vulkan/CocoaSurface.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h" #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h"
#import <Cocoa/Cocoa.h> #include "Cafe/HW/Latte/Renderer/MetalView.h"
#import <QuartzCore/CAMetalLayer.h>
@interface MetalView : NSView
@end
@implementation MetalView
-(BOOL) wantsUpdateLayer { return YES; }
+(Class) layerClass { return [CAMetalLayer class]; }
// copied from https://github.com/KhronosGroup/MoltenVK/blob/master/Demos/Cube/macOS/DemoViewController.m
-(CALayer*) makeBackingLayer
{
CALayer* layer = [self.class.layerClass layer];
CGSize viewScale = [self convertSizeToBacking: CGSizeMake(1.0, 1.0)];
layer.contentsScale = MIN(viewScale.width, viewScale.height);
return layer;
}
-(BOOL) layer: (CALayer *)layer shouldInheritContentsScale: (CGFloat)newScale fromWindow: (NSWindow *)window
{
if (newScale == layer.contentsScale) { return NO; }
layer.contentsScale = newScale;
return YES;
}
@end
VkSurfaceKHR CreateCocoaSurface(VkInstance instance, void* handle) VkSurfaceKHR CreateCocoaSurface(VkInstance instance, void* handle)
{ {

View file

@ -66,7 +66,7 @@ VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback(VkDebugUtilsMessageSeverityFla
if (strstr(pCallbackData->pMessage, "consumes input location")) if (strstr(pCallbackData->pMessage, "consumes input location"))
return VK_FALSE; // false means we dont care return VK_FALSE; // false means we dont care
if (strstr(pCallbackData->pMessage, "blend")) if (strstr(pCallbackData->pMessage, "blend"))
return VK_FALSE; // return VK_FALSE; //
// note: Check if previously used location in VK_EXT_debug_report callback is the same as messageIdNumber under the new extension // note: Check if previously used location in VK_EXT_debug_report callback is the same as messageIdNumber under the new extension
// validation errors which are difficult to fix // validation errors which are difficult to fix
@ -391,8 +391,8 @@ VulkanRenderer::VulkanRenderer()
auto surface = CreateFramebufferSurface(m_instance, gui_getWindowInfo().window_main); auto surface = CreateFramebufferSurface(m_instance, gui_getWindowInfo().window_main);
auto& config = GetConfig(); auto& config = GetConfig();
decltype(config.graphic_device_uuid) zero{}; decltype(config.vk_graphic_device_uuid) zero{};
const bool has_device_set = config.graphic_device_uuid != zero; const bool has_device_set = config.vk_graphic_device_uuid != zero;
VkPhysicalDevice fallbackDevice = VK_NULL_HANDLE; VkPhysicalDevice fallbackDevice = VK_NULL_HANDLE;
@ -412,7 +412,7 @@ VulkanRenderer::VulkanRenderer()
physDeviceProps.pNext = &physDeviceIDProps; physDeviceProps.pNext = &physDeviceIDProps;
vkGetPhysicalDeviceProperties2(device, &physDeviceProps); vkGetPhysicalDeviceProperties2(device, &physDeviceProps);
if (memcmp(config.graphic_device_uuid.data(), physDeviceIDProps.deviceUUID, VK_UUID_SIZE) != 0) if (memcmp(config.vk_graphic_device_uuid.data(), physDeviceIDProps.deviceUUID, VK_UUID_SIZE) != 0)
continue; continue;
} }
@ -425,7 +425,7 @@ VulkanRenderer::VulkanRenderer()
{ {
cemuLog_log(LogType::Force, "The selected GPU could not be found or is not suitable. Falling back to first available device instead"); cemuLog_log(LogType::Force, "The selected GPU could not be found or is not suitable. Falling back to first available device instead");
m_physicalDevice = fallbackDevice; m_physicalDevice = fallbackDevice;
config.graphic_device_uuid = {}; // resetting device selection config.vk_graphic_device_uuid = {}; // resetting device selection
} }
else if (m_physicalDevice == VK_NULL_HANDLE) else if (m_physicalDevice == VK_NULL_HANDLE)
{ {
@ -2369,7 +2369,7 @@ void VulkanRenderer::GetTextureFormatInfoVK(Latte::E_GX2SURFFMT format, bool isD
} }
else { else {
formatInfoOut->vkImageFormat = VK_FORMAT_R4G4B4A4_UNORM_PACK16; formatInfoOut->vkImageFormat = VK_FORMAT_R4G4B4A4_UNORM_PACK16;
formatInfoOut->decoder = TextureDecoder_R4_G4_UNORM_To_RGBA4_vk::getInstance(); formatInfoOut->decoder = TextureDecoder_R4_G4_UNORM_To_ABGR4::getInstance();
} }
} }
else else

View file

@ -73,11 +73,11 @@ public:
return true; return true;
} }
template<typename T> template<typename T>
struct direct_hash struct direct_hash
{ {
size_t operator()(const uint64& k) const noexcept size_t operator()(const uint64& k) const noexcept
{ {
return k; return k;
} }
@ -277,7 +277,6 @@ public:
// texture functions // texture functions
void* texture_acquireTextureUploadBuffer(uint32 size) override; void* texture_acquireTextureUploadBuffer(uint32 size) override;
void texture_releaseTextureUploadBuffer(uint8* mem) override; void texture_releaseTextureUploadBuffer(uint8* mem) override;
TextureDecoder* texture_chooseDecodedFormat(Latte::E_GX2SURFFMT format, bool isDepth, Latte::E_DIM dim, uint32 width, uint32 height) override; TextureDecoder* texture_chooseDecodedFormat(Latte::E_GX2SURFFMT format, bool isDepth, Latte::E_DIM dim, uint32 width, uint32 height) override;
@ -371,7 +370,7 @@ private:
VkRect2D currentScissorRect{}; VkRect2D currentScissorRect{};
// vertex bindings // vertex bindings
struct struct
{ {
uint32 offset; uint32 offset;
}currentVertexBinding[LATTE_MAX_VERTEX_BUFFERS]{}; }currentVertexBinding[LATTE_MAX_VERTEX_BUFFERS]{};
@ -465,12 +464,12 @@ private:
bool debug_utils = false; // VK_EXT_DEBUG_UTILS bool debug_utils = false; // VK_EXT_DEBUG_UTILS
}instanceExtensions; }instanceExtensions;
struct struct
{ {
bool useTFEmulationViaSSBO = true; // emulate transform feedback via shader writes to a storage buffer bool useTFEmulationViaSSBO = true; // emulate transform feedback via shader writes to a storage buffer
}mode; }mode;
struct struct
{ {
uint32 minUniformBufferOffsetAlignment = 256; uint32 minUniformBufferOffsetAlignment = 256;
uint32 nonCoherentAtomSize = 256; uint32 nonCoherentAtomSize = 256;
@ -500,7 +499,7 @@ private:
void CreateCommandBuffers(); void CreateCommandBuffers();
void swapchain_createDescriptorSetLayout(); void swapchain_createDescriptorSetLayout();
// shader // shader
bool IsAsyncPipelineAllowed(uint32 numIndices); bool IsAsyncPipelineAllowed(uint32 numIndices);
@ -515,6 +514,8 @@ private:
void DeleteFontTextures() override; void DeleteFontTextures() override;
bool BeginFrame(bool mainWindow) override; bool BeginFrame(bool mainWindow) override;
bool UseTFViaSSBO() const override { return m_featureControl.mode.useTFEmulationViaSSBO; }
// drawcall emulation // drawcall emulation
PipelineInfo* draw_createGraphicsPipeline(uint32 indexCount); PipelineInfo* draw_createGraphicsPipeline(uint32 indexCount);
PipelineInfo* draw_getOrCreateGraphicsPipeline(uint32 indexCount); PipelineInfo* draw_getOrCreateGraphicsPipeline(uint32 indexCount);
@ -577,7 +578,7 @@ private:
VkDevice m_logicalDevice = VK_NULL_HANDLE; VkDevice m_logicalDevice = VK_NULL_HANDLE;
VkDebugUtilsMessengerEXT m_debugCallback = nullptr; VkDebugUtilsMessengerEXT m_debugCallback = nullptr;
volatile bool m_destructionRequested = false; volatile bool m_destructionRequested = false;
QueueFamilyIndices m_indices{}; QueueFamilyIndices m_indices{};
Semaphore m_pipeline_cache_semaphore; Semaphore m_pipeline_cache_semaphore;
@ -588,7 +589,7 @@ private:
std::unordered_map<uint64, VkDescriptorSet> m_backbufferBlitDescriptorSetCache; std::unordered_map<uint64, VkDescriptorSet> m_backbufferBlitDescriptorSetCache;
VkPipelineLayout m_pipelineLayout{nullptr}; VkPipelineLayout m_pipelineLayout{nullptr};
VkCommandPool m_commandPool{ nullptr }; VkCommandPool m_commandPool{ nullptr };
// buffer to cache uniform vars // buffer to cache uniform vars
VkBuffer m_uniformVarBuffer = VK_NULL_HANDLE; VkBuffer m_uniformVarBuffer = VK_NULL_HANDLE;
VkDeviceMemory m_uniformVarBufferMemory = VK_NULL_HANDLE; VkDeviceMemory m_uniformVarBufferMemory = VK_NULL_HANDLE;
@ -660,19 +661,19 @@ private:
bool m_submitOnIdle{}; // submit current buffer if Latte command processor goes into idle state (no more commands or waiting for externally signaled condition) bool m_submitOnIdle{}; // submit current buffer if Latte command processor goes into idle state (no more commands or waiting for externally signaled condition)
// tracking for dynamic offsets // tracking for dynamic offsets
struct struct
{ {
uint32 uniformVarBufferOffset[VulkanRendererConst::SHADER_STAGE_INDEX_COUNT]; uint32 uniformVarBufferOffset[VulkanRendererConst::SHADER_STAGE_INDEX_COUNT];
struct struct
{ {
uint32 uniformBufferOffset[LATTE_NUM_MAX_UNIFORM_BUFFERS]; uint32 uniformBufferOffset[LATTE_NUM_MAX_UNIFORM_BUFFERS];
}shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_COUNT]; }shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_COUNT];
}dynamicOffsetInfo{}; }dynamicOffsetInfo{};
// streamout // streamout
struct struct
{ {
struct struct
{ {
bool enabled; bool enabled;
uint32 ringBufferOffset; uint32 ringBufferOffset;
@ -722,11 +723,11 @@ private:
accessFlags = 0; accessFlags = 0;
if constexpr ((TSyncOp & BUFFER_SHADER_READ) != 0) if constexpr ((TSyncOp & BUFFER_SHADER_READ) != 0)
{ {
// in theory: VK_ACCESS_INDEX_READ_BIT should be set here too but indices are currently separated // in theory: VK_ACCESS_INDEX_READ_BIT should be set here too but indices are currently separated
stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
accessFlags |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT; accessFlags |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT;
} }
if constexpr ((TSyncOp & BUFFER_SHADER_WRITE) != 0) if constexpr ((TSyncOp & BUFFER_SHADER_WRITE) != 0)
{ {
stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
@ -929,7 +930,6 @@ private:
public: public:
bool GetDisableMultithreadedCompilation() const { return m_featureControl.disableMultithreadedCompilation; } bool GetDisableMultithreadedCompilation() const { return m_featureControl.disableMultithreadedCompilation; }
bool UseTFViaSSBO() const { return m_featureControl.mode.useTFEmulationViaSSBO; }
bool HasSPRIVRoundingModeRTE32() const { return m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32; } bool HasSPRIVRoundingModeRTE32() const { return m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32; }
bool IsDebugUtilsEnabled() const { return m_featureControl.debugMarkersSupported && m_featureControl.instanceExtensions.debug_utils; } bool IsDebugUtilsEnabled() const { return m_featureControl.debugMarkersSupported && m_featureControl.instanceExtensions.debug_utils; }
@ -939,7 +939,7 @@ private:
void debug_genericBarrier(); void debug_genericBarrier();
// shaders // shaders
struct struct
{ {
RendererShaderVk* copySurface_vs{}; RendererShaderVk* copySurface_vs{};
RendererShaderVk* copySurface_psDepth2Color{}; RendererShaderVk* copySurface_psDepth2Color{};

View file

@ -60,7 +60,7 @@ uint64 VulkanRenderer::draw_calculateGraphicsPipelineHash(const LatteFetchShader
uint64 stateHash; uint64 stateHash;
stateHash = draw_calculateMinimalGraphicsPipelineHash(fetchShader, lcr); stateHash = draw_calculateMinimalGraphicsPipelineHash(fetchShader, lcr);
stateHash = (stateHash >> 8) + (stateHash * 0x370531ull) % 0x7F980D3BF9B4639Dull; stateHash = (stateHash >> 8) + (stateHash * 0x370531ull) % 0x7F980D3BF9B4639Dull;
uint32* ctxRegister = lcr.GetRawView(); uint32* ctxRegister = lcr.GetRawView();
if (vertexShader) if (vertexShader)
@ -103,7 +103,7 @@ uint64 VulkanRenderer::draw_calculateGraphicsPipelineHash(const LatteFetchShader
} }
stateHash += renderPassObj->m_hashForPipeline; stateHash += renderPassObj->m_hashForPipeline;
uint32 depthControl = ctxRegister[Latte::REGADDR::DB_DEPTH_CONTROL]; uint32 depthControl = ctxRegister[Latte::REGADDR::DB_DEPTH_CONTROL];
bool stencilTestEnable = depthControl & 1; bool stencilTestEnable = depthControl & 1;
if (stencilTestEnable) if (stencilTestEnable)
@ -111,7 +111,7 @@ uint64 VulkanRenderer::draw_calculateGraphicsPipelineHash(const LatteFetchShader
stateHash += ctxRegister[mmDB_STENCILREFMASK]; stateHash += ctxRegister[mmDB_STENCILREFMASK];
stateHash = std::rotl<uint64>(stateHash, 17); stateHash = std::rotl<uint64>(stateHash, 17);
if(depthControl & (1<<7)) // back stencil enable if(depthControl & (1<<7)) // back stencil enable
{ {
stateHash += ctxRegister[mmDB_STENCILREFMASK_BF]; stateHash += ctxRegister[mmDB_STENCILREFMASK_BF];
stateHash = std::rotl<uint64>(stateHash, 13); stateHash = std::rotl<uint64>(stateHash, 13);
} }
@ -302,7 +302,7 @@ PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
pipelineCompiler->TrackAsCached(vsBaseHash, pipelineHash); pipelineCompiler->TrackAsCached(vsBaseHash, pipelineHash);
// use heuristics based on parameter patterns to determine if the current drawcall is essential (non-skipable) // use heuristics based on parameter patterns to determine if the current drawcall is essential (non-skipable)
bool allowAsyncCompile = false; bool allowAsyncCompile = false;
if (GetConfig().async_compile) if (GetConfig().async_compile)
allowAsyncCompile = IsAsyncPipelineAllowed(indexCount); allowAsyncCompile = IsAsyncPipelineAllowed(indexCount);
@ -735,8 +735,8 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo*
LatteTexture* baseTexture = textureView->baseTexture; LatteTexture* baseTexture = textureView->baseTexture;
// get texture register word 4 // get texture register word 4
uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4]; uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4];
auto imageViewObj = textureView->GetSamplerView(word4); auto imageViewObj = textureView->GetSamplerView(word4);
info.imageView = imageViewObj->m_textureImageView; info.imageView = imageViewObj->m_textureImageView;
vkObjDS->addRef(imageViewObj); vkObjDS->addRef(imageViewObj);
@ -806,7 +806,7 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo*
VK_SAMPLER_ADDRESS_MODE_REPEAT, // WRAP VK_SAMPLER_ADDRESS_MODE_REPEAT, // WRAP
VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, // MIRROR VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, // MIRROR
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // CLAMP_LAST_TEXEL VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // CLAMP_LAST_TEXEL
VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, // MIRROR_ONCE_LAST_TEXEL VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, // MIRROR_ONCE_LAST_TEXEL
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // unsupported HALF_BORDER VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // unsupported HALF_BORDER
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, // unsupported MIRROR_ONCE_HALF_BORDER VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, // unsupported MIRROR_ONCE_HALF_BORDER
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, // CLAMP_BORDER VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, // CLAMP_BORDER
@ -934,7 +934,7 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo*
uniformVarsBufferInfo.buffer = m_uniformVarBuffer; uniformVarsBufferInfo.buffer = m_uniformVarBuffer;
uniformVarsBufferInfo.offset = 0; // fixed offset is always zero since we only use dynamic offsets uniformVarsBufferInfo.offset = 0; // fixed offset is always zero since we only use dynamic offsets
uniformVarsBufferInfo.range = shader->uniform.uniformRangeSize; uniformVarsBufferInfo.range = shader->uniform.uniformRangeSize;
VkWriteDescriptorSet write_descriptor{}; VkWriteDescriptorSet write_descriptor{};
write_descriptor.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; write_descriptor.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write_descriptor.dstSet = result; write_descriptor.dstSet = result;
@ -1234,7 +1234,7 @@ void VulkanRenderer::draw_setRenderPass()
draw_endRenderPass(); draw_endRenderPass();
if (m_state.descriptorSetsChanged) if (m_state.descriptorSetsChanged)
sync_inputTexturesChanged(); sync_inputTexturesChanged();
// assume that FBO changed, update self-dependency state // assume that FBO changed, update self-dependency state
m_state.hasRenderSelfDependency = fboVk->CheckForCollision(m_state.activeVertexDS, m_state.activeGeometryDS, m_state.activePixelDS); m_state.hasRenderSelfDependency = fboVk->CheckForCollision(m_state.activeVertexDS, m_state.activeGeometryDS, m_state.activePixelDS);

View file

@ -111,7 +111,7 @@ FileCache* FileCache::Create(const fs::path& path, uint32 extraVersion)
fileCache->fileTableEntries[0].fileOffset = fileCache->fileTableOffset; fileCache->fileTableEntries[0].fileOffset = fileCache->fileTableOffset;
fileCache->fileTableEntries[0].fileSize = fileCache->fileTableSize; fileCache->fileTableEntries[0].fileSize = fileCache->fileTableSize;
// write header // write header
fs->writeU32(FILECACHE_MAGIC_V3); fs->writeU32(FILECACHE_MAGIC_V3);
fs->writeU32(fileCache->extraVersion); fs->writeU32(fileCache->extraVersion);
fs->writeU64(fileCache->dataOffset); fs->writeU64(fileCache->dataOffset);
@ -316,7 +316,7 @@ bool _uncompressFileData(const uint8* rawData, size_t rawSize, std::vector<uint8
Bytef* compressedInput = (Bytef*)rawData + 4; Bytef* compressedInput = (Bytef*)rawData + 4;
uLongf compressedLen = (uLongf)(rawSize - 4); uLongf compressedLen = (uLongf)(rawSize - 4);
uLongf uncompressedLen = fileSize; uLongf uncompressedLen = fileSize;
dataOut.resize(fileSize); dataOut.resize(fileSize);
int zret = uncompress2(dataOut.data(), &uncompressedLen, compressedInput, &compressedLen); int zret = uncompress2(dataOut.data(), &uncompressedLen, compressedInput, &compressedLen);
if (zret != Z_OK) if (zret != Z_OK)
{ {
@ -462,9 +462,15 @@ void FileCache::_addFileInternal(uint64 name1, uint64 name2, const uint8* fileDa
// write file data // write file data
fileStream->SetPosition(this->dataOffset + currentStartOffset); fileStream->SetPosition(this->dataOffset + currentStartOffset);
fileStream->writeData(rawData, rawSize); fileStream->writeData(rawData, rawSize);
#ifdef __APPLE__
fileStream->Flush();
#endif
// write file table entry // write file table entry
fileStream->SetPosition(this->dataOffset + this->fileTableOffset + (uint64)(sizeof(FileTableEntry)*entryIndex)); fileStream->SetPosition(this->dataOffset + this->fileTableOffset + (uint64)(sizeof(FileTableEntry)*entryIndex));
fileStream->writeData(this->fileTableEntries + entryIndex, sizeof(FileTableEntry)); fileStream->writeData(this->fileTableEntries + entryIndex, sizeof(FileTableEntry));
#ifdef __APPLE__
fileStream->Flush();
#endif
if (isCompressed) if (isCompressed)
free(rawData); free(rawData);
} }

View file

@ -158,7 +158,7 @@ bool cemuLog_log(LogType type, std::string_view text)
bool cemuLog_log(LogType type, std::u8string_view text) bool cemuLog_log(LogType type, std::u8string_view text)
{ {
std::basic_string_view<char> s((char*)text.data(), text.size()); std::basic_string_view<char> s((char*)text.data(), text.size());
return cemuLog_log(type, s); return cemuLog_log(type, s);
} }

View file

@ -52,7 +52,7 @@ enum class LogType : sint32
template <> template <>
struct fmt::formatter<std::u8string_view> : formatter<string_view> { struct fmt::formatter<std::u8string_view> : formatter<string_view> {
template <typename FormatContext> template <typename FormatContext>
auto format(std::u8string_view v, FormatContext& ctx) auto format(std::u8string_view v, FormatContext& ctx)
{ {
string_view s((char*)v.data(), v.size()); string_view s((char*)v.data(), v.size());
return formatter<string_view>::format(s, ctx); return formatter<string_view>::format(s, ctx);
@ -100,7 +100,7 @@ bool cemuLog_log(LogType type, std::basic_string<T> formatStr, TArgs&&... args)
} }
return true; return true;
} }
template<typename T, typename ... TArgs> template<typename T, typename ... TArgs>
bool cemuLog_log(LogType type, const T* format, TArgs&&... args) bool cemuLog_log(LogType type, const T* format, TArgs&&... args)
{ {

View file

@ -116,6 +116,11 @@ void FileStream::extract(std::vector<uint8>& data)
readData(data.data(), fileSize); readData(data.data(), fileSize);
} }
// Flushes the wrapped stream by forwarding to m_fileStream.flush().
// NOTE(review): presumably this pushes buffered writes out to the underlying
// file; confirm against the declared type of m_fileStream.
void FileStream::Flush()
{
m_fileStream.flush();
}
uint32 FileStream::readData(void* data, uint32 length) uint32 FileStream::readData(void* data, uint32 length)
{ {
SyncReadWriteSeek(false); SyncReadWriteSeek(false);

View file

@ -22,6 +22,8 @@ class FileStream
bool SetEndOfFile(); bool SetEndOfFile();
void extract(std::vector<uint8>& data); void extract(std::vector<uint8>& data);
void Flush();
// reading // reading
uint32 readData(void* data, uint32 length); uint32 readData(void* data, uint32 length);
bool readU64(uint64& v); bool readU64(uint64& v);

View file

@ -32,7 +32,7 @@ void CemuConfig::Load(XMLConfigParser& parser)
mlc_path = mlc; mlc_path = mlc;
permanent_storage = parser.get("permanent_storage", permanent_storage); permanent_storage = parser.get("permanent_storage", permanent_storage);
language = parser.get<sint32>("language", wxLANGUAGE_DEFAULT); language = parser.get<sint32>("language", wxLANGUAGE_DEFAULT);
use_discord_presence = parser.get("use_discord_presence", true); use_discord_presence = parser.get("use_discord_presence", true);
fullscreen_menubar = parser.get("fullscreen_menubar", false); fullscreen_menubar = parser.get("fullscreen_menubar", false);
@ -103,7 +103,7 @@ void CemuConfig::Load(XMLConfigParser& parser)
cemuLog_log(LogType::Force, "config load error: can't load recently launched game file: {}", path); cemuLog_log(LogType::Force, "config load error: can't load recently launched game file: {}", path);
} }
} }
recent_nfc_files.clear(); recent_nfc_files.clear();
auto nfc_parser = parser.get("RecentNFCFiles"); auto nfc_parser = parser.get("RecentNFCFiles");
for (auto element = nfc_parser.get("Entry"); element.valid(); element = nfc_parser.get("Entry", element)) for (auto element = nfc_parser.get("Entry"); element.valid(); element = nfc_parser.get("Entry", element))
@ -199,7 +199,7 @@ void CemuConfig::Load(XMLConfigParser& parser)
{ {
graphic_pack_entries[path].try_emplace("_disabled", "true"); graphic_pack_entries[path].try_emplace("_disabled", "true");
} }
for (auto preset = element.get("Preset"); preset.valid(); preset = element.get("Preset", preset)) for (auto preset = element.get("Preset"); preset.valid(); preset = element.get("Preset", preset))
{ {
const std::string category = preset.get("category", ""); const std::string category = preset.get("category", "");
@ -207,13 +207,14 @@ void CemuConfig::Load(XMLConfigParser& parser)
graphic_pack_entries[path].try_emplace(category, active_preset); graphic_pack_entries[path].try_emplace(category, active_preset);
} }
} }
} }
// graphics // graphics
auto graphic = parser.get("Graphic"); auto graphic = parser.get("Graphic");
graphic_api = graphic.get("api", kOpenGL); graphic_api = graphic.get("api", kOpenGL);
graphic.get("device", graphic_device_uuid); graphic.get("vkDevice", vk_graphic_device_uuid);
mtl_graphic_device_uuid = graphic.get("mtlDevice", 0);
vsync = graphic.get("VSync", 0); vsync = graphic.get("VSync", 0);
gx2drawdone_sync = graphic.get("GX2DrawdoneSync", true); gx2drawdone_sync = graphic.get("GX2DrawdoneSync", true);
upscale_filter = graphic.get("UpscaleFilter", kBicubicHermiteFilter); upscale_filter = graphic.get("UpscaleFilter", kBicubicHermiteFilter);
@ -221,6 +222,7 @@ void CemuConfig::Load(XMLConfigParser& parser)
fullscreen_scaling = graphic.get("FullscreenScaling", kKeepAspectRatio); fullscreen_scaling = graphic.get("FullscreenScaling", kKeepAspectRatio);
async_compile = graphic.get("AsyncCompile", async_compile); async_compile = graphic.get("AsyncCompile", async_compile);
vk_accurate_barriers = graphic.get("vkAccurateBarriers", true); // this used to be "VulkanAccurateBarriers" but because we changed the default to true in 1.27.1 the option name had to be changed vk_accurate_barriers = graphic.get("vkAccurateBarriers", true); // this used to be "VulkanAccurateBarriers" but because we changed the default to true in 1.27.1 the option name had to be changed
force_mesh_shaders = graphic.get("ForceMeshShaders", false);
auto overlay_node = graphic.get("Overlay"); auto overlay_node = graphic.get("Overlay");
if(overlay_node.valid()) if(overlay_node.valid())
@ -336,6 +338,8 @@ void CemuConfig::Load(XMLConfigParser& parser)
crash_dump = debug.get("CrashDumpUnix", crash_dump); crash_dump = debug.get("CrashDumpUnix", crash_dump);
#endif #endif
gdb_port = debug.get("GDBPort", 1337); gdb_port = debug.get("GDBPort", 1337);
gpu_capture_dir = debug.get("GPUCaptureDir", "");
framebuffer_fetch = debug.get("FramebufferFetch", true);
// input // input
auto input = parser.get("Input"); auto input = parser.get("Input");
@ -376,7 +380,7 @@ void CemuConfig::Save(XMLConfigParser& parser)
// config.set("cpu_mode", cpu_mode.GetValue()); // config.set("cpu_mode", cpu_mode.GetValue());
//config.set("console_region", console_region.GetValue()); //config.set("console_region", console_region.GetValue());
config.set("console_language", console_language.GetValue()); config.set("console_language", console_language.GetValue());
auto wpos = config.set("window_position"); auto wpos = config.set("window_position");
wpos.set<sint32>("x", window_position.x); wpos.set<sint32>("x", window_position.x);
wpos.set<sint32>("y", window_position.y); wpos.set<sint32>("y", window_position.y);
@ -411,13 +415,13 @@ void CemuConfig::Save(XMLConfigParser& parser)
{ {
launch_files_parser.set("Entry", entry.c_str()); launch_files_parser.set("Entry", entry.c_str());
} }
auto nfc_files_parser = config.set("RecentNFCFiles"); auto nfc_files_parser = config.set("RecentNFCFiles");
for (const auto& entry : recent_nfc_files) for (const auto& entry : recent_nfc_files)
{ {
nfc_files_parser.set("Entry", entry.c_str()); nfc_files_parser.set("Entry", entry.c_str());
} }
// game paths // game paths
auto game_path_parser = config.set("GamePaths"); auto game_path_parser = config.set("GamePaths");
for (const auto& entry : game_paths) for (const auto& entry : game_paths)
@ -458,11 +462,11 @@ void CemuConfig::Save(XMLConfigParser& parser)
entry.set_attribute("disabled", true); entry.set_attribute("disabled", true);
continue; continue;
} }
auto preset = entry.set("Preset"); auto preset = entry.set("Preset");
if(!kv.first.empty()) if(!kv.first.empty())
preset.set("category", kv.first.c_str()); preset.set("category", kv.first.c_str());
preset.set("preset", kv.second.c_str()); preset.set("preset", kv.second.c_str());
} }
} }
@ -470,9 +474,11 @@ void CemuConfig::Save(XMLConfigParser& parser)
// graphics // graphics
auto graphic = config.set("Graphic"); auto graphic = config.set("Graphic");
graphic.set("api", graphic_api); graphic.set("api", graphic_api);
graphic.set("device", graphic_device_uuid); graphic.set("vkDevice", vk_graphic_device_uuid);
graphic.set("mtlDevice", mtl_graphic_device_uuid);
graphic.set("VSync", vsync); graphic.set("VSync", vsync);
graphic.set("GX2DrawdoneSync", gx2drawdone_sync); graphic.set("GX2DrawdoneSync", gx2drawdone_sync);
graphic.set("ForceMeshShaders", force_mesh_shaders);
//graphic.set("PrecompiledShaders", precompiled_shaders.GetValue()); //graphic.set("PrecompiledShaders", precompiled_shaders.GetValue());
graphic.set("UpscaleFilter", upscale_filter); graphic.set("UpscaleFilter", upscale_filter);
graphic.set("DownscaleFilter", downscale_filter); graphic.set("DownscaleFilter", downscale_filter);
@ -537,6 +543,8 @@ void CemuConfig::Save(XMLConfigParser& parser)
debug.set("CrashDumpUnix", crash_dump.GetValue()); debug.set("CrashDumpUnix", crash_dump.GetValue());
#endif #endif
debug.set("GDBPort", gdb_port); debug.set("GDBPort", gdb_port);
debug.set("GPUCaptureDir", gpu_capture_dir);
debug.set("FramebufferFetch", framebuffer_fetch);
// input // input
auto input = config.set("Input"); auto input = config.set("Input");

View file

@ -32,7 +32,7 @@ struct GameEntry
std::wstring save_folder; std::wstring save_folder;
std::wstring update_folder; std::wstring update_folder;
std::wstring dlc_folder; std::wstring dlc_folder;
uint64 legacy_time_played = 0; uint64 legacy_time_played = 0;
uint64 legacy_last_played = 0; uint64 legacy_last_played = 0;
@ -74,6 +74,7 @@ enum GraphicAPI
{ {
kOpenGL = 0, kOpenGL = 0,
kVulkan, kVulkan,
kMetal,
}; };
enum AudioChannels enum AudioChannels
@ -105,7 +106,7 @@ enum class ScreenPosition
kTopRight, kTopRight,
kBottomLeft, kBottomLeft,
kBottomCenter, kBottomCenter,
kBottomRight, kBottomRight,
}; };
enum class PrecompiledShaderOption enum class PrecompiledShaderOption
@ -123,6 +124,23 @@ enum class AccurateShaderMulOption
}; };
ENABLE_ENUM_ITERATORS(AccurateShaderMulOption, AccurateShaderMulOption::False, AccurateShaderMulOption::True); ENABLE_ENUM_ITERATORS(AccurateShaderMulOption, AccurateShaderMulOption::False, AccurateShaderMulOption::True);
// Selectable buffer cache modes. The lowercase display names for these values
// are provided by the fmt::formatter<const BufferCacheMode> specialization.
// NOTE(review): presumably a Metal-backend setting describing where the buffer
// cache is allocated; confirm against the renderer code that consumes it.
enum class BufferCacheMode
{
Auto,
DevicePrivate,
DeviceShared,
Host,
};
// Iterator range spans the first enumerator (Auto) through the last (Host).
ENABLE_ENUM_ITERATORS(BufferCacheMode, BufferCacheMode::Auto, BufferCacheMode::Host);
enum class PositionInvariance
{
Auto,
False,
True,
};
ENABLE_ENUM_ITERATORS(PositionInvariance, PositionInvariance::False, PositionInvariance::True);
enum class CPUMode enum class CPUMode
{ {
SinglecoreInterpreter = 0, SinglecoreInterpreter = 0,
@ -134,7 +152,7 @@ enum class CPUMode
ENABLE_ENUM_ITERATORS(CPUMode, CPUMode::SinglecoreInterpreter, CPUMode::Auto); ENABLE_ENUM_ITERATORS(CPUMode, CPUMode::SinglecoreInterpreter, CPUMode::Auto);
enum class CPUModeLegacy enum class CPUModeLegacy
{ {
SinglecoreInterpreter = 0, SinglecoreInterpreter = 0,
SinglecoreRecompiler = 1, SinglecoreRecompiler = 1,
@ -221,6 +239,37 @@ struct fmt::formatter<const AccurateShaderMulOption> : formatter<string_view> {
} }
}; };
template <> template <>
struct fmt::formatter<const BufferCacheMode> : formatter<string_view> {
	// Formats a BufferCacheMode as its lowercase display name; any value that
	// is not one of the four enumerators is rendered as "unknown".
	template <typename FormatContext>
	auto format(const BufferCacheMode c, FormatContext &ctx) const {
		const string_view label = [c]() -> string_view {
			switch (c)
			{
			case BufferCacheMode::Auto:
				return "auto";
			case BufferCacheMode::DevicePrivate:
				return "device private";
			case BufferCacheMode::DeviceShared:
				return "device shared";
			case BufferCacheMode::Host:
				return "host";
			}
			// reached only for values outside the enumerator list
			return "unknown";
		}();
		return formatter<string_view>::format(label, ctx);
	}
};
template <>
struct fmt::formatter<const PositionInvariance> : formatter<string_view> {
	// Formats a PositionInvariance as "auto", "false" or "true"; any other
	// value is rendered as "unknown".
	template <typename FormatContext>
	auto format(const PositionInvariance c, FormatContext &ctx) const {
		const string_view label = [c]() -> string_view {
			switch (c)
			{
			case PositionInvariance::Auto:
				return "auto";
			case PositionInvariance::False:
				return "false";
			case PositionInvariance::True:
				return "true";
			}
			// reached only for values outside the enumerator list
			return "unknown";
		}();
		return formatter<string_view>::format(label, ctx);
	}
};
template <>
struct fmt::formatter<const CPUMode> : formatter<string_view> { struct fmt::formatter<const CPUMode> : formatter<string_view> {
template <typename FormatContext> template <typename FormatContext>
auto format(const CPUMode c, FormatContext &ctx) const { auto format(const CPUMode c, FormatContext &ctx) const {
@ -270,7 +319,7 @@ struct fmt::formatter<const CafeConsoleRegion> : formatter<string_view> {
case CafeConsoleRegion::TWN: name = wxTRANSLATE("Taiwan"); break; case CafeConsoleRegion::TWN: name = wxTRANSLATE("Taiwan"); break;
case CafeConsoleRegion::Auto: name = wxTRANSLATE("Auto"); break; case CafeConsoleRegion::Auto: name = wxTRANSLATE("Auto"); break;
default: name = wxTRANSLATE("many"); break; default: name = wxTRANSLATE("many"); break;
} }
return formatter<string_view>::format(name, ctx); return formatter<string_view>::format(name, ctx);
} }
@ -312,7 +361,7 @@ struct fmt::formatter<const CrashDump> : formatter<string_view> {
case CrashDump::Lite: name = "Lite"; break; case CrashDump::Lite: name = "Lite"; break;
case CrashDump::Full: name = "Full"; break; case CrashDump::Full: name = "Full"; break;
default: name = "unknown"; break; default: name = "unknown"; break;
} }
return formatter<string_view>::format(name, ctx); return formatter<string_view>::format(name, ctx);
} }
@ -363,7 +412,7 @@ struct CemuConfig
ConfigValue<bool> advanced_ppc_logging{ false }; ConfigValue<bool> advanced_ppc_logging{ false };
ConfigValue<bool> permanent_storage{ true }; ConfigValue<bool> permanent_storage{ true };
ConfigValue<sint32> language{ wxLANGUAGE_DEFAULT }; ConfigValue<sint32> language{ wxLANGUAGE_DEFAULT };
ConfigValue<bool> use_discord_presence{ true }; ConfigValue<bool> use_discord_presence{ true };
ConfigValue<std::string> mlc_path{}; ConfigValue<std::string> mlc_path{};
@ -388,7 +437,7 @@ struct CemuConfig
// optimized access // optimized access
std::set<uint64> game_cache_favorites; // per titleId std::set<uint64> game_cache_favorites; // per titleId
struct _path_hash { struct _path_hash {
std::size_t operator()(const fs::path& path) const { std::size_t operator()(const fs::path& path) const {
return fs::hash_value(path); return fs::hash_value(path);
@ -439,11 +488,13 @@ struct CemuConfig
// graphics // graphics
ConfigValue<GraphicAPI> graphic_api{ kVulkan }; ConfigValue<GraphicAPI> graphic_api{ kVulkan };
std::array<uint8, 16> graphic_device_uuid; std::array<uint8, 16> vk_graphic_device_uuid;
ConfigValue<int> vsync{ 0 }; // 0 = off, 1+ = on depending on render backend uint64 mtl_graphic_device_uuid{ 0 };
ConfigValue<bool> gx2drawdone_sync {true}; ConfigValue<int> vsync{ 0 }; // 0 = off, 1+ = depending on render backend
ConfigValue<bool> gx2drawdone_sync { true };
ConfigValue<bool> render_upside_down{ false }; ConfigValue<bool> render_upside_down{ false };
ConfigValue<bool> async_compile{ true }; ConfigValue<bool> async_compile{ true };
ConfigValue<bool> force_mesh_shaders{ false };
ConfigValue<bool> vk_accurate_barriers{ true }; ConfigValue<bool> vk_accurate_barriers{ true };
@ -502,6 +553,8 @@ struct CemuConfig
// debug // debug
ConfigValueBounds<CrashDump> crash_dump{ CrashDump::Disabled }; ConfigValueBounds<CrashDump> crash_dump{ CrashDump::Disabled };
ConfigValue<uint16> gdb_port{ 1337 }; ConfigValue<uint16> gdb_port{ 1337 };
ConfigValue<std::string> gpu_capture_dir{ "" };
ConfigValue<bool> framebuffer_fetch{ true };
void Load(XMLConfigParser& parser); void Load(XMLConfigParser& parser);
void Save(XMLConfigParser& parser); void Save(XMLConfigParser& parser);
@ -516,7 +569,7 @@ struct CemuConfig
NetworkService GetAccountNetworkService(uint32 persistentId); NetworkService GetAccountNetworkService(uint32 persistentId);
void SetAccountSelectedService(uint32 persistentId, NetworkService serviceIndex); void SetAccountSelectedService(uint32 persistentId, NetworkService serviceIndex);
// emulated usb devices // emulated usb devices
struct struct
{ {
@ -546,5 +599,3 @@ struct CemuConfig
typedef XMLDataConfig<CemuConfig, &CemuConfig::Load, &CemuConfig::Save> XMLCemuConfig_t; typedef XMLDataConfig<CemuConfig, &CemuConfig::Load, &CemuConfig::Save> XMLCemuConfig_t;
extern XMLCemuConfig_t g_config; extern XMLCemuConfig_t g_config;
inline CemuConfig& GetConfig() { return g_config.data(); } inline CemuConfig& GetConfig() { return g_config.data(); }

View file

@ -1,4 +1,4 @@
add_library(CemuGui add_library(CemuGui
canvas/IRenderCanvas.h canvas/IRenderCanvas.h
canvas/OpenGLCanvas.cpp canvas/OpenGLCanvas.cpp
canvas/OpenGLCanvas.h canvas/OpenGLCanvas.h
@ -129,6 +129,13 @@ add_library(CemuGui
wxHelper.h wxHelper.h
) )
# The Metal render canvas is only added to CemuGui when the Metal backend is enabled.
if(ENABLE_METAL)
target_sources(CemuGui PRIVATE
canvas/MetalCanvas.cpp
canvas/MetalCanvas.h
)
endif()
set_property(TARGET CemuGui PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") set_property(TARGET CemuGui PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")

View file

@ -383,7 +383,8 @@ void CemuApp::OnAssertFailure(const wxChar* file, int line, const wxChar* func,
#if BOOST_OS_WINDOWS #if BOOST_OS_WINDOWS
DumpThreadStackTrace(); DumpThreadStackTrace();
#endif #endif
cemu_assert_debug(false); // HACK
//cemu_assert_debug(false);
} }
int CemuApp::FilterEvent(wxEvent& event) int CemuApp::FilterEvent(wxEvent& event)
@ -567,5 +568,3 @@ void CemuApp::ActivateApp(wxActivateEvent& event)
g_window_info.app_active = event.GetActive(); g_window_info.app_active = event.GetActive();
event.Skip(); event.Skip();
} }

View file

@ -8,6 +8,7 @@
#include <wx/wupdlock.h> #include <wx/wupdlock.h>
#include <wx/slider.h> #include <wx/slider.h>
#include "config/CemuConfig.h"
#include "gui/helpers/wxHelpers.h" #include "gui/helpers/wxHelpers.h"
#include "input/InputManager.h" #include "input/InputManager.h"
@ -61,7 +62,7 @@ GameProfileWindow::GameProfileWindow(wxWindow* parent, uint64_t title_id)
const sint32 m_cpu_modeNChoices = std::size(cpu_modes); const sint32 m_cpu_modeNChoices = std::size(cpu_modes);
m_cpu_mode = new wxChoice(box, wxID_ANY, wxDefaultPosition, wxDefaultSize, m_cpu_modeNChoices, cpu_modes, 0); m_cpu_mode = new wxChoice(box, wxID_ANY, wxDefaultPosition, wxDefaultSize, m_cpu_modeNChoices, cpu_modes, 0);
m_cpu_mode->SetToolTip(_("Set the CPU emulation mode")); m_cpu_mode->SetToolTip(_("Set the CPU emulation mode"));
first_row->Add(m_cpu_mode, 0, wxALL, 5); first_row->Add(m_cpu_mode, 0, wxALL, 5);
first_row->Add(new wxStaticText(box, wxID_ANY, _("Thread quantum")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5); first_row->Add(new wxStaticText(box, wxID_ANY, _("Thread quantum")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
@ -112,10 +113,14 @@ GameProfileWindow::GameProfileWindow(wxWindow* parent, uint64_t title_id)
first_row->Add(new wxStaticText(panel, wxID_ANY, _("Graphics API")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5); first_row->Add(new wxStaticText(panel, wxID_ANY, _("Graphics API")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
wxString gapi_values[] = { "", "OpenGL", "Vulkan" }; wxString gapi_values[] = { "", "OpenGL", "Vulkan",
#if ENABLE_METAL
"Metal"
#endif
};
m_graphic_api = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(gapi_values), gapi_values); m_graphic_api = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(gapi_values), gapi_values);
first_row->Add(m_graphic_api, 0, wxALL, 5); first_row->Add(m_graphic_api, 0, wxALL, 5);
first_row->Add(new wxStaticText(panel, wxID_ANY, _("Shader multiplication accuracy")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5); first_row->Add(new wxStaticText(panel, wxID_ANY, _("Shader multiplication accuracy")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
wxString mul_values[] = { _("false"), _("true")}; wxString mul_values[] = { _("false"), _("true")};
@ -123,6 +128,27 @@ GameProfileWindow::GameProfileWindow(wxWindow* parent, uint64_t title_id)
m_shader_mul_accuracy->SetToolTip(_("EXPERT OPTION\nControls the accuracy of floating point multiplication in shaders.\n\nRecommended: true")); m_shader_mul_accuracy->SetToolTip(_("EXPERT OPTION\nControls the accuracy of floating point multiplication in shaders.\n\nRecommended: true"));
first_row->Add(m_shader_mul_accuracy, 0, wxALL, 5); first_row->Add(m_shader_mul_accuracy, 0, wxALL, 5);
// Fast math toggle. ApplyProfile()/SaveProfile() cast the selection index to/from bool,
// so the entry order must stay { false, true }.
first_row->Add(new wxStaticText(panel, wxID_ANY, _("Fast math")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
wxString math_values[] = { _("false"), _("true") };
m_fast_math = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(math_values), math_values);
m_fast_math->SetToolTip(_("EXPERT OPTION\nEnables fast math for all shaders. May (rarely) cause graphical bugs.\n\nMetal only\n\nRecommended: true"));
first_row->Add(m_fast_math, 0, wxALL, 5);
// Buffer cache mode. The selection index is cast directly to/from BufferCacheMode in
// ApplyProfile()/SaveProfile(), so the entry order here presumably has to mirror the
// enum's declaration order - TODO confirm against the BufferCacheMode definition.
first_row->Add(new wxStaticText(panel, wxID_ANY, _("Buffer cache mode")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
wxString cache_values[] = { _("auto"), _("device private"), _("device shared"), _("host") };
m_buffer_cache_mode = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(cache_values), cache_values);
m_buffer_cache_mode->SetToolTip(_("EXPERT OPTION\nDecides how the buffer cache memory will be managed.\n\nMetal only\n\nRecommended: auto"));
first_row->Add(m_buffer_cache_mode, 0, wxALL, 5);
// Position invariance. The selection index is cast directly to/from PositionInvariance in
// ApplyProfile()/SaveProfile(); entry order presumably mirrors the enum - TODO confirm.
first_row->Add(new wxStaticText(panel, wxID_ANY, _("Position invariance")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
wxString pos_values[] = { _("auto"), _("false"), _("true") };
m_position_invariance = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(pos_values), pos_values);
m_position_invariance->SetToolTip(_("EXPERT OPTION\nDisables most optimizations for vertex positions. May fix polygon cutouts or flickering in some games.\n\nMetal only\n\nRecommended: auto"));
first_row->Add(m_position_invariance, 0, wxALL, 5);
/*first_row->Add(new wxStaticText(panel, wxID_ANY, _("GPU buffer cache accuracy")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5); /*first_row->Add(new wxStaticText(panel, wxID_ANY, _("GPU buffer cache accuracy")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
wxString accuarcy_values[] = { _("high"), _("medium"), _("low") }; wxString accuarcy_values[] = { _("high"), _("medium"), _("low") };
m_cache_accuracy = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(accuarcy_values), accuarcy_values); m_cache_accuracy = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(accuarcy_values), accuarcy_values);
@ -249,7 +275,7 @@ void GameProfileWindow::ApplyProfile()
// general // general
m_load_libs->SetValue(m_game_profile.m_loadSharedLibraries.value()); m_load_libs->SetValue(m_game_profile.m_loadSharedLibraries.value());
m_start_with_padview->SetValue(m_game_profile.m_startWithPadView); m_start_with_padview->SetValue(m_game_profile.m_startWithPadView);
// cpu // cpu
// wxString cpu_modes[] = { _("Singlecore-Interpreter"), _("Singlecore-Recompiler"), _("Triplecore-Recompiler"), _("Auto (recommended)") }; // wxString cpu_modes[] = { _("Singlecore-Interpreter"), _("Singlecore-Recompiler"), _("Triplecore-Recompiler"), _("Auto (recommended)") };
switch(m_game_profile.m_cpuMode.value()) switch(m_game_profile.m_cpuMode.value())
@ -258,24 +284,27 @@ void GameProfileWindow::ApplyProfile()
case CPUMode::SinglecoreRecompiler: m_cpu_mode->SetSelection(1); break; case CPUMode::SinglecoreRecompiler: m_cpu_mode->SetSelection(1); break;
case CPUMode::DualcoreRecompiler: m_cpu_mode->SetSelection(2); break; case CPUMode::DualcoreRecompiler: m_cpu_mode->SetSelection(2); break;
case CPUMode::MulticoreRecompiler: m_cpu_mode->SetSelection(2); break; case CPUMode::MulticoreRecompiler: m_cpu_mode->SetSelection(2); break;
default: m_cpu_mode->SetSelection(3); default: m_cpu_mode->SetSelection(3);
} }
m_thread_quantum->SetStringSelection(fmt::format("{}", m_game_profile.m_threadQuantum)); m_thread_quantum->SetStringSelection(fmt::format("{}", m_game_profile.m_threadQuantum));
// gpu // gpu
if (!m_game_profile.m_graphics_api.has_value()) if (!m_game_profile.m_graphics_api.has_value())
m_graphic_api->SetSelection(0); // selecting "" m_graphic_api->SetSelection(0); // selecting ""
else else
m_graphic_api->SetSelection(1 + m_game_profile.m_graphics_api.value()); // "", OpenGL, Vulkan m_graphic_api->SetSelection(1 + m_game_profile.m_graphics_api.value()); // "", OpenGL, Vulkan, Metal
m_shader_mul_accuracy->SetSelection((int)m_game_profile.m_accurateShaderMul); m_shader_mul_accuracy->SetSelection((int)m_game_profile.m_accurateShaderMul);
m_fast_math->SetSelection((int)m_game_profile.m_fastMath);
m_buffer_cache_mode->SetSelection((int)m_game_profile.m_bufferCacheMode);
m_position_invariance->SetSelection((int)m_game_profile.m_positionInvariance);
//// audio //// audio
//m_disable_audio->Set3StateValue(GetCheckboxState(m_game_profile.disableAudio)); //m_disable_audio->Set3StateValue(GetCheckboxState(m_game_profile.disableAudio));
// controller // controller
auto profiles = InputManager::get_profiles(); auto profiles = InputManager::get_profiles();
for (const auto& cb : m_controller_profile) for (const auto& cb : m_controller_profile)
{ {
cb->Clear(); cb->Clear();
@ -293,7 +322,7 @@ void GameProfileWindow::ApplyProfile()
const auto& v = m_game_profile.m_controllerProfile[i].value(); const auto& v = m_game_profile.m_controllerProfile[i].value();
m_controller_profile[i]->SetStringSelection(wxString::FromUTF8(v)); m_controller_profile[i]->SetStringSelection(wxString::FromUTF8(v));
} }
else else
m_controller_profile[i]->SetSelection(wxNOT_FOUND); m_controller_profile[i]->SetSelection(wxNOT_FOUND);
} }
@ -317,7 +346,7 @@ void GameProfileWindow::SaveProfile()
m_game_profile.m_cpuMode = CPUMode::Auto; m_game_profile.m_cpuMode = CPUMode::Auto;
} }
const wxString thread_quantum = m_thread_quantum->GetStringSelection(); const wxString thread_quantum = m_thread_quantum->GetStringSelection();
if (!thread_quantum.empty()) if (!thread_quantum.empty())
{ {
@ -330,11 +359,14 @@ void GameProfileWindow::SaveProfile()
m_game_profile.m_accurateShaderMul = (AccurateShaderMulOption)m_shader_mul_accuracy->GetSelection(); m_game_profile.m_accurateShaderMul = (AccurateShaderMulOption)m_shader_mul_accuracy->GetSelection();
if (m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::False && m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::True) if (m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::False && m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::True)
m_game_profile.m_accurateShaderMul = AccurateShaderMulOption::True; // force a legal value m_game_profile.m_accurateShaderMul = AccurateShaderMulOption::True; // force a legal value
m_game_profile.m_fastMath = (bool)m_fast_math->GetSelection();
m_game_profile.m_bufferCacheMode = (BufferCacheMode)m_buffer_cache_mode->GetSelection();
m_game_profile.m_positionInvariance = (PositionInvariance)m_position_invariance->GetSelection();
if (m_graphic_api->GetSelection() == 0) if (m_graphic_api->GetSelection() == 0)
m_game_profile.m_graphics_api = {}; m_game_profile.m_graphics_api = {};
else else
m_game_profile.m_graphics_api = (GraphicAPI)(m_graphic_api->GetSelection() - 1); // "", OpenGL, Vulkan m_game_profile.m_graphics_api = (GraphicAPI)(m_graphic_api->GetSelection() - 1); // "", OpenGL, Vulkan, Metal
// controller // controller
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
@ -365,4 +397,4 @@ void GameProfileWindow::SetSliderValue(wxSlider* slider, sint32 new_value) const
slider_event.SetEventObject(slider); slider_event.SetEventObject(slider);
slider_event.SetClientData((void*)IsFrozen()); slider_event.SetClientData((void*)IsFrozen());
wxPostEvent(slider->GetEventHandler(), slider_event); wxPostEvent(slider->GetEventHandler(), slider_event);
} }

View file

@ -40,6 +40,9 @@ private:
wxChoice* m_graphic_api; wxChoice* m_graphic_api;
wxChoice* m_shader_mul_accuracy; wxChoice* m_shader_mul_accuracy;
wxChoice* m_fast_math;
wxChoice* m_buffer_cache_mode;
wxChoice* m_position_invariance;
//wxChoice* m_cache_accuracy; //wxChoice* m_cache_accuracy;
// audio // audio
@ -47,4 +50,4 @@ private:
// controller // controller
wxComboBox* m_controller_profile[8]; wxComboBox* m_controller_profile[8];
}; };

View file

@ -10,6 +10,7 @@
#include <wx/collpane.h> #include <wx/collpane.h>
#include <wx/clrpicker.h> #include <wx/clrpicker.h>
#include <wx/cshelp.h> #include <wx/cshelp.h>
#include <wx/textctrl.h>
#include <wx/textdlg.h> #include <wx/textdlg.h>
#include <wx/hyperlink.h> #include <wx/hyperlink.h>
@ -27,6 +28,9 @@
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h" #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h" #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
#if ENABLE_METAL
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#endif
#include "Cafe/Account/Account.h" #include "Cafe/Account/Account.h"
#include <boost/tokenizer.hpp> #include <boost/tokenizer.hpp>
@ -93,6 +97,19 @@ private:
VulkanRenderer::DeviceInfo m_device_info; VulkanRenderer::DeviceInfo m_device_info;
}; };
#if ENABLE_METAL
// Client data attached to each entry of the graphics device choice while the
// Metal backend is selected. It stores a copy of the device description so the
// uuid of the chosen device can be read back when the configuration is stored.
class wxMetalUUID : public wxClientData
{
public:
	// explicit: a DeviceInfo must never convert silently into client data
	explicit wxMetalUUID(const MetalRenderer::DeviceInfo& info)
		: m_device_info(info) {}

	// Returns the device description captured at construction.
	const MetalRenderer::DeviceInfo& GetDeviceInfo() const { return m_device_info; }

private:
	MetalRenderer::DeviceInfo m_device_info;
};
#endif
class wxAccountData : public wxClientData class wxAccountData : public wxClientData
{ {
public: public:
@ -101,7 +118,7 @@ public:
Account& GetAccount() { return m_account; } Account& GetAccount() { return m_account; }
const Account& GetAccount() const { return m_account; } const Account& GetAccount() const { return m_account; }
private: private:
Account m_account; Account m_account;
}; };
@ -311,12 +328,14 @@ wxPanel* GeneralSettings2::AddGraphicsPage(wxNotebook* notebook)
row->Add(new wxStaticText(box, wxID_ANY, _("Graphics API")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5); row->Add(new wxStaticText(box, wxID_ANY, _("Graphics API")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
sint32 api_size = 1; sint32 api_size = 1;
wxString choices[2] = { "OpenGL" }; wxString choices[3] = { "OpenGL" };
if (g_vulkan_available) if (g_vulkan_available)
{ {
choices[1] = "Vulkan"; choices[api_size++] = "Vulkan";
api_size = 2;
} }
#if ENABLE_METAL
choices[api_size++] = "Metal";
#endif
m_graphic_api = new wxChoice(box, wxID_ANY, wxDefaultPosition, wxDefaultSize, api_size, choices); m_graphic_api = new wxChoice(box, wxID_ANY, wxDefaultPosition, wxDefaultSize, api_size, choices);
m_graphic_api->SetSelection(0); m_graphic_api->SetSelection(0);
@ -348,6 +367,10 @@ wxPanel* GeneralSettings2::AddGraphicsPage(wxNotebook* notebook)
m_gx2drawdone_sync->SetToolTip(_("If synchronization is requested by the game, the emulated CPU will wait for the GPU to finish all operations.\nThis is more accurate behavior, but may cause lower performance")); m_gx2drawdone_sync->SetToolTip(_("If synchronization is requested by the game, the emulated CPU will wait for the GPU to finish all operations.\nThis is more accurate behavior, but may cause lower performance"));
graphic_misc_row->Add(m_gx2drawdone_sync, 0, wxALL, 5); graphic_misc_row->Add(m_gx2drawdone_sync, 0, wxALL, 5);
m_force_mesh_shaders = new wxCheckBox(box, wxID_ANY, _("Force mesh shaders"));
m_force_mesh_shaders->SetToolTip(_("Force mesh shaders on all GPUs that support them. Mesh shaders are disabled by default on Intel GPUs due to potential stability issues"));
graphic_misc_row->Add(m_force_mesh_shaders, 0, wxALL, 5);
box_sizer->Add(graphic_misc_row, 1, wxEXPAND, 5); box_sizer->Add(graphic_misc_row, 1, wxEXPAND, 5);
graphics_panel_sizer->Add(box_sizer, 0, wxEXPAND | wxALL, 5); graphics_panel_sizer->Add(box_sizer, 0, wxEXPAND | wxALL, 5);
} }
@ -763,7 +786,7 @@ wxPanel* GeneralSettings2::AddAccountPage(wxNotebook* notebook)
auto* row = new wxFlexGridSizer(0, 2, 0, 0); auto* row = new wxFlexGridSizer(0, 2, 0, 0);
row->SetFlexibleDirection(wxBOTH); row->SetFlexibleDirection(wxBOTH);
row->SetNonFlexibleGrowMode(wxFLEX_GROWMODE_SPECIFIED); row->SetNonFlexibleGrowMode(wxFLEX_GROWMODE_SPECIFIED);
const wxImage tmp = wxBITMAP_PNG_FROM_DATA(PNG_ERROR).ConvertToImage(); const wxImage tmp = wxBITMAP_PNG_FROM_DATA(PNG_ERROR).ConvertToImage();
m_validate_online = new wxBitmapButton(box, wxID_ANY, tmp.Scale(16, 16)); m_validate_online = new wxBitmapButton(box, wxID_ANY, tmp.Scale(16, 16));
m_validate_online->Bind(wxEVT_BUTTON, &GeneralSettings2::OnShowOnlineValidator, this); m_validate_online->Bind(wxEVT_BUTTON, &GeneralSettings2::OnShowOnlineValidator, this);
@ -773,7 +796,7 @@ wxPanel* GeneralSettings2::AddAccountPage(wxNotebook* notebook)
row->Add(m_online_status, 1, wxALL | wxALIGN_CENTRE_VERTICAL, 5); row->Add(m_online_status, 1, wxALL | wxALIGN_CENTRE_VERTICAL, 5);
box_sizer->Add(row, 1, wxEXPAND, 5); box_sizer->Add(row, 1, wxEXPAND, 5);
auto* tutorial_link = new wxHyperlinkCtrl(box, wxID_ANY, _("Online play tutorial"), "https://cemu.info/online-guide"); auto* tutorial_link = new wxHyperlinkCtrl(box, wxID_ANY, _("Online play tutorial"), "https://cemu.info/online-guide");
box_sizer->Add(tutorial_link, 0, wxALL, 5); box_sizer->Add(tutorial_link, 0, wxALL, 5);
@ -876,6 +899,33 @@ wxPanel* GeneralSettings2::AddDebugPage(wxNotebook* notebook)
debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5); debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5);
} }
{
// Row with a label and a text field for the GPU capture output directory.
// The entered text is written to config.gpu_capture_dir in StoreConfig().
// NOTE(review): the tooltip refers to Metal captures, yet this row is built
// regardless of ENABLE_METAL - confirm that is intended.
auto* debug_row = new wxFlexGridSizer(0, 2, 0, 0);
debug_row->SetFlexibleDirection(wxBOTH);
debug_row->SetNonFlexibleGrowMode(wxFLEX_GROWMODE_SPECIFIED);
debug_row->Add(new wxStaticText(panel, wxID_ANY, _("GPU capture save directory"), wxDefaultPosition, wxDefaultSize, 0), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
m_gpu_capture_dir = new wxTextCtrl(panel, wxID_ANY, wxEmptyString, wxDefaultPosition, wxDefaultSize, wxTE_DONTWRAP);
m_gpu_capture_dir->SetMinSize(wxSize(150, -1));
m_gpu_capture_dir->SetToolTip(_("Cemu will save the GPU captures done by selecting Debug -> GPU capture in the menu bar in this directory. If a debugger with support for GPU captures (like Xcode) is attached, the capture will be opened in that debugger instead. If such debugger is not attached, METAL_CAPTURE_ENABLED must be set to 1 as an environment variable."));
debug_row->Add(m_gpu_capture_dir, 0, wxALL | wxEXPAND, 5);
debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5);
}
{
// Row with the "Framebuffer fetch" checkbox; its checked state is written to
// config.framebuffer_fetch in StoreConfig().
auto* debug_row = new wxFlexGridSizer(0, 2, 0, 0);
debug_row->SetFlexibleDirection(wxBOTH);
debug_row->SetNonFlexibleGrowMode(wxFLEX_GROWMODE_SPECIFIED);
m_framebuffer_fetch = new wxCheckBox(panel, wxID_ANY, _("Framebuffer fetch"));
m_framebuffer_fetch->SetToolTip(_("Enable framebuffer fetch for eligible textures on supported devices."));
debug_row->Add(m_framebuffer_fetch, 0, wxALL | wxEXPAND, 5);
debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5);
}
panel->SetSizerAndFit(debug_panel_sizer); panel->SetSizerAndFit(debug_panel_sizer);
return panel; return panel;
@ -891,14 +941,14 @@ GeneralSettings2::GeneralSettings2(wxWindow* parent, bool game_launched)
notebook->AddPage(AddGeneralPage(notebook), _("General")); notebook->AddPage(AddGeneralPage(notebook), _("General"));
notebook->AddPage(AddGraphicsPage(notebook), _("Graphics")); notebook->AddPage(AddGraphicsPage(notebook), _("Graphics"));
notebook->AddPage(AddAudioPage(notebook), _("Audio")); notebook->AddPage(AddAudioPage(notebook), _("Audio"));
notebook->AddPage(AddOverlayPage(notebook), _("Overlay")); notebook->AddPage(AddOverlayPage(notebook), _("Overlay"));
notebook->AddPage(AddAccountPage(notebook), _("Account")); notebook->AddPage(AddAccountPage(notebook), _("Account"));
notebook->AddPage(AddDebugPage(notebook), _("Debug")); notebook->AddPage(AddDebugPage(notebook), _("Debug"));
Bind(wxEVT_CLOSE_WINDOW, &GeneralSettings2::OnClose, this); Bind(wxEVT_CLOSE_WINDOW, &GeneralSettings2::OnClose, this);
// //
sizer->Add(notebook, 1, wxEXPAND | wxALL, 5); sizer->Add(notebook, 1, wxEXPAND | wxALL, 5);
@ -913,7 +963,7 @@ GeneralSettings2::GeneralSettings2(wxWindow* parent, bool game_launched)
ApplyConfig(); ApplyConfig();
HandleGraphicsApiSelection(); HandleGraphicsApiSelection();
DisableSettings(game_launched); DisableSettings(game_launched);
} }
@ -925,7 +975,7 @@ uint32 GeneralSettings2::GetSelectedAccountPersistentId()
return dynamic_cast<wxAccountData*>(m_active_account->GetClientObject(active_account))->GetAccount().GetPersistentId(); return dynamic_cast<wxAccountData*>(m_active_account->GetClientObject(active_account))->GetAccount().GetPersistentId();
} }
void GeneralSettings2::StoreConfig() void GeneralSettings2::StoreConfig()
{ {
auto* app = (CemuApp*)wxTheApp; auto* app = (CemuApp*)wxTheApp;
auto& config = GetConfig(); auto& config = GetConfig();
@ -946,7 +996,6 @@ void GeneralSettings2::StoreConfig()
ScreenSaver::SetInhibit(config.disable_screensaver); ScreenSaver::SetInhibit(config.disable_screensaver);
} }
// -1 is default wx widget value -> set to dummy 0 so mainwindow and padwindow will update it // -1 is default wx widget value -> set to dummy 0 so mainwindow and padwindow will update it
config.window_position = m_save_window_position_size->IsChecked() ? Vector2i{ 0,0 } : Vector2i{-1,-1}; config.window_position = m_save_window_position_size->IsChecked() ? Vector2i{ 0,0 } : Vector2i{-1,-1};
config.window_size = m_save_window_position_size->IsChecked() ? Vector2i{ 0,0 } : Vector2i{-1,-1}; config.window_size = m_save_window_position_size->IsChecked() ? Vector2i{ 0,0 } : Vector2i{-1,-1};
@ -989,7 +1038,7 @@ void GeneralSettings2::StoreConfig()
config.pad_channels = kStereo; // (AudioChannels)m_pad_channels->GetSelection(); config.pad_channels = kStereo; // (AudioChannels)m_pad_channels->GetSelection();
//config.input_channels = (AudioChannels)m_input_channels->GetSelection(); //config.input_channels = (AudioChannels)m_input_channels->GetSelection();
config.input_channels = kMono; // (AudioChannels)m_input_channels->GetSelection(); config.input_channels = kMono; // (AudioChannels)m_input_channels->GetSelection();
config.tv_volume = m_tv_volume->GetValue(); config.tv_volume = m_tv_volume->GetValue();
config.pad_volume = m_pad_volume->GetValue(); config.pad_volume = m_pad_volume->GetValue();
config.input_volume = m_input_volume->GetValue(); config.input_volume = m_input_volume->GetValue();
@ -1025,26 +1074,43 @@ void GeneralSettings2::StoreConfig()
config.graphic_api = (GraphicAPI)m_graphic_api->GetSelection(); config.graphic_api = (GraphicAPI)m_graphic_api->GetSelection();
selection = m_graphic_device->GetSelection(); selection = m_graphic_device->GetSelection();
if(selection != wxNOT_FOUND) if (config.graphic_api == GraphicAPI::kVulkan)
{ {
const auto* info = (wxVulkanUUID*)m_graphic_device->GetClientObject(selection); if (selection != wxNOT_FOUND)
if(info) {
config.graphic_device_uuid = info->GetDeviceInfo().uuid; const auto* info = (wxVulkanUUID*)m_graphic_device->GetClientObject(selection);
else if (info)
config.graphic_device_uuid = {}; config.vk_graphic_device_uuid = info->GetDeviceInfo().uuid;
else
config.vk_graphic_device_uuid = {};
}
else
config.vk_graphic_device_uuid = {};
} }
else else if (config.graphic_api == GraphicAPI::kMetal)
config.graphic_device_uuid = {}; {
if (selection != wxNOT_FOUND)
{
const auto* info = (wxMetalUUID*)m_graphic_device->GetClientObject(selection);
if (info)
config.mtl_graphic_device_uuid = info->GetDeviceInfo().uuid;
else
config.mtl_graphic_device_uuid = {};
}
else
config.mtl_graphic_device_uuid = {};
}
config.vsync = m_vsync->GetSelection(); config.vsync = m_vsync->GetSelection();
config.gx2drawdone_sync = m_gx2drawdone_sync->IsChecked(); config.gx2drawdone_sync = m_gx2drawdone_sync->IsChecked();
config.force_mesh_shaders = m_force_mesh_shaders->IsChecked();
config.async_compile = m_async_compile->IsChecked(); config.async_compile = m_async_compile->IsChecked();
config.upscale_filter = m_upscale_filter->GetSelection(); config.upscale_filter = m_upscale_filter->GetSelection();
config.downscale_filter = m_downscale_filter->GetSelection(); config.downscale_filter = m_downscale_filter->GetSelection();
config.fullscreen_scaling = m_fullscreen_scaling->GetSelection(); config.fullscreen_scaling = m_fullscreen_scaling->GetSelection();
config.overlay.position = (ScreenPosition)m_overlay_position->GetSelection(); wxASSERT((int)config.overlay.position <= (int)ScreenPosition::kBottomRight); config.overlay.position = (ScreenPosition)m_overlay_position->GetSelection(); wxASSERT((int)config.overlay.position <= (int)ScreenPosition::kBottomRight);
config.overlay.text_color = m_overlay_font_color->GetColour().GetRGBA(); config.overlay.text_color = m_overlay_font_color->GetColour().GetRGBA();
config.overlay.text_scale = m_overlay_scale->GetSelection() * 25 + 50; config.overlay.text_scale = m_overlay_scale->GetSelection() * 25 + 50;
@ -1071,6 +1137,8 @@ void GeneralSettings2::StoreConfig()
// debug // debug
config.crash_dump = (CrashDump)m_crash_dump->GetSelection(); config.crash_dump = (CrashDump)m_crash_dump->GetSelection();
config.gdb_port = m_gdb_port->GetValue(); config.gdb_port = m_gdb_port->GetValue();
config.gpu_capture_dir = m_gpu_capture_dir->GetValue().utf8_string();
config.framebuffer_fetch = m_framebuffer_fetch->IsChecked();
g_config.Save(); g_config.Save();
} }
@ -1102,7 +1170,7 @@ void GeneralSettings2::ValidateConfig()
void GeneralSettings2::DisableSettings(bool game_launched) void GeneralSettings2::DisableSettings(bool game_launched)
{ {
} }
void GeneralSettings2::OnAudioLatencyChanged(wxCommandEvent& event) void GeneralSettings2::OnAudioLatencyChanged(wxCommandEvent& event)
@ -1113,7 +1181,7 @@ void GeneralSettings2::OnAudioLatencyChanged(wxCommandEvent& event)
void GeneralSettings2::OnVolumeChanged(wxCommandEvent& event) void GeneralSettings2::OnVolumeChanged(wxCommandEvent& event)
{ {
if(event.GetEventObject() == m_input_volume) if(event.GetEventObject() == m_input_volume)
{ {
std::shared_lock lock(g_audioInputMutex); std::shared_lock lock(g_audioInputMutex);
@ -1137,7 +1205,7 @@ void GeneralSettings2::OnVolumeChanged(wxCommandEvent& event)
g_tvAudio->SetVolume(event.GetInt()); g_tvAudio->SetVolume(event.GetInt());
} }
} }
event.Skip(); event.Skip();
} }
@ -1150,7 +1218,7 @@ void GeneralSettings2::OnInputVolumeChanged(wxCommandEvent& event)
g_padAudio->SetInputVolume(event.GetInt()); g_padAudio->SetInputVolume(event.GetInt());
g_padVolume = event.GetInt(); g_padVolume = event.GetInt();
} }
event.Skip(); event.Skip();
} }
@ -1228,7 +1296,7 @@ void GeneralSettings2::UpdateAudioDeviceList()
// todo reset global instance of audio device // todo reset global instance of audio device
} }
void GeneralSettings2::ResetAccountInformation() void GeneralSettings2::ResetAccountInformation()
{ {
m_account_grid->SetSplitterPosition(100); m_account_grid->SetSplitterPosition(100);
m_active_account->SetSelection(0); m_active_account->SetSelection(0);
@ -1256,7 +1324,7 @@ void GeneralSettings2::OnAccountCreate(wxCommandEvent& event)
Account account(dialog.GetPersistentId(), dialog.GetMiiName().ToStdWstring()); Account account(dialog.GetPersistentId(), dialog.GetMiiName().ToStdWstring());
account.Save(); account.Save();
Account::RefreshAccounts(); Account::RefreshAccounts();
const int index = m_active_account->Append(account.ToString(), new wxAccountData(account)); const int index = m_active_account->Append(account.ToString(), new wxAccountData(account));
// update ui // update ui
@ -1265,7 +1333,7 @@ void GeneralSettings2::OnAccountCreate(wxCommandEvent& event)
m_create_account->Enable(m_active_account->GetCount() < 0xC); m_create_account->Enable(m_active_account->GetCount() < 0xC);
m_delete_account->Enable(m_active_account->GetCount() > 1); m_delete_account->Enable(m_active_account->GetCount() > 1);
// send main window event // send main window event
wxASSERT(GetParent()); wxASSERT(GetParent());
wxCommandEvent refresh_event(wxEVT_ACCOUNTLIST_REFRESH); wxCommandEvent refresh_event(wxEVT_ACCOUNTLIST_REFRESH);
@ -1295,7 +1363,7 @@ void GeneralSettings2::OnAccountDelete(wxCommandEvent& event)
return; return;
// todo: ask if saves should be deleted too? // todo: ask if saves should be deleted too?
const fs::path path = account.GetFileName(); const fs::path path = account.GetFileName();
try try
{ {
@ -1313,7 +1381,7 @@ void GeneralSettings2::OnAccountDelete(wxCommandEvent& event)
SystemException sys(ex); SystemException sys(ex);
cemuLog_log(LogType::Force, sys.what()); cemuLog_log(LogType::Force, sys.what());
} }
} }
void GeneralSettings2::OnAccountSettingsChanged(wxPropertyGridEvent& event) void GeneralSettings2::OnAccountSettingsChanged(wxPropertyGridEvent& event)
@ -1368,7 +1436,7 @@ void GeneralSettings2::OnAccountSettingsChanged(wxPropertyGridEvent& event)
else if (property->GetName() == kPropertyEmail) else if (property->GetName() == kPropertyEmail)
{ {
account.SetEmail(value.As<wxString>().ToStdString()); account.SetEmail(value.As<wxString>().ToStdString());
} }
else if (property->GetName() == kPropertyCountry) else if (property->GetName() == kPropertyCountry)
{ {
@ -1376,7 +1444,7 @@ void GeneralSettings2::OnAccountSettingsChanged(wxPropertyGridEvent& event)
} }
else else
cemu_assert_debug(false); cemu_assert_debug(false);
account.Save(); account.Save();
Account::RefreshAccounts(); // refresh internal account list Account::RefreshAccounts(); // refresh internal account list
UpdateAccountInformation(); // refresh on invalid values UpdateAccountInformation(); // refresh on invalid values
@ -1416,7 +1484,7 @@ void GeneralSettings2::UpdateAccountInformation()
gender_property->SetChoiceSelection(std::min(gender_property->GetChoices().GetCount() - 1, (uint32)account.GetGender())); gender_property->SetChoiceSelection(std::min(gender_property->GetChoices().GetCount() - 1, (uint32)account.GetGender()));
m_account_grid->GetProperty(kPropertyEmail)->SetValueFromString(std::string{ account.GetEmail() }); m_account_grid->GetProperty(kPropertyEmail)->SetValueFromString(std::string{ account.GetEmail() });
auto* country_property = dynamic_cast<wxEnumProperty*>(m_account_grid->GetProperty(kPropertyCountry)); auto* country_property = dynamic_cast<wxEnumProperty*>(m_account_grid->GetProperty(kPropertyCountry));
wxASSERT(country_property); wxASSERT(country_property);
int index = (country_property)->GetIndexForValue(account.GetCountry()); int index = (country_property)->GetIndexForValue(account.GetCountry());
@ -1500,9 +1568,9 @@ void GeneralSettings2::HandleGraphicsApiSelection()
int selection = m_vsync->GetSelection(); int selection = m_vsync->GetSelection();
if(selection == wxNOT_FOUND) if(selection == wxNOT_FOUND)
selection = GetConfig().vsync; selection = GetConfig().vsync;
m_vsync->Clear(); m_vsync->Clear();
if(m_graphic_api->GetSelection() == 0) if (m_graphic_api->GetSelection() == 0)
{ {
// OpenGL // OpenGL
m_vsync->AppendString(_("Off")); m_vsync->AppendString(_("Off"));
@ -1517,12 +1585,14 @@ void GeneralSettings2::HandleGraphicsApiSelection()
m_gx2drawdone_sync->Enable(); m_gx2drawdone_sync->Enable();
m_async_compile->Disable(); m_async_compile->Disable();
m_force_mesh_shaders->Disable();
} }
else else if (m_graphic_api->GetSelection() == 1)
{ {
// Vulkan // Vulkan
m_gx2drawdone_sync->Disable(); m_gx2drawdone_sync->Disable();
m_async_compile->Enable(); m_async_compile->Enable();
m_force_mesh_shaders->Disable();
m_vsync->AppendString(_("Off")); m_vsync->AppendString(_("Off"));
m_vsync->AppendString(_("Double buffering")); m_vsync->AppendString(_("Double buffering"));
@ -1532,7 +1602,7 @@ void GeneralSettings2::HandleGraphicsApiSelection()
#endif #endif
m_vsync->Select(selection); m_vsync->Select(selection);
m_graphic_device->Enable(); m_graphic_device->Enable();
auto devices = VulkanRenderer::GetDevices(); auto devices = VulkanRenderer::GetDevices();
m_graphic_device->Clear(); m_graphic_device->Clear();
@ -1547,7 +1617,7 @@ void GeneralSettings2::HandleGraphicsApiSelection()
const auto& config = GetConfig(); const auto& config = GetConfig();
for(size_t i = 0; i < devices.size(); ++i) for(size_t i = 0; i < devices.size(); ++i)
{ {
if(config.graphic_device_uuid == devices[i].uuid) if(config.vk_graphic_device_uuid == devices[i].uuid)
{ {
m_graphic_device->SetSelection(i); m_graphic_device->SetSelection(i);
break; break;
@ -1555,6 +1625,42 @@ void GeneralSettings2::HandleGraphicsApiSelection()
} }
} }
} }
else
{
// Metal
m_gx2drawdone_sync->Disable();
m_async_compile->Enable();
m_force_mesh_shaders->Enable();
m_vsync->AppendString(_("Off"));
m_vsync->AppendString(_("On"));
m_vsync->Select(selection);
m_graphic_device->Enable();
auto devices = MetalRenderer::GetDevices();
m_graphic_device->Clear();
#if ENABLE_METAL
if(!devices.empty())
{
for (const auto& device : devices)
{
m_graphic_device->Append(device.name, new wxMetalUUID(device));
}
m_graphic_device->SetSelection(0);
const auto& config = GetConfig();
for (size_t i = 0; i < devices.size(); ++i)
{
if (config.mtl_graphic_device_uuid == devices[i].uuid)
{
m_graphic_device->SetSelection(i);
break;
}
}
}
#endif
}
} }
void GeneralSettings2::ApplyConfig() void GeneralSettings2::ApplyConfig()
@ -1608,6 +1714,7 @@ void GeneralSettings2::ApplyConfig()
m_vsync->SetSelection(config.vsync); m_vsync->SetSelection(config.vsync);
m_async_compile->SetValue(config.async_compile); m_async_compile->SetValue(config.async_compile);
m_gx2drawdone_sync->SetValue(config.gx2drawdone_sync); m_gx2drawdone_sync->SetValue(config.gx2drawdone_sync);
m_force_mesh_shaders->SetValue(config.force_mesh_shaders);
m_upscale_filter->SetSelection(config.upscale_filter); m_upscale_filter->SetSelection(config.upscale_filter);
m_downscale_filter->SetSelection(config.downscale_filter); m_downscale_filter->SetSelection(config.downscale_filter);
m_fullscreen_scaling->SetSelection(config.fullscreen_scaling); m_fullscreen_scaling->SetSelection(config.fullscreen_scaling);
@ -1658,7 +1765,7 @@ void GeneralSettings2::ApplyConfig()
m_pad_channels->SetSelection(0); m_pad_channels->SetSelection(0);
//m_input_channels->SetSelection(config.pad_channels); //m_input_channels->SetSelection(config.pad_channels);
m_input_channels->SetSelection(0); m_input_channels->SetSelection(0);
SendSliderEvent(m_tv_volume, config.tv_volume); SendSliderEvent(m_tv_volume, config.tv_volume);
if (!config.tv_device.empty() && m_tv_device->HasClientObjectData()) if (!config.tv_device.empty() && m_tv_device->HasClientObjectData())
@ -1675,7 +1782,7 @@ void GeneralSettings2::ApplyConfig()
} }
else else
m_tv_device->SetSelection(0); m_tv_device->SetSelection(0);
SendSliderEvent(m_pad_volume, config.pad_volume); SendSliderEvent(m_pad_volume, config.pad_volume);
if (!config.pad_device.empty() && m_pad_device->HasClientObjectData()) if (!config.pad_device.empty() && m_pad_device->HasClientObjectData())
{ {
@ -1728,6 +1835,8 @@ void GeneralSettings2::ApplyConfig()
// debug // debug
m_crash_dump->SetSelection((int)config.crash_dump.GetValue()); m_crash_dump->SetSelection((int)config.crash_dump.GetValue());
m_gdb_port->SetValue(config.gdb_port.GetValue()); m_gdb_port->SetValue(config.gdb_port.GetValue());
m_gpu_capture_dir->SetValue(wxHelper::FromUtf8(config.gpu_capture_dir.GetValue()));
m_framebuffer_fetch->SetValue(config.framebuffer_fetch);
} }
void GeneralSettings2::OnAudioAPISelected(wxCommandEvent& event) void GeneralSettings2::OnAudioAPISelected(wxCommandEvent& event)
@ -1795,7 +1904,7 @@ void GeneralSettings2::UpdateAudioDevice()
} }
} }
} }
// pad audio device // pad audio device
{ {
const auto selection = m_pad_device->GetSelection(); const auto selection = m_pad_device->GetSelection();
@ -1885,14 +1994,14 @@ void GeneralSettings2::OnAudioChannelsSelected(wxCommandEvent& event)
{ {
if (config.tv_channels == (AudioChannels)obj->GetSelection()) if (config.tv_channels == (AudioChannels)obj->GetSelection())
return; return;
config.tv_channels = (AudioChannels)obj->GetSelection(); config.tv_channels = (AudioChannels)obj->GetSelection();
} }
else if (obj == m_pad_channels) else if (obj == m_pad_channels)
{ {
if (config.pad_channels == (AudioChannels)obj->GetSelection()) if (config.pad_channels == (AudioChannels)obj->GetSelection())
return; return;
config.pad_channels = (AudioChannels)obj->GetSelection(); config.pad_channels = (AudioChannels)obj->GetSelection();
} }
else else
@ -2035,23 +2144,23 @@ void GeneralSettings2::OnShowOnlineValidator(wxCommandEvent& event)
const auto selection = m_active_account->GetSelection(); const auto selection = m_active_account->GetSelection();
if (selection == wxNOT_FOUND) if (selection == wxNOT_FOUND)
return; return;
const auto* obj = dynamic_cast<wxAccountData*>(m_active_account->GetClientObject(selection)); const auto* obj = dynamic_cast<wxAccountData*>(m_active_account->GetClientObject(selection));
wxASSERT(obj); wxASSERT(obj);
const auto& account = obj->GetAccount(); const auto& account = obj->GetAccount();
const auto validator = account.ValidateOnlineFiles(); const auto validator = account.ValidateOnlineFiles();
if (validator) // everything valid? shouldn't happen if (validator) // everything valid? shouldn't happen
return; return;
wxString err; wxString err;
err << _("The following error(s) have been found:") << '\n'; err << _("The following error(s) have been found:") << '\n';
if (validator.otp == OnlineValidator::FileState::Missing) if (validator.otp == OnlineValidator::FileState::Missing)
err << _("otp.bin missing in Cemu directory") << '\n'; err << _("otp.bin missing in Cemu directory") << '\n';
else if(validator.otp == OnlineValidator::FileState::Corrupted) else if(validator.otp == OnlineValidator::FileState::Corrupted)
err << _("otp.bin is invalid") << '\n'; err << _("otp.bin is invalid") << '\n';
if (validator.seeprom == OnlineValidator::FileState::Missing) if (validator.seeprom == OnlineValidator::FileState::Missing)
err << _("seeprom.bin missing in Cemu directory") << '\n'; err << _("seeprom.bin missing in Cemu directory") << '\n';
else if(validator.seeprom == OnlineValidator::FileState::Corrupted) else if(validator.seeprom == OnlineValidator::FileState::Corrupted)

View file

@ -28,7 +28,7 @@ private:
bool m_has_account_change = false; // keep track of dirty state of accounts bool m_has_account_change = false; // keep track of dirty state of accounts
wxPanel* AddGeneralPage(wxNotebook* notebook); wxPanel* AddGeneralPage(wxNotebook* notebook);
wxPanel* AddGraphicsPage(wxNotebook* notebook); wxPanel* AddGraphicsPage(wxNotebook* notebook);
wxPanel* AddAudioPage(wxNotebook* notebook); wxPanel* AddAudioPage(wxNotebook* notebook);
@ -53,7 +53,7 @@ private:
// Graphics // Graphics
wxChoice* m_graphic_api, * m_graphic_device; wxChoice* m_graphic_api, * m_graphic_device;
wxChoice* m_vsync; wxChoice* m_vsync;
wxCheckBox *m_async_compile, *m_gx2drawdone_sync; wxCheckBox *m_async_compile, *m_gx2drawdone_sync, *m_force_mesh_shaders;
wxRadioBox* m_upscale_filter, *m_downscale_filter, *m_fullscreen_scaling; wxRadioBox* m_upscale_filter, *m_downscale_filter, *m_fullscreen_scaling;
wxChoice* m_overlay_position, *m_notification_position, *m_overlay_scale, *m_notification_scale; wxChoice* m_overlay_position, *m_notification_position, *m_overlay_scale, *m_notification_scale;
wxCheckBox* m_controller_profile_name, *m_controller_low_battery, *m_shader_compiling, *m_friends_data; wxCheckBox* m_controller_profile_name, *m_controller_low_battery, *m_shader_compiling, *m_friends_data;
@ -79,6 +79,8 @@ private:
// Debug // Debug
wxChoice* m_crash_dump; wxChoice* m_crash_dump;
wxSpinCtrl* m_gdb_port; wxSpinCtrl* m_gdb_port;
wxTextCtrl* m_gpu_capture_dir;
wxCheckBox* m_framebuffer_fetch;
void OnAccountCreate(wxCommandEvent& event); void OnAccountCreate(wxCommandEvent& event);
void OnAccountDelete(wxCommandEvent& event); void OnAccountDelete(wxCommandEvent& event);
@ -107,11 +109,10 @@ private:
void UpdateAudioDevice(); void UpdateAudioDevice();
// refreshes audio device list for dropdown // refreshes audio device list for dropdown
void UpdateAudioDeviceList(); void UpdateAudioDeviceList();
void ResetAccountInformation(); void ResetAccountInformation();
void UpdateAccountInformation(); void UpdateAccountInformation();
void UpdateOnlineAccounts(); void UpdateOnlineAccounts();
void HandleGraphicsApiSelection(); void HandleGraphicsApiSelection();
void ApplyConfig(); void ApplyConfig();
}; };

View file

@ -21,7 +21,7 @@ LoggingWindow::LoggingWindow(wxFrame* parent)
filter_row->Add(new wxStaticText( this, wxID_ANY, _("Filter")), 0, wxALIGN_CENTER_VERTICAL|wxALL, 5 ); filter_row->Add(new wxStaticText( this, wxID_ANY, _("Filter")), 0, wxALIGN_CENTER_VERTICAL|wxALL, 5 );
wxString choices[] = {"Unsupported APIs calls", "Coreinit Logging", "Coreinit File-Access", "Coreinit Thread-Synchronization", "Coreinit Memory", "Coreinit MP", "Coreinit Thread", "nn::nfp", "GX2", "Audio", "Input", "Socket", "Save", "H264", "Graphic pack patches", "Texture cache", "Texture readback", "OpenGL debug output", "Vulkan validation layer"}; wxString choices[] = {"Unsupported APIs calls", "Coreinit Logging", "Coreinit File-Access", "Coreinit Thread-Synchronization", "Coreinit Memory", "Coreinit MP", "Coreinit Thread", "nn::nfp", "GX2", "Audio", "Input", "Socket", "Save", "H264", "Graphic pack patches", "Texture cache", "Texture readback", "OpenGL debug output", "Vulkan validation layer", "Metal debug output"};
m_filter = new wxComboBox( this, wxID_ANY, wxEmptyString, wxDefaultPosition, wxDefaultSize, std::size(choices), choices, 0 ); m_filter = new wxComboBox( this, wxID_ANY, wxEmptyString, wxDefaultPosition, wxDefaultSize, std::size(choices), choices, 0 );
m_filter->Bind(wxEVT_COMBOBOX, &LoggingWindow::OnFilterChange, this); m_filter->Bind(wxEVT_COMBOBOX, &LoggingWindow::OnFilterChange, this);
m_filter->Bind(wxEVT_TEXT, &LoggingWindow::OnFilterChange, this); m_filter->Bind(wxEVT_TEXT, &LoggingWindow::OnFilterChange, this);
@ -83,7 +83,7 @@ void LoggingWindow::Log(std::string_view filter, std::wstring_view message)
void LoggingWindow::OnLogMessage(wxLogEvent& event) void LoggingWindow::OnLogMessage(wxLogEvent& event)
{ {
m_log_list->PushEntry(event.GetFilter(), event.GetMessage()); m_log_list->PushEntry(event.GetFilter(), event.GetMessage());
} }
void LoggingWindow::OnFilterChange(wxCommandEvent& event) void LoggingWindow::OnFilterChange(wxCommandEvent& event)
@ -97,4 +97,3 @@ void LoggingWindow::OnFilterMessageChange(wxCommandEvent& event)
m_log_list->SetFilterMessage(m_filter_message->GetValue()); m_log_list->SetFilterMessage(m_filter_message->GetValue());
event.Skip(); event.Skip();
} }

View file

@ -1,3 +1,5 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "gui/wxgui.h" #include "gui/wxgui.h"
#include "gui/MainWindow.h" #include "gui/MainWindow.h"
#include "gui/guiWrapper.h" #include "gui/guiWrapper.h"
@ -12,6 +14,7 @@
#include "audio/audioDebuggerWindow.h" #include "audio/audioDebuggerWindow.h"
#include "gui/canvas/OpenGLCanvas.h" #include "gui/canvas/OpenGLCanvas.h"
#include "gui/canvas/VulkanCanvas.h" #include "gui/canvas/VulkanCanvas.h"
#include "gui/canvas/MetalCanvas.h"
#include "Cafe/OS/libs/nfc/nfc.h" #include "Cafe/OS/libs/nfc/nfc.h"
#include "Cafe/OS/libs/swkbd/swkbd.h" #include "Cafe/OS/libs/swkbd/swkbd.h"
#include "gui/debugger/DebuggerWindow2.h" #include "gui/debugger/DebuggerWindow2.h"
@ -94,7 +97,7 @@ enum
// options -> account // options -> account
MAINFRAME_MENU_ID_OPTIONS_ACCOUNT_1 = 20350, MAINFRAME_MENU_ID_OPTIONS_ACCOUNT_1 = 20350,
MAINFRAME_MENU_ID_OPTIONS_ACCOUNT_12 = 20350 + 11, MAINFRAME_MENU_ID_OPTIONS_ACCOUNT_12 = 20350 + 11,
// options -> system language // options -> system language
MAINFRAME_MENU_ID_OPTIONS_LANGUAGE_JAPANESE = 20500, MAINFRAME_MENU_ID_OPTIONS_LANGUAGE_JAPANESE = 20500,
MAINFRAME_MENU_ID_OPTIONS_LANGUAGE_ENGLISH, MAINFRAME_MENU_ID_OPTIONS_LANGUAGE_ENGLISH,
@ -137,6 +140,7 @@ enum
MAINFRAME_MENU_ID_DEBUG_VIEW_TEXTURE_RELATIONS, MAINFRAME_MENU_ID_DEBUG_VIEW_TEXTURE_RELATIONS,
MAINFRAME_MENU_ID_DEBUG_AUDIO_AUX_ONLY, MAINFRAME_MENU_ID_DEBUG_AUDIO_AUX_ONLY,
MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS, MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS,
MAINFRAME_MENU_ID_DEBUG_GPU_CAPTURE,
// debug->logging // debug->logging
MAINFRAME_MENU_ID_DEBUG_LOGGING0 = 21500, MAINFRAME_MENU_ID_DEBUG_LOGGING0 = 21500,
@ -215,6 +219,7 @@ EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_CURL_REQUESTS, MainWindow::OnDebugSetting)
EVT_MENU(MAINFRAME_MENU_ID_DEBUG_RENDER_UPSIDE_DOWN, MainWindow::OnDebugSetting) EVT_MENU(MAINFRAME_MENU_ID_DEBUG_RENDER_UPSIDE_DOWN, MainWindow::OnDebugSetting)
EVT_MENU(MAINFRAME_MENU_ID_DEBUG_AUDIO_AUX_ONLY, MainWindow::OnDebugSetting) EVT_MENU(MAINFRAME_MENU_ID_DEBUG_AUDIO_AUX_ONLY, MainWindow::OnDebugSetting)
EVT_MENU(MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS, MainWindow::OnDebugSetting) EVT_MENU(MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS, MainWindow::OnDebugSetting)
EVT_MENU(MAINFRAME_MENU_ID_DEBUG_GPU_CAPTURE, MainWindow::OnDebugSetting)
EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_RAM, MainWindow::OnDebugSetting) EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_RAM, MainWindow::OnDebugSetting)
EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_FST, MainWindow::OnDebugSetting) EVT_MENU(MAINFRAME_MENU_ID_DEBUG_DUMP_FST, MainWindow::OnDebugSetting)
// debug -> View ... // debug -> View ...
@ -247,7 +252,7 @@ public:
{ {
if(!m_window->IsGameLaunched() && filenames.GetCount() == 1) if(!m_window->IsGameLaunched() && filenames.GetCount() == 1)
return m_window->FileLoad(_utf8ToPath(filenames[0].utf8_string()), wxLaunchGameEvent::INITIATED_BY::DRAG_AND_DROP); return m_window->FileLoad(_utf8ToPath(filenames[0].utf8_string()), wxLaunchGameEvent::INITIATED_BY::DRAG_AND_DROP);
return false; return false;
} }
@ -459,7 +464,7 @@ bool MainWindow::InstallUpdate(const fs::path& metaFilePath)
{ {
throw std::runtime_error(frame.GetExceptionMessage()); throw std::runtime_error(frame.GetExceptionMessage());
} }
} }
} }
catch(const AbortException&) catch(const AbortException&)
{ {
@ -643,13 +648,13 @@ void MainWindow::OnFileMenu(wxCommandEvent& event)
_("Wii U executable (*.rpx, *.elf)"), _("Wii U executable (*.rpx, *.elf)"),
_("All files (*.*)") _("All files (*.*)")
); );
wxFileDialog openFileDialog(this, _("Open file to launch"), wxEmptyString, wxEmptyString, wildcard, wxFD_OPEN | wxFD_FILE_MUST_EXIST); wxFileDialog openFileDialog(this, _("Open file to launch"), wxEmptyString, wxEmptyString, wildcard, wxFD_OPEN | wxFD_FILE_MUST_EXIST);
if (openFileDialog.ShowModal() == wxID_CANCEL || openFileDialog.GetPath().IsEmpty()) if (openFileDialog.ShowModal() == wxID_CANCEL || openFileDialog.GetPath().IsEmpty())
return; return;
const wxString wxStrFilePath = openFileDialog.GetPath(); const wxString wxStrFilePath = openFileDialog.GetPath();
FileLoad(_utf8ToPath(wxStrFilePath.utf8_string()), wxLaunchGameEvent::INITIATED_BY::MENU); FileLoad(_utf8ToPath(wxStrFilePath.utf8_string()), wxLaunchGameEvent::INITIATED_BY::MENU);
} }
else if (menuId >= MAINFRAME_MENU_ID_FILE_RECENT_0 && menuId <= MAINFRAME_MENU_ID_FILE_RECENT_LAST) else if (menuId >= MAINFRAME_MENU_ID_FILE_RECENT_0 && menuId <= MAINFRAME_MENU_ID_FILE_RECENT_LAST)
@ -793,7 +798,7 @@ void MainWindow::TogglePadView()
{ {
if (m_padView) if (m_padView)
return; return;
m_padView = new PadViewFrame(this); m_padView = new PadViewFrame(this);
m_padView->Bind(wxEVT_CLOSE_WINDOW, &MainWindow::OnPadClose, this); m_padView->Bind(wxEVT_CLOSE_WINDOW, &MainWindow::OnPadClose, this);
@ -1001,7 +1006,7 @@ void MainWindow::OnConsoleLanguage(wxCommandEvent& event)
// GetConfig().cpu_mode = CPUMode::TriplecoreRecompiler; // GetConfig().cpu_mode = CPUMode::TriplecoreRecompiler;
// else // else
// cemu_assert_debug(false); // cemu_assert_debug(false);
// //
// g_config.Save(); // g_config.Save();
//} //}
@ -1015,6 +1020,14 @@ void MainWindow::OnDebugSetting(wxCommandEvent& event)
if(!GetConfig().vk_accurate_barriers) if(!GetConfig().vk_accurate_barriers)
wxMessageBox(_("Warning: Disabling the accurate barriers option will lead to flickering graphics but may improve performance. It is highly recommended to leave it turned on."), _("Accurate barriers are off"), wxOK); wxMessageBox(_("Warning: Disabling the accurate barriers option will lead to flickering graphics but may improve performance. It is highly recommended to leave it turned on."), _("Accurate barriers are off"), wxOK);
} }
else if (event.GetId() == MAINFRAME_MENU_ID_DEBUG_GPU_CAPTURE)
{
cemu_assert_debug(g_renderer->GetType() == RendererAPI::Metal);
#if ENABLE_METAL
static_cast<MetalRenderer*>(g_renderer.get())->CaptureFrame();
#endif
}
else if (event.GetId() == MAINFRAME_MENU_ID_DEBUG_AUDIO_AUX_ONLY) else if (event.GetId() == MAINFRAME_MENU_ID_DEBUG_AUDIO_AUX_ONLY)
ActiveSettings::EnableAudioOnlyAux(event.IsChecked()); ActiveSettings::EnableAudioOnlyAux(event.IsChecked());
else if (event.GetId() == MAINFRAME_MENU_ID_DEBUG_DUMP_RAM) else if (event.GetId() == MAINFRAME_MENU_ID_DEBUG_DUMP_RAM)
@ -1065,7 +1078,7 @@ void MainWindow::OnDebugSetting(wxCommandEvent& event)
ActiveSettings::SetTimerShiftFactor(6); ActiveSettings::SetTimerShiftFactor(6);
else else
cemu_assert_debug(false); cemu_assert_debug(false);
g_config.Save(); g_config.Save();
} }
@ -1137,7 +1150,7 @@ void MainWindow::OnLoggingWindow(wxCommandEvent& event)
return; return;
m_logging_window = new LoggingWindow(this); m_logging_window = new LoggingWindow(this);
m_logging_window->Bind(wxEVT_CLOSE_WINDOW, m_logging_window->Bind(wxEVT_CLOSE_WINDOW,
[this](wxCloseEvent& event) { [this](wxCloseEvent& event) {
m_logging_window = nullptr; m_logging_window = nullptr;
event.Skip(); event.Skip();
@ -1312,7 +1325,7 @@ void MainWindow::SaveSettings()
{ {
auto lock = g_config.Lock(); auto lock = g_config.Lock();
auto& config = GetConfig(); auto& config = GetConfig();
if (config.window_position != Vector2i{ -1,-1 }) if (config.window_position != Vector2i{ -1,-1 })
{ {
config.window_position.x = m_restored_position.x; config.window_position.x = m_restored_position.x;
@ -1349,7 +1362,7 @@ void MainWindow::SaveSettings()
if(m_game_list) if(m_game_list)
m_game_list->SaveConfig(); m_game_list->SaveConfig();
g_config.Save(); g_config.Save();
} }
@ -1379,14 +1392,14 @@ void MainWindow::OnMouseMove(wxMouseEvent& event)
void MainWindow::OnMouseLeft(wxMouseEvent& event) void MainWindow::OnMouseLeft(wxMouseEvent& event)
{ {
auto& instance = InputManager::instance(); auto& instance = InputManager::instance();
std::scoped_lock lock(instance.m_main_mouse.m_mutex); std::scoped_lock lock(instance.m_main_mouse.m_mutex);
instance.m_main_mouse.left_down = event.ButtonDown(wxMOUSE_BTN_LEFT); instance.m_main_mouse.left_down = event.ButtonDown(wxMOUSE_BTN_LEFT);
auto physPos = ToPhys(event.GetPosition()); auto physPos = ToPhys(event.GetPosition());
instance.m_main_mouse.position = { physPos.x, physPos.y }; instance.m_main_mouse.position = { physPos.x, physPos.y };
if (event.ButtonDown(wxMOUSE_BTN_LEFT)) if (event.ButtonDown(wxMOUSE_BTN_LEFT))
instance.m_main_mouse.left_down_toggle = true; instance.m_main_mouse.left_down_toggle = true;
event.Skip(); event.Skip();
} }
@ -1400,7 +1413,7 @@ void MainWindow::OnMouseRight(wxMouseEvent& event)
instance.m_main_mouse.position = { physPos.x, physPos.y }; instance.m_main_mouse.position = { physPos.x, physPos.y };
if(event.ButtonDown(wxMOUSE_BTN_RIGHT)) if(event.ButtonDown(wxMOUSE_BTN_RIGHT))
instance.m_main_mouse.right_down_toggle = true; instance.m_main_mouse.right_down_toggle = true;
event.Skip(); event.Skip();
} }
@ -1448,7 +1461,7 @@ void MainWindow::OnKeyUp(wxKeyEvent& event)
void MainWindow::OnKeyDown(wxKeyEvent& event) void MainWindow::OnKeyDown(wxKeyEvent& event)
{ {
if ((event.AltDown() && event.GetKeyCode() == WXK_F4) || if ((event.AltDown() && event.GetKeyCode() == WXK_F4) ||
(event.CmdDown() && event.GetKeyCode() == 'Q')) (event.CmdDown() && event.GetKeyCode() == 'Q'))
{ {
Close(true); Close(true);
@ -1463,7 +1476,7 @@ void MainWindow::OnChar(wxKeyEvent& event)
{ {
if (swkbd_hasKeyboardInputHook()) if (swkbd_hasKeyboardInputHook())
swkbd_keyInput(event.GetUnicodeKey()); swkbd_keyInput(event.GetUnicodeKey());
// event.Skip(); // event.Skip();
} }
@ -1488,7 +1501,7 @@ void MainWindow::OnToolsInput(wxCommandEvent& event)
case MAINFRAME_MENU_ID_TOOLS_DOWNLOAD_MANAGER: case MAINFRAME_MENU_ID_TOOLS_DOWNLOAD_MANAGER:
{ {
const auto default_tab = id == MAINFRAME_MENU_ID_TOOLS_TITLE_MANAGER ? TitleManagerPage::TitleManager : TitleManagerPage::DownloadManager; const auto default_tab = id == MAINFRAME_MENU_ID_TOOLS_TITLE_MANAGER ? TitleManagerPage::TitleManager : TitleManagerPage::DownloadManager;
if (m_title_manager) if (m_title_manager)
m_title_manager->SetFocusAndTab(default_tab); m_title_manager->SetFocusAndTab(default_tab);
else else
@ -1538,7 +1551,7 @@ void MainWindow::OnGesturePan(wxPanGestureEvent& event)
instance.m_main_touch.left_down = event.IsGestureStart() || !event.IsGestureEnd(); instance.m_main_touch.left_down = event.IsGestureStart() || !event.IsGestureEnd();
if (event.IsGestureStart() || !event.IsGestureEnd()) if (event.IsGestureStart() || !event.IsGestureEnd())
instance.m_main_touch.left_down_toggle = true; instance.m_main_touch.left_down_toggle = true;
event.Skip(); event.Skip();
} }
@ -1572,8 +1585,12 @@ void MainWindow::CreateCanvas()
// create canvas // create canvas
if (ActiveSettings::GetGraphicsAPI() == kVulkan) if (ActiveSettings::GetGraphicsAPI() == kVulkan)
m_render_canvas = new VulkanCanvas(m_game_panel, wxSize(1280, 720), true); m_render_canvas = new VulkanCanvas(m_game_panel, wxSize(1280, 720), true);
else else if (ActiveSettings::GetGraphicsAPI() == kOpenGL)
m_render_canvas = GLCanvas_Create(m_game_panel, wxSize(1280, 720), true); m_render_canvas = GLCanvas_Create(m_game_panel, wxSize(1280, 720), true);
#if ENABLE_METAL
else
m_render_canvas = new MetalCanvas(m_game_panel, wxSize(1280, 720), true);
#endif
// mouse events // mouse events
m_render_canvas->Bind(wxEVT_MOTION, &MainWindow::OnMouseMove, this); m_render_canvas->Bind(wxEVT_MOTION, &MainWindow::OnMouseMove, this);
@ -1753,10 +1770,10 @@ void MainWindow::UpdateNFCMenu()
const auto& entry = config.recent_nfc_files[i]; const auto& entry = config.recent_nfc_files[i];
if (entry.empty()) if (entry.empty())
continue; continue;
if (!fs::exists(_utf8ToPath(entry))) if (!fs::exists(_utf8ToPath(entry)))
continue; continue;
if (recentFileIndex == 0) if (recentFileIndex == 0)
m_nfcMenuSeparator0 = m_nfcMenu->AppendSeparator(); m_nfcMenuSeparator0 = m_nfcMenu->AppendSeparator();
@ -1807,7 +1824,7 @@ void MainWindow::OnTimer(wxTimerEvent& event)
{ {
ShowCursor(false); ShowCursor(false);
} }
} }
#define BUILD_DATE __DATE__ " " __TIME__ #define BUILD_DATE __DATE__ " " __TIME__
@ -2066,9 +2083,9 @@ void MainWindow::RecreateMenu()
m_menuBar->Destroy(); m_menuBar->Destroy();
m_menuBar = nullptr; m_menuBar = nullptr;
} }
auto& config = GetConfig(); auto& config = GetConfig();
m_menuBar = new wxMenuBar(); m_menuBar = new wxMenuBar();
// file submenu // file submenu
m_fileMenu = new wxMenu(); m_fileMenu = new wxMenu();
@ -2121,7 +2138,7 @@ void MainWindow::RecreateMenu()
item->Check(account_id == account.GetPersistentId()); item->Check(account_id == account.GetPersistentId());
if (m_game_launched || LaunchSettings::GetPersistentId().has_value()) if (m_game_launched || LaunchSettings::GetPersistentId().has_value())
item->Enable(false); item->Enable(false);
++index; ++index;
} }
@ -2151,8 +2168,8 @@ void MainWindow::RecreateMenu()
// options submenu // options submenu
wxMenu* optionsMenu = new wxMenu(); wxMenu* optionsMenu = new wxMenu();
m_fullscreenMenuItem = optionsMenu->AppendCheckItem(MAINFRAME_MENU_ID_OPTIONS_FULLSCREEN, _("&Fullscreen"), wxEmptyString); m_fullscreenMenuItem = optionsMenu->AppendCheckItem(MAINFRAME_MENU_ID_OPTIONS_FULLSCREEN, _("&Fullscreen"), wxEmptyString);
m_fullscreenMenuItem->Check(ActiveSettings::FullscreenEnabled()); m_fullscreenMenuItem->Check(ActiveSettings::FullscreenEnabled());
optionsMenu->Append(MAINFRAME_MENU_ID_OPTIONS_GRAPHIC_PACKS2, _("&Graphic packs")); optionsMenu->Append(MAINFRAME_MENU_ID_OPTIONS_GRAPHIC_PACKS2, _("&Graphic packs"));
m_padViewMenuItem = optionsMenu->AppendCheckItem(MAINFRAME_MENU_ID_OPTIONS_SECOND_WINDOW_PADVIEW, _("&Separate GamePad view"), wxEmptyString); m_padViewMenuItem = optionsMenu->AppendCheckItem(MAINFRAME_MENU_ID_OPTIONS_SECOND_WINDOW_PADVIEW, _("&Separate GamePad view"), wxEmptyString);
m_padViewMenuItem->Check(GetConfig().pad_open); m_padViewMenuItem->Check(GetConfig().pad_open);
@ -2247,7 +2264,7 @@ void MainWindow::RecreateMenu()
debugMenu->AppendSubMenu(debugLoggingMenu, _("&Logging")); debugMenu->AppendSubMenu(debugLoggingMenu, _("&Logging"));
debugMenu->AppendSubMenu(debugDumpMenu, _("&Dump")); debugMenu->AppendSubMenu(debugDumpMenu, _("&Dump"));
debugMenu->AppendSeparator(); debugMenu->AppendSeparator();
auto upsidedownItem = debugMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_RENDER_UPSIDE_DOWN, _("&Render upside-down"), wxEmptyString); auto upsidedownItem = debugMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_RENDER_UPSIDE_DOWN, _("&Render upside-down"), wxEmptyString);
upsidedownItem->Check(ActiveSettings::RenderUpsideDownEnabled()); upsidedownItem->Check(ActiveSettings::RenderUpsideDownEnabled());
if(LaunchSettings::RenderUpsideDownEnabled().has_value()) if(LaunchSettings::RenderUpsideDownEnabled().has_value())
@ -2256,6 +2273,9 @@ void MainWindow::RecreateMenu()
auto accurateBarriers = debugMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS, _("&Accurate barriers (Vulkan)"), wxEmptyString); auto accurateBarriers = debugMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS, _("&Accurate barriers (Vulkan)"), wxEmptyString);
accurateBarriers->Check(GetConfig().vk_accurate_barriers); accurateBarriers->Check(GetConfig().vk_accurate_barriers);
auto gpuCapture = debugMenu->Append(MAINFRAME_MENU_ID_DEBUG_GPU_CAPTURE, _("&GPU capture (Metal)"));
gpuCapture->Enable(m_game_launched && g_renderer->GetType() == RendererAPI::Metal);
debugMenu->AppendSeparator(); debugMenu->AppendSeparator();
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT

View file

@ -8,6 +8,7 @@
#include "Cafe/OS/libs/swkbd/swkbd.h" #include "Cafe/OS/libs/swkbd/swkbd.h"
#include "gui/canvas/OpenGLCanvas.h" #include "gui/canvas/OpenGLCanvas.h"
#include "gui/canvas/VulkanCanvas.h" #include "gui/canvas/VulkanCanvas.h"
#include "gui/canvas/MetalCanvas.h"
#include "config/CemuConfig.h" #include "config/CemuConfig.h"
#include "gui/MainWindow.h" #include "gui/MainWindow.h"
#include "gui/helpers/wxHelpers.h" #include "gui/helpers/wxHelpers.h"
@ -74,8 +75,12 @@ void PadViewFrame::InitializeRenderCanvas()
{ {
if (ActiveSettings::GetGraphicsAPI() == kVulkan) if (ActiveSettings::GetGraphicsAPI() == kVulkan)
m_render_canvas = new VulkanCanvas(this, wxSize(854, 480), false); m_render_canvas = new VulkanCanvas(this, wxSize(854, 480), false);
else else if (ActiveSettings::GetGraphicsAPI() == kOpenGL)
m_render_canvas = GLCanvas_Create(this, wxSize(854, 480), false); m_render_canvas = GLCanvas_Create(this, wxSize(854, 480), false);
#if ENABLE_METAL
else
m_render_canvas = new MetalCanvas(this, wxSize(854, 480), false);
#endif
sizer->Add(m_render_canvas, 1, wxEXPAND, 0, nullptr); sizer->Add(m_render_canvas, 1, wxEXPAND, 0, nullptr);
} }
SetSizer(sizer); SetSizer(sizer);
@ -173,7 +178,7 @@ void PadViewFrame::OnChar(wxKeyEvent& event)
{ {
if (swkbd_hasKeyboardInputHook()) if (swkbd_hasKeyboardInputHook())
swkbd_keyInput(event.GetUnicodeKey()); swkbd_keyInput(event.GetUnicodeKey());
event.Skip(); event.Skip();
} }
@ -198,7 +203,7 @@ void PadViewFrame::OnMouseLeft(wxMouseEvent& event)
instance.m_pad_mouse.position = { physPos.x, physPos.y }; instance.m_pad_mouse.position = { physPos.x, physPos.y };
if (event.ButtonDown(wxMOUSE_BTN_LEFT)) if (event.ButtonDown(wxMOUSE_BTN_LEFT))
instance.m_pad_mouse.left_down_toggle = true; instance.m_pad_mouse.left_down_toggle = true;
} }
void PadViewFrame::OnMouseRight(wxMouseEvent& event) void PadViewFrame::OnMouseRight(wxMouseEvent& event)

Some files were not shown because too many files have changed in this diff Show more