diff --git a/src/common/input.h b/src/common/input.h index fc14fd7..d27b1d7 100644 --- a/src/common/input.h +++ b/src/common/input.h @@ -292,9 +292,6 @@ class InputDevice { public: virtual ~InputDevice() = default; - // Request input device to update if necessary - virtual void SoftUpdate() {} - // Force input device to update data regardless of the current state virtual void ForceUpdate() {} diff --git a/src/common/settings.h b/src/common/settings.h index 9eb3711..a457e3f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -415,7 +415,7 @@ struct Values { // Renderer SwitchableSetting renderer_backend{ RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"}; - SwitchableSetting renderer_force_max_clock{true, "force_max_clock"}; + SwitchableSetting renderer_force_max_clock{false, "force_max_clock"}; Setting renderer_debug{false, "debug"}; Setting renderer_shader_feedback{false, "shader_feedback"}; Setting enable_nsight_aftermath{false, "nsight_aftermath"}; diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 0252c8c..5afdeb5 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -226,6 +226,7 @@ add_library(core STATIC hle/kernel/k_page_buffer.h hle/kernel/k_page_heap.cpp hle/kernel/k_page_heap.h + hle/kernel/k_page_group.cpp hle/kernel/k_page_group.h hle/kernel/k_page_table.cpp hle/kernel/k_page_table.h diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp index 71364c3..7a01f3f 100644 --- a/src/core/hid/emulated_controller.cpp +++ b/src/core/hid/emulated_controller.cpp @@ -1434,16 +1434,6 @@ AnalogSticks EmulatedController::GetSticks() const { return {}; } - // Some drivers like stick from buttons need constant refreshing - for (auto& device : stick_devices) { - if (!device) { - continue; - } - lock.unlock(); - device->SoftUpdate(); - lock.lock(); - } - return controller.analog_stick_state; } diff --git a/src/core/hle/kernel/k_code_memory.cpp b/src/core/hle/kernel/k_code_memory.cpp index 4b1c134..d9da1e6 100644 --- a/src/core/hle/kernel/k_code_memory.cpp +++ b/src/core/hle/kernel/k_code_memory.cpp @@ -27,13 +27,13 @@ Result KCodeMemory::Initialize(Core::DeviceMemory& device_memory, VAddr addr, si auto& page_table = m_owner->PageTable(); // Construct the page group. - m_page_group = {}; + m_page_group.emplace(kernel, page_table.GetBlockInfoManager()); // Lock the memory. - R_TRY(page_table.LockForCodeMemory(&m_page_group, addr, size)) + R_TRY(page_table.LockForCodeMemory(std::addressof(*m_page_group), addr, size)) // Clear the memory. - for (const auto& block : m_page_group.Nodes()) { + for (const auto& block : *m_page_group) { std::memset(device_memory.GetPointer(block.GetAddress()), 0xFF, block.GetSize()); } @@ -51,12 +51,13 @@ Result KCodeMemory::Initialize(Core::DeviceMemory& device_memory, VAddr addr, si void KCodeMemory::Finalize() { // Unlock. if (!m_is_mapped && !m_is_owner_mapped) { - const size_t size = m_page_group.GetNumPages() * PageSize; - m_owner->PageTable().UnlockForCodeMemory(m_address, size, m_page_group); + const size_t size = m_page_group->GetNumPages() * PageSize; + m_owner->PageTable().UnlockForCodeMemory(m_address, size, *m_page_group); } // Close the page group. - m_page_group = {}; + m_page_group->Close(); + m_page_group->Finalize(); // Close our reference to our owner. m_owner->Close(); @@ -64,7 +65,7 @@ void KCodeMemory::Finalize() { Result KCodeMemory::Map(VAddr address, size_t size) { // Validate the size. - R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); + R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); // Lock ourselves. KScopedLightLock lk(m_lock); @@ -74,7 +75,7 @@ Result KCodeMemory::Map(VAddr address, size_t size) { // Map the memory. R_TRY(kernel.CurrentProcess()->PageTable().MapPages( - address, m_page_group, KMemoryState::CodeOut, KMemoryPermission::UserReadWrite)); + address, *m_page_group, KMemoryState::CodeOut, KMemoryPermission::UserReadWrite)); // Mark ourselves as mapped. m_is_mapped = true; @@ -84,13 +85,13 @@ Result KCodeMemory::Map(VAddr address, size_t size) { Result KCodeMemory::Unmap(VAddr address, size_t size) { // Validate the size. - R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); + R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); // Lock ourselves. KScopedLightLock lk(m_lock); // Unmap the memory. - R_TRY(kernel.CurrentProcess()->PageTable().UnmapPages(address, m_page_group, + R_TRY(kernel.CurrentProcess()->PageTable().UnmapPages(address, *m_page_group, KMemoryState::CodeOut)); // Mark ourselves as unmapped. @@ -101,7 +102,7 @@ Result KCodeMemory::Unmap(VAddr address, size_t size) { Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission perm) { // Validate the size. - R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); + R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); // Lock ourselves. KScopedLightLock lk(m_lock); @@ -125,7 +126,7 @@ Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission // Map the memory. R_TRY( - m_owner->PageTable().MapPages(address, m_page_group, KMemoryState::GeneratedCode, k_perm)); + m_owner->PageTable().MapPages(address, *m_page_group, KMemoryState::GeneratedCode, k_perm)); // Mark ourselves as mapped. m_is_owner_mapped = true; @@ -135,13 +136,13 @@ Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission Result KCodeMemory::UnmapFromOwner(VAddr address, size_t size) { // Validate the size. - R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); + R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); // Lock ourselves. KScopedLightLock lk(m_lock); // Unmap the memory. - R_TRY(m_owner->PageTable().UnmapPages(address, m_page_group, KMemoryState::GeneratedCode)); + R_TRY(m_owner->PageTable().UnmapPages(address, *m_page_group, KMemoryState::GeneratedCode)); // Mark ourselves as unmapped. m_is_owner_mapped = false; diff --git a/src/core/hle/kernel/k_code_memory.h b/src/core/hle/kernel/k_code_memory.h index 2e7e143..5b260b3 100644 --- a/src/core/hle/kernel/k_code_memory.h +++ b/src/core/hle/kernel/k_code_memory.h @@ -3,6 +3,8 @@ #pragma once +#include + #include "common/common_types.h" #include "core/device_memory.h" #include "core/hle/kernel/k_auto_object.h" @@ -49,11 +51,11 @@ public: return m_address; } size_t GetSize() const { - return m_is_initialized ? m_page_group.GetNumPages() * PageSize : 0; + return m_is_initialized ? m_page_group->GetNumPages() * PageSize : 0; } private: - KPageGroup m_page_group{}; + std::optional m_page_group{}; KProcess* m_owner{}; VAddr m_address{}; KLightLock m_lock; diff --git a/src/core/hle/kernel/k_memory_manager.cpp b/src/core/hle/kernel/k_memory_manager.cpp index bd33571..cd6ea38 100644 --- a/src/core/hle/kernel/k_memory_manager.cpp +++ b/src/core/hle/kernel/k_memory_manager.cpp @@ -223,7 +223,7 @@ Result KMemoryManager::AllocatePageGroupImpl(KPageGroup* out, size_t num_pages, // Ensure that we don't leave anything un-freed. ON_RESULT_FAILURE { - for (const auto& it : out->Nodes()) { + for (const auto& it : *out) { auto& manager = this->GetManager(it.GetAddress()); const size_t node_num_pages = std::min( it.GetNumPages(), (manager.GetEndAddress() - it.GetAddress()) / PageSize); @@ -285,7 +285,7 @@ Result KMemoryManager::AllocateAndOpen(KPageGroup* out, size_t num_pages, u32 op m_has_optimized_process[static_cast(pool)], true)); // Open the first reference to the pages. - for (const auto& block : out->Nodes()) { + for (const auto& block : *out) { PAddr cur_address = block.GetAddress(); size_t remaining_pages = block.GetNumPages(); while (remaining_pages > 0) { @@ -335,7 +335,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32 // Perform optimized memory tracking, if we should. if (optimized) { // Iterate over the allocated blocks. - for (const auto& block : out->Nodes()) { + for (const auto& block : *out) { // Get the block extents. const PAddr block_address = block.GetAddress(); const size_t block_pages = block.GetNumPages(); @@ -391,7 +391,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32 } } else { // Set all the allocated memory. - for (const auto& block : out->Nodes()) { + for (const auto& block : *out) { std::memset(m_system.DeviceMemory().GetPointer(block.GetAddress()), fill_pattern, block.GetSize()); } diff --git a/src/core/hle/kernel/k_page_group.cpp b/src/core/hle/kernel/k_page_group.cpp new file mode 100644 index 0000000..d8c644a --- /dev/null +++ b/src/core/hle/kernel/k_page_group.cpp @@ -0,0 +1,121 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "core/hle/kernel/k_dynamic_resource_manager.h" +#include "core/hle/kernel/k_memory_manager.h" +#include "core/hle/kernel/k_page_group.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/svc_results.h" + +namespace Kernel { + +void KPageGroup::Finalize() { + KBlockInfo* cur = m_first_block; + while (cur != nullptr) { + KBlockInfo* next = cur->GetNext(); + m_manager->Free(cur); + cur = next; + } + + m_first_block = nullptr; + m_last_block = nullptr; +} + +void KPageGroup::CloseAndReset() { + auto& mm = m_kernel.MemoryManager(); + + KBlockInfo* cur = m_first_block; + while (cur != nullptr) { + KBlockInfo* next = cur->GetNext(); + mm.Close(cur->GetAddress(), cur->GetNumPages()); + m_manager->Free(cur); + cur = next; + } + + m_first_block = nullptr; + m_last_block = nullptr; +} + +size_t KPageGroup::GetNumPages() const { + size_t num_pages = 0; + + for (const auto& it : *this) { + num_pages += it.GetNumPages(); + } + + return num_pages; +} + +Result KPageGroup::AddBlock(KPhysicalAddress addr, size_t num_pages) { + // Succeed immediately if we're adding no pages. + R_SUCCEED_IF(num_pages == 0); + + // Check for overflow. + ASSERT(addr < addr + num_pages * PageSize); + + // Try to just append to the last block. + if (m_last_block != nullptr) { + R_SUCCEED_IF(m_last_block->TryConcatenate(addr, num_pages)); + } + + // Allocate a new block. + KBlockInfo* new_block = m_manager->Allocate(); + R_UNLESS(new_block != nullptr, ResultOutOfResource); + + // Initialize the block. + new_block->Initialize(addr, num_pages); + + // Add the block to our list. + if (m_last_block != nullptr) { + m_last_block->SetNext(new_block); + } else { + m_first_block = new_block; + } + m_last_block = new_block; + + R_SUCCEED(); +} + +void KPageGroup::Open() const { + auto& mm = m_kernel.MemoryManager(); + + for (const auto& it : *this) { + mm.Open(it.GetAddress(), it.GetNumPages()); + } +} + +void KPageGroup::OpenFirst() const { + auto& mm = m_kernel.MemoryManager(); + + for (const auto& it : *this) { + mm.OpenFirst(it.GetAddress(), it.GetNumPages()); + } +} + +void KPageGroup::Close() const { + auto& mm = m_kernel.MemoryManager(); + + for (const auto& it : *this) { + mm.Close(it.GetAddress(), it.GetNumPages()); + } +} + +bool KPageGroup::IsEquivalentTo(const KPageGroup& rhs) const { + auto lit = this->begin(); + auto rit = rhs.begin(); + auto lend = this->end(); + auto rend = rhs.end(); + + while (lit != lend && rit != rend) { + if (*lit != *rit) { + return false; + } + + ++lit; + ++rit; + } + + return lit == lend && rit == rend; +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_page_group.h b/src/core/hle/kernel/k_page_group.h index 316f172..c07f176 100644 --- a/src/core/hle/kernel/k_page_group.h +++ b/src/core/hle/kernel/k_page_group.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -13,24 +13,23 @@ namespace Kernel { +class KBlockInfoManager; +class KernelCore; class KPageGroup; class KBlockInfo { -private: - friend class KPageGroup; - public: - constexpr KBlockInfo() = default; + constexpr explicit KBlockInfo() : m_next(nullptr) {} - constexpr void Initialize(PAddr addr, size_t np) { + constexpr void Initialize(KPhysicalAddress addr, size_t np) { ASSERT(Common::IsAligned(addr, PageSize)); ASSERT(static_cast(np) == np); - m_page_index = static_cast(addr) / PageSize; + m_page_index = static_cast(addr / PageSize); m_num_pages = static_cast(np); } - constexpr PAddr GetAddress() const { + constexpr KPhysicalAddress GetAddress() const { return m_page_index * PageSize; } constexpr size_t GetNumPages() const { @@ -39,10 +38,10 @@ public: constexpr size_t GetSize() const { return this->GetNumPages() * PageSize; } - constexpr PAddr GetEndAddress() const { + constexpr KPhysicalAddress GetEndAddress() const { return (m_page_index + m_num_pages) * PageSize; } - constexpr PAddr GetLastAddress() const { + constexpr KPhysicalAddress GetLastAddress() const { return this->GetEndAddress() - 1; } @@ -62,8 +61,8 @@ public: return !(*this == rhs); } - constexpr bool IsStrictlyBefore(PAddr addr) const { - const PAddr end = this->GetEndAddress(); + constexpr bool IsStrictlyBefore(KPhysicalAddress addr) const { + const KPhysicalAddress end = this->GetEndAddress(); if (m_page_index != 0 && end == 0) { return false; @@ -72,11 +71,11 @@ public: return end < addr; } - constexpr bool operator<(PAddr addr) const { + constexpr bool operator<(KPhysicalAddress addr) const { return this->IsStrictlyBefore(addr); } - constexpr bool TryConcatenate(PAddr addr, size_t np) { + constexpr bool TryConcatenate(KPhysicalAddress addr, size_t np) { if (addr != 0 && addr == this->GetEndAddress()) { m_num_pages += static_cast(np); return true; @@ -90,96 +89,118 @@ private: } private: + friend class KPageGroup; + KBlockInfo* m_next{}; u32 m_page_index{}; u32 m_num_pages{}; }; static_assert(sizeof(KBlockInfo) <= 0x10); -class KPageGroup final { +class KPageGroup { public: - class Node final { + class Iterator { public: - constexpr Node(u64 addr_, std::size_t num_pages_) : addr{addr_}, num_pages{num_pages_} {} + using iterator_category = std::forward_iterator_tag; + using value_type = const KBlockInfo; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; - constexpr u64 GetAddress() const { - return addr; + constexpr explicit Iterator(pointer n) : m_node(n) {} + + constexpr bool operator==(const Iterator& rhs) const { + return m_node == rhs.m_node; + } + constexpr bool operator!=(const Iterator& rhs) const { + return !(*this == rhs); } - constexpr std::size_t GetNumPages() const { - return num_pages; + constexpr pointer operator->() const { + return m_node; + } + constexpr reference operator*() const { + return *m_node; } - constexpr std::size_t GetSize() const { - return GetNumPages() * PageSize; + constexpr Iterator& operator++() { + m_node = m_node->GetNext(); + return *this; + } + + constexpr Iterator operator++(int) { + const Iterator it{*this}; + ++(*this); + return it; } private: - u64 addr{}; - std::size_t num_pages{}; + pointer m_node{}; }; -public: - KPageGroup() = default; - KPageGroup(u64 address, u64 num_pages) { - ASSERT(AddBlock(address, num_pages).IsSuccess()); + explicit KPageGroup(KernelCore& kernel, KBlockInfoManager* m) + : m_kernel{kernel}, m_manager{m} {} + ~KPageGroup() { + this->Finalize(); } - constexpr std::list& Nodes() { - return nodes; + void CloseAndReset(); + void Finalize(); + + Iterator begin() const { + return Iterator{m_first_block}; + } + Iterator end() const { + return Iterator{nullptr}; + } + bool empty() const { + return m_first_block == nullptr; } - constexpr const std::list& Nodes() const { - return nodes; + Result AddBlock(KPhysicalAddress addr, size_t num_pages); + void Open() const; + void OpenFirst() const; + void Close() const; + + size_t GetNumPages() const; + + bool IsEquivalentTo(const KPageGroup& rhs) const; + + bool operator==(const KPageGroup& rhs) const { + return this->IsEquivalentTo(rhs); } - std::size_t GetNumPages() const { - std::size_t num_pages = 0; - for (const Node& node : nodes) { - num_pages += node.GetNumPages(); - } - return num_pages; + bool operator!=(const KPageGroup& rhs) const { + return !(*this == rhs); } - bool IsEqual(KPageGroup& other) const { - auto this_node = nodes.begin(); - auto other_node = other.nodes.begin(); - while (this_node != nodes.end() && other_node != other.nodes.end()) { - if (this_node->GetAddress() != other_node->GetAddress() || - this_node->GetNumPages() != other_node->GetNumPages()) { - return false; - } - this_node = std::next(this_node); - other_node = std::next(other_node); - } - - return this_node == nodes.end() && other_node == other.nodes.end(); - } - - Result AddBlock(u64 address, u64 num_pages) { - if (!num_pages) { - return ResultSuccess; - } - if (!nodes.empty()) { - const auto node = nodes.back(); - if (node.GetAddress() + node.GetNumPages() * PageSize == address) { - address = node.GetAddress(); - num_pages += node.GetNumPages(); - nodes.pop_back(); - } - } - nodes.push_back({address, num_pages}); - return ResultSuccess; - } - - bool Empty() const { - return nodes.empty(); - } - - void Finalize() {} - private: - std::list nodes; + KernelCore& m_kernel; + KBlockInfo* m_first_block{}; + KBlockInfo* m_last_block{}; + KBlockInfoManager* m_manager{}; +}; + +class KScopedPageGroup { +public: + explicit KScopedPageGroup(const KPageGroup* gp) : m_pg(gp) { + if (m_pg) { + m_pg->Open(); + } + } + explicit KScopedPageGroup(const KPageGroup& gp) : KScopedPageGroup(std::addressof(gp)) {} + ~KScopedPageGroup() { + if (m_pg) { + m_pg->Close(); + } + } + + void CancelClose() { + m_pg = nullptr; + } + +private: + const KPageGroup* m_pg{}; }; } // namespace Kernel diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp index 612fc76..9c7ac22 100644 --- a/src/core/hle/kernel/k_page_table.cpp +++ b/src/core/hle/kernel/k_page_table.cpp @@ -100,7 +100,7 @@ constexpr size_t GetAddressSpaceWidthFromType(FileSys::ProgramAddressSpaceType a KPageTable::KPageTable(Core::System& system_) : m_general_lock{system_.Kernel()}, - m_map_physical_memory_lock{system_.Kernel()}, m_system{system_} {} + m_map_physical_memory_lock{system_.Kernel()}, m_system{system_}, m_kernel{system_.Kernel()} {} KPageTable::~KPageTable() = default; @@ -373,7 +373,7 @@ Result KPageTable::MapProcessCode(VAddr addr, size_t num_pages, KMemoryState sta m_memory_block_slab_manager); // Allocate and open. - KPageGroup pg; + KPageGroup pg{m_kernel, m_block_info_manager}; R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen( &pg, num_pages, KMemoryManager::EncodeOption(KMemoryManager::Pool::Application, m_allocation_option))); @@ -432,7 +432,7 @@ Result KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, size_t si const size_t num_pages = size / PageSize; // Create page groups for the memory being mapped. - KPageGroup pg; + KPageGroup pg{m_kernel, m_block_info_manager}; AddRegionToPages(src_address, num_pages, pg); // Reprotect the source as kernel-read/not mapped. @@ -593,7 +593,7 @@ Result KPageTable::MakePageGroup(KPageGroup& pg, VAddr addr, size_t num_pages) { const size_t size = num_pages * PageSize; // We're making a new group, not adding to an existing one. - R_UNLESS(pg.Empty(), ResultInvalidCurrentMemory); + R_UNLESS(pg.empty(), ResultInvalidCurrentMemory); // Begin traversal. Common::PageTable::TraversalContext context; @@ -640,11 +640,10 @@ Result KPageTable::MakePageGroup(KPageGroup& pg, VAddr addr, size_t num_pages) { R_SUCCEED(); } -bool KPageTable::IsValidPageGroup(const KPageGroup& pg_ll, VAddr addr, size_t num_pages) { +bool KPageTable::IsValidPageGroup(const KPageGroup& pg, VAddr addr, size_t num_pages) { ASSERT(this->IsLockedByCurrentThread()); const size_t size = num_pages * PageSize; - const auto& pg = pg_ll.Nodes(); const auto& memory_layout = m_system.Kernel().MemoryLayout(); // Empty groups are necessarily invalid. @@ -942,9 +941,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add ON_RESULT_FAILURE { if (cur_mapped_addr != dst_addr) { - // HACK: Manually close the pages. - HACK_ClosePages(dst_addr, (cur_mapped_addr - dst_addr) / PageSize); - ASSERT(Operate(dst_addr, (cur_mapped_addr - dst_addr) / PageSize, KMemoryPermission::None, OperationType::Unmap) .IsSuccess()); @@ -1020,9 +1016,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add // Map the page. R_TRY(Operate(cur_mapped_addr, 1, test_perm, OperationType::Map, start_partial_page)); - // HACK: Manually open the pages. - HACK_OpenPages(start_partial_page, 1); - // Update tracking extents. cur_mapped_addr += PageSize; cur_block_addr += PageSize; @@ -1051,9 +1044,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add R_TRY(Operate(cur_mapped_addr, cur_block_size / PageSize, test_perm, OperationType::Map, cur_block_addr)); - // HACK: Manually open the pages. - HACK_OpenPages(cur_block_addr, cur_block_size / PageSize); - // Update tracking extents. cur_mapped_addr += cur_block_size; cur_block_addr = next_entry.phys_addr; @@ -1073,9 +1063,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add R_TRY(Operate(cur_mapped_addr, last_block_size / PageSize, test_perm, OperationType::Map, cur_block_addr)); - // HACK: Manually open the pages. - HACK_OpenPages(cur_block_addr, last_block_size / PageSize); - // Update tracking extents. cur_mapped_addr += last_block_size; cur_block_addr += last_block_size; @@ -1107,9 +1094,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add // Map the page. R_TRY(Operate(cur_mapped_addr, 1, test_perm, OperationType::Map, end_partial_page)); - - // HACK: Manually open the pages. - HACK_OpenPages(end_partial_page, 1); } // Update memory blocks to reflect our changes @@ -1211,9 +1195,6 @@ Result KPageTable::CleanupForIpcServer(VAddr address, size_t size, KMemoryState const size_t aligned_size = aligned_end - aligned_start; const size_t aligned_num_pages = aligned_size / PageSize; - // HACK: Manually close the pages. - HACK_ClosePages(aligned_start, aligned_num_pages); - // Unmap the pages. R_TRY(Operate(aligned_start, aligned_num_pages, KMemoryPermission::None, OperationType::Unmap)); @@ -1501,17 +1482,6 @@ void KPageTable::CleanupForIpcClientOnServerSetupFailure([[maybe_unused]] PageLi } } -void KPageTable::HACK_OpenPages(PAddr phys_addr, size_t num_pages) { - m_system.Kernel().MemoryManager().OpenFirst(phys_addr, num_pages); -} - -void KPageTable::HACK_ClosePages(VAddr virt_addr, size_t num_pages) { - for (size_t index = 0; index < num_pages; ++index) { - const auto paddr = GetPhysicalAddr(virt_addr + (index * PageSize)); - m_system.Kernel().MemoryManager().Close(paddr, 1); - } -} - Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { // Lock the physical memory lock. KScopedLightLock phys_lk(m_map_physical_memory_lock); @@ -1572,7 +1542,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached); // Allocate pages for the new memory. - KPageGroup pg; + KPageGroup pg{m_kernel, m_block_info_manager}; R_TRY(m_system.Kernel().MemoryManager().AllocateForProcess( &pg, (size - mapped_size) / PageSize, m_allocate_option, 0, 0)); @@ -1650,7 +1620,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { KScopedPageTableUpdater updater(this); // Prepare to iterate over the memory. - auto pg_it = pg.Nodes().begin(); + auto pg_it = pg.begin(); PAddr pg_phys_addr = pg_it->GetAddress(); size_t pg_pages = pg_it->GetNumPages(); @@ -1680,9 +1650,6 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { last_unmap_address + 1 - cur_address) / PageSize; - // HACK: Manually close the pages. - HACK_ClosePages(cur_address, cur_pages); - // Unmap. ASSERT(Operate(cur_address, cur_pages, KMemoryPermission::None, OperationType::Unmap) @@ -1703,7 +1670,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { // Release any remaining unmapped memory. m_system.Kernel().MemoryManager().OpenFirst(pg_phys_addr, pg_pages); m_system.Kernel().MemoryManager().Close(pg_phys_addr, pg_pages); - for (++pg_it; pg_it != pg.Nodes().end(); ++pg_it) { + for (++pg_it; pg_it != pg.end(); ++pg_it) { m_system.Kernel().MemoryManager().OpenFirst(pg_it->GetAddress(), pg_it->GetNumPages()); m_system.Kernel().MemoryManager().Close(pg_it->GetAddress(), @@ -1731,7 +1698,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { // Check if we're at the end of the physical block. if (pg_pages == 0) { // Ensure there are more pages to map. - ASSERT(pg_it != pg.Nodes().end()); + ASSERT(pg_it != pg.end()); // Advance our physical block. ++pg_it; @@ -1742,10 +1709,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { // Map whatever we can. const size_t cur_pages = std::min(pg_pages, map_pages); R_TRY(Operate(cur_address, cur_pages, KMemoryPermission::UserReadWrite, - OperationType::Map, pg_phys_addr)); - - // HACK: Manually open the pages. - HACK_OpenPages(pg_phys_addr, cur_pages); + OperationType::MapFirst, pg_phys_addr)); // Advance. cur_address += cur_pages * PageSize; @@ -1888,9 +1852,6 @@ Result KPageTable::UnmapPhysicalMemory(VAddr address, size_t size) { last_address + 1 - cur_address) / PageSize; - // HACK: Manually close the pages. - HACK_ClosePages(cur_address, cur_pages); - // Unmap. ASSERT(Operate(cur_address, cur_pages, KMemoryPermission::None, OperationType::Unmap) .IsSuccess()); @@ -1955,7 +1916,7 @@ Result KPageTable::MapMemory(VAddr dst_address, VAddr src_address, size_t size) R_TRY(dst_allocator_result); // Map the memory. - KPageGroup page_linked_list; + KPageGroup page_linked_list{m_kernel, m_block_info_manager}; const size_t num_pages{size / PageSize}; const KMemoryPermission new_src_perm = static_cast( KMemoryPermission::KernelRead | KMemoryPermission::NotMapped); @@ -2022,14 +1983,14 @@ Result KPageTable::UnmapMemory(VAddr dst_address, VAddr src_address, size_t size num_dst_allocator_blocks); R_TRY(dst_allocator_result); - KPageGroup src_pages; - KPageGroup dst_pages; + KPageGroup src_pages{m_kernel, m_block_info_manager}; + KPageGroup dst_pages{m_kernel, m_block_info_manager}; const size_t num_pages{size / PageSize}; AddRegionToPages(src_address, num_pages, src_pages); AddRegionToPages(dst_address, num_pages, dst_pages); - R_UNLESS(dst_pages.IsEqual(src_pages), ResultInvalidMemoryRegion); + R_UNLESS(dst_pages.IsEquivalentTo(src_pages), ResultInvalidMemoryRegion); { auto block_guard = detail::ScopeExit([&] { MapPages(dst_address, dst_pages, dst_perm); }); @@ -2060,7 +2021,7 @@ Result KPageTable::MapPages(VAddr addr, const KPageGroup& page_linked_list, VAddr cur_addr{addr}; - for (const auto& node : page_linked_list.Nodes()) { + for (const auto& node : page_linked_list) { if (const auto result{ Operate(cur_addr, node.GetNumPages(), perm, OperationType::Map, node.GetAddress())}; result.IsError()) { @@ -2160,7 +2121,7 @@ Result KPageTable::UnmapPages(VAddr addr, const KPageGroup& page_linked_list) { VAddr cur_addr{addr}; - for (const auto& node : page_linked_list.Nodes()) { + for (const auto& node : page_linked_list) { if (const auto result{Operate(cur_addr, node.GetNumPages(), KMemoryPermission::None, OperationType::Unmap)}; result.IsError()) { @@ -2527,13 +2488,13 @@ Result KPageTable::SetHeapSize(VAddr* out, size_t size) { R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached); // Allocate pages for the heap extension. - KPageGroup pg; + KPageGroup pg{m_kernel, m_block_info_manager}; R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen( &pg, allocation_size / PageSize, KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option))); // Clear all the newly allocated pages. - for (const auto& it : pg.Nodes()) { + for (const auto& it : pg) { std::memset(m_system.DeviceMemory().GetPointer(it.GetAddress()), m_heap_fill_value, it.GetSize()); } @@ -2610,11 +2571,23 @@ ResultVal KPageTable::AllocateAndMapMemory(size_t needed_num_pages, size_ if (is_map_only) { R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr)); } else { - KPageGroup page_group; - R_TRY(m_system.Kernel().MemoryManager().AllocateForProcess( - &page_group, needed_num_pages, - KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option), 0, 0)); - R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup)); + // Create a page group tohold the pages we allocate. + KPageGroup pg{m_kernel, m_block_info_manager}; + + R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen( + &pg, needed_num_pages, + KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option))); + + // Ensure that the page group is closed when we're done working with it. + SCOPE_EXIT({ pg.Close(); }); + + // Clear all pages. + for (const auto& it : pg) { + std::memset(m_system.DeviceMemory().GetPointer(it.GetAddress()), + m_heap_fill_value, it.GetSize()); + } + + R_TRY(Operate(addr, needed_num_pages, pg, OperationType::MapGroup)); } // Update the blocks. @@ -2795,19 +2768,28 @@ Result KPageTable::Operate(VAddr addr, size_t num_pages, const KPageGroup& page_ ASSERT(num_pages > 0); ASSERT(num_pages == page_group.GetNumPages()); - for (const auto& node : page_group.Nodes()) { - const size_t size{node.GetNumPages() * PageSize}; + switch (operation) { + case OperationType::MapGroup: { + // We want to maintain a new reference to every page in the group. + KScopedPageGroup spg(page_group); - switch (operation) { - case OperationType::MapGroup: + for (const auto& node : page_group) { + const size_t size{node.GetNumPages() * PageSize}; + + // Map the pages. m_system.Memory().MapMemoryRegion(*m_page_table_impl, addr, size, node.GetAddress()); - break; - default: - ASSERT(false); - break; + + addr += size; } - addr += size; + // We succeeded! We want to persist the reference to the pages. + spg.CancelClose(); + + break; + } + default: + ASSERT(false); + break; } R_SUCCEED(); @@ -2822,13 +2804,29 @@ Result KPageTable::Operate(VAddr addr, size_t num_pages, KMemoryPermission perm, ASSERT(ContainsPages(addr, num_pages)); switch (operation) { - case OperationType::Unmap: + case OperationType::Unmap: { + // Ensure that any pages we track close on exit. + KPageGroup pages_to_close{m_kernel, this->GetBlockInfoManager()}; + SCOPE_EXIT({ pages_to_close.CloseAndReset(); }); + + this->AddRegionToPages(addr, num_pages, pages_to_close); m_system.Memory().UnmapRegion(*m_page_table_impl, addr, num_pages * PageSize); break; + } + case OperationType::MapFirst: case OperationType::Map: { ASSERT(map_addr); ASSERT(Common::IsAligned(map_addr, PageSize)); m_system.Memory().MapMemoryRegion(*m_page_table_impl, addr, num_pages * PageSize, map_addr); + + // Open references to pages, if we should. + if (IsHeapPhysicalAddress(m_kernel.MemoryLayout(), map_addr)) { + if (operation == OperationType::MapFirst) { + m_kernel.MemoryManager().OpenFirst(map_addr, num_pages); + } else { + m_kernel.MemoryManager().Open(map_addr, num_pages); + } + } break; } case OperationType::Separate: { diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h index f1ca785..0a454b0 100644 --- a/src/core/hle/kernel/k_page_table.h +++ b/src/core/hle/kernel/k_page_table.h @@ -107,6 +107,10 @@ public: return *m_page_table_impl; } + KBlockInfoManager* GetBlockInfoManager() { + return m_block_info_manager; + } + bool CanContain(VAddr addr, size_t size, KMemoryState state) const; protected: @@ -261,10 +265,6 @@ private: void CleanupForIpcClientOnServerSetupFailure(PageLinkedList* page_list, VAddr address, size_t size, KMemoryPermission prot_perm); - // HACK: These will be removed once we automatically manage page reference counts. - void HACK_OpenPages(PAddr phys_addr, size_t num_pages); - void HACK_ClosePages(VAddr virt_addr, size_t num_pages); - mutable KLightLock m_general_lock; mutable KLightLock m_map_physical_memory_lock; @@ -488,6 +488,7 @@ private: std::unique_ptr m_page_table_impl; Core::System& m_system; + KernelCore& m_kernel; }; } // namespace Kernel diff --git a/src/core/hle/kernel/k_shared_memory.cpp b/src/core/hle/kernel/k_shared_memory.cpp index 0aa6810..3cf2b5d 100644 --- a/src/core/hle/kernel/k_shared_memory.cpp +++ b/src/core/hle/kernel/k_shared_memory.cpp @@ -13,10 +13,7 @@ namespace Kernel { KSharedMemory::KSharedMemory(KernelCore& kernel_) : KAutoObjectWithSlabHeapAndContainer{kernel_} {} - -KSharedMemory::~KSharedMemory() { - kernel.GetSystemResourceLimit()->Release(LimitableResource::PhysicalMemoryMax, size); -} +KSharedMemory::~KSharedMemory() = default; Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* owner_process_, Svc::MemoryPermission owner_permission_, @@ -49,7 +46,8 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o R_UNLESS(physical_address != 0, ResultOutOfMemory); //! Insert the result into our page group. - page_group.emplace(physical_address, num_pages); + page_group.emplace(kernel, &kernel.GetSystemSystemResource().GetBlockInfoManager()); + page_group->AddBlock(physical_address, num_pages); // Commit our reservation. memory_reservation.Commit(); @@ -62,7 +60,7 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o is_initialized = true; // Clear all pages in the memory. - for (const auto& block : page_group->Nodes()) { + for (const auto& block : *page_group) { std::memset(device_memory_.GetPointer(block.GetAddress()), 0, block.GetSize()); } @@ -71,13 +69,8 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o void KSharedMemory::Finalize() { // Close and finalize the page group. - // page_group->Close(); - // page_group->Finalize(); - - //! HACK: Manually close. - for (const auto& block : page_group->Nodes()) { - kernel.MemoryManager().Close(block.GetAddress(), block.GetNumPages()); - } + page_group->Close(); + page_group->Finalize(); // Release the memory reservation. resource_limit->Release(LimitableResource::PhysicalMemoryMax, size); diff --git a/src/core/hle/kernel/memory_types.h b/src/core/hle/kernel/memory_types.h index 3975507..92b8b37 100644 --- a/src/core/hle/kernel/memory_types.h +++ b/src/core/hle/kernel/memory_types.h @@ -14,4 +14,7 @@ constexpr std::size_t PageSize{1 << PageBits}; using Page = std::array; +using KPhysicalAddress = PAddr; +using KProcessAddress = VAddr; + } // namespace Kernel diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 788ee21..aca4421 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1485,7 +1485,7 @@ static Result MapProcessMemory(Core::System& system, VAddr dst_address, Handle p ResultInvalidMemoryRegion); // Create a new page group. - KPageGroup pg; + KPageGroup pg{system.Kernel(), dst_pt.GetBlockInfoManager()}; R_TRY(src_pt.MakeAndOpenPageGroup( std::addressof(pg), src_address, size / PageSize, KMemoryState::FlagCanMapProcess, KMemoryState::FlagCanMapProcess, KMemoryPermission::None, KMemoryPermission::None, diff --git a/src/input_common/drivers/tas_input.cpp b/src/input_common/drivers/tas_input.cpp index f3ade90..f3cb14c 100644 --- a/src/input_common/drivers/tas_input.cpp +++ b/src/input_common/drivers/tas_input.cpp @@ -156,10 +156,12 @@ void Tas::RecordInput(u64 buttons, TasAnalog left_axis, TasAnalog right_axis) { }; } -std::tuple Tas::GetStatus() const { +std::tuple> Tas::GetStatus() const { TasState state; + std::array lengths{0}; if (is_recording) { - return {TasState::Recording, 0, record_commands.size()}; + lengths[0] = record_commands.size(); + return {TasState::Recording, record_commands.size(), lengths}; } if (is_running) { @@ -168,7 +170,11 @@ std::tuple Tas::GetStatus() const { state = TasState::Stopped; } - return {state, current_command, script_length}; + for (size_t i = 0; i < PLAYER_NUMBER; i++) { + lengths[i] = commands[i].size(); + } + + return {state, current_command, lengths}; } void Tas::UpdateThread() { diff --git a/src/input_common/drivers/tas_input.h b/src/input_common/drivers/tas_input.h index 38a27a2..5be66d1 100644 --- a/src/input_common/drivers/tas_input.h +++ b/src/input_common/drivers/tas_input.h @@ -124,7 +124,7 @@ public: * Current playback progress ; * Total length of script file currently loaded or being recorded */ - std::tuple GetStatus() const; + std::tuple> GetStatus() const; private: enum class TasAxis : u8; diff --git a/src/input_common/helpers/stick_from_buttons.cpp b/src/input_common/helpers/stick_from_buttons.cpp index 82aa6ac..f3a0b34 100644 --- a/src/input_common/helpers/stick_from_buttons.cpp +++ b/src/input_common/helpers/stick_from_buttons.cpp @@ -13,11 +13,11 @@ class Stick final : public Common::Input::InputDevice { public: using Button = std::unique_ptr; - Stick(Button up_, Button down_, Button left_, Button right_, Button modifier_, + Stick(Button up_, Button down_, Button left_, Button right_, Button modifier_, Button updater_, float modifier_scale_, float modifier_angle_) : up(std::move(up_)), down(std::move(down_)), left(std::move(left_)), - right(std::move(right_)), modifier(std::move(modifier_)), modifier_scale(modifier_scale_), - modifier_angle(modifier_angle_) { + right(std::move(right_)), modifier(std::move(modifier_)), updater(std::move(updater_)), + modifier_scale(modifier_scale_), modifier_angle(modifier_angle_) { up->SetCallback({ .on_change = [this](const Common::Input::CallbackStatus& callback_) { @@ -48,6 +48,9 @@ public: UpdateModButtonStatus(callback_); }, }); + updater->SetCallback({ + .on_change = [this](const Common::Input::CallbackStatus& callback_) { SoftUpdate(); }, + }); last_x_axis_value = 0.0f; last_y_axis_value = 0.0f; } @@ -248,7 +251,7 @@ public: modifier->ForceUpdate(); } - void SoftUpdate() override { + void SoftUpdate() { Common::Input::CallbackStatus status{ .type = Common::Input::InputType::Stick, .stick_status = GetStatus(), @@ -308,6 +311,7 @@ private: Button left; Button right; Button modifier; + Button updater; float modifier_scale{}; float modifier_angle{}; float angle{}; @@ -331,11 +335,12 @@ std::unique_ptr StickFromButton::Create( auto left = Common::Input::CreateInputDeviceFromString(params.Get("left", null_engine)); auto right = Common::Input::CreateInputDeviceFromString(params.Get("right", null_engine)); auto modifier = Common::Input::CreateInputDeviceFromString(params.Get("modifier", null_engine)); + auto updater = Common::Input::CreateInputDeviceFromString("engine:updater,button:0"); auto modifier_scale = params.Get("modifier_scale", 0.5f); auto modifier_angle = params.Get("modifier_angle", 5.5f); return std::make_unique(std::move(up), std::move(down), std::move(left), - std::move(right), std::move(modifier), modifier_scale, - modifier_angle); + std::move(right), std::move(modifier), std::move(updater), + modifier_scale, modifier_angle); } } // namespace InputCommon diff --git a/src/input_common/input_mapping.cpp b/src/input_common/input_mapping.cpp index edd5287..d6e49d2 100644 --- a/src/input_common/input_mapping.cpp +++ b/src/input_common/input_mapping.cpp @@ -76,7 +76,7 @@ void MappingFactory::RegisterButton(const MappingData& data) { break; case EngineInputType::Analog: // Ignore mouse axis when mapping buttons - if (data.engine == "mouse") { + if (data.engine == "mouse" && data.index != 4) { return; } new_input.Set("axis", data.index); diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp index 4dc92f4..e0b2131 100644 --- a/src/input_common/main.cpp +++ b/src/input_common/main.cpp @@ -28,6 +28,28 @@ namespace InputCommon { +/// Dummy engine to get periodic updates +class UpdateEngine final : public InputEngine { +public: + explicit UpdateEngine(std::string input_engine_) : InputEngine(std::move(input_engine_)) { + PreSetController(identifier); + } + + void PumpEvents() { + SetButton(identifier, 0, last_state); + last_state = !last_state; + } + +private: + static constexpr PadIdentifier identifier = { + .guid = Common::UUID{}, + .port = 0, + .pad = 0, + }; + + bool last_state{}; +}; + struct InputSubsystem::Impl { template void RegisterEngine(std::string name, std::shared_ptr& engine) { @@ -45,6 +67,7 @@ struct InputSubsystem::Impl { void Initialize() { mapping_factory = std::make_shared(); + RegisterEngine("updater", update_engine); RegisterEngine("keyboard", keyboard); RegisterEngine("mouse", mouse); RegisterEngine("touch", touch_screen); @@ -74,6 +97,7 @@ struct InputSubsystem::Impl { } void Shutdown() { + UnregisterEngine(update_engine); UnregisterEngine(keyboard); UnregisterEngine(mouse); UnregisterEngine(touch_screen); @@ -252,6 +276,7 @@ struct InputSubsystem::Impl { } void PumpEvents() const { + update_engine->PumpEvents(); #ifdef HAVE_SDL2 sdl->PumpEvents(); #endif @@ -263,6 +288,7 @@ struct InputSubsystem::Impl { std::shared_ptr mapping_factory; + std::shared_ptr update_engine; std::shared_ptr keyboard; std::shared_ptr mouse; std::shared_ptr touch_screen; diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index a3b99e2..17a6d48 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -259,7 +259,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool> (i % 32)) & 1) == 0; } @@ -292,7 +292,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool low_addr{TrackLowAddress(&inst)}) { @@ -416,10 +415,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer } // Subtract the least significant 32 bits from the guest offset. The result is the storage // buffer offset in bytes. - IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; - - // Align the offset base to match the host alignment requirements - low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))); + const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; return ir.ISub(offset, low_cbuf); } @@ -514,7 +510,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, } } // Anonymous namespace -void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) { +void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageInfo info; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { @@ -538,8 +534,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateIn const IR::U32 index{IR::Value{static_cast(info.set.index_of(it))}}; IR::Block* const block{storage_inst.block}; IR::Inst* const inst{storage_inst.inst}; - const IR::U32 offset{ - StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)}; + const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; Replace(*block, *inst, index, offset); } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 4ffad11..1f8f2ba 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -15,7 +15,7 @@ namespace Shader::Optimization { void CollectShaderInfoPass(Environment& env, IR::Program& program); void ConstantPropagationPass(Environment& env, IR::Program& program); void DeadCodeEliminationPass(IR::Program& program); -void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info); +void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); void LowerInt64ToInt32(IR::Program& program); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f617665..b474eb3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -100,6 +100,8 @@ add_library(video_core STATIC renderer_null/null_rasterizer.h renderer_null/renderer_null.cpp renderer_null/renderer_null.h + renderer_opengl/blit_image.cpp + renderer_opengl/blit_image.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_compute_pipeline.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 627917a..06fd408 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1938,21 +1938,14 @@ typename BufferCache

::Binding BufferCache

::StorageBufferBinding(GPUVAddr s bool is_written) const { const GPUVAddr gpu_addr = gpu_memory->Read(ssbo_addr); const u32 size = gpu_memory->Read(ssbo_addr + 8); - const u32 alignment = runtime.GetStorageBufferAlignment(); - - const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); - const u32 aligned_size = - Common::AlignUp(static_cast(gpu_addr - aligned_gpu_addr) + size, alignment); - - const std::optional cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); + const std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr || size == 0) { return NULL_BINDING; } - - const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE); + const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); const Binding binding{ .cpu_addr = *cpu_addr, - .size = is_written ? aligned_size : static_cast(cpu_end - *cpu_addr), + .size = is_written ? size : static_cast(cpu_end - *cpu_addr), .buffer_id = BufferId{}, }; return binding; diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 2437121..1d22d25 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -51,6 +51,10 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) { LOG_WARNING(HW_GPU, "(STUBBED) called"); break; } + case MAXWELL3D_REG_INDEX(draw_texture.src_y0): { + DrawTexture(); + break; + } default: break; } @@ -179,6 +183,33 @@ void DrawManager::DrawIndexSmall(u32 argument) { ProcessDraw(true, 1); } +void DrawManager::DrawTexture() { + const auto& regs{maxwell3d->regs}; + draw_texture_state.dst_x0 = static_cast(regs.draw_texture.dst_x0) / 4096.f; + draw_texture_state.dst_y0 = static_cast(regs.draw_texture.dst_y0) / 4096.f; + const auto dst_width = static_cast(regs.draw_texture.dst_width) / 4096.f; + const auto dst_height = static_cast(regs.draw_texture.dst_height) / 4096.f; + const bool lower_left{regs.window_origin.mode != + Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft}; + if (lower_left) { + draw_texture_state.dst_y0 -= dst_height; + } + draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width; + draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height; + draw_texture_state.src_x0 = static_cast(regs.draw_texture.src_x0) / 4096.f; + draw_texture_state.src_y0 = static_cast(regs.draw_texture.src_y0) / 4096.f; + draw_texture_state.src_x1 = + (static_cast(regs.draw_texture.dx_du) / 4294967296.f) * dst_width + + draw_texture_state.src_x0; + draw_texture_state.src_y1 = + (static_cast(regs.draw_texture.dy_dv) / 4294967296.f) * dst_height + + draw_texture_state.src_y0; + draw_texture_state.src_sampler = regs.draw_texture.src_sampler; + draw_texture_state.src_texture = regs.draw_texture.src_texture; + + maxwell3d->rasterizer->DrawTexture(); +} + void DrawManager::UpdateTopology() { const auto& regs{maxwell3d->regs}; switch (regs.primitive_topology_control) { diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 58d1b2d..7c22c49 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -32,6 +32,19 @@ public: std::vector inline_index_draw_indexes; }; + struct DrawTextureState { + f32 dst_x0; + f32 dst_y0; + f32 dst_x1; + f32 dst_y1; + f32 src_x0; + f32 src_y0; + f32 src_x1; + f32 src_y1; + u32 src_sampler; + u32 src_texture; + }; + struct IndirectParams { bool is_indexed; bool include_count; @@ -64,6 +77,10 @@ public: return draw_state; } + const DrawTextureState& GetDrawTextureState() const { + return draw_texture_state; + } + IndirectParams& GetIndirectParams() { return indirect_state; } @@ -81,6 +98,8 @@ private: void DrawIndexSmall(u32 argument); + void DrawTexture(); + void UpdateTopology(); void ProcessDraw(bool draw_indexed, u32 instance_count); @@ -89,6 +108,7 @@ private: Maxwell3D* maxwell3d{}; State draw_state{}; + DrawTextureState draw_texture_state{}; IndirectParams indirect_state{}; }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 97f5477..05459af 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -149,6 +149,7 @@ bool Maxwell3D::IsMethodExecutable(u32 method) { case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): case MAXWELL3D_REG_INDEX(vertex_array_instance_first): case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): + case MAXWELL3D_REG_INDEX(draw_texture.src_y0): case MAXWELL3D_REG_INDEX(wait_for_idle): case MAXWELL3D_REG_INDEX(shadow_ram_control): case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr): diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0b2fd29..c89969b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1599,6 +1599,20 @@ public: }; static_assert(sizeof(TIRModulationCoeff) == 0x4); + struct DrawTexture { + s32 dst_x0; + s32 dst_y0; + s32 dst_width; + s32 dst_height; + s64 dx_du; + s64 dy_dv; + u32 src_sampler; + u32 src_texture; + s32 src_x0; + s32 src_y0; + }; + static_assert(sizeof(DrawTexture) == 0x30); + struct ReduceColorThreshold { union { BitField<0, 8, u32> all_hit_once; @@ -2751,7 +2765,7 @@ public: u32 reserved_sw_method2; ///< 0x102C std::array tir_modulation_coeff; ///< 0x1030 std::array spare_nop; ///< 0x1044 - INSERT_PADDING_BYTES_NOINIT(0x30); + DrawTexture draw_texture; ///< 0x1080 std::array reserved_sw_method3_to_7; ///< 0x10B0 ReduceColorThreshold reduce_color_thresholds_unorm8; ///< 0x10CC std::array reserved_sw_method10_to_13; ///< 0x10D0 diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index f275b2a..e968ae2 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -11,6 +11,7 @@ set(GLSL_INCLUDES set(SHADER_FILES astc_decoder.comp + blit_color_float.frag block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp convert_abgr8_to_d24s8.frag @@ -36,7 +37,6 @@ set(SHADER_FILES smaa_blending_weight_calculation.frag smaa_neighborhood_blending.vert smaa_neighborhood_blending.frag - vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag vulkan_fidelityfx_fsr_easu_fp16.comp vulkan_fidelityfx_fsr_easu_fp32.comp diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/blit_color_float.frag similarity index 100% rename from src/video_core/host_shaders/vulkan_blit_color_float.frag rename to src/video_core/host_shaders/blit_color_float.frag diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert index 2c976b1..8ac936e 100644 --- a/src/video_core/host_shaders/full_screen_triangle.vert +++ b/src/video_core/host_shaders/full_screen_triangle.vert @@ -4,13 +4,20 @@ #version 450 #ifdef VULKAN +#define VERTEX_ID gl_VertexIndex #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { #define END_PUSH_CONSTANTS }; #define UNIFORM(n) +#define FLIPY 1 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv +#define VERTEX_ID gl_VertexID #define BEGIN_PUSH_CONSTANTS #define END_PUSH_CONSTANTS +#define FLIPY -1 #define UNIFORM(n) layout (location = n) uniform +out gl_PerVertex { + vec4 gl_Position; +}; #endif BEGIN_PUSH_CONSTANTS @@ -21,8 +28,8 @@ END_PUSH_CONSTANTS layout(location = 0) out vec2 texcoord; void main() { - float x = float((gl_VertexIndex & 1) << 2); - float y = float((gl_VertexIndex & 2) << 1); - gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); + float x = float((VERTEX_ID & 1) << 2); + float y = float((VERTEX_ID & 2) << 1); + gl_Position = vec4(x - 1.0, FLIPY * (y - 1.0), 0.0, 1.0); texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset); -} +} \ No newline at end of file diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 1735b61..33e2610 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -47,6 +47,9 @@ public: /// Dispatches an indirect draw invocation virtual void DrawIndirect() {} + /// Dispatches an draw texture invocation + virtual void DrawTexture() = 0; + /// Clear the current framebuffer virtual void Clear(u32 layer_count) = 0; diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 2c11345..2b5c7de 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -21,6 +21,7 @@ RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gp RasterizerNull::~RasterizerNull() = default; void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} +void RasterizerNull::DrawTexture() {} void RasterizerNull::Clear(u32 layer_count) {} void RasterizerNull::DispatchCompute() {} void RasterizerNull::ResetCounter(VideoCore::QueryType type) {} diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 2112aa7..51f896e 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -31,6 +31,7 @@ public: ~RasterizerNull() override; void Draw(bool is_indexed, u32 instance_count) override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; diff --git a/src/video_core/renderer_opengl/blit_image.cpp b/src/video_core/renderer_opengl/blit_image.cpp new file mode 100644 index 0000000..9a560a7 --- /dev/null +++ b/src/video_core/renderer_opengl/blit_image.cpp @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "video_core/host_shaders/blit_color_float_frag.h" +#include "video_core/host_shaders/full_screen_triangle_vert.h" +#include "video_core/renderer_opengl/blit_image.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" + +namespace OpenGL { + +BlitImageHelper::BlitImageHelper(ProgramManager& program_manager_) + : program_manager(program_manager_), + full_screen_vert(CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER)), + blit_color_to_color_frag( + CreateProgram(HostShaders::BLIT_COLOR_FLOAT_FRAG, GL_FRAGMENT_SHADER)) {} + +BlitImageHelper::~BlitImageHelper() = default; + +void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size) { + glEnable(GL_CULL_FACE); + glDisable(GL_COLOR_LOGIC_OP); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_RASTERIZER_DISCARD); + glDisable(GL_ALPHA_TEST); + glDisablei(GL_BLEND, 0); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glCullFace(GL_BACK); + glFrontFace(GL_CW); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDepthRangeIndexed(0, 0.0, 0.0); + + program_manager.BindPresentPrograms(full_screen_vert.handle, blit_color_to_color_frag.handle); + glProgramUniform2f(full_screen_vert.handle, 0, + static_cast(src_region.end.x - src_region.start.x) / + static_cast(src_size.width), + static_cast(src_region.end.y - src_region.start.y) / + static_cast(src_size.height)); + glProgramUniform2f(full_screen_vert.handle, 1, + static_cast(src_region.start.x) / static_cast(src_size.width), + static_cast(src_region.start.y) / + static_cast(src_size.height)); + glViewport(std::min(dst_region.start.x, dst_region.end.x), + std::min(dst_region.start.y, dst_region.end.y), + std::abs(dst_region.end.x - dst_region.start.x), + std::abs(dst_region.end.y - dst_region.start.y)); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer); + glBindSampler(0, src_sampler); + glBindTextureUnit(0, src_image_view); + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLES, 0, 3); +} +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/blit_image.h b/src/video_core/renderer_opengl/blit_image.h new file mode 100644 index 0000000..5a2b12d --- /dev/null +++ b/src/video_core/renderer_opengl/blit_image.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/texture_cache/types.h" + +namespace OpenGL { + +using VideoCommon::Extent3D; +using VideoCommon::Offset2D; +using VideoCommon::Region2D; + +class ProgramManager; +class Framebuffer; +class ImageView; + +class BlitImageHelper { +public: + explicit BlitImageHelper(ProgramManager& program_manager); + ~BlitImageHelper(); + + void BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size); + +private: + ProgramManager& program_manager; + + OGLProgram full_screen_vert; + OGLProgram blit_color_to_color_frag; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index bb19620..a8c3f8b 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -160,10 +160,6 @@ public: return device.CanReportMemoryUsage(); } - u32 GetStorageBufferAlignment() const { - return static_cast(device.GetShaderStorageBufferAlignment()); - } - private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index cee5c32..22ed16e 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -166,6 +166,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; + has_draw_texture = GLAD_GL_NV_draw_texture; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; need_fastmath_off = is_nvidia; can_report_memory = GLAD_GL_NVX_gpu_memory_info; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2a72d84..3ff8cad 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -4,6 +4,8 @@ #pragma once #include +#include + #include "common/common_types.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/stage.h" @@ -146,6 +148,10 @@ public: return has_sparse_texture_2; } + bool HasDrawTexture() const { + return has_draw_texture; + } + bool IsWarpSizePotentiallyLargerThanGuest() const { return warp_size_potentially_larger_than_guest; } @@ -216,6 +222,7 @@ private: bool has_shader_int64{}; bool has_amd_shader_half_float{}; bool has_sparse_texture_2{}; + bool has_draw_texture{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; bool has_cbuf_ftou_bug{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7d48af8..7bced67 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -64,7 +64,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), query_cache(*this), accelerate_dma(buffer_cache), - fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), + blit_image(program_manager_) {} RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -139,6 +140,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_load void RasterizerOpenGL::Clear(u32 layer_count) { MICROPROFILE_SCOPE(OpenGL_Clears); + gpu_memory->FlushCaching(); const auto& regs = maxwell3d->regs; bool use_color{}; bool use_depth{}; @@ -207,6 +209,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { MICROPROFILE_SCOPE(OpenGL_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); + gpu_memory->FlushCaching(); query_cache.UpdateCounters(); GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; @@ -318,7 +321,49 @@ void RasterizerOpenGL::DrawIndirect() { buffer_cache.SetDrawIndirect(nullptr); } +void RasterizerOpenGL::DrawTexture() { + MICROPROFILE_SCOPE(OpenGL_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + query_cache.UpdateCounters(); + + texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.UpdateRenderTargets(false); + + SyncState(); + + const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); + const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); + + if (device.HasDrawTexture()) { + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + + glDrawTextureNV(texture.DefaultHandle(), sampler->Handle(), draw_texture_state.dst_x0, + draw_texture_state.dst_y0, draw_texture_state.dst_x1, + draw_texture_state.dst_y1, 0, + draw_texture_state.src_x0 / static_cast(texture.size.width), + draw_texture_state.src_y0 / static_cast(texture.size.height), + draw_texture_state.src_x1 / static_cast(texture.size.width), + draw_texture_state.src_y1 / static_cast(texture.size.height)); + } else { + Region2D dst_region = {Offset2D{.x = static_cast(draw_texture_state.dst_x0), + .y = static_cast(draw_texture_state.dst_y0)}, + Offset2D{.x = static_cast(draw_texture_state.dst_x1), + .y = static_cast(draw_texture_state.dst_y1)}}; + Region2D src_region = {Offset2D{.x = static_cast(draw_texture_state.src_x0), + .y = static_cast(draw_texture_state.src_y0)}, + Offset2D{.x = static_cast(draw_texture_state.src_x1), + .y = static_cast(draw_texture_state.src_y1)}}; + blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(), + sampler->Handle(), dst_region, src_region, texture.size); + } + + ++num_queued_commands; +} + void RasterizerOpenGL::DispatchCompute() { + gpu_memory->FlushCaching(); ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; if (!pipeline) { return; @@ -526,6 +571,7 @@ void RasterizerOpenGL::TickFrame() { } bool RasterizerOpenGL::AccelerateConditionalRendering() { + gpu_memory->FlushCaching(); if (Settings::IsGPULevelHigh()) { // Reimplement Host conditional rendering. return false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index be4f76c..0c45832 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -16,6 +16,7 @@ #include "video_core/engines/maxwell_dma.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/blit_image.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_fence_manager.h" @@ -70,6 +71,7 @@ public: void Draw(bool is_indexed, u32 instance_count) override; void DrawIndirect() override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; @@ -224,6 +226,8 @@ private: AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; + BlitImageHelper blit_image; + boost::container::static_vector image_view_indices; std::array image_view_ids; boost::container::static_vector sampler_handles; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7dd854e..9442193 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -236,7 +236,6 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .needs_demote_reorder = device.IsAmd(), .support_snorm_render_buffer = false, .support_viewport_index_layer = device.HasVertexViewportLayer(), - .min_ssbo_alignment = static_cast(device.GetShaderStorageBufferAlignment()), .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), } { if (use_asynchronous_shaders) { diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index d9c29d8..98841ae 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,2 +1,123 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, +}; + +ProgramManager::ProgramManager(const Device& device) { + glCreateProgramPipelines(1, &pipeline.handle); + if (device.UseAssemblyShaders()) { + glEnable(GL_COMPUTE_PROGRAM_NV); + } +} + +void ProgramManager::BindComputeProgram(GLuint program) { + glUseProgram(program); + is_compute_bound = true; +} + +void ProgramManager::BindComputeAssemblyProgram(GLuint program) { + if (current_assembly_compute_program != program) { + current_assembly_compute_program = program; + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); + } + UnbindPipeline(); +} + +void ProgramManager::BindSourcePrograms(std::span programs) { + static constexpr std::array stage_enums{ + GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, + GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, + }; + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); + } + } + BindPipeline(); +} + +void ProgramManager::BindPresentPrograms(GLuint vertex, GLuint fragment) { + if (current_programs[0] != vertex) { + current_programs[0] = vertex; + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); + } + if (current_programs[4] != fragment) { + current_programs[4] = fragment; + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); + } + glUseProgramStages( + pipeline.handle, + GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); + current_programs[1] = 0; + current_programs[2] = 0; + current_programs[3] = 0; + + if (current_stage_mask != 0) { + current_stage_mask = 0; + for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { + glDisable(program_type); + } + } + BindPipeline(); +} + +void ProgramManager::BindAssemblyPrograms(std::span programs, + u32 stage_mask) { + const u32 changed_mask = current_stage_mask ^ stage_mask; + current_stage_mask = stage_mask; + + if (changed_mask != 0) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (((changed_mask >> stage) & 1) != 0) { + if (((stage_mask >> stage) & 1) != 0) { + glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } else { + glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } + } + } + } + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); + } + } + UnbindPipeline(); +} + +void ProgramManager::RestoreGuestCompute() {} + +void ProgramManager::BindPipeline() { + if (!is_pipeline_bound) { + is_pipeline_bound = true; + glBindProgramPipeline(pipeline.handle); + } + UnbindCompute(); +} + +void ProgramManager::UnbindPipeline() { + if (is_pipeline_bound) { + is_pipeline_bound = false; + glBindProgramPipeline(0); + } + UnbindCompute(); +} + +void ProgramManager::UnbindCompute() { + if (is_compute_bound) { + is_compute_bound = false; + glUseProgram(0); + } +} +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index a84f5ae..07ffab7 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -6,8 +6,6 @@ #include #include -#include - #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -16,121 +14,28 @@ namespace OpenGL { class ProgramManager { static constexpr size_t NUM_STAGES = 5; - static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ - GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, - GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, - }; - public: - explicit ProgramManager(const Device& device) { - glCreateProgramPipelines(1, &pipeline.handle); - if (device.UseAssemblyShaders()) { - glEnable(GL_COMPUTE_PROGRAM_NV); - } - } + explicit ProgramManager(const Device& device); - void BindComputeProgram(GLuint program) { - glUseProgram(program); - is_compute_bound = true; - } + void BindComputeProgram(GLuint program); - void BindComputeAssemblyProgram(GLuint program) { - if (current_assembly_compute_program != program) { - current_assembly_compute_program = program; - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); - } - UnbindPipeline(); - } + void BindComputeAssemblyProgram(GLuint program); - void BindSourcePrograms(std::span programs) { - static constexpr std::array stage_enums{ - GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, - GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, - }; - for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (current_programs[stage] != programs[stage].handle) { - current_programs[stage] = programs[stage].handle; - glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); - } - } - BindPipeline(); - } + void BindSourcePrograms(std::span programs); - void BindPresentPrograms(GLuint vertex, GLuint fragment) { - if (current_programs[0] != vertex) { - current_programs[0] = vertex; - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); - } - if (current_programs[4] != fragment) { - current_programs[4] = fragment; - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); - } - glUseProgramStages( - pipeline.handle, - GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); - current_programs[1] = 0; - current_programs[2] = 0; - current_programs[3] = 0; - - if (current_stage_mask != 0) { - current_stage_mask = 0; - for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { - glDisable(program_type); - } - } - BindPipeline(); - } + void BindPresentPrograms(GLuint vertex, GLuint fragment); void BindAssemblyPrograms(std::span programs, - u32 stage_mask) { - const u32 changed_mask = current_stage_mask ^ stage_mask; - current_stage_mask = stage_mask; + u32 stage_mask); - if (changed_mask != 0) { - for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (((changed_mask >> stage) & 1) != 0) { - if (((stage_mask >> stage) & 1) != 0) { - glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); - } else { - glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); - } - } - } - } - for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (current_programs[stage] != programs[stage].handle) { - current_programs[stage] = programs[stage].handle; - glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); - } - } - UnbindPipeline(); - } - - void RestoreGuestCompute() {} + void RestoreGuestCompute(); private: - void BindPipeline() { - if (!is_pipeline_bound) { - is_pipeline_bound = true; - glBindProgramPipeline(pipeline.handle); - } - UnbindCompute(); - } + void BindPipeline(); - void UnbindPipeline() { - if (is_pipeline_bound) { - is_pipeline_bound = false; - glBindProgramPipeline(0); - } - UnbindCompute(); - } + void UnbindPipeline(); - void UnbindCompute() { - if (is_compute_bound) { - is_compute_bound = false; - glUseProgram(0); - } - } + void UnbindCompute(); OGLPipeline pipeline; bool is_pipeline_bound{}; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 3f2b139..dd00d3e 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -4,13 +4,13 @@ #include #include "common/settings.h" +#include "video_core/host_shaders/blit_color_float_frag_spv.h" #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" #include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" -#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" @@ -303,7 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri } void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, - const Region2D& src_region) { + const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) { const VkOffset2D offset{ .x = std::min(dst_region.start.x, dst_region.end.x), .y = std::min(dst_region.start.y, dst_region.end.y), @@ -325,12 +325,15 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi .offset = offset, .extent = extent, }; - const float scale_x = static_cast(src_region.end.x - src_region.start.x); - const float scale_y = static_cast(src_region.end.y - src_region.start.y); + const float scale_x = static_cast(src_region.end.x - src_region.start.x) / + static_cast(src_size.width); + const float scale_y = static_cast(src_region.end.y - src_region.start.y) / + static_cast(src_size.height); const PushConstants push_constants{ .tex_scale = {scale_x, scale_y}, - .tex_offset = {static_cast(src_region.start.x), - static_cast(src_region.start.y)}, + .tex_offset = {static_cast(src_region.start.x) / static_cast(src_size.width), + static_cast(src_region.start.y) / + static_cast(src_size.height)}, }; cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); @@ -347,6 +350,51 @@ VkExtent2D GetConversionExtent(const ImageView& src_image_view) { .height = is_rescaled ? resolution.ScaleUp(height) : height, }; } + +void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, + VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL) { + constexpr VkFlags flags{VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT}; + const VkImageMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = flags, + .dstAccessMask = flags, + .oldLayout = source_layout, + .newLayout = target_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, barrier); +} + +void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) { + const VkRenderPass render_pass = framebuffer->RenderPass(); + const VkFramebuffer framebuffer_handle = framebuffer->Handle(); + const VkExtent2D render_area = framebuffer->RenderArea(); + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = render_pass, + .framebuffer = framebuffer_handle, + .renderArea{ + .offset{}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); +} } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, @@ -365,7 +413,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( PipelineLayoutCreateInfo(two_textures_set_layout.address()))), full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), - blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), + blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)), blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)), convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), @@ -404,6 +452,32 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView scheduler.InvalidateState(); } +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, + VkImage src_image, VkSampler src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size) { + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = Tegra::Engines::Fermi2D::Operation::SrcCopy, + }; + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([this, dst_framebuffer, src_image_view, src_image, src_sampler, dst_region, + src_region, src_size, pipeline, layout](vk::CommandBuffer cmdbuf) { + TransitionImageLayout(cmdbuf, src_image, VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL); + BeginRenderPass(cmdbuf, dst_framebuffer); + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + BindBlitState(cmdbuf, layout, dst_region, src_region, src_size); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + }); +} + void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, VkImageView src_stencil_view, const Region2D& dst_region, const Region2D& src_region, diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 5df679f..be8a9a2 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -10,6 +10,8 @@ namespace Vulkan { +using VideoCommon::Extent3D; +using VideoCommon::Offset2D; using VideoCommon::Region2D; class Device; @@ -36,6 +38,10 @@ public: Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); + void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, + VkImage src_image, VkSampler src_sampler, const Region2D& dst_region, + const Region2D& src_region, const Extent3D& src_size); + void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, VkImageView src_stencil_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 5285512..2a8d9e3 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -60,22 +60,9 @@ std::string GetDriverVersion(const Device& device) { return GetReadableVersion(version); } -std::string BuildCommaSeparatedExtensions(std::vector available_extensions) { - std::sort(std::begin(available_extensions), std::end(available_extensions)); - - static constexpr std::size_t AverageExtensionSize = 64; - std::string separated_extensions; - separated_extensions.reserve(available_extensions.size() * AverageExtensionSize); - - const auto end = std::end(available_extensions); - for (auto extension = std::begin(available_extensions); extension != end; ++extension) { - if (const bool is_last = extension + 1 == end; is_last) { - separated_extensions += *extension; - } else { - separated_extensions += fmt::format("{},", *extension); - } - } - return separated_extensions; +std::string BuildCommaSeparatedExtensions( + const std::set>& available_extensions) { + return fmt::format("{}", fmt::join(available_extensions, ",")); } } // Anonymous namespace @@ -112,6 +99,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, state_tracker, scheduler) { if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { turbo_mode.emplace(instance, dld); + scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); }); } Report(); } catch (const vk::Exception& exception) { @@ -120,6 +108,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, } RendererVulkan::~RendererVulkan() { + scheduler.RegisterOnSubmit([] {}); void(device.GetLogical().WaitIdle()); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1cfb4c2..b0153a5 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -330,10 +330,6 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const { return device.CanReportMemoryUsage(); } -u32 BufferCacheRuntime::GetStorageBufferAlignment() const { - return static_cast(device.GetStorageBufferAlignment()); -} - void BufferCacheRuntime::Finish() { scheduler.Finish(); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 06539c7..183b336 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -73,8 +73,6 @@ public: bool CanReportMemoryUsage() const; - u32 GetStorageBufferAlignment() const; - [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7e69b11..0684cce 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -344,7 +344,6 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, .support_snorm_render_buffer = true, .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), - .min_ssbo_alignment = static_cast(device.GetStorageBufferAlignment()), .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ed4a721..86ef0da 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -266,10 +266,40 @@ void RasterizerVulkan::DrawIndirect() { buffer_cache.SetDrawIndirect(nullptr); } +void RasterizerVulkan::DrawTexture() { + MICROPROFILE_SCOPE(Vulkan_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + FlushWork(); + + query_cache.UpdateCounters(); + + texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.UpdateRenderTargets(false); + + UpdateDynamicStates(); + + const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); + const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); + Region2D dst_region = {Offset2D{.x = static_cast(draw_texture_state.dst_x0), + .y = static_cast(draw_texture_state.dst_y0)}, + Offset2D{.x = static_cast(draw_texture_state.dst_x1), + .y = static_cast(draw_texture_state.dst_y1)}}; + Region2D src_region = {Offset2D{.x = static_cast(draw_texture_state.src_x0), + .y = static_cast(draw_texture_state.src_y0)}, + Offset2D{.x = static_cast(draw_texture_state.src_x1), + .y = static_cast(draw_texture_state.src_y1)}}; + blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(), + texture.ImageHandle(), sampler->Handle(), dst_region, src_region, + texture.size); +} + void RasterizerVulkan::Clear(u32 layer_count) { MICROPROFILE_SCOPE(Vulkan_Clearing); FlushWork(); + gpu_memory->FlushCaching(); query_cache.UpdateCounters(); @@ -628,6 +658,7 @@ void RasterizerVulkan::TickFrame() { } bool RasterizerVulkan::AccelerateConditionalRendering() { + gpu_memory->FlushCaching(); if (Settings::IsGPULevelHigh()) { // TODO(Blinkhawk): Reimplement Host conditional rendering. return false; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 472cc64..a0508b5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -66,6 +66,7 @@ public: void Draw(bool is_indexed, u32 instance_count) override; void DrawIndirect() override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index c2e53a5..e03685a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -213,6 +213,11 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s .signalSemaphoreCount = num_signal_semaphores, .pSignalSemaphores = signal_semaphores.data(), }; + + if (on_submit) { + on_submit(); + } + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { case VK_SUCCESS: break; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3858c50..bd4cb0f 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -66,6 +67,11 @@ public: query_cache = &query_cache_; } + // Registers a callback to perform on queue submission. + void RegisterOnSubmit(std::function&& func) { + on_submit = std::move(func); + } + /// Send work to a separate thread. template void Record(T&& command) { @@ -216,6 +222,7 @@ private: vk::CommandBuffer current_cmdbuf; std::unique_ptr chunk; + std::function on_submit; State state; diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp index 852b86f..c425941 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp @@ -14,11 +14,21 @@ using namespace Common::Literals; TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} { + { + std::scoped_lock lk{m_submission_lock}; + m_submission_time = std::chrono::steady_clock::now(); + } m_thread = std::jthread([&](auto stop_token) { Run(stop_token); }); } TurboMode::~TurboMode() = default; +void TurboMode::QueueSubmitted() { + std::scoped_lock lk{m_submission_lock}; + m_submission_time = std::chrono::steady_clock::now(); + m_submission_cv.notify_one(); +} + void TurboMode::Run(std::stop_token stop_token) { auto& dld = m_device.GetLogical(); @@ -199,6 +209,13 @@ void TurboMode::Run(std::stop_token stop_token) { // Wait for completion. fence.Wait(); + + // Wait for the next graphics queue submission if necessary. + std::unique_lock lk{m_submission_lock}; + Common::CondvarWait(m_submission_cv, lk, stop_token, [this] { + return (std::chrono::steady_clock::now() - m_submission_time) <= + std::chrono::milliseconds{100}; + }); } } diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h index 2060e23..99b5ac5 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.h +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h @@ -3,6 +3,9 @@ #pragma once +#include +#include + #include "common/polyfill_thread.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" @@ -15,11 +18,17 @@ public: explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld); ~TurboMode(); + void QueueSubmitted(); + private: void Run(std::stop_token stop_token); Device m_device; MemoryAllocator m_allocator; + std::mutex m_submission_lock; + std::condition_variable_any m_submission_cv; + std::chrono::time_point m_submission_time{}; + std::jthread m_thread; }; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 87152c8..1b01990 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -148,6 +148,13 @@ typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { return slot_image_views[id]; } +template +typename P::ImageView& TextureCache

::GetImageView(u32 index) noexcept { + const auto image_view_id = VisitImageView(channel_state->graphics_image_table, + channel_state->graphics_image_view_ids, index); + return slot_image_views[image_view_id]; +} + template void TextureCache

::MarkModification(ImageId id) noexcept { MarkModification(slot_images[id]); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4eea1f6..485eaab 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -129,6 +129,9 @@ public: /// Return a reference to the given image view id [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + /// Get the imageview from the graphics descriptor table in the specified index + [[nodiscard]] ImageView& GetImageView(u32 index) noexcept; + /// Mark an image as modified from the GPU void MarkModification(ImageId id) noexcept; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index fd1c5a6..23d922e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -74,30 +74,6 @@ enum class NvidiaArchitecture { VoltaOrOlder, }; -constexpr std::array REQUIRED_EXTENSIONS{ - VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, - VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, -#ifdef _WIN32 - VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, -#endif -#ifdef __unix__ - VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, -#endif -}; - -constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_2{ - VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, - VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, - VK_KHR_8BIT_STORAGE_EXTENSION_NAME, - VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, - VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, - VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, -}; - -constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_3{ - VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, -}; - template void SetNext(void**& next, T& data) { *next = &data; @@ -286,24 +262,9 @@ std::unordered_map GetFormatProperties(vk::Physica return format_properties; } -std::vector GetSupportedExtensions(vk::PhysicalDevice physical) { - const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - std::vector supported_extensions; - supported_extensions.reserve(extensions.size()); - for (const auto& extension : extensions) { - supported_extensions.emplace_back(extension.extensionName); - } - return supported_extensions; -} - -bool IsExtensionSupported(std::span supported_extensions, - std::string_view extension) { - return std::ranges::find(supported_extensions, extension) != supported_extensions.end(); -} - NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, - std::span exts) { - if (IsExtensionSupported(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { + const std::set>& exts) { + if (exts.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{}; shading_rate_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; @@ -316,423 +277,55 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, return NvidiaArchitecture::AmpereOrNewer; } } - if (IsExtensionSupported(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) { + if (exts.contains(VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) { return NvidiaArchitecture::Turing; } return NvidiaArchitecture::VoltaOrOlder; } + +std::vector ExtensionListForVulkan( + const std::set>& extensions) { + std::vector output; + for (const auto& extension : extensions) { + output.push_back(extension.c_str()); + } + return output; +} + } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) - : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, - instance_version{properties.apiVersion}, supported_extensions{GetSupportedExtensions( - physical)}, + : instance{instance_}, dld{dld_}, physical{physical_}, format_properties(GetFormatProperties(physical)) { - CheckSuitability(surface != nullptr); + // Get suitability and device properties. + const bool is_suitable = GetSuitability(surface != nullptr); + + const VkDriverId driver_id = properties.driver.driverID; + const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; + const bool is_amd_driver = + driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; + const bool is_amd = is_amd_driver || is_radv; + const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS; + const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; + const bool is_nvidia = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY; + const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK; + + if (is_mvk && !is_suitable) { + LOG_WARNING(Render_Vulkan, "Unsuitable driver is MoltenVK, continuing anyway"); + } else if (!is_suitable) { + throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); + } + SetupFamilies(surface); - SetupFeatures(); - SetupProperties(); - const auto queue_cis = GetDeviceQueueCreateInfos(); - const std::vector extensions = LoadExtensions(surface != nullptr); - VkPhysicalDeviceFeatures2 features2{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, - .pNext = nullptr, - .features{ - .robustBufferAccess = true, - .fullDrawIndexUint32 = false, - .imageCubeArray = true, - .independentBlend = true, - .geometryShader = true, - .tessellationShader = true, - .sampleRateShading = true, - .dualSrcBlend = true, - .logicOp = true, - .multiDrawIndirect = true, - .drawIndirectFirstInstance = true, - .depthClamp = true, - .depthBiasClamp = true, - .fillModeNonSolid = true, - .depthBounds = is_depth_bounds_supported, - .wideLines = true, - .largePoints = true, - .alphaToOne = false, - .multiViewport = true, - .samplerAnisotropy = true, - .textureCompressionETC2 = false, - .textureCompressionASTC_LDR = is_optimal_astc_supported, - .textureCompressionBC = false, - .occlusionQueryPrecise = true, - .pipelineStatisticsQuery = false, - .vertexPipelineStoresAndAtomics = true, - .fragmentStoresAndAtomics = true, - .shaderTessellationAndGeometryPointSize = false, - .shaderImageGatherExtended = true, - .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = is_shader_storage_image_multisample, - .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, - .shaderStorageImageWriteWithoutFormat = true, - .shaderUniformBufferArrayDynamicIndexing = false, - .shaderSampledImageArrayDynamicIndexing = false, - .shaderStorageBufferArrayDynamicIndexing = false, - .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = true, - .shaderCullDistance = true, - .shaderFloat64 = is_shader_float64_supported, - .shaderInt64 = is_shader_int64_supported, - .shaderInt16 = is_shader_int16_supported, - .shaderResourceResidency = false, - .shaderResourceMinLod = false, - .sparseBinding = false, - .sparseResidencyBuffer = false, - .sparseResidencyImage2D = false, - .sparseResidencyImage3D = false, - .sparseResidency2Samples = false, - .sparseResidency4Samples = false, - .sparseResidency8Samples = false, - .sparseResidency16Samples = false, - .sparseResidencyAliased = false, - .variableMultisampleRate = false, - .inheritedQueries = false, - }, - }; + // GetSuitability has already configured the linked list of features for us. + // Reuse it here. const void* first_next = &features2; - void** next = &features2.pNext; - VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, - .pNext = nullptr, - .timelineSemaphore = true, - }; - SetNext(next, timeline_semaphore); - - VkPhysicalDevice16BitStorageFeatures bit16_storage{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES, - .pNext = nullptr, - .storageBuffer16BitAccess = true, - .uniformAndStorageBuffer16BitAccess = true, - .storagePushConstant16 = false, - .storageInputOutput16 = false, - }; - SetNext(next, bit16_storage); - - VkPhysicalDevice8BitStorageFeatures bit8_storage{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES, - .pNext = nullptr, - .storageBuffer8BitAccess = true, - .uniformAndStorageBuffer8BitAccess = true, - .storagePushConstant8 = false, - }; - SetNext(next, bit8_storage); - - VkPhysicalDeviceRobustness2FeaturesEXT robustness2{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, - .pNext = nullptr, - .robustBufferAccess2 = true, - .robustImageAccess2 = true, - .nullDescriptor = true, - }; - SetNext(next, robustness2); - - VkPhysicalDeviceHostQueryResetFeatures host_query_reset{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES, - .pNext = nullptr, - .hostQueryReset = true, - }; - SetNext(next, host_query_reset); - - VkPhysicalDeviceVariablePointerFeatures variable_pointers{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES, - .pNext = nullptr, - .variablePointersStorageBuffer = VK_TRUE, - .variablePointers = VK_TRUE, - }; - SetNext(next, variable_pointers); - - VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES, - .pNext = nullptr, - .shaderDemoteToHelperInvocation = true, - }; - SetNext(next, demote); - - VkPhysicalDeviceShaderDrawParametersFeatures draw_parameters{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES, - .pNext = nullptr, - .shaderDrawParameters = true, - }; - SetNext(next, draw_parameters); - - VkPhysicalDeviceShaderFloat16Int8Features float16_int8; - if (is_int8_supported || is_float16_supported) { - float16_int8 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES, - .pNext = nullptr, - .shaderFloat16 = is_float16_supported, - .shaderInt8 = is_int8_supported, - }; - SetNext(next, float16_int8); - } - if (!is_float16_supported) { - LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); - } - if (!is_int8_supported) { - LOG_INFO(Render_Vulkan, "Device doesn't support int8 natively"); - } - - if (!nv_viewport_swizzle) { - LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); - } - - if (!nv_viewport_array2) { - LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); - } - - if (!nv_geometry_shader_passthrough) { - LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); - } - - VkPhysicalDeviceUniformBufferStandardLayoutFeatures std430_layout; - if (khr_uniform_buffer_standard_layout) { - std430_layout = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES, - .pNext = nullptr, - .uniformBufferStandardLayout = true, - }; - SetNext(next, std430_layout); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); - } - - VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; - if (ext_index_type_uint8) { - index_type_uint8 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT, - .pNext = nullptr, - .indexTypeUint8 = true, - }; - SetNext(next, index_type_uint8); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); - } - - VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart; - if (is_topology_list_restart_supported || is_patch_list_restart_supported) { - primitive_topology_list_restart = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT, - .pNext = nullptr, - .primitiveTopologyListRestart = is_topology_list_restart_supported, - .primitiveTopologyPatchListRestart = is_patch_list_restart_supported, - }; - SetNext(next, primitive_topology_list_restart); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support list topology primitive restart"); - } - - VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; - if (ext_transform_feedback) { - transform_feedback = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT, - .pNext = nullptr, - .transformFeedback = true, - .geometryStreams = true, - }; - SetNext(next, transform_feedback); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); - } - - VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border; - if (ext_custom_border_color) { - custom_border = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT, - .pNext = nullptr, - .customBorderColors = VK_TRUE, - .customBorderColorWithoutFormat = VK_TRUE, - }; - SetNext(next, custom_border); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors"); - } - - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; - if (ext_extended_dynamic_state) { - dynamic_state = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT, - .pNext = nullptr, - .extendedDynamicState = VK_TRUE, - }; - SetNext(next, dynamic_state); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); - } - - VkPhysicalDeviceExtendedDynamicState2FeaturesEXT dynamic_state_2; - if (ext_extended_dynamic_state_2) { - dynamic_state_2 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT, - .pNext = nullptr, - .extendedDynamicState2 = VK_TRUE, - .extendedDynamicState2LogicOp = ext_extended_dynamic_state_2_extra ? VK_TRUE : VK_FALSE, - .extendedDynamicState2PatchControlPoints = VK_FALSE, - }; - SetNext(next, dynamic_state_2); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state 2"); - } - - VkPhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3; - if (ext_extended_dynamic_state_3) { - dynamic_state_3 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT, - .pNext = nullptr, - .extendedDynamicState3TessellationDomainOrigin = VK_FALSE, - .extendedDynamicState3DepthClampEnable = - ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, - .extendedDynamicState3PolygonMode = VK_FALSE, - .extendedDynamicState3RasterizationSamples = VK_FALSE, - .extendedDynamicState3SampleMask = VK_FALSE, - .extendedDynamicState3AlphaToCoverageEnable = VK_FALSE, - .extendedDynamicState3AlphaToOneEnable = VK_FALSE, - .extendedDynamicState3LogicOpEnable = - ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorBlendEnable = - ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorBlendEquation = - ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorWriteMask = - ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, - .extendedDynamicState3RasterizationStream = VK_FALSE, - .extendedDynamicState3ConservativeRasterizationMode = VK_FALSE, - .extendedDynamicState3ExtraPrimitiveOverestimationSize = VK_FALSE, - .extendedDynamicState3DepthClipEnable = VK_FALSE, - .extendedDynamicState3SampleLocationsEnable = VK_FALSE, - .extendedDynamicState3ColorBlendAdvanced = VK_FALSE, - .extendedDynamicState3ProvokingVertexMode = VK_FALSE, - .extendedDynamicState3LineRasterizationMode = VK_FALSE, - .extendedDynamicState3LineStippleEnable = VK_FALSE, - .extendedDynamicState3DepthClipNegativeOneToOne = VK_FALSE, - .extendedDynamicState3ViewportWScalingEnable = VK_FALSE, - .extendedDynamicState3ViewportSwizzle = VK_FALSE, - .extendedDynamicState3CoverageToColorEnable = VK_FALSE, - .extendedDynamicState3CoverageToColorLocation = VK_FALSE, - .extendedDynamicState3CoverageModulationMode = VK_FALSE, - .extendedDynamicState3CoverageModulationTableEnable = VK_FALSE, - .extendedDynamicState3CoverageModulationTable = VK_FALSE, - .extendedDynamicState3CoverageReductionMode = VK_FALSE, - .extendedDynamicState3RepresentativeFragmentTestEnable = VK_FALSE, - .extendedDynamicState3ShadingRateImageEnable = VK_FALSE, - }; - SetNext(next, dynamic_state_3); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state 3"); - } - - VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; - if (ext_line_rasterization) { - line_raster = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT, - .pNext = nullptr, - .rectangularLines = VK_TRUE, - .bresenhamLines = VK_FALSE, - .smoothLines = VK_TRUE, - .stippledRectangularLines = VK_FALSE, - .stippledBresenhamLines = VK_FALSE, - .stippledSmoothLines = VK_FALSE, - }; - SetNext(next, line_raster); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines"); - } - - if (!ext_conservative_rasterization) { - LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); - } - - VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; - if (ext_provoking_vertex) { - provoking_vertex = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, - .pNext = nullptr, - .provokingVertexLast = VK_TRUE, - .transformFeedbackPreservesProvokingVertex = VK_TRUE, - }; - SetNext(next, provoking_vertex); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); - } - - VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic; - if (ext_vertex_input_dynamic_state) { - vertex_input_dynamic = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT, - .pNext = nullptr, - .vertexInputDynamicState = VK_TRUE, - }; - SetNext(next, vertex_input_dynamic); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state"); - } - - VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; - if (ext_shader_atomic_int64) { - atomic_int64 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES, - .pNext = nullptr, - .shaderBufferInt64Atomics = VK_TRUE, - .shaderSharedInt64Atomics = VK_TRUE, - }; - SetNext(next, atomic_int64); - } - - VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; - if (khr_workgroup_memory_explicit_layout && is_shader_int16_supported) { - workgroup_layout = { - .sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR, - .pNext = nullptr, - .workgroupMemoryExplicitLayout = VK_TRUE, - .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE, - .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE, - .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE, - }; - SetNext(next, workgroup_layout); - } else if (khr_workgroup_memory_explicit_layout) { - // TODO(lat9nq): Find a proper fix for this - LOG_WARNING(Render_Vulkan, "Disabling VK_KHR_workgroup_memory_explicit_layout due to a " - "yuzu bug when host driver does not support 16-bit integers"); - khr_workgroup_memory_explicit_layout = false; - } - - VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties; - if (khr_pipeline_executable_properties) { - LOG_INFO(Render_Vulkan, "Enabling shader feedback, expect slower shader build times"); - executable_properties = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR, - .pNext = nullptr, - .pipelineExecutableInfo = VK_TRUE, - }; - SetNext(next, executable_properties); - } - - if (!ext_depth_range_unrestricted) { - LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); - } - - VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features; - if (ext_depth_clip_control) { - depth_clip_control_features = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT, - .pNext = nullptr, - .depthClipControl = VK_TRUE, - }; - SetNext(next, depth_clip_control_features); - } - - VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; - if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { + VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv{}; + if (Settings::values.enable_nsight_aftermath && extensions.device_diagnostics_config) { nsight_aftermath_tracker = std::make_unique(); diagnostics_nv = { @@ -744,33 +337,39 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; first_next = &diagnostics_nv; } - logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); - is_integrated = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; - is_virtual = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU; - is_non_gpu = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER || - properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU; + is_blit_depth_stencil_supported = TestDepthStencilBlits(); + is_optimal_astc_supported = ComputeIsOptimalAstcSupported(); + is_warp_potentially_bigger = !extensions.subgroup_size_control || + properties.subgroup_size_control.maxSubgroupSize > GuestWarpSize; + + is_integrated = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; + is_virtual = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU; + is_non_gpu = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER || + properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU; + + supports_d24_depth = + IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal); CollectPhysicalMemoryInfo(); - CollectTelemetryParameters(); CollectToolingInfo(); - if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { - const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff; - + if (is_nvidia) { + const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; const auto arch = GetNvidiaArchitecture(physical, supported_extensions); switch (arch) { case NvidiaArchitecture::AmpereOrNewer: - LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); - is_float16_supported = false; + LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math"); + features.shader_float16_int8.shaderFloat16 = false; break; case NvidiaArchitecture::Turing: break; case NvidiaArchitecture::VoltaOrOlder: if (nv_major_version < 527) { - LOG_WARNING(Render_Vulkan, - "Blacklisting Volta and older from VK_KHR_push_descriptor"); - khr_push_descriptor = false; + LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor"); + extensions.push_descriptor = false; + loaded_extensions.erase(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); } break; } @@ -779,75 +378,75 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR cant_blit_msaa = true; } } - const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; - if (ext_extended_dynamic_state && is_radv) { + if (extensions.extended_dynamic_state && is_radv) { // Mask driver version variant - const u32 version = (properties.driverVersion << 3) >> 3; + const u32 version = (properties.properties.driverVersion << 3) >> 3; if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) { LOG_WARNING(Render_Vulkan, "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state"); - ext_extended_dynamic_state = false; + extensions.extended_dynamic_state = false; + loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); } } - if (ext_vertex_input_dynamic_state && is_radv) { - // TODO(ameerj): Blacklist only offending driver versions - // TODO(ameerj): Confirm if RDNA1 is affected - const bool is_rdna2 = - IsExtensionSupported(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME); - if (is_rdna2) { - LOG_WARNING(Render_Vulkan, - "RADV has broken VK_EXT_vertex_input_dynamic_state on RDNA2 hardware"); - ext_vertex_input_dynamic_state = false; - } - } - if (ext_extended_dynamic_state_2 && is_radv) { - const u32 version = (properties.driverVersion << 3) >> 3; + if (extensions.extended_dynamic_state2 && is_radv) { + const u32 version = (properties.properties.driverVersion << 3) >> 3; if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) { LOG_WARNING( Render_Vulkan, "RADV versions older than 22.3.1 have broken VK_EXT_extended_dynamic_state2"); - ext_extended_dynamic_state_2 = false; - ext_extended_dynamic_state_2_extra = false; + features.extended_dynamic_state2.extendedDynamicState2 = false; + features.extended_dynamic_state2.extendedDynamicState2LogicOp = false; + features.extended_dynamic_state2.extendedDynamicState2PatchControlPoints = false; + extensions.extended_dynamic_state2 = false; + loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); + } + } + if (extensions.vertex_input_dynamic_state && is_radv) { + // TODO(ameerj): Blacklist only offending driver versions + // TODO(ameerj): Confirm if RDNA1 is affected + const bool is_rdna2 = + supported_extensions.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME); + if (is_rdna2) { + LOG_WARNING(Render_Vulkan, + "RADV has broken VK_EXT_vertex_input_dynamic_state on RDNA2 hardware"); + extensions.vertex_input_dynamic_state = false; + loaded_extensions.erase(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); } } - sets_per_pool = 64; - const bool is_amd = - driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; - if (is_amd) { + sets_per_pool = 64; + if (is_amd_driver) { // AMD drivers need a higher amount of Sets per Pool in certain circunstances like in XC2. sets_per_pool = 96; // Disable VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT on AMD GCN4 and lower as it is broken. - if (!is_float16_supported) { - LOG_WARNING( - Render_Vulkan, - "AMD GCN4 and earlier do not properly support VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"); + if (!features.shader_float16_int8.shaderFloat16) { + LOG_WARNING(Render_Vulkan, + "AMD GCN4 and earlier have broken VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"); has_broken_cube_compatibility = true; } } - const bool is_amd_or_radv = is_amd || is_radv; - if (ext_sampler_filter_minmax && is_amd_or_radv) { + if (extensions.sampler_filter_minmax && is_amd) { // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. - if (!is_float16_supported) { + if (!features.shader_float16_int8.shaderFloat16) { LOG_WARNING(Render_Vulkan, - "Blacklisting AMD GCN4 and earlier for VK_EXT_sampler_filter_minmax"); - ext_sampler_filter_minmax = false; + "AMD GCN4 and earlier have broken VK_EXT_sampler_filter_minmax"); + extensions.sampler_filter_minmax = false; + loaded_extensions.erase(VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME); } } - const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS; - const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; - if (ext_vertex_input_dynamic_state && is_intel_windows) { - const u32 version = (properties.driverVersion << 3) >> 3; + if (extensions.vertex_input_dynamic_state && is_intel_windows) { + const u32 version = (properties.properties.driverVersion << 3) >> 3; if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) { - LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); - ext_vertex_input_dynamic_state = false; + LOG_WARNING(Render_Vulkan, "Intel has broken VK_EXT_vertex_input_dynamic_state"); + extensions.vertex_input_dynamic_state = false; + loaded_extensions.erase(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); } } - if (is_float16_supported && is_intel_windows) { + if (features.shader_float16_int8.shaderFloat16 && is_intel_windows) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. - LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - is_float16_supported = false; + LOG_WARNING(Render_Vulkan, "Intel has broken float16 math"); + features.shader_float16_int8.shaderFloat16 = false; } if (is_intel_windows) { LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); @@ -857,10 +456,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_WARNING(Render_Vulkan, "ANV driver does not support native BGR format"); must_emulate_bgr565 = true; } + if (is_mvk) { + LOG_WARNING(Render_Vulkan, + "MVK driver breaks when using more than 16 vertex attributes/bindings"); + properties.properties.limits.maxVertexInputAttributes = + std::min(properties.properties.limits.maxVertexInputAttributes, 16U); + properties.properties.limits.maxVertexInputBindings = + std::min(properties.properties.limits.maxVertexInputBindings, 16U); + } - supports_d24_depth = - IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal); + logical = vk::Device::Create(physical, queue_cis, ExtensionListForVulkan(loaded_extensions), + first_next, dld); graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); @@ -915,7 +521,7 @@ void Device::SaveShader(std::span spirv) const { } } -bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { +bool Device::ComputeIsOptimalAstcSupported() const { // Disable for now to avoid converting ASTC twice. static constexpr std::array astc_formats = { VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, @@ -933,7 +539,7 @@ bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) co VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK, VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, }; - if (!features.textureCompressionASTC_LDR) { + if (!features.features.textureCompressionASTC_LDR) { return false; } const auto format_feature_usage{ @@ -971,7 +577,7 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want } std::string Device::GetDriverName() const { - switch (driver_id) { + switch (properties.driver.driverID) { case VK_DRIVER_ID_AMD_PROPRIETARY: return "AMD"; case VK_DRIVER_ID_AMD_OPEN_SOURCE: @@ -987,522 +593,336 @@ std::string Device::GetDriverName() const { case VK_DRIVER_ID_MESA_LLVMPIPE: return "LAVAPIPE"; default: - return vendor_name; + return properties.driver.driverName; } } bool Device::ShouldBoostClocks() const { + const auto driver_id = properties.driver.driverID; + const auto vendor_id = properties.properties.vendorID; + const auto device_id = properties.properties.deviceID; + const bool validated_driver = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE || driver_id == VK_DRIVER_ID_MESA_RADV || driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY || driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS || driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; - const bool is_steam_deck = properties.vendorID == 0x1002 && properties.deviceID == 0x163F; + const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; return validated_driver && !is_steam_deck; } -static std::vector ExtensionsRequiredForInstanceVersion(u32 available_version) { - std::vector extensions{REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()}; +bool Device::GetSuitability(bool requires_swapchain) { + // Assume we will be suitable. + bool suitable = true; - if (available_version < VK_API_VERSION_1_2) { - extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_2.begin(), - REQUIRED_EXTENSIONS_BEFORE_1_2.end()); - } + // Configure properties. + properties.properties = physical.GetProperties(); - if (available_version < VK_API_VERSION_1_3) { - extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_3.begin(), - REQUIRED_EXTENSIONS_BEFORE_1_3.end()); - } + // Set instance version. + instance_version = properties.properties.apiVersion; - return extensions; -} - -void Device::CheckSuitability(bool requires_swapchain) const { - std::vector required_extensions = - ExtensionsRequiredForInstanceVersion(instance_version); - std::vector available_extensions; - - if (requires_swapchain) { - required_extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); - } + // Minimum of API version 1.1 is required. (This is well-supported.) + ASSERT(instance_version >= VK_API_VERSION_1_1); + // Get available extensions. auto extension_properties = physical.EnumerateDeviceExtensionProperties(); + // Get the set of supported extensions. + supported_extensions.clear(); for (const VkExtensionProperties& property : extension_properties) { - available_extensions.push_back(property.extensionName); + supported_extensions.insert(property.extensionName); } - bool has_all_required_extensions = true; - for (const char* requirement_name : required_extensions) { - const bool found = - std::ranges::any_of(available_extensions, [&](const char* extension_name) { - return std::strcmp(requirement_name, extension_name) == 0; - }); + // Generate list of extensions to load. + loaded_extensions.clear(); - if (!found) { - LOG_ERROR(Render_Vulkan, "Missing required extension: {}", requirement_name); - has_all_required_extensions = false; - } +#define EXTENSION(prefix, macro_name, var_name) \ + if (supported_extensions.contains(VK_##prefix##_##macro_name##_EXTENSION_NAME)) { \ + loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \ + extensions.var_name = true; \ + } +#define FEATURE_EXTENSION(prefix, struct_name, macro_name, var_name) \ + if (supported_extensions.contains(VK_##prefix##_##macro_name##_EXTENSION_NAME)) { \ + loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \ + extensions.var_name = true; \ } - if (!has_all_required_extensions) { - throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); + if (instance_version < VK_API_VERSION_1_2) { + FOR_EACH_VK_FEATURE_1_2(FEATURE_EXTENSION); + } + if (instance_version < VK_API_VERSION_1_3) { + FOR_EACH_VK_FEATURE_1_3(FEATURE_EXTENSION); } - struct LimitTuple { + FOR_EACH_VK_FEATURE_EXT(FEATURE_EXTENSION); + FOR_EACH_VK_EXTENSION(EXTENSION); +#ifdef _WIN32 + FOR_EACH_VK_EXTENSION_WIN32(EXTENSION); +#endif + +#undef FEATURE_EXTENSION +#undef EXTENSION + + // Some extensions are mandatory. Check those. +#define CHECK_EXTENSION(extension_name) \ + if (!loaded_extensions.contains(extension_name)) { \ + LOG_ERROR(Render_Vulkan, "Missing required extension {}", extension_name); \ + suitable = false; \ + } + +#define LOG_EXTENSION(extension_name) \ + if (!loaded_extensions.contains(extension_name)) { \ + LOG_INFO(Render_Vulkan, "Device doesn't support extension {}", extension_name); \ + } + + FOR_EACH_VK_RECOMMENDED_EXTENSION(LOG_EXTENSION); + FOR_EACH_VK_MANDATORY_EXTENSION(CHECK_EXTENSION); +#ifdef _WIN32 + FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(CHECK_EXTENSION); +#else + FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(CHECK_EXTENSION); +#endif + + if (requires_swapchain) { + CHECK_EXTENSION(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + } + +#undef LOG_EXTENSION +#undef CHECK_EXTENSION + + // Generate the linked list of features to test. + features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + + // Set next pointer. + void** next = &features2.pNext; + + // Test all features we know about. If the feature is not available in core at our + // current API version, and was not enabled by an extension, skip testing the feature. + // We set the structure sType explicitly here as it is zeroed by the constructor. +#define FEATURE(prefix, struct_name, macro_name, var_name) \ + features.var_name.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##macro_name##_FEATURES; \ + SetNext(next, features.var_name); + +#define EXT_FEATURE(prefix, struct_name, macro_name, var_name) \ + if (extensions.var_name) { \ + features.var_name.sType = \ + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##macro_name##_FEATURES_##prefix; \ + SetNext(next, features.var_name); \ + } + + FOR_EACH_VK_FEATURE_1_1(FEATURE); + FOR_EACH_VK_FEATURE_EXT(EXT_FEATURE); + if (instance_version >= VK_API_VERSION_1_2) { + FOR_EACH_VK_FEATURE_1_2(FEATURE); + } else { + FOR_EACH_VK_FEATURE_1_2(EXT_FEATURE); + } + if (instance_version >= VK_API_VERSION_1_3) { + FOR_EACH_VK_FEATURE_1_3(FEATURE); + } else { + FOR_EACH_VK_FEATURE_1_3(EXT_FEATURE); + } + +#undef EXT_FEATURE +#undef FEATURE + + // Perform the feature test. + physical.GetFeatures2(features2); + features.features = features2.features; + + // Some features are mandatory. Check those. +#define CHECK_FEATURE(feature, name) \ + if (!features.feature.name) { \ + LOG_ERROR(Render_Vulkan, "Missing required feature {}", #name); \ + suitable = false; \ + } + +#define LOG_FEATURE(feature, name) \ + if (!features.feature.name) { \ + LOG_INFO(Render_Vulkan, "Device doesn't support feature {}", #name); \ + } + + FOR_EACH_VK_RECOMMENDED_FEATURE(LOG_FEATURE); + FOR_EACH_VK_MANDATORY_FEATURE(CHECK_FEATURE); + +#undef LOG_FEATURE +#undef CHECK_FEATURE + + // Generate linked list of properties. + properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + + // Set next pointer. + next = &properties2.pNext; + + // Get driver info. + properties.driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; + SetNext(next, properties.driver); + + // Retrieve relevant extension properties. + if (extensions.shader_float_controls) { + properties.float_controls.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES; + SetNext(next, properties.float_controls); + } + if (extensions.push_descriptor) { + properties.push_descriptor.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; + SetNext(next, properties.push_descriptor); + } + if (extensions.subgroup_size_control) { + properties.subgroup_size_control.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES; + SetNext(next, properties.subgroup_size_control); + } + if (extensions.transform_feedback) { + properties.transform_feedback.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; + SetNext(next, properties.transform_feedback); + } + + // Perform the property fetch. + physical.GetProperties2(properties2); + properties.properties = properties2.properties; + + // Unload extensions if feature support is insufficient. + RemoveUnsuitableExtensions(); + + // Check limits. + struct Limit { u32 minimum; u32 value; const char* name; }; - const VkPhysicalDeviceLimits& limits{properties.limits}; + + const VkPhysicalDeviceLimits& limits{properties.properties.limits}; const std::array limits_report{ - LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"}, - LimitTuple{16, limits.maxViewports, "maxViewports"}, - LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"}, - LimitTuple{8, limits.maxClipDistances, "maxClipDistances"}, - }; - for (const auto& tuple : limits_report) { - if (tuple.value < tuple.minimum) { - LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name, - tuple.minimum, tuple.value); - throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); - } - } - VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{}; - demote.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES; - demote.pNext = nullptr; - - VkPhysicalDeviceVariablePointerFeatures variable_pointers{}; - variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES; - variable_pointers.pNext = &demote; - - VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; - robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - robustness2.pNext = &variable_pointers; - - VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore{}; - timeline_semaphore.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES; - timeline_semaphore.pNext = &robustness2; - - VkPhysicalDevice16BitStorageFeatures bit16_storage{}; - bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES; - bit16_storage.pNext = &timeline_semaphore; - - VkPhysicalDevice8BitStorageFeatures bit8_storage{}; - bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES; - bit8_storage.pNext = &bit16_storage; - - VkPhysicalDeviceHostQueryResetFeatures host_query_reset{}; - host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES; - host_query_reset.pNext = &bit8_storage; - - VkPhysicalDeviceShaderDrawParametersFeatures draw_parameters{}; - draw_parameters.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES; - draw_parameters.pNext = &host_query_reset; - - VkPhysicalDeviceFeatures2 features2{}; - features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - features2.pNext = &draw_parameters; - - physical.GetFeatures2(features2); - - const VkPhysicalDeviceFeatures& features{features2.features}; - std::array feature_report{ - std::make_pair(features.robustBufferAccess, "robustBufferAccess"), - std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), - std::make_pair(features.imageCubeArray, "imageCubeArray"), - std::make_pair(features.independentBlend, "independentBlend"), - std::make_pair(features.multiDrawIndirect, "multiDrawIndirect"), - std::make_pair(features.drawIndirectFirstInstance, "drawIndirectFirstInstance"), - std::make_pair(features.depthClamp, "depthClamp"), - std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), - std::make_pair(features.largePoints, "largePoints"), - std::make_pair(features.multiViewport, "multiViewport"), - std::make_pair(features.depthBiasClamp, "depthBiasClamp"), - std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), - std::make_pair(features.wideLines, "wideLines"), - std::make_pair(features.geometryShader, "geometryShader"), - std::make_pair(features.tessellationShader, "tessellationShader"), - std::make_pair(features.sampleRateShading, "sampleRateShading"), - std::make_pair(features.dualSrcBlend, "dualSrcBlend"), - std::make_pair(features.logicOp, "logicOp"), - std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), - std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), - std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), - std::make_pair(features.shaderStorageImageWriteWithoutFormat, - "shaderStorageImageWriteWithoutFormat"), - std::make_pair(features.shaderClipDistance, "shaderClipDistance"), - std::make_pair(features.shaderCullDistance, "shaderCullDistance"), - std::make_pair(variable_pointers.variablePointers, "variablePointers"), - std::make_pair(variable_pointers.variablePointersStorageBuffer, - "variablePointersStorageBuffer"), - std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), - std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), - std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), - std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), - std::make_pair(timeline_semaphore.timelineSemaphore, "timelineSemaphore"), - std::make_pair(bit16_storage.storageBuffer16BitAccess, "storageBuffer16BitAccess"), - std::make_pair(bit16_storage.uniformAndStorageBuffer16BitAccess, - "uniformAndStorageBuffer16BitAccess"), - std::make_pair(bit8_storage.storageBuffer8BitAccess, "storageBuffer8BitAccess"), - std::make_pair(bit8_storage.uniformAndStorageBuffer8BitAccess, - "uniformAndStorageBuffer8BitAccess"), - std::make_pair(host_query_reset.hostQueryReset, "hostQueryReset"), - std::make_pair(draw_parameters.shaderDrawParameters, "shaderDrawParameters"), + Limit{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"}, + Limit{16, limits.maxViewports, "maxViewports"}, + Limit{8, limits.maxColorAttachments, "maxColorAttachments"}, + Limit{8, limits.maxClipDistances, "maxClipDistances"}, }; - bool has_all_required_features = true; - for (const auto& [is_supported, name] : feature_report) { - if (!is_supported) { - LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name); - has_all_required_features = false; + for (const auto& [min, value, name] : limits_report) { + if (value < min) { + LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", name, min, value); + suitable = false; } } - if (!has_all_required_features) { - throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + // Return whether we were suitable. + return suitable; +} + +void Device::RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name) { + if (loaded_extensions.contains(extension_name) && !is_suitable) { + LOG_WARNING(Render_Vulkan, "Removing unsuitable extension {}", extension_name); + loaded_extensions.erase(extension_name); } } -std::vector Device::LoadExtensions(bool requires_surface) { - std::vector extensions = ExtensionsRequiredForInstanceVersion(instance_version); - if (requires_surface) { - extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); - } +void Device::RemoveUnsuitableExtensions() { + // VK_EXT_custom_border_color + extensions.custom_border_color = features.custom_border_color.customBorderColors && + features.custom_border_color.customBorderColorWithoutFormat; + RemoveExtensionIfUnsuitable(extensions.custom_border_color, + VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); - bool has_khr_shader_float16_int8{}; - bool has_khr_workgroup_memory_explicit_layout{}; - bool has_khr_pipeline_executable_properties{}; - bool has_khr_image_format_list{}; - bool has_khr_swapchain_mutable_format{}; - bool has_ext_subgroup_size_control{}; - bool has_ext_transform_feedback{}; - bool has_ext_custom_border_color{}; - bool has_ext_extended_dynamic_state{}; - bool has_ext_extended_dynamic_state_2{}; - bool has_ext_extended_dynamic_state_3{}; - bool has_ext_shader_atomic_int64{}; - bool has_ext_provoking_vertex{}; - bool has_ext_vertex_input_dynamic_state{}; - bool has_ext_line_rasterization{}; - bool has_ext_primitive_topology_list_restart{}; - bool has_ext_depth_clip_control{}; - for (const std::string& extension : supported_extensions) { - const auto test = [&](std::optional> status, const char* name, - bool push) { - if (extension != name) { - return; - } - if (push) { - extensions.push_back(name); - } - if (status) { - status->get() = true; - } - }; - test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); - test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); - test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, - true); - test(khr_uniform_buffer_standard_layout, - VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); - test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); - test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); - test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); - test(khr_draw_indirect_count, VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME, true); - test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); - test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); - test(has_ext_primitive_topology_list_restart, - VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME, true); - test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true); - test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, - true); - test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); - test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); - test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, - true); - test(has_ext_depth_clip_control, VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME, false); - test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); - test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); - test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); - test(has_ext_extended_dynamic_state_2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME, - false); - test(has_ext_extended_dynamic_state_3, VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME, - false); - test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, true); - test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); - test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME, - false); - test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); - test(has_khr_workgroup_memory_explicit_layout, - VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); - test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false); - test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, - false); - test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); - test(ext_memory_budget, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, true); - if (Settings::values.enable_nsight_aftermath) { - test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, - true); - } - if (Settings::values.renderer_shader_feedback) { - test(has_khr_pipeline_executable_properties, - VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME, false); - } - } - VkPhysicalDeviceFeatures2 features{}; - features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + // VK_EXT_depth_clip_control + extensions.depth_clip_control = features.depth_clip_control.depthClipControl; + RemoveExtensionIfUnsuitable(extensions.depth_clip_control, + VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); - VkPhysicalDeviceProperties2 physical_properties{}; - physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + // VK_EXT_extended_dynamic_state + extensions.extended_dynamic_state = features.extended_dynamic_state.extendedDynamicState; + RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state, + VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); - if (has_khr_shader_float16_int8) { - VkPhysicalDeviceShaderFloat16Int8Features float16_int8_features; - float16_int8_features.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES; - float16_int8_features.pNext = nullptr; - features.pNext = &float16_int8_features; + // VK_EXT_extended_dynamic_state2 + extensions.extended_dynamic_state2 = features.extended_dynamic_state2.extendedDynamicState2; + RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state2, + VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); - physical.GetFeatures2(features); - is_float16_supported = float16_int8_features.shaderFloat16; - is_int8_supported = float16_int8_features.shaderInt8; - extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); - } - if (has_ext_subgroup_size_control) { - VkPhysicalDeviceSubgroupSizeControlFeatures subgroup_features; - subgroup_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES; - subgroup_features.pNext = nullptr; - features.pNext = &subgroup_features; - physical.GetFeatures2(features); + // VK_EXT_extended_dynamic_state3 + dynamic_state3_blending = + features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable && + features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation && + features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask; + dynamic_state3_enables = + features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable && + features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable; - VkPhysicalDeviceSubgroupSizeControlProperties subgroup_properties; - subgroup_properties.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES; - subgroup_properties.pNext = nullptr; - physical_properties.pNext = &subgroup_properties; - physical.GetProperties2(physical_properties); + extensions.extended_dynamic_state3 = dynamic_state3_blending || dynamic_state3_enables; + dynamic_state3_blending = dynamic_state3_blending && extensions.extended_dynamic_state3; + dynamic_state3_enables = dynamic_state3_enables && extensions.extended_dynamic_state3; + RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state3, + VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize; + // VK_EXT_provoking_vertex + extensions.provoking_vertex = + features.provoking_vertex.provokingVertexLast && + features.provoking_vertex.transformFeedbackPreservesProvokingVertex; + RemoveExtensionIfUnsuitable(extensions.provoking_vertex, + VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); - if (subgroup_features.subgroupSizeControl && - subgroup_properties.minSubgroupSize <= GuestWarpSize && - subgroup_properties.maxSubgroupSize >= GuestWarpSize) { - extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); - guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; - ext_subgroup_size_control = true; - } + // VK_KHR_shader_atomic_int64 + extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics && + features.shader_atomic_int64.shaderSharedInt64Atomics; + RemoveExtensionIfUnsuitable(extensions.shader_atomic_int64, + VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); + + // VK_EXT_shader_demote_to_helper_invocation + extensions.shader_demote_to_helper_invocation = + features.shader_demote_to_helper_invocation.shaderDemoteToHelperInvocation; + RemoveExtensionIfUnsuitable(extensions.shader_demote_to_helper_invocation, + VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME); + + // VK_EXT_subgroup_size_control + extensions.subgroup_size_control = + features.subgroup_size_control.subgroupSizeControl && + properties.subgroup_size_control.minSubgroupSize <= GuestWarpSize && + properties.subgroup_size_control.maxSubgroupSize >= GuestWarpSize; + RemoveExtensionIfUnsuitable(extensions.subgroup_size_control, + VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); + + // VK_EXT_transform_feedback + extensions.transform_feedback = + features.transform_feedback.transformFeedback && + features.transform_feedback.geometryStreams && + properties.transform_feedback.maxTransformFeedbackStreams >= 4 && + properties.transform_feedback.maxTransformFeedbackBuffers > 0 && + properties.transform_feedback.transformFeedbackQueries && + properties.transform_feedback.transformFeedbackDraw; + RemoveExtensionIfUnsuitable(extensions.transform_feedback, + VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); + + // VK_EXT_vertex_input_dynamic_state + extensions.vertex_input_dynamic_state = + features.vertex_input_dynamic_state.vertexInputDynamicState; + RemoveExtensionIfUnsuitable(extensions.vertex_input_dynamic_state, + VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + + // VK_KHR_pipeline_executable_properties + if (Settings::values.renderer_shader_feedback.GetValue()) { + extensions.pipeline_executable_properties = + features.pipeline_executable_properties.pipelineExecutableInfo; + RemoveExtensionIfUnsuitable(extensions.pipeline_executable_properties, + VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME); } else { - is_warp_potentially_bigger = true; - } - if (has_ext_provoking_vertex) { - VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; - provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT; - provoking_vertex.pNext = nullptr; - features.pNext = &provoking_vertex; - physical.GetFeatures2(features); - - if (provoking_vertex.provokingVertexLast && - provoking_vertex.transformFeedbackPreservesProvokingVertex) { - extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); - ext_provoking_vertex = true; - } - } - if (has_ext_vertex_input_dynamic_state) { - VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input; - vertex_input.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT; - vertex_input.pNext = nullptr; - features.pNext = &vertex_input; - physical.GetFeatures2(features); - - if (vertex_input.vertexInputDynamicState) { - extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); - ext_vertex_input_dynamic_state = true; - } - } - if (has_ext_shader_atomic_int64) { - VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; - atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES; - atomic_int64.pNext = nullptr; - features.pNext = &atomic_int64; - physical.GetFeatures2(features); - - if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) { - extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); - ext_shader_atomic_int64 = true; - } - } - if (has_ext_transform_feedback) { - VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; - tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; - tfb_features.pNext = nullptr; - features.pNext = &tfb_features; - physical.GetFeatures2(features); - - VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties; - tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; - tfb_properties.pNext = nullptr; - physical_properties.pNext = &tfb_properties; - physical.GetProperties2(physical_properties); - - if (tfb_features.transformFeedback && tfb_features.geometryStreams && - tfb_properties.maxTransformFeedbackStreams >= 4 && - tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries && - tfb_properties.transformFeedbackDraw) { - extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); - ext_transform_feedback = true; - } - } - if (has_ext_custom_border_color) { - VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features; - border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; - border_features.pNext = nullptr; - features.pNext = &border_features; - physical.GetFeatures2(features); - - if (border_features.customBorderColors && border_features.customBorderColorWithoutFormat) { - extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); - ext_custom_border_color = true; - } - } - if (has_ext_extended_dynamic_state) { - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state; - extended_dynamic_state.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; - extended_dynamic_state.pNext = nullptr; - features.pNext = &extended_dynamic_state; - physical.GetFeatures2(features); - - if (extended_dynamic_state.extendedDynamicState) { - extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); - ext_extended_dynamic_state = true; - } - } - if (has_ext_extended_dynamic_state_2) { - VkPhysicalDeviceExtendedDynamicState2FeaturesEXT extended_dynamic_state_2; - extended_dynamic_state_2.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT; - extended_dynamic_state_2.pNext = nullptr; - features.pNext = &extended_dynamic_state_2; - physical.GetFeatures2(features); - - if (extended_dynamic_state_2.extendedDynamicState2) { - extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); - ext_extended_dynamic_state_2 = true; - ext_extended_dynamic_state_2_extra = - extended_dynamic_state_2.extendedDynamicState2LogicOp; - } - } - if (has_ext_extended_dynamic_state_3) { - VkPhysicalDeviceExtendedDynamicState3FeaturesEXT extended_dynamic_state_3; - extended_dynamic_state_3.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT; - extended_dynamic_state_3.pNext = nullptr; - features.pNext = &extended_dynamic_state_3; - physical.GetFeatures2(features); - - ext_extended_dynamic_state_3_blend = - extended_dynamic_state_3.extendedDynamicState3ColorBlendEnable && - extended_dynamic_state_3.extendedDynamicState3ColorBlendEquation && - extended_dynamic_state_3.extendedDynamicState3ColorWriteMask; - - ext_extended_dynamic_state_3_enables = - extended_dynamic_state_3.extendedDynamicState3DepthClampEnable && - extended_dynamic_state_3.extendedDynamicState3LogicOpEnable; - - ext_extended_dynamic_state_3 = - ext_extended_dynamic_state_3_blend || ext_extended_dynamic_state_3_enables; - if (ext_extended_dynamic_state_3) { - extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - } - } - if (has_ext_line_rasterization) { - VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; - line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT; - line_raster.pNext = nullptr; - features.pNext = &line_raster; - physical.GetFeatures2(features); - if (line_raster.rectangularLines && line_raster.smoothLines) { - extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME); - ext_line_rasterization = true; - } - } - if (has_ext_depth_clip_control) { - VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features; - depth_clip_control_features.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT; - depth_clip_control_features.pNext = nullptr; - features.pNext = &depth_clip_control_features; - physical.GetFeatures2(features); - - if (depth_clip_control_features.depthClipControl) { - extensions.push_back(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); - ext_depth_clip_control = true; - } - } - if (has_khr_workgroup_memory_explicit_layout) { - VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; - layout.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR; - layout.pNext = nullptr; - features.pNext = &layout; - physical.GetFeatures2(features); - - if (layout.workgroupMemoryExplicitLayout && - layout.workgroupMemoryExplicitLayout8BitAccess && - layout.workgroupMemoryExplicitLayout16BitAccess && - layout.workgroupMemoryExplicitLayoutScalarBlockLayout) { - extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); - khr_workgroup_memory_explicit_layout = true; - } - } - if (has_khr_pipeline_executable_properties) { - VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties; - executable_properties.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR; - executable_properties.pNext = nullptr; - features.pNext = &executable_properties; - physical.GetFeatures2(features); - - if (executable_properties.pipelineExecutableInfo) { - extensions.push_back(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME); - khr_pipeline_executable_properties = true; - } - } - if (has_ext_primitive_topology_list_restart) { - VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart{}; - primitive_topology_list_restart.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT; - primitive_topology_list_restart.pNext = nullptr; - features.pNext = &primitive_topology_list_restart; - physical.GetFeatures2(features); - - is_topology_list_restart_supported = - primitive_topology_list_restart.primitiveTopologyListRestart; - is_patch_list_restart_supported = - primitive_topology_list_restart.primitiveTopologyPatchListRestart; - } - if (requires_surface && has_khr_image_format_list && has_khr_swapchain_mutable_format) { - extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); - extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); - khr_swapchain_mutable_format = true; - } - if (khr_push_descriptor) { - VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; - push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; - push_descriptor.pNext = nullptr; - - physical_properties.pNext = &push_descriptor; - physical.GetProperties2(physical_properties); - - max_push_descriptors = push_descriptor.maxPushDescriptors; + extensions.pipeline_executable_properties = false; + loaded_extensions.erase(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME); } - has_null_descriptor = true; - - return extensions; + // VK_KHR_workgroup_memory_explicit_layout + extensions.workgroup_memory_explicit_layout = + features.features.shaderInt16 && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout; + RemoveExtensionIfUnsuitable(extensions.workgroup_memory_explicit_layout, + VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); } void Device::SetupFamilies(VkSurfaceKHR surface) { @@ -1540,53 +960,6 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { } } -void Device::SetupFeatures() { - const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; - is_depth_bounds_supported = features.depthBounds; - is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; - is_shader_float64_supported = features.shaderFloat64; - is_shader_int64_supported = features.shaderInt64; - is_shader_int16_supported = features.shaderInt16; - is_shader_storage_image_multisample = features.shaderStorageImageMultisample; - is_blit_depth_stencil_supported = TestDepthStencilBlits(); - is_optimal_astc_supported = IsOptimalAstcSupported(features); - - const VkPhysicalDeviceLimits& limits{properties.limits}; - max_vertex_input_attributes = limits.maxVertexInputAttributes; - max_vertex_input_bindings = limits.maxVertexInputBindings; -} - -void Device::SetupProperties() { - float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES; - - VkPhysicalDeviceProperties2KHR properties2{}; - properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - properties2.pNext = &float_controls; - - physical.GetProperties2(properties2); -} - -void Device::CollectTelemetryParameters() { - VkPhysicalDeviceDriverProperties driver{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, - .pNext = nullptr, - .driverID = {}, - .driverName = {}, - .driverInfo = {}, - .conformanceVersion = {}, - }; - - VkPhysicalDeviceProperties2 device_properties{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, - .pNext = &driver, - .properties = {}, - }; - physical.GetProperties2(device_properties); - - driver_id = driver.driverID; - vendor_name = driver.driverName; -} - u64 Device::GetDeviceMemoryUsage() const { VkPhysicalDeviceMemoryBudgetPropertiesEXT budget; budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; @@ -1602,7 +975,8 @@ u64 Device::GetDeviceMemoryUsage() const { void Device::CollectPhysicalMemoryInfo() { VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; - const auto mem_info = physical.GetMemoryProperties(ext_memory_budget ? &budget : nullptr); + const auto mem_info = + physical.GetMemoryProperties(extensions.memory_budget ? &budget : nullptr); const auto& mem_properties = mem_info.memoryProperties; const size_t num_properties = mem_properties.memoryHeapCount; device_access_memory = 0; @@ -1618,7 +992,7 @@ void Device::CollectPhysicalMemoryInfo() { if (is_heap_local) { local_memory += mem_properties.memoryHeaps[element].size; } - if (ext_memory_budget) { + if (extensions.memory_budget) { device_initial_usage += budget.heapUsage[element]; device_access_memory += budget.heapBudget[element]; continue; @@ -1634,7 +1008,7 @@ void Device::CollectPhysicalMemoryInfo() { } void Device::CollectToolingInfo() { - if (!ext_tooling_info) { + if (!extensions.tooling_info) { return; } auto tools{physical.GetPhysicalDeviceToolProperties()}; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4bc2671..0662a2d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -11,6 +12,156 @@ #include "common/common_types.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +// Define all features which may be used by the implementation here. +// Vulkan version in the macro describes the minimum version required for feature availability. +// If the Vulkan version is lower than the required version, the named extension is required. +#define FOR_EACH_VK_FEATURE_1_1(FEATURE) \ + FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control) \ + FEATURE(KHR, 16BitStorage, 16BIT_STORAGE, bit16_storage) \ + FEATURE(KHR, ShaderAtomicInt64, SHADER_ATOMIC_INT64, shader_atomic_int64) \ + FEATURE(KHR, ShaderDrawParameters, SHADER_DRAW_PARAMETERS, shader_draw_parameters) \ + FEATURE(KHR, ShaderFloat16Int8, SHADER_FLOAT16_INT8, shader_float16_int8) \ + FEATURE(KHR, UniformBufferStandardLayout, UNIFORM_BUFFER_STANDARD_LAYOUT, \ + uniform_buffer_standard_layout) \ + FEATURE(KHR, VariablePointer, VARIABLE_POINTERS, variable_pointer) + +#define FOR_EACH_VK_FEATURE_1_2(FEATURE) \ + FEATURE(EXT, HostQueryReset, HOST_QUERY_RESET, host_query_reset) \ + FEATURE(KHR, 8BitStorage, 8BIT_STORAGE, bit8_storage) \ + FEATURE(KHR, TimelineSemaphore, TIMELINE_SEMAPHORE, timeline_semaphore) + +#define FOR_EACH_VK_FEATURE_1_3(FEATURE) \ + FEATURE(EXT, ShaderDemoteToHelperInvocation, SHADER_DEMOTE_TO_HELPER_INVOCATION, \ + shader_demote_to_helper_invocation) + +// Define all features which may be used by the implementation and require an extension here. +#define FOR_EACH_VK_FEATURE_EXT(FEATURE) \ + FEATURE(EXT, CustomBorderColor, CUSTOM_BORDER_COLOR, custom_border_color) \ + FEATURE(EXT, DepthClipControl, DEPTH_CLIP_CONTROL, depth_clip_control) \ + FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \ + FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \ + FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \ + FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \ + FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \ + FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \ + primitive_topology_list_restart) \ + FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \ + FEATURE(EXT, Robustness2, ROBUSTNESS_2, robustness2) \ + FEATURE(EXT, TransformFeedback, TRANSFORM_FEEDBACK, transform_feedback) \ + FEATURE(EXT, VertexInputDynamicState, VERTEX_INPUT_DYNAMIC_STATE, vertex_input_dynamic_state) \ + FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \ + pipeline_executable_properties) \ + FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \ + workgroup_memory_explicit_layout) + +// Define miscellaneous extensions which may be used by the implementation here. +#define FOR_EACH_VK_EXTENSION(EXTENSION) \ + EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization) \ + EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted) \ + EXTENSION(EXT, MEMORY_BUDGET, memory_budget) \ + EXTENSION(EXT, ROBUSTNESS_2, robustness_2) \ + EXTENSION(EXT, SAMPLER_FILTER_MINMAX, sampler_filter_minmax) \ + EXTENSION(EXT, SHADER_STENCIL_EXPORT, shader_stencil_export) \ + EXTENSION(EXT, SHADER_VIEWPORT_INDEX_LAYER, shader_viewport_index_layer) \ + EXTENSION(EXT, TOOLING_INFO, tooling_info) \ + EXTENSION(EXT, VERTEX_ATTRIBUTE_DIVISOR, vertex_attribute_divisor) \ + EXTENSION(KHR, DRAW_INDIRECT_COUNT, draw_indirect_count) \ + EXTENSION(KHR, DRIVER_PROPERTIES, driver_properties) \ + EXTENSION(KHR, EXTERNAL_MEMORY_FD, external_memory_fd) \ + EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor) \ + EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge) \ + EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \ + EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \ + EXTENSION(KHR, SWAPCHAIN, swapchain) \ + EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \ + EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \ + EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \ + EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \ + EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle) + +#define FOR_EACH_VK_EXTENSION_WIN32(EXTENSION) \ + EXTENSION(KHR, EXTERNAL_MEMORY_WIN32, external_memory_win32) + +// Define extensions which must be supported. +#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME) + +#define FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME) + +#define FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME) + +// Define extensions where the absence of the extension may result in a degraded experience. +#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \ + EXTENSION_NAME(VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME) \ + EXTENSION_NAME(VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME) \ + EXTENSION_NAME(VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME) + +// Define features which must be supported. +#define FOR_EACH_VK_MANDATORY_FEATURE(FEATURE_NAME) \ + FEATURE_NAME(bit16_storage, storageBuffer16BitAccess) \ + FEATURE_NAME(bit16_storage, uniformAndStorageBuffer16BitAccess) \ + FEATURE_NAME(bit8_storage, storageBuffer8BitAccess) \ + FEATURE_NAME(bit8_storage, uniformAndStorageBuffer8BitAccess) \ + FEATURE_NAME(features, depthBiasClamp) \ + FEATURE_NAME(features, depthClamp) \ + FEATURE_NAME(features, drawIndirectFirstInstance) \ + FEATURE_NAME(features, dualSrcBlend) \ + FEATURE_NAME(features, fillModeNonSolid) \ + FEATURE_NAME(features, fragmentStoresAndAtomics) \ + FEATURE_NAME(features, geometryShader) \ + FEATURE_NAME(features, imageCubeArray) \ + FEATURE_NAME(features, independentBlend) \ + FEATURE_NAME(features, largePoints) \ + FEATURE_NAME(features, logicOp) \ + FEATURE_NAME(features, multiDrawIndirect) \ + FEATURE_NAME(features, multiViewport) \ + FEATURE_NAME(features, occlusionQueryPrecise) \ + FEATURE_NAME(features, robustBufferAccess) \ + FEATURE_NAME(features, samplerAnisotropy) \ + FEATURE_NAME(features, sampleRateShading) \ + FEATURE_NAME(features, shaderClipDistance) \ + FEATURE_NAME(features, shaderCullDistance) \ + FEATURE_NAME(features, shaderImageGatherExtended) \ + FEATURE_NAME(features, shaderStorageImageWriteWithoutFormat) \ + FEATURE_NAME(features, tessellationShader) \ + FEATURE_NAME(features, vertexPipelineStoresAndAtomics) \ + FEATURE_NAME(features, wideLines) \ + FEATURE_NAME(host_query_reset, hostQueryReset) \ + FEATURE_NAME(robustness2, nullDescriptor) \ + FEATURE_NAME(robustness2, robustBufferAccess2) \ + FEATURE_NAME(robustness2, robustImageAccess2) \ + FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation) \ + FEATURE_NAME(shader_draw_parameters, shaderDrawParameters) \ + FEATURE_NAME(timeline_semaphore, timelineSemaphore) \ + FEATURE_NAME(variable_pointer, variablePointers) \ + FEATURE_NAME(variable_pointer, variablePointersStorageBuffer) + +// Define features where the absence of the feature may result in a degraded experience. +#define FOR_EACH_VK_RECOMMENDED_FEATURE(FEATURE_NAME) \ + FEATURE_NAME(custom_border_color, customBorderColors) \ + FEATURE_NAME(extended_dynamic_state, extendedDynamicState) \ + FEATURE_NAME(index_type_uint8, indexTypeUint8) \ + FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \ + FEATURE_NAME(provoking_vertex, provokingVertexLast) \ + FEATURE_NAME(shader_float16_int8, shaderFloat16) \ + FEATURE_NAME(shader_float16_int8, shaderInt8) \ + FEATURE_NAME(transform_feedback, transformFeedback) \ + FEATURE_NAME(uniform_buffer_standard_layout, uniformBufferStandardLayout) \ + FEATURE_NAME(vertex_input_dynamic_state, vertexInputDynamicState) + namespace Vulkan { class NsightAftermathTracker; @@ -88,69 +239,69 @@ public: /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. u32 ApiVersion() const { - return properties.apiVersion; + return properties.properties.apiVersion; } /// Returns the current driver version provided in Vulkan-formatted version numbers. u32 GetDriverVersion() const { - return properties.driverVersion; + return properties.properties.driverVersion; } /// Returns the device name. std::string_view GetModelName() const { - return properties.deviceName; + return properties.properties.deviceName; } /// Returns the driver ID. VkDriverIdKHR GetDriverID() const { - return driver_id; + return properties.driver.driverID; } bool ShouldBoostClocks() const; /// Returns uniform buffer alignment requeriment. VkDeviceSize GetUniformBufferAlignment() const { - return properties.limits.minUniformBufferOffsetAlignment; + return properties.properties.limits.minUniformBufferOffsetAlignment; } /// Returns storage alignment requeriment. VkDeviceSize GetStorageBufferAlignment() const { - return properties.limits.minStorageBufferOffsetAlignment; + return properties.properties.limits.minStorageBufferOffsetAlignment; } /// Returns the maximum range for storage buffers. VkDeviceSize GetMaxStorageBufferRange() const { - return properties.limits.maxStorageBufferRange; + return properties.properties.limits.maxStorageBufferRange; } /// Returns the maximum size for push constants. VkDeviceSize GetMaxPushConstantsSize() const { - return properties.limits.maxPushConstantsSize; + return properties.properties.limits.maxPushConstantsSize; } /// Returns the maximum size for shared memory. u32 GetMaxComputeSharedMemorySize() const { - return properties.limits.maxComputeSharedMemorySize; + return properties.properties.limits.maxComputeSharedMemorySize; } /// Returns float control properties of the device. const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { - return float_controls; + return properties.float_controls; } /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { - return is_optimal_astc_supported; + return features.features.textureCompressionASTC_LDR; } /// Returns true if the device supports float16 natively. bool IsFloat16Supported() const { - return is_float16_supported; + return features.shader_float16_int8.shaderFloat16; } /// Returns true if the device supports int8 natively. bool IsInt8Supported() const { - return is_int8_supported; + return features.shader_float16_int8.shaderInt8; } /// Returns true if the device warp size can potentially be bigger than guest's warp size. @@ -160,32 +311,32 @@ public: /// Returns true if the device can be forced to use the guest warp size. bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const { - return guest_warp_stages & stage; + return properties.subgroup_size_control.requiredSubgroupSizeStages & stage; } /// Returns the maximum number of push descriptors. u32 MaxPushDescriptors() const { - return max_push_descriptors; + return properties.push_descriptor.maxPushDescriptors; } /// Returns true if formatless image load is supported. bool IsFormatlessImageLoadSupported() const { - return is_formatless_image_load_supported; + return features.features.shaderStorageImageReadWithoutFormat; } /// Returns true if shader int64 is supported. bool IsShaderInt64Supported() const { - return is_shader_int64_supported; + return features.features.shaderInt64; } /// Returns true if shader int16 is supported. bool IsShaderInt16Supported() const { - return is_shader_int16_supported; + return features.features.shaderInt16; } // Returns true if depth bounds is supported. bool IsDepthBoundsSupported() const { - return is_depth_bounds_supported; + return features.features.depthBounds; } /// Returns true when blitting from and to depth stencil images is supported. @@ -195,151 +346,151 @@ public: /// Returns true if the device supports VK_NV_viewport_swizzle. bool IsNvViewportSwizzleSupported() const { - return nv_viewport_swizzle; + return extensions.viewport_swizzle; } /// Returns true if the device supports VK_NV_viewport_array2. bool IsNvViewportArray2Supported() const { - return nv_viewport_array2; + return extensions.viewport_array2; } /// Returns true if the device supports VK_NV_geometry_shader_passthrough. bool IsNvGeometryShaderPassthroughSupported() const { - return nv_geometry_shader_passthrough; + return extensions.geometry_shader_passthrough; } /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { - return khr_uniform_buffer_standard_layout; + return extensions.uniform_buffer_standard_layout; } /// Returns true if the device supports VK_KHR_push_descriptor. bool IsKhrPushDescriptorSupported() const { - return khr_push_descriptor; + return extensions.push_descriptor; } /// Returns true if VK_KHR_pipeline_executable_properties is enabled. bool IsKhrPipelineExecutablePropertiesEnabled() const { - return khr_pipeline_executable_properties; + return extensions.pipeline_executable_properties; } /// Returns true if VK_KHR_swapchain_mutable_format is enabled. bool IsKhrSwapchainMutableFormatEnabled() const { - return khr_swapchain_mutable_format; + return extensions.swapchain_mutable_format; } /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { - return khr_workgroup_memory_explicit_layout; + return extensions.workgroup_memory_explicit_layout; } /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. bool IsTopologyListPrimitiveRestartSupported() const { - return is_topology_list_restart_supported; + return features.primitive_topology_list_restart.primitiveTopologyListRestart; } /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. bool IsPatchListPrimitiveRestartSupported() const { - return is_patch_list_restart_supported; + return features.primitive_topology_list_restart.primitiveTopologyPatchListRestart; } /// Returns true if the device supports VK_EXT_index_type_uint8. bool IsExtIndexTypeUint8Supported() const { - return ext_index_type_uint8; + return extensions.index_type_uint8; } /// Returns true if the device supports VK_EXT_sampler_filter_minmax. bool IsExtSamplerFilterMinmaxSupported() const { - return ext_sampler_filter_minmax; + return extensions.sampler_filter_minmax; } /// Returns true if the device supports VK_EXT_depth_range_unrestricted. bool IsExtDepthRangeUnrestrictedSupported() const { - return ext_depth_range_unrestricted; + return extensions.depth_range_unrestricted; } /// Returns true if the device supports VK_EXT_depth_clip_control. bool IsExtDepthClipControlSupported() const { - return ext_depth_clip_control; + return extensions.depth_clip_control; } /// Returns true if the device supports VK_EXT_shader_viewport_index_layer. bool IsExtShaderViewportIndexLayerSupported() const { - return ext_shader_viewport_index_layer; + return extensions.shader_viewport_index_layer; } /// Returns true if the device supports VK_EXT_subgroup_size_control. bool IsExtSubgroupSizeControlSupported() const { - return ext_subgroup_size_control; + return extensions.subgroup_size_control; } /// Returns true if the device supports VK_EXT_transform_feedback. bool IsExtTransformFeedbackSupported() const { - return ext_transform_feedback; + return extensions.transform_feedback; } /// Returns true if the device supports VK_EXT_custom_border_color. bool IsExtCustomBorderColorSupported() const { - return ext_custom_border_color; + return extensions.custom_border_color; } /// Returns true if the device supports VK_EXT_extended_dynamic_state. bool IsExtExtendedDynamicStateSupported() const { - return ext_extended_dynamic_state; + return extensions.extended_dynamic_state; } /// Returns true if the device supports VK_EXT_extended_dynamic_state2. bool IsExtExtendedDynamicState2Supported() const { - return ext_extended_dynamic_state_2; + return extensions.extended_dynamic_state2; } bool IsExtExtendedDynamicState2ExtrasSupported() const { - return ext_extended_dynamic_state_2_extra; + return features.extended_dynamic_state2.extendedDynamicState2LogicOp; } /// Returns true if the device supports VK_EXT_extended_dynamic_state3. bool IsExtExtendedDynamicState3Supported() const { - return ext_extended_dynamic_state_3; + return extensions.extended_dynamic_state3; } /// Returns true if the device supports VK_EXT_extended_dynamic_state3. bool IsExtExtendedDynamicState3BlendingSupported() const { - return ext_extended_dynamic_state_3_blend; + return dynamic_state3_blending; } /// Returns true if the device supports VK_EXT_extended_dynamic_state3. bool IsExtExtendedDynamicState3EnablesSupported() const { - return ext_extended_dynamic_state_3_enables; + return dynamic_state3_enables; } /// Returns true if the device supports VK_EXT_line_rasterization. bool IsExtLineRasterizationSupported() const { - return ext_line_rasterization; + return extensions.line_rasterization; } /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. bool IsExtVertexInputDynamicStateSupported() const { - return ext_vertex_input_dynamic_state; + return extensions.vertex_input_dynamic_state; } /// Returns true if the device supports VK_EXT_shader_stencil_export. bool IsExtShaderStencilExportSupported() const { - return ext_shader_stencil_export; + return extensions.shader_stencil_export; } /// Returns true if the device supports VK_EXT_conservative_rasterization. bool IsExtConservativeRasterizationSupported() const { - return ext_conservative_rasterization; + return extensions.conservative_rasterization; } /// Returns true if the device supports VK_EXT_provoking_vertex. bool IsExtProvokingVertexSupported() const { - return ext_provoking_vertex; + return extensions.provoking_vertex; } /// Returns true if the device supports VK_KHR_shader_atomic_int64. bool IsExtShaderAtomicInt64Supported() const { - return ext_shader_atomic_int64; + return extensions.shader_atomic_int64; } /// Returns the minimum supported version of SPIR-V. @@ -347,7 +498,7 @@ public: if (instance_version >= VK_API_VERSION_1_3) { return 0x00010600U; } - if (khr_spirv_1_4) { + if (extensions.spirv_1_4) { return 0x00010400U; } return 0x00010000U; @@ -365,11 +516,11 @@ public: /// Returns the vendor name reported from Vulkan. std::string_view GetVendorName() const { - return vendor_name; + return properties.driver.driverName; } /// Returns the list of available extensions. - const std::vector& GetAvailableExtensions() const { + const std::set>& GetAvailableExtensions() const { return supported_extensions; } @@ -378,7 +529,7 @@ public: } bool CanReportMemoryUsage() const { - return ext_memory_budget; + return extensions.memory_budget; } u64 GetDeviceMemoryUsage() const; @@ -400,36 +551,29 @@ public: } bool HasNullDescriptor() const { - return has_null_descriptor; + return features.robustness2.nullDescriptor; } u32 GetMaxVertexInputAttributes() const { - return max_vertex_input_attributes; + return properties.properties.limits.maxVertexInputAttributes; } u32 GetMaxVertexInputBindings() const { - return max_vertex_input_bindings; + return properties.properties.limits.maxVertexInputBindings; } private: - /// Checks if the physical device is suitable. - void CheckSuitability(bool requires_swapchain) const; + /// Checks if the physical device is suitable and configures the object state + /// with all necessary info about its properties. + bool GetSuitability(bool requires_swapchain); - /// Loads extensions into a vector and stores available ones in this object. - std::vector LoadExtensions(bool requires_surface); + // Remove extensions which have incomplete feature support. + void RemoveUnsuitableExtensions(); + void RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name); /// Sets up queue families. void SetupFamilies(VkSurfaceKHR surface); - /// Sets up device features. - void SetupFeatures(); - - /// Sets up device properties. - void SetupProperties(); - - /// Collects telemetry information from the device. - void CollectTelemetryParameters(); - /// Collects information about attached tools. void CollectToolingInfo(); @@ -440,91 +584,93 @@ private: std::vector GetDeviceQueueCreateInfos() const; /// Returns true if ASTC textures are natively supported. - bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; + bool ComputeIsOptimalAstcSupported() const; /// Returns true if the device natively supports blitting depth stencil images. bool TestDepthStencilBlits() const; - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. - VkPhysicalDeviceProperties properties; ///< Device properties. - VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. - u32 graphics_family{}; ///< Main graphics queue family index. - u32 present_family{}; ///< Main present queue family index. - VkDriverIdKHR driver_id{}; ///< Driver ID. - VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. - u64 device_access_memory{}; ///< Total size of device local memory in bytes. - u32 max_push_descriptors{}; ///< Maximum number of push descriptors - u32 sets_per_pool{}; ///< Sets per Description Pool - bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool is_float16_supported{}; ///< Support for float16 arithmetic. - bool is_int8_supported{}; ///< Support for int8 arithmetic. - bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. - bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. - bool is_depth_bounds_supported{}; ///< Support for depth bounds. - bool is_shader_float64_supported{}; ///< Support for float64. - bool is_shader_int64_supported{}; ///< Support for int64. - bool is_shader_int16_supported{}; ///< Support for int16. - bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. - bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. - bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list - ///< topologies. - bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch. - bool is_integrated{}; ///< Is GPU an iGPU. - bool is_virtual{}; ///< Is GPU a virtual GPU. - bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. - bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. - bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. - bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. - bool khr_draw_indirect_count{}; ///< Support for VK_KHR_draw_indirect_count. - bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. - bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. - bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. - bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. - bool khr_pipeline_executable_properties{}; ///< Support for executable properties. - bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format. - bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. - bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. - bool ext_depth_clip_control{}; ///< Support for VK_EXT_depth_clip_control - bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. - bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. - bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. - bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. - bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. - bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. - bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_extended_dynamic_state_2{}; ///< Support for VK_EXT_extended_dynamic_state2. - bool ext_extended_dynamic_state_2_extra{}; ///< Support for VK_EXT_extended_dynamic_state2. - bool ext_extended_dynamic_state_3{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_extended_dynamic_state_3_blend{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_extended_dynamic_state_3_enables{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. - bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. - bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. - bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. - bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. - bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. - bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget. - bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. - bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - bool supports_d24_depth{}; ///< Supports D24 depth buffers. - bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. - bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. - bool has_null_descriptor{}; ///< Has support for null descriptors. - u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline - u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline +private: + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan instance version. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + + struct Extensions { +#define EXTENSION(prefix, macro_name, var_name) bool var_name{}; +#define FEATURE(prefix, struct_name, macro_name, var_name) bool var_name{}; + + FOR_EACH_VK_FEATURE_1_1(FEATURE); + FOR_EACH_VK_FEATURE_1_2(FEATURE); + FOR_EACH_VK_FEATURE_1_3(FEATURE); + FOR_EACH_VK_FEATURE_EXT(FEATURE); + FOR_EACH_VK_EXTENSION(EXTENSION); + FOR_EACH_VK_EXTENSION_WIN32(EXTENSION); + +#undef EXTENSION +#undef FEATURE + }; + + struct Features { +#define FEATURE_CORE(prefix, struct_name, macro_name, var_name) \ + VkPhysicalDevice##struct_name##Features var_name{}; +#define FEATURE_EXT(prefix, struct_name, macro_name, var_name) \ + VkPhysicalDevice##struct_name##Features##prefix var_name{}; + + FOR_EACH_VK_FEATURE_1_1(FEATURE_CORE); + FOR_EACH_VK_FEATURE_1_2(FEATURE_CORE); + FOR_EACH_VK_FEATURE_1_3(FEATURE_CORE); + FOR_EACH_VK_FEATURE_EXT(FEATURE_EXT); + +#undef FEATURE_CORE +#undef FEATURE_EXT + + VkPhysicalDeviceFeatures features{}; + }; + + struct Properties { + VkPhysicalDeviceDriverProperties driver{}; + VkPhysicalDeviceFloatControlsProperties float_controls{}; + VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{}; + VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{}; + VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{}; + + VkPhysicalDeviceProperties properties{}; + }; + + Extensions extensions{}; + Features features{}; + Properties properties{}; + + VkPhysicalDeviceFeatures2 features2{}; + VkPhysicalDeviceProperties2 properties2{}; + + // Misc features + bool is_optimal_astc_supported{}; ///< Support for all guest ASTC formats. + bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. + bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. + bool is_integrated{}; ///< Is GPU an iGPU. + bool is_virtual{}; ///< Is GPU a virtual GPU. + bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. + bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached + bool supports_d24_depth{}; ///< Supports D24 depth buffers. + bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. + bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. + bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. + bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. + u64 device_access_memory{}; ///< Total size of device local memory in bytes. + u32 sets_per_pool{}; ///< Sets per Description Pool // Telemetry parameters - std::string vendor_name; ///< Device's driver name. - std::vector supported_extensions; ///< Reported Vulkan extensions. - std::vector valid_heap_memory; ///< Heaps used. + std::set> supported_extensions; ///< Reported Vulkan extensions. + std::set> loaded_extensions; ///< Loaded Vulkan extensions. + std::vector valid_heap_memory; ///< Heaps used. /// Format properties dictionary. std::unordered_map format_properties; diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 61be1fc..486d4df 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -96,8 +96,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdDrawIndexed); X(vkCmdDrawIndirect); X(vkCmdDrawIndexedIndirect); - X(vkCmdDrawIndirectCountKHR); - X(vkCmdDrawIndexedIndirectCountKHR); + X(vkCmdDrawIndirectCount); + X(vkCmdDrawIndexedIndirectCount); X(vkCmdEndQuery); X(vkCmdEndRenderPass); X(vkCmdEndTransformFeedbackEXT); @@ -221,6 +221,12 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { if (!dld.vkResetQueryPool) { Proc(dld.vkResetQueryPool, dld, "vkResetQueryPoolEXT", device); } + + // Support for draw indirect with count is optional in Vulkan 1.2 + if (!dld.vkCmdDrawIndirectCount) { + Proc(dld.vkCmdDrawIndirectCount, dld, "vkCmdDrawIndirectCountKHR", device); + Proc(dld.vkCmdDrawIndexedIndirectCount, dld, "vkCmdDrawIndexedIndirectCountKHR", device); + } #undef X } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 412779b..e86f661 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -215,8 +215,8 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; PFN_vkCmdDrawIndirect vkCmdDrawIndirect{}; PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{}; - PFN_vkCmdDrawIndirectCountKHR vkCmdDrawIndirectCountKHR{}; - PFN_vkCmdDrawIndexedIndirectCountKHR vkCmdDrawIndexedIndirectCountKHR{}; + PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{}; + PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{}; PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdEndQuery vkCmdEndQuery{}; PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; @@ -1065,15 +1065,15 @@ public: void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept { - dld->vkCmdDrawIndirectCountKHR(handle, src_buffer, src_offset, count_buffer, count_offset, - draw_count, stride); + dld->vkCmdDrawIndirectCount(handle, src_buffer, src_offset, count_buffer, count_offset, + draw_count, stride); } void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept { - dld->vkCmdDrawIndexedIndirectCountKHR(handle, src_buffer, src_offset, count_buffer, - count_offset, draw_count, stride); + dld->vkCmdDrawIndexedIndirectCount(handle, src_buffer, src_offset, count_buffer, + count_offset, draw_count, stride); } void ClearAttachments(Span attachments, diff --git a/src/yuzu/Info.plist b/src/yuzu/Info.plist index 0eb3779..f05f318 100644 --- a/src/yuzu/Info.plist +++ b/src/yuzu/Info.plist @@ -34,6 +34,8 @@ SPDX-License-Identifier: GPL-2.0-or-later CSResourcesFileMapped + LSApplicationCategoryType + public.app-category.games LSRequiresCarbon NSHumanReadableCopyright diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 3d560f3..d659917 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -96,9 +96,9 @@ void EmuThread::run() { m_is_running.store(false); m_is_running.notify_all(); - emit DebugModeEntered(); + EmulationPaused(lk); Common::CondvarWait(m_should_run_cv, lk, stop_token, [&] { return m_should_run; }); - emit DebugModeLeft(); + EmulationResumed(lk); } } @@ -111,6 +111,21 @@ void EmuThread::run() { #endif } +// Unlock while emitting signals so that the main thread can +// continue pumping events. + +void EmuThread::EmulationPaused(std::unique_lock& lk) { + lk.unlock(); + emit DebugModeEntered(); + lk.lock(); +} + +void EmuThread::EmulationResumed(std::unique_lock& lk) { + lk.unlock(); + emit DebugModeLeft(); + lk.lock(); +} + #ifdef HAS_OPENGL class OpenGLSharedContext : public Core::Frontend::GraphicsContext { public: diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index eca16b3..092c620 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -91,6 +91,10 @@ public: m_stop_source.request_stop(); } +private: + void EmulationPaused(std::unique_lock& lk); + void EmulationResumed(std::unique_lock& lk); + private: Core::System& m_system; diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index fdf8485..cc0155a 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -22,6 +22,7 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; void ConfigureGraphicsAdvanced::SetConfiguration() { const bool runtime_lock = !system.IsPoweredOn(); ui->use_vsync->setEnabled(runtime_lock); + ui->renderer_force_max_clock->setEnabled(runtime_lock); ui->use_asynchronous_shaders->setEnabled(runtime_lock); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); @@ -40,12 +41,12 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { Settings::values.max_anisotropy.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy); - ConfigurationShared::SetPerGameSetting(ui->renderer_force_max_clock, - &Settings::values.renderer_force_max_clock); ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox, &Settings::values.max_anisotropy); ConfigurationShared::SetHighlight(ui->label_gpu_accuracy, !Settings::values.gpu_accuracy.UsingGlobal()); + ConfigurationShared::SetHighlight(ui->renderer_force_max_clock, + !Settings::values.renderer_force_max_clock.UsingGlobal()); ConfigurationShared::SetHighlight(ui->af_label, !Settings::values.max_anisotropy.UsingGlobal()); } diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 183cbe5..c40d980 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -1466,6 +1466,12 @@ void ConfigureInputPlayer::mousePressEvent(QMouseEvent* event) { input_subsystem->GetMouse()->PressButton(0, 0, 0, 0, button); } +void ConfigureInputPlayer::wheelEvent(QWheelEvent* event) { + const int x = event->angleDelta().x(); + const int y = event->angleDelta().y(); + input_subsystem->GetMouse()->MouseWheelChange(x, y); +} + void ConfigureInputPlayer::keyPressEvent(QKeyEvent* event) { if (!input_setter || !event) { return; diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h index 6d1876f..99a9c87 100644 --- a/src/yuzu/configuration/configure_input_player.h +++ b/src/yuzu/configuration/configure_input_player.h @@ -116,6 +116,9 @@ private: /// Handle mouse button press events. void mousePressEvent(QMouseEvent* event) override; + /// Handle mouse wheel move events. + void wheelEvent(QWheelEvent* event) override; + /// Handle key press events. void keyPressEvent(QKeyEvent* event) override; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 43f6153..78fdae7 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -1838,9 +1838,11 @@ void GMainWindow::OnEmulationStopTimeExpired() { void GMainWindow::OnEmulationStopped() { shutdown_timer.stop(); - emu_thread->disconnect(); - emu_thread->wait(); - emu_thread = nullptr; + if (emu_thread) { + emu_thread->disconnect(); + emu_thread->wait(); + emu_thread.reset(); + } if (shutdown_dialog) { shutdown_dialog->deleteLater(); @@ -3028,6 +3030,8 @@ void GMainWindow::OnStopGame() { if (OnShutdownBegin()) { OnShutdownBeginDialog(); + } else { + OnEmulationStopped(); } } @@ -3725,15 +3729,36 @@ void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_vie } } +std::string GMainWindow::CreateTASFramesString( + std::array frames) const { + std::string string = ""; + size_t maxPlayerIndex = 0; + for (size_t i = 0; i < frames.size(); i++) { + if (frames[i] != 0) { + if (maxPlayerIndex != 0) + string += ", "; + while (maxPlayerIndex++ != i) + string += "0, "; + string += std::to_string(frames[i]); + } + } + return string; +} + QString GMainWindow::GetTasStateDescription() const { auto [tas_status, current_tas_frame, total_tas_frames] = input_subsystem->GetTas()->GetStatus(); + std::string tas_frames_string = CreateTASFramesString(total_tas_frames); switch (tas_status) { case InputCommon::TasInput::TasState::Running: - return tr("TAS state: Running %1/%2").arg(current_tas_frame).arg(total_tas_frames); + return tr("TAS state: Running %1/%2") + .arg(current_tas_frame) + .arg(QString::fromStdString(tas_frames_string)); case InputCommon::TasInput::TasState::Recording: - return tr("TAS state: Recording %1").arg(total_tas_frames); + return tr("TAS state: Recording %1").arg(total_tas_frames[0]); case InputCommon::TasInput::TasState::Stopped: - return tr("TAS state: Idle %1/%2").arg(current_tas_frame).arg(total_tas_frames); + return tr("TAS state: Idle %1/%2") + .arg(current_tas_frame) + .arg(QString::fromStdString(tas_frames_string)); default: return tr("TAS State: Invalid"); } diff --git a/src/yuzu/main.h b/src/yuzu/main.h index f25ce65..0f61abc 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -12,6 +12,7 @@ #include "common/announce_multiplayer_room.h" #include "common/common_types.h" +#include "input_common/drivers/tas_input.h" #include "yuzu/compatibility_list.h" #include "yuzu/hotkeys.h" @@ -266,6 +267,9 @@ private: void changeEvent(QEvent* event) override; void closeEvent(QCloseEvent* event) override; + std::string CreateTASFramesString( + std::array frames) const; + #ifdef __unix__ void SetupSigInterrupts(); static void HandleSigInterrupt(int);