New upstream version 0-1299+ds
This commit is contained in:
parent
d3b74ab2d5
commit
9209efb76e
170 changed files with 22277 additions and 15633 deletions
2
.ci/scripts/format/script.sh
Normal file → Executable file
2
.ci/scripts/format/script.sh
Normal file → Executable file
|
@ -10,7 +10,7 @@ if grep -nrI '\s$' src *.yml *.txt *.md Doxyfile .gitignore .gitmodules .ci* dis
|
|||
fi
|
||||
|
||||
# The unversioned clang-format command points to the default 3.5 version
|
||||
CLANG_FORMAT=clang-format-12
|
||||
CLANG_FORMAT=${CLANG_FORMAT:-clang-format-12}
|
||||
$CLANG_FORMAT --version
|
||||
|
||||
if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then
|
||||
|
|
|
@ -3,13 +3,8 @@
|
|||
|
||||
cmake_minimum_required(VERSION 3.22)
|
||||
|
||||
# Dynarmic has cmake_minimum_required(3.12) and we may want to override
|
||||
# some of its variables, which is only possible in 3.13+
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/find-modules")
|
||||
include(DownloadExternals)
|
||||
include(CMakeDependentOption)
|
||||
|
||||
|
@ -22,6 +17,8 @@ CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_SDL2 "Download bundled SDL2 binaries" ON
|
|||
# On Linux, the system SDL2 is likely to lack HIDAPI support, which has drawbacks but is needed for SDL motion
|
||||
CMAKE_DEPENDENT_OPTION(YUZU_USE_EXTERNAL_SDL2 "Compile external SDL2" ON "ENABLE_SDL2;NOT MSVC" OFF)
|
||||
|
||||
option(ENABLE_LIBUSB "Enable the use of LibUSB" ON)
|
||||
|
||||
option(ENABLE_OPENGL "Enable OpenGL" ON)
|
||||
mark_as_advanced(FORCE ENABLE_OPENGL)
|
||||
option(ENABLE_QT "Enable the Qt frontend" ON)
|
||||
|
@ -35,6 +32,8 @@ option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
|
|||
|
||||
option(YUZU_USE_BUNDLED_FFMPEG "Download/Build bundled FFmpeg" "${WIN32}")
|
||||
|
||||
option(YUZU_USE_EXTERNAL_VULKAN_HEADERS "Use Vulkan-Headers from externals" ON)
|
||||
|
||||
option(YUZU_USE_QT_MULTIMEDIA "Use QtMultimedia for Camera" OFF)
|
||||
|
||||
option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OFF)
|
||||
|
@ -47,6 +46,8 @@ option(YUZU_TESTS "Compile tests" ON)
|
|||
|
||||
option(YUZU_USE_PRECOMPILED_HEADERS "Use precompiled headers" ON)
|
||||
|
||||
option(YUZU_ROOM "Compile LDN room server" ON)
|
||||
|
||||
CMAKE_DEPENDENT_OPTION(YUZU_CRASH_DUMPS "Compile Windows crash dump (Minidump) support" OFF "WIN32" OFF)
|
||||
|
||||
option(YUZU_USE_BUNDLED_VCPKG "Use vcpkg for yuzu dependencies" "${MSVC}")
|
||||
|
@ -201,36 +202,43 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
|
|||
# System imported libraries
|
||||
# =======================================================================
|
||||
|
||||
find_package(enet 1.3)
|
||||
# Enforce the search mode of non-required packages for better and shorter failure messages
|
||||
find_package(enet 1.3 MODULE)
|
||||
find_package(fmt 9 REQUIRED)
|
||||
find_package(inih)
|
||||
find_package(libusb 1.0.24)
|
||||
find_package(inih MODULE)
|
||||
find_package(lz4 REQUIRED)
|
||||
find_package(nlohmann_json 3.8 REQUIRED)
|
||||
find_package(Opus 1.3)
|
||||
find_package(Vulkan 1.3.238)
|
||||
find_package(Opus 1.3 MODULE)
|
||||
find_package(ZLIB 1.2 REQUIRED)
|
||||
find_package(zstd 1.5 REQUIRED)
|
||||
|
||||
if (NOT YUZU_USE_EXTERNAL_VULKAN_HEADERS)
|
||||
find_package(Vulkan 1.3.238 REQUIRED)
|
||||
endif()
|
||||
|
||||
if (ENABLE_LIBUSB)
|
||||
find_package(libusb 1.0.24 MODULE)
|
||||
endif()
|
||||
|
||||
if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
|
||||
find_package(xbyak 6)
|
||||
find_package(xbyak 6 CONFIG)
|
||||
endif()
|
||||
|
||||
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
|
||||
find_package(dynarmic 6.4.0)
|
||||
find_package(dynarmic 6.4.0 CONFIG)
|
||||
endif()
|
||||
|
||||
if (ENABLE_CUBEB)
|
||||
find_package(cubeb)
|
||||
find_package(cubeb CONFIG)
|
||||
endif()
|
||||
|
||||
if (USE_DISCORD_PRESENCE)
|
||||
find_package(DiscordRPC)
|
||||
find_package(DiscordRPC MODULE)
|
||||
endif()
|
||||
|
||||
if (ENABLE_WEB_SERVICE)
|
||||
find_package(cpp-jwt 1.4)
|
||||
find_package(httplib 0.11)
|
||||
find_package(cpp-jwt 1.4 CONFIG)
|
||||
find_package(httplib 0.11 MODULE)
|
||||
endif()
|
||||
|
||||
if (YUZU_TESTS)
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
find_package(httplib QUIET CONFIG)
|
||||
if (httplib_FOUND)
|
||||
if (httplib_CONSIDERED_CONFIGS)
|
||||
find_package_handle_standard_args(httplib CONFIG_MODE)
|
||||
else()
|
||||
find_package(PkgConfig QUIET)
|
|
@ -4,7 +4,7 @@
|
|||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
find_package(lz4 QUIET CONFIG)
|
||||
if (lz4_FOUND)
|
||||
if (lz4_CONSIDERED_CONFIGS)
|
||||
find_package_handle_standard_args(lz4 CONFIG_MODE)
|
||||
else()
|
||||
find_package(PkgConfig QUIET)
|
|
@ -4,7 +4,7 @@
|
|||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
find_package(zstd QUIET CONFIG)
|
||||
if (zstd_FOUND)
|
||||
if (zstd_CONSIDERED_CONFIGS)
|
||||
find_package_handle_standard_args(zstd CONFIG_MODE)
|
||||
else()
|
||||
find_package(PkgConfig QUIET)
|
1334
dist/languages/ca.ts
vendored
1334
dist/languages/ca.ts
vendored
File diff suppressed because it is too large
Load diff
1334
dist/languages/cs.ts
vendored
1334
dist/languages/cs.ts
vendored
File diff suppressed because it is too large
Load diff
1332
dist/languages/da.ts
vendored
1332
dist/languages/da.ts
vendored
File diff suppressed because it is too large
Load diff
1380
dist/languages/de.ts
vendored
1380
dist/languages/de.ts
vendored
File diff suppressed because it is too large
Load diff
1332
dist/languages/el.ts
vendored
1332
dist/languages/el.ts
vendored
File diff suppressed because it is too large
Load diff
1338
dist/languages/es.ts
vendored
1338
dist/languages/es.ts
vendored
File diff suppressed because it is too large
Load diff
1343
dist/languages/fr.ts
vendored
1343
dist/languages/fr.ts
vendored
File diff suppressed because it is too large
Load diff
1332
dist/languages/id.ts
vendored
1332
dist/languages/id.ts
vendored
File diff suppressed because it is too large
Load diff
1431
dist/languages/it.ts
vendored
1431
dist/languages/it.ts
vendored
File diff suppressed because it is too large
Load diff
1334
dist/languages/ja_JP.ts
vendored
1334
dist/languages/ja_JP.ts
vendored
File diff suppressed because it is too large
Load diff
1334
dist/languages/ko_KR.ts
vendored
1334
dist/languages/ko_KR.ts
vendored
File diff suppressed because it is too large
Load diff
1334
dist/languages/nb.ts
vendored
1334
dist/languages/nb.ts
vendored
File diff suppressed because it is too large
Load diff
1334
dist/languages/nl.ts
vendored
1334
dist/languages/nl.ts
vendored
File diff suppressed because it is too large
Load diff
1335
dist/languages/pl.ts
vendored
1335
dist/languages/pl.ts
vendored
File diff suppressed because it is too large
Load diff
1384
dist/languages/pt_BR.ts
vendored
1384
dist/languages/pt_BR.ts
vendored
File diff suppressed because it is too large
Load diff
1348
dist/languages/pt_PT.ts
vendored
1348
dist/languages/pt_PT.ts
vendored
File diff suppressed because it is too large
Load diff
1501
dist/languages/ru_RU.ts
vendored
1501
dist/languages/ru_RU.ts
vendored
File diff suppressed because it is too large
Load diff
1334
dist/languages/sv.ts
vendored
1334
dist/languages/sv.ts
vendored
File diff suppressed because it is too large
Load diff
1346
dist/languages/tr_TR.ts
vendored
1346
dist/languages/tr_TR.ts
vendored
File diff suppressed because it is too large
Load diff
1475
dist/languages/uk.ts
vendored
1475
dist/languages/uk.ts
vendored
File diff suppressed because it is too large
Load diff
1332
dist/languages/vi.ts
vendored
1332
dist/languages/vi.ts
vendored
File diff suppressed because it is too large
Load diff
1332
dist/languages/vi_VN.ts
vendored
1332
dist/languages/vi_VN.ts
vendored
File diff suppressed because it is too large
Load diff
1337
dist/languages/zh_CN.ts
vendored
1337
dist/languages/zh_CN.ts
vendored
File diff suppressed because it is too large
Load diff
1341
dist/languages/zh_TW.ts
vendored
1341
dist/languages/zh_TW.ts
vendored
File diff suppressed because it is too large
Load diff
23
externals/CMakeLists.txt
vendored
23
externals/CMakeLists.txt
vendored
|
@ -1,9 +1,9 @@
|
|||
# SPDX-FileCopyrightText: 2016 Citra Emulator Project
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules")
|
||||
include(DownloadExternals)
|
||||
# Dynarmic has cmake_minimum_required(3.12) and we may want to override
|
||||
# some of its variables, which is only possible in 3.13+
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||
|
||||
# xbyak
|
||||
if ((ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) AND NOT TARGET xbyak::xbyak)
|
||||
|
@ -12,8 +12,7 @@ endif()
|
|||
|
||||
# Dynarmic
|
||||
if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic)
|
||||
set(DYNARMIC_NO_BUNDLED_FMT ON)
|
||||
set(DYNARMIC_IGNORE_ASSERTS ON CACHE BOOL "" FORCE)
|
||||
set(DYNARMIC_IGNORE_ASSERTS ON)
|
||||
add_subdirectory(dynarmic EXCLUDE_FROM_ALL)
|
||||
add_library(dynarmic::dynarmic ALIAS dynarmic)
|
||||
endif()
|
||||
|
@ -45,7 +44,7 @@ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "12" AND CMAKE_CXX_COMPILER
|
|||
endif()
|
||||
|
||||
# libusb
|
||||
if (NOT TARGET libusb::usb)
|
||||
if (ENABLE_LIBUSB AND NOT TARGET libusb::usb)
|
||||
add_subdirectory(libusb EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
|
||||
|
@ -60,10 +59,10 @@ if (YUZU_USE_EXTERNAL_SDL2)
|
|||
Locale Power Render)
|
||||
foreach(_SUB ${SDL_UNUSED_SUBSYSTEMS})
|
||||
string(TOUPPER ${_SUB} _OPT)
|
||||
option(SDL_${_OPT} "" OFF)
|
||||
set(SDL_${_OPT} OFF)
|
||||
endforeach()
|
||||
|
||||
option(HIDAPI "" ON)
|
||||
set(HIDAPI ON)
|
||||
endif()
|
||||
set(SDL_STATIC ON)
|
||||
set(SDL_SHARED OFF)
|
||||
|
@ -83,7 +82,7 @@ endif()
|
|||
|
||||
# Cubeb
|
||||
if (ENABLE_CUBEB AND NOT TARGET cubeb::cubeb)
|
||||
set(BUILD_TESTS OFF CACHE BOOL "")
|
||||
set(BUILD_TESTS OFF)
|
||||
add_subdirectory(cubeb EXCLUDE_FROM_ALL)
|
||||
add_library(cubeb::cubeb ALIAS cubeb)
|
||||
endif()
|
||||
|
@ -98,6 +97,7 @@ endif()
|
|||
# Sirit
|
||||
add_subdirectory(sirit EXCLUDE_FROM_ALL)
|
||||
|
||||
# httplib
|
||||
if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib)
|
||||
if (NOT WIN32)
|
||||
find_package(OpenSSL 1.1)
|
||||
|
@ -108,7 +108,7 @@ if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib)
|
|||
|
||||
if (WIN32 OR NOT OPENSSL_FOUND)
|
||||
# LibreSSL
|
||||
set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
|
||||
set(LIBRESSL_SKIP_INSTALL ON)
|
||||
set(OPENSSLDIR "/etc/ssl/")
|
||||
add_subdirectory(libressl EXCLUDE_FROM_ALL)
|
||||
target_include_directories(ssl INTERFACE ./libressl/include)
|
||||
|
@ -118,7 +118,6 @@ if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib)
|
|||
DEFINITION OPENSSL_LIBS)
|
||||
endif()
|
||||
|
||||
# httplib
|
||||
add_library(httplib INTERFACE)
|
||||
target_include_directories(httplib INTERFACE ./cpp-httplib)
|
||||
target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
|
||||
|
@ -152,6 +151,6 @@ if (YUZU_USE_BUNDLED_FFMPEG)
|
|||
endif()
|
||||
|
||||
# Vulkan-Headers
|
||||
if (NOT TARGET Vulkan::Headers)
|
||||
if (YUZU_USE_EXTERNAL_VULKAN_HEADERS)
|
||||
add_subdirectory(Vulkan-Headers EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
|
|
|
@ -1160,6 +1160,14 @@ public:
|
|||
/// TBD
|
||||
Id OpSubgroupAllEqualKHR(Id result_type, Id predicate);
|
||||
|
||||
// Result is true only in the active invocation with the lowest id in the group, otherwise
|
||||
// result is false.
|
||||
Id OpGroupNonUniformElect(Id result_type, Id scope);
|
||||
|
||||
// Result is the Value of the invocation from the active invocation with the lowest id in the
|
||||
// group to all active invocations in the group.
|
||||
Id OpGroupNonUniformBroadcastFirst(Id result_type, Id scope, Id value);
|
||||
|
||||
// Result is the Value of the invocation identified by the id Id to all active invocations in
|
||||
// the group.
|
||||
Id OpGroupNonUniformBroadcast(Id result_type, Id scope, Id value, Id id);
|
||||
|
|
|
@ -36,6 +36,17 @@ Id Module::OpSubgroupAllEqualKHR(Id result_type, Id predicate) {
|
|||
return *code << OpId{spv::Op::OpSubgroupAllEqualKHR, result_type} << predicate << EndOp{};
|
||||
}
|
||||
|
||||
Id Module::OpGroupNonUniformElect(Id result_type, Id scope) {
|
||||
code->Reserve(4);
|
||||
return *code << OpId{spv::Op::OpGroupNonUniformElect, result_type} << scope << EndOp{};
|
||||
}
|
||||
|
||||
Id Module::OpGroupNonUniformBroadcastFirst(Id result_type, Id scope, Id value) {
|
||||
code->Reserve(5);
|
||||
return *code << OpId{spv::Op::OpGroupNonUniformBroadcastFirst, result_type} << scope << value
|
||||
<< EndOp{};
|
||||
}
|
||||
|
||||
Id Module::OpGroupNonUniformBroadcast(Id result_type, Id scope, Id value, Id id) {
|
||||
code->Reserve(6);
|
||||
return *code << OpId{spv::Op::OpGroupNonUniformBroadcast, result_type} << scope << value
|
||||
|
|
|
@ -161,7 +161,10 @@ add_subdirectory(video_core)
|
|||
add_subdirectory(network)
|
||||
add_subdirectory(input_common)
|
||||
add_subdirectory(shader_recompiler)
|
||||
add_subdirectory(dedicated_room)
|
||||
|
||||
if (YUZU_ROOM)
|
||||
add_subdirectory(dedicated_room)
|
||||
endif()
|
||||
|
||||
if (YUZU_TESTS)
|
||||
add_subdirectory(tests)
|
||||
|
|
|
@ -187,11 +187,7 @@ add_library(audio_core STATIC
|
|||
renderer/voice/voice_info.cpp
|
||||
renderer/voice/voice_info.h
|
||||
renderer/voice/voice_state.h
|
||||
sink/cubeb_sink.cpp
|
||||
sink/cubeb_sink.h
|
||||
sink/null_sink.h
|
||||
sink/sdl2_sink.cpp
|
||||
sink/sdl2_sink.h
|
||||
sink/sink.h
|
||||
sink/sink_details.cpp
|
||||
sink/sink_details.h
|
||||
|
@ -222,11 +218,22 @@ if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
|
|||
target_link_libraries(audio_core PRIVATE dynarmic::dynarmic)
|
||||
endif()
|
||||
|
||||
if(ENABLE_CUBEB)
|
||||
if (ENABLE_CUBEB)
|
||||
target_sources(audio_core PRIVATE
|
||||
sink/cubeb_sink.cpp
|
||||
sink/cubeb_sink.h
|
||||
)
|
||||
|
||||
target_link_libraries(audio_core PRIVATE cubeb::cubeb)
|
||||
target_compile_definitions(audio_core PRIVATE -DHAVE_CUBEB=1)
|
||||
endif()
|
||||
if(ENABLE_SDL2)
|
||||
|
||||
if (ENABLE_SDL2)
|
||||
target_sources(audio_core PRIVATE
|
||||
sink/sdl2_sink.cpp
|
||||
sink/sdl2_sink.h
|
||||
)
|
||||
|
||||
target_link_libraries(audio_core PRIVATE SDL2::SDL2)
|
||||
target_compile_definitions(audio_core PRIVATE HAVE_SDL2)
|
||||
endif()
|
||||
|
|
|
@ -97,6 +97,7 @@ add_library(common STATIC
|
|||
point.h
|
||||
precompiled_headers.h
|
||||
quaternion.h
|
||||
range_map.h
|
||||
reader_writer_queue.h
|
||||
ring_buffer.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp
|
||||
|
|
|
@ -393,12 +393,27 @@ public:
|
|||
}
|
||||
|
||||
// Virtual memory initialization
|
||||
virtual_base = static_cast<u8*>(
|
||||
mmap(nullptr, virtual_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
|
||||
#if defined(__FreeBSD__)
|
||||
virtual_base =
|
||||
static_cast<u8*>(mmap(nullptr, virtual_size, PROT_NONE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0));
|
||||
if (virtual_base == MAP_FAILED) {
|
||||
virtual_base = static_cast<u8*>(
|
||||
mmap(nullptr, virtual_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
|
||||
if (virtual_base == MAP_FAILED) {
|
||||
LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
|
||||
throw std::bad_alloc{};
|
||||
}
|
||||
}
|
||||
#else
|
||||
virtual_base = static_cast<u8*>(mmap(nullptr, virtual_size, PROT_NONE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0));
|
||||
if (virtual_base == MAP_FAILED) {
|
||||
LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
|
||||
throw std::bad_alloc{};
|
||||
}
|
||||
madvise(virtual_base, virtual_size, MADV_HUGEPAGE);
|
||||
#endif
|
||||
|
||||
good = true;
|
||||
}
|
||||
|
|
139
src/common/range_map.h
Normal file
139
src/common/range_map.h
Normal file
|
@ -0,0 +1,139 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <type_traits>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
/// Associates half-open key ranges [address, address_end) with values.
///
/// The ranges are stored as boundary markers in an ordered map: an entry (key, value) means
/// "from key onwards the value is `value`, until the next entry". A marker carrying
/// `null_value` therefore ends a mapped range. The constructor inserts a null marker at the
/// lowest representable key so that every lookup finds a marker at or before it.
///
/// Keys are held in the signed counterpart of KeyTBase. Inputs that become negative after the
/// conversion are clamped to 0 when mapping and reported as unmapped when queried.
template <typename KeyTBase, typename ValueT>
class RangeMap {
private:
    using KeyT =
        std::conditional_t<std::is_signed_v<KeyTBase>, KeyTBase, std::make_signed_t<KeyTBase>>;

public:
    /// @param null_value_ Value that represents "nothing mapped here".
    explicit RangeMap(ValueT null_value_) : null_value{null_value_} {
        container.emplace(std::numeric_limits<KeyT>::min(), null_value);
    }
    ~RangeMap() = default;

    /// Assigns `value` to every key in [address, address_end).
    ///
    /// Both bounds are converted to the signed key type and clamped to 0 if negative. A range
    /// that is empty or inverted after clamping is ignored.
    void Map(KeyTBase address, KeyTBase address_end, ValueT value) {
        KeyT new_address = static_cast<KeyT>(address);
        KeyT new_address_end = static_cast<KeyT>(address_end);
        if (new_address < 0) {
            new_address = 0;
        }
        if (new_address_end < 0) {
            new_address_end = 0;
        }
        // An empty range would insert a start marker whose end marker collides with it (the
        // second emplace is silently dropped), leaking `value` up to the next marker. An
        // inverted range would make the erase loop in InternalMap run past end().
        if (new_address_end <= new_address) {
            return;
        }
        InternalMap(new_address, new_address_end, value);
    }

    /// Resets [address, address_end) to the null value.
    void Unmap(KeyTBase address, KeyTBase address_end) {
        Map(address, address_end, null_value);
    }

    /// Returns the distance from `address` to the next marker, or 0 when `address` holds the
    /// null value or is negative as a signed key. When no marker follows, the distance to the
    /// largest representable key is returned.
    [[nodiscard]] size_t GetContinousSizeFrom(KeyTBase address) const {
        const KeyT new_address = static_cast<KeyT>(address);
        if (new_address < 0) {
            return 0;
        }
        return ContinousSizeInternal(new_address);
    }

    /// Returns the value in effect at `address`; negative addresses yield the null value.
    [[nodiscard]] ValueT GetValueAt(KeyT address) const {
        if (address < 0) {
            return null_value;
        }
        return GetValueInternal(address);
    }

private:
    using MapType = std::map<KeyT, ValueT>;
    using IteratorType = typename MapType::iterator;
    using ConstIteratorType = typename MapType::const_iterator;

    size_t ContinousSizeInternal(KeyT address) const {
        const auto it = GetFirstElementBeforeOrOn(address);
        if (it == container.end() || it->second == null_value) {
            return 0;
        }
        const auto it_end = std::next(it);
        if (it_end == container.end()) {
            return std::numeric_limits<KeyT>::max() - address;
        }
        return it_end->first - address;
    }

    ValueT GetValueInternal(KeyT address) const {
        const auto it = GetFirstElementBeforeOrOn(address);
        if (it == container.end()) {
            return null_value;
        }
        return it->second;
    }

    /// Returns the marker whose key is the greatest one not above `address`, or begin() when
    /// `address` is not above the first key.
    ConstIteratorType GetFirstElementBeforeOrOn(KeyT address) const {
        auto it = container.lower_bound(address);
        if (it == container.begin()) {
            return it;
        }
        if (it != container.end() && (it->first == address)) {
            return it;
        }
        --it;
        return it;
    }

    /// Returns the value in effect immediately before `address`, i.e. the value of the last
    /// marker strictly below it.
    ValueT GetFirstValueWithin(KeyT address) const {
        auto it = container.lower_bound(address);
        if (it == container.begin()) {
            return it->second;
        }
        if (it == container.end()) {
            // No marker at or after `address`. The very first Map() call always lands here,
            // so this is a regular case; the region is reported as null.
            return null_value;
        }
        --it;
        return it->second;
    }

    /// Returns the value in effect at `address`, i.e. the value of the last marker at or
    /// below it; reports null when no marker lies above `address`.
    ValueT GetLastValueWithin(KeyT address) const {
        auto it = container.upper_bound(address);
        if (it == container.end()) {
            return null_value;
        }
        if (it == container.begin()) [[unlikely]] {
            // Every key is above `address`; not expected, because the constructor places a
            // marker at the lowest representable key.
            return it->second;
        }
        --it;
        return it->second;
    }

    /// Rewrites the markers so that [address, address_end) carries `value`.
    /// Requires address < address_end; Map() enforces this.
    void InternalMap(KeyT address, KeyT address_end, ValueT value) {
        // Sample both neighbouring values before erasing anything, since the erase below
        // removes the markers they are read from.
        const bool must_add_start = GetFirstValueWithin(address) != value;
        const ValueT last_value = GetLastValueWithin(address_end);
        const bool must_add_end = last_value != value;

        // Drop every marker inside [address, address_end], both ends included.
        auto it = container.lower_bound(address);
        const auto it_end = container.upper_bound(address_end);
        while (it != it_end) {
            it = container.erase(it);
        }

        // Re-create only the boundaries where the value actually changes, so neighbouring
        // ranges with an equal value merge.
        if (must_add_start) {
            container.emplace(address, value);
        }
        if (must_add_end) {
            container.emplace(address_end, last_value);
        }
    }

    ValueT null_value;
    MapType container;
};
|
||||
|
||||
} // namespace Common
|
|
@ -185,6 +185,7 @@ void RestoreGlobalState(bool is_powered_on) {
|
|||
// Renderer
|
||||
values.fsr_sharpening_slider.SetGlobal(true);
|
||||
values.renderer_backend.SetGlobal(true);
|
||||
values.renderer_force_max_clock.SetGlobal(true);
|
||||
values.vulkan_device.SetGlobal(true);
|
||||
values.aspect_ratio.SetGlobal(true);
|
||||
values.max_anisotropy.SetGlobal(true);
|
||||
|
@ -200,6 +201,7 @@ void RestoreGlobalState(bool is_powered_on) {
|
|||
values.use_asynchronous_shaders.SetGlobal(true);
|
||||
values.use_fast_gpu_time.SetGlobal(true);
|
||||
values.use_pessimistic_flushes.SetGlobal(true);
|
||||
values.use_vulkan_driver_pipeline_cache.SetGlobal(true);
|
||||
values.bg_red.SetGlobal(true);
|
||||
values.bg_green.SetGlobal(true);
|
||||
values.bg_blue.SetGlobal(true);
|
||||
|
|
|
@ -415,6 +415,7 @@ struct Values {
|
|||
// Renderer
|
||||
SwitchableSetting<RendererBackend, true> renderer_backend{
|
||||
RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
|
||||
SwitchableSetting<bool> renderer_force_max_clock{true, "force_max_clock"};
|
||||
Setting<bool> renderer_debug{false, "debug"};
|
||||
Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
|
||||
Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
|
||||
|
@ -451,6 +452,8 @@ struct Values {
|
|||
SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
|
||||
SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
|
||||
SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"};
|
||||
SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
|
||||
"use_vulkan_driver_pipeline_cache"};
|
||||
|
||||
SwitchableSetting<u8> bg_red{0, "bg_red"};
|
||||
SwitchableSetting<u8> bg_green{0, "bg_green"};
|
||||
|
@ -531,6 +534,7 @@ struct Values {
|
|||
Setting<bool> reporting_services{false, "reporting_services"};
|
||||
Setting<bool> quest_flag{false, "quest_flag"};
|
||||
Setting<bool> disable_macro_jit{false, "disable_macro_jit"};
|
||||
Setting<bool> disable_macro_hle{false, "disable_macro_hle"};
|
||||
Setting<bool> extended_logging{false, "extended_logging"};
|
||||
Setting<bool> use_debug_asserts{false, "use_debug_asserts"};
|
||||
Setting<bool> use_auto_stub{false, "use_auto_stub"};
|
||||
|
|
|
@ -229,7 +229,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
|
|||
config.enable_cycle_counting = true;
|
||||
|
||||
// Code cache size
|
||||
#ifdef ARCHITECTURE_arm64
|
||||
config.code_cache_size = 128_MiB;
|
||||
#else
|
||||
config.code_cache_size = 512_MiB;
|
||||
#endif
|
||||
|
||||
// Allow memory fault handling to work
|
||||
if (system.DebuggerEnabled()) {
|
||||
|
|
|
@ -288,7 +288,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
|
|||
config.enable_cycle_counting = true;
|
||||
|
||||
// Code cache size
|
||||
#ifdef ARCHITECTURE_arm64
|
||||
config.code_cache_size = 128_MiB;
|
||||
#else
|
||||
config.code_cache_size = 512_MiB;
|
||||
#endif
|
||||
|
||||
// Allow memory fault handling to work
|
||||
if (system.DebuggerEnabled()) {
|
||||
|
|
|
@ -194,9 +194,9 @@ std::size_t VfsFile::WriteBytes(const std::vector<u8>& data, std::size_t offset)
|
|||
|
||||
std::string VfsFile::GetFullPath() const {
|
||||
if (GetContainingDirectory() == nullptr)
|
||||
return "/" + GetName();
|
||||
return '/' + GetName();
|
||||
|
||||
return GetContainingDirectory()->GetFullPath() + "/" + GetName();
|
||||
return GetContainingDirectory()->GetFullPath() + '/' + GetName();
|
||||
}
|
||||
|
||||
VirtualFile VfsDirectory::GetFileRelative(std::string_view path) const {
|
||||
|
@ -435,7 +435,7 @@ std::string VfsDirectory::GetFullPath() const {
|
|||
if (IsRoot())
|
||||
return GetName();
|
||||
|
||||
return GetParentDirectory()->GetFullPath() + "/" + GetName();
|
||||
return GetParentDirectory()->GetFullPath() + '/' + GetName();
|
||||
}
|
||||
|
||||
bool ReadOnlyVfsDirectory::IsWritable() const {
|
||||
|
|
|
@ -40,6 +40,11 @@ void EmulatedConsole::SetTouchParams() {
|
|||
touch_params[index++] = std::move(touchscreen_param);
|
||||
}
|
||||
|
||||
if (Settings::values.touch_from_button_maps.empty()) {
|
||||
LOG_WARNING(Input, "touch_from_button_maps is unset by frontend config");
|
||||
return;
|
||||
}
|
||||
|
||||
const auto button_index =
|
||||
static_cast<u64>(Settings::values.touch_from_button_map_index.GetValue());
|
||||
const auto& touch_buttons = Settings::values.touch_from_button_maps[button_index].buttons;
|
||||
|
|
|
@ -11,6 +11,11 @@
|
|||
namespace Core::HID {
|
||||
constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
|
||||
constexpr s32 HID_TRIGGER_MAX = 0x7fff;
|
||||
// Use a common UUID for TAS and Virtual Gamepad
|
||||
constexpr Common::UUID TAS_UUID =
|
||||
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
constexpr Common::UUID VIRTUAL_UUID =
|
||||
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
|
||||
EmulatedController::EmulatedController(NpadIdType npad_id_type_) : npad_id_type(npad_id_type_) {}
|
||||
|
||||
|
@ -348,10 +353,6 @@ void EmulatedController::ReloadInput() {
|
|||
}
|
||||
}
|
||||
|
||||
// Use a common UUID for TAS
|
||||
static constexpr Common::UUID TAS_UUID = Common::UUID{
|
||||
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
|
||||
// Register TAS devices. No need to force update
|
||||
for (std::size_t index = 0; index < tas_button_devices.size(); ++index) {
|
||||
if (!tas_button_devices[index]) {
|
||||
|
@ -377,10 +378,6 @@ void EmulatedController::ReloadInput() {
|
|||
});
|
||||
}
|
||||
|
||||
// Use a common UUID for Virtual Gamepad
|
||||
static constexpr Common::UUID VIRTUAL_UUID = Common::UUID{
|
||||
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
|
||||
// Register virtual devices. No need to force update
|
||||
for (std::size_t index = 0; index < virtual_button_devices.size(); ++index) {
|
||||
if (!virtual_button_devices[index]) {
|
||||
|
@ -780,7 +777,12 @@ void EmulatedController::SetStick(const Common::Input::CallbackStatus& callback,
|
|||
|
||||
// Only read stick values that have the same uuid or are over the threshold to avoid flapping
|
||||
if (controller.stick_values[index].uuid != uuid) {
|
||||
if (!stick_value.down && !stick_value.up && !stick_value.left && !stick_value.right) {
|
||||
const bool is_tas = uuid == TAS_UUID;
|
||||
if (is_tas && stick_value.x.value == 0 && stick_value.y.value == 0) {
|
||||
return;
|
||||
}
|
||||
if (!is_tas && !stick_value.down && !stick_value.up && !stick_value.left &&
|
||||
!stick_value.right) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,15 +22,19 @@ namespace {
|
|||
|
||||
namespace Service::NIFM {
|
||||
|
||||
// This is nn::nifm::RequestState
|
||||
enum class RequestState : u32 {
|
||||
NotSubmitted = 1,
|
||||
Error = 1, ///< The duplicate 1 is intentional; it means both not submitted and error on HW.
|
||||
Pending = 2,
|
||||
Connected = 3,
|
||||
Invalid = 1, ///< The duplicate 1 is intentional; it means both not submitted and error on HW.
|
||||
OnHold = 2,
|
||||
Accepted = 3,
|
||||
Blocking = 4,
|
||||
};
|
||||
|
||||
enum class InternetConnectionType : u8 {
|
||||
WiFi = 1,
|
||||
// This is nn::nifm::NetworkInterfaceType
|
||||
enum class NetworkInterfaceType : u32 {
|
||||
Invalid = 0,
|
||||
WiFi_Ieee80211 = 1,
|
||||
Ethernet = 2,
|
||||
};
|
||||
|
||||
|
@ -42,14 +46,23 @@ enum class InternetConnectionStatus : u8 {
|
|||
Connected,
|
||||
};
|
||||
|
||||
// This is nn::nifm::NetworkProfileType
|
||||
enum class NetworkProfileType : u32 {
|
||||
User,
|
||||
SsidList,
|
||||
Temporary,
|
||||
};
|
||||
|
||||
// This is nn::nifm::IpAddressSetting
|
||||
struct IpAddressSetting {
|
||||
bool is_automatic{};
|
||||
Network::IPv4Address current_address{};
|
||||
Network::IPv4Address ip_address{};
|
||||
Network::IPv4Address subnet_mask{};
|
||||
Network::IPv4Address gateway{};
|
||||
Network::IPv4Address default_gateway{};
|
||||
};
|
||||
static_assert(sizeof(IpAddressSetting) == 0xD, "IpAddressSetting has incorrect size.");
|
||||
|
||||
// This is nn::nifm::DnsSetting
|
||||
struct DnsSetting {
|
||||
bool is_automatic{};
|
||||
Network::IPv4Address primary_dns{};
|
||||
|
@ -57,18 +70,26 @@ struct DnsSetting {
|
|||
};
|
||||
static_assert(sizeof(DnsSetting) == 0x9, "DnsSetting has incorrect size.");
|
||||
|
||||
// This is nn::nifm::AuthenticationSetting
|
||||
struct AuthenticationSetting {
|
||||
bool is_enabled{};
|
||||
std::array<char, 0x20> user{};
|
||||
std::array<char, 0x20> password{};
|
||||
};
|
||||
static_assert(sizeof(AuthenticationSetting) == 0x41, "AuthenticationSetting has incorrect size.");
|
||||
|
||||
// This is nn::nifm::ProxySetting
|
||||
struct ProxySetting {
|
||||
bool enabled{};
|
||||
bool is_enabled{};
|
||||
INSERT_PADDING_BYTES(1);
|
||||
u16 port{};
|
||||
std::array<char, 0x64> proxy_server{};
|
||||
bool automatic_auth_enabled{};
|
||||
std::array<char, 0x20> user{};
|
||||
std::array<char, 0x20> password{};
|
||||
AuthenticationSetting authentication{};
|
||||
INSERT_PADDING_BYTES(1);
|
||||
};
|
||||
static_assert(sizeof(ProxySetting) == 0xAA, "ProxySetting has incorrect size.");
|
||||
|
||||
// This is nn::nifm::IpSettingData
|
||||
struct IpSettingData {
|
||||
IpAddressSetting ip_address_setting{};
|
||||
DnsSetting dns_setting{};
|
||||
|
@ -101,6 +122,7 @@ static_assert(sizeof(NifmWirelessSettingData) == 0x70,
|
|||
"NifmWirelessSettingData has incorrect size.");
|
||||
|
||||
#pragma pack(push, 1)
|
||||
// This is nn::nifm::detail::sf::NetworkProfileData
|
||||
struct SfNetworkProfileData {
|
||||
IpSettingData ip_setting_data{};
|
||||
u128 uuid{};
|
||||
|
@ -114,13 +136,14 @@ struct SfNetworkProfileData {
|
|||
};
|
||||
static_assert(sizeof(SfNetworkProfileData) == 0x17C, "SfNetworkProfileData has incorrect size.");
|
||||
|
||||
// This is nn::nifm::NetworkProfileData
|
||||
struct NifmNetworkProfileData {
|
||||
u128 uuid{};
|
||||
std::array<char, 0x40> network_name{};
|
||||
u32 unknown_1{};
|
||||
u32 unknown_2{};
|
||||
u8 unknown_3{};
|
||||
u8 unknown_4{};
|
||||
NetworkProfileType network_profile_type{};
|
||||
NetworkInterfaceType network_interface_type{};
|
||||
bool is_auto_connect{};
|
||||
bool is_large_capacity{};
|
||||
INSERT_PADDING_BYTES(2);
|
||||
NifmWirelessSettingData wireless_setting_data{};
|
||||
IpSettingData ip_setting_data{};
|
||||
|
@ -184,6 +207,7 @@ public:
|
|||
|
||||
event1 = CreateKEvent(service_context, "IRequest:Event1");
|
||||
event2 = CreateKEvent(service_context, "IRequest:Event2");
|
||||
state = RequestState::NotSubmitted;
|
||||
}
|
||||
|
||||
~IRequest() override {
|
||||
|
@ -196,7 +220,7 @@ private:
|
|||
LOG_WARNING(Service_NIFM, "(STUBBED) called");
|
||||
|
||||
if (state == RequestState::NotSubmitted) {
|
||||
UpdateState(RequestState::Pending);
|
||||
UpdateState(RequestState::OnHold);
|
||||
}
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
|
@ -219,14 +243,14 @@ private:
|
|||
switch (state) {
|
||||
case RequestState::NotSubmitted:
|
||||
return has_connection ? ResultSuccess : ResultNetworkCommunicationDisabled;
|
||||
case RequestState::Pending:
|
||||
case RequestState::OnHold:
|
||||
if (has_connection) {
|
||||
UpdateState(RequestState::Connected);
|
||||
UpdateState(RequestState::Accepted);
|
||||
} else {
|
||||
UpdateState(RequestState::Error);
|
||||
UpdateState(RequestState::Invalid);
|
||||
}
|
||||
return ResultPendingConnection;
|
||||
case RequestState::Connected:
|
||||
case RequestState::Accepted:
|
||||
default:
|
||||
return ResultSuccess;
|
||||
}
|
||||
|
@ -338,9 +362,9 @@ void IGeneralService::GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
|
|||
.ip_setting_data{
|
||||
.ip_address_setting{
|
||||
.is_automatic{true},
|
||||
.current_address{Network::TranslateIPv4(net_iface->ip_address)},
|
||||
.ip_address{Network::TranslateIPv4(net_iface->ip_address)},
|
||||
.subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
|
||||
.gateway{Network::TranslateIPv4(net_iface->gateway)},
|
||||
.default_gateway{Network::TranslateIPv4(net_iface->gateway)},
|
||||
},
|
||||
.dns_setting{
|
||||
.is_automatic{true},
|
||||
|
@ -348,12 +372,14 @@ void IGeneralService::GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
|
|||
.secondary_dns{1, 0, 0, 1},
|
||||
},
|
||||
.proxy_setting{
|
||||
.enabled{false},
|
||||
.is_enabled{false},
|
||||
.port{},
|
||||
.proxy_server{},
|
||||
.automatic_auth_enabled{},
|
||||
.user{},
|
||||
.password{},
|
||||
.authentication{
|
||||
.is_enabled{},
|
||||
.user{},
|
||||
.password{},
|
||||
},
|
||||
},
|
||||
.mtu{1500},
|
||||
},
|
||||
|
@ -370,7 +396,7 @@ void IGeneralService::GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
|
|||
// When we're connected to a room, spoof the hosts IP address
|
||||
if (auto room_member = network.GetRoomMember().lock()) {
|
||||
if (room_member->IsConnected()) {
|
||||
network_profile_data.ip_setting_data.ip_address_setting.current_address =
|
||||
network_profile_data.ip_setting_data.ip_address_setting.ip_address =
|
||||
room_member->GetFakeIpAddress();
|
||||
}
|
||||
}
|
||||
|
@ -444,9 +470,9 @@ void IGeneralService::GetCurrentIpConfigInfo(Kernel::HLERequestContext& ctx) {
|
|||
return IpConfigInfo{
|
||||
.ip_address_setting{
|
||||
.is_automatic{true},
|
||||
.current_address{Network::TranslateIPv4(net_iface->ip_address)},
|
||||
.ip_address{Network::TranslateIPv4(net_iface->ip_address)},
|
||||
.subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
|
||||
.gateway{Network::TranslateIPv4(net_iface->gateway)},
|
||||
.default_gateway{Network::TranslateIPv4(net_iface->gateway)},
|
||||
},
|
||||
.dns_setting{
|
||||
.is_automatic{true},
|
||||
|
@ -459,7 +485,7 @@ void IGeneralService::GetCurrentIpConfigInfo(Kernel::HLERequestContext& ctx) {
|
|||
// When we're connected to a room, spoof the hosts IP address
|
||||
if (auto room_member = network.GetRoomMember().lock()) {
|
||||
if (room_member->IsConnected()) {
|
||||
ip_config_info.ip_address_setting.current_address = room_member->GetFakeIpAddress();
|
||||
ip_config_info.ip_address_setting.ip_address = room_member->GetFakeIpAddress();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -480,7 +506,7 @@ void IGeneralService::GetInternetConnectionStatus(Kernel::HLERequestContext& ctx
|
|||
LOG_WARNING(Service_NIFM, "(STUBBED) called");
|
||||
|
||||
struct Output {
|
||||
InternetConnectionType type{InternetConnectionType::WiFi};
|
||||
u8 type{static_cast<u8>(NetworkInterfaceType::WiFi_Ieee80211)};
|
||||
u8 wifi_strength{3};
|
||||
InternetConnectionStatus state{InternetConnectionStatus::Connected};
|
||||
};
|
||||
|
|
|
@ -117,6 +117,8 @@ Errno TranslateNativeError(int e) {
|
|||
return Errno::NETUNREACH;
|
||||
case WSAEMSGSIZE:
|
||||
return Errno::MSGSIZE;
|
||||
case WSAETIMEDOUT:
|
||||
return Errno::TIMEDOUT;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
|
||||
return Errno::OTHER;
|
||||
|
@ -211,6 +213,8 @@ Errno TranslateNativeError(int e) {
|
|||
return Errno::NETUNREACH;
|
||||
case EMSGSIZE:
|
||||
return Errno::MSGSIZE;
|
||||
case ETIMEDOUT:
|
||||
return Errno::TIMEDOUT;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
|
||||
return Errno::OTHER;
|
||||
|
@ -226,7 +230,7 @@ Errno GetAndLogLastError() {
|
|||
int e = errno;
|
||||
#endif
|
||||
const Errno err = TranslateNativeError(e);
|
||||
if (err == Errno::AGAIN) {
|
||||
if (err == Errno::AGAIN || err == Errno::TIMEDOUT) {
|
||||
return err;
|
||||
}
|
||||
LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e));
|
||||
|
|
|
@ -436,7 +436,7 @@ struct Memory::Impl {
|
|||
}
|
||||
|
||||
if (Settings::IsFastmemEnabled()) {
|
||||
const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
|
||||
const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
|
||||
system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
# SPDX-FileCopyrightText: 2017 Citra Emulator Project
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
|
||||
|
||||
add_executable(yuzu-room
|
||||
precompiled_headers.h
|
||||
yuzu_room.cpp
|
||||
|
|
|
@ -4,14 +4,10 @@
|
|||
add_library(input_common STATIC
|
||||
drivers/camera.cpp
|
||||
drivers/camera.h
|
||||
drivers/gc_adapter.cpp
|
||||
drivers/gc_adapter.h
|
||||
drivers/keyboard.cpp
|
||||
drivers/keyboard.h
|
||||
drivers/mouse.cpp
|
||||
drivers/mouse.h
|
||||
drivers/sdl_driver.cpp
|
||||
drivers/sdl_driver.h
|
||||
drivers/tas_input.cpp
|
||||
drivers/tas_input.h
|
||||
drivers/touch_screen.cpp
|
||||
|
@ -62,8 +58,17 @@ if (ENABLE_SDL2)
|
|||
target_compile_definitions(input_common PRIVATE HAVE_SDL2)
|
||||
endif()
|
||||
|
||||
# Optional libusb support: the gc_adapter driver sources, the libusb::usb link
# dependency and the HAVE_LIBUSB define are only added when ENABLE_LIBUSB is on.
if (ENABLE_LIBUSB)
    target_compile_definitions(input_common PRIVATE HAVE_LIBUSB)
    target_sources(input_common
        PRIVATE
            drivers/gc_adapter.cpp
            drivers/gc_adapter.h
    )
    target_link_libraries(input_common PRIVATE libusb::usb)
endif()
|
||||
|
||||
create_target_directory_groups(input_common)
|
||||
target_link_libraries(input_common PUBLIC core PRIVATE common Boost::boost libusb::usb)
|
||||
target_link_libraries(input_common PUBLIC core PRIVATE common Boost::boost)
|
||||
|
||||
if (YUZU_USE_PRECOMPILED_HEADERS)
|
||||
target_precompile_headers(input_common PRIVATE precompiled_headers.h)
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
#include "common/input.h"
|
||||
#include "common/param_package.h"
|
||||
#include "input_common/drivers/camera.h"
|
||||
#include "input_common/drivers/gc_adapter.h"
|
||||
#include "input_common/drivers/keyboard.h"
|
||||
#include "input_common/drivers/mouse.h"
|
||||
#include "input_common/drivers/tas_input.h"
|
||||
|
@ -19,6 +18,10 @@
|
|||
#include "input_common/input_mapping.h"
|
||||
#include "input_common/input_poller.h"
|
||||
#include "input_common/main.h"
|
||||
|
||||
#ifdef HAVE_LIBUSB
|
||||
#include "input_common/drivers/gc_adapter.h"
|
||||
#endif
|
||||
#ifdef HAVE_SDL2
|
||||
#include "input_common/drivers/sdl_driver.h"
|
||||
#endif
|
||||
|
@ -45,7 +48,9 @@ struct InputSubsystem::Impl {
|
|||
RegisterEngine("keyboard", keyboard);
|
||||
RegisterEngine("mouse", mouse);
|
||||
RegisterEngine("touch", touch_screen);
|
||||
#ifdef HAVE_LIBUSB
|
||||
RegisterEngine("gcpad", gcadapter);
|
||||
#endif
|
||||
RegisterEngine("cemuhookudp", udp_client);
|
||||
RegisterEngine("tas", tas_input);
|
||||
RegisterEngine("camera", camera);
|
||||
|
@ -72,7 +77,9 @@ struct InputSubsystem::Impl {
|
|||
UnregisterEngine(keyboard);
|
||||
UnregisterEngine(mouse);
|
||||
UnregisterEngine(touch_screen);
|
||||
#ifdef HAVE_LIBUSB
|
||||
UnregisterEngine(gcadapter);
|
||||
#endif
|
||||
UnregisterEngine(udp_client);
|
||||
UnregisterEngine(tas_input);
|
||||
UnregisterEngine(camera);
|
||||
|
@ -95,8 +102,10 @@ struct InputSubsystem::Impl {
|
|||
devices.insert(devices.end(), keyboard_devices.begin(), keyboard_devices.end());
|
||||
auto mouse_devices = mouse->GetInputDevices();
|
||||
devices.insert(devices.end(), mouse_devices.begin(), mouse_devices.end());
|
||||
#ifdef HAVE_LIBUSB
|
||||
auto gcadapter_devices = gcadapter->GetInputDevices();
|
||||
devices.insert(devices.end(), gcadapter_devices.begin(), gcadapter_devices.end());
|
||||
#endif
|
||||
auto udp_devices = udp_client->GetInputDevices();
|
||||
devices.insert(devices.end(), udp_devices.begin(), udp_devices.end());
|
||||
#ifdef HAVE_SDL2
|
||||
|
@ -119,9 +128,11 @@ struct InputSubsystem::Impl {
|
|||
if (engine == mouse->GetEngineName()) {
|
||||
return mouse;
|
||||
}
|
||||
#ifdef HAVE_LIBUSB
|
||||
if (engine == gcadapter->GetEngineName()) {
|
||||
return gcadapter;
|
||||
}
|
||||
#endif
|
||||
if (engine == udp_client->GetEngineName()) {
|
||||
return udp_client;
|
||||
}
|
||||
|
@ -194,9 +205,11 @@ struct InputSubsystem::Impl {
|
|||
if (engine == mouse->GetEngineName()) {
|
||||
return true;
|
||||
}
|
||||
#ifdef HAVE_LIBUSB
|
||||
if (engine == gcadapter->GetEngineName()) {
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
if (engine == udp_client->GetEngineName()) {
|
||||
return true;
|
||||
}
|
||||
|
@ -217,7 +230,9 @@ struct InputSubsystem::Impl {
|
|||
void BeginConfiguration() {
|
||||
keyboard->BeginConfiguration();
|
||||
mouse->BeginConfiguration();
|
||||
#ifdef HAVE_LIBUSB
|
||||
gcadapter->BeginConfiguration();
|
||||
#endif
|
||||
udp_client->BeginConfiguration();
|
||||
#ifdef HAVE_SDL2
|
||||
sdl->BeginConfiguration();
|
||||
|
@ -227,7 +242,9 @@ struct InputSubsystem::Impl {
|
|||
void EndConfiguration() {
|
||||
keyboard->EndConfiguration();
|
||||
mouse->EndConfiguration();
|
||||
#ifdef HAVE_LIBUSB
|
||||
gcadapter->EndConfiguration();
|
||||
#endif
|
||||
udp_client->EndConfiguration();
|
||||
#ifdef HAVE_SDL2
|
||||
sdl->EndConfiguration();
|
||||
|
@ -248,7 +265,6 @@ struct InputSubsystem::Impl {
|
|||
|
||||
std::shared_ptr<Keyboard> keyboard;
|
||||
std::shared_ptr<Mouse> mouse;
|
||||
std::shared_ptr<GCAdapter> gcadapter;
|
||||
std::shared_ptr<TouchScreen> touch_screen;
|
||||
std::shared_ptr<TasInput::Tas> tas_input;
|
||||
std::shared_ptr<CemuhookUDP::UDPClient> udp_client;
|
||||
|
@ -256,6 +272,10 @@ struct InputSubsystem::Impl {
|
|||
std::shared_ptr<VirtualAmiibo> virtual_amiibo;
|
||||
std::shared_ptr<VirtualGamepad> virtual_gamepad;
|
||||
|
||||
#ifdef HAVE_LIBUSB
|
||||
std::shared_ptr<GCAdapter> gcadapter;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SDL2
|
||||
std::shared_ptr<SDLDriver> sdl;
|
||||
#endif
|
||||
|
|
|
@ -137,6 +137,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
|
|||
case IR::Attribute::VertexId:
|
||||
ctx.Add("MOV.F {}.x,{}.id;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.Add("MOV.F {}.x,{}.baseInstance;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.Add("MOV.F {}.x,{}.baseVertex;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.Add("MOV.F {}.x,{}.draw.id;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::FrontFace:
|
||||
ctx.Add("CMP.F {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
|
||||
break;
|
||||
|
@ -156,6 +165,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, S
|
|||
case IR::Attribute::VertexId:
|
||||
ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.Add("MOV.S {}.x,{}.baseInstance;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.Add("MOV.S {}.x,{}.baseVertex;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.Add("MOV.S {}.x,{}.draw.id;", inst, ctx.attrib_name);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Get U32 attribute {}", attr);
|
||||
}
|
||||
|
|
|
@ -219,7 +219,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR
|
|||
EmitContext ctx{program, bindings, profile, runtime_info};
|
||||
Precolor(program);
|
||||
EmitCode(ctx, program);
|
||||
const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))};
|
||||
const std::string version{fmt::format("#version 460{}\n", GlslVersionSpecifier(ctx))};
|
||||
ctx.header.insert(0, version);
|
||||
if (program.shared_memory_size > 0) {
|
||||
const auto requested_size{program.shared_memory_size};
|
||||
|
|
|
@ -234,6 +234,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
|
|||
case IR::Attribute::FrontFace:
|
||||
ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.AddF32("{}=itof(gl_BaseInstance);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.AddF32("{}=itof(gl_BaseVertex);", inst);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.AddF32("{}=itof(gl_DrawID);", inst);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Get attribute {}", attr);
|
||||
}
|
||||
|
@ -250,6 +259,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, s
|
|||
case IR::Attribute::VertexId:
|
||||
ctx.AddU32("{}=uint(gl_VertexID);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.AddU32("{}=uint(gl_BaseInstance);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.AddU32("{}=uint(gl_BaseVertex);", inst);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.AddU32("{}=uint(gl_DrawID);", inst);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Get U32 attribute {}", attr);
|
||||
}
|
||||
|
|
|
@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
|
|||
case IR::Attribute::PositionY:
|
||||
case IR::Attribute::PositionZ:
|
||||
case IR::Attribute::PositionW:
|
||||
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
|
||||
ctx.Const(element)));
|
||||
return ctx.OpLoad(
|
||||
ctx.F32[1],
|
||||
ctx.need_input_position_indirect
|
||||
? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value,
|
||||
ctx.Const(element))
|
||||
: AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element)));
|
||||
case IR::Attribute::InstanceId:
|
||||
if (ctx.profile.support_vertex_instance_id) {
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
|
||||
|
@ -339,6 +343,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
|
|||
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
|
||||
}
|
||||
case IR::Attribute::BaseInstance:
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance));
|
||||
case IR::Attribute::BaseVertex:
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex));
|
||||
case IR::Attribute::DrawID:
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.draw_index));
|
||||
case IR::Attribute::FrontFace:
|
||||
return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
|
||||
ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
|
||||
|
@ -380,6 +390,12 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
|
|||
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
|
||||
return ctx.OpISub(ctx.U32[1], index, base);
|
||||
}
|
||||
case IR::Attribute::BaseInstance:
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.base_instance);
|
||||
case IR::Attribute::BaseVertex:
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.base_vertex);
|
||||
case IR::Attribute::DrawID:
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.draw_index);
|
||||
default:
|
||||
throw NotImplementedException("Read U32 attribute {}", attr);
|
||||
}
|
||||
|
|
|
@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
|
|||
ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value);
|
||||
}
|
||||
|
||||
Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
|
||||
const Id thirty_two{ctx.Const(32u)};
|
||||
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
|
||||
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
|
||||
return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
|
||||
Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
|
||||
const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
|
||||
const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
|
||||
return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
|
@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
|
|||
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
|
||||
const Id thread_id{GetThreadId(ctx)};
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
const Id thirty_two{ctx.Const(32u)};
|
||||
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
|
||||
const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
|
||||
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
|
||||
index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
|
||||
clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
|
||||
const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
|
||||
|
||||
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
|
||||
const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
|
||||
Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
|
||||
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||
}
|
||||
|
||||
SetInBoundsFlag(inst, in_range);
|
||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
||||
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
const Id thread_id{GetThreadId(ctx)};
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
clamp = GetUpperClamp(ctx, thread_id, clamp);
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
|
||||
Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
|
||||
const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||
}
|
||||
|
||||
SetInBoundsFlag(inst, in_range);
|
||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
||||
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
const Id thread_id{GetThreadId(ctx)};
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
clamp = GetUpperClamp(ctx, thread_id, clamp);
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
|
||||
Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
|
||||
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||
}
|
||||
|
||||
SetInBoundsFlag(inst, in_range);
|
||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
||||
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
const Id thread_id{GetThreadId(ctx)};
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
clamp = GetUpperClamp(ctx, thread_id, clamp);
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
|
||||
Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
|
||||
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||
}
|
||||
|
||||
SetInBoundsFlag(inst, in_range);
|
||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
|
|
@ -544,7 +544,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
|
|||
U16 = Name(TypeInt(16, false), "u16");
|
||||
S16 = Name(TypeInt(16, true), "s16");
|
||||
}
|
||||
if (info.uses_int64) {
|
||||
if (info.uses_int64 && profile.support_int64) {
|
||||
AddCapability(spv::Capability::Int64);
|
||||
U64 = Name(TypeInt(64, false), "u64");
|
||||
}
|
||||
|
@ -721,9 +721,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
|
|||
size_t label_index{0};
|
||||
if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
|
||||
AddLabel(labels[label_index]);
|
||||
const Id pointer{is_array
|
||||
? OpAccessChain(input_f32, input_position, vertex, masked_index)
|
||||
: OpAccessChain(input_f32, input_position, masked_index)};
|
||||
const Id pointer{[&]() {
|
||||
if (need_input_position_indirect) {
|
||||
if (is_array)
|
||||
return OpAccessChain(input_f32, input_position, vertex, u32_zero_value,
|
||||
masked_index);
|
||||
else
|
||||
return OpAccessChain(input_f32, input_position, u32_zero_value,
|
||||
masked_index);
|
||||
} else {
|
||||
if (is_array)
|
||||
return OpAccessChain(input_f32, input_position, vertex, masked_index);
|
||||
else
|
||||
return OpAccessChain(input_f32, input_position, masked_index);
|
||||
}
|
||||
}()};
|
||||
const Id result{OpLoad(F32[1], pointer)};
|
||||
OpReturnValue(result);
|
||||
++label_index;
|
||||
|
@ -1367,30 +1379,56 @@ void EmitContext::DefineInputs(const IR::Program& program) {
|
|||
Decorate(layer, spv::Decoration::Flat);
|
||||
}
|
||||
if (loads.AnyComponent(IR::Attribute::PositionX)) {
|
||||
const bool is_fragment{stage != Stage::Fragment};
|
||||
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};
|
||||
input_position = DefineInput(*this, F32[4], true, built_in);
|
||||
if (profile.support_geometry_shader_passthrough) {
|
||||
if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
|
||||
Decorate(input_position, spv::Decoration::PassthroughNV);
|
||||
const bool is_fragment{stage == Stage::Fragment};
|
||||
if (!is_fragment && profile.has_broken_spirv_position_input) {
|
||||
need_input_position_indirect = true;
|
||||
|
||||
const Id input_position_struct = TypeStruct(F32[4]);
|
||||
input_position = DefineInput(*this, input_position_struct, true);
|
||||
|
||||
MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn,
|
||||
static_cast<unsigned>(spv::BuiltIn::Position));
|
||||
Decorate(input_position_struct, spv::Decoration::Block);
|
||||
} else {
|
||||
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord
|
||||
: spv::BuiltIn::Position};
|
||||
input_position = DefineInput(*this, F32[4], true, built_in);
|
||||
|
||||
if (profile.support_geometry_shader_passthrough) {
|
||||
if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
|
||||
Decorate(input_position, spv::Decoration::PassthroughNV);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Instance-related inputs.
// - InstanceId is read and the host exposes it directly: declare InstanceId, and
//   declare the base instance only when the shader also reads BaseInstance.
// - InstanceId is read without direct support: declare InstanceIndex together
//   with BaseInstance.
// - Only BaseInstance is read: declare just the base instance.
if (loads[IR::Attribute::InstanceId]) {
    if (profile.support_vertex_instance_id) {
        instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
        if (loads[IR::Attribute::BaseInstance]) {
            // Attribute::BaseInstance is loaded from this variable, so it has to be
            // decorated with the BaseInstance built-in (it was BaseVertex before).
            base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
        }
    } else {
        instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
        base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
    }
} else if (loads[IR::Attribute::BaseInstance]) {
    base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
}
|
||||
if (loads[IR::Attribute::VertexId]) {
|
||||
if (profile.support_vertex_instance_id) {
|
||||
vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
|
||||
if (loads[IR::Attribute::BaseVertex]) {
|
||||
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||
}
|
||||
} else {
|
||||
vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
|
||||
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||
}
|
||||
} else if (loads[IR::Attribute::BaseVertex]) {
|
||||
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||
}
|
||||
if (loads[IR::Attribute::DrawID]) {
|
||||
draw_index = DefineInput(*this, U32[1], true, spv::BuiltIn::DrawIndex);
|
||||
}
|
||||
if (loads[IR::Attribute::FrontFace]) {
|
||||
front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
|
||||
|
|
|
@ -218,6 +218,7 @@ public:
|
|||
Id base_instance{};
|
||||
Id vertex_id{};
|
||||
Id vertex_index{};
|
||||
Id draw_index{};
|
||||
Id base_vertex{};
|
||||
Id front_face{};
|
||||
Id point_coord{};
|
||||
|
@ -279,6 +280,7 @@ public:
|
|||
Id write_global_func_u32x2{};
|
||||
Id write_global_func_u32x4{};
|
||||
|
||||
bool need_input_position_indirect{};
|
||||
Id input_position{};
|
||||
std::array<Id, 32> input_generics{};
|
||||
|
||||
|
|
|
@ -34,6 +34,11 @@ public:
|
|||
|
||||
[[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
|
||||
|
||||
[[nodiscard]] virtual bool HasHLEMacroState() const = 0;
|
||||
|
||||
[[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer(u32 bank,
|
||||
u32 offset) = 0;
|
||||
|
||||
virtual void Dump(u64 hash) = 0;
|
||||
|
||||
[[nodiscard]] const ProgramHeader& SPH() const noexcept {
|
||||
|
@ -52,11 +57,16 @@ public:
|
|||
return start_address;
|
||||
}
|
||||
|
||||
// Returns the value of the is_propietary_driver member.
// The "Propietary" spelling is part of the existing identifier and is kept as-is.
[[nodiscard]] bool IsPropietaryDriver() const noexcept {
|
||||
return is_propietary_driver;
|
||||
}
|
||||
|
||||
protected:
|
||||
ProgramHeader sph{};
|
||||
std::array<u32, 8> gp_passthrough_mask{};
|
||||
Stage stage{};
|
||||
u32 start_address{};
|
||||
bool is_propietary_driver{};
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
@ -446,6 +446,12 @@ std::string NameOf(Attribute attribute) {
|
|||
return "ViewportMask";
|
||||
case Attribute::FrontFace:
|
||||
return "FrontFace";
|
||||
case Attribute::BaseInstance:
|
||||
return "BaseInstance";
|
||||
case Attribute::BaseVertex:
|
||||
return "BaseVertex";
|
||||
case Attribute::DrawID:
|
||||
return "DrawID";
|
||||
}
|
||||
return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
|
||||
}
|
||||
|
|
|
@ -219,6 +219,11 @@ enum class Attribute : u64 {
|
|||
FixedFncTexture9Q = 231,
|
||||
ViewportMask = 232,
|
||||
FrontFace = 255,
|
||||
|
||||
// Implementation attributes
|
||||
BaseInstance = 256,
|
||||
BaseVertex = 257,
|
||||
DrawID = 258,
|
||||
};
|
||||
|
||||
constexpr size_t NUM_GENERICS = 32;
|
||||
|
|
|
@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
|
|||
return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
|
||||
}
|
||||
|
||||
// Convenience overload: forwards to the two-argument form with an immediate 0
// as the vertex operand.
U32 IREmitter::GetAttributeU32(IR::Attribute attribute) {
    const U32 zero_vertex{Imm32(0)};
    return GetAttributeU32(attribute, zero_vertex);
}

// Emits a GetAttributeU32 instruction for `attribute` with the given vertex operand
// and returns its U32 result.
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) {
    return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex);
}
|
||||
|
||||
void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
|
||||
Inst(Opcode::SetAttribute, attribute, value, vertex);
|
||||
}
|
||||
|
|
|
@ -74,6 +74,8 @@ public:
|
|||
|
||||
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
|
||||
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
|
||||
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute);
|
||||
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex);
|
||||
void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
|
||||
|
||||
[[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
|
||||
|
|
|
@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings(
|
|||
}
|
||||
return mapping;
|
||||
}
|
||||
|
||||
// Emits IR that forwards input vertices to the output.
//
// For each of the program.output_vertices input vertices this copies, in order:
//   - all four components of every generic selected by passthrough_mask,
//   - the four position components when passthrough_position is set,
//   - the Layer attribute from *passthrough_layer_attr when it holds a value,
// and then emits the vertex. The primitive is ended once after the last vertex.
void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program,
                             const Shader::VaryingState& passthrough_mask,
                             bool passthrough_position,
                             std::optional<IR::Attribute> passthrough_layer_attr) {
    // Copies the four consecutive attributes starting at `first` from the given
    // input vertex into the output attribute of the same name.
    const auto copy_vec4{[&ir](IR::Attribute first, u32 input_vertex) {
        for (int component = 0; component < 4; ++component) {
            const IR::Attribute attr = first + component;
            ir.SetAttribute(attr, ir.GetAttribute(attr, ir.Imm32(input_vertex)), ir.Imm32(0));
        }
    }};

    for (u32 vertex = 0; vertex < program.output_vertices; ++vertex) {
        // Generics: only the ones flagged in the passthrough mask are forwarded.
        for (u32 generic = 0; generic < 32; ++generic) {
            if (passthrough_mask.Generic(generic)) {
                copy_vec4(IR::Attribute::Generic0X + (generic * 4), vertex);
            }
        }

        if (passthrough_position) {
            copy_vec4(IR::Attribute::PositionX, vertex);
        }

        if (passthrough_layer_attr) {
            ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr),
                            ir.Imm32(0));
        }

        ir.EmitVertex(ir.Imm32(0));
    }
    ir.EndPrimitive(ir.Imm32(0));
}
|
||||
|
||||
// Maps an output topology to a vertex count: 1 for PointList, 2 for LineStrip,
// and 3 for every other topology.
u32 GetOutputTopologyVertices(OutputTopology output_topology) {
    if (output_topology == OutputTopology::PointList) {
        return 1;
    }
    if (output_topology == OutputTopology::LineStrip) {
        return 2;
    }
    return 3;
}
|
||||
|
||||
// Inserts the explicit passthrough code produced by EmitGeometryPassthrough in
// front of every Epilogue instruction of the program. Generics are taken from
// program.info.passthrough, position is forwarded when that mask covers any
// position component, and no layer attribute is forwarded.
// `host_info` is accepted but not read by this function.
void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) {
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() != IR::Opcode::Epilogue) {
                continue;
            }
            // Emitter positioned at the Epilogue instruction itself.
            IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
            const bool forwards_position{
                program.info.passthrough.AnyComponent(IR::Attribute::PositionX)};
            EmitGeometryPassthrough(ir, program, program.info.passthrough, forwards_position,
                                    std::nullopt);
        }
    }
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
|
@ -198,6 +262,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
|
||||
program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
|
||||
}
|
||||
|
||||
if (!host_info.support_geometry_shader_passthrough) {
|
||||
program.output_vertices = GetOutputTopologyVertices(program.output_topology);
|
||||
LowerGeometryPassthrough(program, host_info);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -219,11 +288,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
}
|
||||
Optimization::SsaRewritePass(program);
|
||||
|
||||
Optimization::ConstantPropagationPass(program);
|
||||
Optimization::ConstantPropagationPass(env, program);
|
||||
|
||||
Optimization::PositionPass(env, program);
|
||||
|
||||
Optimization::GlobalMemoryToStorageBufferPass(program);
|
||||
Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
|
||||
Optimization::TexturePass(env, program, host_info);
|
||||
|
||||
if (Settings::values.resolution_info.active) {
|
||||
|
@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
|
|||
IR::Program program;
|
||||
program.stage = Stage::Geometry;
|
||||
program.output_topology = output_topology;
|
||||
switch (output_topology) {
|
||||
case OutputTopology::PointList:
|
||||
program.output_vertices = 1;
|
||||
break;
|
||||
case OutputTopology::LineStrip:
|
||||
program.output_vertices = 2;
|
||||
break;
|
||||
default:
|
||||
program.output_vertices = 3;
|
||||
break;
|
||||
}
|
||||
program.output_vertices = GetOutputTopologyVertices(output_topology);
|
||||
|
||||
program.is_geometry_passthrough = false;
|
||||
program.info.loads.mask = source_program.info.stores.mask;
|
||||
|
@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
|
|||
node.data.block = current_block;
|
||||
|
||||
IR::IREmitter ir{*current_block};
|
||||
for (u32 i = 0; i < program.output_vertices; i++) {
|
||||
// Assign generics from input
|
||||
for (u32 j = 0; j < 32; j++) {
|
||||
if (!program.info.stores.Generic(j)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
|
||||
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
|
||||
}
|
||||
|
||||
// Assign position from input
|
||||
const IR::Attribute attr = IR::Attribute::PositionX;
|
||||
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
|
||||
|
||||
// Assign layer
|
||||
ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer),
|
||||
ir.Imm32(0));
|
||||
|
||||
// Emit vertex
|
||||
ir.EmitVertex(ir.Imm32(0));
|
||||
}
|
||||
ir.EndPrimitive(ir.Imm32(0));
|
||||
EmitGeometryPassthrough(ir, program, program.info.stores, true,
|
||||
source_program.info.emulated_layer);
|
||||
|
||||
IR::Block* return_block{block_pool.Create(inst_pool)};
|
||||
IR::IREmitter{*return_block}.Epilogue();
|
||||
|
|
|
@ -15,6 +15,9 @@ struct HostTranslateInfo {
|
|||
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
|
||||
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
|
||||
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
|
||||
u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
|
||||
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
|
||||
///< passthrough shaders
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <type_traits>
|
||||
|
||||
#include "common/bit_cast.h"
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
|
@ -515,6 +516,9 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
|||
case IR::Attribute::PrimitiveId:
|
||||
case IR::Attribute::InstanceId:
|
||||
case IR::Attribute::VertexId:
|
||||
case IR::Attribute::BaseVertex:
|
||||
case IR::Attribute::BaseInstance:
|
||||
case IR::Attribute::DrawID:
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
|
@ -644,7 +648,63 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||
/// Replaces a constant buffer read with an attribute read when the environment reports a
/// replacement for the read's immediate bank/offset pair.
///
/// Nothing happens when the bank or offset is not an immediate, or when the environment has
/// no replacement for the pair. Throws NotImplementedException for a replacement kind that has
/// no attribute mapping here.
void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
    const IR::Value cbuf_bank{inst.Arg(0)};
    const IR::Value cbuf_offset{inst.Arg(1)};
    // Only reads whose bank and offset are both immediates can be looked up
    if (!(cbuf_bank.IsImmediate() && cbuf_offset.IsImmediate())) {
        return;
    }
    const auto bank_imm = cbuf_bank.U32();
    const auto offset_imm = cbuf_offset.U32();
    const auto replacement = env.GetReplaceConstBuffer(bank_imm, offset_imm);
    if (!replacement) {
        return;
    }

    // Translate the replacement kind into the attribute that carries the same value
    IR::Attribute target{};
    switch (*replacement) {
    case ReplaceConstant::BaseInstance:
        target = IR::Attribute::BaseInstance;
        break;
    case ReplaceConstant::BaseVertex:
        target = IR::Attribute::BaseVertex;
        break;
    case ReplaceConstant::DrawID:
        target = IR::Attribute::DrawID;
        break;
    default:
        throw NotImplementedException("Not implemented replacement variable {}", *replacement);
    }

    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    // Pick the attribute read that matches the opcode of the read being replaced
    const bool is_u32_read = inst.GetOpcode() == IR::Opcode::GetCbufU32;
    if (is_u32_read) {
        inst.ReplaceUsesWith(ir.GetAttributeU32(target));
    } else {
        inst.ReplaceUsesWith(ir.GetAttribute(target));
    }
}
|
||||
|
||||
/// Folds a constant buffer read into an immediate obtained from env.ReadCbufValue.
///
/// The fold applies only when the bank and offset are immediates, the bank equals which_bank
/// and the offset lies in [offset_start, offset_end). A GetCbufU32 read is replaced with the
/// value as returned; any other opcode is replaced with the same bits reinterpreted as f32.
void FoldDriverConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst, u32 which_bank,
                           u32 offset_start = 0, u32 offset_end = std::numeric_limits<u16>::max()) {
    const IR::Value bank{inst.Arg(0)};
    const IR::Value offset{inst.Arg(1)};
    const bool both_immediate = bank.IsImmediate() && offset.IsImmediate();
    if (!both_immediate) {
        return;
    }
    const auto bank_value = bank.U32();
    if (bank_value != which_bank) {
        return;
    }
    const auto offset_value = offset.U32();
    const bool in_range = offset_start <= offset_value && offset_value < offset_end;
    if (!in_range) {
        return;
    }
    // NOTE(review): this emitter is constructed but never used below
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto raw_word = env.ReadCbufValue(bank_value, offset_value);
    if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
        inst.ReplaceUsesWith(IR::Value{raw_word});
    } else {
        inst.ReplaceUsesWith(IR::Value{Common::BitCast<f32>(raw_word)});
    }
}
|
||||
|
||||
void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::GetRegister:
|
||||
return FoldGetRegister(inst);
|
||||
|
@ -789,18 +849,28 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
IR::Opcode::CompositeInsertF16x4);
|
||||
case IR::Opcode::FSwizzleAdd:
|
||||
return FoldFSwizzleAdd(block, inst);
|
||||
case IR::Opcode::GetCbufF32:
|
||||
case IR::Opcode::GetCbufU32:
|
||||
if (env.HasHLEMacroState()) {
|
||||
FoldConstBuffer(env, block, inst);
|
||||
}
|
||||
if (env.IsPropietaryDriver()) {
|
||||
FoldDriverConstBuffer(env, block, inst, 1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
void ConstantPropagationPass(IR::Program& program) {
|
||||
void ConstantPropagationPass(Environment& env, IR::Program& program) {
|
||||
const auto end{program.post_order_blocks.rend()};
|
||||
for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
|
||||
IR::Block* const block{*it};
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
ConstantPropagation(*block, inst);
|
||||
ConstantPropagation(env, *block, inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
#include "shader_recompiler/host_translate_info.h"
|
||||
#include "shader_recompiler/ir_opt/passes.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
|
|||
}
|
||||
|
||||
/// Returns the offset in indices (not bytes) for an equivalent storage instruction
|
||||
IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
|
||||
IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
IR::U32 offset;
|
||||
if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
|
||||
|
@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
|
|||
}
|
||||
// Subtract the least significant 32 bits from the guest offset. The result is the storage
|
||||
// buffer offset in bytes.
|
||||
const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
|
||||
IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
|
||||
|
||||
// Align the offset base to match the host alignment requirements
|
||||
low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
|
||||
return ir.ISub(offset, low_cbuf);
|
||||
}
|
||||
|
||||
|
@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
|||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program) {
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
|
||||
StorageInfo info;
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
|
@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
|
|||
const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
|
||||
IR::Block* const block{storage_inst.block};
|
||||
IR::Inst* const inst{storage_inst.inst};
|
||||
const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
|
||||
const IR::U32 offset{
|
||||
StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
|
||||
Replace(*block, *inst, index, offset);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,9 +13,9 @@ struct HostTranslateInfo;
|
|||
namespace Shader::Optimization {
|
||||
|
||||
void CollectShaderInfoPass(Environment& env, IR::Program& program);
|
||||
void ConstantPropagationPass(IR::Program& program);
|
||||
void ConstantPropagationPass(Environment& env, IR::Program& program);
|
||||
void DeadCodeEliminationPass(IR::Program& program);
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program);
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
|
||||
void IdentityRemovalPass(IR::Program& program);
|
||||
void LowerFp16ToFp32(IR::Program& program);
|
||||
void LowerInt64ToInt32(IR::Program& program);
|
||||
|
|
|
@ -55,6 +55,8 @@ struct Profile {
|
|||
|
||||
/// OpFClamp is broken and OpFMax + OpFMin should be used instead
|
||||
bool has_broken_spirv_clamp{};
|
||||
/// The Position builtin needs to be wrapped in a struct when used as an input
|
||||
bool has_broken_spirv_position_input{};
|
||||
/// Offset image operands with an unsigned type do not work
|
||||
bool has_broken_unsigned_image_offsets{};
|
||||
/// Signed instructions with unsigned data types are misinterpreted
|
||||
|
|
|
@ -16,6 +16,12 @@
|
|||
|
||||
namespace Shader {
|
||||
|
||||
/// Kinds of value that may replace a constant buffer read. FoldConstBuffer maps each
/// enumerator to the IR attribute of the same name.
enum class ReplaceConstant : u32 {
|
||||
BaseInstance,
|
||||
BaseVertex,
|
||||
DrawID,
|
||||
};
|
||||
|
||||
enum class TextureType : u32 {
|
||||
Color1D,
|
||||
ColorArray1D,
|
||||
|
@ -59,6 +65,8 @@ enum class Interpolation {
|
|||
struct ConstantBufferDescriptor {
|
||||
u32 index;
|
||||
u32 count;
|
||||
|
||||
auto operator<=>(const ConstantBufferDescriptor&) const = default;
|
||||
};
|
||||
|
||||
struct StorageBufferDescriptor {
|
||||
|
@ -66,6 +74,8 @@ struct StorageBufferDescriptor {
|
|||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
bool is_written;
|
||||
|
||||
auto operator<=>(const StorageBufferDescriptor&) const = default;
|
||||
};
|
||||
|
||||
struct TextureBufferDescriptor {
|
||||
|
@ -78,6 +88,8 @@ struct TextureBufferDescriptor {
|
|||
u32 secondary_shift_left;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
|
||||
auto operator<=>(const TextureBufferDescriptor&) const = default;
|
||||
};
|
||||
using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
|
||||
|
||||
|
@ -89,6 +101,8 @@ struct ImageBufferDescriptor {
|
|||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
|
||||
auto operator<=>(const ImageBufferDescriptor&) const = default;
|
||||
};
|
||||
using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
|
||||
|
||||
|
@ -104,6 +118,8 @@ struct TextureDescriptor {
|
|||
u32 secondary_shift_left;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
|
||||
auto operator<=>(const TextureDescriptor&) const = default;
|
||||
};
|
||||
using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
|
||||
|
||||
|
@ -116,6 +132,8 @@ struct ImageDescriptor {
|
|||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
|
||||
auto operator<=>(const ImageDescriptor&) const = default;
|
||||
};
|
||||
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
namespace Shader {
|
||||
|
||||
struct VaryingState {
|
||||
std::bitset<256> mask{};
|
||||
std::bitset<512> mask{};
|
||||
|
||||
void Set(IR::Attribute attribute, bool state = true) {
|
||||
mask[static_cast<size_t>(attribute)] = state;
|
||||
|
|
|
@ -7,6 +7,7 @@ add_executable(tests
|
|||
common/fibers.cpp
|
||||
common/host_memory.cpp
|
||||
common/param_package.cpp
|
||||
common/range_map.cpp
|
||||
common/ring_buffer.cpp
|
||||
common/scratch_buffer.cpp
|
||||
common/unique_function.cpp
|
||||
|
|
70
src/tests/common/range_map.cpp
Normal file
70
src/tests/common/range_map.cpp
Normal file
|
@ -0,0 +1,70 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <catch2/catch.hpp>
|
||||
|
||||
#include "common/range_map.h"
|
||||
|
||||
/// Value type stored in the range map under test. Invalid is the value the map is
/// constructed with; the Valid_* values tag the mapped ranges.
enum class MappedEnum : u32 {
|
||||
Invalid = 0,
|
||||
Valid_1 = 1,
|
||||
Valid_2 = 2,
|
||||
Valid_3 = 3,
|
||||
};
|
||||
|
||||
// Exercises Map/Unmap on a RangeMap and checks GetContinousSizeFrom/GetValueAt afterwards.
TEST_CASE("Range Map: Setup", "[video_core]") {
|
||||
// The assertions below expect MappedEnum::Invalid for every address that is not mapped
Common::RangeMap<u64, MappedEnum> my_map(MappedEnum::Invalid);
|
||||
my_map.Map(3000, 3500, MappedEnum::Valid_1);
|
||||
// Cuts the tail of the mapping above: only [3000, 3200) is expected to stay mapped
my_map.Unmap(3200, 3600);
|
||||
my_map.Map(4000, 4500, MappedEnum::Valid_2);
|
||||
my_map.Map(4200, 4400, MappedEnum::Valid_2);
|
||||
// Remapping the middle with a different value is expected to split the Valid_2 range
my_map.Map(4200, 4400, MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(4200) == 200);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(3000) == 200);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(2900) == 0);
|
||||
|
||||
REQUIRE(my_map.GetValueAt(2900) == MappedEnum::Invalid);
|
||||
REQUIRE(my_map.GetValueAt(3100) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(3000) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(3200) == MappedEnum::Invalid);
|
||||
|
||||
// The end address of a mapping is checked as exclusive: 4400 and 4500 belong to what follows
REQUIRE(my_map.GetValueAt(4199) == MappedEnum::Valid_2);
|
||||
REQUIRE(my_map.GetValueAt(4200) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(4400) == MappedEnum::Valid_2);
|
||||
REQUIRE(my_map.GetValueAt(4500) == MappedEnum::Invalid);
|
||||
REQUIRE(my_map.GetValueAt(4600) == MappedEnum::Invalid);
|
||||
|
||||
// Unmapping a range that covers every mapping is expected to leave nothing mapped
my_map.Unmap(0, 6000);
|
||||
for (u64 address = 0; address < 10000; address += 1000) {
|
||||
REQUIRE(my_map.GetContinousSizeFrom(address) == 0);
|
||||
}
|
||||
|
||||
// Overlapping ranges that share a value are expected to form one continuous region
my_map.Map(1000, 3000, MappedEnum::Valid_1);
|
||||
my_map.Map(4000, 5000, MappedEnum::Valid_1);
|
||||
my_map.Map(2500, 4100, MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(1000) == 4000);
|
||||
|
||||
// Overlapping ranges with different values: the most recent Map call is expected to win
my_map.Map(1000, 3000, MappedEnum::Valid_1);
|
||||
my_map.Map(4000, 5000, MappedEnum::Valid_2);
|
||||
my_map.Map(2500, 4100, MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(1000) == 1500);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(2500) == 1600);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(4100) == 900);
|
||||
REQUIRE(my_map.GetValueAt(900) == MappedEnum::Invalid);
|
||||
REQUIRE(my_map.GetValueAt(1000) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(2500) == MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetValueAt(4100) == MappedEnum::Valid_2);
|
||||
REQUIRE(my_map.GetValueAt(5000) == MappedEnum::Invalid);
|
||||
|
||||
// One mapping that overrides everything from 2000 up to (not including) 6000
my_map.Map(2000, 6000, MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(1000) == 1000);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(3000) == 3000);
|
||||
REQUIRE(my_map.GetValueAt(1000) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(1999) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(1500) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(2001) == MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetValueAt(5999) == MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetValueAt(6000) == MappedEnum::Invalid);
|
||||
}
|
|
@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
|
|||
int num = 0;
|
||||
buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
||||
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
||||
REQUIRE(num == 0);
|
||||
REQUIRE(num == 1);
|
||||
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
|
||||
buffer.FlushCachedWrites();
|
||||
|
|
|
@ -13,6 +13,7 @@ add_library(video_core STATIC
|
|||
buffer_cache/buffer_base.h
|
||||
buffer_cache/buffer_cache.cpp
|
||||
buffer_cache/buffer_cache.h
|
||||
cache_types.h
|
||||
cdma_pusher.cpp
|
||||
cdma_pusher.h
|
||||
compatible_formats.cpp
|
||||
|
@ -84,6 +85,7 @@ add_library(video_core STATIC
|
|||
gpu.h
|
||||
gpu_thread.cpp
|
||||
gpu_thread.h
|
||||
invalidation_accumulator.h
|
||||
memory_manager.cpp
|
||||
memory_manager.h
|
||||
precompiled_headers.h
|
||||
|
@ -189,6 +191,8 @@ add_library(video_core STATIC
|
|||
renderer_vulkan/vk_texture_cache.cpp
|
||||
renderer_vulkan/vk_texture_cache.h
|
||||
renderer_vulkan/vk_texture_cache_base.cpp
|
||||
renderer_vulkan/vk_turbo_mode.cpp
|
||||
renderer_vulkan/vk_turbo_mode.h
|
||||
renderer_vulkan/vk_update_descriptor.cpp
|
||||
renderer_vulkan/vk_update_descriptor.h
|
||||
shader_cache.cpp
|
||||
|
|
|
@ -430,7 +430,7 @@ private:
|
|||
if (query_begin >= SizeBytes() || size < 0) {
|
||||
return;
|
||||
}
|
||||
u64* const untracked_words = Array<Type::Untracked>();
|
||||
[[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
|
||||
u64* const state_words = Array<type>();
|
||||
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
|
||||
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
|
||||
|
@ -483,7 +483,7 @@ private:
|
|||
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
|
||||
}
|
||||
// Exclude CPU modified pages when visiting GPU pages
|
||||
const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
|
||||
const u64 word = current_word;
|
||||
u64 page = page_begin;
|
||||
page_begin = 0;
|
||||
|
||||
|
@ -531,7 +531,7 @@ private:
|
|||
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
const u64* const untracked_words = Array<Type::Untracked>();
|
||||
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
|
||||
const u64* const state_words = Array<type>();
|
||||
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
||||
const u64 word_begin = offset / BYTES_PER_WORD;
|
||||
|
@ -539,8 +539,7 @@ private:
|
|||
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
|
||||
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
|
||||
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
|
||||
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
|
||||
const u64 word = state_words[word_index] & ~off_word;
|
||||
const u64 word = state_words[word_index];
|
||||
if (word == 0) {
|
||||
continue;
|
||||
}
|
||||
|
@ -564,7 +563,7 @@ private:
|
|||
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
const u64* const untracked_words = Array<Type::Untracked>();
|
||||
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
|
||||
const u64* const state_words = Array<type>();
|
||||
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
||||
const u64 word_begin = offset / BYTES_PER_WORD;
|
||||
|
@ -574,8 +573,7 @@ private:
|
|||
u64 begin = std::numeric_limits<u64>::max();
|
||||
u64 end = 0;
|
||||
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
|
||||
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
|
||||
const u64 word = state_words[word_index] & ~off_word;
|
||||
const u64 word = state_words[word_index];
|
||||
if (word == 0) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -200,7 +200,16 @@ public:
|
|||
/// Return true when a CPU region is modified from the CPU
|
||||
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
|
||||
|
||||
std::mutex mutex;
|
||||
/// Stores the indirect draw parameters pointer. The indirect buffer update and bind steps
/// run only while this pointer is non-null.
void SetDrawIndirect(
|
||||
const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
|
||||
current_draw_indirect = current_draw_indirect_;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
|
||||
|
||||
[[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
|
||||
|
||||
std::recursive_mutex mutex;
|
||||
Runtime& runtime;
|
||||
|
||||
private:
|
||||
|
@ -272,6 +281,8 @@ private:
|
|||
|
||||
void BindHostVertexBuffers();
|
||||
|
||||
void BindHostDrawIndirectBuffers();
|
||||
|
||||
void BindHostGraphicsUniformBuffers(size_t stage);
|
||||
|
||||
void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
|
||||
|
@ -298,6 +309,8 @@ private:
|
|||
|
||||
void UpdateVertexBuffer(u32 index);
|
||||
|
||||
void UpdateDrawIndirect();
|
||||
|
||||
void UpdateUniformBuffers(size_t stage);
|
||||
|
||||
void UpdateStorageBuffers(size_t stage);
|
||||
|
@ -372,6 +385,8 @@ private:
|
|||
SlotVector<Buffer> slot_buffers;
|
||||
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
|
||||
|
||||
const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
|
||||
|
||||
u32 last_index_count = 0;
|
||||
|
||||
Binding index_buffer;
|
||||
|
@ -380,6 +395,8 @@ private:
|
|||
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
|
||||
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
|
||||
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
|
||||
Binding count_buffer_binding;
|
||||
Binding indirect_buffer_binding;
|
||||
|
||||
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
|
||||
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
|
||||
|
@ -674,6 +691,9 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
|
|||
}
|
||||
BindHostVertexBuffers();
|
||||
BindHostTransformFeedbackBuffers();
|
||||
if (current_draw_indirect) {
|
||||
BindHostDrawIndirectBuffers();
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
@ -823,6 +843,7 @@ bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
|
|||
template <class P>
|
||||
void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||
AccumulateFlushes();
|
||||
|
||||
if (committed_ranges.empty()) {
|
||||
return;
|
||||
}
|
||||
|
@ -869,7 +890,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
|||
buffer_id,
|
||||
});
|
||||
// Align up to avoid cache conflicts
|
||||
constexpr u64 align = 256ULL;
|
||||
constexpr u64 align = 8ULL;
|
||||
constexpr u64 mask = ~(align - 1ULL);
|
||||
total_size_bytes += (new_size + align - 1) & mask;
|
||||
largest_copy = std::max(largest_copy, new_size);
|
||||
|
@ -1041,6 +1062,19 @@ void BufferCache<P>::BindHostVertexBuffers() {
|
|||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostDrawIndirectBuffers() {
|
||||
const auto bind_buffer = [this](const Binding& binding) {
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
|
||||
};
|
||||
if (current_draw_indirect->include_count) {
|
||||
bind_buffer(count_buffer_binding);
|
||||
}
|
||||
bind_buffer(indirect_buffer_binding);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
|
||||
u32 dirty = ~0U;
|
||||
|
@ -1272,6 +1306,9 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
|
|||
UpdateStorageBuffers(stage);
|
||||
UpdateTextureBuffers(stage);
|
||||
}
|
||||
if (current_draw_indirect) {
|
||||
UpdateDrawIndirect();
|
||||
}
|
||||
} while (has_deleted_buffers);
|
||||
}
|
||||
|
||||
|
@ -1289,7 +1326,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
|
|||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
const auto& index_array = draw_state.index_buffer;
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
|
||||
if (!flags[Dirty::IndexBuffer]) {
|
||||
return;
|
||||
}
|
||||
flags[Dirty::IndexBuffer] = false;
|
||||
|
@ -1361,6 +1398,27 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
|
|||
};
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateDrawIndirect() {
|
||||
const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) {
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr) {
|
||||
binding = NULL_BINDING;
|
||||
return;
|
||||
}
|
||||
binding = Binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = static_cast<u32>(size),
|
||||
.buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
|
||||
};
|
||||
};
|
||||
if (current_draw_indirect->include_count) {
|
||||
update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding);
|
||||
}
|
||||
update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
|
||||
indirect_buffer_binding);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
|
||||
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
||||
|
@ -1880,14 +1938,21 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
|
|||
bool is_written) const {
|
||||
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
|
||||
const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
const u32 alignment = runtime.GetStorageBufferAlignment();
|
||||
|
||||
const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
|
||||
const u32 aligned_size =
|
||||
Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment);
|
||||
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
|
||||
if (!cpu_addr || size == 0) {
|
||||
return NULL_BINDING;
|
||||
}
|
||||
const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
|
||||
|
||||
const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE);
|
||||
const Binding binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
|
||||
.size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr),
|
||||
.buffer_id = BufferId{},
|
||||
};
|
||||
return binding;
|
||||
|
@ -1941,4 +2006,16 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
|
|||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
|
||||
auto& buffer = slot_buffers[count_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr));
|
||||
}
|
||||
|
||||
template <class P>
|
||||
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
|
||||
auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr));
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
|
24
src/video_core/cache_types.h
Normal file
24
src/video_core/cache_types.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
/// Bit flags naming the caches. Each cache owns one bit; the No* values combine every cache
/// bit except the one named, and All combines all four.
enum class CacheType : u32 {
|
||||
None = 0,
|
||||
TextureCache = 1 << 0,
|
||||
QueryCache = 1 << 1,
|
||||
BufferCache = 1 << 2,
|
||||
ShaderCache = 1 << 3,
|
||||
NoTextureCache = QueryCache | BufferCache | ShaderCache,
|
||||
NoBufferCache = TextureCache | QueryCache | ShaderCache,
|
||||
NoQueryCache = TextureCache | BufferCache | ShaderCache,
|
||||
All = TextureCache | QueryCache | BufferCache | ShaderCache,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(CacheType)
|
||||
|
||||
} // namespace VideoCommon
|
|
@ -61,7 +61,7 @@ bool DmaPusher::Step() {
|
|||
} else {
|
||||
const CommandListHeader command_list_header{
|
||||
command_list.command_lists[dma_pushbuffer_subindex++]};
|
||||
const GPUVAddr dma_get = command_list_header.addr;
|
||||
dma_state.dma_get = command_list_header.addr;
|
||||
|
||||
if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
|
||||
// We've gone through the current list, remove it from the queue
|
||||
|
@ -75,12 +75,22 @@ bool DmaPusher::Step() {
|
|||
|
||||
// Push buffer non-empty, read a word
|
||||
command_headers.resize_destructive(command_list_header.size);
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
memory_manager.ReadBlock(dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
constexpr u32 MacroRegistersStart = 0xE00;
|
||||
if (dma_state.method < MacroRegistersStart) {
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
}
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
const size_t copy_size = command_list_header.size * sizeof(u32);
|
||||
if (subchannels[dma_state.subchannel]) {
|
||||
subchannels[dma_state.subchannel]->current_dirty =
|
||||
memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size);
|
||||
}
|
||||
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size);
|
||||
}
|
||||
ProcessCommands(command_headers);
|
||||
}
|
||||
|
@ -94,6 +104,7 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
|||
|
||||
if (dma_state.method_count) {
|
||||
// Data word of methods command
|
||||
dma_state.dma_word_offset = static_cast<u32>(index * sizeof(u32));
|
||||
if (dma_state.non_incrementing) {
|
||||
const u32 max_write = static_cast<u32>(
|
||||
std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
|
||||
|
@ -132,6 +143,8 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
|||
case SubmissionMode::Inline:
|
||||
dma_state.method = command_header.method;
|
||||
dma_state.subchannel = command_header.subchannel;
|
||||
dma_state.dma_word_offset = static_cast<u64>(
|
||||
-static_cast<s64>(dma_state.dma_get)); // negate to set address as 0
|
||||
CallMethod(command_header.arg_count);
|
||||
dma_state.non_incrementing = true;
|
||||
dma_increment_once = false;
|
||||
|
@ -164,8 +177,14 @@ void DmaPusher::CallMethod(u32 argument) const {
|
|||
dma_state.method_count,
|
||||
});
|
||||
} else {
|
||||
subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument,
|
||||
dma_state.is_last_call);
|
||||
auto subchannel = subchannels[dma_state.subchannel];
|
||||
if (!subchannel->execution_mask[dma_state.method]) [[likely]] {
|
||||
subchannel->method_sink.emplace_back(dma_state.method, argument);
|
||||
return;
|
||||
}
|
||||
subchannel->ConsumeSink();
|
||||
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
|
||||
subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -174,8 +193,11 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
|
|||
puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
|
||||
dma_state.method_count);
|
||||
} else {
|
||||
subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
|
||||
num_methods, dma_state.method_count);
|
||||
auto subchannel = subchannels[dma_state.subchannel];
|
||||
subchannel->ConsumeSink();
|
||||
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
|
||||
subchannel->CallMultiMethod(dma_state.method, base_start, num_methods,
|
||||
dma_state.method_count);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -156,6 +156,8 @@ private:
|
|||
u32 subchannel; ///< Current subchannel
|
||||
u32 method_count; ///< Current method count
|
||||
u32 length_pending; ///< Large NI command length pending
|
||||
GPUVAddr dma_get; ///< Currently read segment
|
||||
u64 dma_word_offset; ///< Current word offset from address
|
||||
bool non_incrementing; ///< Current command's NI flag
|
||||
bool is_last_call;
|
||||
};
|
||||
|
|
|
@ -91,6 +91,23 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
|
|||
ProcessDraw(true, num_instances);
|
||||
}
|
||||
|
||||
/// Stores the requested topology in the draw state, then hands over to
/// ProcessDrawIndirect().
void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) {
    draw_state.topology = topology;
    ProcessDrawIndirect();
}
|
||||
|
||||
/// Stores the topology, snapshots the engine's index buffer registers into the draw state
/// with `first`/`count` overridden by the arguments, then hands over to
/// ProcessDrawIndirect().
void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first,
                                      u32 index_count) {
    draw_state.topology = topology;

    auto& index_buffer = draw_state.index_buffer;
    index_buffer = maxwell3d->regs.index_buffer;
    index_buffer.first = index_first;
    index_buffer.count = index_count;

    ProcessDrawIndirect();
}
|
||||
|
||||
void DrawManager::SetInlineIndexBuffer(u32 index) {
|
||||
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff));
|
||||
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8));
|
||||
|
@ -198,4 +215,18 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
|
|||
maxwell3d->rasterizer->Draw(draw_indexed, instance_count);
|
||||
}
|
||||
}
|
||||
|
||||
void DrawManager::ProcessDrawIndirect() {
|
||||
LOG_TRACE(
|
||||
HW_GPU,
|
||||
"called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}",
|
||||
draw_state.topology, indirect_state.is_indexed, indirect_state.include_count,
|
||||
indirect_state.buffer_size, indirect_state.max_draw_counts);
|
||||
|
||||
UpdateTopology();
|
||||
|
||||
if (maxwell3d->ShouldExecute()) {
|
||||
maxwell3d->rasterizer->DrawIndirect();
|
||||
}
|
||||
}
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -32,6 +32,16 @@ public:
|
|||
std::vector<u8> inline_index_draw_indexes;
|
||||
};
|
||||
|
||||
struct IndirectParams {
|
||||
bool is_indexed;
|
||||
bool include_count;
|
||||
GPUVAddr count_start_address;
|
||||
GPUVAddr indirect_start_address;
|
||||
size_t buffer_size;
|
||||
size_t max_draw_counts;
|
||||
size_t stride;
|
||||
};
|
||||
|
||||
explicit DrawManager(Maxwell3D* maxwell_3d);
|
||||
|
||||
void ProcessMethodCall(u32 method, u32 argument);
|
||||
|
@ -46,10 +56,22 @@ public:
|
|||
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
|
||||
u32 base_instance, u32 num_instances);
|
||||
|
||||
void DrawArrayIndirect(PrimitiveTopology topology);
|
||||
|
||||
void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count);
|
||||
|
||||
const State& GetDrawState() const {
|
||||
return draw_state;
|
||||
}
|
||||
|
||||
IndirectParams& GetIndirectParams() {
|
||||
return indirect_state;
|
||||
}
|
||||
|
||||
const IndirectParams& GetIndirectParams() const {
|
||||
return indirect_state;
|
||||
}
|
||||
|
||||
private:
|
||||
void SetInlineIndexBuffer(u32 index);
|
||||
|
||||
|
@ -63,7 +85,10 @@ private:
|
|||
|
||||
void ProcessDraw(bool draw_indexed, u32 instance_count);
|
||||
|
||||
void ProcessDrawIndirect();
|
||||
|
||||
Maxwell3D* maxwell3d{};
|
||||
State draw_state{};
|
||||
IndirectParams indirect_state{};
|
||||
};
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -3,6 +3,10 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
@ -17,6 +21,26 @@ public:
|
|||
/// Write multiple values to the register identified by method.
|
||||
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) = 0;
|
||||
|
||||
void ConsumeSink() {
|
||||
if (method_sink.empty()) {
|
||||
return;
|
||||
}
|
||||
ConsumeSinkImpl();
|
||||
}
|
||||
|
||||
std::bitset<std::numeric_limits<u16>::max()> execution_mask{};
|
||||
std::vector<std::pair<u32, u32>> method_sink{};
|
||||
bool current_dirty{};
|
||||
GPUVAddr current_dma_segment;
|
||||
|
||||
protected:
|
||||
virtual void ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
CallMethod(method, value, true);
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
|
|||
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
|
||||
x_elements, regs.line_count, regs.dest.BlockHeight(),
|
||||
regs.dest.BlockDepth(), regs.line_length_in);
|
||||
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "common/microprofile.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/sw_blitter/blitter.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
@ -20,11 +21,14 @@ namespace Tegra::Engines {
|
|||
|
||||
using namespace Texture;
|
||||
|
||||
Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
|
||||
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
|
||||
Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} {
|
||||
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager);
|
||||
// Nvidia's OpenGL driver seems to assume these values
|
||||
regs.src.depth = 1;
|
||||
regs.dst.depth = 1;
|
||||
|
||||
execution_mask.reset();
|
||||
execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true;
|
||||
}
|
||||
|
||||
Fermi2D::~Fermi2D() = default;
|
||||
|
@ -49,6 +53,13 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32
|
|||
}
|
||||
}
|
||||
|
||||
void Fermi2D::ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
regs.reg_array[method] = value;
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
|
||||
void Fermi2D::Blit() {
|
||||
MICROPROFILE_SCOPE(GPU_BlitEngine);
|
||||
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
|
||||
|
@ -94,6 +105,7 @@ void Fermi2D::Blit() {
|
|||
config.src_x0 = 0;
|
||||
}
|
||||
|
||||
memory_manager.FlushCaching();
|
||||
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
|
||||
sw_blitter->Blit(src, regs.dst, config);
|
||||
}
|
||||
|
|
|
@ -305,10 +305,13 @@ public:
|
|||
private:
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
/// Performs the copy from the source surface to the destination surface as configured in the
|
||||
/// registers.
|
||||
void Blit();
|
||||
|
||||
void ConsumeSinkImpl() override;
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
|
|
|
@ -14,7 +14,12 @@
|
|||
namespace Tegra::Engines {
|
||||
|
||||
KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
|
||||
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {
|
||||
execution_mask.reset();
|
||||
execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true;
|
||||
execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true;
|
||||
execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true;
|
||||
}
|
||||
|
||||
KeplerCompute::~KeplerCompute() = default;
|
||||
|
||||
|
@ -23,6 +28,13 @@ void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
|
|||
upload_state.BindRasterizer(rasterizer);
|
||||
}
|
||||
|
||||
void KeplerCompute::ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
regs.reg_array[method] = value;
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
|
||||
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid KeplerCompute register, increase the size of the Regs structure");
|
||||
|
|
|
@ -204,6 +204,8 @@ public:
|
|||
private:
|
||||
void ProcessLaunch();
|
||||
|
||||
void ConsumeSinkImpl() override;
|
||||
|
||||
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
||||
Texture::TICEntry GetTICEntry(u32 tic_index) const;
|
||||
|
||||
|
|
|
@ -18,6 +18,17 @@ KeplerMemory::~KeplerMemory() = default;
|
|||
|
||||
void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
upload_state.BindRasterizer(rasterizer_);
|
||||
|
||||
execution_mask.reset();
|
||||
execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true;
|
||||
execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true;
|
||||
}
|
||||
|
||||
void KeplerMemory::ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
regs.reg_array[method] = value;
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
|
||||
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
|
|
|
@ -73,6 +73,8 @@ public:
|
|||
} regs{};
|
||||
|
||||
private:
|
||||
void ConsumeSinkImpl() override;
|
||||
|
||||
Core::System& system;
|
||||
Upload::State upload_state;
|
||||
};
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
#include <cstring>
|
||||
#include <optional>
|
||||
#include "common/assert.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
|
@ -28,6 +30,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
|
|||
regs.upload} {
|
||||
dirty.flags.flip();
|
||||
InitializeRegisterDefaults();
|
||||
execution_mask.reset();
|
||||
for (size_t i = 0; i < execution_mask.size(); i++) {
|
||||
execution_mask[i] = IsMethodExecutable(static_cast<u32>(i));
|
||||
}
|
||||
}
|
||||
|
||||
Maxwell3D::~Maxwell3D() = default;
|
||||
|
@ -121,6 +127,71 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
|||
shadow_state = regs;
|
||||
}
|
||||
|
||||
/// Reports whether `method` belongs to the set used to populate execution_mask in the
/// constructor. DmaPusher dispatches methods whose flag is set immediately and queues the
/// rest in the method sink.
///
/// Returns true for every method at or above MacroRegistersStart and for the registers
/// listed below; false otherwise.
bool Maxwell3D::IsMethodExecutable(u32 method) {
    if (method >= MacroRegistersStart) {
        return true;
    }

    // const_buffer.buffer + 0 through const_buffer.buffer + 15.
    constexpr u32 cb_data_first = static_cast<u32>(MAXWELL3D_REG_INDEX(const_buffer.buffer));
    constexpr u32 cb_data_last = cb_data_first + 15;
    if (method >= cb_data_first && method <= cb_data_last) {
        return true;
    }

    switch (method) {
    // Draw and inline-index registers.
    case MAXWELL3D_REG_INDEX(draw.end):
    case MAXWELL3D_REG_INDEX(draw.begin):
    case MAXWELL3D_REG_INDEX(vertex_buffer.first):
    case MAXWELL3D_REG_INDEX(vertex_buffer.count):
    case MAXWELL3D_REG_INDEX(index_buffer.first):
    case MAXWELL3D_REG_INDEX(index_buffer.count):
    case MAXWELL3D_REG_INDEX(draw_inline_index):
    case MAXWELL3D_REG_INDEX(index_buffer32_subsequent):
    case MAXWELL3D_REG_INDEX(index_buffer16_subsequent):
    case MAXWELL3D_REG_INDEX(index_buffer8_subsequent):
    case MAXWELL3D_REG_INDEX(index_buffer32_first):
    case MAXWELL3D_REG_INDEX(index_buffer16_first):
    case MAXWELL3D_REG_INDEX(index_buffer8_first):
    case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
    case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
    case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
    case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent):
    case MAXWELL3D_REG_INDEX(topology_override):
    // Control, macro-loading and shadow RAM registers.
    case MAXWELL3D_REG_INDEX(wait_for_idle):
    case MAXWELL3D_REG_INDEX(shadow_ram_control):
    case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
    case MAXWELL3D_REG_INDEX(load_mme.instruction):
    case MAXWELL3D_REG_INDEX(load_mme.start_address):
    case MAXWELL3D_REG_INDEX(falcon[4]):
    // Constant buffer bind groups.
    case MAXWELL3D_REG_INDEX(bind_groups[0].raw_config):
    case MAXWELL3D_REG_INDEX(bind_groups[1].raw_config):
    case MAXWELL3D_REG_INDEX(bind_groups[2].raw_config):
    case MAXWELL3D_REG_INDEX(bind_groups[3].raw_config):
    case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config):
    // Clears, queries, synchronisation, uploads and barriers.
    case MAXWELL3D_REG_INDEX(clear_surface):
    case MAXWELL3D_REG_INDEX(report_semaphore.query):
    case MAXWELL3D_REG_INDEX(render_enable.mode):
    case MAXWELL3D_REG_INDEX(clear_report_value):
    case MAXWELL3D_REG_INDEX(sync_info):
    case MAXWELL3D_REG_INDEX(launch_dma):
    case MAXWELL3D_REG_INDEX(inline_data):
    case MAXWELL3D_REG_INDEX(fragment_barrier):
    case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
|
||||
if (executing_macro == 0) {
|
||||
// A macro call must begin by writing the macro method's register, not its argument.
|
||||
|
@ -130,14 +201,72 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
|
|||
}
|
||||
|
||||
macro_params.insert(macro_params.end(), base_start, base_start + amount);
|
||||
for (size_t i = 0; i < amount; i++) {
|
||||
macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
|
||||
}
|
||||
macro_segments.emplace_back(current_dma_segment, amount);
|
||||
current_macro_dirty |= current_dirty;
|
||||
current_dirty = false;
|
||||
|
||||
// Call the macro when there are no more parameters in the command buffer
|
||||
if (is_last_call) {
|
||||
ConsumeSink();
|
||||
CallMacroMethod(executing_macro, macro_params);
|
||||
macro_params.clear();
|
||||
macro_addresses.clear();
|
||||
macro_segments.clear();
|
||||
current_macro_dirty = false;
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::RefreshParametersImpl() {
|
||||
size_t current_index = 0;
|
||||
for (auto& segment : macro_segments) {
|
||||
if (segment.first == 0) {
|
||||
current_index += segment.second;
|
||||
continue;
|
||||
}
|
||||
memory_manager.ReadBlock(segment.first, ¯o_params[current_index],
|
||||
sizeof(u32) * segment.second);
|
||||
current_index += segment.second;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the largest vertex count that any enabled vertex stream can hold.
///
/// For each enabled stream the byte span between its start address and its limit address
/// (inclusive) is divided by the larger of the attribute size and the stream stride. A
/// stream whose attribute format is flagged constant counts as a single vertex. Returns 0
/// when no stream is enabled.
u32 Maxwell3D::GetMaxCurrentVertices() {
    u32 max_vertices = 0;
    for (size_t stream = 0; stream < Regs::NumVertexArrays; ++stream) {
        const auto& vertex_stream = regs.vertex_streams[stream];
        if (vertex_stream.enable == 0) {
            continue;
        }

        const auto& format = regs.vertex_attrib_format[stream];
        if (format.constant) {
            max_vertices = std::max(max_vertices, 1U);
            continue;
        }

        const GPUVAddr first_byte = vertex_stream.Address();
        // The limit address is inclusive, hence the + 1.
        const GPUVAddr past_last_byte = regs.vertex_stream_limits[stream].Address() + 1;
        const u32 span_bytes = static_cast<u32>(past_last_byte - first_byte);
        const auto bytes_per_vertex =
            std::max(format.SizeInBytes(), vertex_stream.stride.Value());
        max_vertices = std::max(max_vertices, span_bytes / bytes_per_vertex);
    }
    return max_vertices;
}
|
||||
|
||||
size_t Maxwell3D::EstimateIndexBufferSize() {
|
||||
GPUVAddr start_address = regs.index_buffer.StartAddress();
|
||||
GPUVAddr end_address = regs.index_buffer.EndAddress();
|
||||
constexpr std::array<size_t, 4> max_sizes = {
|
||||
std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
|
||||
std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
|
||||
const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
|
||||
return std::min<size_t>(
|
||||
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) /
|
||||
byte_size,
|
||||
static_cast<size_t>(end_address - start_address));
|
||||
}
|
||||
|
||||
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
|
||||
// Keep track of the register value in shadow_state when requested.
|
||||
const auto control = shadow_state.shadow_ram_control;
|
||||
|
@ -152,6 +281,29 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
|
|||
return argument;
|
||||
}
|
||||
|
||||
void Maxwell3D::ConsumeSinkImpl() {
|
||||
SCOPE_EXIT({ method_sink.clear(); });
|
||||
const auto control = shadow_state.shadow_ram_control;
|
||||
if (control == Regs::ShadowRamControl::Track ||
|
||||
control == Regs::ShadowRamControl::TrackWithFilter) {
|
||||
|
||||
for (auto [method, value] : method_sink) {
|
||||
shadow_state.reg_array[method] = value;
|
||||
ProcessDirtyRegisters(method, value);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (control == Regs::ShadowRamControl::Replay) {
|
||||
for (auto [method, value] : method_sink) {
|
||||
ProcessDirtyRegisters(method, shadow_state.reg_array[method]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
for (auto [method, value] : method_sink) {
|
||||
ProcessDirtyRegisters(method, value);
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
|
||||
if (regs.reg_array[method] == argument) {
|
||||
return;
|
||||
|
@ -263,7 +415,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
|||
|
||||
const u32 argument = ProcessShadowRam(method, method_argument);
|
||||
ProcessDirtyRegisters(method, argument);
|
||||
|
||||
ProcessMethodCall(method, argument, method_argument, is_last_call);
|
||||
}
|
||||
|
||||
|
@ -294,9 +445,11 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
|||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15:
|
||||
ProcessCBMultiData(base_start, amount);
|
||||
break;
|
||||
case MAXWELL3D_REG_INDEX(inline_data):
|
||||
case MAXWELL3D_REG_INDEX(inline_data): {
|
||||
ASSERT(methods_pending == amount);
|
||||
upload_state.ProcessData(base_start, amount);
|
||||
return;
|
||||
}
|
||||
default:
|
||||
for (u32 i = 0; i < amount; i++) {
|
||||
CallMethod(method, base_start[i], methods_pending - i <= 1);
|
||||
|
@ -332,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
|
|||
}
|
||||
|
||||
void Maxwell3D::ProcessQueryGet() {
|
||||
// TODO(Subv): Support the other query units.
|
||||
if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) {
|
||||
LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented");
|
||||
}
|
||||
|
||||
switch (regs.report_semaphore.query.operation) {
|
||||
case Regs::ReportSemaphore::Operation::Release:
|
||||
if (regs.report_semaphore.query.short_query != 0) {
|
||||
|
@ -389,7 +537,11 @@ void Maxwell3D::ProcessQueryCondition() {
|
|||
case Regs::RenderEnable::Override::NeverRender:
|
||||
execute_on = false;
|
||||
break;
|
||||
case Regs::RenderEnable::Override::UseRenderEnable:
|
||||
case Regs::RenderEnable::Override::UseRenderEnable: {
|
||||
if (rasterizer->AccelerateConditionalRendering()) {
|
||||
execute_on = true;
|
||||
return;
|
||||
}
|
||||
switch (regs.render_enable.mode) {
|
||||
case Regs::RenderEnable::Mode::True: {
|
||||
execute_on = true;
|
||||
|
@ -427,6 +579,7 @@ void Maxwell3D::ProcessQueryCondition() {
|
|||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessCounterReset() {
|
||||
|
@ -463,7 +616,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
|
|||
}
|
||||
|
||||
void Maxwell3D::ProcessCBBind(size_t stage_index) {
|
||||
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
|
||||
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader
|
||||
// stage.
|
||||
const auto& bind_data = regs.bind_groups[stage_index];
|
||||
auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.shader_slot];
|
||||
buffer.enabled = bind_data.valid.Value() != 0;
|
||||
|
@ -490,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
|
|||
|
||||
const GPUVAddr address{buffer_address + regs.const_buffer.offset};
|
||||
const size_t copy_size = amount * sizeof(u32);
|
||||
memory_manager.WriteBlock(address, start_base, copy_size);
|
||||
memory_manager.WriteBlockCached(address, start_base, copy_size);
|
||||
|
||||
// Increment the current buffer position.
|
||||
regs.const_buffer.offset += static_cast<u32>(copy_size);
|
||||
|
@ -524,4 +678,10 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
|
|||
return regs.reg_array[method];
|
||||
}
|
||||
|
||||
/// Records `name` in replace_table under a key built from `bank` and `offset`.
/// An entry already stored under the same key is left as it is (emplace semantics).
void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset,
                                               HLEReplacementAttributeType name) {
    // Key layout: bank in the upper 32 bits, offset in the lower 32 bits.
    u64 key = bank;
    key <<= 32;
    key |= offset;
    replace_table.emplace(key, name);
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -272,6 +272,7 @@ public:
|
|||
};
|
||||
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, Mode> mode;
|
||||
BitField<4, 8, u32> pad;
|
||||
};
|
||||
|
@ -1217,10 +1218,12 @@ public:
|
|||
|
||||
struct Window {
|
||||
union {
|
||||
u32 raw_x;
|
||||
BitField<0, 16, u32> x_min;
|
||||
BitField<16, 16, u32> x_max;
|
||||
};
|
||||
union {
|
||||
u32 raw_y;
|
||||
BitField<0, 16, u32> y_min;
|
||||
BitField<16, 16, u32> y_max;
|
||||
};
|
||||
|
@ -2708,7 +2711,7 @@ public:
|
|||
u32 post_z_pixel_imask; ///< 0x0F1C
|
||||
INSERT_PADDING_BYTES_NOINIT(0x20);
|
||||
ConstantColorRendering const_color_rendering; ///< 0x0F40
|
||||
s32 stencil_back_ref; ///< 0x0F54
|
||||
u32 stencil_back_ref; ///< 0x0F54
|
||||
u32 stencil_back_mask; ///< 0x0F58
|
||||
u32 stencil_back_func_mask; ///< 0x0F5C
|
||||
INSERT_PADDING_BYTES_NOINIT(0x14);
|
||||
|
@ -2832,9 +2835,9 @@ public:
|
|||
Blend blend; ///< 0x133C
|
||||
u32 stencil_enable; ///< 0x1380
|
||||
StencilOp stencil_front_op; ///< 0x1384
|
||||
s32 stencil_front_ref; ///< 0x1394
|
||||
s32 stencil_front_func_mask; ///< 0x1398
|
||||
s32 stencil_front_mask; ///< 0x139C
|
||||
u32 stencil_front_ref; ///< 0x1394
|
||||
u32 stencil_front_func_mask; ///< 0x1398
|
||||
u32 stencil_front_mask; ///< 0x139C
|
||||
INSERT_PADDING_BYTES_NOINIT(0x4);
|
||||
u32 draw_auto_start_byte_count; ///< 0x13A4
|
||||
PsSaturate frag_color_clamp; ///< 0x13A8
|
||||
|
@ -3020,6 +3023,24 @@ public:
|
|||
/// Store temporary hw register values, used by some calls to restore state after a operation
|
||||
Regs shadow_state;
|
||||
|
||||
// None Engine
|
||||
enum class EngineHint : u32 {
|
||||
None = 0x0,
|
||||
OnHLEMacro = 0x1,
|
||||
};
|
||||
|
||||
EngineHint engine_state{EngineHint::None};
|
||||
|
||||
enum class HLEReplacementAttributeType : u32 {
|
||||
BaseVertex = 0x0,
|
||||
BaseInstance = 0x1,
|
||||
DrawID = 0x2,
|
||||
};
|
||||
|
||||
void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name);
|
||||
|
||||
std::unordered_map<u64, HLEReplacementAttributeType> replace_table;
|
||||
|
||||
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
|
||||
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
|
||||
|
||||
|
@ -3067,6 +3088,35 @@ public:
|
|||
std::unique_ptr<DrawManager> draw_manager;
|
||||
friend class DrawManager;
|
||||
|
||||
GPUVAddr GetMacroAddress(size_t index) const {
|
||||
return macro_addresses[index];
|
||||
}
|
||||
|
||||
void RefreshParameters() {
|
||||
if (!current_macro_dirty) {
|
||||
return;
|
||||
}
|
||||
RefreshParametersImpl();
|
||||
}
|
||||
|
||||
bool AnyParametersDirty() const {
|
||||
return current_macro_dirty;
|
||||
}
|
||||
|
||||
u32 GetMaxCurrentVertices();
|
||||
|
||||
size_t EstimateIndexBufferSize();
|
||||
|
||||
/// Handles a write to the CLEAR_BUFFERS register.
|
||||
void ProcessClearBuffers(u32 layer_count);
|
||||
|
||||
/// Handles a write to the CB_BIND register.
|
||||
void ProcessCBBind(size_t stage_index);
|
||||
|
||||
/// Handles a write to the CB_DATA[i] register.
|
||||
void ProcessCBData(u32 value);
|
||||
void ProcessCBMultiData(const u32* start_base, u32 amount);
|
||||
|
||||
private:
|
||||
void InitializeRegisterDefaults();
|
||||
|
||||
|
@ -3076,6 +3126,8 @@ private:
|
|||
|
||||
void ProcessDirtyRegisters(u32 method, u32 argument);
|
||||
|
||||
void ConsumeSinkImpl() override;
|
||||
|
||||
void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);
|
||||
|
||||
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
||||
|
@ -3116,16 +3168,13 @@ private:
|
|||
/// Handles writes to syncing register.
|
||||
void ProcessSyncPoint();
|
||||
|
||||
/// Handles a write to the CB_DATA[i] register.
|
||||
void ProcessCBData(u32 value);
|
||||
void ProcessCBMultiData(const u32* start_base, u32 amount);
|
||||
|
||||
/// Handles a write to the CB_BIND register.
|
||||
void ProcessCBBind(size_t stage_index);
|
||||
|
||||
/// Returns a query's value or an empty object if the value will be deferred through a cache.
|
||||
std::optional<u64> GetQueryResult();
|
||||
|
||||
void RefreshParametersImpl();
|
||||
|
||||
bool IsMethodExecutable(u32 method);
|
||||
|
||||
Core::System& system;
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
|
@ -3145,6 +3194,10 @@ private:
|
|||
Upload::State upload_state;
|
||||
|
||||
bool execute_on{true};
|
||||
|
||||
std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
|
||||
std::vector<GPUVAddr> macro_addresses;
|
||||
bool current_macro_dirty{};
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
|
|
|
@ -21,7 +21,10 @@ namespace Tegra::Engines {
|
|||
using namespace Texture;
|
||||
|
||||
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_} {}
|
||||
: system{system_}, memory_manager{memory_manager_} {
|
||||
execution_mask.reset();
|
||||
execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true;
|
||||
}
|
||||
|
||||
MaxwellDMA::~MaxwellDMA() = default;
|
||||
|
||||
|
@ -29,6 +32,13 @@ void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
|||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void MaxwellDMA::ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
regs.reg_array[method] = value;
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
|
||||
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
|
||||
|
||||
|
@ -59,7 +69,7 @@ void MaxwellDMA::Launch() {
|
|||
if (launch.multi_line_enable) {
|
||||
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
|
||||
memory_manager.FlushCaching();
|
||||
if (!is_src_pitch && !is_dst_pitch) {
|
||||
// If both the source and the destination are in block layout, assert.
|
||||
CopyBlockLinearToBlockLinear();
|
||||
|
@ -94,6 +104,7 @@ void MaxwellDMA::Launch() {
|
|||
reinterpret_cast<u8*>(tmp_buffer.data()),
|
||||
regs.line_length_in * sizeof(u32));
|
||||
} else {
|
||||
memory_manager.FlushCaching();
|
||||
const auto convert_linear_2_blocklinear_addr = [](u64 address) {
|
||||
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
|
||||
((address & 0x180) >> 1) | ((address & 0x20) << 3);
|
||||
|
@ -111,8 +122,8 @@ void MaxwellDMA::Launch() {
|
|||
memory_manager.ReadBlockUnsafe(
|
||||
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
|
||||
tmp_buffer.data(), tmp_buffer.size());
|
||||
memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
}
|
||||
} else if (is_src_pitch && !is_dst_pitch) {
|
||||
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
|
||||
|
@ -122,7 +133,7 @@ void MaxwellDMA::Launch() {
|
|||
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
memory_manager.WriteBlock(
|
||||
memory_manager.WriteBlockCached(
|
||||
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
|
||||
tmp_buffer.data(), tmp_buffer.size());
|
||||
}
|
||||
|
@ -131,8 +142,8 @@ void MaxwellDMA::Launch() {
|
|||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -194,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
|
|||
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_out);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyPitchToBlockLinear() {
|
||||
|
@ -246,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
|
|||
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_in);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
||||
|
@ -277,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
|||
regs.src_params.block_size.height, regs.src_params.block_size.depth,
|
||||
regs.pitch_out);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyBlockLinearToBlockLinear() {
|
||||
|
@ -337,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
|
|||
dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
|
||||
dst.block_size.height, dst.block_size.depth, pitch);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::ReleaseSemaphore() {
|
||||
|
|
|
@ -231,6 +231,8 @@ private:
|
|||
|
||||
void ReleaseSemaphore();
|
||||
|
||||
void ConsumeSinkImpl() override;
|
||||
|
||||
Core::System& system;
|
||||
|
||||
MemoryManager& memory_manager;
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue