summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.travis.yml2
-rwxr-xr-x.travis/linux-mingw/build.sh2
-rwxr-xr-x.travis/linux-mingw/deps.sh2
-rwxr-xr-x.travis/linux-mingw/docker.sh14
-rwxr-xr-x.travis/linux/build.sh2
-rwxr-xr-x.travis/linux/deps.sh2
-rwxr-xr-x.travis/linux/docker.sh5
-rwxr-xr-x.travis/macos/build.sh1
-rw-r--r--CMakeModules/GenerateSCMRev.cmake2
-rw-r--r--externals/CMakeLists.txt8
m---------externals/dynarmic0
-rw-r--r--src/CMakeLists.txt30
-rw-r--r--src/common/zstd_compression.cpp2
-rw-r--r--src/core/frontend/emu_window.cpp2
-rw-r--r--src/core/frontend/emu_window.h2
-rw-r--r--src/core/hle/kernel/process.cpp3
-rw-r--r--src/core/hle/kernel/vm_manager.cpp7
-rw-r--r--src/core/hle/kernel/vm_manager.h8
-rw-r--r--src/core/loader/nso.cpp2
-rw-r--r--src/core/memory.h9
-rw-r--r--src/core/telemetry_session.cpp17
-rw-r--r--src/core/telemetry_session.h1
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/dma_pusher.cpp2
-rw-r--r--src/video_core/engines/engine_upload.cpp48
-rw-r--r--src/video_core/engines/engine_upload.h75
-rw-r--r--src/video_core/engines/fermi_2d.h6
-rw-r--r--src/video_core/engines/kepler_compute.cpp37
-rw-r--r--src/video_core/engines/kepler_compute.h175
-rw-r--r--src/video_core/engines/kepler_memory.cpp45
-rw-r--r--src/video_core/engines/kepler_memory.h66
-rw-r--r--src/video_core/engines/maxwell_3d.cpp16
-rw-r--r--src/video_core/engines/maxwell_3d.h30
-rw-r--r--src/video_core/engines/maxwell_dma.cpp83
-rw-r--r--src/video_core/engines/maxwell_dma.h43
-rw-r--r--src/video_core/gpu.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp244
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h54
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h53
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp10
-rw-r--r--src/video_core/shader/decode.cpp4
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp22
-rw-r--r--src/video_core/shader/decode/conversion.cpp7
-rw-r--r--src/video_core/shader/decode/hfma2.cpp9
-rw-r--r--src/video_core/shader/decode/texture.cpp3
-rw-r--r--src/video_core/shader/decode/xmad.cpp5
-rw-r--r--src/video_core/shader/shader_ir.cpp9
-rw-r--r--src/video_core/shader/shader_ir.h5
-rw-r--r--src/video_core/surface.cpp86
-rw-r--r--src/video_core/textures/astc.cpp4
-rw-r--r--src/yuzu/compatdb.cpp6
-rw-r--r--src/yuzu/configuration/configure_dialog.cpp4
-rw-r--r--src/yuzu/hotkeys.h2
55 files changed, 854 insertions, 444 deletions
diff --git a/.travis.yml b/.travis.yml
index 9512f7843..93fda1dfa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
- os: osx
env: NAME="macos build"
sudo: false
- osx_image: xcode10.1
+ osx_image: xcode10.2
install: "./.travis/macos/deps.sh"
script: "./.travis/macos/build.sh"
after_success: "./.travis/macos/upload.sh"
diff --git a/.travis/linux-mingw/build.sh b/.travis/linux-mingw/build.sh
index be03cc0f3..b12d70b12 100755
--- a/.travis/linux-mingw/build.sh
+++ b/.travis/linux-mingw/build.sh
@@ -1,3 +1,3 @@
#!/bin/bash -ex
mkdir "$HOME/.ccache" || true
-docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh
+docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh
diff --git a/.travis/linux-mingw/deps.sh b/.travis/linux-mingw/deps.sh
index 540bb934a..55b5d6006 100755
--- a/.travis/linux-mingw/deps.sh
+++ b/.travis/linux-mingw/deps.sh
@@ -1,3 +1,3 @@
#!/bin/sh -ex
-docker pull ubuntu:18.04
+docker pull yuzuemu/build-environments:linux-mingw
diff --git a/.travis/linux-mingw/docker.sh b/.travis/linux-mingw/docker.sh
index 6cf43a006..28033acfb 100755
--- a/.travis/linux-mingw/docker.sh
+++ b/.travis/linux-mingw/docker.sh
@@ -1,16 +1,6 @@
#!/bin/bash -ex
cd /yuzu
-MINGW_PACKAGES="sdl2-mingw-w64 qt5base-mingw-w64 qt5tools-mingw-w64 libsamplerate-mingw-w64 qt5multimedia-mingw-w64"
-apt-get update
-apt-get install -y gpg wget git python3-pip python ccache g++-mingw-w64-x86-64 gcc-mingw-w64-x86-64 mingw-w64-tools cmake
-echo 'deb http://ppa.launchpad.net/tobydox/mingw-w64/ubuntu bionic main ' > /etc/apt/sources.list.d/extras.list
-apt-key adv --keyserver keyserver.ubuntu.com --recv '72931B477E22FEFD47F8DECE02FE5F12ADDE29B2'
-apt-get update
-apt-get install -y ${MINGW_PACKAGES}
-
-# fix a problem in current MinGW headers
-wget -q https://raw.githubusercontent.com/Alexpux/mingw-w64/d0d7f784833bbb0b2d279310ddc6afb52fe47a46/mingw-w64-headers/crt/errno.h -O /usr/x86_64-w64-mingw32/include/errno.h
# override Travis CI unreasonable ccache size
echo 'max_size = 3.0G' > "$HOME/.ccache/ccache.conf"
@@ -23,8 +13,8 @@ echo '' >> /bin/cmd
chmod +x /bin/cmd
mkdir build && cd build
-cmake .. -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
-make -j4
+cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
+ninja
# Clean up the dirty hacks
rm /bin/uname && mv /bin/uname1 /bin/uname
diff --git a/.travis/linux/build.sh b/.travis/linux/build.sh
index 2fced727d..3929f97fc 100755
--- a/.travis/linux/build.sh
+++ b/.travis/linux/build.sh
@@ -1,4 +1,4 @@
#!/bin/bash -ex
mkdir -p "$HOME/.ccache"
-docker run -e ENABLE_COMPATIBILITY_REPORTING --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash /yuzu/.travis/linux/docker.sh
+docker run -e ENABLE_COMPATIBILITY_REPORTING --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.travis/linux/docker.sh
diff --git a/.travis/linux/deps.sh b/.travis/linux/deps.sh
index 540bb934a..8d23c517d 100755
--- a/.travis/linux/deps.sh
+++ b/.travis/linux/deps.sh
@@ -1,3 +1,3 @@
#!/bin/sh -ex
-docker pull ubuntu:18.04
+docker pull yuzuemu/build-environments:linux-fresh
diff --git a/.travis/linux/docker.sh b/.travis/linux/docker.sh
index 8b7e65911..3a9970384 100755
--- a/.travis/linux/docker.sh
+++ b/.travis/linux/docker.sh
@@ -1,12 +1,9 @@
#!/bin/bash -ex
-apt-get update
-apt-get install --no-install-recommends -y build-essential git libqt5opengl5-dev libsdl2-dev libssl-dev python qtbase5-dev qtwebengine5-dev wget cmake ninja-build ccache
-
cd /yuzu
mkdir build && cd build
-cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -G Ninja
+cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
ninja
ccache -s
diff --git a/.travis/macos/build.sh b/.travis/macos/build.sh
index b7b4c6f8c..0abd1a93a 100755
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -7,6 +7,7 @@ export Qt5_DIR=$(brew --prefix)/opt/qt5
export UNICORNDIR=$(pwd)/externals/unicorn
export PATH="/usr/local/opt/ccache/libexec:$PATH"
+# TODO: Build using ninja instead of make
mkdir build && cd build
cmake --version
cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DUSE_DISCORD_PRESENCE=ON
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 08315a1f1..5e00d839f 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -19,7 +19,7 @@ set(BUILD_VERSION "0")
if (BUILD_REPOSITORY)
# regex capture the string nightly or canary into CMAKE_MATCH_1
string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
- if (${CMAKE_MATCH_COUNT} GREATER 0)
+ if ("${CMAKE_MATCH_COUNT}" GREATER 0)
# capitalize the first letter of each word in the repo name.
string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
foreach(WORD ${REPO_NAME_LIST})
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 3f8b6cda8..e6fa11a03 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -7,6 +7,10 @@ include(DownloadExternals)
add_library(catch-single-include INTERFACE)
target_include_directories(catch-single-include INTERFACE catch/single_include)
+# libfmt
+add_subdirectory(fmt)
+add_library(fmt::fmt ALIAS fmt)
+
# Dynarmic
if (ARCHITECTURE_x86_64)
set(DYNARMIC_TESTS OFF)
@@ -14,10 +18,6 @@ if (ARCHITECTURE_x86_64)
add_subdirectory(dynarmic)
endif()
-# libfmt
-add_subdirectory(fmt)
-add_library(fmt::fmt ALIAS fmt)
-
# getopt
if (MSVC)
add_subdirectory(getopt)
diff --git a/externals/dynarmic b/externals/dynarmic
-Subproject 4e6848d1c9e8dadc70595c15b5589f8b14aad47
+Subproject 2683a9a3e316b5c3f387bbe6787732b9ff44b8d
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9aea4af87..a1d87bbbc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -21,15 +21,27 @@ if (MSVC)
# Ensure that projects build with Unicode support.
add_definitions(-DUNICODE -D_UNICODE)
- # /W3 - Level 3 warnings
- # /MP - Multi-threaded compilation
- # /Zi - Output debugging information
- # /Zo - enhanced debug info for optimized builds
- # /permissive- - enables stricter C++ standards conformance checks
- # /EHsc - C++-only exception handling semantics
- # /Zc:throwingNew - let codegen assume `operator new` will never return null
- # /Zc:inline - let codegen omit inline functions in object files
- add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
+ # /W3 - Level 3 warnings
+ # /MP - Multi-threaded compilation
+ # /Zi - Output debugging information
+ # /Zo - Enhanced debug info for optimized builds
+ # /permissive- - Enables stricter C++ standards conformance checks
+ # /EHsc - C++-only exception handling semantics
+ # /Zc:externConstexpr - Allow extern constexpr variables to have external linkage, like the standard mandates
+ # /Zc:inline - Let codegen omit inline functions in object files
+ # /Zc:throwingNew - Let codegen assume `operator new` (without std::nothrow) will never return null
+ add_compile_options(
+ /W3
+ /MP
+ /Zi
+ /Zo
+ /permissive-
+ /EHsc
+ /std:c++latest
+ /Zc:externConstexpr
+ /Zc:inline
+ /Zc:throwingNew
+ )
# /GS- - No stack buffer overflow checks
add_compile_options("$<$<CONFIG:Release>:/GS->")
diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp
index 60a35c67c..978526492 100644
--- a/src/common/zstd_compression.cpp
+++ b/src/common/zstd_compression.cpp
@@ -2,8 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#pragma once
-
#include <algorithm>
#include <zstd.h>
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 1320bbe77..eda466a5d 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -10,6 +10,8 @@
namespace Core::Frontend {
+GraphicsContext::~GraphicsContext() = default; // Out-of-line definition of the virtual destructor declared in emu_window.h
+
class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>,
public std::enable_shared_from_this<TouchState> {
public:
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 70a522556..e2c290dc1 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -19,6 +19,8 @@ namespace Core::Frontend {
*/
class GraphicsContext {
public:
+ virtual ~GraphicsContext();
+
/// Makes the graphics context current for the caller thread
virtual void MakeCurrent() = 0;
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 20d01fc88..0775a89fb 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -241,7 +241,8 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
}
Process::Process(Core::System& system)
- : WaitObject{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {}
+ : WaitObject{system.Kernel()}, vm_manager{system},
+ address_arbiter{system}, mutex{system}, system{system} {}
Process::~Process() = default;
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index f0c0c12fc..48b13cfdd 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -62,7 +62,7 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
return true;
}
-VMManager::VMManager() {
+VMManager::VMManager(Core::System& system) : system{system} {
// Default to assuming a 39-bit address space. This way we have a sane
// starting point with executables that don't provide metadata.
Reset(FileSys::ProgramAddressSpaceType::Is39Bit);
@@ -111,7 +111,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
VirtualMemoryArea& final_vma = vma_handle->second;
ASSERT(final_vma.size == size);
- auto& system = Core::System::GetInstance();
system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
VMAPermission::ReadWriteExecute);
system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
@@ -140,7 +139,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
VirtualMemoryArea& final_vma = vma_handle->second;
ASSERT(final_vma.size == size);
- auto& system = Core::System::GetInstance();
system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
@@ -223,7 +221,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {
ASSERT(FindVMA(target)->second.size >= size);
- auto& system = Core::System::GetInstance();
system.ArmInterface(0).UnmapMemory(target, size);
system.ArmInterface(1).UnmapMemory(target, size);
system.ArmInterface(2).UnmapMemory(target, size);
@@ -376,7 +373,7 @@ ResultCode VMManager::UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64
Reprotect(src_vma_iter, VMAPermission::ReadWrite);
if (dst_memory_state == MemoryState::ModuleCode) {
- Core::System::GetInstance().InvalidateCpuInstructionCaches();
+ system.InvalidateCpuInstructionCaches();
}
return unmap_result;
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 288eb9450..ec84d9a70 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -14,6 +14,10 @@
#include "core/hle/result.h"
#include "core/memory.h"
+namespace Core {
+class System;
+}
+
namespace FileSys {
enum class ProgramAddressSpaceType : u8;
}
@@ -321,7 +325,7 @@ class VMManager final {
public:
using VMAHandle = VMAMap::const_iterator;
- VMManager();
+ explicit VMManager(Core::System& system);
~VMManager();
/// Clears the address space map, re-initializing with a single free area.
@@ -712,5 +716,7 @@ private:
// The end of the currently allocated heap. This is not an inclusive
// end of the range. This is essentially 'base_address + current_size'.
VAddr heap_end = 0;
+
+ Core::System& system;
};
} // namespace Kernel
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index a86653204..8592b1f44 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -21,8 +21,6 @@
#include "core/memory.h"
#include "core/settings.h"
-#pragma optimize("", off)
-
namespace Loader {
namespace {
struct MODHeader {
diff --git a/src/core/memory.h b/src/core/memory.h
index b9fa18b1d..04e2c5f1d 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -72,15 +72,6 @@ u8* GetPointer(VAddr vaddr);
std::string ReadCString(VAddr vaddr, std::size_t max_length);
-enum class FlushMode {
- /// Write back modified surfaces to RAM
- Flush,
- /// Remove region from the cache
- Invalidate,
- /// Write back modified surfaces to RAM, and also remove them from the cache
- FlushAndInvalidate,
-};
-
/**
* Mark each page touching the region as cached.
*/
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index e1db06811..4b17bada5 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -102,12 +102,6 @@ bool VerifyLogin(const std::string& username, const std::string& token) {
}
TelemetrySession::TelemetrySession() {
-#ifdef ENABLE_WEB_SERVICE
- backend = std::make_unique<WebService::TelemetryJson>(
- Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
-#else
- backend = std::make_unique<Telemetry::NullVisitor>();
-#endif
// Log one-time top-level information
AddField(Telemetry::FieldType::None, "TelemetryId", GetTelemetryId());
@@ -175,9 +169,14 @@ TelemetrySession::~TelemetrySession() {
.count()};
AddField(Telemetry::FieldType::Session, "Shutdown_Time", shutdown_time);
+#ifdef ENABLE_WEB_SERVICE
+ auto backend = std::make_unique<WebService::TelemetryJson>(
+ Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
+#else
+ auto backend = std::make_unique<Telemetry::NullVisitor>();
+#endif
+
// Complete the session, submitting to web service if necessary
- // This is just a placeholder to wrap up the session once the core completes and this is
- // destroyed. This will be moved elsewhere once we are actually doing real I/O with the service.
field_collection.Accept(*backend);
if (Settings::values.enable_telemetry)
backend->Complete();
@@ -186,6 +185,8 @@ TelemetrySession::~TelemetrySession() {
bool TelemetrySession::SubmitTestcase() {
#ifdef ENABLE_WEB_SERVICE
+ auto backend = std::make_unique<WebService::TelemetryJson>(
+ Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
field_collection.Accept(*backend);
return backend->SubmitTestcase();
#else
diff --git a/src/core/telemetry_session.h b/src/core/telemetry_session.h
index 023612b79..cae5a45a0 100644
--- a/src/core/telemetry_session.h
+++ b/src/core/telemetry_session.h
@@ -39,7 +39,6 @@ public:
private:
Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session
- std::unique_ptr<Telemetry::VisitorInterface> backend; ///< Backend interface that logs fields
};
/**
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6821f275d..1e010e4da 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -3,6 +3,8 @@ add_library(video_core STATIC
dma_pusher.h
debug_utils/debug_utils.cpp
debug_utils/debug_utils.h
+ engines/engine_upload.cpp
+ engines/engine_upload.h
engines/fermi_2d.cpp
engines/fermi_2d.h
engines/kepler_compute.cpp
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 6674d9405..036e66f05 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -105,6 +105,8 @@ bool DmaPusher::Step() {
dma_state.non_incrementing = false;
dma_increment_once = true;
break;
+ default:
+ break;
}
}
}
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
new file mode 100644
index 000000000..f8aa4ff55
--- /dev/null
+++ b/src/video_core/engines/engine_upload.cpp
@@ -0,0 +1,48 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+#include "common/assert.h"
+#include "video_core/engines/engine_upload.h"
+#include "video_core/memory_manager.h"
+#include "video_core/textures/decoders.h"
+
+namespace Tegra::Engines::Upload {
+
+/// Binds the upload state machine to the GPU memory manager and to the owning engine's upload
+/// register block. Both are stored as references, so they must outlive this object.
+State::State(MemoryManager& memory_manager, Registers& regs)
+    // Initializers are listed in member declaration order (regs, then memory_manager), which is
+    // the order they actually run in; this also keeps -Wreorder quiet.
+    : regs(regs), memory_manager(memory_manager) {}
+
+/// Starts a new upload: records the requested layout mode, sizes the staging buffer to
+/// line_length_in * line_count bytes and rewinds the write cursor.
+void State::ProcessExec(const bool is_linear) {
+    this->is_linear = is_linear;
+
+    const u32 total_bytes = regs.line_length_in * regs.line_count;
+    copy_size = total_bytes;
+    write_offset = 0;
+    inner_buffer.resize(total_bytes);
+}
+
+/// Appends one 32-bit data word to the staging buffer and, on the final word of the method
+/// call, writes the staged bytes to the destination address in GPU memory.
+///
+/// @param data         Word pushed through the engine's data register.
+/// @param is_last_call True for the last word of the call; triggers the write to GPU memory.
+void State::ProcessData(const u32 data, const bool is_last_call) {
+    // Only the bytes still missing from the transfer are taken from the word, so a transfer
+    // whose size is not a multiple of four drops the unused tail of its final word.
+    const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
+    // When the buffer is already full (or the transfer is empty) there is nothing to copy.
+    // Skipping the copy avoids evaluating inner_buffer[write_offset] with
+    // write_offset == inner_buffer.size(), which is an out-of-range subscript even for a
+    // zero-byte memcpy.
+    if (sub_copy_size != 0) {
+        std::memcpy(inner_buffer.data() + write_offset, &data, sub_copy_size);
+        write_offset += sub_copy_size;
+    }
+    if (!is_last_call) {
+        return;
+    }
+
+    const GPUVAddr address{regs.dest.Address()};
+    if (is_linear) {
+        // Linear destination: the staged bytes are written out unchanged.
+        memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
+        return;
+    }
+
+    // Swizzled destination: only a single 2D slice with unit block width/depth is handled.
+    UNIMPLEMENTED_IF(regs.dest.z != 0);
+    UNIMPLEMENTED_IF(regs.dest.depth != 1);
+    UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
+    UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
+    const std::size_t dst_size = Tegra::Texture::CalculateSize(
+        true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
+    // Read-modify-write: the current destination contents are fetched into tmp_buffer,
+    // SwizzleKepler is handed the staged data together with tmp_buffer, and tmp_buffer is
+    // then written back in full.
+    tmp_buffer.resize(dst_size);
+    memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
+    Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
+                                  regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
+                                  tmp_buffer.data());
+    memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
+}
+
+} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
new file mode 100644
index 000000000..9c6e0d21c
--- /dev/null
+++ b/src/video_core/engines/engine_upload.h
@@ -0,0 +1,75 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Tegra::Engines::Upload {
+
+/// Register block describing an upload. Engines embed it in their own register file
+/// (KeplerCompute places it at word 0x60); the field order defines the hardware layout and
+/// must not change.
+struct Registers {
+    u32 line_length_in;
+    u32 line_count;
+
+    struct {
+        u32 address_high;
+        u32 address_low;
+        u32 pitch;
+        union {
+            BitField<0, 4, u32> block_width;
+            BitField<4, 4, u32> block_height;
+            BitField<8, 4, u32> block_depth;
+        };
+        u32 width;
+        u32 height;
+        u32 depth;
+        u32 z;
+        u32 x;
+        u32 y;
+
+        /// Joins the high and low address words into one GPU virtual address.
+        GPUVAddr Address() const {
+            const GPUVAddr upper_bits = static_cast<GPUVAddr>(address_high) << 32;
+            return static_cast<GPUVAddr>(upper_bits | address_low);
+        }
+
+        // The block dimensions are stored as shift amounts; the accessors below decode them.
+
+        u32 BlockWidth() const {
+            const u32 shift = block_width.Value();
+            return 1U << shift;
+        }
+
+        u32 BlockHeight() const {
+            const u32 shift = block_height.Value();
+            return 1U << shift;
+        }
+
+        u32 BlockDepth() const {
+            const u32 shift = block_depth.Value();
+            return 1U << shift;
+        }
+    } dest;
+};
+
+/// Staging state for an upload driven through a Registers block: ProcessExec() begins a
+/// transfer and ProcessData() feeds it one 32-bit word at a time.
+class State {
+public:
+    State(MemoryManager& memory_manager, Registers& regs);
+    ~State() = default;
+
+    /// Begins a transfer; is_linear selects the linear (true) or swizzled (false) write path.
+    void ProcessExec(const bool is_linear);
+
+    /// Stages one data word; when is_last_call is set the staged data is written to GPU memory.
+    void ProcessData(const u32 data, const bool is_last_call);
+
+private:
+    // Members are initialized in the order declared here; keep the constructor in sync.
+    u32 write_offset{0};           ///< Number of bytes staged so far
+    u32 copy_size{0};              ///< Total size of the current transfer in bytes
+    std::vector<u8> inner_buffer;  ///< Staging buffer holding the uploaded bytes
+    std::vector<u8> tmp_buffer;    ///< Scratch buffer used by the swizzled write path
+    bool is_linear{false};         ///< Layout mode latched by ProcessExec()
+    Registers& regs;
+    MemoryManager& memory_manager;
+};
+
+} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 2e51b7f13..45f59a4d9 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -21,6 +21,12 @@ class RasterizerInterface;
namespace Tegra::Engines {
+/**
+ * This Engine is known as G80_2D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
+ */
+
#define FERMI2D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index b1d950460..7404a8163 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -4,12 +4,21 @@
#include "common/assert.h"
#include "common/logging/log.h"
+#include "core/core.h"
#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
+#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
-KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
+/// Stores the system, rasterizer and memory manager references and binds the shared upload
+/// state machine to this engine's `upload` register block.
+KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                             MemoryManager& memory_manager)
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
+      upload_state{memory_manager, regs.upload} {}
KeplerCompute::~KeplerCompute() = default;
@@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
regs.reg_array[method_call.method] = method_call.argument;
switch (method_call.method) {
+ case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
+ upload_state.ProcessExec(regs.exec_upload.linear != 0);
+ break;
+ }
+ case KEPLER_COMPUTE_REG_INDEX(data_upload): {
+ const bool is_last_call = method_call.IsLastCall();
+ upload_state.ProcessData(method_call.argument, is_last_call);
+ if (is_last_call) {
+ system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+ }
+ break;
+ }
case KEPLER_COMPUTE_REG_INDEX(launch):
- // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
- // kernels)
- UNREACHABLE_MSG("Compute shaders are not implemented");
+ ProcessLaunch();
break;
default:
break;
}
}
+/// Handles a write to the launch register: fetches the launch descriptor from GPU memory into
+/// launch_description and logs where the kernel would start. No compute work is executed.
+void KeplerCompute::ProcessLaunch() {
+    constexpr std::size_t descriptor_bytes = LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32);
+    const GPUVAddr descriptor_addr = regs.launch_desc_loc.Address();
+    memory_manager.ReadBlockUnsafe(descriptor_addr, &launch_description, descriptor_bytes);
+
+    // The kernel address is the code base address plus the descriptor's program_start value.
+    const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
+    LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index fb6cdf432..5250b8d9b 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -6,22 +6,40 @@
#include <array>
#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
+namespace Core {
+class System;
+}
+
namespace Tegra {
class MemoryManager;
}
+namespace VideoCore {
+class RasterizerInterface;
+}
+
namespace Tegra::Engines {
+/**
+ * This Engine is known as GK104_Compute. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
+ */
+
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
class KeplerCompute final {
public:
- explicit KeplerCompute(MemoryManager& memory_manager);
+ explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+ MemoryManager& memory_manager);
~KeplerCompute();
static constexpr std::size_t NumConstBuffers = 8;
@@ -31,30 +49,181 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0xAF);
+ INSERT_PADDING_WORDS(0x60);
+
+ Upload::Registers upload;
+
+ struct {
+ union {
+ BitField<0, 1, u32> linear;
+ };
+ } exec_upload;
+
+ u32 data_upload;
+
+ INSERT_PADDING_WORDS(0x3F);
+
+ struct {
+ u32 address;
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
+ }
+ } launch_desc_loc;
+
+ INSERT_PADDING_WORDS(0x1);
u32 launch;
- INSERT_PADDING_WORDS(0xC48);
+ INSERT_PADDING_WORDS(0x4A7);
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+ u32 limit;
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } tsc;
+
+ INSERT_PADDING_WORDS(0x3);
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+ u32 limit;
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } tic;
+
+ INSERT_PADDING_WORDS(0x22);
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } code_loc;
+
+ INSERT_PADDING_WORDS(0x3FE);
+
+ u32 texture_const_buffer_index;
+
+ INSERT_PADDING_WORDS(0x374);
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
+
+ struct LaunchParams { // Launch descriptor that ProcessLaunch() reads from GPU memory; field order defines the layout
+ static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40; // Descriptor size in 32-bit words (checked by static_assert below)
+
+ INSERT_PADDING_WORDS(0x8); // words 0x00-0x07
+
+ u32 program_start; // word 0x08; added to code_loc.Address() by ProcessLaunch()
+
+ INSERT_PADDING_WORDS(0x2); // words 0x09-0x0A
+
+ BitField<30, 1, u32> linked_tsc; // word 0x0B, bit 30
+
+ BitField<0, 31, u32> grid_dim_x; // word 0x0C
+ union { // word 0x0D: y in the low half, z in the high half
+ BitField<0, 16, u32> grid_dim_y;
+ BitField<16, 16, u32> grid_dim_z;
+ };
+
+ INSERT_PADDING_WORDS(0x3); // words 0x0E-0x10
+
+ BitField<0, 16, u32> shared_alloc; // word 0x11
+
+ BitField<0, 31, u32> block_dim_x; // word 0x12
+ union { // word 0x13: y in the low half, z in the high half
+ BitField<0, 16, u32> block_dim_y;
+ BitField<16, 16, u32> block_dim_z;
+ };
+
+ union { // word 0x14
+ BitField<0, 8, u32> const_buffer_enable_mask;
+ BitField<29, 2, u32> cache_layout;
+ } memory_config;
+
+ INSERT_PADDING_WORDS(0x8); // words 0x15-0x1C
+
+ struct { // words 0x1D-0x2C: eight entries of two words each
+ u32 address_low;
+ union { // second word shares the upper address bits and the size field
+ BitField<0, 8, u32> address_high;
+ BitField<15, 17, u32> size;
+ };
+ GPUVAddr Address() const { // 40-bit address: 8 high bits placed above the 32-bit low word
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
+ address_low);
+ }
+ } const_buffer_config[8];
+
+ union { // word 0x2D
+ BitField<0, 20, u32> local_pos_alloc;
+ BitField<27, 5, u32> barrier_alloc;
+ };
+
+ union { // word 0x2E
+ BitField<0, 20, u32> local_neg_alloc;
+ BitField<24, 5, u32> gpr_alloc;
+ };
+
+ INSERT_PADDING_WORDS(0x11); // words 0x2F-0x3F
+ } launch_description; // Filled by ProcessLaunch() via ReadBlockUnsafe
+
+ struct { // NOTE(review): mirrors the bookkeeping fields of Upload::State; no use of it is visible in this change - confirm whether it is still needed
+ u32 write_offset = 0;
+ u32 copy_size = 0;
+ std::vector<u8> inner_buffer;
+ } state{};
+
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
"KeplerCompute Regs has wrong size");
+ static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
+ "KeplerCompute LaunchParams has wrong size");
+
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
private:
+ Core::System& system;
+ VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
+ Upload::State upload_state;
+
+ void ProcessLaunch();
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
+// Compile-time check that a LaunchParams field sits at the given 32-bit word index.
+// The position argument is parenthesized so that expression arguments keep their meaning.
+#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position)                                         \
+    static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == (position) * 4,             \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
+// data_upload (0x6D) is followed by 0x3F padding words, which puts launch_desc_loc at 0xAD.
+ASSERT_REG_POSITION(launch_desc_loc, 0xAD);
ASSERT_REG_POSITION(launch, 0xAF);
+ASSERT_REG_POSITION(tsc, 0x557);
+ASSERT_REG_POSITION(tic, 0x55D);
+ASSERT_REG_POSITION(code_loc, 0x582);
+ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
+ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
+ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
+ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
+ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
+ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
+ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
#undef ASSERT_REG_POSITION
+// Helper macros are local to this header; do not leak them into every includer.
+#undef ASSERT_LAUNCH_PARAM_POSITION
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 7387886a3..0561f676c 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -14,9 +14,8 @@
namespace Tegra::Engines {
-KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager)
- : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
+/// Keeps references to the system and memory manager and binds the shared upload state
+/// machine to this engine's `upload` register block.
+KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
+    : system{system}, memory_manager{memory_manager},
+      upload_state{memory_manager, regs.upload} {}
KeplerMemory::~KeplerMemory() = default;
@@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
switch (method_call.method) {
case KEPLERMEMORY_REG_INDEX(exec): {
- ProcessExec();
+ upload_state.ProcessExec(regs.exec.linear != 0);
break;
}
case KEPLERMEMORY_REG_INDEX(data): {
- ProcessData(method_call.argument, method_call.IsLastCall());
+ const bool is_last_call = method_call.IsLastCall();
+ upload_state.ProcessData(method_call.argument, is_last_call);
+ if (is_last_call) {
+ system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+ }
break;
}
}
}
-void KeplerMemory::ProcessExec() {
- state.write_offset = 0;
- state.copy_size = regs.line_length_in * regs.line_count;
- state.inner_buffer.resize(state.copy_size);
-}
-
-void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
- const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
- std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
- state.write_offset += sub_copy_size;
- if (is_last_call) {
- const GPUVAddr address{regs.dest.Address()};
- if (regs.exec.linear != 0) {
- memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
- } else {
- UNIMPLEMENTED_IF(regs.dest.z != 0);
- UNIMPLEMENTED_IF(regs.dest.depth != 1);
- UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
- UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
- const std::size_t dst_size = Tegra::Texture::CalculateSize(
- true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
- std::vector<u8> tmp_buffer(dst_size);
- memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
- Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
- regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
- state.inner_buffer.data(), tmp_buffer.data());
- memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
- }
- system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
- }
-}
-
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 5f892ddad..f3bc675a9 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,6 +10,7 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
namespace Core {
@@ -20,19 +21,20 @@ namespace Tegra {
class MemoryManager;
}
-namespace VideoCore {
-class RasterizerInterface;
-}
-
namespace Tegra::Engines {
+/**
+ * This Engine is known as P2MF. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
+ */
+
#define KEPLERMEMORY_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
class KeplerMemory final {
public:
- KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager);
+ KeplerMemory(Core::System& system, MemoryManager& memory_manager);
~KeplerMemory();
/// Write the value to the register identified by method.
@@ -45,42 +47,7 @@ public:
struct {
INSERT_PADDING_WORDS(0x60);
- u32 line_length_in;
- u32 line_count;
-
- struct {
- u32 address_high;
- u32 address_low;
- u32 pitch;
- union {
- BitField<0, 4, u32> block_width;
- BitField<4, 4, u32> block_height;
- BitField<8, 4, u32> block_depth;
- };
- u32 width;
- u32 height;
- u32 depth;
- u32 z;
- u32 x;
- u32 y;
-
- GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
- }
-
- u32 BlockWidth() const {
- return 1U << block_width.Value();
- }
-
- u32 BlockHeight() const {
- return 1U << block_height.Value();
- }
-
- u32 BlockDepth() const {
- return 1U << block_depth.Value();
- }
- } dest;
+ Upload::Registers upload;
struct {
union {
@@ -96,28 +63,17 @@ public:
};
} regs{};
- struct {
- u32 write_offset = 0;
- u32 copy_size = 0;
- std::vector<u8> inner_buffer;
- } state{};
-
private:
Core::System& system;
- VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
-
- void ProcessExec();
- void ProcessData(u32 data, bool is_last_call);
+ Upload::State upload_state;
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
-ASSERT_REG_POSITION(line_length_in, 0x60);
-ASSERT_REG_POSITION(line_count, 0x61);
-ASSERT_REG_POSITION(dest, 0x62);
+ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec, 0x6C);
ASSERT_REG_POSITION(data, 0x6D);
#undef ASSERT_REG_POSITION
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 9780417f2..d7b586db9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
- : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
- *this} {
+ : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
+ macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
InitializeRegisterDefaults();
}
@@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessSyncPoint();
break;
}
+ case MAXWELL3D_REG_INDEX(exec_upload): {
+ upload_state.ProcessExec(regs.exec_upload.linear != 0);
+ break;
+ }
+ case MAXWELL3D_REG_INDEX(data_upload): {
+ const bool is_last_call = method_call.IsLastCall();
+ upload_state.ProcessData(method_call.argument, is_last_call);
+ if (is_last_call) {
+ dirty_flags.OnMemoryWrite();
+ }
+ break;
+ }
default:
break;
}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index cc2424d38..4883b582a 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -14,6 +14,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
+#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
#include "video_core/macro_interpreter.h"
#include "video_core/textures/texture.h"
@@ -32,6 +33,12 @@ class RasterizerInterface;
namespace Tegra::Engines {
+/**
+ * This Engine is known as GF100_3D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+ */
+
#define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
@@ -243,9 +250,10 @@ public:
return "10_10_10_2";
case Size::Size_11_11_10:
return "11_11_10";
+ default:
+ UNREACHABLE();
+ return {};
}
- UNREACHABLE();
- return {};
}
std::string TypeString() const {
@@ -579,7 +587,18 @@ public:
u32 bind;
} macros;
- INSERT_PADDING_WORDS(0x69);
+ INSERT_PADDING_WORDS(0x17);
+
+ Upload::Registers upload;
+ struct {
+ union {
+ BitField<0, 1, u32> linear;
+ };
+ } exec_upload;
+
+ u32 data_upload;
+
+ INSERT_PADDING_WORDS(0x44);
struct {
union {
@@ -1175,6 +1194,8 @@ private:
/// Interpreter for the macro codes uploaded to the GPU.
MacroInterpreter macro_interpreter;
+ Upload::State upload_state;
+
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;
@@ -1218,6 +1239,9 @@ private:
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(macros, 0x45);
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 2426d0067..3a5dfef0c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.exec.enable_2d == 1);
- const std::size_t copy_size = regs.x_count * regs.y_count;
+ if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
+ ASSERT(regs.src_params.size_z == 1);
+ // If the input is tiled and the output is linear, deswizzle the input and copy it over.
+ const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+ const std::size_t src_size = Texture::CalculateSize(
+ true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
+ regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
- auto source_ptr{memory_manager.GetPointer(source)};
- auto dst_ptr{memory_manager.GetPointer(dest)};
+ const std::size_t dst_size = regs.dst_pitch * regs.y_count;
- if (!source_ptr) {
- LOG_ERROR(HW_GPU, "source_ptr is invalid");
- return;
- }
+ if (read_buffer.size() < src_size) {
+ read_buffer.resize(src_size);
+ }
- if (!dst_ptr) {
- LOG_ERROR(HW_GPU, "dst_ptr is invalid");
- return;
- }
+ if (write_buffer.size() < dst_size) {
+ write_buffer.resize(dst_size);
+ }
- const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
- // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
- // copying.
- rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
+ memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
- // We have to invalidate the destination region to evict any outdated surfaces from the
- // cache. We do this before actually writing the new data because the destination address
- // might contain a dirty surface that will have to be written back to memory.
- rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
- };
+ Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
+ regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
+ write_buffer.data(), regs.src_params.BlockHeight(),
+ regs.src_params.pos_x, regs.src_params.pos_y);
- if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
- ASSERT(regs.src_params.size_z == 1);
- // If the input is tiled and the output is linear, deswizzle the input and copy it over.
+ memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
+ } else {
+ ASSERT(regs.dst_params.BlockDepth() == 1);
- const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+ const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
- FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
- copy_size * src_bytes_per_pixel);
+ const std::size_t dst_size = Texture::CalculateSize(
+ true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
+ regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
- Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
- regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
- regs.src_params.BlockHeight(), regs.src_params.pos_x,
- regs.src_params.pos_y);
- } else {
- ASSERT(regs.dst_params.size_z == 1);
- ASSERT(regs.src_pitch == regs.x_count);
+ const std::size_t dst_layer_size = Texture::CalculateSize(
+ true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
+ regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
- const u32 src_bpp = regs.src_pitch / regs.x_count;
+ const std::size_t src_size = regs.src_pitch * regs.y_count;
- FlushAndInvalidate(regs.src_pitch * regs.y_count,
- regs.dst_params.size_x * regs.dst_params.size_y * src_bpp);
+ if (read_buffer.size() < src_size) {
+ read_buffer.resize(src_size);
+ }
+
+ if (write_buffer.size() < dst_size) {
+ write_buffer.resize(dst_size);
+ }
+
+ memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
- src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
+ src_bytes_per_pixel,
+ write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
+ read_buffer.data(), regs.dst_params.BlockHeight());
+
+ memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
}
}
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index c6b649842..e5942f671 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -6,6 +6,7 @@
#include <array>
#include <cstddef>
+#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -25,6 +26,11 @@ class RasterizerInterface;
namespace Tegra::Engines {
+/**
+ * This Engine is known as GK104_Copy. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
+ */
+
class MaxwellDMA final {
public:
explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
@@ -63,6 +69,16 @@ public:
static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
+ enum class ComponentMode : u32 {
+ Src0 = 0,
+ Src1 = 1,
+ Src2 = 2,
+ Src3 = 3,
+ Const0 = 4,
+ Const1 = 5,
+ Zero = 6,
+ };
+
enum class CopyMode : u32 {
None = 0,
Unk1 = 1,
@@ -128,7 +144,26 @@ public:
u32 x_count;
u32 y_count;
- INSERT_PADDING_WORDS(0xBB);
+ INSERT_PADDING_WORDS(0xB8);
+
+ u32 const0;
+ u32 const1;
+ union {
+ BitField<0, 4, ComponentMode> component0;
+ BitField<4, 4, ComponentMode> component1;
+ BitField<8, 4, ComponentMode> component2;
+ BitField<12, 4, ComponentMode> component3;
+ BitField<16, 2, u32> component_size;
+ BitField<20, 3, u32> src_num_components;
+ BitField<24, 3, u32> dst_num_components;
+
+ u32 SrcBytePerPixel() const {
+ return src_num_components.Value() * component_size.Value();
+ }
+ u32 DstBytePerPixel() const {
+ return dst_num_components.Value() * component_size.Value();
+ }
+ } swizzle_config;
Parameters dst_params;
@@ -149,6 +184,9 @@ private:
MemoryManager& memory_manager;
+ std::vector<u8> read_buffer;
+ std::vector<u8> write_buffer;
+
/// Performs the copy from the source buffer to the destination buffer as configured in the
/// registers.
void HandleCopy();
@@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);
ASSERT_REG_POSITION(dst_pitch, 0x105);
ASSERT_REG_POSITION(x_count, 0x106);
ASSERT_REG_POSITION(y_count, 0x107);
+ASSERT_REG_POSITION(const0, 0x1C0);
+ASSERT_REG_POSITION(const1, 0x1C1);
+ASSERT_REG_POSITION(swizzle_config, 0x1C2);
ASSERT_REG_POSITION(dst_params, 0x1C3);
ASSERT_REG_POSITION(src_params, 0x1CA);
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4461083ff..52706505b 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
- kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
+ kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
- kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
+ kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
}
GPU::~GPU() = default;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9a088a503..3cc945235 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -305,6 +305,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
case Maxwell::ShaderProgram::Geometry:
shader_program_manager->UseTrivialGeometryShader();
break;
+ default:
+ break;
}
continue;
}
@@ -920,8 +922,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
viewport.y = viewport_rect.bottom;
viewport.width = viewport_rect.GetWidth();
viewport.height = viewport_rect.GetHeight();
- viewport.depth_range_far = regs.viewports[i].depth_range_far;
- viewport.depth_range_near = regs.viewports[i].depth_range_near;
+ viewport.depth_range_far = src.depth_range_far;
+ viewport.depth_range_near = src.depth_range_near;
}
state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 2a81b1169..b1c8f7c35 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -363,6 +363,10 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
if (stop_loading)
return;
+ // Track if precompiled cache was altered during loading to know if we have to serialize the
+ // virtual precompiled cache file back to the hard drive
+ bool precompiled_cache_altered = false;
+
// Build shaders
if (callback)
callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
@@ -384,6 +388,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
if (!shader) {
// Invalidate the precompiled cache if a shader dumped shader was rejected
disk_cache.InvalidatePrecompiled();
+ precompiled_cache_altered = true;
dumps.clear();
}
}
@@ -405,8 +410,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
if (dumps.find(usage) == dumps.end()) {
const auto& program = precompiled_programs.at(usage);
disk_cache.SaveDump(usage, program->handle);
+ precompiled_cache_altered = true;
}
}
+
+ if (precompiled_cache_altered) {
+ disk_cache.SaveVirtualPrecompiledFile();
+ }
}
CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 53752b38d..254c0d499 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -104,7 +104,8 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
return true;
}
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
+ : system{system}, precompiled_cache_virtual_file_offset{0} {}
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
ShaderDiskCacheOpenGL::LoadTransferable() {
@@ -177,6 +178,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
return {};
}
}
+
return {{raws, usages}};
}
@@ -208,59 +210,64 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
+ // Read compressed file from disk and decompress to virtual precompiled cache file
+ std::vector<u8> compressed(file.GetSize());
+ file.ReadBytes(compressed.data(), compressed.size());
+ const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
+ SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
+ precompiled_cache_virtual_file_offset = 0;
+
ShaderCacheVersionHash file_hash{};
- if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) {
+ if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
+ precompiled_cache_virtual_file_offset = 0;
return {};
}
if (GetShaderCacheVersionHash() != file_hash) {
LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
+ precompiled_cache_virtual_file_offset = 0;
return {};
}
std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
- while (file.Tell() < file.GetSize()) {
+ while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
PrecompiledEntryKind kind{};
- if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
+ if (!LoadObjectFromPrecompiled(kind)) {
return {};
}
switch (kind) {
case PrecompiledEntryKind::Decompiled: {
u64 unique_identifier{};
- if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64))
+ if (!LoadObjectFromPrecompiled(unique_identifier)) {
return {};
+ }
- const auto entry = LoadDecompiledEntry(file);
- if (!entry)
+ const auto entry = LoadDecompiledEntry();
+ if (!entry) {
return {};
+ }
decompiled.insert({unique_identifier, std::move(*entry)});
break;
}
case PrecompiledEntryKind::Dump: {
ShaderDiskCacheUsage usage;
- if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage))
+ if (!LoadObjectFromPrecompiled(usage)) {
return {};
+ }
ShaderDiskCacheDump dump;
- if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32))
- return {};
-
- u32 binary_length{};
- u32 compressed_size{};
- if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) ||
- file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) {
+ if (!LoadObjectFromPrecompiled(dump.binary_format)) {
return {};
}
- std::vector<u8> compressed_binary(compressed_size);
- if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) !=
- compressed_binary.size()) {
+ u32 binary_length{};
+ if (!LoadObjectFromPrecompiled(binary_length)) {
return {};
}
- dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
- if (dump.binary.empty()) {
+ dump.binary.resize(binary_length);
+ if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
return {};
}
@@ -274,45 +281,41 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {{decompiled, dumps}};
}
-std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry(
- FileUtil::IOFile& file) {
+std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() {
u32 code_size{};
- u32 compressed_code_size{};
- if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
- file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) {
+ if (!LoadObjectFromPrecompiled(code_size)) {
return {};
}
- std::vector<u8> compressed_code(compressed_code_size);
- if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
+ std::vector<u8> code(code_size);
+ if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
return {};
}
- const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
- if (code.empty()) {
- return {};
- }
ShaderDiskCacheDecompiled entry;
entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
u32 const_buffers_count{};
- if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32))
+ if (!LoadObjectFromPrecompiled(const_buffers_count)) {
return {};
+ }
+
for (u32 i = 0; i < const_buffers_count; ++i) {
u32 max_offset{};
u32 index{};
u8 is_indirect{};
- if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) ||
- file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) ||
- file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) {
+ if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
+ !LoadObjectFromPrecompiled(is_indirect)) {
return {};
}
entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
}
u32 samplers_count{};
- if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32))
+ if (!LoadObjectFromPrecompiled(samplers_count)) {
return {};
+ }
+
for (u32 i = 0; i < samplers_count; ++i) {
u64 offset{};
u64 index{};
@@ -320,12 +323,9 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
u8 is_array{};
u8 is_shadow{};
u8 is_bindless{};
- if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
- file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
- file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
- file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
- file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) ||
- file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) {
+ if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
+ !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
+ !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
@@ -335,17 +335,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
}
u32 global_memory_count{};
- if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32))
+ if (!LoadObjectFromPrecompiled(global_memory_count)) {
return {};
+ }
+
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
u8 is_read{};
u8 is_written{};
- if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
- file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) ||
- file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) ||
- file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) {
+ if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
+ !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
return {};
}
entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
@@ -354,74 +354,81 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
for (auto& clip_distance : entry.entries.clip_distances) {
u8 clip_distance_raw{};
- if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8))
+ if (!LoadObjectFromPrecompiled(clip_distance_raw))
return {};
clip_distance = clip_distance_raw != 0;
}
u64 shader_length{};
- if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64))
+ if (!LoadObjectFromPrecompiled(shader_length)) {
return {};
+ }
+
entry.entries.shader_length = static_cast<std::size_t>(shader_length);
return entry;
}
-bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier,
- const std::string& code,
- const std::vector<u8>& compressed_code,
+bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code,
const GLShader::ShaderEntries& entries) {
- if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
- file.WriteObject(unique_identifier) != 1 ||
- file.WriteObject(static_cast<u32>(code.size())) != 1 ||
- file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 ||
- file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
+ if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
+ !SaveObjectToPrecompiled(unique_identifier) ||
+ !SaveObjectToPrecompiled(static_cast<u32>(code.size())) ||
+ !SaveArrayToPrecompiled(code.data(), code.size())) {
return false;
}
- if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1)
+ if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) {
return false;
+ }
for (const auto& cbuf : entries.const_buffers) {
- if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 ||
- file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 ||
- file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) {
+ if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
+ !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
+ !SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
return false;
}
}
- if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1)
+ if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) {
return false;
+ }
for (const auto& sampler : entries.samplers) {
- if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 ||
- file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
- file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
- file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
- file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 ||
- file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) {
+ if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
+ !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
+ !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
+ !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
+ !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
+ !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
return false;
}
}
- if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1)
+ if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
return false;
+ }
for (const auto& gmem : entries.global_memory_entries) {
- if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
- file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 ||
- file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 ||
- file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) {
+ if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
+ !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
+ !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
+ !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
return false;
}
}
for (const bool clip_distance : entries.clip_distances) {
- if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1)
+ if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
return false;
+ }
}
- return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1;
+ if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
+ return false;
+ }
+
+ return true;
}
-void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
+void ShaderDiskCacheOpenGL::InvalidateTransferable() {
if (!FileUtil::Delete(GetTransferablePath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
GetTransferablePath());
@@ -429,7 +436,10 @@ void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
InvalidatePrecompiled();
}
-void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const {
+void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
+ // Clear virtual precompiled cache file
+ precompiled_cache_virtual_file.Resize(0);
+
if (!FileUtil::Delete(GetPrecompiledPath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
}
@@ -465,7 +475,10 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
auto& usages{it->second};
- ASSERT(usages.find(usage) == usages.end());
+ if (usages.find(usage) != usages.end()) {
+ // Skip this variant since the shader is already stored.
+ return;
+ }
usages.insert(usage);
FileUtil::IOFile file = AppendTransferableFile();
@@ -485,22 +498,13 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
if (!IsUsable())
return;
- const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
- reinterpret_cast<const u8*>(code.data()), code.size())};
- if (compressed_code.empty()) {
- LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
- unique_identifier);
- return;
+ if (precompiled_cache_virtual_file.GetSize() == 0) {
+ SavePrecompiledHeaderToVirtualPrecompiledCache();
}
- FileUtil::IOFile file = AppendPrecompiledFile();
- if (!file.IsOpen())
- return;
-
- if (!SaveDecompiledFile(file, unique_identifier, code, compressed_code, entries)) {
+ if (!SaveDecompiledFile(unique_identifier, code, entries)) {
LOG_ERROR(Render_OpenGL,
"Failed to save decompiled entry to the precompiled file - removing");
- file.Close();
InvalidatePrecompiled();
}
}
@@ -516,28 +520,13 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
- const std::vector<u8> compressed_binary =
- Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());
-
- if (compressed_binary.empty()) {
- LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
- usage.unique_identifier);
- return;
- }
-
- FileUtil::IOFile file = AppendPrecompiledFile();
- if (!file.IsOpen())
- return;
-
- if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
- file.WriteObject(usage) != 1 || file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
- file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
- file.WriteObject(static_cast<u32>(compressed_binary.size())) != 1 ||
- file.WriteArray(compressed_binary.data(), compressed_binary.size()) !=
- compressed_binary.size()) {
+ if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) ||
+ !SaveObjectToPrecompiled(usage) ||
+ !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
+ !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
+ !SaveArrayToPrecompiled(binary.data(), binary.size())) {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
usage.unique_identifier);
- file.Close();
InvalidatePrecompiled();
return;
}
@@ -570,28 +559,33 @@ FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
return file;
}
-FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const {
- if (!EnsureDirectories())
- return {};
+void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
+ const auto hash{GetShaderCacheVersionHash()};
+ if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
+ LOG_ERROR(
+ Render_OpenGL,
+ "Failed to write precompiled cache version hash to virtual precompiled cache file");
+ }
+}
+
+void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
+ precompiled_cache_virtual_file_offset = 0;
+ const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
+ const std::vector<u8>& compressed =
+ Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
const auto precompiled_path{GetPrecompiledPath()};
- const bool existed = FileUtil::Exists(precompiled_path);
+ FileUtil::IOFile file(precompiled_path, "wb");
- FileUtil::IOFile file(precompiled_path, "ab");
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
- return {};
+ return;
}
-
- if (!existed || file.GetSize() == 0) {
- const auto hash{GetShaderCacheVersionHash()};
- if (file.WriteArray(hash.data(), hash.size()) != hash.size()) {
- LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}",
- precompiled_path);
- return {};
- }
+ if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
+ LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
+ precompiled_path);
+ return;
}
- return file;
}
bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 6be0c0547..0142b2e3b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -16,6 +16,7 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "core/file_sys/vfs_vector.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -172,10 +173,10 @@ public:
LoadPrecompiled();
/// Removes the transferable (and precompiled) cache file.
- void InvalidateTransferable() const;
+ void InvalidateTransferable();
- /// Removes the precompiled cache file.
- void InvalidatePrecompiled() const;
+ /// Removes the precompiled cache file and clears virtual precompiled cache file.
+ void InvalidatePrecompiled();
/// Saves a raw dump to the transferable file. Checks for collisions.
void SaveRaw(const ShaderDiskCacheRaw& entry);
@@ -190,18 +191,21 @@ public:
/// Saves a dump entry to the precompiled file. Does not check for collisions.
void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
+ /// Serializes virtual precompiled shader cache file to real file
+ void SaveVirtualPrecompiledFile();
+
private:
/// Loads the transferable cache. Returns empty on failure.
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
LoadPrecompiledFile(FileUtil::IOFile& file);
- /// Loads a decompiled cache entry from the passed file. Returns empty on failure.
- std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file);
+ /// Loads a decompiled cache entry from precompiled_cache_virtual_file. Returns empty on
+ /// failure.
+ std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();
/// Saves a decompiled entry to the passed file. Returns true on success.
- bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code,
- const std::vector<u8>& compressed_code,
+ bool SaveDecompiledFile(u64 unique_identifier, const std::string& code,
const GLShader::ShaderEntries& entries);
/// Returns if the cache can be used
@@ -210,8 +214,8 @@ private:
/// Opens current game's transferable file and write it's header if it doesn't exist
FileUtil::IOFile AppendTransferableFile() const;
- /// Opens current game's precompiled file and write it's header if it doesn't exist
- FileUtil::IOFile AppendPrecompiledFile() const;
+ /// Saves the precompiled cache header (version hash) to precompiled_cache_virtual_file
+ void SavePrecompiledHeaderToVirtualPrecompiledCache();
/// Create shader disk cache directories. Returns true on success.
bool EnsureDirectories() const;
@@ -234,10 +238,42 @@ private:
/// Get current game's title id
std::string GetTitleID() const;
+ template <typename T>
+ bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
+ const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
+ data, length, precompiled_cache_virtual_file_offset);
+ precompiled_cache_virtual_file_offset += write_length;
+ return write_length == sizeof(T) * length;
+ }
+
+ template <typename T>
+ bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
+ const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
+ data, length, precompiled_cache_virtual_file_offset);
+ precompiled_cache_virtual_file_offset += read_length;
+ return read_length == sizeof(T) * length;
+ }
+
+ template <typename T>
+ bool SaveObjectToPrecompiled(const T& object) {
+ return SaveArrayToPrecompiled(&object, 1);
+ }
+
+ template <typename T>
+ bool LoadObjectFromPrecompiled(T& object) {
+ return LoadArrayFromPrecompiled(&object, 1);
+ }
+
// Copre system
Core::System& system;
// Stored transferable shaders
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
+ // Stores the whole precompiled cache, which is read from or saved to the precompiled cache
+ // file
+ FileSys::VectorVfsFile precompiled_cache_virtual_file;
+ // Stores the current offset of the precompiled cache file for IO purposes
+ std::size_t precompiled_cache_virtual_file_offset;
+
// The cache has been loaded at boot
bool tried_to_load{};
};
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index a8833c06e..95b773135 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -27,8 +27,7 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
case Maxwell::VertexAttribute::Type::UnsignedInt:
- case Maxwell::VertexAttribute::Type::UnsignedNorm: {
-
+ case Maxwell::VertexAttribute::Type::UnsignedNorm:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -47,16 +46,13 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_UNSIGNED_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+ UNREACHABLE();
+ return {};
}
-
- LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- UNREACHABLE();
- return {};
- }
-
case Maxwell::VertexAttribute::Type::SignedInt:
- case Maxwell::VertexAttribute::Type::SignedNorm: {
-
+ case Maxwell::VertexAttribute::Type::SignedNorm:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -75,14 +71,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_INT_2_10_10_10_REV;
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+ UNREACHABLE();
+ return {};
}
-
- LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- UNREACHABLE();
- return {};
- }
-
- case Maxwell::VertexAttribute::Type::Float: {
+ case Maxwell::VertexAttribute::Type::Float:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_16:
case Maxwell::VertexAttribute::Size::Size_16_16:
@@ -94,13 +88,16 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_FLOAT;
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+ UNREACHABLE();
+ return {};
}
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
+ UNREACHABLE();
+ return {};
}
- }
-
- LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
- UNREACHABLE();
- return {};
}
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
@@ -129,10 +126,11 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_TRIANGLES;
case Maxwell::PrimitiveTopology::TriangleStrip:
return GL_TRIANGLE_STRIP;
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
+ UNREACHABLE();
+ return {};
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
- UNREACHABLE();
- return {};
}
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
@@ -186,9 +184,10 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
} else {
return GL_MIRROR_CLAMP_TO_EDGE;
}
+ default:
+ LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
+ return GL_REPEAT;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
- return GL_REPEAT;
}
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 34bf26ff2..9fe1e3280 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -62,9 +62,10 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
case Tegra::Texture::WrapMode::MirrorOnceBorder:
UNIMPLEMENTED();
return vk::SamplerAddressMode::eMirrorClampToEdge;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
+ return {};
}
- UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
- return {};
}
vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
@@ -225,9 +226,10 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return vk::PrimitiveTopology::eTriangleList;
case Maxwell::PrimitiveTopology::TriangleStrip:
return vk::PrimitiveTopology::eTriangleStrip;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
+ return {};
}
- UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
- return {};
}
vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index e4c438792..2da595c0d 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -116,6 +116,8 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
// Continue scanning for an exit method.
break;
}
+ default:
+ break;
}
}
return exit_method = ExitMethod::AlwaysReturn;
@@ -206,4 +208,4 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
return pc + 1;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index 9467f9417..2098c1170 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -9,6 +9,7 @@
namespace VideoCommon::Shader {
+using Tegra::Shader::HalfType;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
@@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
}
}
- UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
const bool negate_a =
opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
@@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);
- Node op_b = [&]() {
+ auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HMUL2_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+ return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::HADD2_R:
case OpCode::Id::HMUL2_R:
- return GetRegister(instr.gpr20);
+ return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
default:
UNREACHABLE();
- return Immediate(0);
+ return {HalfType::F32, Immediate(0)};
}
}();
- op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b);
- op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
+ op_b = UnpackHalfFloat(op_b, type_b);
+ // redeclaration to avoid a bug in clang with reusing local bindings in lambdas
+ Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
Node value = [&]() {
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HADD2_R:
- return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
+ return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);
case OpCode::Id::HMUL2_C:
case OpCode::Id::HMUL2_R:
- return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
+ return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);
default:
UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
return Immediate(0);
}
}();
+ value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
SetRegister(bb, instr.gpr0, value);
@@ -68,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index ba15b1115..b5ec9a6f5 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -120,10 +120,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
return Operation(OperationCode::FCeil, PRECISE, value);
case Tegra::Shader::F2fRoundingOp::Trunc:
return Operation(OperationCode::FTrunc, PRECISE, value);
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
+ static_cast<u32>(instr.conversion.f2f.rounding.Value()));
+ return Immediate(0);
}
- UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
- static_cast<u32>(instr.conversion.f2f.rounding.Value()));
- return Immediate(0);
}();
value = GetSaturatedFloat(value, instr.alu.saturate_d);
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 5c1becce5..a425f9eb7 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
case OpCode::Id::HFMA2_CR:
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
- return {instr.hfma2.saturate, instr.hfma2.type_b,
+ return {instr.hfma2.saturate, HalfType::F32,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
case OpCode::Id::HFMA2_RC:
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
- instr.hfma2.type_b,
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
+ HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::HFMA2_RR:
neg_b = instr.hfma2.rr.negate_b;
neg_c = instr.hfma2.rr.negate_c;
@@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
return {false, identity, Immediate(0), identity, Immediate(0)};
}
}();
- UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
+ value = GetSaturatedHalfFloat(value, saturate);
value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
SetRegister(bb, instr.gpr0, value);
@@ -70,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index fa65ac9a9..8b574d4e5 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -296,7 +296,7 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
ASSERT(cbuf_offset_imm != nullptr);
const auto cbuf_offset = cbuf_offset_imm->GetValue();
const auto cbuf_index = cbuf->GetIndex();
- const u64 cbuf_key = (cbuf_index << 32) | cbuf_offset;
+ const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
// If this sampler has already been used, return the existing mapping.
const auto itr =
@@ -541,7 +541,6 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
bool is_array, bool is_aoffi) {
const std::size_t coord_count = GetCoordCount(texture_type);
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
- const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index db15c0718..04a776398 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -56,9 +56,10 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
instr.xmad.mode,
Immediate(static_cast<u32>(instr.xmad.imm20_16)),
GetRegister(instr.gpr39)};
+ default:
+ UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
+ return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
}
- UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
- return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
}();
op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 17f2f711c..e4eb0dfd9 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -439,11 +439,14 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
return OperationCode::LogicalUGreaterEqual;
case OperationCode::INegate:
UNREACHABLE_MSG("Can't negate an unsigned integer");
+ return {};
case OperationCode::IAbsolute:
UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
+ return {};
+ default:
+ UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
+ return {};
}
- UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
- return {};
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 81278fb33..65f1e1de9 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -251,8 +251,9 @@ public:
}
bool operator<(const Sampler& rhs) const {
- return std::tie(offset, index, type, is_array, is_shadow) <
- std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow);
+ return std::tie(index, offset, type, is_array, is_shadow, is_bindless) <
+ std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow,
+ rhs.is_bindless);
}
private:
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 3b022a456..6384fa8d2 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -178,39 +178,44 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return PixelFormat::ABGR8S;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::ABGR8UI;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::B5G6R5:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::B5G6R5U;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::A2B10G10R10:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::A2B10G10R10U;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::A1B5G5R5:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::A1B5G5R5U;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::R8:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::R8U;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::R8UI;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::G8R8:
// TextureFormat::G8R8 is actually ordered red then green, as such we can use
// PixelFormat::RG8U and PixelFormat::RG8S. This was tested with The Legend of Zelda: Breath
@@ -220,50 +225,56 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return PixelFormat::RG8U;
case Tegra::Texture::ComponentType::SNORM:
return PixelFormat::RG8S;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::RGBA16U;
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RGBA16F;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::BF10GF11RF11:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::R11FG11FB10F;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RGBA32F;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::RGBA32UI;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::R32_G32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RG32F;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::RG32UI;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::R32_G32_B32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RGB32F;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::R16:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
@@ -276,18 +286,20 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return PixelFormat::R16UI;
case Tegra::Texture::ComponentType::SINT:
return PixelFormat::R16I;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::R32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::R32F;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::R32UI;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::ZF32:
return PixelFormat::Z32F;
case Tegra::Texture::TextureFormat::Z16:
@@ -310,9 +322,10 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return PixelFormat::DXN2UNORM;
case Tegra::Texture::ComponentType::SNORM:
return PixelFormat::DXN2SNORM;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
case Tegra::Texture::TextureFormat::BC7U:
return is_srgb ? PixelFormat::BC7U_SRGB : PixelFormat::BC7U;
case Tegra::Texture::TextureFormat::BC6H_UF16:
@@ -343,15 +356,17 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return PixelFormat::RG16UI;
case Tegra::Texture::ComponentType::SINT:
return PixelFormat::RG16I;
+ default:
+ break;
}
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
- UNREACHABLE();
+ break;
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
- static_cast<u32>(component_type));
- UNREACHABLE();
- return PixelFormat::ABGR8U;
+ break;
}
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
+ static_cast<u32>(component_type));
+ UNREACHABLE();
+ return PixelFormat::ABGR8U;
}
ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
@@ -513,8 +528,9 @@ bool IsFormatBCn(PixelFormat format) {
case PixelFormat::DXT45_SRGB:
case PixelFormat::BC7U_SRGB:
return true;
+ default:
+ return false;
}
- return false;
}
} // namespace VideoCore::Surface
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index b508d64e9..eafb6b73a 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -1616,6 +1616,7 @@ namespace Tegra::Texture::ASTC {
std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
uint32_t depth, uint32_t block_width, uint32_t block_height) {
uint32_t blockIdx = 0;
+ std::size_t depth_offset = 0;
std::vector<uint8_t> outData(height * width * depth * 4);
for (uint32_t k = 0; k < depth; k++) {
for (uint32_t j = 0; j < height; j += block_height) {
@@ -1630,7 +1631,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
uint32_t decompWidth = std::min(block_width, width - i);
uint32_t decompHeight = std::min(block_height, height - j);
- uint8_t* outRow = outData.data() + (j * width + i) * 4;
+ uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4;
for (uint32_t jj = 0; jj < decompHeight; jj++) {
memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
}
@@ -1638,6 +1639,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
blockIdx++;
}
}
+ depth_offset += height * width * 4;
}
return outData;
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp
index c8b0a5ec0..5477f050c 100644
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -58,7 +58,7 @@ void CompatDB::Submit() {
button(NextButton)->setEnabled(false);
button(NextButton)->setText(tr("Submitting"));
- button(QWizard::CancelButton)->setVisible(false);
+ button(CancelButton)->setVisible(false);
testcase_watcher.setFuture(QtConcurrent::run(
[] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
@@ -74,12 +74,12 @@ void CompatDB::OnTestcaseSubmitted() {
tr("An error occured while sending the Testcase"));
button(NextButton)->setEnabled(true);
button(NextButton)->setText(tr("Next"));
- button(QWizard::CancelButton)->setVisible(true);
+ button(CancelButton)->setVisible(true);
} else {
next();
// older versions of QT don't support the "NoCancelButtonOnLastPage" option, this is a
// workaround
- button(QWizard::CancelButton)->setVisible(false);
+ button(CancelButton)->setVisible(false);
}
}
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index a5218b051..32c05b797 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -17,8 +17,12 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry)
ui->hotkeysTab->Populate(registry);
this->setConfiguration();
this->PopulateSelectionList();
+
+ setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint);
+
connect(ui->selectorList, &QListWidget::itemSelectionChanged, this,
&ConfigureDialog::UpdateVisibleTabs);
+
adjustSize();
ui->selectorList->setCurrentRow(0);
diff --git a/src/yuzu/hotkeys.h b/src/yuzu/hotkeys.h
index 4f526dc7e..248fadaf3 100644
--- a/src/yuzu/hotkeys.h
+++ b/src/yuzu/hotkeys.h
@@ -67,8 +67,6 @@ public:
private:
struct Hotkey {
- Hotkey() : shortcut(nullptr), context(Qt::WindowShortcut) {}
-
QKeySequence keyseq;
QShortcut* shortcut = nullptr;
Qt::ShortcutContext context = Qt::WindowShortcut;