summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- .appveyor/UtilityFunctions.ps1 39
-rw-r--r-- appveyor.yml 178
-rw-r--r-- src/common/threadsafe_queue.h 11
-rw-r--r-- src/video_core/engines/shader_bytecode.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 1
-rw-r--r-- src/video_core/renderer_vulkan/vk_scheduler.cpp 150
-rw-r--r-- src/video_core/renderer_vulkan/vk_scheduler.h 198
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp 24
-rw-r--r-- src/video_core/shader/decode/conversion.cpp 15
-rw-r--r-- src/video_core/shader/decode/memory.cpp 38
-rw-r--r-- src/video_core/shader/decode/texture.cpp 13
11 files changed, 377 insertions, 292 deletions
diff --git a/.appveyor/UtilityFunctions.ps1 b/.appveyor/UtilityFunctions.ps1
deleted file mode 100644
index fd7476314..000000000
--- a/.appveyor/UtilityFunctions.ps1
+++ /dev/null
@@ -1,39 +0,0 @@
-# Set-up Visual Studio Command Prompt environment for PowerShell
-pushd "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\"
-cmd /c "VsDevCmd.bat -arch=x64 & set" | foreach {
- if ($_ -match "=") {
- $v = $_.split("="); Set-Item -Force -Path "ENV:\$($v[0])" -Value "$($v[1])"
- }
-}
-popd
-
-function Which ($search_path, $name) {
- ($search_path).Split(";") | Get-ChildItem -Filter $name | Select -First 1 -Exp FullName
-}
-
-function GetDeps ($search_path, $binary) {
- ((dumpbin /dependents $binary).Where({ $_ -match "dependencies:"}, "SkipUntil") | Select-String "[^ ]*\.dll").Matches | foreach {
- Which $search_path $_.Value
- }
-}
-
-function RecursivelyGetDeps ($search_path, $binary) {
- $final_deps = @()
- $deps_to_process = GetDeps $search_path $binary
- while ($deps_to_process.Count -gt 0) {
- $current, $deps_to_process = $deps_to_process
- if ($final_deps -contains $current) { continue }
-
- # Is this a system dll file?
- # We use the same algorithm that cmake uses to determine this.
- if ($current -match "$([regex]::Escape($env:SystemRoot))\\sys") { continue }
- if ($current -match "$([regex]::Escape($env:WinDir))\\sys") { continue }
- if ($current -match "\\msvc[^\\]+dll") { continue }
- if ($current -match "\\api-ms-win-[^\\]+dll") { continue }
-
- $final_deps += $current
- $new_deps = GetDeps $search_path $current
- $deps_to_process += ($new_deps | ?{-not ($final_deps -contains $_)})
- }
- return $final_deps
-}
diff --git a/appveyor.yml b/appveyor.yml
deleted file mode 100644
index cef19c259..000000000
--- a/appveyor.yml
+++ /dev/null
@@ -1,178 +0,0 @@
-# shallow clone
-clone_depth: 10
-
-cache:
- - C:\ProgramData\chocolatey\bin -> appveyor.yml
- - C:\ProgramData\chocolatey\lib -> appveyor.yml
-
-os: Visual Studio 2017
-
-environment:
- # Tell msys2 to add mingw64 to the path
- MSYSTEM: MINGW64
- # Tell msys2 to inherit the current directory when starting the shell
- CHERE_INVOKING: 1
- matrix:
- - BUILD_TYPE: msvc
- - BUILD_TYPE: mingw
-
-platform:
- - x64
-
-configuration:
- - Release
-
-install:
- - git submodule update --init --recursive
- - ps: |
- if ($env:BUILD_TYPE -eq 'mingw') {
- $dependencies = "mingw64/mingw-w64-x86_64-cmake",
- "mingw64/mingw-w64-x86_64-qt5",
- "mingw64/mingw-w64-x86_64-SDL2"
- # redirect err to null to prevent warnings from becoming errors
- # workaround to prevent pacman from failing due to cyclical dependencies
- C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S mingw64/mingw-w64-x86_64-freetype mingw64/mingw-w64-x86_64-fontconfig" 2> $null
- C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S $dependencies" 2> $null
- }
-
-before_build:
- - mkdir %BUILD_TYPE%_build
- - cd %BUILD_TYPE%_build
- - ps: |
- $COMPAT = if ($env:ENABLE_COMPATIBILITY_REPORTING -eq $null) {0} else {$env:ENABLE_COMPATIBILITY_REPORTING}
- if ($env:BUILD_TYPE -eq 'msvc') {
- # redirect stderr and change the exit code to prevent powershell from cancelling the build if cmake prints a warning
- cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1 && exit 0'
- } else {
- C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DYUZU_BUILD_UNICORN=1 -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1"
- }
- - cd ..
-
-build_script:
- - ps: |
- if ($env:BUILD_TYPE -eq 'msvc') {
- # https://www.appveyor.com/docs/build-phase
- msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
- } else {
- C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1'
- }
-
-after_build:
- - ps: |
- $GITDATE = $(git show -s --date=short --format='%ad') -replace "-",""
- $GITREV = $(git show -s --format='%h')
-
- # Find out which kind of release we are producing by tag name
- if ($env:APPVEYOR_REPO_TAG_NAME) {
- $RELEASE_DIST, $RELEASE_VERSION = $env:APPVEYOR_REPO_TAG_NAME.split('-')
- } else {
- # There is no repo tag - make assumptions
- $RELEASE_DIST = "head"
- }
-
- if ($env:BUILD_TYPE -eq 'msvc') {
- # Where are these spaces coming from? Regardless, let's remove them
- $MSVC_BUILD_ZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.zip" -replace " ", ""
- $MSVC_BUILD_PDB = "yuzu-windows-msvc-$GITDATE-$GITREV-debugsymbols.zip" -replace " ", ""
- $MSVC_SEVENZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.7z" -replace " ", ""
-
- # set the build names as env vars so the artifacts can upload them
- $env:BUILD_ZIP = $MSVC_BUILD_ZIP
- $env:BUILD_SYMBOLS = $MSVC_BUILD_PDB
- $env:BUILD_UPDATE = $MSVC_SEVENZIP
-
- $BUILD_DIR = ".\msvc_build\bin\Release"
-
- # Make a debug symbol upload
- mkdir pdb
- Get-ChildItem "$BUILD_DIR\" -Recurse -Filter "*.pdb" | Copy-Item -destination .\pdb
- 7z a -tzip $MSVC_BUILD_PDB .\pdb\*.pdb
- rm "$BUILD_DIR\*.pdb"
-
- mkdir $RELEASE_DIST
- # get rid of extra exes by copying everything over, then deleting all the exes, then copying just the exes we want
- Copy-Item "$BUILD_DIR\*" -Destination $RELEASE_DIST -Recurse
- rm "$RELEASE_DIST\*.exe"
- Get-ChildItem "$BUILD_DIR" -Recurse -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
- Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Item -destination $RELEASE_DIST
- Copy-Item .\license.txt -Destination $RELEASE_DIST
- Copy-Item .\README.md -Destination $RELEASE_DIST
- 7z a -tzip $MSVC_BUILD_ZIP $RELEASE_DIST\*
- 7z a $MSVC_SEVENZIP $RELEASE_DIST
- } else {
- $MINGW_BUILD_ZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.zip" -replace " ", ""
- $MINGW_SEVENZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.7z" -replace " ", ""
- # not going to bother adding separate debug symbols for mingw, so just upload a README for it
- # if someone wants to add them, change mingw to compile with -g and use objdump and strip to separate the symbols from the binary
- $MINGW_NO_DEBUG_SYMBOLS = "README_No_Debug_Symbols.txt"
- Set-Content -Path $MINGW_NO_DEBUG_SYMBOLS -Value "This is a workaround for Appveyor since msvc has debug symbols but mingw doesnt" -Force
-
- # store the build information in env vars so we can use them as artifacts
- $env:BUILD_ZIP = $MINGW_BUILD_ZIP
- $env:BUILD_SYMBOLS = $MINGW_NO_DEBUG_SYMBOLS
- $env:BUILD_UPDATE = $MINGW_SEVENZIP
-
- $CMAKE_SOURCE_DIR = "$env:APPVEYOR_BUILD_FOLDER"
- $CMAKE_BINARY_DIR = "$CMAKE_SOURCE_DIR/mingw_build/bin"
- $RELEASE_DIST = $RELEASE_DIST + "-mingw"
-
- mkdir $RELEASE_DIST
- mkdir $RELEASE_DIST/platforms
- mkdir $RELEASE_DIST/styles
- mkdir $RELEASE_DIST/imageformats
-
- # copy the compiled binaries and other release files to the release folder
- Get-ChildItem "$CMAKE_BINARY_DIR" -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
- Copy-Item -path "$CMAKE_SOURCE_DIR/license.txt" -destination $RELEASE_DIST
- Copy-Item -path "$CMAKE_SOURCE_DIR/README.md" -destination $RELEASE_DIST
-
- # copy the qt windows plugin dll to platforms
- Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/platforms/qwindows.dll" -force -destination "$RELEASE_DIST/platforms"
-
- # copy the qt windows vista style dll to platforms
- Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/styles/qwindowsvistastyle.dll" -force -destination "$RELEASE_DIST/styles"
-
- # copy the qt jpeg imageformat dll to platforms
- Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/imageformats/qjpeg.dll" -force -destination "$RELEASE_DIST/imageformats"
-
- # copy all the dll dependencies to the release folder
- . "./.appveyor/UtilityFunctions.ps1"
- $DLLSearchPath = "C:\msys64\mingw64\bin;$env:PATH"
- $MingwDLLs = RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu.exe"
- $MingwDLLs += RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu_cmd.exe"
- $MingwDLLs += RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\imageformats\qjpeg.dll"
- Write-Host "Detected the following dependencies:"
- Write-Host $MingwDLLs
- foreach ($file in $MingwDLLs) {
- Copy-Item -path "$file" -force -destination "$RELEASE_DIST"
- }
-
- 7z a -tzip $MINGW_BUILD_ZIP $RELEASE_DIST\*
- 7z a $MINGW_SEVENZIP $RELEASE_DIST
- }
-
-test_script:
- - cd %BUILD_TYPE%_build
- - ps: |
- if ($env:BUILD_TYPE -eq 'msvc') {
- ctest -VV -C Release
- } else {
- C:\msys64\usr\bin\bash.exe -lc "ctest -VV -C Release"
- }
- - cd ..
-
-artifacts:
- - path: $(BUILD_ZIP)
- name: build
- type: zip
-
-deploy:
- provider: GitHub
- release: $(appveyor_repo_tag_name)
- auth_token:
- secure: QqePPnXbkzmXct5c8hZ2X5AbsthbI6cS1Sr+VBzcD8oUOIjfWJJKXVAQGUbQAbb0
- artifact: update,build
- draft: false
- prerelease: false
- on:
- appveyor_repo_tag: true
diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h
index e714ba5b3..8268bbd5c 100644
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -46,9 +46,16 @@ public:
ElementPtr* new_ptr = new ElementPtr();
write_ptr->next.store(new_ptr, std::memory_order_release);
write_ptr = new_ptr;
- cv.notify_one();
- ++size;
+ const size_t previous_size{size++};
+
+ // Acquire the mutex and then immediately release it as a fence.
+ // TODO(bunnei): This can be replaced with C++20 waitable atomics when properly supported.
+ // See discussion on https://github.com/yuzu-emu/yuzu/pull/3173 for details.
+ if (previous_size == 0) {
+ std::lock_guard lock{cv_mutex};
+ }
+ cv.notify_one();
}
void Pop() {
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d6a2cc8b8..dfb12cd2d 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1973,7 +1973,7 @@ private:
INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
- INST("11011111--00----", Id::TLD4S, Type::Texture, "TLD4S"),
+ INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index d1ae4be6d..0389c2143 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -399,6 +399,7 @@ public:
DeclareConstantBuffers();
DeclareGlobalMemory();
DeclareSamplers();
+ DeclareImages();
DeclarePhysicalAttributeReader();
code.AddLine("void execute_{}() {{", suffix);
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 0f8116458..d66133ad1 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -3,7 +3,7 @@
// Refer to the license.txt file included.
#include "common/assert.h"
-#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -11,46 +11,172 @@
namespace Vulkan {
+MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
+
+void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
+ const vk::DispatchLoaderDynamic& dld) {
+ auto command = first;
+ while (command != nullptr) {
+ auto next = command->GetNext();
+ command->Execute(cmdbuf, dld);
+ command->~Command();
+ command = next;
+ }
+
+ command_offset = 0;
+ first = nullptr;
+ last = nullptr;
+}
+
VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
- : device{device}, resource_manager{resource_manager} {
- next_fence = &resource_manager.CommitFence();
+ : device{device}, resource_manager{resource_manager}, next_fence{
+ &resource_manager.CommitFence()} {
+ AcquireNewChunk();
AllocateNewContext();
+ worker_thread = std::thread(&VKScheduler::WorkerThread, this);
}
-VKScheduler::~VKScheduler() = default;
+VKScheduler::~VKScheduler() {
+ quit = true;
+ cv.notify_all();
+ worker_thread.join();
+}
void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
SubmitExecution(semaphore);
- if (release_fence)
+ if (release_fence) {
current_fence->Release();
+ }
AllocateNewContext();
}
void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
SubmitExecution(semaphore);
current_fence->Wait();
- if (release_fence)
+ if (release_fence) {
current_fence->Release();
+ }
AllocateNewContext();
}
+void VKScheduler::WaitWorker() {
+ MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
+ DispatchWork();
+
+ bool finished = false;
+ do {
+ cv.notify_all();
+ std::unique_lock lock{mutex};
+ finished = chunk_queue.Empty();
+ } while (!finished);
+}
+
+void VKScheduler::DispatchWork() {
+ if (chunk->Empty()) {
+ return;
+ }
+ chunk_queue.Push(std::move(chunk));
+ cv.notify_all();
+ AcquireNewChunk();
+}
+
+void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) {
+ if (state.renderpass && renderpass_bi == *state.renderpass) {
+ return;
+ }
+ const bool end_renderpass = state.renderpass.has_value();
+ state.renderpass = renderpass_bi;
+ Record([renderpass_bi, end_renderpass](auto cmdbuf, auto& dld) {
+ if (end_renderpass) {
+ cmdbuf.endRenderPass(dld);
+ }
+ cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld);
+ });
+}
+
+void VKScheduler::RequestOutsideRenderPassOperationContext() {
+ EndRenderPass();
+}
+
+void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) {
+ if (state.graphics_pipeline == pipeline) {
+ return;
+ }
+ state.graphics_pipeline = pipeline;
+ Record([pipeline](auto cmdbuf, auto& dld) {
+ cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld);
+ });
+}
+
+void VKScheduler::WorkerThread() {
+ std::unique_lock lock{mutex};
+ do {
+ cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
+ if (quit) {
+ continue;
+ }
+ auto extracted_chunk = std::move(chunk_queue.Front());
+ chunk_queue.Pop();
+ extracted_chunk->ExecuteAll(current_cmdbuf, device.GetDispatchLoader());
+ chunk_reserve.Push(std::move(extracted_chunk));
+ } while (!quit);
+}
+
void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
+ EndPendingOperations();
+ InvalidateState();
+ WaitWorker();
+
+ std::unique_lock lock{mutex};
+
+ const auto queue = device.GetGraphicsQueue();
const auto& dld = device.GetDispatchLoader();
current_cmdbuf.end(dld);
- const auto queue = device.GetGraphicsQueue();
- const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
+ const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1U : 0U,
&semaphore);
- queue.submit({submit_info}, *current_fence, dld);
+ queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld);
}
void VKScheduler::AllocateNewContext() {
+ std::unique_lock lock{mutex};
current_fence = next_fence;
- current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
next_fence = &resource_manager.CommitFence();
- const auto& dld = device.GetDispatchLoader();
- current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
+ current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
+ current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
+ device.GetDispatchLoader());
+}
+
+void VKScheduler::InvalidateState() {
+ state.graphics_pipeline = nullptr;
+ state.viewports = false;
+ state.scissors = false;
+ state.depth_bias = false;
+ state.blend_constants = false;
+ state.depth_bounds = false;
+ state.stencil_values = false;
+}
+
+void VKScheduler::EndPendingOperations() {
+ EndRenderPass();
+}
+
+void VKScheduler::EndRenderPass() {
+ if (!state.renderpass) {
+ return;
+ }
+ state.renderpass = std::nullopt;
+ Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); });
+}
+
+void VKScheduler::AcquireNewChunk() {
+ if (chunk_reserve.Empty()) {
+ chunk = std::make_unique<CommandChunk>();
+ return;
+ }
+ chunk = std::move(chunk_reserve.Front());
+ chunk_reserve.Pop();
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 0e5b49c7f..bcdffbba0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,7 +4,14 @@
#pragma once
+#include <condition_variable>
+#include <memory>
+#include <optional>
+#include <stack>
+#include <thread>
+#include <utility>
#include "common/common_types.h"
+#include "common/threadsafe_queue.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
@@ -30,56 +37,197 @@ private:
VKFence* const& fence;
};
-class VKCommandBufferView {
+/// The scheduler abstracts command buffer and fence management with an interface that's able to do
+/// OpenGL-like operations on Vulkan command buffers.
+class VKScheduler {
public:
- VKCommandBufferView() = default;
- VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
+ explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
+ ~VKScheduler();
+
+ /// Sends the current execution context to the GPU.
+ void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+
+ /// Sends the current execution context to the GPU and waits for it to complete.
+ void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+
+ /// Waits for the worker thread to finish executing everything. After this function returns it's
+ /// safe to touch worker resources.
+ void WaitWorker();
+
+ /// Sends currently recorded work to the worker thread.
+ void DispatchWork();
+
+ /// Requests to begin a renderpass.
+ void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi);
+
+ /// Requests the current execution context to be able to execute operations only allowed outside
+ /// of a renderpass.
+ void RequestOutsideRenderPassOperationContext();
+
+ /// Binds a pipeline to the current execution context.
+ void BindGraphicsPipeline(vk::Pipeline pipeline);
- const vk::CommandBuffer* operator->() const noexcept {
- return &cmdbuf;
+ /// Returns true when viewports have been set in the current command buffer.
+ bool TouchViewports() {
+ return std::exchange(state.viewports, true);
}
- operator vk::CommandBuffer() const noexcept {
- return cmdbuf;
+ /// Returns true when scissors have been set in the current command buffer.
+ bool TouchScissors() {
+ return std::exchange(state.scissors, true);
}
-private:
- const vk::CommandBuffer& cmdbuf;
-};
+ /// Returns true when depth bias has been set in the current command buffer.
+ bool TouchDepthBias() {
+ return std::exchange(state.depth_bias, true);
+ }
-/// The scheduler abstracts command buffer and fence management with an interface that's able to do
-/// OpenGL-like operations on Vulkan command buffers.
-class VKScheduler {
-public:
- explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
- ~VKScheduler();
+ /// Returns true when blend constants have been set in the current command buffer.
+ bool TouchBlendConstants() {
+ return std::exchange(state.blend_constants, true);
+ }
+
+ /// Returns true when depth bounds have been set in the current command buffer.
+ bool TouchDepthBounds() {
+ return std::exchange(state.depth_bounds, true);
+ }
+
+ /// Returns true when stencil values have been set in the current command buffer.
+ bool TouchStencilValues() {
+ return std::exchange(state.stencil_values, true);
+ }
+
+ /// Send work to a separate thread.
+ template <typename T>
+ void Record(T&& command) {
+ if (chunk->Record(command)) {
+ return;
+ }
+ DispatchWork();
+ (void)chunk->Record(command);
+ }
/// Gets a reference to the current fence.
VKFenceView GetFence() const {
return current_fence;
}
- /// Gets a reference to the current command buffer.
- VKCommandBufferView GetCommandBuffer() const {
- return current_cmdbuf;
- }
+private:
+ class Command {
+ public:
+ virtual ~Command() = default;
- /// Sends the current execution context to the GPU.
- void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+ virtual void Execute(vk::CommandBuffer cmdbuf,
+ const vk::DispatchLoaderDynamic& dld) const = 0;
- /// Sends the current execution context to the GPU and waits for it to complete.
- void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+ Command* GetNext() const {
+ return next;
+ }
+
+ void SetNext(Command* next_) {
+ next = next_;
+ }
+
+ private:
+ Command* next = nullptr;
+ };
+
+ template <typename T>
+ class TypedCommand final : public Command {
+ public:
+ explicit TypedCommand(T&& command) : command{std::move(command)} {}
+ ~TypedCommand() override = default;
+
+ TypedCommand(TypedCommand&&) = delete;
+ TypedCommand& operator=(TypedCommand&&) = delete;
+
+ void Execute(vk::CommandBuffer cmdbuf,
+ const vk::DispatchLoaderDynamic& dld) const override {
+ command(cmdbuf, dld);
+ }
+
+ private:
+ T command;
+ };
+
+ class CommandChunk final {
+ public:
+ void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld);
+
+ template <typename T>
+ bool Record(T& command) {
+ using FuncType = TypedCommand<T>;
+ static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
+
+ if (command_offset > sizeof(data) - sizeof(FuncType)) {
+ return false;
+ }
+
+ Command* current_last = last;
+
+ last = new (data.data() + command_offset) FuncType(std::move(command));
+
+ if (current_last) {
+ current_last->SetNext(last);
+ } else {
+ first = last;
+ }
+
+ command_offset += sizeof(FuncType);
+ return true;
+ }
+
+ bool Empty() const {
+ return command_offset == 0;
+ }
+
+ private:
+ Command* first = nullptr;
+ Command* last = nullptr;
+
+ std::size_t command_offset = 0;
+ std::array<u8, 0x8000> data{};
+ };
+
+ void WorkerThread();
-private:
void SubmitExecution(vk::Semaphore semaphore);
void AllocateNewContext();
+ void InvalidateState();
+
+ void EndPendingOperations();
+
+ void EndRenderPass();
+
+ void AcquireNewChunk();
+
const VKDevice& device;
VKResourceManager& resource_manager;
vk::CommandBuffer current_cmdbuf;
VKFence* current_fence = nullptr;
VKFence* next_fence = nullptr;
+
+ struct State {
+ std::optional<vk::RenderPassBeginInfo> renderpass;
+ vk::Pipeline graphics_pipeline;
+ bool viewports = false;
+ bool scissors = false;
+ bool depth_bias = false;
+ bool blend_constants = false;
+ bool depth_bounds = false;
+ bool stencil_values = false;
+ } state;
+
+ std::unique_ptr<CommandChunk> chunk;
+ std::thread worker_thread;
+
+ Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
+ Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
+ std::mutex mutex;
+ std::condition_variable cv;
+ bool quit = false;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 6227bc70b..fae8c95c8 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -2552,29 +2552,7 @@ public:
}
Id operator()(const ExprCondCode& expr) {
- const Node cc = decomp.ir.GetConditionCode(expr.cc);
- Id target;
-
- if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
- const auto index = pred->GetIndex();
- switch (index) {
- case Tegra::Shader::Pred::NeverExecute:
- target = decomp.v_false;
- break;
- case Tegra::Shader::Pred::UnusedIndex:
- target = decomp.v_true;
- break;
- default:
- target = decomp.predicates.at(index);
- break;
- }
- } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
- target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag()));
- } else {
- UNREACHABLE();
- }
-
- return decomp.OpLoad(decomp.t_bool, target);
+ return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc)));
}
Id operator()(const ExprVar& expr) {
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 32facd6ba..0eeb75559 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -63,12 +63,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
case OpCode::Id::I2F_R:
case OpCode::Id::I2F_C:
case OpCode::Id::I2F_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
- Node value = [&]() {
+ Node value = [&] {
switch (opcode->get().GetId()) {
case OpCode::Id::I2F_R:
return GetRegister(instr.gpr20);
@@ -81,7 +80,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
return Immediate(0);
}
}();
+
const bool input_signed = instr.conversion.is_input_signed;
+
+ if (instr.conversion.src_size == Register::Size::Byte) {
+ const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8;
+ if (offset > 0) {
+ value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
+ std::move(value), Immediate(offset));
+ }
+ } else {
+ UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
+ }
+
value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 78e92f52e..c934d0719 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -22,7 +22,23 @@ using Tegra::Shader::Register;
namespace {
-u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
+u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
+ switch (uniform_type) {
+ case Tegra::Shader::UniformType::UnsignedByte:
+ case Tegra::Shader::UniformType::Single:
+ return 1;
+ case Tegra::Shader::UniformType::Double:
+ return 2;
+ case Tegra::Shader::UniformType::Quad:
+ case Tegra::Shader::UniformType::UnsignedQuad:
+ return 4;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
+ return 1;
+ }
+}
+
+u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
case Tegra::Shader::UniformType::Single:
return 1;
@@ -170,7 +186,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const auto [real_address_base, base_address, descriptor] =
TrackGlobalMemory(bb, instr, false);
- const u32 count = GetUniformTypeElementsCount(type);
+ const u32 count = GetLdgMemorySize(type);
if (!real_address_base || !base_address) {
// Tracking failed, load zeroes.
for (u32 i = 0; i < count; ++i) {
@@ -181,12 +197,22 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
- const Node real_address =
- Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
- const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+ const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
+ Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+
+ if (type == Tegra::Shader::UniformType::UnsignedByte) {
+ // To handle unaligned loads, get the byte used to dereference global memory
+ // and extract that byte from the loaded uint32.
+ Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
+ byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
+
+ gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
+ Immediate(8));
+ }
SetTemporary(bb, i, gmem);
}
+
for (u32 i = 0; i < count; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
@@ -276,7 +302,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
- const u32 count = GetUniformTypeElementsCount(type);
+ const u32 count = GetStgMemorySize(type);
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 994c05611..dff01a541 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -743,13 +743,18 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
// When lod is used always is in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
- // Fill empty entries from the guest sampler.
+ // Fill empty entries from the guest sampler
const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
if (type_coord_count != entry_coord_count) {
LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
- }
- for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
- coords.push_back(GetRegister(Register::ZeroIndex));
+
+ // When the size is higher we insert zeroes
+ for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
+ coords.push_back(GetRegister(Register::ZeroIndex));
+ }
+
+ // Then we ensure the size matches the number of entries (dropping unused values)
+ coords.resize(entry_coord_count);
}
Node4 values;