summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- src/video_core/shader/async_shaders.cpp 100
-rw-r--r-- src/video_core/shader/async_shaders.h 61
-rw-r--r-- src/video_core/shader/control_flow.cpp 30
-rw-r--r-- src/video_core/shader/decode/memory.cpp 9
-rw-r--r-- src/video_core/shader/memory_util.cpp 7
-rw-r--r-- src/video_core/shader/memory_util.h 6
6 files changed, 139 insertions, 74 deletions
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index b7f66d7ee..aabd62c5c 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>
@@ -20,9 +19,18 @@ AsyncShaders::~AsyncShaders() {
KillWorkers();
}
-void AsyncShaders::AllocateWorkers(std::size_t num_workers) {
- // If we're already have workers queued or don't want to queue workers, ignore
- if (num_workers == worker_threads.size() || num_workers == 0) {
+void AsyncShaders::AllocateWorkers() {
+ // Max worker threads we should allow
+ constexpr u32 MAX_THREADS = 4;
+ // Deduce how many threads we can use
+ const u32 threads_used = std::thread::hardware_concurrency() / 4;
+ // Always allow at least 1 thread regardless of our settings
+ const auto max_worker_count = std::max(1U, threads_used);
+ // Don't use more than MAX_THREADS
+ const auto num_workers = std::min(max_worker_count, MAX_THREADS);
+
+ // If we already have workers queued, ignore
+ if (num_workers == worker_threads.size()) {
return;
}
@@ -34,8 +42,8 @@ void AsyncShaders::AllocateWorkers(std::size_t num_workers) {
// Create workers
for (std::size_t i = 0; i < num_workers; i++) {
context_list.push_back(emu_window.CreateSharedContext());
- worker_threads.push_back(std::move(
- std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get())));
+ worker_threads.push_back(
+ std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()));
}
}
@@ -65,11 +73,11 @@ void AsyncShaders::KillWorkers() {
worker_threads.clear();
}
-bool AsyncShaders::HasWorkQueued() {
+bool AsyncShaders::HasWorkQueued() const {
return !pending_queue.empty();
}
-bool AsyncShaders::HasCompletedWork() {
+bool AsyncShaders::HasCompletedWork() const {
std::shared_lock lock{completed_mutex};
return !finished_work.empty();
}
@@ -94,7 +102,7 @@ bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
}
std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
- std::vector<AsyncShaders::Result> results;
+ std::vector<Result> results;
{
std::unique_lock lock{completed_mutex};
results.assign(std::make_move_iterator(finished_work.begin()),
@@ -111,24 +119,50 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
VideoCommon::Shader::CompilerSettings compiler_settings,
const VideoCommon::Shader::Registry& registry,
VAddr cpu_addr) {
- WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM
- : AsyncShaders::Backend::OpenGL,
- device,
- shader_type,
- uid,
- std::move(code),
- std::move(code_b),
- main_offset,
- compiler_settings,
- registry,
- cpu_addr};
+ WorkerParams params{
+ .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
+ .device = &device,
+ .shader_type = shader_type,
+ .uid = uid,
+ .code = std::move(code),
+ .code_b = std::move(code_b),
+ .main_offset = main_offset,
+ .compiler_settings = compiler_settings,
+ .registry = registry,
+ .cpu_address = cpu_addr,
+ };
std::unique_lock lock(queue_mutex);
- pending_queue.push_back(std::move(params));
+ pending_queue.push(std::move(params));
+ cv.notify_one();
+}
+
+void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+ const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
+ Vulkan::VKDescriptorPool& descriptor_pool,
+ Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+ Vulkan::VKRenderPassCache& renderpass_cache,
+ std::vector<VkDescriptorSetLayoutBinding> bindings,
+ Vulkan::SPIRVProgram program,
+ Vulkan::GraphicsPipelineCacheKey key) {
+ WorkerParams params{
+ .backend = Backend::Vulkan,
+ .pp_cache = pp_cache,
+ .vk_device = &device,
+ .scheduler = &scheduler,
+ .descriptor_pool = &descriptor_pool,
+ .update_descriptor_queue = &update_descriptor_queue,
+ .renderpass_cache = &renderpass_cache,
+ .bindings = bindings,
+ .program = program,
+ .key = key,
+ };
+
+ std::unique_lock lock(queue_mutex);
+ pending_queue.push(std::move(params));
cv.notify_one();
}
void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
- using namespace std::chrono_literals;
while (!is_thread_exiting.load(std::memory_order_relaxed)) {
std::unique_lock lock{queue_mutex};
cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
@@ -144,18 +178,17 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
if (pending_queue.empty()) {
continue;
}
+
// Pull work from queue
WorkerParams work = std::move(pending_queue.front());
- pending_queue.pop_front();
-
+ pending_queue.pop();
lock.unlock();
- if (work.backend == AsyncShaders::Backend::OpenGL ||
- work.backend == AsyncShaders::Backend::GLASM) {
- const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry);
+ if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
+ const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
const auto scope = context->Acquire();
auto program =
- OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry);
+ OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
Result result{};
result.backend = work.backend;
result.cpu_address = work.cpu_address;
@@ -164,9 +197,9 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
result.code_b = std::move(work.code_b);
result.shader_type = work.shader_type;
- if (work.backend == AsyncShaders::Backend::OpenGL) {
+ if (work.backend == Backend::OpenGL) {
result.program.opengl = std::move(program->source_program);
- } else if (work.backend == AsyncShaders::Backend::GLASM) {
+ } else if (work.backend == Backend::GLASM) {
result.program.glasm = std::move(program->assembly_program);
}
@@ -174,6 +207,13 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
std::unique_lock complete_lock(completed_mutex);
finished_work.push_back(std::move(result));
}
+ } else if (work.backend == Backend::Vulkan) {
+ auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
+ *work.vk_device, *work.scheduler, *work.descriptor_pool,
+ *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
+ work.program);
+
+ work.pp_cache->EmplacePipeline(std::move(pipeline));
}
}
}
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 2f5ee94ad..7cf8d994c 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -5,15 +5,17 @@
#pragma once
#include <condition_variable>
-#include <deque>
#include <memory>
#include <shared_mutex>
#include <thread>
-#include "common/bit_field.h"
+
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Core::Frontend {
class EmuWindow;
@@ -24,6 +26,10 @@ namespace Tegra {
class GPU;
}
+namespace Vulkan {
+class VKPipelineCache;
+}
+
namespace VideoCommon::Shader {
class AsyncShaders {
@@ -31,6 +37,7 @@ public:
enum class Backend {
OpenGL,
GLASM,
+ Vulkan,
};
struct ResultPrograms {
@@ -52,7 +59,7 @@ public:
~AsyncShaders();
/// Start up shader worker threads
- void AllocateWorkers(std::size_t num_workers);
+ void AllocateWorkers();
/// Clear the shader queue and kill all worker threads
void FreeWorkers();
@@ -61,48 +68,68 @@ public:
void KillWorkers();
/// Check to see if any shaders have actually been compiled
- bool HasCompletedWork();
+ [[nodiscard]] bool HasCompletedWork() const;
/// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot build
/// every shader async as some shaders are only built and executed once. We try to "guess" which
/// shader would be used only once
- bool IsShaderAsync(const Tegra::GPU& gpu) const;
+ [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
/// Pulls completed compiled shaders
- std::vector<Result> GetCompletedWork();
+ [[nodiscard]] std::vector<Result> GetCompletedWork();
void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
- VideoCommon::Shader::CompilerSettings compiler_settings,
- const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
+ CompilerSettings compiler_settings, const Registry& registry,
+ VAddr cpu_addr);
+
+ void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
+ Vulkan::VKScheduler& scheduler,
+ Vulkan::VKDescriptorPool& descriptor_pool,
+ Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+ Vulkan::VKRenderPassCache& renderpass_cache,
+ std::vector<VkDescriptorSetLayoutBinding> bindings,
+ Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
private:
void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
/// Check our worker queue to see if we have any work queued already
- bool HasWorkQueued();
+ [[nodiscard]] bool HasWorkQueued() const;
struct WorkerParams {
- AsyncShaders::Backend backend;
- OpenGL::Device device;
+ Backend backend;
+ // For OGL
+ const OpenGL::Device* device;
Tegra::Engines::ShaderType shader_type;
u64 uid;
std::vector<u64> code;
std::vector<u64> code_b;
u32 main_offset;
- VideoCommon::Shader::CompilerSettings compiler_settings;
- VideoCommon::Shader::Registry registry;
+ CompilerSettings compiler_settings;
+ std::optional<Registry> registry;
VAddr cpu_address;
+
+ // For Vulkan
+ Vulkan::VKPipelineCache* pp_cache;
+ const Vulkan::VKDevice* vk_device;
+ Vulkan::VKScheduler* scheduler;
+ Vulkan::VKDescriptorPool* descriptor_pool;
+ Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
+ Vulkan::VKRenderPassCache* renderpass_cache;
+ std::vector<VkDescriptorSetLayoutBinding> bindings;
+ Vulkan::SPIRVProgram program;
+ Vulkan::GraphicsPipelineCacheKey key;
};
std::condition_variable cv;
- std::mutex queue_mutex;
- std::shared_mutex completed_mutex;
+ mutable std::mutex queue_mutex;
+ mutable std::shared_mutex completed_mutex;
std::atomic<bool> is_thread_exiting{};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
std::vector<std::thread> worker_threads;
- std::deque<WorkerParams> pending_queue;
- std::vector<AsyncShaders::Result> finished_work;
+ std::queue<WorkerParams> pending_queue;
+ std::vector<Result> finished_work;
Core::Frontend::EmuWindow& emu_window;
};
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 8d86020f6..336397cdb 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -187,24 +187,26 @@ std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state,
std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
u64 ldc_tracked_register) {
- return TrackInstruction<u64>(state, pos,
- [ldc_tracked_register](auto instr, const auto& opcode) {
- return opcode.GetId() == OpCode::Id::SHL_IMM &&
- instr.gpr0.Value() == ldc_tracked_register;
- },
- [](auto instr, const auto&) { return instr.gpr8.Value(); });
+ return TrackInstruction<u64>(
+ state, pos,
+ [ldc_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::SHL_IMM &&
+ instr.gpr0.Value() == ldc_tracked_register;
+ },
+ [](auto instr, const auto&) { return instr.gpr8.Value(); });
}
std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
u64 shl_tracked_register) {
- return TrackInstruction<u32>(state, pos,
- [shl_tracked_register](auto instr, const auto& opcode) {
- return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
- instr.gpr0.Value() == shl_tracked_register;
- },
- [](auto instr, const auto&) {
- return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
- });
+ return TrackInstruction<u32>(
+ state, pos,
+ [shl_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
+ instr.gpr0.Value() == shl_tracked_register;
+ },
+ [](auto instr, const auto&) {
+ return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
+ });
}
std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 63adbc4a3..e2bba88dd 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -386,7 +386,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::RED: {
- UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
+ UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
+ static_cast<int>(instr.red.type.Value()));
const auto [real_address, base_address, descriptor] =
TrackGlobalMemory(bb, instr, true, true);
if (!real_address || !base_address) {
@@ -471,9 +472,9 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
const auto [base_address, index, offset] =
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
- ASSERT_OR_EXECUTE_MSG(base_address != nullptr,
- { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
- "Global memory tracking failed");
+ ASSERT_OR_EXECUTE_MSG(
+ base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
+ "Global memory tracking failed");
bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
index 5071c83ca..e18ccba8e 100644
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -16,11 +16,10 @@
namespace VideoCommon::Shader {
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
- const auto& gpu{system.GPU().Maxwell3D()};
- const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
- return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+ const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
+ return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
}
bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
index be90d24fd..4624d38e6 100644
--- a/src/video_core/shader/memory_util.h
+++ b/src/video_core/shader/memory_util.h
@@ -11,10 +11,6 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
-namespace Core {
-class System;
-}
-
namespace Tegra {
class MemoryManager;
}
@@ -27,7 +23,7 @@ constexpr u32 STAGE_MAIN_OFFSET = 10;
constexpr u32 KERNEL_MAIN_OFFSET = 0;
/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
/// Gets if the current instruction offset is a scheduler instruction