summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt16
-rw-r--r--src/video_core/dma_pusher.cpp2
-rw-r--r--src/video_core/engines/fermi_2d.cpp7
-rw-r--r--src/video_core/engines/fermi_2d.h2
-rw-r--r--src/video_core/engines/kepler_compute.cpp3
-rw-r--r--src/video_core/engines/kepler_compute.h3
-rw-r--r--src/video_core/engines/kepler_memory.cpp2
-rw-r--r--src/video_core/engines/kepler_memory.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp66
-rw-r--r--src/video_core/engines/maxwell_3d.h15
-rw-r--r--src/video_core/engines/maxwell_dma.cpp5
-rw-r--r--src/video_core/engines/maxwell_dma.h1
-rw-r--r--src/video_core/engines/shader_bytecode.h1
-rw-r--r--src/video_core/gpu.cpp5
-rw-r--r--src/video_core/gpu.h57
-rw-r--r--src/video_core/gpu_asynch.cpp37
-rw-r--r--src/video_core/gpu_asynch.h37
-rw-r--r--src/video_core/gpu_synch.cpp37
-rw-r--r--src/video_core/gpu_synch.h29
-rw-r--r--src/video_core/gpu_thread.cpp152
-rw-r--r--src/video_core/gpu_thread.h133
-rw-r--r--src/video_core/rasterizer_cache.h18
-rw-r--r--src/video_core/renderer_base.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp72
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp245
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h83
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp4
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp27
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h1
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp483
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h58
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp116
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h87
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp13
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.cpp12
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp90
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h72
-rw-r--r--src/video_core/surface.cpp2
-rw-r--r--src/video_core/textures/astc.cpp80
-rw-r--r--src/video_core/textures/astc.h2
-rw-r--r--src/video_core/textures/convert.cpp92
-rw-r--r--src/video_core/textures/convert.h18
-rw-r--r--src/video_core/textures/decoders.cpp6
-rw-r--r--src/video_core/textures/decoders.h18
46 files changed, 1900 insertions, 315 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6036d6ed3..dac992d44 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,12 @@ add_library(video_core STATIC
engines/shader_header.h
gpu.cpp
gpu.h
+ gpu_asynch.cpp
+ gpu_asynch.h
+ gpu_synch.cpp
+ gpu_synch.h
+ gpu_thread.cpp
+ gpu_thread.h
macro_interpreter.cpp
macro_interpreter.h
memory_manager.cpp
@@ -94,6 +100,8 @@ add_library(video_core STATIC
surface.h
textures/astc.cpp
textures/astc.h
+ textures/convert.cpp
+ textures/convert.h
textures/decoders.cpp
textures/decoders.h
textures/texture.h
@@ -104,6 +112,10 @@ add_library(video_core STATIC
if (ENABLE_VULKAN)
target_sources(video_core PRIVATE
renderer_vulkan/declarations.h
+ renderer_vulkan/maxwell_to_vk.cpp
+ renderer_vulkan/maxwell_to_vk.h
+ renderer_vulkan/vk_buffer_cache.cpp
+ renderer_vulkan/vk_buffer_cache.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h
renderer_vulkan/vk_memory_manager.cpp
@@ -111,7 +123,9 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_scheduler.cpp
- renderer_vulkan/vk_scheduler.h)
+ renderer_vulkan/vk_scheduler.h
+ renderer_vulkan/vk_stream_buffer.cpp
+ renderer_vulkan/vk_stream_buffer.h)
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 669541b4b..bff1a37ff 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -39,7 +39,7 @@ bool DmaPusher::Step() {
}
const CommandList& command_list{dma_pushbuffer.front()};
- const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
+ const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
GPUVAddr dma_get = command_list_header.addr;
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
bool non_main = command_list_header.is_non_main;
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 540dcc52c..03b7ee5d8 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,12 +2,11 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "core/core.h"
-#include "core/memory.h"
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
#include "video_core/engines/fermi_2d.h"
-#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
-#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index c69f74cc5..80523e320 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,7 +5,7 @@
#pragma once
#include <array>
-#include "common/assert.h"
+#include <cstddef>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 4ca856b6b..b1d950460 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -2,9 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/assert.h"
#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/memory_manager.h"
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index df0a32e0f..6575afd0f 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,8 +5,7 @@
#pragma once
#include <array>
-#include "common/assert.h"
-#include "common/bit_field.h"
+#include <cstddef>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 4f6126116..aae2a4019 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) {
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
// We do this before actually writing the new data because the destination address might contain
// a dirty surface that will have to be written back to memory.
- rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
+ Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
Memory::Write32(*dest_address, data);
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index f680c2ad9..9181e9d80 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <cstddef>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2d2136067..144e7fa82 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
auto debug_context = system.GetGPUDebugContext();
+ const u32 method = method_call.method;
+
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
if (executing_macro != 0) {
- ASSERT(method_call.method == executing_macro + 1);
+ ASSERT(method == executing_macro + 1);
}
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
- if (method_call.method >= MacroRegistersStart) {
+ if (method >= MacroRegistersStart) {
// We're trying to execute a macro
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
- ASSERT_MSG((method_call.method % 2) == 0,
+ ASSERT_MSG((method % 2) == 0,
"Can't start macro execution by writing to the ARGS register");
- executing_macro = method_call.method;
+ executing_macro = method;
}
macro_params.push_back(method_call.argument);
@@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
return;
}
- ASSERT_MSG(method_call.method < Regs::NUM_REGS,
+ ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
}
- if (regs.reg_array[method_call.method] != method_call.argument) {
- regs.reg_array[method_call.method] = method_call.argument;
+ if (regs.reg_array[method] != method_call.argument) {
+ regs.reg_array[method] = method_call.argument;
// Color buffers
constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
- if (method_call.method >= first_rt_reg &&
- method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
- const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
- dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
+ if (method >= first_rt_reg &&
+ method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
+ const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
+ dirty_flags.color_buffer.set(rt_index);
}
// Zeta buffer
constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
- if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
- method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
- method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
- (method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
- method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
+ if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
+ method == MAXWELL3D_REG_INDEX(zeta_width) ||
+ method == MAXWELL3D_REG_INDEX(zeta_height) ||
+ (method >= MAXWELL3D_REG_INDEX(zeta) &&
+ method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
dirty_flags.zeta_buffer = true;
}
// Shader
constexpr u32 shader_registers_count =
sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
- if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
- method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
+ if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
+ method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
dirty_flags.shaders = true;
}
// Vertex format
- if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
- method_call.method <
- MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
+ if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
+ method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
dirty_flags.vertex_attrib_format = true;
}
// Vertex buffer
- if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) &&
- method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
- dirty_flags.vertex_array |=
- 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
- } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
- method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
- dirty_flags.vertex_array |=
- 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
- } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
- method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
- dirty_flags.vertex_array |=
- 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
+ if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
+ method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
+ dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
+ } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
+ method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
+ dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
+ } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
+ method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
+ dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
}
}
- switch (method_call.method) {
+ switch (method) {
case MAXWELL3D_REG_INDEX(macros.data): {
ProcessMacroUpload(method_call.argument);
break;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 584f51c48..7fbf1026e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
#pragma once
#include <array>
+#include <bitset>
#include <unordered_map>
#include <vector>
+
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
@@ -1094,19 +1096,18 @@ public:
MemoryManager& memory_manager;
struct DirtyFlags {
- u8 color_buffer = 0xFF;
- bool zeta_buffer = true;
-
- bool shaders = true;
+ std::bitset<8> color_buffer{0xFF};
+ std::bitset<32> vertex_array{0xFFFFFFFF};
bool vertex_attrib_format = true;
- u32 vertex_array = 0xFFFFFFFF;
+ bool zeta_buffer = true;
+ bool shaders = true;
void OnMemoryWrite() {
- color_buffer = 0xFF;
zeta_buffer = true;
shaders = true;
- vertex_array = 0xFFFFFFFF;
+ color_buffer.set();
+ vertex_array.set();
}
};
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 529a14ec7..9dfea5999 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include "common/assert.h"
+#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
@@ -91,12 +92,12 @@ void MaxwellDMA::HandleCopy() {
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying.
- rasterizer.FlushRegion(*source_cpu, src_size);
+ Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
// We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory.
- rasterizer.InvalidateRegion(*dest_cpu, dst_size);
+ Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
};
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index cf75aeb12..34c369320 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <cstddef>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 252592edd..c7eb15b6a 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -6,7 +6,6 @@
#include <bitset>
#include <optional>
-#include <string>
#include <tuple>
#include <vector>
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index ac30d1a89..08abf8ac9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,7 +12,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
-#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
namespace Tegra {
@@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
UNREACHABLE();
}
-GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) {
+GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
+ auto& rasterizer{renderer.Rasterizer()};
memory_manager = std::make_unique<Tegra::MemoryManager>();
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 6313702f2..56a203275 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -16,8 +16,8 @@ class System;
}
namespace VideoCore {
-class RasterizerInterface;
-}
+class RendererBase;
+} // namespace VideoCore
namespace Tegra {
@@ -119,10 +119,11 @@ enum class EngineID {
MAXWELL_DMA_COPY_A = 0xB0B5,
};
-class GPU final {
+class GPU {
public:
- explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
- ~GPU();
+ explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
+
+ virtual ~GPU();
struct MethodCall {
u32 method{};
@@ -200,8 +201,42 @@ public:
};
} regs{};
+ /// Push GPU command entries to be processed
+ virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
+
+ /// Swap buffers (render frame)
+ virtual void SwapBuffers(
+ std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
+
+ /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+ virtual void FlushRegion(VAddr addr, u64 size) = 0;
+
+ /// Notify rasterizer that any caches of the specified region should be invalidated
+ virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+
+ /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+ virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+
private:
+ void ProcessBindMethod(const MethodCall& method_call);
+ void ProcessSemaphoreTriggerMethod();
+ void ProcessSemaphoreRelease();
+ void ProcessSemaphoreAcquire();
+
+ /// Calls a GPU puller method.
+ void CallPullerMethod(const MethodCall& method_call);
+
+ /// Calls a GPU engine method.
+ void CallEngineMethod(const MethodCall& method_call);
+
+ /// Determines where the method should be executed.
+ bool ExecuteMethodOnEngine(const MethodCall& method_call);
+
+protected:
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+ VideoCore::RendererBase& renderer;
+
+private:
std::unique_ptr<Tegra::MemoryManager> memory_manager;
/// Mapping of command subchannels to their bound engine ids.
@@ -217,18 +252,6 @@ private:
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
-
- void ProcessBindMethod(const MethodCall& method_call);
- void ProcessSemaphoreTriggerMethod();
- void ProcessSemaphoreRelease();
- void ProcessSemaphoreAcquire();
-
- // Calls a GPU puller method.
- void CallPullerMethod(const MethodCall& method_call);
- // Calls a GPU engine method.
- void CallEngineMethod(const MethodCall& method_call);
- // Determines where the method should be executed.
- bool ExecuteMethodOnEngine(const MethodCall& method_call);
};
#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
new file mode 100644
index 000000000..ad0a747e3
--- /dev/null
+++ b/src/video_core/gpu_asynch.cpp
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/gpu_asynch.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon {
+
+GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
+ : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
+
+GPUAsynch::~GPUAsynch() = default;
+
+void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
+ gpu_thread.SubmitList(std::move(entries));
+}
+
+void GPUAsynch::SwapBuffers(
+ std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+ gpu_thread.SwapBuffers(std::move(framebuffer));
+}
+
+void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
+ gpu_thread.FlushRegion(addr, size);
+}
+
+void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
+ gpu_thread.InvalidateRegion(addr, size);
+}
+
+void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+ gpu_thread.FlushAndInvalidateRegion(addr, size);
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
new file mode 100644
index 000000000..e6a807aba
--- /dev/null
+++ b/src/video_core/gpu_asynch.h
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon {
+
+namespace GPUThread {
+class ThreadManager;
+} // namespace GPUThread
+
+/// Implementation of GPU interface that runs the GPU asynchronously
+class GPUAsynch : public Tegra::GPU {
+public:
+ explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
+ ~GPUAsynch() override;
+
+ void PushGPUEntries(Tegra::CommandList&& entries) override;
+ void SwapBuffers(
+ std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+ void FlushRegion(VAddr addr, u64 size) override;
+ void InvalidateRegion(VAddr addr, u64 size) override;
+ void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+
+private:
+ GPUThread::ThreadManager gpu_thread;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
new file mode 100644
index 000000000..4c00b96c7
--- /dev/null
+++ b/src/video_core/gpu_synch.cpp
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/gpu_synch.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon {
+
+GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
+ : Tegra::GPU(system, renderer) {}
+
+GPUSynch::~GPUSynch() = default;
+
+void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
+ dma_pusher->Push(std::move(entries));
+ dma_pusher->DispatchCalls();
+}
+
+void GPUSynch::SwapBuffers(
+ std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+ renderer.SwapBuffers(std::move(framebuffer));
+}
+
+void GPUSynch::FlushRegion(VAddr addr, u64 size) {
+ renderer.Rasterizer().FlushRegion(addr, size);
+}
+
+void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
+ renderer.Rasterizer().InvalidateRegion(addr, size);
+}
+
+void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+ renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
new file mode 100644
index 000000000..7d5a241ff
--- /dev/null
+++ b/src/video_core/gpu_synch.h
@@ -0,0 +1,29 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/gpu.h"
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon {
+
+/// Implementation of GPU interface that runs the GPU synchronously
+class GPUSynch : public Tegra::GPU {
+public:
+ explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
+ ~GPUSynch() override;
+
+ void PushGPUEntries(Tegra::CommandList&& entries) override;
+ void SwapBuffers(
+ std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+ void FlushRegion(VAddr addr, u64 size) override;
+ void InvalidateRegion(VAddr addr, u64 size) override;
+ void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
new file mode 100644
index 000000000..c5bdd2a17
--- /dev/null
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,152 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/microprofile.h"
+#include "core/frontend/scope_acquire_window_context.h"
+#include "core/settings.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon::GPUThread {
+
+/// Executes a single GPU thread command
+static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
+ Tegra::DmaPusher& dma_pusher) {
+ if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
+ dma_pusher.Push(std::move(submit_list->entries));
+ dma_pusher.DispatchCalls();
+ } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
+ renderer.SwapBuffers(data->framebuffer);
+ } else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
+ renderer.Rasterizer().FlushRegion(data->addr, data->size);
+ } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
+ renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
+ } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
+ renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
+ } else {
+ UNREACHABLE();
+ }
+}
+
+/// Runs the GPU thread
+static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
+ SynchState& state) {
+
+ MicroProfileOnThreadCreate("GpuThread");
+
+ auto WaitForWakeup = [&]() {
+ std::unique_lock<std::mutex> lock{state.signal_mutex};
+ state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
+ };
+
+ // Wait for first GPU command before acquiring the window context
+ WaitForWakeup();
+
+ // If emulation was stopped during disk shader loading, abort before trying to acquire context
+ if (!state.is_running) {
+ return;
+ }
+
+ Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
+
+ while (state.is_running) {
+ if (!state.is_running) {
+ return;
+ }
+
+ {
+ // Thread has been woken up, so make the previous write queue the next read queue
+ std::lock_guard<std::mutex> lock{state.signal_mutex};
+ std::swap(state.push_queue, state.pop_queue);
+ }
+
+ // Execute all of the GPU commands
+ while (!state.pop_queue->empty()) {
+ ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
+ state.pop_queue->pop();
+ }
+
+ state.UpdateIdleState();
+
+ // Signal that the GPU thread has finished processing commands
+ if (state.is_idle) {
+ state.idle_condition.notify_one();
+ }
+
+ // Wait for CPU thread to send more GPU commands
+ WaitForWakeup();
+ }
+}
+
+ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
+ : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
+ std::ref(dma_pusher), std::ref(state)},
+ thread_id{thread.get_id()} {}
+
+ThreadManager::~ThreadManager() {
+ {
+ // Notify GPU thread that a shutdown is pending
+ std::lock_guard<std::mutex> lock{state.signal_mutex};
+ state.is_running = false;
+ }
+
+ state.signal_condition.notify_one();
+ thread.join();
+}
+
+void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
+ if (entries.empty()) {
+ return;
+ }
+
+ PushCommand(SubmitListCommand(std::move(entries)), false, false);
+}
+
+void ThreadManager::SwapBuffers(
+ std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+ PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
+}
+
+void ThreadManager::FlushRegion(VAddr addr, u64 size) {
+ // Block the CPU when using accurate emulation
+ PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
+}
+
+void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
+ PushCommand(InvalidateRegionCommand(addr, size), true, true);
+}
+
+void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+ InvalidateRegion(addr, size);
+}
+
+void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
+ {
+ std::lock_guard<std::mutex> lock{state.signal_mutex};
+
+ if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
+ // Execute the command synchronously on the current thread
+ ExecuteCommand(&command_data, renderer, dma_pusher);
+ return;
+ }
+
+ // Push the command to the GPU thread
+ state.UpdateIdleState();
+ state.push_queue->emplace(command_data);
+ }
+
+ // Signal the GPU thread that commands are pending
+ state.signal_condition.notify_one();
+
+ if (wait_for_idle) {
+ // Wait for the GPU to be idle (all commands to be executed)
+ std::unique_lock<std::mutex> lock{state.idle_mutex};
+ state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
+ }
+}
+
+} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
new file mode 100644
index 000000000..edb148b14
--- /dev/null
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,133 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <thread>
+#include <variant>
+
+namespace Tegra {
+struct FramebufferConfig;
+class DmaPusher;
+} // namespace Tegra
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon::GPUThread {
+
+/// Command to signal to the GPU thread that a command list is ready for processing
+struct SubmitListCommand final {
+ explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
+
+ Tegra::CommandList entries;
+};
+
+/// Command to signal to the GPU thread that a swap buffers is pending
+struct SwapBuffersCommand final {
+ explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
+ : framebuffer{std::move(framebuffer)} {}
+
+ std::optional<const Tegra::FramebufferConfig> framebuffer;
+};
+
+/// Command to signal to the GPU thread to flush a region
+struct FlushRegionCommand final {
+ explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+
+ const VAddr addr;
+ const u64 size;
+};
+
+/// Command to signal to the GPU thread to invalidate a region
+struct InvalidateRegionCommand final {
+ explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+
+ const VAddr addr;
+ const u64 size;
+};
+
+/// Command to signal to the GPU thread to flush and invalidate a region
+struct FlushAndInvalidateRegionCommand final {
+ explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
+ : addr{addr}, size{size} {}
+
+ const VAddr addr;
+ const u64 size;
+};
+
+using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+ InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+
+/// Struct used to synchronize the GPU thread
+struct SynchState final {
+ std::atomic<bool> is_running{true};
+ std::atomic<bool> is_idle{true};
+ std::condition_variable signal_condition;
+ std::mutex signal_mutex;
+ std::condition_variable idle_condition;
+ std::mutex idle_mutex;
+
+ // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
+ // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
+ // empty. This allows for efficient thread-safe access, as it does not require any copies.
+
+ using CommandQueue = std::queue<CommandData>;
+ std::array<CommandQueue, 2> command_queues;
+ CommandQueue* push_queue{&command_queues[0]};
+ CommandQueue* pop_queue{&command_queues[1]};
+
+ void UpdateIdleState() {
+ std::lock_guard<std::mutex> lock{idle_mutex};
+ is_idle = command_queues[0].empty() && command_queues[1].empty();
+ }
+};
+
+/// Class used to manage the GPU thread
+class ThreadManager final {
+public:
+ explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+ ~ThreadManager();
+
+ /// Push GPU command entries to be processed
+ void SubmitList(Tegra::CommandList&& entries);
+
+ /// Swap buffers (render frame)
+ void SwapBuffers(
+ std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
+
+ /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+ void FlushRegion(VAddr addr, u64 size);
+
+ /// Notify rasterizer that any caches of the specified region should be invalidated
+ void InvalidateRegion(VAddr addr, u64 size);
+
+ /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+ void FlushAndInvalidateRegion(VAddr addr, u64 size);
+
+private:
+ /// Pushes a command to be executed by the GPU thread
+ void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
+
+ /// Returns true if this is called by the GPU thread
+ bool IsGpuThread() const {
+ return std::this_thread::get_id() == thread_id;
+ }
+
+private:
+ SynchState state;
+ VideoCore::RendererBase& renderer;
+ Tegra::DmaPusher& dma_pusher;
+ std::thread thread;
+ std::thread::id thread_id;
+};
+
+} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index bcf0c15a4..a7bcf26fb 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -129,6 +129,15 @@ protected:
return ++modified_ticks;
}
+ /// Flushes the specified object, updating appropriate cache state as needed
+ void FlushObject(const T& object) {
+ if (!object->IsDirty()) {
+ return;
+ }
+ object->Flush();
+ object->MarkAsModified(false, *this);
+ }
+
private:
/// Returns a list of cached objects from the specified memory region, ordered by access time
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
@@ -154,15 +163,6 @@ private:
return objects;
}
- /// Flushes the specified object, updating appropriate cache state as needed
- void FlushObject(const T& object) {
- if (!object->IsDirty()) {
- return;
- }
- object->Flush();
- object->MarkAsModified(false, *this);
- }
-
using ObjectSet = std::set<T>;
using ObjectCache = std::unordered_map<VAddr, T>;
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 94223f45f..919d1f2d4 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/logging/log.h"
#include "core/frontend/emu_window.h"
#include "core/settings.h"
#include "video_core/renderer_base.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 406d720a0..301562ff6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -102,8 +102,8 @@ struct FramebufferCacheKey {
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
ScreenInfo& info)
- : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
- buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
+ : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
+ screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -118,7 +118,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
- LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
+ LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
CheckExtensions();
}
@@ -177,7 +177,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
continue;
const auto& buffer = regs.vertex_array[attrib.buffer];
- LOG_TRACE(HW_GPU,
+ LOG_TRACE(Render_OpenGL,
"vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
attrib.offset.Value(), attrib.IsNormalized());
@@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
}
// Rebinding the VAO invalidates the vertex buffer bindings.
- gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+ gpu.dirty_flags.vertex_array.set();
state.draw.vertex_array = vao_entry.handle;
return vao_entry.handle;
@@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
- if (!gpu.dirty_flags.vertex_array)
+ if (gpu.dirty_flags.vertex_array.none())
return;
MICROPROFILE_SCOPE(OpenGL_VB);
// Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (~gpu.dirty_flags.vertex_array & (1u << index))
+ if (!gpu.dirty_flags.vertex_array[index])
continue;
const auto& vertex_array = regs.vertex_array[index];
@@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
}
}
- gpu.dirty_flags.vertex_array = 0;
+ gpu.dirty_flags.vertex_array.reset();
}
DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -343,9 +343,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
shader_program_manager->UseProgrammableFragmentShader(program_handle);
break;
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
- shader_config.enable.Value(), shader_config.offset);
- UNREACHABLE();
+ UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
+ shader_config.enable.Value(), shader_config.offset);
}
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
@@ -488,13 +487,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
std::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
- const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
single_color_target};
- if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
- !gpu.dirty_flags.zeta_buffer) {
+ if (fb_config_state == current_framebuffer_config_state &&
+ gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
// single color targets). This is done because the guest registers may not change but the
// host framebuffer may contain different attachments
@@ -721,10 +720,10 @@ void RasterizerOpenGL::DrawArrays() {
// Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
- bool invalidate = buffer_cache.Map(buffer_size);
+ const bool invalidate = buffer_cache.Map(buffer_size);
if (invalidate) {
// As all cached buffers are invalidated, we need to recheck their state.
- gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+ gpu.dirty_flags.vertex_array.set();
}
const GLuint vao = SetupVertexFormat();
@@ -738,30 +737,18 @@ void RasterizerOpenGL::DrawArrays() {
shader_program_manager->ApplyTo(state);
state.Apply();
- // Execute draw call
+ res_cache.SignalPreDrawCall();
params.DispatchDraw();
-
- // Disable scissor test
- state.viewports[0].scissor.enabled = false;
+ res_cache.SignalPostDrawCall();
accelerate_draw = AccelDraw::Disabled;
-
- // Unbind textures for potential future use as framebuffer attachments
- for (auto& texture_unit : state.texture_units) {
- texture_unit.Unbind();
- }
- state.Apply();
}
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-
- if (Settings::values.use_accurate_gpu_emulation) {
- // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
- res_cache.FlushRegion(addr, size);
- }
+ res_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -805,7 +792,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
- ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different");
+
+ if (params.pixel_format != pixel_format) {
+ LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
+ }
screen_info.display_texture = surface->Texture().handle;
@@ -951,8 +941,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
size = buffer.size;
if (size > MaxConstbufferSize) {
- LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size,
- MaxConstbufferSize);
+ LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
+ MaxConstbufferSize);
size = MaxConstbufferSize;
}
} else {
@@ -1246,11 +1236,7 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
void RasterizerOpenGL::SyncTransformFeedback() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-
- if (regs.tfb_enabled != 0) {
- LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
- UNREACHABLE();
- }
+ UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
}
void RasterizerOpenGL::SyncPointState() {
@@ -1270,12 +1256,8 @@ void RasterizerOpenGL::SyncPolygonOffset() {
void RasterizerOpenGL::CheckAlphaTests() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-
- if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) {
- LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
- "this behavior is undefined.");
- UNREACHABLE();
- }
+ UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
+ "Alpha Testing is enabled with more than one rendertarget");
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 837523b82..e9eb6e921 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <optional>
#include <glad/glad.h>
#include "common/alignment.h"
@@ -20,7 +21,7 @@
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/surface.h"
-#include "video_core/textures/astc.h"
+#include "video_core/textures/convert.h"
#include "video_core/textures/decoders.h"
namespace OpenGL {
@@ -570,6 +571,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
// alternatives. This signals a bug on those functions.
const auto width = static_cast<GLsizei>(params.MipWidth(0));
const auto height = static_cast<GLsizei>(params.MipHeight(0));
+ memory_size = params.MemorySize();
+ reinterpreted = false;
const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
gl_internal_format = format_tuple.internal_format;
@@ -615,103 +618,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
}
}
-static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
- union S8Z24 {
- BitField<0, 24, u32> z24;
- BitField<24, 8, u32> s8;
- };
- static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
-
- union Z24S8 {
- BitField<0, 8, u32> s8;
- BitField<8, 24, u32> z24;
- };
- static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
-
- S8Z24 s8z24_pixel{};
- Z24S8 z24s8_pixel{};
- constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
- for (std::size_t y = 0; y < height; ++y) {
- for (std::size_t x = 0; x < width; ++x) {
- const std::size_t offset{bpp * (y * width + x)};
- if (reverse) {
- std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
- s8z24_pixel.s8.Assign(z24s8_pixel.s8);
- s8z24_pixel.z24.Assign(z24s8_pixel.z24);
- std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
- } else {
- std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
- z24s8_pixel.s8.Assign(s8z24_pixel.s8);
- z24s8_pixel.z24.Assign(s8z24_pixel.z24);
- std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
- }
- }
- }
-}
-
-/**
- * Helper function to perform software conversion (as needed) when loading a buffer from Switch
- * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
- * typical desktop GPUs.
- */
-static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
- u32 width, u32 height, u32 depth) {
- switch (pixel_format) {
- case PixelFormat::ASTC_2D_4X4:
- case PixelFormat::ASTC_2D_8X8:
- case PixelFormat::ASTC_2D_8X5:
- case PixelFormat::ASTC_2D_5X4:
- case PixelFormat::ASTC_2D_5X5:
- case PixelFormat::ASTC_2D_4X4_SRGB:
- case PixelFormat::ASTC_2D_8X8_SRGB:
- case PixelFormat::ASTC_2D_8X5_SRGB:
- case PixelFormat::ASTC_2D_5X4_SRGB:
- case PixelFormat::ASTC_2D_5X5_SRGB:
- case PixelFormat::ASTC_2D_10X8:
- case PixelFormat::ASTC_2D_10X8_SRGB: {
- // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
- u32 block_width{};
- u32 block_height{};
- std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
- data =
- Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
- break;
- }
- case PixelFormat::S8Z24:
- // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
- ConvertS8Z24ToZ24S8(data, width, height, false);
- break;
- }
-}
-
-/**
- * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
- * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
- * with typical desktop GPUs.
- */
-static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
- u32 width, u32 height) {
- switch (pixel_format) {
- case PixelFormat::ASTC_2D_4X4:
- case PixelFormat::ASTC_2D_8X8:
- case PixelFormat::ASTC_2D_4X4_SRGB:
- case PixelFormat::ASTC_2D_8X8_SRGB:
- case PixelFormat::ASTC_2D_5X5:
- case PixelFormat::ASTC_2D_5X5_SRGB:
- case PixelFormat::ASTC_2D_10X8:
- case PixelFormat::ASTC_2D_10X8_SRGB: {
- LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
- static_cast<u32>(pixel_format));
- UNREACHABLE();
- break;
- }
- case PixelFormat::S8Z24:
- // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
- ConvertS8Z24ToZ24S8(data, width, height, true);
- break;
- }
-}
-
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
void CachedSurface::LoadGLBuffer() {
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
@@ -740,8 +646,16 @@ void CachedSurface::LoadGLBuffer() {
}
}
for (u32 i = 0; i < params.max_mip_level; i++) {
- ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
- params.MipHeight(i), params.MipDepth(i));
+ const u32 width = params.MipWidth(i);
+ const u32 height = params.MipHeight(i);
+ const u32 depth = params.MipDepth(i);
+ if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
+ // Reserve size for RGBA8 conversion
+ constexpr std::size_t rgba_bpp = 4;
+ gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
+ }
+ Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
+ height, depth, true, true);
}
}
@@ -764,8 +678,8 @@ void CachedSurface::FlushGLBuffer() {
glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
- ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width,
- params.height);
+ Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
+ params.height, params.depth, true, true);
const u8* const texture_src_data = Memory::GetPointer(params.addr);
ASSERT(texture_src_data);
if (params.is_tiled) {
@@ -985,30 +899,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
const auto& regs{gpu.regs};
- if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
- return last_color_buffers[index];
+ if (!gpu.dirty_flags.color_buffer[index]) {
+ return current_color_buffers[index];
}
- gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));
+ gpu.dirty_flags.color_buffer.reset(index);
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
if (index >= regs.rt_control.count) {
- return last_color_buffers[index] = {};
+ return current_color_buffers[index] = {};
}
if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
- return last_color_buffers[index] = {};
+ return current_color_buffers[index] = {};
}
const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
- return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
+ return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
}
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
surface->LoadGLBuffer();
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->MarkAsModified(false, *this);
+ surface->MarkForReload(false);
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -1020,18 +935,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
Surface surface{TryGet(params.addr)};
if (surface) {
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
- // Use the cached surface as-is
+ // Use the cached surface as-is unless it's not synced with memory
+ if (surface->MustReload())
+ LoadSurface(surface);
return surface;
} else if (preserve_contents) {
// If surface parameters changed and we care about keeping the previous data, recreate
// the surface from the old one
Surface new_surface{RecreateSurface(surface, params)};
- Unregister(surface);
+ UnregisterSurface(surface);
Register(new_surface);
+ if (new_surface->IsUploaded()) {
+ RegisterReinterpretSurface(new_surface);
+ }
return new_surface;
} else {
// Delete the old surface before creating a new one to prevent collisions.
- Unregister(surface);
+ UnregisterSurface(surface);
}
}
@@ -1313,4 +1233,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
return {};
}
+static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
+ u32 height) {
+ for (u32 i = 0; i < params.max_mip_level; i++) {
+ if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
+ return {i};
+ }
+ }
+ return {};
+}
+
+static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
+ const std::size_t size = params.LayerMemorySize();
+ VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
+ for (u32 i = 0; i < params.depth; i++) {
+ if (start == addr) {
+ return {i};
+ }
+ start += size;
+ }
+ return {};
+}
+
+static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
+ const Surface blitted_surface) {
+ const auto& dst_params = blitted_surface->GetSurfaceParams();
+ const auto& src_params = render_surface->GetSurfaceParams();
+ const std::size_t src_memory_size = src_params.size_in_bytes;
+ const std::optional<u32> level =
+ TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
+ if (level.has_value()) {
+ if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
+ src_params.height == dst_params.MipHeight(*level) &&
+ src_params.block_height >= dst_params.MipBlockHeight(*level)) {
+ const std::optional<u32> slot =
+ TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
+ if (slot.has_value()) {
+ glCopyImageSubData(render_surface->Texture().handle,
+ SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
+ blitted_surface->Texture().handle,
+ SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
+ dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
+ blitted_surface->MarkAsModified(true, cache);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
+ const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
+ const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
+ if (bound2 > bound1)
+ return true;
+ const auto& dst_params = blitted_surface->GetSurfaceParams();
+ const auto& src_params = render_surface->GetSurfaceParams();
+ return (dst_params.component_type != src_params.component_type);
+}
+
+static bool IsReinterpretInvalidSecond(const Surface render_surface,
+ const Surface blitted_surface) {
+ const auto& dst_params = blitted_surface->GetSurfaceParams();
+ const auto& src_params = render_surface->GetSurfaceParams();
+ return (dst_params.height > src_params.height && dst_params.width > src_params.width);
+}
+
+bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
+ Surface intersect) {
+ if (IsReinterpretInvalid(triggering_surface, intersect)) {
+ UnregisterSurface(intersect);
+ return false;
+ }
+ if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
+ if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
+ UnregisterSurface(intersect);
+ return false;
+ }
+ FlushObject(intersect);
+ FlushObject(triggering_surface);
+ intersect->MarkForReload(true);
+ }
+ return true;
+}
+
+void RasterizerCacheOpenGL::SignalPreDrawCall() {
+ if (texception && GLAD_GL_ARB_texture_barrier) {
+ glTextureBarrier();
+ }
+ texception = false;
+}
+
+void RasterizerCacheOpenGL::SignalPostDrawCall() {
+ for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
+ if (current_color_buffers[i] != nullptr) {
+ Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
+ if (intersect != nullptr) {
+ PartialReinterpretSurface(current_color_buffers[i], intersect);
+ texception = true;
+ }
+ }
+ }
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 32fc6538a..9cf6f50be 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -34,6 +34,7 @@ using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
using SurfaceType = VideoCore::Surface::SurfaceType;
using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct SurfaceParams {
enum class SurfaceClass {
@@ -140,10 +141,18 @@ struct SurfaceParams {
return offset;
}
+ std::size_t GetMipmapSingleSize(u32 mip_level) const {
+ return InnerMipmapMemorySize(mip_level, false, is_layered);
+ }
+
u32 MipWidth(u32 mip_level) const {
return std::max(1U, width >> mip_level);
}
+ u32 MipWidthGobAligned(u32 mip_level) const {
+ return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
+ }
+
u32 MipHeight(u32 mip_level) const {
return std::max(1U, height >> mip_level);
}
@@ -346,6 +355,10 @@ public:
return cached_size_in_bytes;
}
+ std::size_t GetMemorySize() const {
+ return memory_size;
+ }
+
void Flush() override {
FlushGLBuffer();
}
@@ -383,6 +396,26 @@ public:
Tegra::Texture::SwizzleSource swizzle_z,
Tegra::Texture::SwizzleSource swizzle_w);
+ void MarkReinterpreted() {
+ reinterpreted = true;
+ }
+
+ bool IsReinterpreted() const {
+ return reinterpreted;
+ }
+
+ void MarkForReload(bool reload) {
+ must_reload = reload;
+ }
+
+ bool MustReload() const {
+ return must_reload;
+ }
+
+ bool IsUploaded() const {
+ return params.identity == SurfaceParams::SurfaceClass::Uploaded;
+ }
+
private:
void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
@@ -396,6 +429,9 @@ private:
GLenum gl_internal_format{};
std::size_t cached_size_in_bytes{};
std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
+ std::size_t memory_size;
+ bool reinterpreted = false;
+ bool must_reload = false;
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -421,6 +457,9 @@ public:
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect);
+ void SignalPreDrawCall();
+ void SignalPostDrawCall();
+
private:
void LoadSurface(const Surface& surface);
Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
@@ -437,6 +476,10 @@ private:
/// Tries to get a reserved surface for the specified parameters
Surface TryGetReservedSurface(const SurfaceParams& params);
+ // Partialy reinterpret a surface based on a triggering_surface that collides with it.
+ // returns true if the reinterpret was successful, false in case it was not.
+ bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
+
/// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
@@ -453,12 +496,50 @@ private:
OGLFramebuffer read_framebuffer;
OGLFramebuffer draw_framebuffer;
+ bool texception = false;
+
/// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
/// using the new format.
OGLBuffer copy_pbo;
- std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
+ std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
+ std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
Surface last_depth_buffer;
+
+ using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
+ using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
+
+ static auto GetReinterpretInterval(const Surface& object) {
+ return SurfaceInterval::right_open(object->GetAddr() + 1,
+ object->GetAddr() + object->GetMemorySize() - 1);
+ }
+
+ // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
+ SurfaceIntervalCache reinterpreted_surfaces;
+
+ void RegisterReinterpretSurface(Surface reinterpret_surface) {
+ auto interval = GetReinterpretInterval(reinterpret_surface);
+ reinterpreted_surfaces.insert({interval, reinterpret_surface});
+ reinterpret_surface->MarkReinterpreted();
+ }
+
+ Surface CollideOnReinterpretedSurface(VAddr addr) const {
+ const SurfaceInterval interval{addr};
+ for (auto& pair :
+ boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
+ return pair.second;
+ }
+ return nullptr;
+ }
+
+ /// Unregisters an object from the cache
+ void UnregisterSurface(const Surface& object) {
+ if (object->IsReinterpreted()) {
+ auto interval = GetReinterpretInterval(object);
+ reinterpreted_surfaces.erase(interval);
+ }
+ Unregister(object);
+ }
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 219f08053..9419326a3 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -461,7 +461,7 @@ void OpenGLState::ApplyTextures() const {
if (has_delta) {
glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
- textures.data());
+ textures.data() + first);
}
}
@@ -482,7 +482,7 @@ void OpenGLState::ApplySamplers() const {
}
if (has_delta) {
glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
- samplers.data());
+ samplers.data() + first);
}
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e60b2eb44..8b510b6ae 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -244,6 +244,21 @@ void RendererOpenGL::InitOpenGLObjects() {
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
}
+void RendererOpenGL::AddTelemetryFields() {
+ const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
+ const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
+ const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
+
+ LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
+ LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
+ LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
+
+ auto& telemetry_session = system.TelemetrySession();
+ telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
+ telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
+ telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+}
+
void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
@@ -466,17 +481,7 @@ bool RendererOpenGL::Init() {
glDebugMessageCallback(DebugHandler, nullptr);
}
- const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
- const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
- const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
-
- LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
- LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
- LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
-
- Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
- Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
- Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+ AddTelemetryFields();
if (!GLAD_GL_VERSION_4_3) {
return false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index c168fa89e..6cbf9d2cb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -60,6 +60,7 @@ public:
private:
void InitOpenGLObjects();
+ void AddTelemetryFields();
void CreateRasterizer();
void ConfigureFramebufferTexture(TextureInfo& texture,
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
new file mode 100644
index 000000000..34bf26ff2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -0,0 +1,483 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/surface.h"
+
+namespace Vulkan::MaxwellToVK {
+
+namespace Sampler {
+
+vk::Filter Filter(Tegra::Texture::TextureFilter filter) {
+ switch (filter) {
+ case Tegra::Texture::TextureFilter::Linear:
+ return vk::Filter::eLinear;
+ case Tegra::Texture::TextureFilter::Nearest:
+ return vk::Filter::eNearest;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
+ return {};
+}
+
+vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
+ switch (mipmap_filter) {
+ case Tegra::Texture::TextureMipmapFilter::None:
+ // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
+ // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
+ // use an image view with a single mipmap level to emulate this.
+ return vk::SamplerMipmapMode::eLinear;
+ case Tegra::Texture::TextureMipmapFilter::Linear:
+ return vk::SamplerMipmapMode::eLinear;
+ case Tegra::Texture::TextureMipmapFilter::Nearest:
+ return vk::SamplerMipmapMode::eNearest;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
+ return {};
+}
+
+vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
+ switch (wrap_mode) {
+ case Tegra::Texture::WrapMode::Wrap:
+ return vk::SamplerAddressMode::eRepeat;
+ case Tegra::Texture::WrapMode::Mirror:
+ return vk::SamplerAddressMode::eMirroredRepeat;
+ case Tegra::Texture::WrapMode::ClampToEdge:
+ return vk::SamplerAddressMode::eClampToEdge;
+ case Tegra::Texture::WrapMode::Border:
+ return vk::SamplerAddressMode::eClampToBorder;
+ case Tegra::Texture::WrapMode::ClampOGL:
+ // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
+ // eClampToBorder to get the border color of the texture, and then sample the edge to
+ // manually mix them. However the shader part of this is not yet implemented.
+ return vk::SamplerAddressMode::eClampToBorder;
+ case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
+ return vk::SamplerAddressMode::eMirrorClampToEdge;
+ case Tegra::Texture::WrapMode::MirrorOnceBorder:
+ UNIMPLEMENTED();
+ return vk::SamplerAddressMode::eMirrorClampToEdge;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
+ return {};
+}
+
+vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
+ switch (depth_compare_func) {
+ case Tegra::Texture::DepthCompareFunc::Never:
+ return vk::CompareOp::eNever;
+ case Tegra::Texture::DepthCompareFunc::Less:
+ return vk::CompareOp::eLess;
+ case Tegra::Texture::DepthCompareFunc::LessEqual:
+ return vk::CompareOp::eLessOrEqual;
+ case Tegra::Texture::DepthCompareFunc::Equal:
+ return vk::CompareOp::eEqual;
+ case Tegra::Texture::DepthCompareFunc::NotEqual:
+ return vk::CompareOp::eNotEqual;
+ case Tegra::Texture::DepthCompareFunc::Greater:
+ return vk::CompareOp::eGreater;
+ case Tegra::Texture::DepthCompareFunc::GreaterEqual:
+ return vk::CompareOp::eGreaterOrEqual;
+ case Tegra::Texture::DepthCompareFunc::Always:
+ return vk::CompareOp::eAlways;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
+ static_cast<u32>(depth_compare_func));
+ return {};
+}
+
+} // namespace Sampler
+
+struct FormatTuple {
+ vk::Format format; ///< Vulkan format
+ ComponentType component_type; ///< Abstracted component type
+ bool attachable; ///< True when this format can be used as an attachment
+};
+
+static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
+ {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI
+ {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U
+ {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U
+ {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI
+ {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1
+ {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23
+ {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45
+ {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F
+ {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4
+
+ // Compressed sRGB formats
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
+
+ // Depth formats
+ {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
+ {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16
+
+ // DepthStencil formats
+ {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
+ {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8
+}};
+
+static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
+ return pixel_format >= PixelFormat::MaxColorFormat &&
+ pixel_format < PixelFormat::MaxDepthStencilFormat;
+}
+
+std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
+ PixelFormat pixel_format, ComponentType component_type) {
+ ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
+
+ const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
+ UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
+ "Unimplemented texture format with pixel format={} and component type={}",
+ static_cast<u32>(pixel_format), static_cast<u32>(component_type));
+ ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
+
+ auto usage = vk::FormatFeatureFlagBits::eSampledImage |
+ vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
+ if (tuple.attachable) {
+ usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
+ : vk::FormatFeatureFlagBits::eColorAttachment;
+ }
+ return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
+}
+
+vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) {
+ switch (stage) {
+ case Maxwell::ShaderStage::Vertex:
+ return vk::ShaderStageFlagBits::eVertex;
+ case Maxwell::ShaderStage::TesselationControl:
+ return vk::ShaderStageFlagBits::eTessellationControl;
+ case Maxwell::ShaderStage::TesselationEval:
+ return vk::ShaderStageFlagBits::eTessellationEvaluation;
+ case Maxwell::ShaderStage::Geometry:
+ return vk::ShaderStageFlagBits::eGeometry;
+ case Maxwell::ShaderStage::Fragment:
+ return vk::ShaderStageFlagBits::eFragment;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
+ return {};
+}
+
+vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
+ switch (topology) {
+ case Maxwell::PrimitiveTopology::Points:
+ return vk::PrimitiveTopology::ePointList;
+ case Maxwell::PrimitiveTopology::Lines:
+ return vk::PrimitiveTopology::eLineList;
+ case Maxwell::PrimitiveTopology::LineStrip:
+ return vk::PrimitiveTopology::eLineStrip;
+ case Maxwell::PrimitiveTopology::Triangles:
+ return vk::PrimitiveTopology::eTriangleList;
+ case Maxwell::PrimitiveTopology::TriangleStrip:
+ return vk::PrimitiveTopology::eTriangleStrip;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
+ return {};
+}
+
+vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
+ switch (type) {
+ case Maxwell::VertexAttribute::Type::SignedNorm:
+ break;
+ case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return vk::Format::eR8G8B8A8Unorm;
+ default:
+ break;
+ }
+ break;
+ case Maxwell::VertexAttribute::Type::SignedInt:
+ break;
+ case Maxwell::VertexAttribute::Type::UnsignedInt:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_32:
+ return vk::Format::eR32Uint;
+ default:
+ break;
+ }
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
+ break;
+ case Maxwell::VertexAttribute::Type::Float:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+ return vk::Format::eR32G32B32A32Sfloat;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ return vk::Format::eR32G32B32Sfloat;
+ case Maxwell::VertexAttribute::Size::Size_32_32:
+ return vk::Format::eR32G32Sfloat;
+ case Maxwell::VertexAttribute::Size::Size_32:
+ return vk::Format::eR32Sfloat;
+ default:
+ break;
+ }
+ break;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
+ static_cast<u32>(size));
+ return {};
+}
+
+vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
+ switch (comparison) {
+ case Maxwell::ComparisonOp::Never:
+ case Maxwell::ComparisonOp::NeverOld:
+ return vk::CompareOp::eNever;
+ case Maxwell::ComparisonOp::Less:
+ case Maxwell::ComparisonOp::LessOld:
+ return vk::CompareOp::eLess;
+ case Maxwell::ComparisonOp::Equal:
+ case Maxwell::ComparisonOp::EqualOld:
+ return vk::CompareOp::eEqual;
+ case Maxwell::ComparisonOp::LessEqual:
+ case Maxwell::ComparisonOp::LessEqualOld:
+ return vk::CompareOp::eLessOrEqual;
+ case Maxwell::ComparisonOp::Greater:
+ case Maxwell::ComparisonOp::GreaterOld:
+ return vk::CompareOp::eGreater;
+ case Maxwell::ComparisonOp::NotEqual:
+ case Maxwell::ComparisonOp::NotEqualOld:
+ return vk::CompareOp::eNotEqual;
+ case Maxwell::ComparisonOp::GreaterEqual:
+ case Maxwell::ComparisonOp::GreaterEqualOld:
+ return vk::CompareOp::eGreaterOrEqual;
+ case Maxwell::ComparisonOp::Always:
+ case Maxwell::ComparisonOp::AlwaysOld:
+ return vk::CompareOp::eAlways;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
+ return {};
+}
+
+vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
+ switch (index_format) {
+ case Maxwell::IndexFormat::UnsignedByte:
+ UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
+ return vk::IndexType::eUint16;
+ case Maxwell::IndexFormat::UnsignedShort:
+ return vk::IndexType::eUint16;
+ case Maxwell::IndexFormat::UnsignedInt:
+ return vk::IndexType::eUint32;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
+ return {};
+}
+
+vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) {
+ switch (stencil_op) {
+ case Maxwell::StencilOp::Keep:
+ case Maxwell::StencilOp::KeepOGL:
+ return vk::StencilOp::eKeep;
+ case Maxwell::StencilOp::Zero:
+ case Maxwell::StencilOp::ZeroOGL:
+ return vk::StencilOp::eZero;
+ case Maxwell::StencilOp::Replace:
+ case Maxwell::StencilOp::ReplaceOGL:
+ return vk::StencilOp::eReplace;
+ case Maxwell::StencilOp::Incr:
+ case Maxwell::StencilOp::IncrOGL:
+ return vk::StencilOp::eIncrementAndClamp;
+ case Maxwell::StencilOp::Decr:
+ case Maxwell::StencilOp::DecrOGL:
+ return vk::StencilOp::eDecrementAndClamp;
+ case Maxwell::StencilOp::Invert:
+ case Maxwell::StencilOp::InvertOGL:
+ return vk::StencilOp::eInvert;
+ case Maxwell::StencilOp::IncrWrap:
+ case Maxwell::StencilOp::IncrWrapOGL:
+ return vk::StencilOp::eIncrementAndWrap;
+ case Maxwell::StencilOp::DecrWrap:
+ case Maxwell::StencilOp::DecrWrapOGL:
+ return vk::StencilOp::eDecrementAndWrap;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
+ return {};
+}
+
+vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) {
+ switch (equation) {
+ case Maxwell::Blend::Equation::Add:
+ case Maxwell::Blend::Equation::AddGL:
+ return vk::BlendOp::eAdd;
+ case Maxwell::Blend::Equation::Subtract:
+ case Maxwell::Blend::Equation::SubtractGL:
+ return vk::BlendOp::eSubtract;
+ case Maxwell::Blend::Equation::ReverseSubtract:
+ case Maxwell::Blend::Equation::ReverseSubtractGL:
+ return vk::BlendOp::eReverseSubtract;
+ case Maxwell::Blend::Equation::Min:
+ case Maxwell::Blend::Equation::MinGL:
+ return vk::BlendOp::eMin;
+ case Maxwell::Blend::Equation::Max:
+ case Maxwell::Blend::Equation::MaxGL:
+ return vk::BlendOp::eMax;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
+ return {};
+}
+
+vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
+ switch (factor) {
+ case Maxwell::Blend::Factor::Zero:
+ case Maxwell::Blend::Factor::ZeroGL:
+ return vk::BlendFactor::eZero;
+ case Maxwell::Blend::Factor::One:
+ case Maxwell::Blend::Factor::OneGL:
+ return vk::BlendFactor::eOne;
+ case Maxwell::Blend::Factor::SourceColor:
+ case Maxwell::Blend::Factor::SourceColorGL:
+ return vk::BlendFactor::eSrcColor;
+ case Maxwell::Blend::Factor::OneMinusSourceColor:
+ case Maxwell::Blend::Factor::OneMinusSourceColorGL:
+ return vk::BlendFactor::eOneMinusSrcColor;
+ case Maxwell::Blend::Factor::SourceAlpha:
+ case Maxwell::Blend::Factor::SourceAlphaGL:
+ return vk::BlendFactor::eSrcAlpha;
+ case Maxwell::Blend::Factor::OneMinusSourceAlpha:
+ case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
+ return vk::BlendFactor::eOneMinusSrcAlpha;
+ case Maxwell::Blend::Factor::DestAlpha:
+ case Maxwell::Blend::Factor::DestAlphaGL:
+ return vk::BlendFactor::eDstAlpha;
+ case Maxwell::Blend::Factor::OneMinusDestAlpha:
+ case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
+ return vk::BlendFactor::eOneMinusDstAlpha;
+ case Maxwell::Blend::Factor::DestColor:
+ case Maxwell::Blend::Factor::DestColorGL:
+ return vk::BlendFactor::eDstColor;
+ case Maxwell::Blend::Factor::OneMinusDestColor:
+ case Maxwell::Blend::Factor::OneMinusDestColorGL:
+ return vk::BlendFactor::eOneMinusDstColor;
+ case Maxwell::Blend::Factor::SourceAlphaSaturate:
+ case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
+ return vk::BlendFactor::eSrcAlphaSaturate;
+ case Maxwell::Blend::Factor::Source1Color:
+ case Maxwell::Blend::Factor::Source1ColorGL:
+ return vk::BlendFactor::eSrc1Color;
+ case Maxwell::Blend::Factor::OneMinusSource1Color:
+ case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
+ return vk::BlendFactor::eOneMinusSrc1Color;
+ case Maxwell::Blend::Factor::Source1Alpha:
+ case Maxwell::Blend::Factor::Source1AlphaGL:
+ return vk::BlendFactor::eSrc1Alpha;
+ case Maxwell::Blend::Factor::OneMinusSource1Alpha:
+ case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
+ return vk::BlendFactor::eOneMinusSrc1Alpha;
+ case Maxwell::Blend::Factor::ConstantColor:
+ case Maxwell::Blend::Factor::ConstantColorGL:
+ return vk::BlendFactor::eConstantColor;
+ case Maxwell::Blend::Factor::OneMinusConstantColor:
+ case Maxwell::Blend::Factor::OneMinusConstantColorGL:
+ return vk::BlendFactor::eOneMinusConstantColor;
+ case Maxwell::Blend::Factor::ConstantAlpha:
+ case Maxwell::Blend::Factor::ConstantAlphaGL:
+ return vk::BlendFactor::eConstantAlpha;
+ case Maxwell::Blend::Factor::OneMinusConstantAlpha:
+ case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
+ return vk::BlendFactor::eOneMinusConstantAlpha;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
+ return {};
+}
+
+vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
+ switch (front_face) {
+ case Maxwell::Cull::FrontFace::ClockWise:
+ return vk::FrontFace::eClockwise;
+ case Maxwell::Cull::FrontFace::CounterClockWise:
+ return vk::FrontFace::eCounterClockwise;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
+ return {};
+}
+
+vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
+ switch (cull_face) {
+ case Maxwell::Cull::CullFace::Front:
+ return vk::CullModeFlagBits::eFront;
+ case Maxwell::Cull::CullFace::Back:
+ return vk::CullModeFlagBits::eBack;
+ case Maxwell::Cull::CullFace::FrontAndBack:
+ return vk::CullModeFlagBits::eFrontAndBack;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
+ return {};
+}
+
+vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
+ switch (swizzle) {
+ case Tegra::Texture::SwizzleSource::Zero:
+ return vk::ComponentSwizzle::eZero;
+ case Tegra::Texture::SwizzleSource::R:
+ return vk::ComponentSwizzle::eR;
+ case Tegra::Texture::SwizzleSource::G:
+ return vk::ComponentSwizzle::eG;
+ case Tegra::Texture::SwizzleSource::B:
+ return vk::ComponentSwizzle::eB;
+ case Tegra::Texture::SwizzleSource::A:
+ return vk::ComponentSwizzle::eA;
+ case Tegra::Texture::SwizzleSource::OneInt:
+ case Tegra::Texture::SwizzleSource::OneFloat:
+ return vk::ComponentSwizzle::eOne;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
+ return {};
+}
+
+} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
new file mode 100644
index 000000000..4cadc0721
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -0,0 +1,58 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <utility>
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/surface.h"
+#include "video_core/textures/texture.h"
+
+namespace Vulkan::MaxwellToVK {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using PixelFormat = VideoCore::Surface::PixelFormat;
+using ComponentType = VideoCore::Surface::ComponentType;
+
+namespace Sampler {
+
+vk::Filter Filter(Tegra::Texture::TextureFilter filter);
+
+vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
+
+vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
+
+vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
+
+} // namespace Sampler
+
+std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
+ PixelFormat pixel_format, ComponentType component_type);
+
+vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);
+
+vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
+
+vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
+
+vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
+
+vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
+
+vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);
+
+vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
+
+vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
+
+vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);
+
+vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);
+
+vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
+
+} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
new file mode 100644
index 000000000..4a33a6c84
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -0,0 +1,116 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <memory>
+#include <optional>
+#include <tuple>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "core/memory.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
+ VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+ VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
+ : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} {
+ const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
+ vk::BufferUsageFlagBits::eIndexBuffer |
+ vk::BufferUsageFlagBits::eUniformBuffer;
+ const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
+ vk::AccessFlagBits::eUniformRead;
+ stream_buffer =
+ std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
+ vk::PipelineStageFlagBits::eAllCommands);
+ buffer_handle = stream_buffer->GetBuffer();
+}
+
+VKBufferCache::~VKBufferCache() = default;
+
+u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
+ bool cache) {
+ const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
+ ASSERT(cpu_addr);
+
+ // Cache management is a big overhead, so only cache entries with a given size.
+ // TODO: Figure out which size is the best for given games.
+ cache &= size >= 2048;
+
+ if (cache) {
+ if (auto entry = TryGet(*cpu_addr); entry) {
+ if (entry->size >= size && entry->alignment == alignment) {
+ return entry->offset;
+ }
+ Unregister(entry);
+ }
+ }
+
+ AlignBuffer(alignment);
+ const u64 uploaded_offset = buffer_offset;
+
+ Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+
+ buffer_ptr += size;
+ buffer_offset += size;
+
+ if (cache) {
+ auto entry = std::make_shared<CachedBufferEntry>();
+ entry->offset = uploaded_offset;
+ entry->size = size;
+ entry->alignment = alignment;
+ entry->addr = *cpu_addr;
+ Register(entry);
+ }
+
+ return uploaded_offset;
+}
+
+u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
+ AlignBuffer(alignment);
+ std::memcpy(buffer_ptr, raw_pointer, size);
+ const u64 uploaded_offset = buffer_offset;
+
+ buffer_ptr += size;
+ buffer_offset += size;
+ return uploaded_offset;
+}
+
+std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
+ AlignBuffer(alignment);
+ u8* const uploaded_ptr = buffer_ptr;
+ const u64 uploaded_offset = buffer_offset;
+
+ buffer_ptr += size;
+ buffer_offset += size;
+ return {uploaded_ptr, uploaded_offset};
+}
+
+void VKBufferCache::Reserve(std::size_t max_size) {
+ bool invalidate;
+ std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
+ buffer_offset = buffer_offset_base;
+
+ if (invalidate) {
+ InvalidateAll();
+ }
+}
+
+VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
+ return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base);
+}
+
+void VKBufferCache::AlignBuffer(std::size_t alignment) {
+ // Align the offset, not the mapped pointer
+ const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
+ buffer_ptr += offset_aligned - buffer_offset;
+ buffer_offset = offset_aligned;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
new file mode 100644
index 000000000..d8e916f31
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -0,0 +1,87 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <tuple>
+
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKMemoryManager;
+class VKStreamBuffer;
+
+struct CachedBufferEntry final : public RasterizerCacheObject {
+ VAddr GetAddr() const override {
+ return addr;
+ }
+
+ std::size_t GetSizeInBytes() const override {
+ return size;
+ }
+
+ // We do not have to flush this cache as things in it are never modified by us.
+ void Flush() override {}
+
+ VAddr addr;
+ std::size_t size;
+ u64 offset;
+ std::size_t alignment;
+};
+
+class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
+public:
+ explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
+ VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+ VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
+ ~VKBufferCache();
+
+ /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
+ /// allocated.
+ u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
+ bool cache = true);
+
+ /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
+ u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
+
+ /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
+ std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
+
+ /// Reserves a region of memory to be used in subsequent upload/reserve operations.
+ void Reserve(std::size_t max_size);
+
+ /// Ensures that the set data is sent to the device.
+ [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);
+
+ /// Returns the buffer cache handle.
+ vk::Buffer GetBuffer() const {
+ return buffer_handle;
+ }
+
+private:
+ void AlignBuffer(std::size_t alignment);
+
+ Tegra::MemoryManager& tegra_memory_manager;
+
+ std::unique_ptr<VKStreamBuffer> stream_buffer;
+ vk::Buffer buffer_handle;
+
+ u8* buffer_ptr = nullptr;
+ u64 buffer_offset = 0;
+ u64 buffer_offset_base = 0;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 78a4e5f0e..00242ecbe 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -122,8 +122,7 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
FormatType format_type) const {
const auto it = format_properties.find(wanted_format);
if (it == format_properties.end()) {
- LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}",
- static_cast<u32>(wanted_format));
+ LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
UNREACHABLE();
return true;
}
@@ -219,11 +218,19 @@ std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
format_properties.emplace(format, physical.getFormatProperties(format, dldi));
};
AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
- AddFormatQuery(vk::Format::eR5G6B5UnormPack16);
+ AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
+ AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
+ AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
+ AddFormatQuery(vk::Format::eR8Unorm);
AddFormatQuery(vk::Format::eD32Sfloat);
+ AddFormatQuery(vk::Format::eD16Unorm);
AddFormatQuery(vk::Format::eD16UnormS8Uint);
AddFormatQuery(vk::Format::eD24UnormS8Uint);
AddFormatQuery(vk::Format::eD32SfloatS8Uint);
+ AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
+ AddFormatQuery(vk::Format::eBc2UnormBlock);
+ AddFormatQuery(vk::Format::eBc3UnormBlock);
+ AddFormatQuery(vk::Format::eBc4UnormBlock);
return format_properties;
}
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index 17ee93b91..0451babbf 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -238,7 +238,7 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
u8* data, u64 begin, u64 end)
- : allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {}
+ : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
allocation->Free(this);
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
index 1678463c7..a1e117443 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) {
protected_resources.push_back(resource);
}
-void VKFence::Unprotect(const VKResource* resource) {
+void VKFence::Unprotect(VKResource* resource) {
const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
- if (it != protected_resources.end()) {
- protected_resources.erase(it);
- }
+ ASSERT(it != protected_resources.end());
+
+ resource->OnFenceRemoval(this);
+ protected_resources.erase(it);
}
VKFenceWatch::VKFenceWatch() = default;
@@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() {
}
void VKFenceWatch::Wait() {
- if (!fence) {
+ if (fence == nullptr) {
return;
}
fence->Wait();
fence->Unprotect(this);
- fence = nullptr;
}
void VKFenceWatch::Watch(VKFence& new_fence) {
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
index 5018dfa44..5bfe4cead 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -63,7 +63,7 @@ public:
void Protect(VKResource* resource);
/// Removes protection for a resource.
- void Unprotect(const VKResource* resource);
+ void Unprotect(VKResource* resource);
/// Retreives the fence.
operator vk::Fence() const {
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
new file mode 100644
index 000000000..58ffa42f2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
+constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
+
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+ vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
+ : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
+ pipeline_stage} {
+ CreateBuffers(memory_manager, usage);
+ ReserveWatches(WATCHES_INITIAL_RESERVE);
+}
+
+VKStreamBuffer::~VKStreamBuffer() = default;
+
+std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
+ ASSERT(size <= buffer_size);
+ mapped_size = size;
+
+ if (offset + size > buffer_size) {
+ // The buffer would overflow, save the amount of used buffers, signal an invalidation and
+ // reset the state.
+ invalidation_mark = used_watches;
+ used_watches = 0;
+ offset = 0;
+ }
+
+ return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
+}
+
+VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
+ ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
+
+ if (invalidation_mark) {
+ // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
+ exctx = scheduler.Flush();
+ std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
+ [&](auto& resource) { resource->Wait(); });
+ invalidation_mark = std::nullopt;
+ }
+
+ if (used_watches + 1 >= watches.size()) {
+ // Ensure that there are enough watches.
+ ReserveWatches(WATCHES_RESERVE_CHUNK);
+ }
+ // Add a watch for this allocation.
+ watches[used_watches++]->Watch(exctx.GetFence());
+
+ offset += size;
+
+ return exctx;
+}
+
+void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
+ const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
+ nullptr);
+
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
+ commit = memory_manager.Commit(*buffer, true);
+ mapped_pointer = commit->GetData();
+}
+
+void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
+ const std::size_t previous_size = watches.size();
+ watches.resize(previous_size + grow_size);
+ std::generate(watches.begin() + previous_size, watches.end(),
+ []() { return std::make_unique<VKFenceWatch>(); });
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
new file mode 100644
index 000000000..69d036ccd
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <tuple>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKFenceWatch;
+class VKResourceManager;
+class VKScheduler;
+
+class VKStreamBuffer {
+public:
+ explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+ vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
+ ~VKStreamBuffer();
+
+ /**
+ * Reserves a region of memory from the stream buffer.
+ * @param size Size to reserve.
+ * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
+ * offset and a boolean that's true when buffer has been invalidated.
+ */
+ std::tuple<u8*, u64, bool> Reserve(u64 size);
+
+ /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
+ [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
+
+ vk::Buffer GetBuffer() const {
+ return *buffer;
+ }
+
+private:
+ /// Creates Vulkan buffer handles committing the required the required memory.
+ void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+
+ /// Increases the amount of watches available.
+ void ReserveWatches(std::size_t grow_size);
+
+ const VKDevice& device; ///< Vulkan device manager.
+ VKScheduler& scheduler; ///< Command scheduler.
+ const u64 buffer_size; ///< Total size of the stream buffer.
+ const vk::AccessFlags access; ///< Access usage of this stream buffer.
+ const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
+
+ UniqueBuffer buffer; ///< Mapped buffer.
+ VKMemoryCommit commit; ///< Memory commit.
+ u8* mapped_pointer{}; ///< Pointer to the host visible commit
+
+ u64 offset{}; ///< Buffer iterator.
+ u64 mapped_size{}; ///< Size reserved for the current copy.
+
+ std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
+ std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
+ std::optional<std::size_t>
+ invalidation_mark{}; ///< Number of watches used in the current invalidation.
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 044ba116a..a7ac26d71 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -89,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
switch (format) {
- // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
- // gamma.
case Tegra::RenderTargetFormat::RGBA8_SRGB:
return PixelFormat::RGBA8_SRGB;
case Tegra::RenderTargetFormat::RGBA8_UNORM:
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index bc50a4876..b508d64e9 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -23,28 +23,12 @@
#include "video_core/textures/astc.h"
-class BitStream {
+class InputBitStream {
public:
- explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
+ explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
: m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
- ~BitStream() = default;
-
- int GetBitsWritten() const {
- return m_BitsWritten;
- }
-
- void WriteBitsR(unsigned int val, unsigned int nBits) {
- for (unsigned int i = 0; i < nBits; i++) {
- WriteBit((val >> (nBits - i - 1)) & 1);
- }
- }
-
- void WriteBits(unsigned int val, unsigned int nBits) {
- for (unsigned int i = 0; i < nBits; i++) {
- WriteBit((val >> i) & 1);
- }
- }
+ ~InputBitStream() = default;
int GetBitsRead() const {
return m_BitsRead;
@@ -71,6 +55,38 @@ public:
}
private:
+ const int m_NumBits;
+ const unsigned char* m_CurByte;
+ int m_NextBit = 0;
+ int m_BitsRead = 0;
+
+ bool done = false;
+};
+
+class OutputBitStream {
+public:
+ explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
+ : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
+
+ ~OutputBitStream() = default;
+
+ int GetBitsWritten() const {
+ return m_BitsWritten;
+ }
+
+ void WriteBitsR(unsigned int val, unsigned int nBits) {
+ for (unsigned int i = 0; i < nBits; i++) {
+ WriteBit((val >> (nBits - i - 1)) & 1);
+ }
+ }
+
+ void WriteBits(unsigned int val, unsigned int nBits) {
+ for (unsigned int i = 0; i < nBits; i++) {
+ WriteBit((val >> i) & 1);
+ }
+ }
+
+private:
void WriteBit(int b) {
if (done)
@@ -238,8 +254,8 @@ public:
// Fills result with the values that are encoded in the given
// bitstream. We must know beforehand what the maximum possible
// value is, and how many values we're decoding.
- static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits,
- uint32_t maxRange, uint32_t nValues) {
+ static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
+ InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
// Determine encoding parameters
IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
@@ -267,7 +283,7 @@ public:
}
private:
- static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
+ static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
uint32_t nBitsPerValue) {
// Implement the algorithm in section C.2.12
uint32_t m[5];
@@ -327,7 +343,7 @@ private:
}
}
- static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
+ static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
uint32_t nBitsPerValue) {
// Implement the algorithm in section C.2.12
uint32_t m[3];
@@ -406,7 +422,7 @@ struct TexelWeightParams {
}
};
-static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
+static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
TexelWeightParams params;
// Read the entire block mode all at once
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
return params;
}
-static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
+static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
uint32_t blockHeight) {
// Don't actually care about the void extent, just read the bits...
for (int i = 0; i < 4; ++i) {
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
// We now have enough to decode our integer sequence.
std::vector<IntegerEncodedValue> decodedColorValues;
- BitStream colorStream(data);
+ InputBitStream colorStream(data);
IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
// Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
#undef READ_INT_VALUES
}
-static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
+static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
const uint32_t blockHeight, uint32_t* outBuf) {
- BitStream strm(inBuf);
+ InputBitStream strm(inBuf);
TexelWeightParams weightParams = DecodeBlockInfo(strm);
// Was there an error?
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
// Define color data.
uint8_t colorEndpointData[16];
memset(colorEndpointData, 0, sizeof(colorEndpointData));
- BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
+ OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
// Read extra config data...
uint32_t baseCEM = 0;
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
std::vector<IntegerEncodedValue> texelWeightValues;
- BitStream weightStream(texelWeightData);
+ InputBitStream weightStream(texelWeightData);
IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
weightParams.m_MaxWeight,
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
namespace Tegra::Texture::ASTC {
-std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
+std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
uint32_t depth, uint32_t block_width, uint32_t block_height) {
uint32_t blockIdx = 0;
std::vector<uint8_t> outData(height * width * depth * 4);
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
for (uint32_t j = 0; j < height; j += block_height) {
for (uint32_t i = 0; i < width; i += block_width) {
- uint8_t* blockPtr = data.data() + blockIdx * 16;
+ const uint8_t* blockPtr = data + blockIdx * 16;
// Blocks can be at most 12x12
uint32_t uncompData[144];
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index d419dd025..991cdba72 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,7 +9,7 @@
namespace Tegra::Texture::ASTC {
-std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
+std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
uint32_t depth, uint32_t block_width, uint32_t block_height);
} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
new file mode 100644
index 000000000..5e439f036
--- /dev/null
+++ b/src/video_core/textures/convert.cpp
@@ -0,0 +1,92 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+#include <tuple>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/textures/astc.h"
+#include "video_core/textures/convert.h"
+
+namespace Tegra::Texture {
+
+using VideoCore::Surface::PixelFormat;
+
+template <bool reverse>
+void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
+ union S8Z24 {
+ BitField<0, 24, u32> z24;
+ BitField<24, 8, u32> s8;
+ };
+ static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
+
+ union Z24S8 {
+ BitField<0, 8, u32> s8;
+ BitField<8, 24, u32> z24;
+ };
+ static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
+
+ S8Z24 s8z24_pixel{};
+ Z24S8 z24s8_pixel{};
+ constexpr auto bpp{
+ VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
+ for (std::size_t y = 0; y < height; ++y) {
+ for (std::size_t x = 0; x < width; ++x) {
+ const std::size_t offset{bpp * (y * width + x)};
+ if constexpr (reverse) {
+ std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
+ s8z24_pixel.s8.Assign(z24s8_pixel.s8);
+ s8z24_pixel.z24.Assign(z24s8_pixel.z24);
+ std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
+ } else {
+ std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
+ z24s8_pixel.s8.Assign(s8z24_pixel.s8);
+ z24s8_pixel.z24.Assign(s8z24_pixel.z24);
+ std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
+ }
+ }
+ }
+}
+
+static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
+ SwapS8Z24ToZ24S8<false>(data, width, height);
+}
+
+static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
+ SwapS8Z24ToZ24S8<true>(data, width, height);
+}
+
+void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
+ bool convert_astc, bool convert_s8z24) {
+ if (convert_astc && IsPixelFormatASTC(pixel_format)) {
+ // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
+ u32 block_width{};
+ u32 block_height{};
+ std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
+ const std::vector<u8> rgba8_data =
+ Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
+ std::copy(rgba8_data.begin(), rgba8_data.end(), data);
+
+ } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+ Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
+ }
+}
+
+void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
+ bool convert_astc, bool convert_s8z24) {
+ if (convert_astc && IsPixelFormatASTC(pixel_format)) {
+ LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
+ static_cast<u32>(pixel_format));
+ UNREACHABLE();
+
+ } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+ Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
+ }
+}
+
+} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
new file mode 100644
index 000000000..07cd8b5da
--- /dev/null
+++ b/src/video_core/textures/convert.h
@@ -0,0 +1,18 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "video_core/surface.h"
+
+namespace Tegra::Texture {
+
+void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
+ u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
+
+void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
+ u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
+
+} // namespace Tegra::Texture \ No newline at end of file
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5db75de22..cad7340f5 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -103,8 +103,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
const u32 pixel_index{out_x + pixel_base};
- data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
- data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
+ data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
+ data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
}
pixel_base += stride_x;
@@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
for (u32 xb = 0; xb < blocks_on_x; xb++) {
const u32 x_start = xb * block_x_elements;
const u32 x_end = std::min(width, x_start + block_x_elements);
- if (fast) {
+ if constexpr (fast) {
FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 85b7e9f7b..65df86890 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -16,16 +16,13 @@ inline std::size_t GetGOBSize() {
return 512;
}
-/**
- * Unswizzles a swizzled texture without changing its format.
- */
+/// Unswizzles a swizzled texture without changing its format.
void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight,
u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
-/**
- * Unswizzles a swizzled texture without changing its format.
- */
+
+/// Unswizzles a swizzled texture without changing its format.
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight,
@@ -37,15 +34,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
-/**
- * Decodes an unswizzled texture into a A8R8G8B8 texture.
- */
+/// Decodes an unswizzled texture into a A8R8G8B8 texture.
std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
u32 height);
-/**
- * This function calculates the correct size of a texture depending if it's tiled or not.
- */
+/// This function calculates the correct size of a texture depending if it's tiled or not.
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth);
@@ -53,6 +46,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
u32 block_height);
+
/// Copies a tiled subrectangle into a linear surface.
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,