summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/engines/engine_upload.cpp52
-rw-r--r--src/video_core/engines/engine_upload.h73
-rw-r--r--src/video_core/engines/fermi_2d.h6
-rw-r--r--src/video_core/engines/kepler_compute.cpp37
-rw-r--r--src/video_core/engines/kepler_compute.h175
-rw-r--r--src/video_core/engines/kepler_memory.cpp45
-rw-r--r--src/video_core/engines/kepler_memory.h66
-rw-r--r--src/video_core/engines/maxwell_3d.cpp54
-rw-r--r--src/video_core/engines/maxwell_3d.h27
-rw-r--r--src/video_core/engines/maxwell_dma.cpp83
-rw-r--r--src/video_core/engines/maxwell_dma.h43
-rw-r--r--src/video_core/gpu.cpp4
-rw-r--r--src/video_core/gpu_thread.cpp4
-rw-r--r--src/video_core/gpu_thread.h8
-rw-r--r--src/video_core/macro_interpreter.cpp6
-rw-r--r--src/video_core/memory_manager.cpp6
-rw-r--r--src/video_core/memory_manager.h31
-rw-r--r--src/video_core/rasterizer_cache.h7
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h6
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h7
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp26
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h28
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h7
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp11
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp59
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h28
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp14
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h7
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp1
-rw-r--r--src/video_core/shader/decode/texture.cpp1
-rw-r--r--src/video_core/shader/shader_ir.cpp7
-rw-r--r--src/video_core/shader/shader_ir.h41
-rw-r--r--src/video_core/shader/track.cpp12
-rw-r--r--src/video_core/textures/astc.cpp12
38 files changed, 686 insertions, 322 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6821f275d..1e010e4da 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -3,6 +3,8 @@ add_library(video_core STATIC
dma_pusher.h
debug_utils/debug_utils.cpp
debug_utils/debug_utils.h
+ engines/engine_upload.cpp
+ engines/engine_upload.h
engines/fermi_2d.cpp
engines/fermi_2d.h
engines/kepler_compute.cpp
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
new file mode 100644
index 000000000..082a40cd9
--- /dev/null
+++ b/src/video_core/engines/engine_upload.cpp
@@ -0,0 +1,52 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+
+#include "common/assert.h"
+#include "video_core/engines/engine_upload.h"
+#include "video_core/memory_manager.h"
+#include "video_core/textures/decoders.h"
+
+namespace Tegra::Engines::Upload {
+
+State::State(MemoryManager& memory_manager, Registers& regs)
+ : regs{regs}, memory_manager{memory_manager} {}
+
+State::~State() = default;
+
+void State::ProcessExec(const bool is_linear) {
+ write_offset = 0;
+ copy_size = regs.line_length_in * regs.line_count;
+ inner_buffer.resize(copy_size);
+ this->is_linear = is_linear;
+}
+
+void State::ProcessData(const u32 data, const bool is_last_call) {
+ const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
+ std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
+ write_offset += sub_copy_size;
+ if (!is_last_call) {
+ return;
+ }
+ const GPUVAddr address{regs.dest.Address()};
+ if (is_linear) {
+ memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
+ } else {
+ UNIMPLEMENTED_IF(regs.dest.z != 0);
+ UNIMPLEMENTED_IF(regs.dest.depth != 1);
+ UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
+ UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
+ const std::size_t dst_size = Tegra::Texture::CalculateSize(
+ true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
+ tmp_buffer.resize(dst_size);
+ memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
+ Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
+ regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
+ tmp_buffer.data());
+ memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
+ }
+}
+
+} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
new file mode 100644
index 000000000..ef4f5839a
--- /dev/null
+++ b/src/video_core/engines/engine_upload.h
@@ -0,0 +1,73 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Tegra::Engines::Upload {
+
+struct Registers {
+ u32 line_length_in;
+ u32 line_count;
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+ u32 pitch;
+ union {
+ BitField<0, 4, u32> block_width;
+ BitField<4, 4, u32> block_height;
+ BitField<8, 4, u32> block_depth;
+ };
+ u32 width;
+ u32 height;
+ u32 depth;
+ u32 z;
+ u32 x;
+ u32 y;
+
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
+ }
+
+ u32 BlockWidth() const {
+ return 1U << block_width.Value();
+ }
+
+ u32 BlockHeight() const {
+ return 1U << block_height.Value();
+ }
+
+ u32 BlockDepth() const {
+ return 1U << block_depth.Value();
+ }
+ } dest;
+};
+
+class State {
+public:
+ State(MemoryManager& memory_manager, Registers& regs);
+ ~State();
+
+ void ProcessExec(bool is_linear);
+ void ProcessData(u32 data, bool is_last_call);
+
+private:
+ u32 write_offset = 0;
+ u32 copy_size = 0;
+ std::vector<u8> inner_buffer;
+ std::vector<u8> tmp_buffer;
+ bool is_linear = false;
+ Registers& regs;
+ MemoryManager& memory_manager;
+};
+
+} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 2e51b7f13..45f59a4d9 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -21,6 +21,12 @@ class RasterizerInterface;
namespace Tegra::Engines {
+/**
+ * This Engine is known as G80_2D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
+ */
+
#define FERMI2D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index b1d950460..7404a8163 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -4,12 +4,21 @@
#include "common/assert.h"
#include "common/logging/log.h"
+#include "core/core.h"
#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
+#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
-KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
+KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+ MemoryManager& memory_manager)
+ : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
+ memory_manager,
+ regs.upload} {}
KeplerCompute::~KeplerCompute() = default;
@@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
regs.reg_array[method_call.method] = method_call.argument;
switch (method_call.method) {
+ case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
+ upload_state.ProcessExec(regs.exec_upload.linear != 0);
+ break;
+ }
+ case KEPLER_COMPUTE_REG_INDEX(data_upload): {
+ const bool is_last_call = method_call.IsLastCall();
+ upload_state.ProcessData(method_call.argument, is_last_call);
+ if (is_last_call) {
+ system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+ }
+ break;
+ }
case KEPLER_COMPUTE_REG_INDEX(launch):
- // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
- // kernels)
- UNREACHABLE_MSG("Compute shaders are not implemented");
+ ProcessLaunch();
break;
default:
break;
}
}
+void KeplerCompute::ProcessLaunch() {
+
+ const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
+ memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
+ LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
+
+ const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
+ LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index fb6cdf432..5250b8d9b 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -6,22 +6,40 @@
#include <array>
#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
+namespace Core {
+class System;
+}
+
namespace Tegra {
class MemoryManager;
}
+namespace VideoCore {
+class RasterizerInterface;
+}
+
namespace Tegra::Engines {
+/**
+ * This Engine is known as GK104_Compute. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
+ */
+
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
class KeplerCompute final {
public:
- explicit KeplerCompute(MemoryManager& memory_manager);
+ explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+ MemoryManager& memory_manager);
~KeplerCompute();
static constexpr std::size_t NumConstBuffers = 8;
@@ -31,30 +49,181 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0xAF);
+ INSERT_PADDING_WORDS(0x60);
+
+ Upload::Registers upload;
+
+ struct {
+ union {
+ BitField<0, 1, u32> linear;
+ };
+ } exec_upload;
+
+ u32 data_upload;
+
+ INSERT_PADDING_WORDS(0x3F);
+
+ struct {
+ u32 address;
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
+ }
+ } launch_desc_loc;
+
+ INSERT_PADDING_WORDS(0x1);
u32 launch;
- INSERT_PADDING_WORDS(0xC48);
+ INSERT_PADDING_WORDS(0x4A7);
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+ u32 limit;
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } tsc;
+
+ INSERT_PADDING_WORDS(0x3);
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+ u32 limit;
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } tic;
+
+ INSERT_PADDING_WORDS(0x22);
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } code_loc;
+
+ INSERT_PADDING_WORDS(0x3FE);
+
+ u32 texture_const_buffer_index;
+
+ INSERT_PADDING_WORDS(0x374);
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
+
+ struct LaunchParams {
+ static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
+
+ INSERT_PADDING_WORDS(0x8);
+
+ u32 program_start;
+
+ INSERT_PADDING_WORDS(0x2);
+
+ BitField<30, 1, u32> linked_tsc;
+
+ BitField<0, 31, u32> grid_dim_x;
+ union {
+ BitField<0, 16, u32> grid_dim_y;
+ BitField<16, 16, u32> grid_dim_z;
+ };
+
+ INSERT_PADDING_WORDS(0x3);
+
+ BitField<0, 16, u32> shared_alloc;
+
+ BitField<0, 31, u32> block_dim_x;
+ union {
+ BitField<0, 16, u32> block_dim_y;
+ BitField<16, 16, u32> block_dim_z;
+ };
+
+ union {
+ BitField<0, 8, u32> const_buffer_enable_mask;
+ BitField<29, 2, u32> cache_layout;
+ } memory_config;
+
+ INSERT_PADDING_WORDS(0x8);
+
+ struct {
+ u32 address_low;
+ union {
+ BitField<0, 8, u32> address_high;
+ BitField<15, 17, u32> size;
+ };
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
+ address_low);
+ }
+ } const_buffer_config[8];
+
+ union {
+ BitField<0, 20, u32> local_pos_alloc;
+ BitField<27, 5, u32> barrier_alloc;
+ };
+
+ union {
+ BitField<0, 20, u32> local_neg_alloc;
+ BitField<24, 5, u32> gpr_alloc;
+ };
+
+ INSERT_PADDING_WORDS(0x11);
+ } launch_description;
+
+ struct {
+ u32 write_offset = 0;
+ u32 copy_size = 0;
+ std::vector<u8> inner_buffer;
+ } state{};
+
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
"KeplerCompute Regs has wrong size");
+ static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
+ "KeplerCompute LaunchParams has wrong size");
+
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
private:
+ Core::System& system;
+ VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
+ Upload::State upload_state;
+
+ void ProcessLaunch();
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
+#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \
+ static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \
+ "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(launch, 0xAF);
+ASSERT_REG_POSITION(tsc, 0x557);
+ASSERT_REG_POSITION(tic, 0x55D);
+ASSERT_REG_POSITION(code_loc, 0x582);
+ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
+ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
+ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
+ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
+ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
+ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
+ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
#undef ASSERT_REG_POSITION
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 7387886a3..0561f676c 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -14,9 +14,8 @@
namespace Tegra::Engines {
-KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager)
- : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
+KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
+ : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
KeplerMemory::~KeplerMemory() = default;
@@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
switch (method_call.method) {
case KEPLERMEMORY_REG_INDEX(exec): {
- ProcessExec();
+ upload_state.ProcessExec(regs.exec.linear != 0);
break;
}
case KEPLERMEMORY_REG_INDEX(data): {
- ProcessData(method_call.argument, method_call.IsLastCall());
+ const bool is_last_call = method_call.IsLastCall();
+ upload_state.ProcessData(method_call.argument, is_last_call);
+ if (is_last_call) {
+ system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+ }
break;
}
}
}
-void KeplerMemory::ProcessExec() {
- state.write_offset = 0;
- state.copy_size = regs.line_length_in * regs.line_count;
- state.inner_buffer.resize(state.copy_size);
-}
-
-void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
- const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
- std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
- state.write_offset += sub_copy_size;
- if (is_last_call) {
- const GPUVAddr address{regs.dest.Address()};
- if (regs.exec.linear != 0) {
- memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
- } else {
- UNIMPLEMENTED_IF(regs.dest.z != 0);
- UNIMPLEMENTED_IF(regs.dest.depth != 1);
- UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
- UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
- const std::size_t dst_size = Tegra::Texture::CalculateSize(
- true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
- std::vector<u8> tmp_buffer(dst_size);
- memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
- Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
- regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
- state.inner_buffer.data(), tmp_buffer.data());
- memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
- }
- system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
- }
-}
-
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 5f892ddad..f3bc675a9 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,6 +10,7 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
namespace Core {
@@ -20,19 +21,20 @@ namespace Tegra {
class MemoryManager;
}
-namespace VideoCore {
-class RasterizerInterface;
-}
-
namespace Tegra::Engines {
+/**
+ * This Engine is known as P2MF. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
+ */
+
#define KEPLERMEMORY_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
class KeplerMemory final {
public:
- KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager);
+ KeplerMemory(Core::System& system, MemoryManager& memory_manager);
~KeplerMemory();
/// Write the value to the register identified by method.
@@ -45,42 +47,7 @@ public:
struct {
INSERT_PADDING_WORDS(0x60);
- u32 line_length_in;
- u32 line_count;
-
- struct {
- u32 address_high;
- u32 address_low;
- u32 pitch;
- union {
- BitField<0, 4, u32> block_width;
- BitField<4, 4, u32> block_height;
- BitField<8, 4, u32> block_depth;
- };
- u32 width;
- u32 height;
- u32 depth;
- u32 z;
- u32 x;
- u32 y;
-
- GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
- }
-
- u32 BlockWidth() const {
- return 1U << block_width.Value();
- }
-
- u32 BlockHeight() const {
- return 1U << block_height.Value();
- }
-
- u32 BlockDepth() const {
- return 1U << block_depth.Value();
- }
- } dest;
+ Upload::Registers upload;
struct {
union {
@@ -96,28 +63,17 @@ public:
};
} regs{};
- struct {
- u32 write_offset = 0;
- u32 copy_size = 0;
- std::vector<u8> inner_buffer;
- } state{};
-
private:
Core::System& system;
- VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
-
- void ProcessExec();
- void ProcessData(u32 data, bool is_last_call);
+ Upload::State upload_state;
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
-ASSERT_REG_POSITION(line_length_in, 0x60);
-ASSERT_REG_POSITION(line_count, 0x61);
-ASSERT_REG_POSITION(dest, 0x62);
+ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec, 0x6C);
ASSERT_REG_POSITION(data, 0x6D);
#undef ASSERT_REG_POSITION
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 9780417f2..39968d403 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
- : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
- *this} {
+ : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
+ macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
InitializeRegisterDefaults();
}
@@ -34,9 +34,9 @@ void Maxwell3D::InitializeRegisterDefaults() {
// Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is
// needed for ARMS.
- for (std::size_t viewport{}; viewport < Regs::NumViewports; ++viewport) {
- regs.viewports[viewport].depth_range_near = 0.0f;
- regs.viewports[viewport].depth_range_far = 1.0f;
+ for (auto& viewport : regs.viewports) {
+ viewport.depth_range_near = 0.0f;
+ viewport.depth_range_far = 1.0f;
}
// Doom and Bomberman seems to use the uninitialized registers and just enable blend
@@ -47,13 +47,13 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.blend.equation_a = Regs::Blend::Equation::Add;
regs.blend.factor_source_a = Regs::Blend::Factor::One;
regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
- for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) {
- regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add;
- regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One;
- regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero;
- regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add;
- regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One;
- regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero;
+ for (auto& blend : regs.independent_blend) {
+ blend.equation_rgb = Regs::Blend::Equation::Add;
+ blend.factor_source_rgb = Regs::Blend::Factor::One;
+ blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
+ blend.equation_a = Regs::Blend::Equation::Add;
+ blend.factor_source_a = Regs::Blend::Factor::One;
+ blend.factor_dest_a = Regs::Blend::Factor::Zero;
}
regs.stencil_front_op_fail = Regs::StencilOp::Keep;
regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
@@ -75,11 +75,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
// TODO(bunnei): Some games do not initialize the color masks (e.g. Sonic Mania). Assuming a
// default of enabled fixes rendering here.
- for (std::size_t color_mask = 0; color_mask < Regs::NumRenderTargets; color_mask++) {
- regs.color_mask[color_mask].R.Assign(1);
- regs.color_mask[color_mask].G.Assign(1);
- regs.color_mask[color_mask].B.Assign(1);
- regs.color_mask[color_mask].A.Assign(1);
+ for (auto& color_mask : regs.color_mask) {
+ color_mask.R.Assign(1);
+ color_mask.G.Assign(1);
+ color_mask.B.Assign(1);
+ color_mask.A.Assign(1);
}
// Commercial games seem to assume this value is enabled and nouveau sets this value manually.
@@ -178,13 +178,13 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
// Vertex buffer
if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
- method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
+ method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
} else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
- method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
+ method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
} else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
- method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
+ method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
}
}
@@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessSyncPoint();
break;
}
+ case MAXWELL3D_REG_INDEX(exec_upload): {
+ upload_state.ProcessExec(regs.exec_upload.linear != 0);
+ break;
+ }
+ case MAXWELL3D_REG_INDEX(data_upload): {
+ const bool is_last_call = method_call.IsLastCall();
+ upload_state.ProcessData(method_call.argument, is_last_call);
+ if (is_last_call) {
+ dirty_flags.OnMemoryWrite();
+ }
+ break;
+ }
default:
break;
}
@@ -430,7 +442,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
const auto a_type = tic_entry.a_type.Value();
// TODO(Subv): Different data types for separate components are not supported
- ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
+ DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
return tic_entry;
}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 85d309d9b..48e4fec33 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,6 +6,7 @@
#include <array>
#include <bitset>
+#include <type_traits>
#include <unordered_map>
#include <vector>
@@ -14,6 +15,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
+#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
#include "video_core/macro_interpreter.h"
#include "video_core/textures/texture.h"
@@ -32,6 +34,12 @@ class RasterizerInterface;
namespace Tegra::Engines {
+/**
+ * This Engine is known as GF100_3D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+ */
+
#define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
@@ -580,7 +588,18 @@ public:
u32 bind;
} macros;
- INSERT_PADDING_WORDS(0x69);
+ INSERT_PADDING_WORDS(0x17);
+
+ Upload::Registers upload;
+ struct {
+ union {
+ BitField<0, 1, u32> linear;
+ };
+ } exec_upload;
+
+ u32 data_upload;
+
+ INSERT_PADDING_WORDS(0x44);
struct {
union {
@@ -1089,6 +1108,7 @@ public:
} regs{};
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
+ static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
struct State {
struct ConstBufferInfo {
@@ -1176,6 +1196,8 @@ private:
/// Interpreter for the macro codes uploaded to the GPU.
MacroInterpreter macro_interpreter;
+ Upload::State upload_state;
+
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;
@@ -1219,6 +1241,9 @@ private:
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(macros, 0x45);
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 2426d0067..3a5dfef0c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.exec.enable_2d == 1);
- const std::size_t copy_size = regs.x_count * regs.y_count;
+ if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
+ ASSERT(regs.src_params.size_z == 1);
+ // If the input is tiled and the output is linear, deswizzle the input and copy it over.
+ const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+ const std::size_t src_size = Texture::CalculateSize(
+ true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
+ regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
- auto source_ptr{memory_manager.GetPointer(source)};
- auto dst_ptr{memory_manager.GetPointer(dest)};
+ const std::size_t dst_size = regs.dst_pitch * regs.y_count;
- if (!source_ptr) {
- LOG_ERROR(HW_GPU, "source_ptr is invalid");
- return;
- }
+ if (read_buffer.size() < src_size) {
+ read_buffer.resize(src_size);
+ }
- if (!dst_ptr) {
- LOG_ERROR(HW_GPU, "dst_ptr is invalid");
- return;
- }
+ if (write_buffer.size() < dst_size) {
+ write_buffer.resize(dst_size);
+ }
- const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
- // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
- // copying.
- rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
+ memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
- // We have to invalidate the destination region to evict any outdated surfaces from the
- // cache. We do this before actually writing the new data because the destination address
- // might contain a dirty surface that will have to be written back to memory.
- rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
- };
+ Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
+ regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
+ write_buffer.data(), regs.src_params.BlockHeight(),
+ regs.src_params.pos_x, regs.src_params.pos_y);
- if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
- ASSERT(regs.src_params.size_z == 1);
- // If the input is tiled and the output is linear, deswizzle the input and copy it over.
+ memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
+ } else {
+ ASSERT(regs.dst_params.BlockDepth() == 1);
- const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+ const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
- FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
- copy_size * src_bytes_per_pixel);
+ const std::size_t dst_size = Texture::CalculateSize(
+ true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
+ regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
- Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
- regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
- regs.src_params.BlockHeight(), regs.src_params.pos_x,
- regs.src_params.pos_y);
- } else {
- ASSERT(regs.dst_params.size_z == 1);
- ASSERT(regs.src_pitch == regs.x_count);
+ const std::size_t dst_layer_size = Texture::CalculateSize(
+ true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
+ regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
- const u32 src_bpp = regs.src_pitch / regs.x_count;
+ const std::size_t src_size = regs.src_pitch * regs.y_count;
- FlushAndInvalidate(regs.src_pitch * regs.y_count,
- regs.dst_params.size_x * regs.dst_params.size_y * src_bpp);
+ if (read_buffer.size() < src_size) {
+ read_buffer.resize(src_size);
+ }
+
+ if (write_buffer.size() < dst_size) {
+ write_buffer.resize(dst_size);
+ }
+
+ memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
- src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
+ src_bytes_per_pixel,
+ write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
+ read_buffer.data(), regs.dst_params.BlockHeight());
+
+ memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
}
}
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index c6b649842..e5942f671 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -6,6 +6,7 @@
#include <array>
#include <cstddef>
+#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -25,6 +26,11 @@ class RasterizerInterface;
namespace Tegra::Engines {
+/**
+ * This Engine is known as GK104_Copy. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
+ */
+
class MaxwellDMA final {
public:
explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
@@ -63,6 +69,16 @@ public:
static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
+ enum class ComponentMode : u32 {
+ Src0 = 0,
+ Src1 = 1,
+ Src2 = 2,
+ Src3 = 3,
+ Const0 = 4,
+ Const1 = 5,
+ Zero = 6,
+ };
+
enum class CopyMode : u32 {
None = 0,
Unk1 = 1,
@@ -128,7 +144,26 @@ public:
u32 x_count;
u32 y_count;
- INSERT_PADDING_WORDS(0xBB);
+ INSERT_PADDING_WORDS(0xB8);
+
+ u32 const0;
+ u32 const1;
+ union {
+ BitField<0, 4, ComponentMode> component0;
+ BitField<4, 4, ComponentMode> component1;
+ BitField<8, 4, ComponentMode> component2;
+ BitField<12, 4, ComponentMode> component3;
+ BitField<16, 2, u32> component_size;
+ BitField<20, 3, u32> src_num_components;
+ BitField<24, 3, u32> dst_num_components;
+
+ u32 SrcBytePerPixel() const {
+ return src_num_components.Value() * component_size.Value();
+ }
+ u32 DstBytePerPixel() const {
+ return dst_num_components.Value() * component_size.Value();
+ }
+ } swizzle_config;
Parameters dst_params;
@@ -149,6 +184,9 @@ private:
MemoryManager& memory_manager;
+ std::vector<u8> read_buffer;
+ std::vector<u8> write_buffer;
+
/// Performs the copy from the source buffer to the destination buffer as configured in the
/// registers.
void HandleCopy();
@@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);
ASSERT_REG_POSITION(dst_pitch, 0x105);
ASSERT_REG_POSITION(x_count, 0x106);
ASSERT_REG_POSITION(y_count, 0x107);
+ASSERT_REG_POSITION(const0, 0x1C0);
+ASSERT_REG_POSITION(const1, 0x1C1);
+ASSERT_REG_POSITION(swizzle_config, 0x1C2);
ASSERT_REG_POSITION(dst_params, 0x1C3);
ASSERT_REG_POSITION(src_params, 0x1CA);
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4461083ff..52706505b 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
- kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
+ kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
- kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
+ kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
}
GPU::~GPU() = default;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c9a2077de..1e2ff46b0 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -44,7 +44,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
- } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
+ } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
return;
} else {
UNREACHABLE();
@@ -118,7 +118,7 @@ void SynchState::WaitForSynchronization(u64 fence) {
// Wait for the GPU to be idle (all commands to be executed)
{
MICROPROFILE_SCOPE(GPU_wait);
- std::unique_lock<std::mutex> lock{synchronization_mutex};
+ std::unique_lock lock{synchronization_mutex};
synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
}
}
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index cc14527c7..05a168a72 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -81,12 +81,6 @@ struct CommandDataContainer {
CommandDataContainer(CommandData&& data, u64 next_fence)
: data{std::move(data)}, fence{next_fence} {}
- CommandDataContainer& operator=(const CommandDataContainer& t) {
- data = std::move(t.data);
- fence = t.fence;
- return *this;
- }
-
CommandData data;
u64 fence{};
};
@@ -109,7 +103,7 @@ struct SynchState final {
void TrySynchronize() {
if (IsSynchronized()) {
- std::lock_guard<std::mutex> lock{synchronization_mutex};
+ std::lock_guard lock{synchronization_mutex};
synchronization_condition.notify_one();
}
}
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 524d9ea5a..fbea107ca 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -118,10 +118,10 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
static_cast<u32>(opcode.operation.Value()));
}
- if (opcode.is_exit) {
+ // An instruction with the Exit flag will not actually
+ // cause an exit if it's executed inside a delay slot.
+ if (opcode.is_exit && !is_delay_slot) {
// Exit has a delay slot, execute the next instruction
- // Note: Executing an exit during a branch delay slot will cause the instruction at the
- // branch target to be executed before exiting.
Step(offset, true);
return false;
}
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 6c98c6701..5d8d126c1 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -25,6 +25,8 @@ MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : raste
UpdatePageTableForVMA(initial_vma);
}
+MemoryManager::~MemoryManager() = default;
+
GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
const u64 aligned_size{Common::AlignUp(size, page_size)};
const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
@@ -199,11 +201,11 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
return {};
}
-bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) {
+bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const {
const GPUVAddr end = start + size;
const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
- const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
+ const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
return range == size;
}
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index e4f0c4bd6..113f9d8f3 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -47,7 +47,8 @@ struct VirtualMemoryArea {
class MemoryManager final {
public:
- MemoryManager(VideoCore::RasterizerInterface& rasterizer);
+ explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer);
+ ~MemoryManager();
GPUVAddr AllocateSpace(u64 size, u64 align);
GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
@@ -65,18 +66,18 @@ public:
u8* GetPointer(GPUVAddr addr);
const u8* GetPointer(GPUVAddr addr) const;
- // Returns true if the block is continous in host memory, false otherwise
- bool IsBlockContinous(const GPUVAddr start, const std::size_t size);
+ /// Returns true if the block is continuous in host memory, false otherwise
+ bool IsBlockContinuous(GPUVAddr start, std::size_t size) const;
/**
* ReadBlock and WriteBlock are full read and write operations over virtual
- * GPU Memory. It's important to use these when GPU memory may not be continous
+ * GPU Memory. It's important to use these when GPU memory may not be continuous
* in the Host Memory counterpart. Note: This functions cause Host GPU Memory
* Flushes and Invalidations, respectively to each operation.
*/
- void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
- void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
- void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
+ void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+ void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+ void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
/**
* ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -88,9 +89,9 @@ public:
* WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
* being flushed.
*/
- void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
- void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
- void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
+ void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+ void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+ void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
private:
using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
@@ -111,10 +112,10 @@ private:
/**
* Maps an unmanaged host memory pointer at a given address.
*
- * @param target The guest address to start the mapping at.
- * @param memory The memory to be mapped.
- * @param size Size of the mapping.
- * @param state MemoryState tag to attach to the VMA.
+ * @param target The guest address to start the mapping at.
+ * @param memory The memory to be mapped.
+ * @param size Size of the mapping in bytes.
+ * @param backing_addr The base address of the range to back this mapping.
*/
VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
@@ -124,7 +125,7 @@ private:
/// Converts a VMAHandle to a mutable VMAIter.
VMAIter StripIterConstness(const VMAHandle& iter);
- /// Marks as the specfied VMA as allocated.
+ /// Marks as the specified VMA as allocated.
VMAIter Allocate(VMAIter vma);
/**
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 185b6f414..0c4ea1494 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -37,9 +37,6 @@ public:
/// Gets the size of the shader in guest memory, required for cache management
virtual std::size_t GetSizeInBytes() const = 0;
- /// Wriets any cached resources back to memory
- virtual void Flush() = 0;
-
/// Sets whether the cached object should be considered registered
void SetIsRegistered(bool registered) {
is_registered = registered;
@@ -159,6 +156,8 @@ protected:
return ++modified_ticks;
}
+ virtual void FlushObjectInner(const T& object) = 0;
+
/// Flushes the specified object, updating appropriate cache state as needed
void FlushObject(const T& object) {
std::lock_guard lock{mutex};
@@ -166,7 +165,7 @@ protected:
if (!object->IsDirty()) {
return;
}
- object->Flush();
+ FlushObjectInner(object);
object->MarkAsModified(false, *this);
}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index fc33aa433..f9247a40e 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -42,9 +42,6 @@ public:
return alignment;
}
- // We do not have to flush this cache as things in it are never modified by us.
- void Flush() override {}
-
private:
VAddr cpu_addr{};
std::size_t size{};
@@ -75,6 +72,9 @@ public:
protected:
void AlignBuffer(std::size_t alignment);
+ // We do not have to flush this cache as things in it are never modified by us.
+ void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+
private:
OGLStreamBuffer stream_buffer;
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 196e6e278..2d467a240 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -46,7 +46,7 @@ public:
/// Reloads the global region from guest memory
void Reload(u32 size_);
- void Flush() override;
+ void Flush();
private:
VAddr cpu_addr{};
@@ -65,6 +65,11 @@ public:
GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
+protected:
+ void FlushObjectInner(const GlobalRegion& object) override {
+ object->Flush();
+ }
+
private:
GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 3cc945235..dbd8049f5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -261,8 +261,8 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
// MakeQuadArray always generates u32 indexes
params.index_format = GL_UNSIGNED_INT;
params.count = (regs.vertex_buffer.count / 4) * 6;
- params.index_buffer_offset =
- primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count);
+ params.index_buffer_offset = primitive_assembler.MakeQuadArray(
+ regs.vertex_buffer.first, regs.vertex_buffer.count);
}
return params;
}
@@ -1135,7 +1135,9 @@ void RasterizerOpenGL::SyncTransformFeedback() {
void RasterizerOpenGL::SyncPointState() {
const auto& regs = system.GPU().Maxwell3D().regs;
- state.point.size = regs.point_size;
+ // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
+ // in OpenGL).
+ state.point.size = std::max(1.0f, regs.point_size);
}
void RasterizerOpenGL::SyncPolygonOffset() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 5a25f5b37..a7681902e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -628,9 +628,11 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
-void CachedSurface::LoadGLBuffer() {
+void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
- gl_buffer.resize(params.max_mip_level);
+ auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
+ if (gl_buffer.size() < params.max_mip_level)
+ gl_buffer.resize(params.max_mip_level);
for (u32 i = 0; i < params.max_mip_level; i++)
gl_buffer[i].resize(params.GetMipmapSizeGL(i));
if (params.is_tiled) {
@@ -671,13 +673,13 @@ void CachedSurface::LoadGLBuffer() {
}
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
-void CachedSurface::FlushGLBuffer() {
+void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");
+ auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
// OpenGL temporary buffer needs to be big enough to store raw texture size
- gl_buffer.resize(1);
gl_buffer[0].resize(GetSizeInBytes());
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
@@ -713,10 +715,12 @@ void CachedSurface::FlushGLBuffer() {
}
}
-void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
- GLuint draw_fb_handle) {
+void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
+ GLuint read_fb_handle, GLuint draw_fb_handle) {
const auto& rect{params.GetRect(mip_map)};
+ auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
+
// Load data from memory to the surface
const auto x0 = static_cast<GLint>(rect.left);
const auto y0 = static_cast<GLint>(rect.bottom);
@@ -801,7 +805,6 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::TextureCubemap: {
- std::size_t start = buffer_offset;
for (std::size_t face = 0; face < params.depth; ++face) {
glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
static_cast<GLsizei>(rect.GetWidth()),
@@ -845,11 +848,12 @@ void CachedSurface::EnsureTextureDiscrepantView() {
}
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
-void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
+void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem,
+ GLuint read_fb_handle, GLuint draw_fb_handle) {
MICROPROFILE_SCOPE(OpenGL_TextureUL);
for (u32 i = 0; i < params.max_mip_level; i++)
- UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
+ UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle);
}
void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
@@ -929,8 +933,8 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
}
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
- surface->LoadGLBuffer();
- surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
+ surface->LoadGLBuffer(temporal_memory);
+ surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle);
surface->MarkAsModified(false, *this);
surface->MarkForReload(false);
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index db280dbb3..6263ef3e7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -355,6 +355,12 @@ namespace OpenGL {
class RasterizerOpenGL;
+// This is used to store temporary big buffers,
+// instead of creating/destroying all the time
+struct RasterizerTemporaryMemory {
+ std::vector<std::vector<u8>> gl_buffer;
+};
+
class CachedSurface final : public RasterizerCacheObject {
public:
explicit CachedSurface(const SurfaceParams& params);
@@ -371,10 +377,6 @@ public:
return memory_size;
}
- void Flush() override {
- FlushGLBuffer();
- }
-
const OGLTexture& Texture() const {
return texture;
}
@@ -397,11 +399,12 @@ public:
}
// Read/Write data in Switch memory to/from gl_buffer
- void LoadGLBuffer();
- void FlushGLBuffer();
+ void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
+ void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
// Upload data in gl_buffer to this surface's texture
- void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
+ void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle,
+ GLuint draw_fb_handle);
void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
Tegra::Texture::SwizzleSource swizzle_y,
@@ -429,13 +432,13 @@ public:
}
private:
- void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
+ void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
+ GLuint read_fb_handle, GLuint draw_fb_handle);
void EnsureTextureDiscrepantView();
OGLTexture texture;
OGLTexture discrepant_view;
- std::vector<std::vector<u8>> gl_buffer;
SurfaceParams params{};
GLenum gl_target{};
GLenum gl_internal_format{};
@@ -473,6 +476,11 @@ public:
void SignalPreDrawCall();
void SignalPostDrawCall();
+protected:
+ void FlushObjectInner(const Surface& object) override {
+ object->FlushGLBuffer(temporal_memory);
+ }
+
private:
void LoadSurface(const Surface& surface);
Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
@@ -519,6 +527,8 @@ private:
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
Surface last_depth_buffer;
+ RasterizerTemporaryMemory temporal_memory;
+
using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index b1c8f7c35..f700dc89a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -345,7 +345,7 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
const Device& device)
- : RasterizerCache{rasterizer}, disk_cache{system}, device{device} {}
+ : RasterizerCache{rasterizer}, device{device}, disk_cache{system} {}
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index a332087f8..31b979987 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -57,9 +57,6 @@ public:
return shader_length;
}
- // We do not have to flush this cache as things in it are never modified by us.
- void Flush() override {}
-
/// Gets the shader entries for the shader
const GLShader::ShaderEntries& GetShaderEntries() const {
return entries;
@@ -123,6 +120,10 @@ public:
/// Gets the current specified shader stage program
Shader GetStageProgram(Maxwell::ShaderProgram program);
+protected:
+ // We do not have to flush this cache as things in it are never modified by us.
+ void FlushObjectInner(const Shader& object) override {}
+
private:
std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index ef1a1995f..1a62795e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -871,17 +871,6 @@ private:
return {};
}
- std::string Composite(Operation operation) {
- std::string value = "vec4(";
- for (std::size_t i = 0; i < 4; ++i) {
- value += Visit(operation[i]);
- if (i < 3)
- value += ", ";
- }
- value += ')';
- return value;
- }
-
template <Type type>
std::string Add(Operation operation) {
return GenerateBinaryInfix(operation, "+", type, type, type);
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index ed7afc4a0..fba9c594a 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -104,8 +104,9 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
return true;
}
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
- : system{system}, precompiled_cache_virtual_file_offset{0} {}
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+
+ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
ShaderDiskCacheOpenGL::LoadTransferable() {
@@ -243,7 +244,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
- const auto entry = LoadDecompiledEntry();
+ auto entry = LoadDecompiledEntry();
if (!entry) {
return {};
}
@@ -287,13 +288,13 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
return {};
}
- std::vector<u8> code(code_size);
+ std::string code(code_size, '\0');
if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
return {};
}
ShaderDiskCacheDecompiled entry;
- entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
+ entry.code = std::move(code);
u32 const_buffers_count{};
if (!LoadObjectFromPrecompiled(const_buffers_count)) {
@@ -303,12 +304,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
for (u32 i = 0; i < const_buffers_count; ++i) {
u32 max_offset{};
u32 index{};
- u8 is_indirect{};
+ bool is_indirect{};
if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(is_indirect)) {
return {};
}
- entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
+ entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
}
u32 samplers_count{};
@@ -320,18 +321,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
u64 offset{};
u64 index{};
u32 type{};
- u8 is_array{};
- u8 is_shadow{};
- u8 is_bindless{};
+ bool is_array{};
+ bool is_shadow{};
+ bool is_bindless{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
!LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
- entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
- static_cast<std::size_t>(index),
- static_cast<Tegra::Shader::TextureType>(type),
- is_array != 0, is_shadow != 0, is_bindless != 0);
+ entry.entries.samplers.emplace_back(
+ static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
+ static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
}
u32 global_memory_count{};
@@ -342,21 +342,20 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
- u8 is_read{};
- u8 is_written{};
+ bool is_read{};
+ bool is_written{};
if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
!LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
return {};
}
- entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
- is_written != 0);
+ entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
+ is_written);
}
for (auto& clip_distance : entry.entries.clip_distances) {
- u8 clip_distance_raw{};
- if (!LoadObjectFromPrecompiled(clip_distance_raw))
+ if (!LoadObjectFromPrecompiled(clip_distance)) {
return {};
- clip_distance = clip_distance_raw != 0;
+ }
}
u64 shader_length{};
@@ -384,7 +383,7 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
for (const auto& cbuf : entries.const_buffers) {
if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(cbuf.IsIndirect())) {
return false;
}
}
@@ -396,9 +395,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(sampler.IsArray()) ||
+ !SaveObjectToPrecompiled(sampler.IsShadow()) ||
+ !SaveObjectToPrecompiled(sampler.IsBindless())) {
return false;
}
}
@@ -409,14 +408,13 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
for (const auto& gmem : entries.global_memory_entries) {
if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
return false;
}
}
for (const bool clip_distance : entries.clip_distances) {
- if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
+ if (!SaveObjectToPrecompiled(clip_distance)) {
return false;
}
}
@@ -475,7 +473,10 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
auto& usages{it->second};
- ASSERT(usages.find(usage) == usages.end());
+ if (usages.find(usage) != usages.end()) {
+ // Skip this variant since the shader is already stored.
+ return;
+ }
usages.insert(usage);
FileUtil::IOFile file = AppendTransferableFile();
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 0142b2e3b..2da0a4a23 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -70,14 +70,14 @@ namespace std {
template <>
struct hash<OpenGL::BaseBindings> {
- std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
+ std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
}
};
template <>
struct hash<OpenGL::ShaderDiskCacheUsage> {
- std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
+ std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
return static_cast<std::size_t>(usage.unique_identifier) ^
std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
}
@@ -162,6 +162,7 @@ struct ShaderDiskCacheDump {
class ShaderDiskCacheOpenGL {
public:
explicit ShaderDiskCacheOpenGL(Core::System& system);
+ ~ShaderDiskCacheOpenGL();
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
@@ -259,20 +260,35 @@ private:
return SaveArrayToPrecompiled(&object, 1);
}
+ bool SaveObjectToPrecompiled(bool object) {
+ const auto value = static_cast<u8>(object);
+ return SaveArrayToPrecompiled(&value, 1);
+ }
+
template <typename T>
bool LoadObjectFromPrecompiled(T& object) {
return LoadArrayFromPrecompiled(&object, 1);
}
- // Copre system
+ bool LoadObjectFromPrecompiled(bool& object) {
+ u8 value;
+ const bool read_ok = LoadArrayFromPrecompiled(&value, 1);
+ if (!read_ok) {
+ return false;
+ }
+
+ object = value != 0;
+ return true;
+ }
+
+ // Core system
Core::System& system;
// Stored transferable shaders
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
- // Stores whole precompiled cache which will be read from or saved to the precompiled chache
- // file
+ // Stores whole precompiled cache which will be read from/saved to the precompiled cache file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
// Stores the current offset of the precompiled cache file for IO purposes
- std::size_t precompiled_cache_virtual_file_offset;
+ std::size_t precompiled_cache_virtual_file_offset = 0;
// The cache has been loaded at boot
bool tried_to_load{};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 6abf948f8..7ab0b4553 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -33,14 +33,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
};
)";
- ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+ const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
out += program.first;
if (setup.IsDualProgram()) {
- ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
+ const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
ProgramResult program_b =
Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
@@ -76,7 +76,7 @@ void main() {
}
})";
- return {out, program.second};
+ return {std::move(out), std::move(program.second)};
}
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
@@ -97,7 +97,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
};
)";
- ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+ const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
out += program.first;
@@ -107,7 +107,7 @@ void main() {
execute_geometry();
};)";
- return {out, program.second};
+ return {std::move(out), std::move(program.second)};
}
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
@@ -160,7 +160,7 @@ bool AlphaFunc(in float value) {
}
)";
- ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+ const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
@@ -172,7 +172,7 @@ void main() {
}
)";
- return {out, program.second};
+ return {std::move(out), std::move(program.second)};
}
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 95b773135..ed7b5cff0 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -126,6 +126,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_TRIANGLES;
case Maxwell::PrimitiveTopology::TriangleStrip:
return GL_TRIANGLE_STRIP;
+ case Maxwell::PrimitiveTopology::TriangleFan:
+ return GL_TRIANGLE_FAN;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 08b786aad..3edf460df 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -49,9 +49,6 @@ public:
return alignment;
}
- // We do not have to flush this cache as things in it are never modified by us.
- void Flush() override {}
-
private:
VAddr cpu_addr{};
std::size_t size{};
@@ -87,6 +84,10 @@ public:
return buffer_handle;
}
+protected:
+ // We do not have to flush this cache as things in it are never modified by us.
+ void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+
private:
void AlignBuffer(std::size_t alignment);
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 23d9b10db..a11000f6b 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -315,7 +315,6 @@ private:
constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
"overflow"};
for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
- const auto flag_code = static_cast<InternalFlag>(flag);
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
}
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 8b574d4e5..5b033126d 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -540,7 +540,6 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
bool is_array, bool is_aoffi) {
const std::size_t coord_count = GetCoordCount(texture_type);
- const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index e4eb0dfd9..196235e5d 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -21,6 +21,13 @@ using Tegra::Shader::PredCondition;
using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
+ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset)
+ : program_code{program_code}, main_offset{main_offset} {
+ Decode();
+}
+
+ShaderIR::~ShaderIR() = default;
+
Node ShaderIR::StoreNode(NodeData&& node_data) {
auto store = std::make_unique<NodeData>(node_data);
const Node node = store.get();
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 65f1e1de9..e4253fdb3 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -328,40 +328,31 @@ struct MetaTexture {
u32 element{};
};
-inline constexpr MetaArithmetic PRECISE = {true};
-inline constexpr MetaArithmetic NO_PRECISE = {false};
+constexpr MetaArithmetic PRECISE = {true};
+constexpr MetaArithmetic NO_PRECISE = {false};
using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
/// Holds any kind of operation that can be done in the IR
class OperationNode final {
public:
- template <typename... T>
- explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {}
+ explicit OperationNode(OperationCode code) : code{code} {}
- template <typename... T>
- explicit constexpr OperationNode(OperationCode code, Meta&& meta)
- : code{code}, meta{std::move(meta)} {}
+ explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {}
template <typename... T>
- explicit constexpr OperationNode(OperationCode code, const T*... operands)
+ explicit OperationNode(OperationCode code, const T*... operands)
: OperationNode(code, {}, operands...) {}
template <typename... T>
- explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
- : code{code}, meta{std::move(meta)} {
-
- auto operands_list = {operands_...};
- for (auto& operand : operands_list) {
- operands.push_back(operand);
- }
- }
+ explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
+ : code{code}, meta{std::move(meta)}, operands{operands_...} {}
explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands)
: code{code}, meta{meta}, operands{std::move(operands)} {}
explicit OperationNode(OperationCode code, std::vector<Node>&& operands)
- : code{code}, meta{}, operands{std::move(operands)} {}
+ : code{code}, operands{std::move(operands)} {}
OperationCode GetCode() const {
return code;
@@ -567,11 +558,8 @@ private:
class ShaderIR final {
public:
- explicit ShaderIR(const ProgramCode& program_code, u32 main_offset)
- : program_code{program_code}, main_offset{main_offset} {
-
- Decode();
- }
+ explicit ShaderIR(const ProgramCode& program_code, u32 main_offset);
+ ~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
return basic_blocks;
@@ -814,11 +802,12 @@ private:
void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
Node op_c, Node imm_lut, bool sets_cc);
- Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
+ Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
- std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
+ std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
- std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
+ std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
+ s64 cursor) const;
std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
Node addr_register,
@@ -835,12 +824,10 @@ private:
return StoreNode(OperationNode(code, std::move(meta), operands...));
}
- template <typename... T>
Node Operation(OperationCode code, std::vector<Node>&& operands) {
return StoreNode(OperationNode(code, std::move(operands)));
}
- template <typename... T>
Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
}
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 4505667ff..19ede1eb9 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -17,22 +17,24 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
for (; cursor >= 0; --cursor) {
const Node node = code.at(cursor);
if (const auto operation = std::get_if<OperationNode>(node)) {
- if (operation->GetCode() == operation_code)
+ if (operation->GetCode() == operation_code) {
return {node, cursor};
+ }
}
if (const auto conditional = std::get_if<ConditionalNode>(node)) {
const auto& conditional_code = conditional->GetCode();
const auto [found, internal_cursor] = FindOperation(
conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
- if (found)
+ if (found) {
return {found, cursor};
+ }
}
}
return {};
}
} // namespace
-Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
+Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const {
if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
// Cbuf found, but it has to be immediate
return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
@@ -65,7 +67,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
return nullptr;
}
-std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
+std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
// Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
// that it uses as operand
const auto [found, found_cursor] =
@@ -80,7 +82,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
}
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
- s64 cursor) {
+ s64 cursor) const {
for (; cursor >= 0; --cursor) {
const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
if (!found_node) {
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index b508d64e9..a9b8f69af 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -25,8 +25,8 @@
class InputBitStream {
public:
- explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
- : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
+ explicit InputBitStream(const unsigned char* ptr, int start_offset = 0)
+ : m_CurByte(ptr), m_NextBit(start_offset % 8) {}
~InputBitStream() = default;
@@ -55,12 +55,9 @@ public:
}
private:
- const int m_NumBits;
const unsigned char* m_CurByte;
int m_NextBit = 0;
int m_BitsRead = 0;
-
- bool done = false;
};
class OutputBitStream {
@@ -114,7 +111,6 @@ private:
const int m_NumBits;
unsigned char* m_CurByte;
int m_NextBit = 0;
- int m_BitsRead = 0;
bool done = false;
};
@@ -1616,6 +1612,7 @@ namespace Tegra::Texture::ASTC {
std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
uint32_t depth, uint32_t block_width, uint32_t block_height) {
uint32_t blockIdx = 0;
+ std::size_t depth_offset = 0;
std::vector<uint8_t> outData(height * width * depth * 4);
for (uint32_t k = 0; k < depth; k++) {
for (uint32_t j = 0; j < height; j += block_height) {
@@ -1630,7 +1627,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
uint32_t decompWidth = std::min(block_width, width - i);
uint32_t decompHeight = std::min(block_height, height - j);
- uint8_t* outRow = outData.data() + (j * width + i) * 4;
+ uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4;
for (uint32_t jj = 0; jj < decompHeight; jj++) {
memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
}
@@ -1638,6 +1635,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
blockIdx++;
}
}
+ depth_offset += height * width * 4;
}
return outData;