summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/engines/fermi_2d.cpp15
-rw-r--r--src/video_core/engines/fermi_2d.h2
-rw-r--r--src/video_core/engines/kepler_compute.cpp3
-rw-r--r--src/video_core/engines/kepler_compute.h3
-rw-r--r--src/video_core/engines/kepler_memory.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp66
-rw-r--r--src/video_core/engines/maxwell_3d.h17
-rw-r--r--src/video_core/engines/maxwell_dma.cpp1
-rw-r--r--src/video_core/engines/maxwell_dma.h1
-rw-r--r--src/video_core/engines/shader_bytecode.h1
-rw-r--r--src/video_core/gpu.h2
-rw-r--r--src/video_core/rasterizer_cache.h18
-rw-r--r--src/video_core/rasterizer_interface.h4
-rw-r--r--src/video_core/renderer_base.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp40
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp138
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h91
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp4
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp28
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp116
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h87
-rw-r--r--src/video_core/renderer_vulkan/vk_memory_manager.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.cpp12
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp90
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h72
-rw-r--r--src/video_core/surface.cpp2
31 files changed, 696 insertions, 140 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index b5a327936..c1ae83f4d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -106,6 +106,8 @@ add_library(video_core STATIC
if (ENABLE_VULKAN)
target_sources(video_core PRIVATE
renderer_vulkan/declarations.h
+ renderer_vulkan/vk_buffer_cache.cpp
+ renderer_vulkan/vk_buffer_cache.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h
renderer_vulkan/vk_memory_manager.cpp
@@ -113,7 +115,9 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_scheduler.cpp
- renderer_vulkan/vk_scheduler.h)
+ renderer_vulkan/vk_scheduler.h
+ renderer_vulkan/vk_stream_buffer.cpp
+ renderer_vulkan/vk_stream_buffer.h)
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index ec1a57226..03b7ee5d8 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,12 +2,11 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "core/core.h"
-#include "core/memory.h"
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
#include "video_core/engines/fermi_2d.h"
-#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
-#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
@@ -44,10 +43,10 @@ void Fermi2D::HandleSurfaceCopy() {
const u32 src_blit_y2{
static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
- const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
- const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
- regs.blit_dst_x + regs.blit_dst_width,
- regs.blit_dst_y + regs.blit_dst_height};
+ const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
+ const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
+ regs.blit_dst_x + regs.blit_dst_width,
+ regs.blit_dst_y + regs.blit_dst_height};
if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
UNIMPLEMENTED();
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index c69f74cc5..80523e320 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,7 +5,7 @@
#pragma once
#include <array>
-#include "common/assert.h"
+#include <cstddef>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 4ca856b6b..b1d950460 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -2,9 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/assert.h"
#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/memory_manager.h"
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index df0a32e0f..6575afd0f 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,8 +5,7 @@
#pragma once
#include <array>
-#include "common/assert.h"
-#include "common/bit_field.h"
+#include <cstddef>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index f680c2ad9..9181e9d80 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <cstddef>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2d2136067..144e7fa82 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
auto debug_context = system.GetGPUDebugContext();
+ const u32 method = method_call.method;
+
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
if (executing_macro != 0) {
- ASSERT(method_call.method == executing_macro + 1);
+ ASSERT(method == executing_macro + 1);
}
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
- if (method_call.method >= MacroRegistersStart) {
+ if (method >= MacroRegistersStart) {
// We're trying to execute a macro
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
- ASSERT_MSG((method_call.method % 2) == 0,
+ ASSERT_MSG((method % 2) == 0,
"Can't start macro execution by writing to the ARGS register");
- executing_macro = method_call.method;
+ executing_macro = method;
}
macro_params.push_back(method_call.argument);
@@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
return;
}
- ASSERT_MSG(method_call.method < Regs::NUM_REGS,
+ ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
}
- if (regs.reg_array[method_call.method] != method_call.argument) {
- regs.reg_array[method_call.method] = method_call.argument;
+ if (regs.reg_array[method] != method_call.argument) {
+ regs.reg_array[method] = method_call.argument;
// Color buffers
constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
- if (method_call.method >= first_rt_reg &&
- method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
- const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
- dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
+ if (method >= first_rt_reg &&
+ method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
+ const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
+ dirty_flags.color_buffer.set(rt_index);
}
// Zeta buffer
constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
- if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
- method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
- method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
- (method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
- method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
+ if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
+ method == MAXWELL3D_REG_INDEX(zeta_width) ||
+ method == MAXWELL3D_REG_INDEX(zeta_height) ||
+ (method >= MAXWELL3D_REG_INDEX(zeta) &&
+ method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
dirty_flags.zeta_buffer = true;
}
// Shader
constexpr u32 shader_registers_count =
sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
- if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
- method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
+ if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
+ method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
dirty_flags.shaders = true;
}
// Vertex format
- if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
- method_call.method <
- MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
+ if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
+ method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
dirty_flags.vertex_attrib_format = true;
}
// Vertex buffer
- if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) &&
- method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
- dirty_flags.vertex_array |=
- 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
- } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
- method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
- dirty_flags.vertex_array |=
- 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
- } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
- method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
- dirty_flags.vertex_array |=
- 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
+ if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
+ method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
+ dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
+ } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
+ method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
+ dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
+ } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
+ method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
+ dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
}
}
- switch (method_call.method) {
+ switch (method) {
case MAXWELL3D_REG_INDEX(macros.data): {
ProcessMacroUpload(method_call.argument);
break;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0e3873ffd..7fbf1026e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
#pragma once
#include <array>
+#include <bitset>
#include <unordered_map>
#include <vector>
+
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
@@ -503,7 +505,7 @@ public:
f32 translate_z;
INSERT_PADDING_WORDS(2);
- MathUtil::Rectangle<s32> GetRect() const {
+ Common::Rectangle<s32> GetRect() const {
return {
GetX(), // left
GetY() + GetHeight(), // top
@@ -1094,19 +1096,18 @@ public:
MemoryManager& memory_manager;
struct DirtyFlags {
- u8 color_buffer = 0xFF;
- bool zeta_buffer = true;
-
- bool shaders = true;
+ std::bitset<8> color_buffer{0xFF};
+ std::bitset<32> vertex_array{0xFFFFFFFF};
bool vertex_attrib_format = true;
- u32 vertex_array = 0xFFFFFFFF;
+ bool zeta_buffer = true;
+ bool shaders = true;
void OnMemoryWrite() {
- color_buffer = 0xFF;
zeta_buffer = true;
shaders = true;
- vertex_array = 0xFFFFFFFF;
+ color_buffer.set();
+ vertex_array.set();
}
};
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 529a14ec7..0474c7ba3 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include "common/assert.h"
+#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index cf75aeb12..34c369320 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <cstddef>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 252592edd..c7eb15b6a 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -6,7 +6,6 @@
#include <bitset>
#include <optional>
-#include <string>
#include <tuple>
#include <vector>
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 0f5bfdcbf..6313702f2 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -100,7 +100,7 @@ struct FramebufferConfig {
using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
TransformFlags transform_flags;
- MathUtil::Rectangle<int> crop_rect;
+ Common::Rectangle<int> crop_rect;
};
namespace Engines {
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index bcf0c15a4..a7bcf26fb 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -129,6 +129,15 @@ protected:
return ++modified_ticks;
}
+ /// Flushes the specified object, updating appropriate cache state as needed
+ void FlushObject(const T& object) {
+ if (!object->IsDirty()) {
+ return;
+ }
+ object->Flush();
+ object->MarkAsModified(false, *this);
+ }
+
private:
/// Returns a list of cached objects from the specified memory region, ordered by access time
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
@@ -154,15 +163,6 @@ private:
return objects;
}
- /// Flushes the specified object, updating appropriate cache state as needed
- void FlushObject(const T& object) {
- if (!object->IsDirty()) {
- return;
- }
- object->Flush();
- object->MarkAsModified(false, *this);
- }
-
using ObjectSet = std::set<T>;
using ObjectCache = std::unordered_map<VAddr, T>;
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index b2a223705..6a1dc9cf6 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -47,8 +47,8 @@ public:
/// Attempt to use a faster method to perform a surface copy
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
- const MathUtil::Rectangle<u32>& src_rect,
- const MathUtil::Rectangle<u32>& dst_rect) {
+ const Common::Rectangle<u32>& src_rect,
+ const Common::Rectangle<u32>& dst_rect) {
return false;
}
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 94223f45f..919d1f2d4 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/logging/log.h"
#include "core/frontend/emu_window.h"
#include "core/settings.h"
#include "video_core/renderer_base.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 12d876120..321d9dd3d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -102,8 +102,8 @@ struct FramebufferCacheKey {
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
ScreenInfo& info)
- : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
- buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
+ : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
+ screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
}
// Rebinding the VAO invalidates the vertex buffer bindings.
- gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+ gpu.dirty_flags.vertex_array.set();
state.draw.vertex_array = vao_entry.handle;
return vao_entry.handle;
@@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
- if (!gpu.dirty_flags.vertex_array)
+ if (gpu.dirty_flags.vertex_array.none())
return;
MICROPROFILE_SCOPE(OpenGL_VB);
// Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (~gpu.dirty_flags.vertex_array & (1u << index))
+ if (!gpu.dirty_flags.vertex_array[index])
continue;
const auto& vertex_array = regs.vertex_array[index];
@@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
}
}
- gpu.dirty_flags.vertex_array = 0;
+ gpu.dirty_flags.vertex_array.reset();
}
DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
std::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
- const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
single_color_target};
- if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
- !gpu.dirty_flags.zeta_buffer) {
+ if (fb_config_state == current_framebuffer_config_state &&
+ gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
// single color targets). This is done because the guest registers may not change but the
// host framebuffer may contain different attachments
@@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() {
// Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
- bool invalidate = buffer_cache.Map(buffer_size);
+ const bool invalidate = buffer_cache.Map(buffer_size);
if (invalidate) {
// As all cached buffers are invalidated, we need to recheck their state.
- gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+ gpu.dirty_flags.vertex_array.set();
}
const GLuint vao = SetupVertexFormat();
@@ -738,19 +738,11 @@ void RasterizerOpenGL::DrawArrays() {
shader_program_manager->ApplyTo(state);
state.Apply();
- // Execute draw call
+ res_cache.SignalPreDrawCall();
params.DispatchDraw();
-
- // Disable scissor test
- state.viewports[0].scissor.enabled = false;
+ res_cache.SignalPostDrawCall();
accelerate_draw = AccelDraw::Disabled;
-
- // Unbind textures for potential future use as framebuffer attachments
- for (auto& texture_unit : state.texture_units) {
- texture_unit.Unbind();
- }
- state.Apply();
}
void RasterizerOpenGL::FlushAll() {}
@@ -779,8 +771,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
- const MathUtil::Rectangle<u32>& src_rect,
- const MathUtil::Rectangle<u32>& dst_rect) {
+ const Common::Rectangle<u32>& src_rect,
+ const Common::Rectangle<u32>& dst_rect) {
MICROPROFILE_SCOPE(OpenGL_Blits);
res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
return true;
@@ -1034,7 +1026,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
for (std::size_t i = 0; i < viewport_count; i++) {
auto& viewport = current_state.viewports[i];
const auto& src = regs.viewports[i];
- const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
+ const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
viewport.x = viewport_rect.left;
viewport.y = viewport_rect.bottom;
viewport.width = viewport_rect.GetWidth();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 258d62259..2f0524f85 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -62,8 +62,8 @@ public:
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
- const MathUtil::Rectangle<u32>& src_rect,
- const MathUtil::Rectangle<u32>& dst_rect) override;
+ const Common::Rectangle<u32>& src_rect,
+ const Common::Rectangle<u32>& dst_rect) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
bool AccelerateDrawBatch(bool is_indexed) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 642ccb269..876698b37 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <optional>
#include <glad/glad.h>
#include "common/alignment.h"
@@ -399,7 +400,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
return format;
}
-MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
+Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
if (IsPixelFormatASTC(pixel_format)) {
// ASTC formats must stop at the ATSC block size boundary
@@ -549,6 +550,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
// alternatives. This signals a bug on those functions.
const auto width = static_cast<GLsizei>(params.MipWidth(0));
const auto height = static_cast<GLsizei>(params.MipHeight(0));
+ memory_size = params.MemorySize();
+ reinterpreted = false;
const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
gl_internal_format = format_tuple.internal_format;
@@ -873,30 +876,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
const auto& regs{gpu.regs};
- if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
- return last_color_buffers[index];
+ if (!gpu.dirty_flags.color_buffer[index]) {
+ return current_color_buffers[index];
}
- gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));
+ gpu.dirty_flags.color_buffer.reset(index);
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
if (index >= regs.rt_control.count) {
- return last_color_buffers[index] = {};
+ return current_color_buffers[index] = {};
}
if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
- return last_color_buffers[index] = {};
+ return current_color_buffers[index] = {};
}
const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
- return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
+ return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
}
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
surface->LoadGLBuffer();
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->MarkAsModified(false, *this);
+ surface->MarkForReload(false);
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -908,18 +912,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
Surface surface{TryGet(params.addr)};
if (surface) {
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
- // Use the cached surface as-is
+ // Use the cached surface as-is unless it's not synced with memory
+ if (surface->MustReload())
+ LoadSurface(surface);
return surface;
} else if (preserve_contents) {
// If surface parameters changed and we care about keeping the previous data, recreate
// the surface from the old one
Surface new_surface{RecreateSurface(surface, params)};
- Unregister(surface);
+ UnregisterSurface(surface);
Register(new_surface);
+ if (new_surface->IsUploaded()) {
+ RegisterReinterpretSurface(new_surface);
+ }
return new_surface;
} else {
// Delete the old surface before creating a new one to prevent collisions.
- Unregister(surface);
+ UnregisterSurface(surface);
}
}
@@ -973,8 +982,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
}
static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
- const MathUtil::Rectangle<u32>& src_rect,
- const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
+ const Common::Rectangle<u32>& src_rect,
+ const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
std::size_t cubemap_face = 0) {
@@ -1104,7 +1113,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
void RasterizerCacheOpenGL::FermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
- const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) {
+ const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
@@ -1201,4 +1210,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
return {};
}
+static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
+ u32 height) {
+ for (u32 i = 0; i < params.max_mip_level; i++) {
+ if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
+ return {i};
+ }
+ }
+ return {};
+}
+
+static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
+ const std::size_t size = params.LayerMemorySize();
+ VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
+ for (u32 i = 0; i < params.depth; i++) {
+ if (start == addr) {
+ return {i};
+ }
+ start += size;
+ }
+ return {};
+}
+
+static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
+ const Surface blitted_surface) {
+ const auto& dst_params = blitted_surface->GetSurfaceParams();
+ const auto& src_params = render_surface->GetSurfaceParams();
+ const std::size_t src_memory_size = src_params.size_in_bytes;
+ const std::optional<u32> level =
+ TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
+ if (level.has_value()) {
+ if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
+ src_params.height == dst_params.MipHeight(*level) &&
+ src_params.block_height >= dst_params.MipBlockHeight(*level)) {
+ const std::optional<u32> slot =
+ TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
+ if (slot.has_value()) {
+ glCopyImageSubData(render_surface->Texture().handle,
+ SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
+ blitted_surface->Texture().handle,
+ SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
+ dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
+ blitted_surface->MarkAsModified(true, cache);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
+ const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
+ const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
+ if (bound2 > bound1)
+ return true;
+ const auto& dst_params = blitted_surface->GetSurfaceParams();
+ const auto& src_params = render_surface->GetSurfaceParams();
+ return (dst_params.component_type != src_params.component_type);
+}
+
+static bool IsReinterpretInvalidSecond(const Surface render_surface,
+ const Surface blitted_surface) {
+ const auto& dst_params = blitted_surface->GetSurfaceParams();
+ const auto& src_params = render_surface->GetSurfaceParams();
+ return (dst_params.height > src_params.height && dst_params.width > src_params.width);
+}
+
+bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
+ Surface intersect) {
+ if (IsReinterpretInvalid(triggering_surface, intersect)) {
+ UnregisterSurface(intersect);
+ return false;
+ }
+ if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
+ if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
+ UnregisterSurface(intersect);
+ return false;
+ }
+ FlushObject(intersect);
+ FlushObject(triggering_surface);
+ intersect->MarkForReload(true);
+ }
+ return true;
+}
+
+void RasterizerCacheOpenGL::SignalPreDrawCall() {
+ if (texception && GLAD_GL_ARB_texture_barrier) {
+ glTextureBarrier();
+ }
+ texception = false;
+}
+
+void RasterizerCacheOpenGL::SignalPostDrawCall() {
+ for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
+ if (current_color_buffers[i] != nullptr) {
+ Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
+ if (intersect != nullptr) {
+ PartialReinterpretSurface(current_color_buffers[i], intersect);
+ texception = true;
+ }
+ }
+ }
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 89d733c50..797bbdc9c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -28,12 +28,13 @@ namespace OpenGL {
class CachedSurface;
using Surface = std::shared_ptr<CachedSurface>;
-using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
+using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
using SurfaceType = VideoCore::Surface::SurfaceType;
using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct SurfaceParams {
enum class SurfaceClass {
@@ -71,7 +72,7 @@ struct SurfaceParams {
}
/// Returns the rectangle corresponding to this surface
- MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const;
+ Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
/// Returns the total size of this surface in bytes, adjusted for compression
std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -140,10 +141,18 @@ struct SurfaceParams {
return offset;
}
+ /// Returns InnerMipmapMemorySize(mip_level, false, is_layered) for this surface.
+ /// NOTE(review): the meaning of the `false` argument is not visible from here - confirm.
+ std::size_t GetMipmapSingleSize(u32 mip_level) const {
+ return InnerMipmapMemorySize(mip_level, false, is_layered);
+ }
+
u32 MipWidth(u32 mip_level) const {
return std::max(1U, width >> mip_level);
}
+    /// Width of the given mip level (never below 1) rounded up to a multiple of
+    /// 64 * 8 / GetFormatBpp().
+    u32 MipWidthGobAligned(u32 mip_level) const {
+        const auto alignment = 64U * 8U / GetFormatBpp();
+        return Common::AlignUp(MipWidth(mip_level), alignment);
+    }
+
u32 MipHeight(u32 mip_level) const {
return std::max(1U, height >> mip_level);
}
@@ -346,6 +355,10 @@ public:
return cached_size_in_bytes;
}
+ /// Returns the stored memory_size of this surface.
+ std::size_t GetMemorySize() const {
+ return memory_size;
+ }
+
void Flush() override {
FlushGLBuffer();
}
@@ -395,6 +408,26 @@ public:
Tegra::Texture::SwizzleSource swizzle_z,
Tegra::Texture::SwizzleSource swizzle_w);
+ /// Flags this surface as reinterpreted. The flag is never cleared by this accessor.
+ void MarkReinterpreted() {
+ reinterpreted = true;
+ }
+
+ /// Returns the reinterpreted flag of this surface.
+ bool IsReinterpreted() const {
+ return reinterpreted;
+ }
+
+ /// Sets (reload == true) or clears (reload == false) the must_reload flag.
+ void MarkForReload(bool reload) {
+ must_reload = reload;
+ }
+
+ /// Returns the must_reload flag last written by MarkForReload().
+ bool MustReload() const {
+ return must_reload;
+ }
+
+ /// Returns true when this surface's params identify it as SurfaceClass::Uploaded.
+ bool IsUploaded() const {
+ return params.identity == SurfaceParams::SurfaceClass::Uploaded;
+ }
+
private:
void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
@@ -408,6 +441,9 @@ private:
GLenum gl_internal_format{};
std::size_t cached_size_in_bytes{};
std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
+ std::size_t memory_size;
+ bool reinterpreted = false;
+ bool must_reload = false;
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -430,8 +466,11 @@ public:
/// Copies the contents of one surface to another
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
- const MathUtil::Rectangle<u32>& src_rect,
- const MathUtil::Rectangle<u32>& dst_rect);
+ const Common::Rectangle<u32>& src_rect,
+ const Common::Rectangle<u32>& dst_rect);
+
+ void SignalPreDrawCall();
+ void SignalPostDrawCall();
private:
void LoadSurface(const Surface& surface);
@@ -449,6 +488,10 @@ private:
/// Tries to get a reserved surface for the specified parameters
Surface TryGetReservedSurface(const SurfaceParams& params);
+ // Partially reinterprets a surface based on a triggering_surface that collides with it.
+ // Returns true if the reinterpret was successful, false in case it was not.
+ bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
+
/// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
@@ -465,12 +508,50 @@ private:
OGLFramebuffer read_framebuffer;
OGLFramebuffer draw_framebuffer;
+ bool texception = false;
+
/// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
/// using the new format.
OGLBuffer copy_pbo;
- std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
+ std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
+ std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
Surface last_depth_buffer;
+
+ using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
+ using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
+
+    /// Builds the right-open interval [addr + 1, addr + memory_size - 1) used to track `object`
+    /// in the reinterpreted-surface map.
+    static auto GetReinterpretInterval(const Surface& object) {
+        const VAddr lower = object->GetAddr() + 1;
+        const VAddr upper = object->GetAddr() + object->GetMemorySize() - 1;
+        return SurfaceInterval::right_open(lower, upper);
+    }
+
+ // Reinterpreted surfaces are very fragile as the game may keep rendering into them.
+ SurfaceIntervalCache reinterpreted_surfaces;
+
+    /// Inserts the surface into the reinterpreted-surface map under its reinterpret interval and
+    /// flags the surface as reinterpreted.
+    void RegisterReinterpretSurface(Surface reinterpret_surface) {
+        reinterpreted_surfaces.insert(
+            {GetReinterpretInterval(reinterpret_surface), reinterpret_surface});
+        reinterpret_surface->MarkReinterpreted();
+    }
+
+    /// Returns the first reinterpreted surface whose tracked interval contains `addr`, or nullptr
+    /// when no tracked interval contains it.
+    Surface CollideOnReinterpretedSurface(VAddr addr) const {
+        const SurfaceInterval point{addr};
+        const auto matches = reinterpreted_surfaces.equal_range(point);
+        if (matches.first == matches.second) {
+            return nullptr;
+        }
+        return matches.first->second;
+    }
+
+ /// Unregisters an object from the cache
+ void UnregisterSurface(const Surface& object) {
+ if (object->IsReinterpreted()) {
+ auto interval = GetReinterpretInterval(object);
+ reinterpreted_surfaces.erase(interval);
+ }
+ Unregister(object);
+ }
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 81882822b..82fc4d44b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -2,8 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#pragma once
-
#include <cstring>
#include <fmt/format.h>
#include <lz4.h>
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 219f08053..9419326a3 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -461,7 +461,7 @@ void OpenGLState::ApplyTextures() const {
if (has_delta) {
glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
- textures.data());
+ textures.data() + first);
}
}
@@ -482,7 +482,7 @@ void OpenGLState::ApplySamplers() const {
}
if (has_delta) {
glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
- samplers.data());
+ samplers.data() + first);
}
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 272fc2e8e..8b510b6ae 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -244,6 +244,21 @@ void RendererOpenGL::InitOpenGLObjects() {
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
}
+/// Queries the GL version, vendor and renderer strings, logs them, and records them as
+/// UserSystem telemetry fields.
+void RendererOpenGL::AddTelemetryFields() {
+    const auto query_string = [](GLenum name) {
+        return reinterpret_cast<char const*>(glGetString(name));
+    };
+
+    const char* const gl_version{query_string(GL_VERSION)};
+    const char* const gpu_vendor{query_string(GL_VENDOR)};
+    const char* const gpu_model{query_string(GL_RENDERER)};
+
+    LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
+    LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
+    LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
+
+    constexpr auto field_type = Telemetry::FieldType::UserSystem;
+    auto& session = system.TelemetrySession();
+    session.AddField(field_type, "GPU_Vendor", gpu_vendor);
+    session.AddField(field_type, "GPU_Model", gpu_model);
+    session.AddField(field_type, "GPU_OpenGL_Version", gl_version);
+}
+
void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
@@ -257,6 +272,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer) {
texture.width = framebuffer.width;
texture.height = framebuffer.height;
+ texture.pixel_format = framebuffer.pixel_format;
GLint internal_format;
switch (framebuffer.pixel_format) {
@@ -465,17 +481,7 @@ bool RendererOpenGL::Init() {
glDebugMessageCallback(DebugHandler, nullptr);
}
- const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
- const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
- const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
-
- LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
- LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
- LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
-
- Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
- Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
- Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+ AddTelemetryFields();
if (!GLAD_GL_VERSION_4_3) {
return false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 7e13e566b..6cbf9d2cb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -39,7 +39,7 @@ struct TextureInfo {
/// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo {
GLuint display_texture;
- const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
+ const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
TextureInfo texture;
};
@@ -60,6 +60,7 @@ public:
private:
void InitOpenGLObjects();
+ void AddTelemetryFields();
void CreateRasterizer();
void ConfigureFramebufferTexture(TextureInfo& texture,
@@ -102,7 +103,7 @@ private:
/// Used for transforming the framebuffer orientation
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
- MathUtil::Rectangle<int> framebuffer_crop_rect;
+ Common::Rectangle<int> framebuffer_crop_rect;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
new file mode 100644
index 000000000..4a33a6c84
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -0,0 +1,116 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <memory>
+#include <optional>
+#include <tuple>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "core/memory.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+/// Creates the backing stream buffer of `size` bytes, usable as vertex, index and uniform buffer,
+/// and caches its Vulkan handle.
+VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
+                             VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+                             VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
+    : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} {
+    const auto buffer_access = vk::AccessFlagBits::eVertexAttributeRead |
+                               vk::AccessFlagBits::eIndexRead | vk::AccessFlagBits::eUniformRead;
+    const auto buffer_usage = vk::BufferUsageFlagBits::eVertexBuffer |
+                              vk::BufferUsageFlagBits::eIndexBuffer |
+                              vk::BufferUsageFlagBits::eUniformBuffer;
+
+    stream_buffer = std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size,
+                                                     buffer_usage, buffer_access,
+                                                     vk::PipelineStageFlagBits::eAllCommands);
+    buffer_handle = stream_buffer->GetBuffer();
+}
+
+// Defaulted here rather than in the header, where VKStreamBuffer is only forward-declared while
+// being held through a std::unique_ptr member.
+VKBufferCache::~VKBufferCache() = default;
+
+/// Copies `size` bytes of guest memory at `gpu_addr` into the stream buffer and returns the
+/// buffer offset of the copy. When caching is requested and the upload is large enough, an
+/// existing entry with sufficient size and identical alignment is reused instead of copying.
+u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
+                                bool cache) {
+    const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
+    ASSERT(cpu_addr);
+
+    // Cache management is a big overhead, so only cache entries with a given size.
+    // TODO: Figure out which size is the best for given games.
+    const bool use_cache = cache && size >= 2048;
+
+    if (use_cache) {
+        if (auto cached_entry = TryGet(*cpu_addr); cached_entry) {
+            const bool reusable =
+                cached_entry->size >= size && cached_entry->alignment == alignment;
+            if (reusable) {
+                return cached_entry->offset;
+            }
+            // The stale entry is dropped; a fresh one is registered after the upload below.
+            Unregister(cached_entry);
+        }
+    }
+
+    AlignBuffer(alignment);
+    const u64 uploaded_offset = buffer_offset;
+
+    Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+    buffer_ptr += size;
+    buffer_offset += size;
+
+    if (!use_cache) {
+        return uploaded_offset;
+    }
+
+    auto new_entry = std::make_shared<CachedBufferEntry>();
+    new_entry->addr = *cpu_addr;
+    new_entry->size = size;
+    new_entry->offset = uploaded_offset;
+    new_entry->alignment = alignment;
+    Register(new_entry);
+
+    return uploaded_offset;
+}
+
+/// Copies `size` bytes from host memory into an aligned slice of the stream buffer and returns
+/// the buffer offset of that slice.
+u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
+    // ReserveMemory aligns the cursor, hands back the slice start and advances past it.
+    const auto [destination, uploaded_offset] = ReserveMemory(size, alignment);
+    std::memcpy(destination, raw_pointer, size);
+    return uploaded_offset;
+}
+
+/// Aligns the write cursor, then hands out `size` bytes starting there. Returns the mapped
+/// pointer and buffer offset of the reserved slice and advances the cursor past it.
+std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
+    AlignBuffer(alignment);
+    const std::tuple<u8*, u64> reservation{buffer_ptr, buffer_offset};
+
+    buffer_ptr += size;
+    buffer_offset += size;
+    return reservation;
+}
+
+/// Reserves `max_size` bytes from the stream buffer and resets the write cursor to the start of
+/// that region. Every cached entry is invalidated when the stream buffer reports an invalidation.
+void VKBufferCache::Reserve(std::size_t max_size) {
+    const auto [mapped_ptr, base_offset, invalidated] = stream_buffer->Reserve(max_size);
+    buffer_ptr = mapped_ptr;
+    buffer_offset_base = base_offset;
+    buffer_offset = base_offset;
+
+    if (invalidated) {
+        InvalidateAll();
+    }
+}
+
+/// Hands the bytes written since the last Reserve() to the stream buffer and returns the
+/// execution context it produces.
+VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
+    const u64 bytes_written = buffer_offset - buffer_offset_base;
+    return stream_buffer->Send(exctx, bytes_written);
+}
+
+/// Advances the write cursor so that the buffer offset is a multiple of `alignment`.
+void VKBufferCache::AlignBuffer(std::size_t alignment) {
+    // The padding is derived from the offset, not the mapped pointer; both move by the same
+    // amount.
+    const u64 padding = Common::AlignUp(buffer_offset, alignment) - buffer_offset;
+    buffer_ptr += padding;
+    buffer_offset += padding;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
new file mode 100644
index 000000000..d8e916f31
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -0,0 +1,87 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <tuple>
+
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKMemoryManager;
+class VKStreamBuffer;
+
+/// Cache entry describing one guest memory region that has been uploaded into the stream buffer.
+struct CachedBufferEntry final : public RasterizerCacheObject {
+    /// Returns the address this entry was uploaded from.
+    VAddr GetAddr() const override {
+        return addr;
+    }
+
+    /// Returns the number of bytes uploaded for this entry.
+    std::size_t GetSizeInBytes() const override {
+        return size;
+    }
+
+    // We do not have to flush this cache as things in it are never modified by us.
+    void Flush() override {}
+
+    // Every field is value-initialized so an entry never exposes indeterminate values,
+    // regardless of how it is constructed.
+    VAddr addr{};            ///< Address the data was read from.
+    std::size_t size{};      ///< Size of the uploaded data in bytes.
+    u64 offset{};            ///< Offset of the data inside the stream buffer.
+    std::size_t alignment{}; ///< Alignment the upload was made with.
+};
+
+/// Uploads guest and host data into a single stream-buffer-backed Vulkan buffer, caching guest
+/// uploads as CachedBufferEntry objects so they can be reused.
+class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
+public:
+ explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
+ VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+ VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
+ ~VKBufferCache();
+
+ /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
+ /// allocated.
+ u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
+ bool cache = true);
+
+ /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
+ u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
+
+ /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
+ std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
+
+ /// Reserves a region of memory to be used in subsequent upload/reserve operations.
+ void Reserve(std::size_t max_size);
+
+ /// Ensures that the set data is sent to the device.
+ [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);
+
+ /// Returns the buffer cache handle.
+ vk::Buffer GetBuffer() const {
+ return buffer_handle;
+ }
+
+private:
+ /// Advances the write cursor so that the buffer offset is a multiple of alignment.
+ void AlignBuffer(std::size_t alignment);
+
+ Tegra::MemoryManager& tegra_memory_manager;
+
+ std::unique_ptr<VKStreamBuffer> stream_buffer;
+ // Handle obtained from stream_buffer in the constructor.
+ vk::Buffer buffer_handle;
+
+ // Write cursor state: mapped pointer, current offset, and the offset returned by the last
+ // Reserve() call.
+ u8* buffer_ptr = nullptr;
+ u64 buffer_offset = 0;
+ u64 buffer_offset_base = 0;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index 17ee93b91..0451babbf 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -238,7 +238,7 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
u8* data, u64 begin, u64 end)
- : allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {}
+ : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
allocation->Free(this);
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
index 1678463c7..a1e117443 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) {
protected_resources.push_back(resource);
}
-void VKFence::Unprotect(const VKResource* resource) {
+void VKFence::Unprotect(VKResource* resource) {
const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
- if (it != protected_resources.end()) {
- protected_resources.erase(it);
- }
+ ASSERT(it != protected_resources.end());
+
+ resource->OnFenceRemoval(this);
+ protected_resources.erase(it);
}
VKFenceWatch::VKFenceWatch() = default;
@@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() {
}
void VKFenceWatch::Wait() {
- if (!fence) {
+ if (fence == nullptr) {
return;
}
fence->Wait();
fence->Unprotect(this);
- fence = nullptr;
}
void VKFenceWatch::Watch(VKFence& new_fence) {
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
index 5018dfa44..5bfe4cead 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -63,7 +63,7 @@ public:
void Protect(VKResource* resource);
/// Removes protection for a resource.
- void Unprotect(const VKResource* resource);
+ void Unprotect(VKResource* resource);
/// Retreives the fence.
operator vk::Fence() const {
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
new file mode 100644
index 000000000..58ffa42f2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+/// Number of fence watches allocated when a stream buffer is constructed.
+constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
+/// Number of fence watches added by Send() whenever the pool is about to run out.
+constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
+
+/// Stores the configuration, creates the backing buffer of `size` bytes with the given usage and
+/// allocates the initial pool of fence watches.
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+ vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
+ : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
+ pipeline_stage} {
+ CreateBuffers(memory_manager, usage);
+ ReserveWatches(WATCHES_INITIAL_RESERVE);
+}
+
+// Defaulted in the source file: the header only forward-declares VKFenceWatch, which is held
+// through std::unique_ptr in `watches`.
+VKStreamBuffer::~VKStreamBuffer() = default;
+
+/// Reserves `size` bytes at the current offset, wrapping back to the start of the buffer when
+/// the request would not fit. Returns the mapped pointer and offset of the region plus a flag
+/// telling whether an invalidation is pending.
+std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
+    ASSERT(size <= buffer_size);
+    mapped_size = size;
+
+    const bool would_overflow = offset + size > buffer_size;
+    if (would_overflow) {
+        // Remember how many watches were in use so Send() can wait on them, then restart from
+        // the beginning of the buffer.
+        invalidation_mark = used_watches;
+        used_watches = 0;
+        offset = 0;
+    }
+
+    const bool invalidated = invalidation_mark.has_value();
+    return {mapped_pointer + offset, offset, invalidated};
+}
+
+/// Commits `size` bytes of the last reservation. If an invalidation is pending, the scheduler is
+/// flushed and every watch recorded before the wrap is waited on. A fence watch is then attached
+/// to this allocation and the buffer offset advances by `size`.
+VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
+    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
+
+    if (invalidation_mark) {
+        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
+        exctx = scheduler.Flush();
+        const std::size_t pending_watches = *invalidation_mark;
+        for (std::size_t index = 0; index < pending_watches; ++index) {
+            watches[index]->Wait();
+        }
+        invalidation_mark = std::nullopt;
+    }
+
+    // Ensure that there are enough watches.
+    if (used_watches + 1 >= watches.size()) {
+        ReserveWatches(WATCHES_RESERVE_CHUNK);
+    }
+
+    // Add a watch for this allocation.
+    watches[used_watches]->Watch(exctx.GetFence());
+    ++used_watches;
+
+    offset += size;
+
+    return exctx;
+}
+
+/// Creates the exclusive-sharing Vulkan buffer of `buffer_size` bytes, commits memory for it and
+/// stores the pointer returned by the commit.
+void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
+    const vk::BufferCreateInfo create_info({}, buffer_size, usage, vk::SharingMode::eExclusive,
+                                           0, nullptr);
+
+    buffer = device.GetLogical().createBufferUnique(create_info, nullptr,
+                                                    device.GetDispatchLoader());
+    commit = memory_manager.Commit(*buffer, true);
+    mapped_pointer = commit->GetData();
+}
+
+/// Appends `grow_size` freshly constructed fence watches to the pool.
+void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
+    const std::size_t first_new = watches.size();
+    watches.resize(first_new + grow_size);
+    for (std::size_t index = first_new; index < watches.size(); ++index) {
+        watches[index] = std::make_unique<VKFenceWatch>();
+    }
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
new file mode 100644
index 000000000..69d036ccd
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <tuple>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKFenceWatch;
+class VKResourceManager;
+class VKScheduler;
+
+/// Linear allocator over a single mapped Vulkan buffer. Regions are handed out with Reserve() and
+/// committed with Send(); when a reservation would not fit, allocation restarts at offset zero.
+class VKStreamBuffer {
+public:
+ explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+ vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
+ ~VKStreamBuffer();
+
+ /**
+ * Reserves a region of memory from the stream buffer.
+ * @param size Size to reserve.
+ * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
+ * offset and a boolean that's true when buffer has been invalidated.
+ */
+ std::tuple<u8*, u64, bool> Reserve(u64 size);
+
+ /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
+ [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
+
+ /// Returns the handle of the underlying Vulkan buffer.
+ vk::Buffer GetBuffer() const {
+ return *buffer;
+ }
+
+private:
+ /// Creates Vulkan buffer handles committing the required memory.
+ void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+
+ /// Increases the amount of watches available.
+ void ReserveWatches(std::size_t grow_size);
+
+ const VKDevice& device; ///< Vulkan device manager.
+ VKScheduler& scheduler; ///< Command scheduler.
+ const u64 buffer_size; ///< Total size of the stream buffer.
+ const vk::AccessFlags access; ///< Access usage of this stream buffer.
+ const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
+
+ UniqueBuffer buffer; ///< Mapped buffer.
+ VKMemoryCommit commit; ///< Memory commit.
+ u8* mapped_pointer{}; ///< Pointer to the host visible commit
+
+ u64 offset{}; ///< Buffer iterator.
+ u64 mapped_size{}; ///< Size reserved for the current copy.
+
+ std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
+ std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
+ std::optional<std::size_t>
+ invalidation_mark{}; ///< Number of watches used in the current invalidation.
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 044ba116a..a7ac26d71 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -89,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
switch (format) {
- // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
- // gamma.
case Tegra::RenderTargetFormat::RGBA8_SRGB:
return PixelFormat::RGBA8_SRGB;
case Tegra::RenderTargetFormat::RGBA8_UNORM: