summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author bunnei <bunneidev@gmail.com> 2018-07-22 05:28:53 +0200
committer GitHub <noreply@github.com> 2018-07-22 05:28:53 +0200
commit 4cd5df95d67bb2867ad9ffb006b27d2635c5fd91 (patch)
tree 741aaf7b4a46bbcce5a25192d8df0c3f3d2c88fd
parent Merge pull request #759 from lioncash/redundant (diff)
parent gl_rasterizer_cache: Blit surfaces on recreation instead of flush and load. (diff)
download yuzu-4cd5df95d67bb2867ad9ffb006b27d2635c5fd91.tar
yuzu-4cd5df95d67bb2867ad9ffb006b27d2635c5fd91.tar.gz
yuzu-4cd5df95d67bb2867ad9ffb006b27d2635c5fd91.tar.bz2
yuzu-4cd5df95d67bb2867ad9ffb006b27d2635c5fd91.tar.lz
yuzu-4cd5df95d67bb2867ad9ffb006b27d2635c5fd91.tar.xz
yuzu-4cd5df95d67bb2867ad9ffb006b27d2635c5fd91.tar.zst
yuzu-4cd5df95d67bb2867ad9ffb006b27d2635c5fd91.zip
-rw-r--r-- src/video_core/engines/maxwell_3d.h 16
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 28
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 138
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 47
4 files changed, 157 insertions, 72 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index dbd106c53..3c32f1067 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -487,7 +487,12 @@ public:
};
} rt_control;
- INSERT_PADDING_WORDS(0x2B);
+ INSERT_PADDING_WORDS(0x2);
+
+ u32 zeta_width;
+ u32 zeta_height;
+
+ INSERT_PADDING_WORDS(0x27);
u32 depth_test_enable;
@@ -540,7 +545,11 @@ public:
u32 vb_element_base;
- INSERT_PADDING_WORDS(0x49);
+ INSERT_PADDING_WORDS(0x40);
+
+ u32 zeta_enable;
+
+ INSERT_PADDING_WORDS(0x8);
struct {
u32 tsc_address_high;
@@ -865,6 +874,8 @@ ASSERT_REG_POSITION(clear_depth, 0x364);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
+ASSERT_REG_POSITION(zeta_width, 0x48a);
+ASSERT_REG_POSITION(zeta_height, 0x48b);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -874,6 +885,7 @@ ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(stencil, 0x4E0);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
+ASSERT_REG_POSITION(zeta_enable, 0x54E);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(stencil_two_side, 0x565);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 65a2fd5e8..56d9c575b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -387,7 +387,7 @@ void RasterizerOpenGL::Clear() {
}
if (regs.clear_buffers.Z) {
clear_mask |= GL_DEPTH_BUFFER_BIT;
- use_depth_fb = true;
+ use_depth_fb = regs.zeta_enable != 0;
// Always enable the depth write when clearing the depth buffer. The depth write mask is
// ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
@@ -413,11 +413,13 @@ void RasterizerOpenGL::Clear() {
glClear(clear_mask);
// Mark framebuffer surfaces as dirty
- if (dirty_color_surface != nullptr) {
- res_cache.MarkSurfaceAsDirty(dirty_color_surface);
- }
- if (dirty_depth_surface != nullptr) {
- res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
+ if (Settings::values.use_accurate_framebuffers) {
+ if (dirty_color_surface != nullptr) {
+ res_cache.FlushSurface(dirty_color_surface);
+ }
+ if (dirty_depth_surface != nullptr) {
+ res_cache.FlushSurface(dirty_depth_surface);
+ }
}
}
@@ -431,7 +433,7 @@ void RasterizerOpenGL::DrawArrays() {
ScopeAcquireGLContext acquire_context;
auto [dirty_color_surface, dirty_depth_surface] =
- ConfigureFramebuffers(true, regs.zeta.Address() != 0);
+ ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0);
SyncDepthTestState();
SyncBlendState();
@@ -520,11 +522,13 @@ void RasterizerOpenGL::DrawArrays() {
state.Apply();
// Mark framebuffer surfaces as dirty
- if (dirty_color_surface != nullptr) {
- res_cache.MarkSurfaceAsDirty(dirty_color_surface);
- }
- if (dirty_depth_surface != nullptr) {
- res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
+ if (Settings::values.use_accurate_framebuffers) {
+ if (dirty_color_surface != nullptr) {
+ res_cache.FlushSurface(dirty_color_surface);
+ }
+ if (dirty_depth_surface != nullptr) {
+ res_cache.FlushSurface(dirty_depth_surface);
+ }
}
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 2c43982b0..28f0bc379 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -65,9 +65,9 @@ struct FormatTuple {
return params;
}
-/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
- const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, Tegra::GPUVAddr zeta_address,
- Tegra::DepthFormat format) {
+/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
+ Tegra::GPUVAddr zeta_address,
+ Tegra::DepthFormat format) {
SurfaceParams params{};
params.addr = zeta_address;
@@ -77,9 +77,9 @@ struct FormatTuple {
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
params.size_in_bytes = params.SizeInBytes();
- params.width = config.width;
- params.height = config.height;
- params.unaligned_height = config.height;
+ params.width = zeta_width;
+ params.height = zeta_height;
+ params.unaligned_height = zeta_height;
params.size_in_bytes = params.SizeInBytes();
return params;
}
@@ -254,6 +254,60 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
cur_state.Apply();
}
+static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
+ const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
+ GLuint read_fb_handle, GLuint draw_fb_handle) {
+ OpenGLState prev_state{OpenGLState::GetCurState()};
+ SCOPE_EXIT({ prev_state.Apply(); });
+
+ OpenGLState state;
+ state.draw.read_framebuffer = read_fb_handle;
+ state.draw.draw_framebuffer = draw_fb_handle;
+ state.Apply();
+
+ u32 buffers{};
+
+ if (type == SurfaceType::ColorTexture) {
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
+ 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+ 0);
+
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
+ 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+ 0);
+
+ buffers = GL_COLOR_BUFFER_BIT;
+ } else if (type == SurfaceType::Depth) {
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+ buffers = GL_DEPTH_BUFFER_BIT;
+ } else if (type == SurfaceType::DepthStencil) {
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ src_tex, 0);
+
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ dst_tex, 0);
+
+ buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+ }
+
+ glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
+ dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
+ buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
+
+ return true;
+}
+
CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
texture.Create();
const auto& rect{params.GetRect()};
@@ -519,8 +573,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
}
if (using_depth_fb) {
- depth_params =
- SurfaceParams::CreateForDepthBuffer(regs.rt[0], regs.zeta.Address(), regs.zeta.format);
+ depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height,
+ regs.zeta.Address(), regs.zeta.format);
}
MathUtil::Rectangle<u32> color_rect{};
@@ -565,17 +619,9 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
}
-void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
- if (Settings::values.use_accurate_framebuffers) {
- // If enabled, always flush dirty surfaces
- surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
- surface->FlushGLBuffer();
- } else {
- // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads
- // and flushes are very slow and do not seem to improve accuracy
- const auto& params{surface->GetSurfaceParams()};
- Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false);
- }
+void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
+ surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
+ surface->FlushGLBuffer();
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
@@ -588,25 +634,53 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
if (gpu.memory_manager->GpuToCpuAddress(params.addr) == boost::none)
return {};
- // Check for an exact match in existing surfaces
- const auto& surface_key{SurfaceKey::Create(params)};
- const auto& search{surface_cache.find(surface_key)};
+ // Look up surface in the cache based on address
+ const auto& search{surface_cache.find(params.addr)};
Surface surface;
if (search != surface_cache.end()) {
surface = search->second;
if (Settings::values.use_accurate_framebuffers) {
- // Reload the surface from Switch memory
- LoadSurface(surface);
+ // If use_accurate_framebuffers is enabled, always load from memory
+ FlushSurface(surface);
+ UnregisterSurface(surface);
+ } else if (surface->GetSurfaceParams() != params) {
+ // If surface parameters changed, recreate the surface from the old one
+ return RecreateSurface(surface, params);
+ } else {
+ // Use the cached surface as-is
+ return surface;
}
- } else {
- surface = std::make_shared<CachedSurface>(params);
- RegisterSurface(surface);
- LoadSurface(surface);
}
+ // No surface found - create a new one
+ surface = std::make_shared<CachedSurface>(params);
+ RegisterSurface(surface);
+ LoadSurface(surface);
+
return surface;
}
+Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
+ const SurfaceParams& new_params) {
+ // Verify surface is compatible for blitting
+ const auto& params{surface->GetSurfaceParams()};
+ ASSERT(params.type == new_params.type);
+ ASSERT(params.pixel_format == new_params.pixel_format);
+ ASSERT(params.component_type == new_params.component_type);
+
+ // Create a new surface with the new parameters, and blit the previous surface to it
+ Surface new_surface{std::make_shared<CachedSurface>(new_params)};
+ BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
+ new_surface->GetSurfaceParams().GetRect(), params.type, read_framebuffer.handle,
+ draw_framebuffer.handle);
+
+ // Update cache accordingly
+ UnregisterSurface(surface);
+ RegisterSurface(new_surface);
+
+ return new_surface;
+}
+
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
// Tries to find the GPU address of a framebuffer based on the CPU address. This is because
// final output framebuffers are specified by CPU address, but internally our GPU cache uses
@@ -652,22 +726,20 @@ void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size)
void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
const auto& params{surface->GetSurfaceParams()};
- const auto& surface_key{SurfaceKey::Create(params)};
- const auto& search{surface_cache.find(surface_key)};
+ const auto& search{surface_cache.find(params.addr)};
if (search != surface_cache.end()) {
// Registered already
return;
}
- surface_cache[surface_key] = surface;
+ surface_cache[params.addr] = surface;
UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
}
void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
const auto& params{surface->GetSurfaceParams()};
- const auto& surface_key{SurfaceKey::Create(params)};
- const auto& search{surface_cache.find(surface_key)};
+ const auto& search{surface_cache.find(params.addr)};
if (search == surface_cache.end()) {
// Unregistered already
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 718c45ce1..b084c4db4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -10,7 +10,6 @@
#include <vector>
#include <boost/icl/interval_map.hpp>
#include "common/common_types.h"
-#include "common/hash.h"
#include "common/math_util.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -137,6 +136,7 @@ struct SurfaceParams {
ASSERT(static_cast<size_t>(format) < bpp_table.size());
return bpp_table[static_cast<size_t>(format)];
}
+
u32 GetFormatBpp() const {
return GetFormatBpp(pixel_format);
}
@@ -365,9 +365,21 @@ struct SurfaceParams {
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
/// Creates SurfaceParams for a depth buffer configuration
- static SurfaceParams CreateForDepthBuffer(
- const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config,
- Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format);
+ static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
+ Tegra::GPUVAddr zeta_address,
+ Tegra::DepthFormat format);
+
+ bool operator==(const SurfaceParams& other) const {
+ return std::tie(addr, is_tiled, block_height, pixel_format, component_type, type, width,
+ height, unaligned_height, size_in_bytes) ==
+ std::tie(other.addr, other.is_tiled, other.block_height, other.pixel_format,
+ other.component_type, other.type, other.width, other.height,
+ other.unaligned_height, other.size_in_bytes);
+ }
+
+ bool operator!=(const SurfaceParams& other) const {
+ return !operator==(other);
+ }
Tegra::GPUVAddr addr;
bool is_tiled;
@@ -381,24 +393,6 @@ struct SurfaceParams {
size_t size_in_bytes;
};
-/// Hashable variation of SurfaceParams, used for a key in the surface cache
-struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
- static SurfaceKey Create(const SurfaceParams& params) {
- SurfaceKey res;
- res.state = params;
- return res;
- }
-};
-
-namespace std {
-template <>
-struct hash<SurfaceKey> {
- size_t operator()(const SurfaceKey& k) const {
- return k.Hash();
- }
-};
-} // namespace std
-
class CachedSurface final {
public:
CachedSurface(const SurfaceParams& params);
@@ -444,8 +438,8 @@ public:
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
const MathUtil::Rectangle<s32>& viewport);
- /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
- void MarkSurfaceAsDirty(const Surface& surface);
+ /// Flushes the surface to Switch memory
+ void FlushSurface(const Surface& surface);
/// Tries to find a framebuffer GPU address based on the provided CPU address
Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
@@ -460,6 +454,9 @@ private:
void LoadSurface(const Surface& surface);
Surface GetSurface(const SurfaceParams& params);
+ /// Recreates a surface with new parameters
+ Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params);
+
/// Register surface into the cache
void RegisterSurface(const Surface& surface);
@@ -469,7 +466,7 @@ private:
/// Increase/decrease the number of surface in pages touching the specified region
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
- std::unordered_map<SurfaceKey, Surface> surface_cache;
+ std::unordered_map<Tegra::GPUVAddr, Surface> surface_cache;
PageMap cached_pages;
OGLFramebuffer read_framebuffer;