diff options
author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-04-17 01:01:07 +0200 |
---|---|---|
committer | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-06-21 02:36:11 +0200 |
commit | 4b396f375c0d32b60595f224d06b1b63d6df6b0a (patch) | |
tree | d61b116481b20802deee2472735e83c053901454 | |
parent | gl_texture_cache: Add copy from multiple overlaps into a single surface (diff) | |
download | yuzu-4b396f375c0d32b60595f224d06b1b63d6df6b0a.tar yuzu-4b396f375c0d32b60595f224d06b1b63d6df6b0a.tar.gz yuzu-4b396f375c0d32b60595f224d06b1b63d6df6b0a.tar.bz2 yuzu-4b396f375c0d32b60595f224d06b1b63d6df6b0a.tar.lz yuzu-4b396f375c0d32b60595f224d06b1b63d6df6b0a.tar.xz yuzu-4b396f375c0d32b60595f224d06b1b63d6df6b0a.tar.zst yuzu-4b396f375c0d32b60595f224d06b1b63d6df6b0a.zip |
Diffstat (limited to '')
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 8 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 50 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 20 | ||||
-rw-r--r-- | src/video_core/texture_cache.cpp | 72 | ||||
-rw-r--r-- | src/video_core/texture_cache.h | 175 |
5 files changed, 185 insertions, 140 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 07c28357e..af63365a4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -498,8 +498,8 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( color_surface->MarkAsModified(true); // Workaround for and issue in nvidia drivers // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - // state.framebuffer_srgb.enabled |= - // color_surface->GetSurfaceParams().srgb_conversion; + state.framebuffer_srgb.enabled |= + color_surface->GetSurfaceParams().GetSrgbConversion(); } fbkey.is_single_buffer = true; @@ -519,8 +519,8 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( // Enable sRGB only for supported formats // Workaround for and issue in nvidia drivers // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - // state.framebuffer_srgb.enabled |= - // color_surface->GetSurfaceParams().srgb_conversion; + state.framebuffer_srgb.enabled |= + color_surface->GetSurfaceParams().GetSrgbConversion(); } fbkey.color_attachments[index] = diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 6a6fe7cc4..da2d1e63a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -8,6 +8,7 @@ #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/renderer_opengl/utils.h" #include "video_core/texture_cache.h" #include "video_core/textures/convert.h" #include "video_core/textures/texture.h" @@ -285,6 +286,8 @@ void CachedSurface::LoadBuffer() { } void CachedSurface::FlushBufferImpl() { + LOG_CRITICAL(Render_OpenGL, "Flushing"); + if (!IsModified()) { return; } @@ -352,9 +355,6 @@ void CachedSurface::UploadTextureMipmap(u32 level) { if (is_compressed) { const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; - GLint expected_size; - glGetTextureLevelParameteriv(texture.handle, level, GL_TEXTURE_COMPRESSED_IMAGE_SIZE, - &expected_size); switch (params.GetTarget()) { case SurfaceTarget::Texture2D: glCompressedTextureSubImage2D(texture.handle, level, 0, 0, @@ -419,6 +419,10 @@ void CachedSurface::UploadTextureMipmap(u32 level) { } } +void CachedSurface::DecorateSurfaceName() { + LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr()); +} + std::unique_ptr<CachedSurfaceView> CachedSurface::CreateView(const ViewKey& view_key) { return std::make_unique<CachedSurfaceView>(*this, view_key); } @@ -517,11 +521,13 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, TextureCacheOpenGL::~TextureCacheOpenGL() = default; -CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView( - VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, bool preserve_contents, - const std::vector<CachedSurface*>& overlaps) { +CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr, + const SurfaceParams& new_params, + bool preserve_contents, + const std::vector<Surface>& overlaps) { if (overlaps.size() > 1) { - return TryCopyAsViews(cpu_addr, host_ptr, new_params, overlaps); + return TryCopyAsViews(gpu_addr, cpu_addr, host_ptr, new_params, overlaps); } const auto& old_surface{overlaps[0]}; @@ -530,18 +536,18 @@ CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView( old_params.GetDepth() == new_params.GetDepth() && old_params.GetDepth() == 1 && old_params.GetNumLevels() == new_params.GetNumLevels() && old_params.GetPixelFormat() == new_params.GetPixelFormat()) { - return SurfaceCopy(cpu_addr, host_ptr, new_params, old_surface, old_params); + return SurfaceCopy(gpu_addr, cpu_addr, host_ptr, new_params, old_surface, old_params); } return nullptr; } -CachedSurfaceView* TextureCacheOpenGL::SurfaceCopy(VAddr cpu_addr, u8* host_ptr, +CachedSurfaceView* TextureCacheOpenGL::SurfaceCopy(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, - CachedSurface* old_surface, + const Surface& old_surface, const SurfaceParams& old_params) { - CachedSurface* const new_surface{GetUncachedSurface(new_params)}; - Register(new_surface, cpu_addr, host_ptr); + const auto new_surface{GetUncachedSurface(new_params)}; + Register(new_surface, gpu_addr, cpu_addr, host_ptr); const u32 min_width{ std::max(old_params.GetDefaultBlockWidth(), new_params.GetDefaultBlockWidth())}; @@ -562,12 +568,12 @@ CachedSurfaceView* TextureCacheOpenGL::SurfaceCopy(VAddr cpu_addr, u8* host_ptr, new_surface->MarkAsModified(true); // TODO(Rodrigo): Add an entry to directly get the superview - return new_surface->GetView(cpu_addr, new_params); + return new_surface->GetView(gpu_addr, new_params); } -CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, - const std::vector<CachedSurface*>& overlaps) { +CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr, const SurfaceParams& new_params, + const std::vector<Surface>& overlaps) { if (new_params.GetTarget() == SurfaceTarget::Texture1D || new_params.GetTarget() == SurfaceTarget::Texture1DArray || new_params.GetTarget() == SurfaceTarget::Texture3D) { @@ -575,16 +581,16 @@ CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(VAddr cpu_addr, u8* host_p return nullptr; } - CachedSurface* const new_surface{GetUncachedSurface(new_params)}; + const auto new_surface{GetUncachedSurface(new_params)}; // TODO(Rodrigo): Move this down - Register(new_surface, cpu_addr, host_ptr); + Register(new_surface, gpu_addr, cpu_addr, host_ptr); // TODO(Rodrigo): Find a way to avoid heap allocations here. std::vector<CachedSurfaceView*> views; views.reserve(overlaps.size()); for (const auto& overlap : overlaps) { const auto view{ - new_surface->TryGetView(overlap->GetCpuAddr(), overlap->GetSurfaceParams())}; + new_surface->TryGetView(overlap->GetGpuAddr(), overlap->GetSurfaceParams())}; if (!view) { // TODO(Rodrigo): Remove this Unregister(new_surface); @@ -610,11 +616,11 @@ CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(VAddr cpu_addr, u8* host_p new_surface->MarkAsModified(true); // TODO(Rodrigo): Add an entry to directly get the superview - return new_surface->GetView(cpu_addr, new_params); + return new_surface->GetView(gpu_addr, new_params); } -std::unique_ptr<CachedSurface> TextureCacheOpenGL::CreateSurface(const SurfaceParams& params) { - return std::make_unique<CachedSurface>(params); +Surface TextureCacheOpenGL::CreateSurface(const SurfaceParams& params) { + return std::make_unique<CachedSurface>(*this, params); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 86ad91dab..8705db74c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -27,6 +27,7 @@ using VideoCore::Surface::SurfaceType; class CachedSurfaceView; class CachedSurface; +using Surface = std::shared_ptr<CachedSurface>; using TextureCacheBase = VideoCommon::TextureCacheContextless<CachedSurface, CachedSurfaceView>; class CachedSurface final : public VideoCommon::SurfaceBaseContextless<CachedSurfaceView> { @@ -47,6 +48,8 @@ public: } protected: + void DecorateSurfaceName(); + std::unique_ptr<CachedSurfaceView> CreateView(const ViewKey& view_key); void FlushBufferImpl(); @@ -65,7 +68,6 @@ private: OGLTexture texture; std::vector<u8> staging_buffer; - u8* host_ptr{}; }; class CachedSurfaceView final { @@ -155,19 +157,21 @@ public: ~TextureCacheOpenGL(); protected: - CachedSurfaceView* TryFastGetSurfaceView(VAddr cpu_addr, u8* host_ptr, + CachedSurfaceView* TryFastGetSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, bool preserve_contents, - const std::vector<CachedSurface*>& overlaps); + const std::vector<Surface>& overlaps); - std::unique_ptr<CachedSurface> CreateSurface(const SurfaceParams& params); + Surface CreateSurface(const SurfaceParams& params); private: - CachedSurfaceView* SurfaceCopy(VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, - CachedSurface* old_surface, const SurfaceParams& old_params); + CachedSurfaceView* SurfaceCopy(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& new_params, const Surface& old_surface, + const SurfaceParams& old_params); - CachedSurfaceView* TryCopyAsViews(VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, - const std::vector<CachedSurface*>& overlaps); + CachedSurfaceView* TryCopyAsViews(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& new_params, + const std::vector<Surface>& overlaps); }; } // namespace OpenGL diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp index 2994312f4..b47ce6b98 100644 --- a/src/video_core/texture_cache.cpp +++ b/src/video_core/texture_cache.cpp @@ -32,12 +32,13 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, const Tegra::Texture::FullTextureInfo& config) { SurfaceParams params; params.is_tiled = config.tic.IsTiled(); + params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; - params.pixel_format = - PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), false); + params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), + params.srgb_conversion); params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); params.target = SurfaceTargetFromTextureType(config.tic.texture_type); @@ -62,6 +63,7 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { SurfaceParams params; params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; + params.srgb_conversion = false; params.block_width = 1 << std::min(block_width, 5U); params.block_height = 1 << std::min(block_height, 5U); params.block_depth = 1 << std::min(block_depth, 5U); @@ -85,6 +87,8 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz SurfaceParams params; params.is_tiled = config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || + config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; params.block_width = 1 << config.memory_layout.block_width; params.block_height = 1 << config.memory_layout.block_height; params.block_depth = 1 << config.memory_layout.block_depth; @@ -113,6 +117,8 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& config) { SurfaceParams params{}; params.is_tiled = !config.linear; + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || + config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, @@ -162,6 +168,7 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { if (level == 0) { return this->block_height; } + const u32 height{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; @@ -173,10 +180,12 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { } u32 SurfaceParams::GetMipBlockDepth(u32 level) const { - if (level == 0) - return block_depth; - if (target != SurfaceTarget::Texture3D) + if (level == 0) { + return this->block_depth; + } + if (IsLayered()) { return 1; + } const u32 depth{GetMipDepth(level)}; u32 block_depth = 32; @@ -192,7 +201,7 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { std::size_t offset = 0; for (u32 i = 0; i < level; i++) { - offset += GetInnerMipmapMemorySize(i, false, IsLayered(), false); + offset += GetInnerMipmapMemorySize(i, false, false); } return offset; } @@ -200,21 +209,33 @@ std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { std::size_t offset = 0; for (u32 i = 0; i < level; i++) { - offset += GetInnerMipmapMemorySize(i, true, false, false); + offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); } return offset; } std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, true, true, false) * GetNumLayers(); + return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); } std::size_t SurfaceParams::GetGuestLayerSize() const { - return GetInnerMemorySize(false, true, false); + return GetLayerSize(false, false); +} + +std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { + std::size_t size = 0; + for (u32 level = 0; level < num_levels; ++level) { + size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); + } + if (is_tiled && IsLayered()) { + return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + } + return size; } std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { - return GetInnerMipmapMemorySize(level, true, IsLayered(), false); + ASSERT(target != SurfaceTarget::Texture3D); + return GetInnerMipmapMemorySize(level, true, false); } u32 SurfaceParams::GetDefaultBlockWidth() const { @@ -273,15 +294,6 @@ bool SurfaceParams::IsPixelFormatZeta() const { } void SurfaceParams::CalculateCachedValues() { - guest_size_in_bytes = GetInnerMemorySize(false, false, false); - - // ASTC is uncompressed in software, in emulated as RGBA8 - if (IsPixelFormatASTC(pixel_format)) { - host_size_in_bytes = static_cast<std::size_t>(width * height * depth) * 4ULL; - } else { - host_size_in_bytes = GetInnerMemorySize(true, false, false); - } - switch (target) { case SurfaceTarget::Texture1D: case SurfaceTarget::Texture2D: @@ -297,28 +309,30 @@ void SurfaceParams::CalculateCachedValues() { default: UNREACHABLE(); } + + guest_size_in_bytes = GetInnerMemorySize(false, false, false); + + // ASTC is uncompressed in software, in emulated as RGBA8 + if (IsPixelFormatASTC(pixel_format)) { + host_size_in_bytes = static_cast<std::size_t>(width * height * depth * 4U); + } else { + host_size_in_bytes = GetInnerMemorySize(true, false, false); + } } -std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only, +std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const { const bool tiled{as_host_size ? false : is_tiled}; const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; - const u32 depth{layer_only ? 1U : GetMipDepth(level)}; + const u32 depth{target == SurfaceTarget::Texture3D ? GetMipDepth(level) : 1U}; return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, GetMipBlockHeight(level), GetMipBlockDepth(level)); } std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { - std::size_t size = 0; - for (u32 level = 0; level < num_levels; ++level) { - size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed); - } - if (is_tiled && !as_host_size) { - size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); - } - return size; + return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : num_layers); } std::map<u64, std::pair<u32, u32>> SurfaceParams::CreateViewOffsetMap() const { diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h index 9fd5f074e..0e289d378 100644 --- a/src/video_core/texture_cache.h +++ b/src/video_core/texture_cache.h @@ -53,6 +53,7 @@ protected: HasheableSurfaceParams() = default; bool is_tiled; + bool srgb_conversion; u32 block_width; u32 block_height; u32 block_depth; @@ -92,6 +93,10 @@ public: return is_tiled; } + bool GetSrgbConversion() const { + return srgb_conversion; + } + u32 GetBlockWidth() const { return block_width; } @@ -211,13 +216,15 @@ private: /// Calculates values that can be deduced from HasheableSurfaceParams. void CalculateCachedValues(); - /// Returns the size of a given mipmap level. - std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only, - bool uncompressed) const; + /// Returns the size of a given mipmap level inside a layer. + std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; /// Returns the size of all mipmap levels and aligns as needed. std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const; + /// Returns the size of a layer + std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; + /// Returns true if the passed view width and height match the size of this params in a given /// mipmap level. bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const; @@ -277,13 +284,13 @@ public: virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0; - TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) { - if (view_addr < cpu_addr || !params.IsFamiliar(view_params)) { + TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) { + if (view_addr < gpu_addr || !params.IsFamiliar(view_params)) { // It can't be a view if it's in a prior address. return {}; } - const auto relative_offset{static_cast<u64>(view_addr - cpu_addr)}; + const auto relative_offset{static_cast<u64>(view_addr - gpu_addr)}; const auto it{view_offset_map.find(relative_offset)}; if (it == view_offset_map.end()) { // Couldn't find an aligned view. @@ -298,6 +305,11 @@ public: return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels()); } + GPUVAddr GetGpuAddr() const { + ASSERT(is_registered); + return gpu_addr; + } + VAddr GetCpuAddr() const { ASSERT(is_registered); return cpu_addr; @@ -325,22 +337,20 @@ public: return params; } - TView* GetView(VAddr view_addr, const SurfaceParams& view_params) { + TView* GetView(GPUVAddr view_addr, const SurfaceParams& view_params) { TView* view{TryGetView(view_addr, view_params)}; ASSERT(view != nullptr); return view; } - void Register(VAddr cpu_addr_, u8* host_ptr_) { + void Register(GPUVAddr gpu_addr_, VAddr cpu_addr_, u8* host_ptr_) { ASSERT(!is_registered); is_registered = true; + gpu_addr = gpu_addr_; cpu_addr = cpu_addr_; host_ptr = host_ptr_; cache_addr = ToCacheAddr(host_ptr_); - } - - void Register(VAddr cpu_addr_) { - Register(cpu_addr_, Memory::GetPointer(cpu_addr_)); + DecorateSurfaceName(); } void Unregister() { @@ -358,6 +368,8 @@ protected: ~SurfaceBase() = default; + virtual void DecorateSurfaceName() = 0; + virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0; bool IsModified() const { @@ -379,6 +391,7 @@ private: const std::map<u64, std::pair<u32, u32>> view_offset_map; + GPUVAddr gpu_addr{}; VAddr cpu_addr{}; u8* host_ptr{}; CacheAddr cache_addr{}; @@ -392,12 +405,12 @@ class TextureCache { static_assert(std::is_trivially_copyable_v<TExecutionContext>); using ResultType = std::tuple<TView*, TExecutionContext>; - using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface*>>; + using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<std::shared_ptr<TSurface>>>; using IntervalType = typename IntervalMap::interval_type; public: void InvalidateRegion(CacheAddr addr, std::size_t size) { - for (TSurface* surface : GetSurfacesInRegion(addr, size)) { + for (const auto& surface : GetSurfacesInRegion(addr, size)) { if (!surface->IsRegistered()) { // Skip duplicates continue; @@ -408,32 +421,25 @@ public: ResultType GetTextureSurface(TExecutionContext exctx, const Tegra::Texture::FullTextureInfo& config) { - auto& memory_manager{system.GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())}; - if (!cpu_addr) { + const auto gpu_addr{config.tic.Address()}; + if (!gpu_addr) { return {{}, exctx}; } const auto params{SurfaceParams::CreateForTexture(system, config)}; - return GetSurfaceView(exctx, *cpu_addr, params, true); + return GetSurfaceView(exctx, gpu_addr, params, true); } ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { const auto& regs{system.GPU().Maxwell3D().regs}; - if (!regs.zeta.Address() || !regs.zeta_enable) { + const auto gpu_addr{regs.zeta.Address()}; + if (!gpu_addr || !regs.zeta_enable) { return {{}, exctx}; } - - auto& memory_manager{system.GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())}; - if (!cpu_addr) { - return {{}, exctx}; - } - const auto depth_params{SurfaceParams::CreateForDepthBuffer( system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents); + return GetSurfaceView(exctx, gpu_addr, depth_params, preserve_contents); } ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, @@ -448,25 +454,23 @@ public: auto& memory_manager{system.GPU().MemoryManager()}; const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; - const auto cpu_addr{memory_manager.GpuToCpuAddress( - config.Address() + config.base_layer * config.layer_stride * sizeof(u32))}; - if (!cpu_addr) { + const auto gpu_addr{config.Address() + + config.base_layer * config.layer_stride * sizeof(u32)}; + if (!gpu_addr) { return {{}, exctx}; } - return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + return GetSurfaceView(exctx, gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); } ResultType GetFermiSurface(TExecutionContext exctx, const Tegra::Engines::Fermi2D::Regs::Surface& config) { - const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())}; - ASSERT(cpu_addr); - return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config), - true); + return GetSurfaceView(exctx, config.Address(), + SurfaceParams::CreateForFermiCopySurface(config), true); } - TSurface* TryFindFramebufferSurface(const u8* host_ptr) const { + std::shared_ptr<TSurface> TryFindFramebufferSurface(const u8* host_ptr) const { const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; return it != registered_surfaces.end() ? *it->second.begin() : nullptr; } @@ -477,56 +481,68 @@ protected: ~TextureCache() = default; - virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents, - const std::vector<TSurface*>& overlaps) = 0; + virtual ResultType TryFastGetSurfaceView( + TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector<std::shared_ptr<TSurface>>& overlaps) = 0; - virtual std::unique_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0; + virtual std::shared_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0; - void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) { - surface->Register(cpu_addr, host_ptr); + void Register(std::shared_ptr<TSurface> surface, GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr) { + surface->Register(gpu_addr, cpu_addr, host_ptr); registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); } - void Unregister(TSurface* surface) { + void Unregister(std::shared_ptr<TSurface> surface) { registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); surface->Unregister(); } - TSurface* GetUncachedSurface(const SurfaceParams& params) { - if (TSurface* surface = TryGetReservedSurface(params); surface) + std::shared_ptr<TSurface> GetUncachedSurface(const SurfaceParams& params) { + if (const auto surface = TryGetReservedSurface(params); surface) return surface; // No reserved surface available, create a new one and reserve it auto new_surface{CreateSurface(params)}; - TSurface* surface{new_surface.get()}; - ReserveSurface(params, std::move(new_surface)); - return surface; + ReserveSurface(params, new_surface); + return new_surface; } Core::System& system; private: - ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params, - bool preserve_contents) { - const auto host_ptr{Memory::GetPointer(cpu_addr)}; + ResultType GetSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, + const SurfaceParams& params, bool preserve_contents) { + auto& memory_manager{system.GPU().MemoryManager()}; + const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; + DEBUG_ASSERT(cpu_addr); + + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; if (overlaps.empty()) { - return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents); + return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } if (overlaps.size() == 1) { - if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view) + if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { return {view, exctx}; + } } TView* fast_view; - std::tie(fast_view, exctx) = - TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps); + std::tie(fast_view, exctx) = TryFastGetSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, + params, preserve_contents, overlaps); + + if (!fast_view) { + std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { + return lhs->GetModificationTick() < rhs->GetModificationTick(); + }); + } - for (TSurface* surface : overlaps) { + for (const auto& surface : overlaps) { if (!fast_view) { // Flush even when we don't care about the contents, to preserve memory not written // by the new surface. @@ -539,57 +555,59 @@ private: return {fast_view, exctx}; } - return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents); + return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } - ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents) { - TSurface* new_surface{GetUncachedSurface(params)}; - Register(new_surface, cpu_addr, host_ptr); + ResultType LoadSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr, const SurfaceParams& params, bool preserve_contents) { + const auto new_surface{GetUncachedSurface(params)}; + Register(new_surface, gpu_addr, cpu_addr, host_ptr); if (preserve_contents) { exctx = LoadSurface(exctx, new_surface); } - return {new_surface->GetView(cpu_addr, params), exctx}; + return {new_surface->GetView(gpu_addr, params), exctx}; } - TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) { + TExecutionContext LoadSurface(TExecutionContext exctx, + const std::shared_ptr<TSurface>& surface) { surface->LoadBuffer(); exctx = surface->UploadTexture(exctx); surface->MarkAsModified(false); return exctx; } - std::vector<TSurface*> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { + std::vector<std::shared_ptr<TSurface>> GetSurfacesInRegion(CacheAddr cache_addr, + std::size_t size) const { if (size == 0) { return {}; } const IntervalType interval{cache_addr, cache_addr + size}; - std::vector<TSurface*> surfaces; + std::vector<std::shared_ptr<TSurface>> surfaces; for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { surfaces.push_back(*pair.second.begin()); } return surfaces; } - void ReserveSurface(const SurfaceParams& params, std::unique_ptr<TSurface> surface) { + void ReserveSurface(const SurfaceParams& params, std::shared_ptr<TSurface> surface) { surface_reserve[params].push_back(std::move(surface)); } - TSurface* TryGetReservedSurface(const SurfaceParams& params) { + std::shared_ptr<TSurface> TryGetReservedSurface(const SurfaceParams& params) { auto search{surface_reserve.find(params)}; if (search == surface_reserve.end()) { return {}; } for (auto& surface : search->second) { if (!surface->IsRegistered()) { - return surface.get(); + return surface; } } return {}; } - IntervalType GetSurfaceInterval(TSurface* surface) const { + IntervalType GetSurfaceInterval(std::shared_ptr<TSurface> surface) const { return IntervalType::right_open(surface->GetCacheAddr(), surface->GetCacheAddr() + surface->GetSizeInBytes()); } @@ -601,7 +619,7 @@ private: /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. - std::unordered_map<SurfaceParams, std::list<std::unique_ptr<TSurface>>> surface_reserve; + std::unordered_map<SurfaceParams, std::list<std::shared_ptr<TSurface>>> surface_reserve; }; struct DummyExecutionContext {}; @@ -631,7 +649,7 @@ public: return RemoveContext(Base::GetFermiSurface({}, config)); } - TSurface* TryFindFramebufferSurface(const u8* host_ptr) const { + std::shared_ptr<TSurface> TryFindFramebufferSurface(const u8* host_ptr) const { return Base::TryFindFramebufferSurface(host_ptr); } @@ -640,15 +658,18 @@ protected: VideoCore::RasterizerInterface& rasterizer) : TextureCache<TSurface, TView, DummyExecutionContext>{system, rasterizer} {} - virtual TView* TryFastGetSurfaceView(VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, - const std::vector<TSurface*>& overlaps) = 0; + virtual TView* TryFastGetSurfaceView( + GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, + bool preserve_contents, const std::vector<std::shared_ptr<TSurface>>& overlaps) = 0; private: std::tuple<TView*, DummyExecutionContext> TryFastGetSurfaceView( - DummyExecutionContext, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, const std::vector<TSurface*>& overlaps) { - return {TryFastGetSurfaceView(cpu_addr, host_ptr, params, preserve_contents, overlaps), {}}; + DummyExecutionContext, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector<std::shared_ptr<TSurface>>& overlaps) { + return {TryFastGetSurfaceView(gpu_addr, cpu_addr, host_ptr, params, preserve_contents, + overlaps), + {}}; } TView* RemoveContext(std::tuple<TView*, DummyExecutionContext> return_value) { |