From 5f57ab1b2aa80d427b6e454f8904e2e937a6981e Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 21 Jun 2018 19:36:01 -0400 Subject: gl_rasterizer_cache: Remove Citra's rasterizer cache, always load/flush surfaces. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 117 +- src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 1262 +++----------------- .../renderer_opengl/gl_rasterizer_cache.h | 323 +---- 4 files changed, 210 insertions(+), 1494 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3fbf8e1f9..bc463fc30 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -146,7 +146,6 @@ std::pair RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, u64 size = end - start + 1; // Copy vertex array data - res_cache.FlushRegion(start, size, nullptr); Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); // Bind the vertex array to the buffer at the current offset. @@ -325,29 +324,22 @@ void RasterizerOpenGL::DrawArrays() { std::tie(color_surface, depth_surface, surfaces_rect) = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); - const u16 res_scale = color_surface != nullptr - ? color_surface->res_scale - : (depth_surface == nullptr ? 
1u : depth_surface->res_scale); - MathUtil::Rectangle draw_rect{ + static_cast(std::clamp(static_cast(surfaces_rect.left) + viewport_rect.left, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast(std::clamp(static_cast(surfaces_rect.bottom) + viewport_rect.top, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast(std::clamp(static_cast(surfaces_rect.left) + viewport_rect.right, + surfaces_rect.left, surfaces_rect.right)), // Right static_cast( - std::clamp(static_cast(surfaces_rect.left) + viewport_rect.left * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Left - static_cast( - std::clamp(static_cast(surfaces_rect.bottom) + viewport_rect.top * res_scale, - surfaces_rect.bottom, surfaces_rect.top)), // Top - static_cast( - std::clamp(static_cast(surfaces_rect.left) + viewport_rect.right * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Right - static_cast(std::clamp(static_cast(surfaces_rect.bottom) + - viewport_rect.bottom * res_scale, - surfaces_rect.bottom, surfaces_rect.top))}; // Bottom + std::clamp(static_cast(surfaces_rect.bottom) + viewport_rect.bottom, + surfaces_rect.bottom, surfaces_rect.top))}; // Bottom // Bind the framebuffer surfaces BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); // Sync the viewport - SyncViewport(surfaces_rect, res_scale); + SyncViewport(surfaces_rect); // Sync the blend state registers SyncBlendState(); @@ -442,44 +434,23 @@ void RasterizerOpenGL::DrawArrays() { state.Apply(); // Mark framebuffer surfaces as dirty - MathUtil::Rectangle draw_rect_unscaled{ - draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, - draw_rect.bottom / res_scale}; - if (color_surface != nullptr && write_color_fb) { - auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); - res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), - color_surface); + res_cache.FlushSurface(color_surface); } if (depth_surface != 
nullptr && write_depth_fb) { - auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); - res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), - depth_surface); + res_cache.FlushSurface(depth_surface); } } void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} -void RasterizerOpenGL::FlushAll() { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushAll(); -} +void RasterizerOpenGL::FlushAll() {} -void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushRegion(addr, size); -} +void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {} -void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.InvalidateRegion(addr, size, nullptr); -} +void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} -void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushRegion(addr, size); - res_cache.InvalidateRegion(addr, size, nullptr); -} +void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { MICROPROFILE_SCOPE(OpenGL_Blits); @@ -500,44 +471,8 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) { bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, VAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { - if (framebuffer_addr == 0) { - return false; - } - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - - SurfaceParams src_params; - src_params.cpu_addr = framebuffer_addr; - src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0); - src_params.width = std::min(framebuffer.width, pixel_stride); - src_params.height = framebuffer.height; - src_params.stride = pixel_stride; - 
src_params.is_tiled = true; - src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; - src_params.pixel_format = - SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); - src_params.component_type = - SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format); - src_params.UpdateParams(); - - MathUtil::Rectangle src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = - res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); - - if (src_surface == nullptr) { - return false; - } - - u32 scaled_width = src_surface->GetScaledWidth(); - u32 scaled_height = src_surface->GetScaledHeight(); - - screen_info.display_texcoords = MathUtil::Rectangle( - (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, - (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); - - screen_info.display_texture = src_surface->texture.handle; - - return true; + // TODO(bunnei): ImplementMe + return false; } void RasterizerOpenGL::SamplerInfo::Create() { @@ -674,7 +609,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); Surface surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { - state.texture_units[current_bindpoint].texture_2d = surface->texture.handle; + state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle; state.texture_units[current_bindpoint].swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source); state.texture_units[current_bindpoint].swizzle.g = @@ -700,16 +635,16 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, state.Apply(); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - color_surface != nullptr ? color_surface->texture.handle : 0, 0); + color_surface != nullptr ? 
color_surface->Texture().handle : 0, 0); if (depth_surface != nullptr) { if (has_stencil) { // attach both depth and stencil glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); + depth_surface->Texture().handle, 0); } else { // attach depth glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); + depth_surface->Texture().handle, 0); // clear stencil attachment glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); } @@ -720,14 +655,14 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, } } -void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle& surfaces_rect, u16 res_scale) { +void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle& surfaces_rect) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; const MathUtil::Rectangle viewport_rect{regs.viewport_transform[0].GetRect()}; - state.viewport.x = static_cast(surfaces_rect.left) + viewport_rect.left * res_scale; - state.viewport.y = static_cast(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; - state.viewport.width = static_cast(viewport_rect.GetWidth() * res_scale); - state.viewport.height = static_cast(viewport_rect.GetHeight() * res_scale); + state.viewport.x = static_cast(surfaces_rect.left) + viewport_rect.left; + state.viewport.y = static_cast(surfaces_rect.bottom) + viewport_rect.bottom; + state.viewport.width = static_cast(viewport_rect.GetWidth()); + state.viewport.height = static_cast(viewport_rect.GetHeight()); } void RasterizerOpenGL::SyncClipEnabled() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4762983c9..621200f03 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -109,7 +109,7 @@ private: u32 current_unit, const std::vector& 
entries); /// Syncs the viewport to match the guest state - void SyncViewport(const MathUtil::Rectangle& surfaces_rect, u16 res_scale); + void SyncViewport(const MathUtil::Rectangle& surfaces_rect); /// Syncs the clip enabled status to match the guest state void SyncClipEnabled(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 857164ff6..5fb099d8d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1,37 +1,22 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include -#include -#include -#include -#include -#include -#include -#include -#include #include + #include "common/alignment.h" -#include "common/bit_field.h" -#include "common/color.h" -#include "common/logging/log.h" -#include "common/math_util.h" +#include "common/assert.h" #include "common/microprofile.h" #include "common/scope_exit.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" -#include "core/settings.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_state.h" #include "video_core/textures/astc.h" #include "video_core/textures/decoders.h" #include "video_core/utils.h" -#include "video_core/video_core.h" using SurfaceType = SurfaceParams::SurfaceType; using PixelFormat = SurfaceParams::PixelFormat; @@ -77,15 +62,18 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType return {}; } -template -constexpr auto RangeFromInterval(Map& map, const Interval& interval) { - return boost::make_iterator_range(map.equal_range(interval)); +VAddr SurfaceParams::GetCpuAddr() const { + const 
auto& gpu = Core::System::GetInstance().GPU(); + return *gpu.memory_manager->GpuToCpuAddress(addr); } -static u16 GetResolutionScaleFactor() { - return static_cast(!Settings::values.resolution_factor - ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() - : Settings::values.resolution_factor); +static bool IsPixelFormatASTC(PixelFormat format) { + switch (format) { + case PixelFormat::ASTC_2D_4X4: + return true; + default: + return false; + } } static void ConvertASTCToRGBA8(std::vector& data, PixelFormat format, u32 width, u32 height) { @@ -106,18 +94,17 @@ static void ConvertASTCToRGBA8(std::vector& data, PixelFormat format, u32 wi } template -void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, - Tegra::GPUVAddr start, Tegra::GPUVAddr end) { +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); const auto& gpu = Core::System::GetInstance().GPU(); if (morton_to_gl) { auto data = Tegra::Texture::UnswizzleTexture( - *gpu.memory_manager->GpuToCpuAddress(base), + *gpu.memory_manager->GpuToCpuAddress(addr), SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); - if (SurfaceParams::IsFormatASTC(format)) { + if (IsPixelFormatASTC(format)) { // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this ConvertASTCToRGBA8(data, format, stride, height); } @@ -129,13 +116,12 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra:: NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); VideoCore::MortonCopyPixels128( stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(base)), gl_buffer, + Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), 
gl_buffer, morton_to_gl); } } -static constexpr std::array morton_to_gl_fns = { MortonCopy, MortonCopy, @@ -146,8 +132,7 @@ static constexpr std::array, MortonCopy, }; -static constexpr std::array gl_to_morton_fns = { MortonCopy, @@ -192,374 +177,76 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup cur_state.Apply(); } -static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rect, GLuint dst_tex, - const MathUtil::Rectangle& dst_rect, SurfaceType type, - GLuint read_fb_handle, GLuint draw_fb_handle) { - - glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex, - GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(), - src_rect.GetHeight(), 0); - return true; -} - -static bool FillSurface(const Surface& surface, const u8* fill_data, - const MathUtil::Rectangle& fill_rect, GLuint draw_fb_handle) { - UNREACHABLE(); - return {}; -} - -SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { - SurfaceParams params = *this; - const u32 tiled_size = is_tiled ? 8 : 1; - const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); - Tegra::GPUVAddr aligned_start = - addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); - Tegra::GPUVAddr aligned_end = - addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); - - if (aligned_end - aligned_start > stride_tiled_bytes) { - params.addr = aligned_start; - params.height = static_cast((aligned_end - aligned_start) / BytesInPixels(stride)); - } else { - // 1 row - ASSERT(aligned_end - aligned_start == stride_tiled_bytes); - const u64 tiled_alignment = BytesInPixels(is_tiled ? 
8 * 8 : 1); - aligned_start = - addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); - aligned_end = - addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); - params.addr = aligned_start; - params.width = static_cast(PixelsInBytes(aligned_end - aligned_start) / tiled_size); - params.stride = params.width; - params.height = tiled_size; - } - params.UpdateParams(); - - return params; -} - -SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle unscaled_rect) const { - if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { - return {}; - } - - if (is_tiled) { - unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; - unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; - unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; - unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; - } - - const u32 stride_tiled = !is_tiled ? stride : stride * 8; - - const u32 pixel_offset = - stride_tiled * (!is_tiled ? 
unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + - unscaled_rect.left; - - const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); - - return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; -} - -MathUtil::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { - const u32 begin_pixel_index = static_cast(PixelsInBytes(sub_surface.addr - addr)); - - if (is_tiled) { - const int x0 = (begin_pixel_index % (stride * 8)) / 8; - const int y0 = (begin_pixel_index / (stride * 8)) * 8; - // Top to bottom - return MathUtil::Rectangle(x0, height - y0, x0 + sub_surface.width, - height - (y0 + sub_surface.height)); - } - - const int x0 = begin_pixel_index % stride; - const int y0 = begin_pixel_index / stride; - // Bottom to top - return MathUtil::Rectangle(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); -} - -MathUtil::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { - auto rect = GetSubRect(sub_surface); - rect.left = rect.left * res_scale; - rect.right = rect.right * res_scale; - rect.top = rect.top * res_scale; - rect.bottom = rect.bottom * res_scale; - return rect; -} - -bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { - return std::tie(other_surface.addr, other_surface.width, other_surface.height, - other_surface.stride, other_surface.block_height, other_surface.pixel_format, - other_surface.component_type, - other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height, - pixel_format, component_type, is_tiled) && - pixel_format != PixelFormat::Invalid; -} - -bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { - return sub_surface.addr >= addr && sub_surface.end <= end && - sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && - sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height && - 
sub_surface.component_type == component_type && - (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && - (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && - GetSubRect(sub_surface).left + sub_surface.width <= stride; -} - -bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { - return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && - addr <= expanded_surface.end && expanded_surface.addr <= end && - is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height && - component_type == expanded_surface.component_type && stride == expanded_surface.stride && - (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % - BytesInPixels(stride * (is_tiled ? 8 : 1)) == - 0; -} - -bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { - if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || - end < texcopy_params.end) { - return false; - } - if (texcopy_params.block_height != block_height || - texcopy_params.component_type != component_type) - return false; - - if (texcopy_params.width != texcopy_params.stride) { - const u32 tile_stride = static_cast(BytesInPixels(stride * (is_tiled ? 8 : 1))); - return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && - texcopy_params.width % BytesInPixels(is_tiled ? 
64 : 1) == 0 && - (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) && - ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride; - } - return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); -} - -VAddr SurfaceParams::GetCpuAddr() const { - // When this function is used, only cpu_addr or (GPU) addr should be set, not both - ASSERT(!(cpu_addr && addr)); - const auto& gpu = Core::System::GetInstance().GPU(); - return cpu_addr.get_value_or(*gpu.memory_manager->GpuToCpuAddress(addr)); -} - -bool CachedSurface::CanFill(const SurfaceParams& dest_surface, - SurfaceInterval fill_interval) const { - if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && - boost::icl::first(fill_interval) >= addr && - boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range - dest_surface.FromInterval(fill_interval).GetInterval() == - fill_interval) { // make sure interval is a rectangle in dest surface - if (fill_size * CHAR_BIT != dest_surface.GetFormatBpp()) { - // Check if bits repeat for our fill_size - const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / CHAR_BIT, 1u); - std::vector fill_test(fill_size * dest_bytes_per_pixel); - - for (u32 i = 0; i < dest_bytes_per_pixel; ++i) - std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); - - for (u32 i = 0; i < fill_size; ++i) - if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], - dest_bytes_per_pixel) != 0) - return false; - - if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) - return false; - } - return true; - } - return false; -} - -bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, - SurfaceInterval copy_interval) const { - SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == copy_interval); - if (CanSubRect(subrect_params)) - return true; - - if 
(CanFill(dest_surface, copy_interval)) - return true; - - return false; -} - -SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { - SurfaceInterval result{}; - const auto valid_regions = - SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; - for (auto& valid_interval : valid_regions) { - const SurfaceInterval aligned_interval{ - addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, - BytesInPixels(is_tiled ? 8 * 8 : 1)), - addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, - BytesInPixels(is_tiled ? 8 * 8 : 1))}; - - if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) || - boost::icl::length(aligned_interval) == 0) { - continue; - } - - // Get the rectangle within aligned_interval - const u32 stride_bytes = static_cast(BytesInPixels(stride)) * (is_tiled ? 8 : 1); - SurfaceInterval rect_interval{ - addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), - addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), - }; - if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { - // 1 row - rect_interval = aligned_interval; - } else if (boost::icl::length(rect_interval) == 0) { - // 2 rows that do not make a rectangle, return the larger one - const SurfaceInterval row1{boost::icl::first(aligned_interval), - boost::icl::first(rect_interval)}; - const SurfaceInterval row2{boost::icl::first(rect_interval), - boost::icl::last_next(aligned_interval)}; - rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? 
row1 : row2; - } - - if (boost::icl::length(rect_interval) > boost::icl::length(result)) { - result = rect_interval; - } - } - return result; -} - -void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface, - SurfaceInterval copy_interval) { - SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == copy_interval); - - ASSERT(src_surface != dst_surface); - - // This is only called when CanCopy is true, no need to run checks here - if (src_surface->type == SurfaceType::Fill) { - // FillSurface needs a 4 bytes buffer - const u64 fill_offset = - (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; - std::array fill_buffer; - - u64 fill_buff_pos = fill_offset; - for (int i : {0, 1, 2, 3}) - fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; - - FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params), - draw_framebuffer.handle); - return; - } - if (src_surface->CanSubRect(subrect_params)) { - BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params), - dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params), - src_surface->type, read_framebuffer.handle, draw_framebuffer.handle); - return; - } - UNREACHABLE(); +CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_buffer_size(0) { + texture.Create(); + AllocateSurfaceTexture(texture.handle, + GetFormatTuple(params.pixel_format, params.component_type), params.width, + params.height); } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); -void CachedSurface::LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end) { - ASSERT(type != SurfaceType::Fill); +void CachedSurface::LoadGLBuffer() { + ASSERT(params.type != SurfaceType::Fill); - u8* const texture_src_data = Memory::GetPointer(GetCpuAddr()); - if (texture_src_data == 
nullptr) - return; + u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); - if (gl_buffer == nullptr) { - gl_buffer_size = GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format); + ASSERT(texture_src_data); + + if (!gl_buffer) { + gl_buffer_size = params.width * params.height * GetGLBytesPerPixel(params.pixel_format); gl_buffer.reset(new u8[gl_buffer_size]); } MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - ASSERT(load_start >= addr && load_end <= end); - const u64 start_offset = load_start - addr; - - if (!is_tiled) { - const u32 bytes_per_pixel{GetFormatBpp() >> 3}; + if (!params.is_tiled) { + const u32 bytes_per_pixel{params.GetFormatBpp() >> 3}; - std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, - bytes_per_pixel * width * height); + std::memcpy(&gl_buffer[0], texture_src_data, + bytes_per_pixel * params.width * params.height); } else { - morton_to_gl_fns[static_cast(pixel_format)](GetActualWidth(), block_height, - GetActualHeight(), &gl_buffer[0], addr, - load_start, load_end); + morton_to_gl_fns[static_cast(params.pixel_format)]( + params.width, params.block_height, params.height, &gl_buffer[0], params.addr); } } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end) { - u8* const dst_buffer = Memory::GetPointer(GetCpuAddr()); - if (dst_buffer == nullptr) - return; - - ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); - - // TODO: Should probably be done in ::Memory:: and check for other regions too - // same as loadglbuffer() - if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) - flush_end = Memory::VRAM_VADDR_END; +void CachedSurface::FlushGLBuffer() { + u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr()); - if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) - flush_start = Memory::VRAM_VADDR; + 
ASSERT(dst_buffer); + ASSERT(gl_buffer_size == + params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); - ASSERT(flush_start >= addr && flush_end <= end); - const u64 start_offset = flush_start - addr; - const u64 end_offset = flush_end - addr; - - if (type == SurfaceType::Fill) { - const u64 coarse_start_offset = start_offset - (start_offset % fill_size); - const u64 backup_bytes = start_offset % fill_size; - std::array backup_data; - if (backup_bytes) - std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); - - for (u64 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { - std::memcpy(&dst_buffer[offset], &fill_data[0], - std::min(fill_size, end_offset - offset)); - } - - if (backup_bytes) - std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); - } else if (!is_tiled) { - std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); + if (!params.is_tiled) { + std::memcpy(dst_buffer, &gl_buffer[0], params.SizeInBytes()); } else { - gl_to_morton_fns[static_cast(pixel_format)]( - stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end); + gl_to_morton_fns[static_cast(params.pixel_format)]( + params.width, params.block_height, params.height, &gl_buffer[0], params.addr); } } MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); -void CachedSurface::UploadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, - GLuint draw_fb_handle) { - if (type == SurfaceType::Fill) +void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { + if (params.type == SurfaceType::Fill) return; MICROPROFILE_SCOPE(OpenGL_TextureUL); ASSERT(gl_buffer_size == - GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format)); + params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); + + const auto& rect{params.GetRect()}; // 
Load data from memory to the surface GLint x0 = static_cast(rect.left); GLint y0 = static_cast(rect.bottom); - size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); + size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format); - const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); + const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); GLuint target_tex = texture.handle; - - // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in - // surface - OGLTexture unscaled_tex; - if (res_scale != 1) { - x0 = 0; - y0 = 0; - - unscaled_tex.Create(); - AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); - target_tex = unscaled_tex.handle; - } - OpenGLState cur_state = OpenGLState::GetCurState(); GLuint old_tex = cur_state.texture_units[0].texture_2d; @@ -567,15 +254,15 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle& rect, GLuint cur_state.Apply(); // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); + ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.width)); glActiveTexture(GL_TEXTURE0); if (tuple.compressed) { - glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, - static_cast(rect.GetWidth() * GetCompresssionFactor()), - static_cast(rect.GetHeight() * GetCompresssionFactor()), 0, - static_cast(size), &gl_buffer[buffer_offset]); + glCompressedTexImage2D( + GL_TEXTURE_2D, 0, tuple.internal_format, static_cast(params.width), + static_cast(params.height), 0, static_cast(params.SizeInBytes()), + &gl_buffer[buffer_offset]); } else { glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), tuple.format, tuple.type, @@ -586,29 
+273,17 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle& rect, GLuint cur_state.texture_units[0].texture_2d = old_tex; cur_state.Apply(); - - if (res_scale != 1) { - auto scaled_rect = rect; - scaled_rect.left *= res_scale; - scaled_rect.top *= res_scale; - scaled_rect.right *= res_scale; - scaled_rect.bottom *= res_scale; - - BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle, - scaled_rect, type, read_fb_handle, draw_fb_handle); - } } MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); -void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, - GLuint draw_fb_handle) { - if (type == SurfaceType::Fill) +void CachedSurface::DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { + if (params.type == SurfaceType::Fill) return; MICROPROFILE_SCOPE(OpenGL_TextureDL); - if (gl_buffer == nullptr) { - gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); + if (!gl_buffer) { + gl_buffer_size = params.width * params.height * GetGLBytesPerPixel(params.pixel_format); gl_buffer.reset(new u8[gl_buffer_size]); } @@ -616,437 +291,45 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle& rect, GLui OpenGLState prev_state = state; SCOPE_EXIT({ prev_state.Apply(); }); - const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); + const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); // Ensure no bad interactions with GL_PACK_ALIGNMENT - ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(stride)); - size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format); - - // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush - if (res_scale != 1) { - auto scaled_rect = rect; - scaled_rect.left *= res_scale; - scaled_rect.top *= res_scale; - scaled_rect.right *= 
res_scale; - scaled_rect.bottom *= res_scale; - - OGLTexture unscaled_tex; - unscaled_tex.Create(); - - MathUtil::Rectangle unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; - AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); - BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type, - read_fb_handle, draw_fb_handle); - - state.texture_units[0].texture_2d = unscaled_tex.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); - } else { - state.UnbindTexture(texture.handle); - state.draw.read_framebuffer = read_fb_handle; - state.Apply(); - - if (type == SurfaceType::ColorTexture) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - texture.handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - } else if (type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - texture.handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - } else { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - texture.handle, 0); - } - glReadPixels(static_cast(rect.left), static_cast(rect.bottom), - static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), - tuple.format, tuple.type, &gl_buffer[buffer_offset]); - } + ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); + glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.width)); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); -} + const auto& rect{params.GetRect()}; + size_t buffer_offset = + (rect.bottom * params.width + rect.left) * 
GetGLBytesPerPixel(params.pixel_format); -enum class MatchFlags { - None = 0, - Invalid = 1, // Flag that can be applied to other match types, invalid matches require - // validation before they can be used - Exact = 1 << 1, // Surfaces perfectly match - SubRect = 1 << 2, // Surface encompasses params - Copy = 1 << 3, // Surface we can copy from - Expand = 1 << 4, // Surface that can expand params - TexCopy = 1 << 5 // Surface that will match a display transfer "texture copy" parameters -}; - -constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) { - return static_cast(static_cast(lhs) | static_cast(rhs)); -} - -constexpr MatchFlags operator&(MatchFlags lhs, MatchFlags rhs) { - return static_cast(static_cast(lhs) & static_cast(rhs)); -} + state.UnbindTexture(texture.handle); + state.draw.read_framebuffer = read_fb_handle; + state.Apply(); -/// Get the best surface match (and its match type) for the given flags -template -Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, - ScaleMatch match_scale_type, - boost::optional validate_interval = boost::none) { - Surface match_surface = nullptr; - bool match_valid = false; - u32 match_scale = 0; - SurfaceInterval match_interval{}; - - for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { - for (auto& surface : pair.second) { - bool res_scale_matched = match_scale_type == ScaleMatch::Exact - ? (params.res_scale == surface->res_scale) - : (params.res_scale <= surface->res_scale); - // validity will be checked in GetCopyableInterval - bool is_valid = - (find_flags & MatchFlags::Copy) != MatchFlags::None - ? 
true - : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); - - if ((find_flags & MatchFlags::Invalid) == MatchFlags::None && !is_valid) - continue; - - auto IsMatch_Helper = [&](auto check_type, auto match_fn) { - if ((find_flags & check_type) == MatchFlags::None) - return; - - bool matched; - SurfaceInterval surface_interval; - std::tie(matched, surface_interval) = match_fn(); - if (!matched) - return; - - if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && - surface->type != SurfaceType::Fill) - return; - - // Found a match, update only if this is better than the previous one - auto UpdateMatch = [&] { - match_surface = surface; - match_valid = is_valid; - match_scale = surface->res_scale; - match_interval = surface_interval; - }; - - if (surface->res_scale > match_scale) { - UpdateMatch(); - return; - } else if (surface->res_scale < match_scale) { - return; - } - - if (is_valid && !match_valid) { - UpdateMatch(); - return; - } else if (is_valid != match_valid) { - return; - } - - if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { - UpdateMatch(); - } - }; - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - auto copy_interval = - params.FromInterval(*validate_interval).GetCopyableInterval(surface); - bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && - surface->CanCopy(params, copy_interval); - return std::make_pair(matched, copy_interval); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanExpand(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); - }); - 
} + if (params.type == SurfaceType::ColorTexture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + } else if (params.type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + } else { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, 0); } - return match_surface; + glReadPixels(static_cast(rect.left), static_cast(rect.bottom), + static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), + tuple.format, tuple.type, &gl_buffer[buffer_offset]); + + glPixelStorei(GL_PACK_ROW_LENGTH, 0); } RasterizerCacheOpenGL::RasterizerCacheOpenGL() { read_framebuffer.Create(); draw_framebuffer.Create(); - - attributeless_vao.Create(); - - d24s8_abgr_buffer.Create(); - d24s8_abgr_buffer_size = 0; - - const char* vs_source = R"( -#version 330 core -const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); -void main() { - gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); -} -)"; - const char* fs_source = R"( -#version 330 core - -uniform samplerBuffer tbo; -uniform vec2 tbo_size; -uniform vec4 viewport; - -out vec4 color; - -void main() { - vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw; - int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x); - color = texelFetch(tbo, tbo_offset).rabg; -} -)"; - d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source); - - OpenGLState state = OpenGLState::GetCurState(); - GLuint old_program = 
state.draw.shader_program; - state.draw.shader_program = d24s8_abgr_shader.handle; - state.Apply(); - - GLint tbo_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo"); - ASSERT(tbo_u_id != -1); - glUniform1i(tbo_u_id, 0); - - state.draw.shader_program = old_program; - state.Apply(); - - d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size"); - ASSERT(d24s8_abgr_tbo_size_u_id != -1); - d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport"); - ASSERT(d24s8_abgr_viewport_u_id != -1); -} - -RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); -} - -bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, - const MathUtil::Rectangle& src_rect, - const Surface& dst_surface, - const MathUtil::Rectangle& dst_rect) { - if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) - return false; - - return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, - dst_rect, src_surface->type, read_framebuffer.handle, - draw_framebuffer.handle); -} - -void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex, - const MathUtil::Rectangle& src_rect, - GLuint dst_tex, - const MathUtil::Rectangle& dst_rect) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.draw.read_framebuffer = read_framebuffer.handle; - state.draw.draw_framebuffer = draw_framebuffer.handle; - state.Apply(); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle); - - GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4; - if (target_pbo_size > d24s8_abgr_buffer_size) { - d24s8_abgr_buffer_size = target_pbo_size * 2; - glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY); - } - - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, 
GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, - 0); - glReadPixels(static_cast(src_rect.left), static_cast(src_rect.bottom), - static_cast(src_rect.GetWidth()), - static_cast(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, - 0); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - - // PBO now contains src_tex in RABG format - state.draw.shader_program = d24s8_abgr_shader.handle; - state.draw.vertex_array = attributeless_vao.handle; - state.viewport.x = static_cast(dst_rect.left); - state.viewport.y = static_cast(dst_rect.bottom); - state.viewport.width = static_cast(dst_rect.GetWidth()); - state.viewport.height = static_cast(dst_rect.GetHeight()); - state.Apply(); - - OGLTexture tbo; - tbo.Create(); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_BUFFER, tbo.handle); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle); - - glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast(src_rect.GetWidth()), - static_cast(src_rect.GetHeight())); - glUniform4f(d24s8_abgr_viewport_u_id, static_cast(state.viewport.x), - static_cast(state.viewport.y), static_cast(state.viewport.width), - static_cast(state.viewport.height)); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - glBindTexture(GL_TEXTURE_BUFFER, 0); -} - -Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return nullptr; - } - // Use GetSurfaceSubRect instead - ASSERT(params.width == params.stride); - - // Check for an exact match in existing surfaces - Surface surface = - FindMatch(surface_cache, params, match_res_scale); - - if (surface == nullptr) { - u16 
target_res_scale = params.res_scale; - if (match_res_scale != ScaleMatch::Exact) { - // This surface may have a subrect of another surface with a higher res_scale, find it - // to adjust our params - SurfaceParams find_params = params; - Surface expandable = FindMatch( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - } - SurfaceParams new_params = params; - new_params.res_scale = target_res_scale; - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - - if (load_if_create) { - ValidateSurface(surface, params.addr, params.size); - } - - return surface; -} - -boost::optional RasterizerCacheOpenGL::TryFindFramebufferGpuAddress( - VAddr cpu_addr) const { - // Tries to find the GPU address of a framebuffer based on the CPU address. This is because - // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU - // addresses. We iterate through all cached framebuffers, and compare their starting CPU address - // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps - // surfaces. 
- - std::vector gpu_addresses; - for (const auto& pair : surface_cache) { - for (const auto& surface : pair.second) { - const VAddr surface_cpu_addr = surface->GetCpuAddr(); - if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + surface->size)) { - ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); - gpu_addresses.push_back(surface->addr); - } - } - } - - if (gpu_addresses.empty()) { - return {}; - } - - ASSERT_MSG(gpu_addresses.size() == 1, ">1 surface is unsupported"); - return gpu_addresses[0]; -} - -SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, - ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return std::make_tuple(nullptr, MathUtil::Rectangle{}); - } - - // Attempt to find encompassing surface - Surface surface = FindMatch(surface_cache, params, - match_res_scale); - - // Check if FindMatch failed because of res scaling - // If that's the case create a new surface with - // the dimensions of the lower res_scale surface - // to suggest it should not be used again - if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { - surface = FindMatch(surface_cache, params, - ScaleMatch::Ignore); - if (surface != nullptr) { - ASSERT(surface->res_scale < params.res_scale); - SurfaceParams new_params = *surface; - new_params.res_scale = params.res_scale; - - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - } - - SurfaceParams aligned_params = params; - if (params.is_tiled) { - aligned_params.height = Common::AlignUp(params.height, 8); - aligned_params.width = Common::AlignUp(params.width, 8); - aligned_params.stride = Common::AlignUp(params.stride, 8); - aligned_params.UpdateParams(); - } - - // Check for a surface we can expand before creating a new one - if (surface == nullptr) { - surface = FindMatch(surface_cache, aligned_params, - match_res_scale); - if (surface != nullptr) { - 
aligned_params.width = aligned_params.stride; - aligned_params.UpdateParams(); - - SurfaceParams new_params = *surface; - new_params.addr = std::min(aligned_params.addr, surface->addr); - new_params.end = std::max(aligned_params.end, surface->end); - new_params.size = new_params.end - new_params.addr; - new_params.height = static_cast( - new_params.size / aligned_params.BytesInPixels(aligned_params.stride)); - ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); - - Surface new_surface = CreateSurface(new_params); - DuplicateSurface(surface, new_surface); - - // Delete the expanded surface, this can't be done safely yet - // because it may still be in use - remove_surfaces.emplace(surface); - - surface = new_surface; - RegisterSurface(new_surface); - } - } - - // No subrect found - create and return a new surface - if (surface == nullptr) { - SurfaceParams new_params = aligned_params; - // Can't have gaps in a surface - new_params.width = aligned_params.stride; - new_params.UpdateParams(); - // GetSurface will create the new surface and possibly adjust res_scale if necessary - surface = GetSurface(new_params, match_res_scale, load_if_create); - } else if (load_if_create) { - ValidateSurface(surface, aligned_params.addr, aligned_params.size); - } - - return std::make_tuple(surface, surface->GetScaledSubRect(params)); } Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { @@ -1056,36 +339,21 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu params.addr = config.tic.Address(); params.is_tiled = config.tic.IsTiled(); params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); + params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); + params.type = SurfaceParams::GetFormatType(params.pixel_format); + params.width = Common::AlignUp(config.tic.Width(), params.GetCompressionFactor()); + params.height = 
Common::AlignUp(config.tic.Height(), params.GetCompressionFactor()); - params.width = Common::AlignUp(config.tic.Width(), params.GetCompresssionFactor()) / - params.GetCompresssionFactor(); - params.height = Common::AlignUp(config.tic.Height(), params.GetCompresssionFactor()) / - params.GetCompresssionFactor(); + if (params.is_tiled) { + params.block_height = config.tic.BlockHeight(); + } // TODO(Subv): Different types per component are not supported. ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && config.tic.r_type.Value() == config.tic.b_type.Value() && config.tic.r_type.Value() == config.tic.a_type.Value()); - params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); - - if (config.tic.IsTiled()) { - params.block_height = config.tic.BlockHeight(); - - // TODO(bunnei): The below align up is a hack. This is here because some compressed textures - // are not a multiple of their own compression factor, and so this accounts for that. This - // could potentially result in an extra row of 4px being decoded if a texture is not a - // multiple of 4. - params.width = Common::AlignUp(params.width, 4); - params.height = Common::AlignUp(params.height, 4); - } else { - // Use the texture-provided stride value if the texture isn't tiled. - params.stride = static_cast(params.PixelsInBytes(config.tic.Pitch())); - } - - params.UpdateParams(); - - return GetSurface(params, ScaleMatch::Ignore, true); + return GetSurface(params); } SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( @@ -1096,17 +364,6 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( // TODO(bunnei): This is hard corded to use just the first render buffer NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); - // update resolution_scale_factor and reset cache if changed - // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. 
We - // need to fix this before making the renderer multi-threaded. - static u16 resolution_scale_factor = GetResolutionScaleFactor(); - if (resolution_scale_factor != GetResolutionScaleFactor()) { - resolution_scale_factor = GetResolutionScaleFactor(); - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); - } - MathUtil::Rectangle viewport_clamped{ static_cast(std::clamp(viewport.left, 0, static_cast(config.width))), static_cast(std::clamp(viewport.top, 0, static_cast(config.height))), @@ -1116,7 +373,6 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( // get color and depth surfaces SurfaceParams color_params; color_params.is_tiled = true; - color_params.res_scale = resolution_scale_factor; color_params.width = config.width; color_params.height = config.height; // TODO(Subv): Can framebuffers use a different block height? @@ -1126,319 +382,69 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( color_params.addr = config.Address(); color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); - color_params.UpdateParams(); + color_params.type = SurfaceParams::GetFormatType(color_params.pixel_format); ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); - // depth_params.addr = config.GetDepthBufferPhysicalAddress(); - // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); - // depth_params.UpdateParams(); - - auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); - auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); - - // Make sure that framebuffers don't overlap if both color and depth are being used - if (using_color_fb && using_depth_fb && - boost::icl::length(color_vp_interval & depth_vp_interval)) { - NGLOG_CRITICAL(Render_OpenGL, "Color and 
depth framebuffer memory regions overlap; " - "overlapping framebuffers not supported!"); - using_depth_fb = false; - } MathUtil::Rectangle color_rect{}; - Surface color_surface = nullptr; - if (using_color_fb) - std::tie(color_surface, color_rect) = - GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + Surface color_surface; + if (using_color_fb) { + color_surface = GetSurface(color_params); + color_rect = color_surface->GetSurfaceParams().GetRect(); + } MathUtil::Rectangle depth_rect{}; - Surface depth_surface = nullptr; - if (using_depth_fb) - std::tie(depth_surface, depth_rect) = - GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + Surface depth_surface; + if (using_depth_fb) { + depth_surface = GetSurface(depth_params); + depth_rect = depth_surface->GetSurfaceParams().GetRect(); + } MathUtil::Rectangle fb_rect{}; - if (color_surface != nullptr && depth_surface != nullptr) { + if (color_surface && depth_surface) { fb_rect = color_rect; // Color and Depth surfaces must have the same dimensions and offsets if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { - color_surface = GetSurface(color_params, ScaleMatch::Exact, false); - depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); - fb_rect = color_surface->GetScaledRect(); + color_surface = GetSurface(color_params); + depth_surface = GetSurface(depth_params); + fb_rect = color_surface->GetSurfaceParams().GetRect(); } - } else if (color_surface != nullptr) { + } else if (color_surface) { fb_rect = color_rect; - } else if (depth_surface != nullptr) { + } else if (depth_surface) { fb_rect = depth_rect; } - if (color_surface != nullptr) { - ValidateSurface(color_surface, boost::icl::first(color_vp_interval), - boost::icl::length(color_vp_interval)); - } - if (depth_surface != nullptr) { - ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), - 
boost::icl::length(depth_vp_interval)); - } - return std::make_tuple(color_surface, depth_surface, fb_rect); } -Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { - UNREACHABLE(); - return {}; -} - -SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { - MathUtil::Rectangle rect{}; - - Surface match_surface = FindMatch( - surface_cache, params, ScaleMatch::Ignore); - - if (match_surface != nullptr) { - ValidateSurface(match_surface, params.addr, params.size); - - SurfaceParams match_subrect; - if (params.width != params.stride) { - const u32 tiled_size = match_surface->is_tiled ? 8 : 1; - match_subrect = params; - match_subrect.width = - static_cast(match_surface->PixelsInBytes(params.width) / tiled_size); - match_subrect.stride = - static_cast(match_surface->PixelsInBytes(params.stride) / tiled_size); - match_subrect.height *= tiled_size; - } else { - match_subrect = match_surface->FromInterval(params.GetInterval()); - ASSERT(match_subrect.GetInterval() == params.GetInterval()); - } - - rect = match_surface->GetScaledSubRect(match_subrect); - } - - return std::make_tuple(match_surface, rect); -} - -void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, - const Surface& dest_surface) { - ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); - - BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, - dest_surface->GetScaledSubRect(*src_surface)); - - dest_surface->invalid_regions -= src_surface->GetInterval(); - dest_surface->invalid_regions += src_surface->invalid_regions; - - SurfaceRegions regions; - for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { - if (pair.second == src_surface) { - regions += pair.first; - } - } - for (auto& interval : regions) { - dirty_regions.set({interval, dest_surface}); - } -} - -void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, - u64 size) { - 
if (size == 0) - return; - - const SurfaceInterval validate_interval(addr, addr + size); - - if (surface->type == SurfaceType::Fill) { - // Sanity check, fill surfaces will always be valid when used - ASSERT(surface->IsRegionValid(validate_interval)); - return; - } - - while (true) { - const auto it = surface->invalid_regions.find(validate_interval); - if (it == surface->invalid_regions.end()) - break; - - const auto interval = *it & validate_interval; - // Look for a valid surface to copy from - SurfaceParams params = *surface; - - Surface copy_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - if (copy_surface != nullptr) { - SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); - CopySurface(copy_surface, surface, copy_interval); - surface->invalid_regions.erase(copy_interval); - continue; - } - - // Load data from Switch memory - FlushRegion(params.addr, params.size); - surface->LoadGLBuffer(params.addr, params.end); - surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, - draw_framebuffer.handle); - surface->invalid_regions.erase(params.GetInterval()); - } -} - -void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface) { - if (size == 0) - return; - - const SurfaceInterval flush_interval(addr, addr + size); - SurfaceRegions flushed_intervals; - - for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { - // small sizes imply that this most likely comes from the cpu, flush the entire region - // the point is to avoid thousands of small writes every frame if the cpu decides to access - // that region, anything higher than 8 you're guaranteed it comes from a service - const auto interval = size <= 8 ? 
pair.first : pair.first & flush_interval; - auto& surface = pair.second; - - if (flush_surface != nullptr && surface != flush_surface) - continue; - - // Sanity check, this surface is the last one that marked this region dirty - ASSERT(surface->IsRegionValid(interval)); - - if (surface->type != SurfaceType::Fill) { - SurfaceParams params = surface->FromInterval(interval); - surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, - draw_framebuffer.handle); - } - surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); - flushed_intervals += interval; - } - // Reset dirty regions - dirty_regions -= flushed_intervals; +void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { + surface->LoadGLBuffer(); + surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); } -void RasterizerCacheOpenGL::FlushAll() { - FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); +void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) { + surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); + surface->FlushGLBuffer(); } -void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size, - const Surface& region_owner) { - if (size == 0) - return; - - const SurfaceInterval invalid_interval(addr, addr + size); - - if (region_owner != nullptr) { - ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); - // Surfaces can't have a gap - ASSERT(region_owner->width == region_owner->stride); - region_owner->invalid_regions.erase(invalid_interval); - } - - for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { - for (auto& cached_surface : pair.second) { - if (cached_surface == region_owner) - continue; - - // If cpu is invalidating this region we want to remove it - // to (likely) mark the memory pages as uncached - if (region_owner == nullptr && size <= 8) { - FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); - 
remove_surfaces.emplace(cached_surface); - continue; - } - - const auto interval = cached_surface->GetInterval() & invalid_interval; - cached_surface->invalid_regions.insert(interval); - - // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures - if (cached_surface->type == SurfaceType::Fill && - cached_surface->IsSurfaceFullyInvalid()) { - remove_surfaces.emplace(cached_surface); - } - } +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { + if (params.addr == 0 || params.height * params.width == 0) { + return {}; } - if (region_owner != nullptr) - dirty_regions.set({invalid_interval, region_owner}); - else - dirty_regions.erase(invalid_interval); - - for (auto& remove_surface : remove_surfaces) { - if (remove_surface == region_owner) { - Surface expanded_surface = FindMatch( - surface_cache, *region_owner, ScaleMatch::Ignore); - ASSERT(expanded_surface); - - if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { - DuplicateSurface(region_owner, expanded_surface); - } else { - continue; - } - } - UnregisterSurface(remove_surface); + // Check for an exact match in existing surfaces + auto search = surface_cache.find(params.addr); + Surface surface; + if (search != surface_cache.end()) { + surface = search->second; + } else { + surface = std::make_shared(params); + surface_cache[params.addr] = surface; } - remove_surfaces.clear(); -} - -Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { - Surface surface = std::make_shared(); - static_cast(*surface) = params; - - surface->texture.Create(); - - surface->gl_buffer_size = 0; - surface->invalid_regions.insert(surface->GetInterval()); - AllocateSurfaceTexture(surface->texture.handle, - GetFormatTuple(surface->pixel_format, surface->component_type), - surface->GetScaledWidth(), surface->GetScaledHeight()); + LoadSurface(surface); return surface; } - -void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { - 
if (surface->registered) { - return; - } - surface->registered = true; - surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); - UpdatePagesCachedCount(surface->addr, surface->size, 1); -} - -void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { - if (!surface->registered) { - return; - } - surface->registered = false; - UpdatePagesCachedCount(surface->addr, surface->size, -1); - surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); -} - -void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { - const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) - - (addr >> Tegra::MemoryManager::PAGE_BITS) + 1; - const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS; - const u64 page_end = page_start + num_pages; - - // Interval maps will erase segments if count reaches 0, so if delta is negative we have to - // subtract after iterating - const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); - if (delta > 0) - cached_pages.add({pages_interval, delta}); - - for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { - const auto interval = pair.first & pages_interval; - const int count = pair.second; - - const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) - << Tegra::MemoryManager::PAGE_BITS; - const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) - << Tegra::MemoryManager::PAGE_BITS; - const u64 interval_size = interval_end_addr - interval_start_addr; - - if (delta > 0 && count == delta) - Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); - else if (delta < 0 && count == -delta) - Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); - else - ASSERT(count >= 0); - } - - if (delta < 0) - cached_pages.add({pages_interval, delta}); -} diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h 
b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 9da945e19..ca9945df4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -1,57 +1,22 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once #include +#include #include -#include -#include -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-local-typedefs" -#endif -#include -#include -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#include -#include -#include "common/assert.h" -#include "common/common_funcs.h" + #include "common/common_types.h" #include "common/math_util.h" -#include "video_core/gpu.h" -#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/textures/texture.h" -struct CachedSurface; +class CachedSurface; using Surface = std::shared_ptr; -using SurfaceSet = std::set; - -using SurfaceRegions = boost::icl::interval_set; -using SurfaceMap = boost::icl::interval_map; -using SurfaceCache = boost::icl::interval_map; - -using SurfaceInterval = SurfaceCache::interval_type; -static_assert(std::is_same() && - std::is_same(), - "incorrect interval types"); - -using SurfaceRect_Tuple = std::tuple>; using SurfaceSurfaceRect_Tuple = std::tuple>; -using PageMap = boost::icl::interval_map; - -enum class ScaleMatch { - Exact, // only accept same res scale - Upscale, // only allow higher scale than params - Ignore // accept every scaled res -}; - struct SurfaceParams { enum class PixelFormat { ABGR8 = 0, @@ -93,10 +58,10 @@ struct SurfaceParams { /** * Gets the compression factor for the specified PixelFormat. This applies to just the * "compressed width" and "compressed height", not the overall compression factor of a - * compressed image. 
This is used for maintaining proper surface sizes for compressed texture - * formats. + * compressed image. This is used for maintaining proper surface sizes for compressed + * texture formats. */ - static constexpr u32 GetCompresssionFactor(PixelFormat format) { + static constexpr u32 GetCompressionFactor(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; @@ -118,8 +83,8 @@ struct SurfaceParams { ASSERT(static_cast(format) < compression_factor_table.size()); return compression_factor_table[static_cast(format)]; } - u32 GetCompresssionFactor() const { - return GetCompresssionFactor(pixel_format); + u32 GetCompressionFactor() const { + return GetCompressionFactor(pixel_format); } static constexpr u32 GetFormatBpp(PixelFormat format) { @@ -165,25 +130,6 @@ struct SurfaceParams { } } - static bool IsFormatASTC(PixelFormat format) { - switch (format) { - case PixelFormat::ASTC_2D_4X4: - return true; - default: - return false; - } - } - - static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { - switch (format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::ABGR8; - default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast(format)); - UNREACHABLE(); - } - } - static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format) { // TODO(Subv): Properly implement this switch (format) { @@ -276,36 +222,6 @@ struct SurfaceParams { } } - static ComponentType ComponentTypeFromGPUPixelFormat( - Tegra::FramebufferConfig::PixelFormat format) { - switch (format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return ComponentType::UNorm; - default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast(format)); - UNREACHABLE(); - } - } - - static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { - SurfaceType a_type = GetFormatType(pixel_format_a); - SurfaceType b_type = GetFormatType(pixel_format_b); - - if 
(a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) { - return true; - } - - if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) { - return true; - } - - if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) { - return true; - } - - return false; - } - static SurfaceType GetFormatType(PixelFormat pixel_format) { if (static_cast(pixel_format) < MaxPixelFormat) { return SurfaceType::ColorTexture; @@ -317,220 +233,79 @@ struct SurfaceParams { return SurfaceType::Invalid; } - /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" - /// and "pixel_format" - void UpdateParams() { - if (stride == 0) { - stride = width; - } - type = GetFormatType(pixel_format); - size = !is_tiled ? BytesInPixels(stride * (height - 1) + width) - : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); - end = addr + size; - } - - SurfaceInterval GetInterval() const { - return SurfaceInterval::right_open(addr, end); - } - - // Returns the outer rectangle containing "interval" - SurfaceParams FromInterval(SurfaceInterval interval) const; - - SurfaceInterval GetSubRectInterval(MathUtil::Rectangle unscaled_rect) const; - - // Returns the region of the biggest valid rectange within interval - SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; - - /** - * Gets the actual width (in pixels) of the surface. This is provided because `width` is used - * for tracking the surface region in memory, which may be compressed for certain formats. In - * this scenario, `width` is actually the compressed width. - */ - u32 GetActualWidth() const { - return width * GetCompresssionFactor(); - } - - /** - * Gets the actual height (in pixels) of the surface. This is provided because `height` is used - * for tracking the surface region in memory, which may be compressed for certain formats. In - * this scenario, `height` is actually the compressed height. 
- */ - u32 GetActualHeight() const { - return height * GetCompresssionFactor(); - } - - u32 GetScaledWidth() const { - return width * res_scale; - } - - u32 GetScaledHeight() const { - return height * res_scale; - } - MathUtil::Rectangle GetRect() const { return {0, height, width, 0}; } - MathUtil::Rectangle GetScaledRect() const { - return {0, GetScaledHeight(), GetScaledWidth(), 0}; - } - - u64 PixelsInBytes(u64 size) const { - return size * CHAR_BIT / GetFormatBpp(pixel_format); - } - - u64 BytesInPixels(u64 pixels) const { - return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; + size_t SizeInBytes() const { + const u32 compression_factor{GetCompressionFactor()}; + ASSERT(width % compression_factor == 0); + ASSERT(height % compression_factor == 0); + return (width / compression_factor) * (height / compression_factor) * + GetFormatBpp(pixel_format) / CHAR_BIT; } VAddr GetCpuAddr() const; - bool ExactMatch(const SurfaceParams& other_surface) const; - bool CanSubRect(const SurfaceParams& sub_surface) const; - bool CanExpand(const SurfaceParams& expanded_surface) const; - bool CanTexCopy(const SurfaceParams& texcopy_params) const; - - MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; - MathUtil::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; - - Tegra::GPUVAddr addr = 0; - Tegra::GPUVAddr end = 0; - boost::optional cpu_addr; - u64 size = 0; - - u32 width = 0; - u32 height = 0; - u32 stride = 0; - u32 block_height = 0; - u16 res_scale = 1; - - bool is_tiled = false; - PixelFormat pixel_format = PixelFormat::Invalid; - SurfaceType type = SurfaceType::Invalid; - ComponentType component_type = ComponentType::Invalid; + Tegra::GPUVAddr addr; + u32 width; + u32 height; + u32 block_height; + bool is_tiled; + PixelFormat pixel_format; + SurfaceType type; + ComponentType component_type; }; +static_assert(std::is_pod::value, "SurfaceParams is not POD"); -struct CachedSurface : SurfaceParams { - bool CanFill(const SurfaceParams& 
dest_surface, SurfaceInterval fill_interval) const; - bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; - - bool IsRegionValid(SurfaceInterval interval) const { - return (invalid_regions.find(interval) == invalid_regions.end()); - } +class CachedSurface final { +public: + CachedSurface(const SurfaceParams& params); - bool IsSurfaceFullyInvalid() const { - return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); + const OGLTexture& Texture() const { + return texture; } - bool registered = false; - SurfaceRegions invalid_regions; - - u64 fill_size = 0; /// Number of bytes to read from fill_data - std::array fill_data; - - OGLTexture texture; - - static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) { - if (format == PixelFormat::Invalid) + static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) { + if (format == SurfaceParams::PixelFormat::Invalid) return 0; return SurfaceParams::GetFormatBpp(format) / CHAR_BIT; } - std::unique_ptr gl_buffer; - size_t gl_buffer_size = 0; + const SurfaceParams& GetSurfaceParams() const { + return params; + } // Read/Write data in Switch memory to/from gl_buffer - void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end); - void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end); + void LoadGLBuffer(); + void FlushGLBuffer(); // Upload/Download data in gl_buffer in/to this surface's texture - void UploadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, - GLuint draw_fb_handle); - void DownloadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, - GLuint draw_fb_handle); + void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); + void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); + +private: + OGLTexture texture; + std::unique_ptr gl_buffer; + size_t gl_buffer_size; + SurfaceParams params; }; -class RasterizerCacheOpenGL : NonCopyable { +class 
RasterizerCacheOpenGL final : NonCopyable { public: RasterizerCacheOpenGL(); - ~RasterizerCacheOpenGL(); - - /// Blit one surface's texture to another - bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle& src_rect, - const Surface& dst_surface, const MathUtil::Rectangle& dst_rect); - - void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle& src_rect, - GLuint dst_tex, const MathUtil::Rectangle& dst_rect); - /// Copy one surface's region to another - void CopySurface(const Surface& src_surface, const Surface& dst_surface, - SurfaceInterval copy_interval); - - /// Load a texture from Switch memory to OpenGL and cache it (if not already cached) - Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create); - - /// Tries to find a framebuffer GPU address based on the provided CPU address - boost::optional TryFindFramebufferGpuAddress(VAddr cpu_addr) const; - - /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from - /// Switch memory to OpenGL and caches it (if not already cached) - SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create); - - /// Get a surface based on the texture configuration Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); - - /// Get the color and depth surfaces based on the framebuffer configuration SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle& viewport); - - /// Get a surface that matches the fill config - Surface GetFillSurface(const void* config); - - /// Get a surface that matches a "texture copy" display transfer config - SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); - - /// Write any cached resources overlapping the region back to memory (if dirty) - void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr); - - /// Mark region as being 
invalidated by region_owner (nullptr if Switch memory) - void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner); - - /// Flush all cached resources tracked by this cache manager - void FlushAll(); + void LoadSurface(const Surface& surface); + void FlushSurface(const Surface& surface); private: - void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); - - /// Update surface's texture for given region when necessary - void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size); - - /// Create a new surface - Surface CreateSurface(const SurfaceParams& params); - - /// Register surface into the cache - void RegisterSurface(const Surface& surface); - - /// Remove surface from the cache - void UnregisterSurface(const Surface& surface); - - /// Increase/decrease the number of surface in pages touching the specified region - void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); - - SurfaceCache surface_cache; - PageMap cached_pages; - SurfaceMap dirty_regions; - SurfaceSet remove_surfaces; + Surface GetSurface(const SurfaceParams& params); + std::map surface_cache; OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; - - OGLVertexArray attributeless_vao; - OGLBuffer d24s8_abgr_buffer; - GLsizeiptr d24s8_abgr_buffer_size; - OGLProgram d24s8_abgr_shader; - GLint d24s8_abgr_tbo_size_u_id; - GLint d24s8_abgr_viewport_u_id; }; -- cgit v1.2.3 From 9f2f819bb631cc8a503ff87175eed69cb78cc9e4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 24 Jun 2018 09:50:08 -0400 Subject: gl_rasterizer_cache: Refactor to make SurfaceParams members const. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 63 +++++++++------------- .../renderer_opengl/gl_rasterizer_cache.h | 26 ++++----- 2 files changed, 37 insertions(+), 52 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 5fb099d8d..779ab5ab4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -29,6 +29,28 @@ struct FormatTuple { bool compressed; }; +SurfaceParams::SurfaceParams(const Tegra::Texture::FullTextureInfo& config) + : addr(config.tic.Address()), is_tiled(config.tic.IsTiled()), + block_height(is_tiled ? config.tic.BlockHeight() : 0), + pixel_format(PixelFormatFromTextureFormat(config.tic.format)), + component_type(ComponentTypeFromTexture(config.tic.r_type.Value())), + type(GetFormatType(pixel_format)), + width(Common::AlignUp(config.tic.Width(), GetCompressionFactor(pixel_format))), + height(Common::AlignUp(config.tic.Height(), GetCompressionFactor(pixel_format))) { + + // TODO(Subv): Different types per component are not supported. 
+ ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && + config.tic.r_type.Value() == config.tic.b_type.Value() && + config.tic.r_type.Value() == config.tic.a_type.Value()); +} + +SurfaceParams::SurfaceParams(const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) + : addr(config.Address()), is_tiled(true), + block_height(Tegra::Texture::TICEntry::DefaultBlockHeight), + pixel_format(PixelFormatFromRenderTargetFormat(config.format)), + component_type(ComponentTypeFromRenderTarget(config.format)), + type(GetFormatType(pixel_format)), width(config.width), height(config.height) {} + static constexpr std::array tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8 {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5 @@ -333,57 +355,20 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() { } Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { - auto& gpu = Core::System::GetInstance().GPU(); - - SurfaceParams params; - params.addr = config.tic.Address(); - params.is_tiled = config.tic.IsTiled(); - params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); - params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); - params.type = SurfaceParams::GetFormatType(params.pixel_format); - params.width = Common::AlignUp(config.tic.Width(), params.GetCompressionFactor()); - params.height = Common::AlignUp(config.tic.Height(), params.GetCompressionFactor()); - - if (params.is_tiled) { - params.block_height = config.tic.BlockHeight(); - } - - // TODO(Subv): Different types per component are not supported. 
- ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && - config.tic.r_type.Value() == config.tic.b_type.Value() && - config.tic.r_type.Value() == config.tic.a_type.Value()); - - return GetSurface(params); + return GetSurface(SurfaceParams(config)); } SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle& viewport) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - const auto& config = regs.rt[0]; // TODO(bunnei): This is hard corded to use just the first render buffer NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); - MathUtil::Rectangle viewport_clamped{ - static_cast(std::clamp(viewport.left, 0, static_cast(config.width))), - static_cast(std::clamp(viewport.top, 0, static_cast(config.height))), - static_cast(std::clamp(viewport.right, 0, static_cast(config.width))), - static_cast(std::clamp(viewport.bottom, 0, static_cast(config.height)))}; - // get color and depth surfaces - SurfaceParams color_params; - color_params.is_tiled = true; - color_params.width = config.width; - color_params.height = config.height; - // TODO(Subv): Can framebuffers use a different block height? 
- color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; + SurfaceParams color_params(regs.rt[0]); SurfaceParams depth_params = color_params; - color_params.addr = config.Address(); - color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); - color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); - color_params.type = SurfaceParams::GetFormatType(color_params.pixel_format); - ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); MathUtil::Rectangle color_rect{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index ca9945df4..9878bf9bf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -10,6 +10,7 @@ #include "common/common_types.h" #include "common/math_util.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/textures/texture.h" @@ -83,9 +84,6 @@ struct SurfaceParams { ASSERT(static_cast(format) < compression_factor_table.size()); return compression_factor_table[static_cast(format)]; } - u32 GetCompressionFactor() const { - return GetCompressionFactor(pixel_format); - } static constexpr u32 GetFormatBpp(PixelFormat format) { if (format == PixelFormat::Invalid) @@ -238,25 +236,27 @@ struct SurfaceParams { } size_t SizeInBytes() const { - const u32 compression_factor{GetCompressionFactor()}; + const u32 compression_factor{GetCompressionFactor(pixel_format)}; ASSERT(width % compression_factor == 0); ASSERT(height % compression_factor == 0); return (width / compression_factor) * (height / compression_factor) * GetFormatBpp(pixel_format) / CHAR_BIT; } + SurfaceParams(const Tegra::Texture::FullTextureInfo& config); + SurfaceParams(const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); + VAddr GetCpuAddr() const; - Tegra::GPUVAddr addr; - 
u32 width; - u32 height; - u32 block_height; - bool is_tiled; - PixelFormat pixel_format; - SurfaceType type; - ComponentType component_type; + const Tegra::GPUVAddr addr; + const bool is_tiled; + const u32 block_height; + const PixelFormat pixel_format; + const ComponentType component_type; + const SurfaceType type; + const u32 width; + const u32 height; }; -static_assert(std::is_pod::value, "SurfaceParams is not POD"); class CachedSurface final { public: -- cgit v1.2.3 From ff6785f3e8547878d26c2571513a5596e5c9ddd5 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 24 Jun 2018 17:28:06 -0400 Subject: gl_rasterizer_cache: Cache size_in_bytes as a const per surface. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 21 ++++++++++++--------- .../renderer_opengl/gl_rasterizer_cache.h | 1 + 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 779ab5ab4..882490f47 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -36,7 +36,8 @@ SurfaceParams::SurfaceParams(const Tegra::Texture::FullTextureInfo& config) component_type(ComponentTypeFromTexture(config.tic.r_type.Value())), type(GetFormatType(pixel_format)), width(Common::AlignUp(config.tic.Width(), GetCompressionFactor(pixel_format))), - height(Common::AlignUp(config.tic.Height(), GetCompressionFactor(pixel_format))) { + height(Common::AlignUp(config.tic.Height(), GetCompressionFactor(pixel_format))), + size_in_bytes(SizeInBytes()) { // TODO(Subv): Different types per component are not supported. 
ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && @@ -49,7 +50,8 @@ SurfaceParams::SurfaceParams(const Tegra::Engines::Maxwell3D::Regs::RenderTarget block_height(Tegra::Texture::TICEntry::DefaultBlockHeight), pixel_format(PixelFormatFromRenderTargetFormat(config.format)), component_type(ComponentTypeFromRenderTarget(config.format)), - type(GetFormatType(pixel_format)), width(config.width), height(config.height) {} + type(GetFormatType(pixel_format)), width(config.width), height(config.height), + size_in_bytes(SizeInBytes()) {} static constexpr std::array tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8 @@ -70,8 +72,8 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); if (type == SurfaceType::ColorTexture) { ASSERT(static_cast(pixel_format) < tex_format_tuples.size()); - // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which are - // type FLOAT + // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which + // are type FLOAT ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F || pixel_format == PixelFormat::R11FG11FB10F); return tex_format_tuples[static_cast(pixel_format)]; @@ -127,14 +129,15 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra:: SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); if (IsPixelFormatASTC(format)) { - // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this + // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support + // this ConvertASTCToRGBA8(data, format, stride, height); } std::memcpy(gl_buffer, data.data(), data.size()); } else { - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). 
We should check - // the configuration for this and perform more generic un/swizzle + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should + // check the configuration for this and perform more generic un/swizzle NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); VideoCore::MortonCopyPixels128( stride, height, bytes_per_pixel, gl_bytes_per_pixel, @@ -243,7 +246,7 @@ void CachedSurface::FlushGLBuffer() { MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); if (!params.is_tiled) { - std::memcpy(dst_buffer, &gl_buffer[0], params.SizeInBytes()); + std::memcpy(dst_buffer, &gl_buffer[0], params.size_in_bytes); } else { gl_to_morton_fns[static_cast(params.pixel_format)]( params.width, params.block_height, params.height, &gl_buffer[0], params.addr); @@ -283,7 +286,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle if (tuple.compressed) { glCompressedTexImage2D( GL_TEXTURE_2D, 0, tuple.internal_format, static_cast(params.width), - static_cast(params.height), 0, static_cast(params.SizeInBytes()), + static_cast(params.height), 0, static_cast(params.size_in_bytes), &gl_buffer[buffer_offset]); } else { glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast(rect.GetWidth()), diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 9878bf9bf..9f1209b0f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -256,6 +256,7 @@ struct SurfaceParams { const SurfaceType type; const u32 width; const u32 height; + const size_t size_in_bytes; }; class CachedSurface final { -- cgit v1.2.3 From 3f9f047375dd9aae7eadcb957747fa8db01544bf Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 24 Jun 2018 17:42:29 -0400 Subject: gl_rasterizer: Implement AccelerateDisplay to forward textures to framebuffers. 
--- src/video_core/rasterizer_interface.h | 5 ++--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 25 ++++++++++++++++++--- .../renderer_opengl/gl_rasterizer_cache.cpp | 26 ++++++++++++++++++++++ .../renderer_opengl/gl_rasterizer_cache.h | 11 +++++++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 1 - src/video_core/renderer_opengl/renderer_opengl.h | 2 +- 6 files changed, 62 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index f0e48a802..145e58334 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -51,9 +51,8 @@ public: } /// Attempt to use a faster method to display the framebuffer to screen - virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, - VAddr framebuffer_addr, u32 pixel_stride, - ScreenInfo& screen_info) { + virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, + u32 pixel_stride, ScreenInfo& screen_info) { return false; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index bc463fc30..f9b0ce434 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -468,11 +468,30 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) { return true; } -bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, +bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { - // TODO(bunnei): ImplementMe - return false; + if (!framebuffer_addr) { + return {}; + } + + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + + const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; + if (!surface) { + return {}; + } + + // Verify that the cached surface is the same size and format as the 
requested framebuffer + const auto& params{surface->GetSurfaceParams()}; + const auto& pixel_format{SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)}; + ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); + ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); + ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); + + screen_info.display_texture = surface->Texture().handle; + + return true; } void RasterizerOpenGL::SamplerInfo::Create() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 882490f47..919931d64 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -436,3 +436,29 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { return surface; } + +Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { + // Tries to find the GPU address of a framebuffer based on the CPU address. This is because + // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU + // addresses. We iterate through all cached framebuffers, and compare their starting CPU address + // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps + // surfaces. 
+ + std::vector surfaces; + for (const auto& surface : surface_cache) { + const auto& params = surface.second->GetSurfaceParams(); + const VAddr surface_cpu_addr = params.GetCpuAddr(); + if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) { + ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); + surfaces.push_back(surface.second); + } + } + + if (surfaces.empty()) { + return {}; + } + + ASSERT_MSG(surfaces.size() == 1, ">1 surface is unsupported"); + + return surfaces[0]; +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 9f1209b0f..53ff2722d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -220,6 +220,16 @@ struct SurfaceParams { } } + static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { + switch (format) { + case Tegra::FramebufferConfig::PixelFormat::ABGR8: + return PixelFormat::ABGR8; + default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast(format)); + UNREACHABLE(); + } + } + static SurfaceType GetFormatType(PixelFormat pixel_format) { if (static_cast(pixel_format) < MaxPixelFormat) { return SurfaceType::ColorTexture; @@ -302,6 +312,7 @@ public: const MathUtil::Rectangle& viewport); void LoadSurface(const Surface& surface); void FlushSurface(const Surface& surface); + Surface TryFindFramebufferSurface(VAddr cpu_addr) const; private: Surface GetSurface(const SurfaceParams& params); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index f33766bfd..e3bb2cbb8 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -150,7 +150,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf screen_info)) { // Reset the screen 
info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; - screen_info.display_texcoords = MathUtil::Rectangle(0.f, 0.f, 1.f, 1.f); Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, Memory::FlushMode::Flush); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 2cc6d9a00..21f0d298c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -27,7 +27,7 @@ struct TextureInfo { /// Structure used for storing information about the display target for the Switch screen struct ScreenInfo { GLuint display_texture; - MathUtil::Rectangle display_texcoords; + const MathUtil::Rectangle display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; TextureInfo texture; }; -- cgit v1.2.3 From 6a28a66832c9ea98738ce5cf8527bc83f74c40f6 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 26 Jun 2018 14:38:53 -0400 Subject: maxwell_3d: Add a struct for RenderTargetConfig. 
--- src/video_core/engines/maxwell_3d.h | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 2dc251205..180be4ff4 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -321,6 +321,24 @@ public: INSERT_PADDING_WORDS(1); }; + struct RenderTargetConfig { + u32 address_high; + u32 address_low; + u32 width; + u32 height; + Tegra::RenderTargetFormat format; + u32 block_dimensions; + u32 array_mode; + u32 layer_stride; + u32 base_layer; + INSERT_PADDING_WORDS(7); + + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + address_low); + } + }; + union { struct { INSERT_PADDING_WORDS(0x45); @@ -333,23 +351,7 @@ public: INSERT_PADDING_WORDS(0x1B8); - struct { - u32 address_high; - u32 address_low; - u32 width; - u32 height; - Tegra::RenderTargetFormat format; - u32 block_dimensions; - u32 array_mode; - u32 layer_stride; - u32 base_layer; - INSERT_PADDING_WORDS(7); - - GPUVAddr Address() const { - return static_cast((static_cast(address_high) << 32) | - address_low); - } - } rt[NumRenderTargets]; + RenderTargetConfig rt[NumRenderTargets]; struct { f32 scale_x; -- cgit v1.2.3 From c7c379bd1986830c9fc370ce581710d1098c975c Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 26 Jun 2018 14:59:45 -0400 Subject: gl_rasterizer_cache: Use SurfaceParams as a key for surface caching. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 69 ++++++++++++---------- .../renderer_opengl/gl_rasterizer_cache.h | 46 +++++++++++---- 2 files changed, 72 insertions(+), 43 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 919931d64..59f1a89c9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -29,29 +29,37 @@ struct FormatTuple { bool compressed; }; -SurfaceParams::SurfaceParams(const Tegra::Texture::FullTextureInfo& config) - : addr(config.tic.Address()), is_tiled(config.tic.IsTiled()), - block_height(is_tiled ? config.tic.BlockHeight() : 0), - pixel_format(PixelFormatFromTextureFormat(config.tic.format)), - component_type(ComponentTypeFromTexture(config.tic.r_type.Value())), - type(GetFormatType(pixel_format)), - width(Common::AlignUp(config.tic.Width(), GetCompressionFactor(pixel_format))), - height(Common::AlignUp(config.tic.Height(), GetCompressionFactor(pixel_format))), - size_in_bytes(SizeInBytes()) { - - // TODO(Subv): Different types per component are not supported. - ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && - config.tic.r_type.Value() == config.tic.b_type.Value() && - config.tic.r_type.Value() == config.tic.a_type.Value()); +/*static*/ SurfaceParams SurfaceParams::CreateForTexture( + const Tegra::Texture::FullTextureInfo& config) { + + SurfaceParams params{}; + params.addr = config.tic.Address(); + params.is_tiled = config.tic.IsTiled(); + params.block_height = params.is_tiled ? 
config.tic.BlockHeight() : 0, + params.pixel_format = PixelFormatFromTextureFormat(config.tic.format); + params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); + params.type = GetFormatType(params.pixel_format); + params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); + params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); + params.size_in_bytes = params.SizeInBytes(); + return params; } -SurfaceParams::SurfaceParams(const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) - : addr(config.Address()), is_tiled(true), - block_height(Tegra::Texture::TICEntry::DefaultBlockHeight), - pixel_format(PixelFormatFromRenderTargetFormat(config.format)), - component_type(ComponentTypeFromRenderTarget(config.format)), - type(GetFormatType(pixel_format)), width(config.width), height(config.height), - size_in_bytes(SizeInBytes()) {} +/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer( + const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) { + + SurfaceParams params{}; + params.addr = config.Address(); + params.is_tiled = true; + params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; + params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); + params.component_type = ComponentTypeFromRenderTarget(config.format); + params.type = GetFormatType(params.pixel_format); + params.width = config.width; + params.height = config.height; + params.size_in_bytes = params.SizeInBytes(); + return params; +} static constexpr std::array tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8 @@ -358,7 +366,7 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() { } Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { - return GetSurface(SurfaceParams(config)); + return GetSurface(SurfaceParams::CreateForTexture(config)); } SurfaceSurfaceRect_Tuple 
RasterizerCacheOpenGL::GetFramebufferSurfaces( @@ -369,8 +377,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); // get color and depth surfaces - SurfaceParams color_params(regs.rt[0]); - SurfaceParams depth_params = color_params; + const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(regs.rt[0])}; + const SurfaceParams depth_params{color_params}; ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); @@ -423,13 +431,14 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { } // Check for an exact match in existing surfaces - auto search = surface_cache.find(params.addr); + const auto& surface_key{SurfaceKey::Create(params)}; + const auto& search{surface_cache.find(surface_key)}; Surface surface; if (search != surface_cache.end()) { surface = search->second; } else { surface = std::make_shared(params); - surface_cache[params.addr] = surface; + surface_cache[surface_key] = surface; } LoadSurface(surface); @@ -439,10 +448,10 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { // Tries to find the GPU address of a framebuffer based on the CPU address. This is because - // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU - // addresses. We iterate through all cached framebuffers, and compare their starting CPU address - // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps - // surfaces. + // final output framebuffers are specified by CPU address, but internally our GPU cache uses + // GPU addresses. We iterate through all cached framebuffers, and compare their starting CPU + // address to the one provided. This is obviously not great, and won't work if the + // framebuffer overlaps surfaces. 
std::vector surfaces; for (const auto& surface : surface_cache) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 53ff2722d..bf36f6c24 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -9,6 +9,7 @@ #include #include "common/common_types.h" +#include "common/hash.h" #include "common/math_util.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -253,21 +254,40 @@ struct SurfaceParams { GetFormatBpp(pixel_format) / CHAR_BIT; } - SurfaceParams(const Tegra::Texture::FullTextureInfo& config); - SurfaceParams(const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); - VAddr GetCpuAddr() const; - const Tegra::GPUVAddr addr; - const bool is_tiled; - const u32 block_height; - const PixelFormat pixel_format; - const ComponentType component_type; - const SurfaceType type; - const u32 width; - const u32 height; - const size_t size_in_bytes; + static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); + + static SurfaceParams CreateForFramebuffer( + const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); + + Tegra::GPUVAddr addr; + bool is_tiled; + u32 block_height; + PixelFormat pixel_format; + ComponentType component_type; + SurfaceType type; + u32 width; + u32 height; + size_t size_in_bytes; +}; + +struct SurfaceKey : Common::HashableStruct { + static SurfaceKey Create(const SurfaceParams& params) { + SurfaceKey res; + res.state = params; + return res; + } +}; + +namespace std { +template <> +struct hash { + size_t operator()(const SurfaceKey& k) const { + return k.Hash(); + } }; +} // namespace std class CachedSurface final { public: @@ -317,7 +337,7 @@ public: private: Surface GetSurface(const SurfaceParams& params); - std::map surface_cache; + std::unordered_map surface_cache; OGLFramebuffer read_framebuffer; 
OGLFramebuffer draw_framebuffer; }; -- cgit v1.2.3 From 8af1ae46aa5a9303b21839b446d2ebf17ee12802 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 26 Jun 2018 15:05:13 -0400 Subject: gl_rasterizer_cache: Various fixes for ASTC handling. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 63 ++++++++++++---------- .../renderer_opengl/gl_rasterizer_cache.h | 11 ++-- 2 files changed, 39 insertions(+), 35 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 59f1a89c9..bd35bdb02 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -41,6 +41,7 @@ struct FormatTuple { params.type = GetFormatType(params.pixel_format); params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); + params.unaligned_height = config.tic.Height(); params.size_in_bytes = params.SizeInBytes(); return params; } @@ -57,6 +58,7 @@ struct FormatTuple { params.type = GetFormatType(params.pixel_format); params.width = config.width; params.height = config.height; + params.unaligned_height = config.height; params.size_in_bytes = params.SizeInBytes(); return params; } @@ -108,20 +110,29 @@ static bool IsPixelFormatASTC(PixelFormat format) { } } -static void ConvertASTCToRGBA8(std::vector& data, PixelFormat format, u32 width, u32 height) { - u32 block_width{}; - u32 block_height{}; - +static std::pair GetASTCBlockSize(PixelFormat format) { switch (format) { case PixelFormat::ASTC_2D_4X4: - block_width = 4; - block_height = 4; - break; + return {4, 4}; default: NGLOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast(format)); UNREACHABLE(); } +} +MathUtil::Rectangle SurfaceParams::GetRect() const { + u32 actual_height{unaligned_height}; + if (IsPixelFormatASTC(pixel_format)) { + // 
ASTC formats must stop at the ASTC block size boundary + actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second); + } + return {0, actual_height, width, 0}; +} + +static void ConvertASTCToRGBA8(std::vector& data, PixelFormat format, u32 width, u32 height) { + u32 block_width{}; + u32 block_height{}; + std::tie(block_width, block_height) = GetASTCBlockSize(format); data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height); } @@ -136,12 +147,6 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra:: *gpu.memory_manager->GpuToCpuAddress(addr), SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); - if (IsPixelFormatASTC(format)) { - // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support - // this - ConvertASTCToRGBA8(data, format, stride, height); - } - std::memcpy(gl_buffer, data.data(), data.size()); } else { // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). 
We should @@ -212,9 +217,10 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_buffer_size(0) { texture.Create(); + const auto& rect{params.GetRect()}; AllocateSurfaceTexture(texture.handle, - GetFormatTuple(params.pixel_format, params.component_type), params.width, - params.height); + GetFormatTuple(params.pixel_format, params.component_type), + rect.GetWidth(), rect.GetHeight()); } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); @@ -225,21 +231,23 @@ void CachedSurface::LoadGLBuffer() { ASSERT(texture_src_data); - if (!gl_buffer) { - gl_buffer_size = params.width * params.height * GetGLBytesPerPixel(params.pixel_format); - gl_buffer.reset(new u8[gl_buffer_size]); - } + gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); if (!params.is_tiled) { const u32 bytes_per_pixel{params.GetFormatBpp() >> 3}; - std::memcpy(&gl_buffer[0], texture_src_data, + std::memcpy(gl_buffer.data(), texture_src_data, bytes_per_pixel * params.width * params.height); } else { morton_to_gl_fns[static_cast(params.pixel_format)]( - params.width, params.block_height, params.height, &gl_buffer[0], params.addr); + params.width, params.block_height, params.height, gl_buffer.data(), params.addr); + } + + if (IsPixelFormatASTC(params.pixel_format)) { + // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this + ConvertASTCToRGBA8(gl_buffer, params.pixel_format, params.width, params.height); } } @@ -248,16 +256,16 @@ void CachedSurface::FlushGLBuffer() { u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr()); ASSERT(dst_buffer); - ASSERT(gl_buffer_size == + ASSERT(gl_buffer.size() == params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); if (!params.is_tiled) { - 
std::memcpy(dst_buffer, &gl_buffer[0], params.size_in_bytes); + std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes); } else { gl_to_morton_fns[static_cast(params.pixel_format)]( - params.width, params.block_height, params.height, &gl_buffer[0], params.addr); + params.width, params.block_height, params.height, gl_buffer.data(), params.addr); } } @@ -268,7 +276,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle MICROPROFILE_SCOPE(OpenGL_TextureUL); - ASSERT(gl_buffer_size == + ASSERT(gl_buffer.size() == params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); const auto& rect{params.GetRect()}; @@ -315,10 +323,7 @@ void CachedSurface::DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_hand MICROPROFILE_SCOPE(OpenGL_TextureDL); - if (!gl_buffer) { - gl_buffer_size = params.width * params.height * GetGLBytesPerPixel(params.pixel_format); - gl_buffer.reset(new u8[gl_buffer_size]); - } + gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); OpenGLState state = OpenGLState::GetCurState(); OpenGLState prev_state = state; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index bf36f6c24..84bdec652 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "common/common_types.h" #include "common/hash.h" @@ -79,7 +80,7 @@ struct SurfaceParams { 4, // DXT23 4, // DXT45 4, // DXN1 - 1, // ASTC_2D_4X4 + 4, // ASTC_2D_4X4 }}; ASSERT(static_cast(format) < compression_factor_table.size()); @@ -242,9 +243,7 @@ struct SurfaceParams { return SurfaceType::Invalid; } - MathUtil::Rectangle GetRect() const { - return {0, height, width, 0}; - } + MathUtil::Rectangle GetRect() const; size_t SizeInBytes() const { const u32 compression_factor{GetCompressionFactor(pixel_format)}; @@ -269,6 
+268,7 @@ struct SurfaceParams { SurfaceType type; u32 width; u32 height; + u32 unaligned_height; size_t size_in_bytes; }; @@ -318,8 +318,7 @@ public: private: OGLTexture texture; - std::unique_ptr gl_buffer; - size_t gl_buffer_size; + std::vector gl_buffer; SurfaceParams params; }; -- cgit v1.2.3 From 1dd754590fb9850bf00ddacbb860076dbbacabc6 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 26 Jun 2018 16:14:14 -0400 Subject: gl_rasterizer_cache: Implement caching for texture and framebuffer surfaces. gl_rasterizer_cache: Improved cache management based on Citra's implementation. gl_surface_cache: Add some docstrings. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 25 +++-- .../renderer_opengl/gl_rasterizer_cache.cpp | 116 +++++++++++++++++++-- .../renderer_opengl/gl_rasterizer_cache.h | 43 +++++++- 3 files changed, 168 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f9b0ce434..62ee45a36 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -435,22 +435,35 @@ void RasterizerOpenGL::DrawArrays() { // Mark framebuffer surfaces as dirty if (color_surface != nullptr && write_color_fb) { - res_cache.FlushSurface(color_surface); + res_cache.MarkSurfaceAsDirty(color_surface); } if (depth_surface != nullptr && write_depth_fb) { - res_cache.FlushSurface(depth_surface); + res_cache.MarkSurfaceAsDirty(depth_surface); } } void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} -void RasterizerOpenGL::FlushAll() {} +void RasterizerOpenGL::FlushAll() { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); +} -void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {} +void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + 
res_cache.FlushRegion(addr, size); +} -void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} +void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.InvalidateRegion(addr, size); +} -void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} +void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushRegion(addr, size); + res_cache.InvalidateRegion(addr, size); +} bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { MICROPROFILE_SCOPE(OpenGL_Blits); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index bd35bdb02..71ad7be74 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -12,6 +12,7 @@ #include "core/core.h" #include "core/hle/kernel/process.h" #include "core/memory.h" +#include "core/settings.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/textures/astc.h" @@ -215,7 +216,7 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup cur_state.Apply(); } -CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_buffer_size(0) { +CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) { texture.Create(); const auto& rect{params.GetRect()}; AllocateSurfaceTexture(texture.handle, @@ -370,6 +371,12 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() { draw_framebuffer.Create(); } +RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { + while (!surface_cache.empty()) { + UnregisterSurface(surface_cache.begin()->second); + } +} + Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { return 
GetSurface(SurfaceParams::CreateForTexture(config)); } @@ -425,9 +432,17 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); } -void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) { - surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); - surface->FlushGLBuffer(); +void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) { + if (Settings::values.use_accurate_framebuffers) { + // If enabled, always flush dirty surfaces + surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); + surface->FlushGLBuffer(); + } else { + // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads + // and flushes are very slow and do not seem to improve accuracy + const auto& params{surface->GetSurfaceParams()}; + Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false); + } } Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { @@ -441,13 +456,16 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { Surface surface; if (search != surface_cache.end()) { surface = search->second; + if (Settings::values.use_accurate_framebuffers) { + // Reload the surface from Switch memory + LoadSurface(surface); + } } else { surface = std::make_shared(params); - surface_cache[surface_key] = surface; + RegisterSurface(surface); + LoadSurface(surface); } - LoadSurface(surface); - return surface; } @@ -476,3 +494,87 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { return surfaces[0]; } + +void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) { + // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should + // probably implement this in the future, but for now, the `use_accurate_framebuffers` setting + // can be used to always flush. 
+} + +void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { + for (const auto& pair : surface_cache) { + const auto& surface{pair.second}; + const auto& params{surface->GetSurfaceParams()}; + + if (params.IsOverlappingRegion(addr, size)) { + UnregisterSurface(surface); + } + } +} + +void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { + const auto& params{surface->GetSurfaceParams()}; + const auto& surface_key{SurfaceKey::Create(params)}; + const auto& search{surface_cache.find(surface_key)}; + + if (search != surface_cache.end()) { + // Registered already + return; + } + + surface_cache[surface_key] = surface; + UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1); +} + +void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { + const auto& params{surface->GetSurfaceParams()}; + const auto& surface_key{SurfaceKey::Create(params)}; + const auto& search{surface_cache.find(surface_key)}; + + if (search == surface_cache.end()) { + // Unregistered already + return; + } + + UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1); + surface_cache.erase(search); +} + +template +constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); +} + +void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { + const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) - + (addr >> Tegra::MemoryManager::PAGE_BITS) + 1; + const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS; + const u64 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + cached_pages.add({pages_interval, delta}); + + for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + 
const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) + << Tegra::MemoryManager::PAGE_BITS; + const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) + << Tegra::MemoryManager::PAGE_BITS; + const u64 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + else if (delta < 0 && count == -delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({pages_interval, delta}); +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 84bdec652..85e7c8888 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -8,7 +8,7 @@ #include #include #include - +#include #include "common/common_types.h" #include "common/hash.h" #include "common/math_util.h" @@ -19,6 +19,7 @@ class CachedSurface; using Surface = std::shared_ptr; using SurfaceSurfaceRect_Tuple = std::tuple>; +using PageMap = boost::icl::interval_map; struct SurfaceParams { enum class PixelFormat { @@ -243,8 +244,10 @@ struct SurfaceParams { return SurfaceType::Invalid; } + /// Returns the rectangle corresponding to this surface MathUtil::Rectangle GetRect() const; + /// Returns the size of this surface in bytes, adjusted for compression size_t SizeInBytes() const { const u32 compression_factor{GetCompressionFactor(pixel_format)}; ASSERT(width % compression_factor == 0); @@ -253,10 +256,18 @@ struct SurfaceParams { GetFormatBpp(pixel_format) / CHAR_BIT; } + /// Returns the CPU virtual address for this surface VAddr GetCpuAddr() const; + /// Returns true if the specified region overlaps with this surface's region in Switch memory + bool 
IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const { + return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes); + } + + /// Creates SurfaceParams from a texture configuration static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); + /// Creates SurfaceParams from a framebuffer configuration static SurfaceParams CreateForFramebuffer( const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); @@ -272,6 +283,7 @@ struct SurfaceParams { size_t size_in_bytes; }; +/// Hashable variation of SurfaceParams, used for a key in the surface cache struct SurfaceKey : Common::HashableStruct { static SurfaceKey Create(const SurfaceParams& params) { SurfaceKey res; @@ -325,18 +337,43 @@ private: class RasterizerCacheOpenGL final : NonCopyable { public: RasterizerCacheOpenGL(); + ~RasterizerCacheOpenGL(); + /// Get a surface based on the texture configuration Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); + + /// Get the color and depth surfaces based on the framebuffer configuration SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle& viewport); - void LoadSurface(const Surface& surface); - void FlushSurface(const Surface& surface); + + /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory + void MarkSurfaceAsDirty(const Surface& surface); + + /// Tries to find a framebuffer GPU address based on the provided CPU address Surface TryFindFramebufferSurface(VAddr cpu_addr) const; + /// Write any cached resources overlapping the region back to memory (if dirty) + void FlushRegion(Tegra::GPUVAddr addr, size_t size); + + /// Mark the specified region as being invalidated + void InvalidateRegion(Tegra::GPUVAddr addr, size_t size); + private: + void LoadSurface(const Surface& surface); Surface GetSurface(const SurfaceParams& params); + /// Register surface into the cache + void 
RegisterSurface(const Surface& surface); + + /// Remove surface from the cache + void UnregisterSurface(const Surface& surface); + + /// Increase/decrease the number of surfaces in pages touching the specified region + void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); + std::unordered_map surface_cache; + PageMap cached_pages; + OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; }; -- cgit v1.2.3 From c18425ef989fd0c7f9bc1bdf4ba6b5e9235a8193 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 29 Jun 2018 13:08:08 -0400 Subject: gl_rasterizer_cache: Only dereference color_surface/depth_surface if valid. --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 71ad7be74..63f5999ea 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -398,14 +398,18 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( Surface color_surface; if (using_color_fb) { color_surface = GetSurface(color_params); - color_rect = color_surface->GetSurfaceParams().GetRect(); + if (color_surface) { + color_rect = color_surface->GetSurfaceParams().GetRect(); + } } MathUtil::Rectangle depth_rect{}; Surface depth_surface; if (using_depth_fb) { depth_surface = GetSurface(depth_params); - depth_rect = depth_surface->GetSurfaceParams().GetRect(); + if (depth_surface) { + depth_rect = depth_surface->GetSurfaceParams().GetRect(); + } } MathUtil::Rectangle fb_rect{}; -- cgit v1.2.3