diff options
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 117 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 1262 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 323 |
4 files changed, 210 insertions, 1494 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3fbf8e1f9..bc463fc30 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -146,7 +146,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, u64 size = end - start + 1; // Copy vertex array data - res_cache.FlushRegion(start, size, nullptr); Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); // Bind the vertex array to the buffer at the current offset. @@ -325,29 +324,22 @@ void RasterizerOpenGL::DrawArrays() { std::tie(color_surface, depth_surface, surfaces_rect) = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); - const u16 res_scale = color_surface != nullptr - ? color_surface->res_scale - : (depth_surface == nullptr ? 1u : depth_surface->res_scale); - MathUtil::Rectangle<u32> draw_rect{ + static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right, + surfaces_rect.left, surfaces_rect.right)), // Right static_cast<u32>( - std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Left - static_cast<u32>( - std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale, - surfaces_rect.bottom, surfaces_rect.top)), // Top - static_cast<u32>( - std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Right - static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + - viewport_rect.bottom * res_scale, - surfaces_rect.bottom, surfaces_rect.top))}; // Bottom + std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom, + surfaces_rect.bottom, surfaces_rect.top))}; // Bottom // Bind the framebuffer surfaces BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); // Sync the viewport - SyncViewport(surfaces_rect, res_scale); + SyncViewport(surfaces_rect); // Sync the blend state registers SyncBlendState(); @@ -442,44 +434,23 @@ void RasterizerOpenGL::DrawArrays() { state.Apply(); // Mark framebuffer surfaces as dirty - MathUtil::Rectangle<u32> draw_rect_unscaled{ - draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, - draw_rect.bottom / res_scale}; - if (color_surface != nullptr && write_color_fb) { - auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); - res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), - color_surface); + res_cache.FlushSurface(color_surface); } if (depth_surface != nullptr && write_depth_fb) { - auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); - res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), - depth_surface); + res_cache.FlushSurface(depth_surface); } } void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} -void RasterizerOpenGL::FlushAll() { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushAll(); -} +void RasterizerOpenGL::FlushAll() {} -void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushRegion(addr, size); -} +void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {} -void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.InvalidateRegion(addr, size, nullptr); -} +void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} -void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushRegion(addr, size); - res_cache.InvalidateRegion(addr, size, nullptr); -} +void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { MICROPROFILE_SCOPE(OpenGL_Blits); @@ -500,44 +471,8 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) { bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, VAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { - if (framebuffer_addr == 0) { - return false; - } - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - - SurfaceParams src_params; - src_params.cpu_addr = framebuffer_addr; - src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0); - src_params.width = std::min(framebuffer.width, pixel_stride); - src_params.height = framebuffer.height; - src_params.stride = pixel_stride; - src_params.is_tiled = true; - src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; - src_params.pixel_format = - SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); - src_params.component_type = - SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format); - src_params.UpdateParams(); - - MathUtil::Rectangle<u32> src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = - res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); - - if (src_surface == nullptr) { - return false; - } - - u32 scaled_width = src_surface->GetScaledWidth(); - u32 scaled_height = src_surface->GetScaledHeight(); - - screen_info.display_texcoords = MathUtil::Rectangle<float>( - (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, - (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); - - screen_info.display_texture = src_surface->texture.handle; - - return true; + // TODO(bunnei): ImplementMe + return false; } void RasterizerOpenGL::SamplerInfo::Create() { @@ -674,7 +609,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); Surface surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { - state.texture_units[current_bindpoint].texture_2d = surface->texture.handle; + state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle; state.texture_units[current_bindpoint].swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source); state.texture_units[current_bindpoint].swizzle.g = @@ -700,16 +635,16 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, state.Apply(); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - color_surface != nullptr ? color_surface->texture.handle : 0, 0); + color_surface != nullptr ? color_surface->Texture().handle : 0, 0); if (depth_surface != nullptr) { if (has_stencil) { // attach both depth and stencil glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); + depth_surface->Texture().handle, 0); } else { // attach depth glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); + depth_surface->Texture().handle, 0); // clear stencil attachment glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); } @@ -720,14 +655,14 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, } } -void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) { +void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; - state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale; - state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; - state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth() * res_scale); - state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight() * res_scale); + state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left; + state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom; + state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth()); + state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight()); } void RasterizerOpenGL::SyncClipEnabled() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4762983c9..621200f03 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -109,7 +109,7 @@ private: u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries); /// Syncs the viewport to match the guest state - void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale); + void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect); /// Syncs the clip enabled status to match the guest state void SyncClipEnabled(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 857164ff6..5fb099d8d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1,37 +1,22 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include <algorithm> -#include <atomic> -#include <cstring> -#include <iterator> -#include <memory> -#include <utility> -#include <vector> -#include <boost/optional.hpp> -#include <boost/range/iterator_range.hpp> #include <glad/glad.h> + #include "common/alignment.h" -#include "common/bit_field.h" -#include "common/color.h" -#include "common/logging/log.h" -#include "common/math_util.h" +#include "common/assert.h" #include "common/microprofile.h" #include "common/scope_exit.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" -#include "core/settings.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_state.h" #include "video_core/textures/astc.h" #include "video_core/textures/decoders.h" #include "video_core/utils.h" -#include "video_core/video_core.h" using SurfaceType = SurfaceParams::SurfaceType; using PixelFormat = SurfaceParams::PixelFormat; @@ -77,15 +62,18 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType return {}; } -template <typename Map, typename Interval> -constexpr auto RangeFromInterval(Map& map, const Interval& interval) { - return boost::make_iterator_range(map.equal_range(interval)); +VAddr SurfaceParams::GetCpuAddr() const { + const auto& gpu = Core::System::GetInstance().GPU(); + return *gpu.memory_manager->GpuToCpuAddress(addr); } -static u16 GetResolutionScaleFactor() { - return static_cast<u16>(!Settings::values.resolution_factor - ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() - : Settings::values.resolution_factor); +static bool IsPixelFormatASTC(PixelFormat format) { + switch (format) { + case PixelFormat::ASTC_2D_4X4: + return true; + default: + return false; + } } static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) { @@ -106,18 +94,17 @@ static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 wi } template <bool morton_to_gl, PixelFormat format> -void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, - Tegra::GPUVAddr start, Tegra::GPUVAddr end) { +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); const auto& gpu = Core::System::GetInstance().GPU(); if (morton_to_gl) { auto data = Tegra::Texture::UnswizzleTexture( - *gpu.memory_manager->GpuToCpuAddress(base), + *gpu.memory_manager->GpuToCpuAddress(addr), SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); - if (SurfaceParams::IsFormatASTC(format)) { + if (IsPixelFormatASTC(format)) { // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this ConvertASTCToRGBA8(data, format, stride, height); } @@ -129,13 +116,12 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra:: NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); VideoCore::MortonCopyPixels128( stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(base)), gl_buffer, + Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer, morton_to_gl); } } -static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, - Tegra::GPUVAddr), +static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> morton_to_gl_fns = { MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, @@ -146,8 +132,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra: MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, }; -static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, - Tegra::GPUVAddr), +static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> gl_to_morton_fns = { MortonCopy<false, PixelFormat::ABGR8>, @@ -192,374 +177,76 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup cur_state.Apply(); } -static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, - const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, - GLuint read_fb_handle, GLuint draw_fb_handle) { - - glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex, - GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(), - src_rect.GetHeight(), 0); - return true; -} - -static bool FillSurface(const Surface& surface, const u8* fill_data, - const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) { - UNREACHABLE(); - return {}; -} - -SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { - SurfaceParams params = *this; - const u32 tiled_size = is_tiled ? 8 : 1; - const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); - Tegra::GPUVAddr aligned_start = - addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); - Tegra::GPUVAddr aligned_end = - addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); - - if (aligned_end - aligned_start > stride_tiled_bytes) { - params.addr = aligned_start; - params.height = static_cast<u32>((aligned_end - aligned_start) / BytesInPixels(stride)); - } else { - // 1 row - ASSERT(aligned_end - aligned_start == stride_tiled_bytes); - const u64 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1); - aligned_start = - addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); - aligned_end = - addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); - params.addr = aligned_start; - params.width = static_cast<u32>(PixelsInBytes(aligned_end - aligned_start) / tiled_size); - params.stride = params.width; - params.height = tiled_size; - } - params.UpdateParams(); - - return params; -} - -SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const { - if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { - return {}; - } - - if (is_tiled) { - unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; - unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; - unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; - unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; - } - - const u32 stride_tiled = !is_tiled ? stride : stride * 8; - - const u32 pixel_offset = - stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + - unscaled_rect.left; - - const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); - - return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; -} - -MathUtil::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { - const u32 begin_pixel_index = static_cast<u32>(PixelsInBytes(sub_surface.addr - addr)); - - if (is_tiled) { - const int x0 = (begin_pixel_index % (stride * 8)) / 8; - const int y0 = (begin_pixel_index / (stride * 8)) * 8; - // Top to bottom - return MathUtil::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width, - height - (y0 + sub_surface.height)); - } - - const int x0 = begin_pixel_index % stride; - const int y0 = begin_pixel_index / stride; - // Bottom to top - return MathUtil::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); -} - -MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { - auto rect = GetSubRect(sub_surface); - rect.left = rect.left * res_scale; - rect.right = rect.right * res_scale; - rect.top = rect.top * res_scale; - rect.bottom = rect.bottom * res_scale; - return rect; -} - -bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { - return std::tie(other_surface.addr, other_surface.width, other_surface.height, - other_surface.stride, other_surface.block_height, other_surface.pixel_format, - other_surface.component_type, - other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height, - pixel_format, component_type, is_tiled) && - pixel_format != PixelFormat::Invalid; -} - -bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { - return sub_surface.addr >= addr && sub_surface.end <= end && - sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && - sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height && - sub_surface.component_type == component_type && - (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && - (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && - GetSubRect(sub_surface).left + sub_surface.width <= stride; -} - -bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { - return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && - addr <= expanded_surface.end && expanded_surface.addr <= end && - is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height && - component_type == expanded_surface.component_type && stride == expanded_surface.stride && - (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % - BytesInPixels(stride * (is_tiled ? 8 : 1)) == - 0; -} - -bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { - if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || - end < texcopy_params.end) { - return false; - } - if (texcopy_params.block_height != block_height || - texcopy_params.component_type != component_type) - return false; - - if (texcopy_params.width != texcopy_params.stride) { - const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1))); - return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && - texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) == 0 && - (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) && - ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride; - } - return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); -} - -VAddr SurfaceParams::GetCpuAddr() const { - // When this function is used, only cpu_addr or (GPU) addr should be set, not both - ASSERT(!(cpu_addr && addr)); - const auto& gpu = Core::System::GetInstance().GPU(); - return cpu_addr.get_value_or(*gpu.memory_manager->GpuToCpuAddress(addr)); -} - -bool CachedSurface::CanFill(const SurfaceParams& dest_surface, - SurfaceInterval fill_interval) const { - if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && - boost::icl::first(fill_interval) >= addr && - boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range - dest_surface.FromInterval(fill_interval).GetInterval() == - fill_interval) { // make sure interval is a rectangle in dest surface - if (fill_size * CHAR_BIT != dest_surface.GetFormatBpp()) { - // Check if bits repeat for our fill_size - const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / CHAR_BIT, 1u); - std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel); - - for (u32 i = 0; i < dest_bytes_per_pixel; ++i) - std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); - - for (u32 i = 0; i < fill_size; ++i) - if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], - dest_bytes_per_pixel) != 0) - return false; - - if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) - return false; - } - return true; - } - return false; -} - -bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, - SurfaceInterval copy_interval) const { - SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == copy_interval); - if (CanSubRect(subrect_params)) - return true; - - if (CanFill(dest_surface, copy_interval)) - return true; - - return false; -} - -SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { - SurfaceInterval result{}; - const auto valid_regions = - SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; - for (auto& valid_interval : valid_regions) { - const SurfaceInterval aligned_interval{ - addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, - BytesInPixels(is_tiled ? 8 * 8 : 1)), - addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, - BytesInPixels(is_tiled ? 8 * 8 : 1))}; - - if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) || - boost::icl::length(aligned_interval) == 0) { - continue; - } - - // Get the rectangle within aligned_interval - const u32 stride_bytes = static_cast<u32>(BytesInPixels(stride)) * (is_tiled ? 8 : 1); - SurfaceInterval rect_interval{ - addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), - addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), - }; - if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { - // 1 row - rect_interval = aligned_interval; - } else if (boost::icl::length(rect_interval) == 0) { - // 2 rows that do not make a rectangle, return the larger one - const SurfaceInterval row1{boost::icl::first(aligned_interval), - boost::icl::first(rect_interval)}; - const SurfaceInterval row2{boost::icl::first(rect_interval), - boost::icl::last_next(aligned_interval)}; - rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2; - } - - if (boost::icl::length(rect_interval) > boost::icl::length(result)) { - result = rect_interval; - } - } - return result; -} - -void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface, - SurfaceInterval copy_interval) { - SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == copy_interval); - - ASSERT(src_surface != dst_surface); - - // This is only called when CanCopy is true, no need to run checks here - if (src_surface->type == SurfaceType::Fill) { - // FillSurface needs a 4 bytes buffer - const u64 fill_offset = - (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; - std::array<u8, 4> fill_buffer; - - u64 fill_buff_pos = fill_offset; - for (int i : {0, 1, 2, 3}) - fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; - - FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params), - draw_framebuffer.handle); - return; - } - if (src_surface->CanSubRect(subrect_params)) { - BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params), - dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params), - src_surface->type, read_framebuffer.handle, draw_framebuffer.handle); - return; - } - UNREACHABLE(); +CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_buffer_size(0) { + texture.Create(); + AllocateSurfaceTexture(texture.handle, + GetFormatTuple(params.pixel_format, params.component_type), params.width, + params.height); } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); -void CachedSurface::LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end) { - ASSERT(type != SurfaceType::Fill); +void CachedSurface::LoadGLBuffer() { + ASSERT(params.type != SurfaceType::Fill); - u8* const texture_src_data = Memory::GetPointer(GetCpuAddr()); - if (texture_src_data == nullptr) - return; + u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); - if (gl_buffer == nullptr) { - gl_buffer_size = GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format); + ASSERT(texture_src_data); + + if (!gl_buffer) { + gl_buffer_size = params.width * params.height * GetGLBytesPerPixel(params.pixel_format); gl_buffer.reset(new u8[gl_buffer_size]); } MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - ASSERT(load_start >= addr && load_end <= end); - const u64 start_offset = load_start - addr; - - if (!is_tiled) { - const u32 bytes_per_pixel{GetFormatBpp() >> 3}; + if (!params.is_tiled) { + const u32 bytes_per_pixel{params.GetFormatBpp() >> 3}; - std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, - bytes_per_pixel * width * height); + std::memcpy(&gl_buffer[0], texture_src_data, + bytes_per_pixel * params.width * params.height); } else { - morton_to_gl_fns[static_cast<size_t>(pixel_format)](GetActualWidth(), block_height, - GetActualHeight(), &gl_buffer[0], addr, - load_start, load_end); + morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( + params.width, params.block_height, params.height, &gl_buffer[0], params.addr); } } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end) { - u8* const dst_buffer = Memory::GetPointer(GetCpuAddr()); - if (dst_buffer == nullptr) - return; - - ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); - - // TODO: Should probably be done in ::Memory:: and check for other regions too - // same as loadglbuffer() - if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) - flush_end = Memory::VRAM_VADDR_END; +void CachedSurface::FlushGLBuffer() { + u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr()); - if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) - flush_start = Memory::VRAM_VADDR; + ASSERT(dst_buffer); + ASSERT(gl_buffer_size == + params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); - ASSERT(flush_start >= addr && flush_end <= end); - const u64 start_offset = flush_start - addr; - const u64 end_offset = flush_end - addr; - - if (type == SurfaceType::Fill) { - const u64 coarse_start_offset = start_offset - (start_offset % fill_size); - const u64 backup_bytes = start_offset % fill_size; - std::array<u8, 4> backup_data; - if (backup_bytes) - std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); - - for (u64 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { - std::memcpy(&dst_buffer[offset], &fill_data[0], - std::min(fill_size, end_offset - offset)); - } - - if (backup_bytes) - std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); - } else if (!is_tiled) { - std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); + if (!params.is_tiled) { + std::memcpy(dst_buffer, &gl_buffer[0], params.SizeInBytes()); } else { - gl_to_morton_fns[static_cast<size_t>(pixel_format)]( - stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end); + gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( + params.width, params.block_height, params.height, &gl_buffer[0], params.addr); } } MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); -void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, - GLuint draw_fb_handle) { - if (type == SurfaceType::Fill) +void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { + if (params.type == SurfaceType::Fill) return; MICROPROFILE_SCOPE(OpenGL_TextureUL); ASSERT(gl_buffer_size == - GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format)); + params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); + + const auto& rect{params.GetRect()}; // Load data from memory to the surface GLint x0 = static_cast<GLint>(rect.left); GLint y0 = static_cast<GLint>(rect.bottom); - size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); + size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format); - const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); + const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); GLuint target_tex = texture.handle; - - // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in - // surface - OGLTexture unscaled_tex; - if (res_scale != 1) { - x0 = 0; - y0 = 0; - - unscaled_tex.Create(); - AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); - target_tex = unscaled_tex.handle; - } - OpenGLState cur_state = OpenGLState::GetCurState(); GLuint old_tex = cur_state.texture_units[0].texture_2d; @@ -567,15 +254,15 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint cur_state.Apply(); // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride)); + ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width)); glActiveTexture(GL_TEXTURE0); if (tuple.compressed) { - glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, - static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()), - static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0, - static_cast<GLsizei>(size), &gl_buffer[buffer_offset]); + glCompressedTexImage2D( + GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width), + static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.SizeInBytes()), + &gl_buffer[buffer_offset]); } else { glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, @@ -586,29 +273,17 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint cur_state.texture_units[0].texture_2d = old_tex; cur_state.Apply(); - - if (res_scale != 1) { - auto scaled_rect = rect; - scaled_rect.left *= res_scale; - scaled_rect.top *= res_scale; - scaled_rect.right *= res_scale; - scaled_rect.bottom *= res_scale; - - BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle, - scaled_rect, type, read_fb_handle, draw_fb_handle); - } } MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); -void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, - GLuint draw_fb_handle) { - if (type == SurfaceType::Fill) +void CachedSurface::DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { + if (params.type == SurfaceType::Fill) return; MICROPROFILE_SCOPE(OpenGL_TextureDL); - if (gl_buffer == nullptr) { - gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); + if (!gl_buffer) { + gl_buffer_size = params.width * params.height * GetGLBytesPerPixel(params.pixel_format); gl_buffer.reset(new u8[gl_buffer_size]); } @@ -616,437 +291,45 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui OpenGLState prev_state = state; SCOPE_EXIT({ prev_state.Apply(); }); - const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); + const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); // Ensure no bad interactions with GL_PACK_ALIGNMENT - ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride)); - size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format); - - // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush - if (res_scale != 1) { - auto scaled_rect = rect; - scaled_rect.left *= res_scale; - scaled_rect.top *= res_scale; - scaled_rect.right *= res_scale; - scaled_rect.bottom *= res_scale; - - OGLTexture unscaled_tex; - unscaled_tex.Create(); - - MathUtil::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; - AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); - BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type, - read_fb_handle, draw_fb_handle); - - state.texture_units[0].texture_2d = unscaled_tex.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); - } else { - state.UnbindTexture(texture.handle); - state.draw.read_framebuffer = read_fb_handle; - state.Apply(); - - if (type == SurfaceType::ColorTexture) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - texture.handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - } else if (type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - texture.handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - } else { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - texture.handle, 0); - } - glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), - static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), - tuple.format, tuple.type, &gl_buffer[buffer_offset]); - } + ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); + glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); -} + const auto& rect{params.GetRect()}; + size_t buffer_offset = + (rect.bottom * params.width + rect.left) * GetGLBytesPerPixel(params.pixel_format); -enum class MatchFlags { - None = 0, - Invalid = 1, // Flag that can be applied to other match types, invalid matches require - // validation before they can be used - Exact = 1 << 1, // Surfaces perfectly match - SubRect = 1 << 2, // Surface encompasses params - Copy = 1 << 3, // Surface we can copy from - Expand = 1 << 4, // Surface that can expand params - TexCopy = 1 << 5 // Surface that will match a display transfer "texture copy" parameters -}; - -constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) { - return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs)); -} - -constexpr MatchFlags operator&(MatchFlags lhs, MatchFlags rhs) { - return static_cast<MatchFlags>(static_cast<int>(lhs) & static_cast<int>(rhs)); -} + state.UnbindTexture(texture.handle); + state.draw.read_framebuffer = read_fb_handle; + state.Apply(); -/// Get the best surface match (and its match type) for the given flags -template <MatchFlags find_flags> -Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, - ScaleMatch match_scale_type, - boost::optional<SurfaceInterval> validate_interval = boost::none) { - Surface match_surface = nullptr; - bool match_valid = false; - u32 match_scale = 0; - SurfaceInterval match_interval{}; - - for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { - for (auto& surface : pair.second) { - bool res_scale_matched = match_scale_type == ScaleMatch::Exact - ? (params.res_scale == surface->res_scale) - : (params.res_scale <= surface->res_scale); - // validity will be checked in GetCopyableInterval - bool is_valid = - (find_flags & MatchFlags::Copy) != MatchFlags::None - ? true - : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); - - if ((find_flags & MatchFlags::Invalid) == MatchFlags::None && !is_valid) - continue; - - auto IsMatch_Helper = [&](auto check_type, auto match_fn) { - if ((find_flags & check_type) == MatchFlags::None) - return; - - bool matched; - SurfaceInterval surface_interval; - std::tie(matched, surface_interval) = match_fn(); - if (!matched) - return; - - if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && - surface->type != SurfaceType::Fill) - return; - - // Found a match, update only if this is better than the previous one - auto UpdateMatch = [&] { - match_surface = surface; - match_valid = is_valid; - match_scale = surface->res_scale; - match_interval = surface_interval; - }; - - if (surface->res_scale > match_scale) { - UpdateMatch(); - return; - } else if (surface->res_scale < match_scale) { - return; - } - - if (is_valid && !match_valid) { - UpdateMatch(); - return; - } else if (is_valid != match_valid) { - return; - } - - if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { - UpdateMatch(); - } - }; - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] { - return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] { - return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] { - auto copy_interval = - params.FromInterval(*validate_interval).GetCopyableInterval(surface); - bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && - surface->CanCopy(params, copy_interval); - return std::make_pair(matched, copy_interval); - }); - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] { - return std::make_pair(surface->CanExpand(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] { - return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); - }); - } + if (params.type == SurfaceType::ColorTexture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + } else if (params.type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + } else { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, 0); } - return match_surface; + glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), + static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), + tuple.format, tuple.type, &gl_buffer[buffer_offset]); + + glPixelStorei(GL_PACK_ROW_LENGTH, 0); } RasterizerCacheOpenGL::RasterizerCacheOpenGL() { read_framebuffer.Create(); draw_framebuffer.Create(); - - attributeless_vao.Create(); - - d24s8_abgr_buffer.Create(); - d24s8_abgr_buffer_size = 0; - - const char* vs_source = R"( -#version 330 core -const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); -void main() { - gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); -} -)"; - const char* fs_source = R"( -#version 330 core - -uniform samplerBuffer tbo; -uniform vec2 tbo_size; -uniform vec4 viewport; - -out vec4 color; - -void main() { - vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw; - int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x); - color = texelFetch(tbo, tbo_offset).rabg; -} -)"; - d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source); - - OpenGLState state = OpenGLState::GetCurState(); - GLuint old_program = state.draw.shader_program; - state.draw.shader_program = d24s8_abgr_shader.handle; - state.Apply(); - - GLint tbo_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo"); - ASSERT(tbo_u_id != -1); - glUniform1i(tbo_u_id, 0); - - state.draw.shader_program = old_program; - state.Apply(); - - d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size"); - ASSERT(d24s8_abgr_tbo_size_u_id != -1); - d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport"); - ASSERT(d24s8_abgr_viewport_u_id != -1); -} - -RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); -} - -bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, - const MathUtil::Rectangle<u32>& src_rect, - const Surface& dst_surface, - const MathUtil::Rectangle<u32>& dst_rect) { - if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) - return false; - - return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, - dst_rect, src_surface->type, read_framebuffer.handle, - draw_framebuffer.handle); -} - -void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex, - const MathUtil::Rectangle<u32>& src_rect, - GLuint dst_tex, - const MathUtil::Rectangle<u32>& dst_rect) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.draw.read_framebuffer = read_framebuffer.handle; - state.draw.draw_framebuffer = draw_framebuffer.handle; - state.Apply(); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle); - - GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4; - if (target_pbo_size > d24s8_abgr_buffer_size) { - d24s8_abgr_buffer_size = target_pbo_size * 2; - glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY); - } - - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, - 0); - glReadPixels(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.bottom), - static_cast<GLsizei>(src_rect.GetWidth()), - static_cast<GLsizei>(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, - 0); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - - // PBO now contains src_tex in RABG format - state.draw.shader_program = d24s8_abgr_shader.handle; - state.draw.vertex_array = attributeless_vao.handle; - state.viewport.x = static_cast<GLint>(dst_rect.left); - state.viewport.y = static_cast<GLint>(dst_rect.bottom); - state.viewport.width = static_cast<GLsizei>(dst_rect.GetWidth()); - state.viewport.height = static_cast<GLsizei>(dst_rect.GetHeight()); - state.Apply(); - - OGLTexture tbo; - tbo.Create(); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_BUFFER, tbo.handle); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle); - - glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast<GLfloat>(src_rect.GetWidth()), - static_cast<GLfloat>(src_rect.GetHeight())); - glUniform4f(d24s8_abgr_viewport_u_id, static_cast<GLfloat>(state.viewport.x), - static_cast<GLfloat>(state.viewport.y), static_cast<GLfloat>(state.viewport.width), - static_cast<GLfloat>(state.viewport.height)); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - glBindTexture(GL_TEXTURE_BUFFER, 0); -} - -Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return nullptr; - } - // Use GetSurfaceSubRect instead - ASSERT(params.width == params.stride); - - // Check for an exact match in existing surfaces - Surface surface = - FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale); - - if (surface == nullptr) { - u16 target_res_scale = params.res_scale; - if (match_res_scale != ScaleMatch::Exact) { - // This surface may have a subrect of another surface with a higher res_scale, find it - // to adjust our params - SurfaceParams find_params = params; - Surface expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - } - SurfaceParams new_params = params; - new_params.res_scale = target_res_scale; - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - - if (load_if_create) { - ValidateSurface(surface, params.addr, params.size); - } - - return surface; -} - -boost::optional<Tegra::GPUVAddr> RasterizerCacheOpenGL::TryFindFramebufferGpuAddress( - VAddr cpu_addr) const { - // Tries to find the GPU address of a framebuffer based on the CPU address. This is because - // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU - // addresses. We iterate through all cached framebuffers, and compare their starting CPU address - // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps - // surfaces. - - std::vector<Tegra::GPUVAddr> gpu_addresses; - for (const auto& pair : surface_cache) { - for (const auto& surface : pair.second) { - const VAddr surface_cpu_addr = surface->GetCpuAddr(); - if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + surface->size)) { - ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); - gpu_addresses.push_back(surface->addr); - } - } - } - - if (gpu_addresses.empty()) { - return {}; - } - - ASSERT_MSG(gpu_addresses.size() == 1, ">1 surface is unsupported"); - return gpu_addresses[0]; -} - -SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, - ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return std::make_tuple(nullptr, MathUtil::Rectangle<u32>{}); - } - - // Attempt to find encompassing surface - Surface surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params, - match_res_scale); - - // Check if FindMatch failed because of res scaling - // If that's the case create a new surface with - // the dimensions of the lower res_scale surface - // to suggest it should not be used again - if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { - surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params, - ScaleMatch::Ignore); - if (surface != nullptr) { - ASSERT(surface->res_scale < params.res_scale); - SurfaceParams new_params = *surface; - new_params.res_scale = params.res_scale; - - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - } - - SurfaceParams aligned_params = params; - if (params.is_tiled) { - aligned_params.height = Common::AlignUp(params.height, 8); - aligned_params.width = Common::AlignUp(params.width, 8); - aligned_params.stride = Common::AlignUp(params.stride, 8); - aligned_params.UpdateParams(); - } - - // Check for a surface we can expand before creating a new one - if (surface == nullptr) { - surface = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(surface_cache, aligned_params, - match_res_scale); - if (surface != nullptr) { - aligned_params.width = aligned_params.stride; - aligned_params.UpdateParams(); - - SurfaceParams new_params = *surface; - new_params.addr = std::min(aligned_params.addr, surface->addr); - new_params.end = std::max(aligned_params.end, surface->end); - new_params.size = new_params.end - new_params.addr; - new_params.height = static_cast<u32>( - new_params.size / aligned_params.BytesInPixels(aligned_params.stride)); - ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); - - Surface new_surface = CreateSurface(new_params); - DuplicateSurface(surface, new_surface); - - // Delete the expanded surface, this can't be done safely yet - // because it may still be in use - remove_surfaces.emplace(surface); - - surface = new_surface; - RegisterSurface(new_surface); - } - } - - // No subrect found - create and return a new surface - if (surface == nullptr) { - SurfaceParams new_params = aligned_params; - // Can't have gaps in a surface - new_params.width = aligned_params.stride; - new_params.UpdateParams(); - // GetSurface will create the new surface and possibly adjust res_scale if necessary - surface = GetSurface(new_params, match_res_scale, load_if_create); - } else if (load_if_create) { - ValidateSurface(surface, aligned_params.addr, aligned_params.size); - } - - return std::make_tuple(surface, surface->GetScaledSubRect(params)); } Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { @@ -1056,36 +339,21 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu params.addr = config.tic.Address(); params.is_tiled = config.tic.IsTiled(); params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); + params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); + params.type = SurfaceParams::GetFormatType(params.pixel_format); + params.width = Common::AlignUp(config.tic.Width(), params.GetCompressionFactor()); + params.height = Common::AlignUp(config.tic.Height(), params.GetCompressionFactor()); - params.width = Common::AlignUp(config.tic.Width(), params.GetCompresssionFactor()) / - params.GetCompresssionFactor(); - params.height = Common::AlignUp(config.tic.Height(), params.GetCompresssionFactor()) / - params.GetCompresssionFactor(); + if (params.is_tiled) { + params.block_height = config.tic.BlockHeight(); + } // TODO(Subv): Different types per component are not supported. ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && config.tic.r_type.Value() == config.tic.b_type.Value() && config.tic.r_type.Value() == config.tic.a_type.Value()); - params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); - - if (config.tic.IsTiled()) { - params.block_height = config.tic.BlockHeight(); - - // TODO(bunnei): The below align up is a hack. This is here because some compressed textures - // are not a multiple of their own compression factor, and so this accounts for that. This - // could potentially result in an extra row of 4px being decoded if a texture is not a - // multiple of 4. - params.width = Common::AlignUp(params.width, 4); - params.height = Common::AlignUp(params.height, 4); - } else { - // Use the texture-provided stride value if the texture isn't tiled. - params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch())); - } - - params.UpdateParams(); - - return GetSurface(params, ScaleMatch::Ignore, true); + return GetSurface(params); } SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( @@ -1096,17 +364,6 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( // TODO(bunnei): This is hard corded to use just the first render buffer NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); - // update resolution_scale_factor and reset cache if changed - // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We - // need to fix this before making the renderer multi-threaded. - static u16 resolution_scale_factor = GetResolutionScaleFactor(); - if (resolution_scale_factor != GetResolutionScaleFactor()) { - resolution_scale_factor = GetResolutionScaleFactor(); - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); - } - MathUtil::Rectangle<u32> viewport_clamped{ static_cast<u32>(std::clamp(viewport.left, 0, static_cast<s32>(config.width))), static_cast<u32>(std::clamp(viewport.top, 0, static_cast<s32>(config.height))), @@ -1116,7 +373,6 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( // get color and depth surfaces SurfaceParams color_params; color_params.is_tiled = true; - color_params.res_scale = resolution_scale_factor; color_params.width = config.width; color_params.height = config.height; // TODO(Subv): Can framebuffers use a different block height? @@ -1126,319 +382,69 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( color_params.addr = config.Address(); color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); - color_params.UpdateParams(); + color_params.type = SurfaceParams::GetFormatType(color_params.pixel_format); ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); - // depth_params.addr = config.GetDepthBufferPhysicalAddress(); - // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); - // depth_params.UpdateParams(); - - auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); - auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); - - // Make sure that framebuffers don't overlap if both color and depth are being used - if (using_color_fb && using_depth_fb && - boost::icl::length(color_vp_interval & depth_vp_interval)) { - NGLOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " - "overlapping framebuffers not supported!"); - using_depth_fb = false; - } MathUtil::Rectangle<u32> color_rect{}; - Surface color_surface = nullptr; - if (using_color_fb) - std::tie(color_surface, color_rect) = - GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + Surface color_surface; + if (using_color_fb) { + color_surface = GetSurface(color_params); + color_rect = color_surface->GetSurfaceParams().GetRect(); + } MathUtil::Rectangle<u32> depth_rect{}; - Surface depth_surface = nullptr; - if (using_depth_fb) - std::tie(depth_surface, depth_rect) = - GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + Surface depth_surface; + if (using_depth_fb) { + depth_surface = GetSurface(depth_params); + depth_rect = depth_surface->GetSurfaceParams().GetRect(); + } MathUtil::Rectangle<u32> fb_rect{}; - if (color_surface != nullptr && depth_surface != nullptr) { + if (color_surface && depth_surface) { fb_rect = color_rect; // Color and Depth surfaces must have the same dimensions and offsets if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { - color_surface = GetSurface(color_params, ScaleMatch::Exact, false); - depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); - fb_rect = color_surface->GetScaledRect(); + color_surface = GetSurface(color_params); + depth_surface = GetSurface(depth_params); + fb_rect = color_surface->GetSurfaceParams().GetRect(); } - } else if (color_surface != nullptr) { + } else if (color_surface) { fb_rect = color_rect; - } else if (depth_surface != nullptr) { + } else if (depth_surface) { fb_rect = depth_rect; } - if (color_surface != nullptr) { - ValidateSurface(color_surface, boost::icl::first(color_vp_interval), - boost::icl::length(color_vp_interval)); - } - if (depth_surface != nullptr) { - ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), - boost::icl::length(depth_vp_interval)); - } - return std::make_tuple(color_surface, depth_surface, fb_rect); } -Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { - UNREACHABLE(); - return {}; -} - -SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { - MathUtil::Rectangle<u32> rect{}; - - Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>( - surface_cache, params, ScaleMatch::Ignore); - - if (match_surface != nullptr) { - ValidateSurface(match_surface, params.addr, params.size); - - SurfaceParams match_subrect; - if (params.width != params.stride) { - const u32 tiled_size = match_surface->is_tiled ? 8 : 1; - match_subrect = params; - match_subrect.width = - static_cast<u32>(match_surface->PixelsInBytes(params.width) / tiled_size); - match_subrect.stride = - static_cast<u32>(match_surface->PixelsInBytes(params.stride) / tiled_size); - match_subrect.height *= tiled_size; - } else { - match_subrect = match_surface->FromInterval(params.GetInterval()); - ASSERT(match_subrect.GetInterval() == params.GetInterval()); - } - - rect = match_surface->GetScaledSubRect(match_subrect); - } - - return std::make_tuple(match_surface, rect); -} - -void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, - const Surface& dest_surface) { - ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); - - BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, - dest_surface->GetScaledSubRect(*src_surface)); - - dest_surface->invalid_regions -= src_surface->GetInterval(); - dest_surface->invalid_regions += src_surface->invalid_regions; - - SurfaceRegions regions; - for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { - if (pair.second == src_surface) { - regions += pair.first; - } - } - for (auto& interval : regions) { - dirty_regions.set({interval, dest_surface}); - } -} - -void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, - u64 size) { - if (size == 0) - return; - - const SurfaceInterval validate_interval(addr, addr + size); - - if (surface->type == SurfaceType::Fill) { - // Sanity check, fill surfaces will always be valid when used - ASSERT(surface->IsRegionValid(validate_interval)); - return; - } - - while (true) { - const auto it = surface->invalid_regions.find(validate_interval); - if (it == surface->invalid_regions.end()) - break; - - const auto interval = *it & validate_interval; - // Look for a valid surface to copy from - SurfaceParams params = *surface; - - Surface copy_surface = - FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval); - if (copy_surface != nullptr) { - SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); - CopySurface(copy_surface, surface, copy_interval); - surface->invalid_regions.erase(copy_interval); - continue; - } - - // Load data from Switch memory - FlushRegion(params.addr, params.size); - surface->LoadGLBuffer(params.addr, params.end); - surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, - draw_framebuffer.handle); - surface->invalid_regions.erase(params.GetInterval()); - } -} - -void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface) { - if (size == 0) - return; - - const SurfaceInterval flush_interval(addr, addr + size); - SurfaceRegions flushed_intervals; - - for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { - // small sizes imply that this most likely comes from the cpu, flush the entire region - // the point is to avoid thousands of small writes every frame if the cpu decides to access - // that region, anything higher than 8 you're guaranteed it comes from a service - const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; - auto& surface = pair.second; - - if (flush_surface != nullptr && surface != flush_surface) - continue; - - // Sanity check, this surface is the last one that marked this region dirty - ASSERT(surface->IsRegionValid(interval)); - - if (surface->type != SurfaceType::Fill) { - SurfaceParams params = surface->FromInterval(interval); - surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, - draw_framebuffer.handle); - } - surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); - flushed_intervals += interval; - } - // Reset dirty regions - dirty_regions -= flushed_intervals; +void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { + surface->LoadGLBuffer(); + surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); } -void RasterizerCacheOpenGL::FlushAll() { - FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); +void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) { + surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); + surface->FlushGLBuffer(); } -void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size, - const Surface& region_owner) { - if (size == 0) - return; - - const SurfaceInterval invalid_interval(addr, addr + size); - - if (region_owner != nullptr) { - ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); - // Surfaces can't have a gap - ASSERT(region_owner->width == region_owner->stride); - region_owner->invalid_regions.erase(invalid_interval); - } - - for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { - for (auto& cached_surface : pair.second) { - if (cached_surface == region_owner) - continue; - - // If cpu is invalidating this region we want to remove it - // to (likely) mark the memory pages as uncached - if (region_owner == nullptr && size <= 8) { - FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); - remove_surfaces.emplace(cached_surface); - continue; - } - - const auto interval = cached_surface->GetInterval() & invalid_interval; - cached_surface->invalid_regions.insert(interval); - - // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures - if (cached_surface->type == SurfaceType::Fill && - cached_surface->IsSurfaceFullyInvalid()) { - remove_surfaces.emplace(cached_surface); - } - } +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { + if (params.addr == 0 || params.height * params.width == 0) { + return {}; } - if (region_owner != nullptr) - dirty_regions.set({invalid_interval, region_owner}); - else - dirty_regions.erase(invalid_interval); - - for (auto& remove_surface : remove_surfaces) { - if (remove_surface == region_owner) { - Surface expanded_surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>( - surface_cache, *region_owner, ScaleMatch::Ignore); - ASSERT(expanded_surface); - - if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { - DuplicateSurface(region_owner, expanded_surface); - } else { - continue; - } - } - UnregisterSurface(remove_surface); + // Check for an exact match in existing surfaces + auto search = surface_cache.find(params.addr); + Surface surface; + if (search != surface_cache.end()) { + surface = search->second; + } else { + surface = std::make_shared<CachedSurface>(params); + surface_cache[params.addr] = surface; } - remove_surfaces.clear(); -} - -Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { - Surface surface = std::make_shared<CachedSurface>(); - static_cast<SurfaceParams&>(*surface) = params; - - surface->texture.Create(); - - surface->gl_buffer_size = 0; - surface->invalid_regions.insert(surface->GetInterval()); - AllocateSurfaceTexture(surface->texture.handle, - GetFormatTuple(surface->pixel_format, surface->component_type), - surface->GetScaledWidth(), surface->GetScaledHeight()); + LoadSurface(surface); return surface; } - -void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { - if (surface->registered) { - return; - } - surface->registered = true; - surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); - UpdatePagesCachedCount(surface->addr, surface->size, 1); -} - -void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { - if (!surface->registered) { - return; - } - surface->registered = false; - UpdatePagesCachedCount(surface->addr, surface->size, -1); - surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); -} - -void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { - const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) - - (addr >> Tegra::MemoryManager::PAGE_BITS) + 1; - const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS; - const u64 page_end = page_start + num_pages; - - // Interval maps will erase segments if count reaches 0, so if delta is negative we have to - // subtract after iterating - const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); - if (delta > 0) - cached_pages.add({pages_interval, delta}); - - for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { - const auto interval = pair.first & pages_interval; - const int count = pair.second; - - const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) - << Tegra::MemoryManager::PAGE_BITS; - const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) - << Tegra::MemoryManager::PAGE_BITS; - const u64 interval_size = interval_end_addr - interval_start_addr; - - if (delta > 0 && count == delta) - Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); - else if (delta < 0 && count == -delta) - Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); - else - ASSERT(count >= 0); - } - - if (delta < 0) - cached_pages.add({pages_interval, delta}); -} diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 9da945e19..ca9945df4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -1,57 +1,22 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once #include <array> +#include <map> #include <memory> -#include <set> -#include <tuple> -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-local-typedefs" -#endif -#include <boost/icl/interval_map.hpp> -#include <boost/icl/interval_set.hpp> -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#include <boost/optional.hpp> -#include <glad/glad.h> -#include "common/assert.h" -#include "common/common_funcs.h" + #include "common/common_types.h" #include "common/math_util.h" -#include "video_core/gpu.h" -#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/textures/texture.h" -struct CachedSurface; +class CachedSurface; using Surface = std::shared_ptr<CachedSurface>; -using SurfaceSet = std::set<Surface>; - -using SurfaceRegions = boost::icl::interval_set<Tegra::GPUVAddr>; -using SurfaceMap = boost::icl::interval_map<Tegra::GPUVAddr, Surface>; -using SurfaceCache = boost::icl::interval_map<Tegra::GPUVAddr, SurfaceSet>; - -using SurfaceInterval = SurfaceCache::interval_type; -static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && - std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(), - "incorrect interval types"); - -using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>; using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; -using PageMap = boost::icl::interval_map<u64, int>; - -enum class ScaleMatch { - Exact, // only accept same res scale - Upscale, // only allow higher scale than params - Ignore // accept every scaled res -}; - struct SurfaceParams { enum class PixelFormat { ABGR8 = 0, @@ -93,10 +58,10 @@ struct SurfaceParams { /** * Gets the compression factor for the specified PixelFormat. This applies to just the * "compressed width" and "compressed height", not the overall compression factor of a - * compressed image. This is used for maintaining proper surface sizes for compressed texture - * formats. + * compressed image. This is used for maintaining proper surface sizes for compressed + * texture formats. */ - static constexpr u32 GetCompresssionFactor(PixelFormat format) { + static constexpr u32 GetCompressionFactor(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; @@ -118,8 +83,8 @@ struct SurfaceParams { ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); return compression_factor_table[static_cast<size_t>(format)]; } - u32 GetCompresssionFactor() const { - return GetCompresssionFactor(pixel_format); + u32 GetCompressionFactor() const { + return GetCompressionFactor(pixel_format); } static constexpr u32 GetFormatBpp(PixelFormat format) { @@ -165,25 +130,6 @@ struct SurfaceParams { } } - static bool IsFormatASTC(PixelFormat format) { - switch (format) { - case PixelFormat::ASTC_2D_4X4: - return true; - default: - return false; - } - } - - static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { - switch (format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::ABGR8; - default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format) { // TODO(Subv): Properly implement this switch (format) { @@ -276,36 +222,6 @@ struct SurfaceParams { } } - static ComponentType ComponentTypeFromGPUPixelFormat( - Tegra::FramebufferConfig::PixelFormat format) { - switch (format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return ComponentType::UNorm; - default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - - static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { - SurfaceType a_type = GetFormatType(pixel_format_a); - SurfaceType b_type = GetFormatType(pixel_format_b); - - if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) { - return true; - } - - if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) { - return true; - } - - if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) { - return true; - } - - return false; - } - static SurfaceType GetFormatType(PixelFormat pixel_format) { if (static_cast<size_t>(pixel_format) < MaxPixelFormat) { return SurfaceType::ColorTexture; @@ -317,220 +233,79 @@ struct SurfaceParams { return SurfaceType::Invalid; } - /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" - /// and "pixel_format" - void UpdateParams() { - if (stride == 0) { - stride = width; - } - type = GetFormatType(pixel_format); - size = !is_tiled ? BytesInPixels(stride * (height - 1) + width) - : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); - end = addr + size; - } - - SurfaceInterval GetInterval() const { - return SurfaceInterval::right_open(addr, end); - } - - // Returns the outer rectangle containing "interval" - SurfaceParams FromInterval(SurfaceInterval interval) const; - - SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const; - - // Returns the region of the biggest valid rectange within interval - SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; - - /** - * Gets the actual width (in pixels) of the surface. This is provided because `width` is used - * for tracking the surface region in memory, which may be compressed for certain formats. In - * this scenario, `width` is actually the compressed width. - */ - u32 GetActualWidth() const { - return width * GetCompresssionFactor(); - } - - /** - * Gets the actual height (in pixels) of the surface. This is provided because `height` is used - * for tracking the surface region in memory, which may be compressed for certain formats. In - * this scenario, `height` is actually the compressed height. - */ - u32 GetActualHeight() const { - return height * GetCompresssionFactor(); - } - - u32 GetScaledWidth() const { - return width * res_scale; - } - - u32 GetScaledHeight() const { - return height * res_scale; - } - MathUtil::Rectangle<u32> GetRect() const { return {0, height, width, 0}; } - MathUtil::Rectangle<u32> GetScaledRect() const { - return {0, GetScaledHeight(), GetScaledWidth(), 0}; - } - - u64 PixelsInBytes(u64 size) const { - return size * CHAR_BIT / GetFormatBpp(pixel_format); - } - - u64 BytesInPixels(u64 pixels) const { - return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; + size_t SizeInBytes() const { + const u32 compression_factor{GetCompressionFactor()}; + ASSERT(width % compression_factor == 0); + ASSERT(height % compression_factor == 0); + return (width / compression_factor) * (height / compression_factor) * + GetFormatBpp(pixel_format) / CHAR_BIT; } VAddr GetCpuAddr() const; - bool ExactMatch(const SurfaceParams& other_surface) const; - bool CanSubRect(const SurfaceParams& sub_surface) const; - bool CanExpand(const SurfaceParams& expanded_surface) const; - bool CanTexCopy(const SurfaceParams& texcopy_params) const; - - MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; - MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; - - Tegra::GPUVAddr addr = 0; - Tegra::GPUVAddr end = 0; - boost::optional<VAddr> cpu_addr; - u64 size = 0; - - u32 width = 0; - u32 height = 0; - u32 stride = 0; - u32 block_height = 0; - u16 res_scale = 1; - - bool is_tiled = false; - PixelFormat pixel_format = PixelFormat::Invalid; - SurfaceType type = SurfaceType::Invalid; - ComponentType component_type = ComponentType::Invalid; + Tegra::GPUVAddr addr; + u32 width; + u32 height; + u32 block_height; + bool is_tiled; + PixelFormat pixel_format; + SurfaceType type; + ComponentType component_type; }; +static_assert(std::is_pod<SurfaceParams>::value, "SurfaceParams is not POD"); -struct CachedSurface : SurfaceParams { - bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; - bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; - - bool IsRegionValid(SurfaceInterval interval) const { - return (invalid_regions.find(interval) == invalid_regions.end()); - } +class CachedSurface final { +public: + CachedSurface(const SurfaceParams& params); - bool IsSurfaceFullyInvalid() const { - return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); + const OGLTexture& Texture() const { + return texture; } - bool registered = false; - SurfaceRegions invalid_regions; - - u64 fill_size = 0; /// Number of bytes to read from fill_data - std::array<u8, 4> fill_data; - - OGLTexture texture; - - static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) { - if (format == PixelFormat::Invalid) + static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) { + if (format == SurfaceParams::PixelFormat::Invalid) return 0; return SurfaceParams::GetFormatBpp(format) / CHAR_BIT; } - std::unique_ptr<u8[]> gl_buffer; - size_t gl_buffer_size = 0; + const SurfaceParams& GetSurfaceParams() const { + return params; + } // Read/Write data in Switch memory to/from gl_buffer - void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end); - void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end); + void LoadGLBuffer(); + void FlushGLBuffer(); // Upload/Download data in gl_buffer in/to this surface's texture - void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, - GLuint draw_fb_handle); - void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, - GLuint draw_fb_handle); + void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); + void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); + +private: + OGLTexture texture; + std::unique_ptr<u8[]> gl_buffer; + size_t gl_buffer_size; + SurfaceParams params; }; -class RasterizerCacheOpenGL : NonCopyable { +class RasterizerCacheOpenGL final : NonCopyable { public: RasterizerCacheOpenGL(); - ~RasterizerCacheOpenGL(); - - /// Blit one surface's texture to another - bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect, - const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect); - - void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, - GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect); - /// Copy one surface's region to another - void CopySurface(const Surface& src_surface, const Surface& dst_surface, - SurfaceInterval copy_interval); - - /// Load a texture from Switch memory to OpenGL and cache it (if not already cached) - Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create); - - /// Tries to find a framebuffer GPU address based on the provided CPU address - boost::optional<Tegra::GPUVAddr> TryFindFramebufferGpuAddress(VAddr cpu_addr) const; - - /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from - /// Switch memory to OpenGL and caches it (if not already cached) - SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create); - - /// Get a surface based on the texture configuration Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); - - /// Get the color and depth surfaces based on the framebuffer configuration SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport); - - /// Get a surface that matches the fill config - Surface GetFillSurface(const void* config); - - /// Get a surface that matches a "texture copy" display transfer config - SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); - - /// Write any cached resources overlapping the region back to memory (if dirty) - void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr); - - /// Mark region as being invalidated by region_owner (nullptr if Switch memory) - void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner); - - /// Flush all cached resources tracked by this cache manager - void FlushAll(); + void LoadSurface(const Surface& surface); + void FlushSurface(const Surface& surface); private: - void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); - - /// Update surface's texture for given region when necessary - void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size); - - /// Create a new surface - Surface CreateSurface(const SurfaceParams& params); - - /// Register surface into the cache - void RegisterSurface(const Surface& surface); - - /// Remove surface from the cache - void UnregisterSurface(const Surface& surface); - - /// Increase/decrease the number of surface in pages touching the specified region - void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); - - SurfaceCache surface_cache; - PageMap cached_pages; - SurfaceMap dirty_regions; - SurfaceSet remove_surfaces; + Surface GetSurface(const SurfaceParams& params); + std::map<Tegra::GPUVAddr, Surface> surface_cache; OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; - - OGLVertexArray attributeless_vao; - OGLBuffer d24s8_abgr_buffer; - GLsizeiptr d24s8_abgr_buffer_size; - OGLProgram d24s8_abgr_shader; - GLint d24s8_abgr_tbo_size_u_id; - GLint d24s8_abgr_viewport_u_id; }; |