From 7731a0e2d15da04eea746b4b8dd5c6c4b29f9f29 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 12 May 2019 20:33:52 -0400 Subject: texture_cache: General Fixes Fixed ASTC mipmaps loading Fixed alignment on openGL upload/download Fixed Block Height Calculation Removed unalign_height --- .../renderer_opengl/gl_texture_cache.cpp | 11 ++- src/video_core/surface.h | 84 +++++++++++++++++++++- src/video_core/texture_cache/surface_base.cpp | 18 ++++- src/video_core/texture_cache/surface_base.h | 4 ++ src/video_core/texture_cache/surface_params.cpp | 52 ++++++++------ src/video_core/texture_cache/surface_params.h | 27 ++++--- src/video_core/textures/convert.cpp | 14 ++-- src/video_core/textures/convert.h | 7 +- 8 files changed, 170 insertions(+), 47 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a55097e5f..197c9f02c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -23,6 +23,7 @@ using VideoCore::MortonSwizzleMode; using VideoCore::Surface::ComponentType; using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::SurfaceCompression; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; @@ -242,10 +243,10 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Download); // TODO(Rodrigo): Optimize alignment - glPixelStorei(GL_PACK_ALIGNMENT, 1); SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.num_levels; ++level) { + glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); if (is_compressed) { @@ -270,10 +271,14 @@ void CachedSurface::UploadTexture(std::vector& staging_buffer) { void CachedSurface::UploadTextureMipmap(u32 level, std::vector& staging_buffer) { // TODO(Rodrigo): Optimize alignment - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); + auto compression_type = params.GetCompressionType(); + + const std::size_t mip_offset = compression_type == SurfaceCompression::Converted + ? params.GetConvertedMipmapOffset(level) + : params.GetHostMipmapLevelOffset(level); u8* buffer{staging_buffer.data() + mip_offset}; if (is_compressed) { const auto image_size{static_cast(params.GetHostMipmapSize(level))}; diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 8e98033f3..5d49214e5 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -197,7 +197,7 @@ inline constexpr std::array compression_factor_shift_table */ inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { DEBUG_ASSERT(format != PixelFormat::Invalid); - DEBUG_ASSERT(static_cast(format) < compression_factor_table.size()); + DEBUG_ASSERT(static_cast(format) < compression_factor_shift_table.size()); return compression_factor_shift_table[static_cast(format)]; } @@ -438,6 +438,88 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { return GetFormatBpp(pixel_format) / CHAR_BIT; } +enum class SurfaceCompression : u8 { + None = 0, + Compressed = 1, + Converted = 2, + Rearranged = 3, +}; + +inline constexpr std::array compression_type_table = {{ + SurfaceCompression::None, // ABGR8U + SurfaceCompression::None, // ABGR8S + SurfaceCompression::None, // ABGR8UI + SurfaceCompression::None, // B5G6R5U + SurfaceCompression::None, // A2B10G10R10U + SurfaceCompression::None, // A1B5G5R5U + SurfaceCompression::None, // R8U + SurfaceCompression::None, // R8UI + SurfaceCompression::None, // RGBA16F + SurfaceCompression::None, // RGBA16U + SurfaceCompression::None, // RGBA16UI + SurfaceCompression::None, // R11FG11FB10F + SurfaceCompression::None, // RGBA32UI + SurfaceCompression::Compressed, // DXT1 + SurfaceCompression::Compressed, // DXT23 + SurfaceCompression::Compressed, // DXT45 + SurfaceCompression::Compressed, // DXN1 + SurfaceCompression::Compressed, // DXN2UNORM + SurfaceCompression::Compressed, // DXN2SNORM + SurfaceCompression::Compressed, // BC7U + SurfaceCompression::Compressed, // BC6H_UF16 + SurfaceCompression::Compressed, // BC6H_SF16 + SurfaceCompression::Converted, // ASTC_2D_4X4 + SurfaceCompression::None, // BGRA8 + SurfaceCompression::None, // RGBA32F + SurfaceCompression::None, // RG32F + SurfaceCompression::None, // R32F + SurfaceCompression::None, // R16F + SurfaceCompression::None, // R16U + SurfaceCompression::None, // R16S + SurfaceCompression::None, // R16UI + SurfaceCompression::None, // R16I + SurfaceCompression::None, // RG16 + SurfaceCompression::None, // RG16F + SurfaceCompression::None, // RG16UI + SurfaceCompression::None, // RG16I + SurfaceCompression::None, // RG16S + SurfaceCompression::None, // RGB32F + SurfaceCompression::None, // RGBA8_SRGB + SurfaceCompression::None, // RG8U + SurfaceCompression::None, // RG8S + SurfaceCompression::None, // RG32UI + SurfaceCompression::None, // R32UI + SurfaceCompression::Converted, // ASTC_2D_8X8 + SurfaceCompression::Converted, // ASTC_2D_8X5 + SurfaceCompression::Converted, // ASTC_2D_5X4 + SurfaceCompression::None, // BGRA8_SRGB + SurfaceCompression::Compressed, // DXT1_SRGB + SurfaceCompression::Compressed, // DXT23_SRGB + SurfaceCompression::Compressed, // DXT45_SRGB + SurfaceCompression::Compressed, // BC7U_SRGB + SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB + SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB + SurfaceCompression::Converted, // ASTC_2D_5X5 + SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB + SurfaceCompression::Converted, // ASTC_2D_10X8 + SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB + SurfaceCompression::None, // Z32F + SurfaceCompression::None, // Z16 + SurfaceCompression::None, // Z24S8 + SurfaceCompression::Rearranged, // S8Z24 + SurfaceCompression::None, // Z32FS8 +}}; + +static constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { + if (format == PixelFormat::Invalid) + return SurfaceCompression::None; + + ASSERT(static_cast(format) < compression_type_table.size()); + return compression_type_table[static_cast(format)]; +} + SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); bool SurfaceTargetIsLayered(SurfaceTarget target); diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 510d1aef5..ceff51043 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -17,6 +17,7 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; +using VideoCore::Surface::SurfaceCompression; SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) : params{params}, mipmap_sizes(params.num_levels), @@ -102,9 +103,20 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; - ConvertFromGuestToHost(staging_buffer.data() + host_offset, params.pixel_format, + auto compression_type = params.GetCompressionType(); + if (compression_type == SurfaceCompression::None || + compression_type == SurfaceCompression::Compressed) + return; + + for (u32 level_up = params.num_levels; level_up > 0; --level_up) { + const u32 level = level_up - 1; + const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)}; + const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged + ? in_host_offset + : params.GetConvertedMipmapOffset(level); + u8* in_buffer = staging_buffer.data() + in_host_offset; + u8* out_buffer = staging_buffer.data() + out_host_offset; + ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, params.GetMipWidth(level), params.GetMipHeight(level), params.GetMipDepth(level), true, true); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 78db2d665..cb7f22706 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -93,6 +93,10 @@ public: return mipmap_sizes[level]; } + bool IsLinear() const { + return !params.is_tiled; + } + bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { return params.pixel_format == pixel_format; } diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 8472b69dc..d9d157d02 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -76,17 +76,14 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); - params.width = - Common::AlignBits(config.tic.Width(), GetCompressionFactorShift(params.pixel_format)); - params.height = - Common::AlignBits(config.tic.Height(), GetCompressionFactorShift(params.pixel_format)); + params.width = config.tic.Width(); + params.height = config.tic.Height(); params.depth = config.tic.Depth(); if (params.target == SurfaceTarget::TextureCubemap || params.target == SurfaceTarget::TextureCubeArray) { params.depth *= 6; } params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); - params.unaligned_height = config.tic.Height(); params.num_levels = config.tic.max_mip_level + 1; params.is_layered = params.IsLayered(); return params; @@ -108,7 +105,6 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.type = GetFormatType(params.pixel_format); params.width = zeta_width; params.height = zeta_height; - params.unaligned_height = zeta_height; params.target = SurfaceTarget::Texture2D; params.depth = 1; params.pitch = 0; @@ -141,7 +137,6 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz } params.height = config.height; params.depth = 1; - params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; params.num_levels = 1; params.is_layered = false; @@ -164,7 +159,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.width = config.width; params.height = config.height; params.pitch = config.pitch; - params.unaligned_height = config.height; // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters params.target = SurfaceTarget::Texture2D; params.depth = 1; @@ -185,18 +179,18 @@ bool SurfaceParams::IsLayered() const { } } +// Auto block resizing algorithm from: +// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c u32 SurfaceParams::GetMipBlockHeight(u32 level) const { - // Auto block resizing algorithm from: - // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c if (level == 0) { return this->block_height; } - const u32 height{GetMipHeight(level)}; + const u32 height_new{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; - const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; - const u32 block_height = Common::Log2Ceil32(blocks_in_y); - return std::clamp(block_height, 3U, 8U) - 3U; + const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height}; + const u32 block_height_new = Common::Log2Ceil32(blocks_in_y); + return std::clamp(block_height_new, 3U, 7U) - 3U; } u32 SurfaceParams::GetMipBlockDepth(u32 level) const { @@ -207,12 +201,12 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { return 0; } - const u32 depth{GetMipDepth(level)}; - const u32 block_depth = Common::Log2Ceil32(depth); - if (block_depth > 4) { + const u32 depth_new{GetMipDepth(level)}; + const u32 block_depth_new = Common::Log2Ceil32(depth_new); + if (block_depth_new > 4) { return 5 - (GetMipBlockHeight(level) >= 2); } - return block_depth; + return block_depth_new; } std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { @@ -231,6 +225,14 @@ std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { return offset; } +std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const { + std::size_t offset = 0; + for (u32 i = 0; i < level; i++) { + offset += GetConvertedMipmapSize(i); + } + return offset; +} + std::size_t SurfaceParams::GetGuestMipmapSize(u32 level) const { return GetInnerMipmapMemorySize(level, false, false); } @@ -239,6 +241,14 @@ std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); } +std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { + constexpr std::size_t rgb8_bpp = 4ULL; + const std::size_t width_t = GetMipWidth(level); + const std::size_t height_t = GetMipHeight(level); + const std::size_t depth_t = is_layered ? depth : GetMipDepth(level); + return width_t * height_t * depth_t * rgb8_bpp; +} + std::size_t SurfaceParams::GetGuestLayerSize() const { return GetLayerSize(false, false); } @@ -287,12 +297,10 @@ std::size_t SurfaceParams::Hash() const { bool SurfaceParams::operator==(const SurfaceParams& rhs) const { return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, - height, depth, pitch, unaligned_height, num_levels, pixel_format, - component_type, type, target) == + height, depth, pitch, num_levels, pixel_format, component_type, type, target) == std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, - rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type, - rhs.type, rhs.target); + rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target); } std::string SurfaceParams::TargetName() const { diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 7c48782c7..b3082173f 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -7,6 +7,7 @@ #include #include "common/alignment.h" +#include "common/bit_util.h" #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ -16,6 +17,8 @@ namespace VideoCommon { +using VideoCore::Surface::SurfaceCompression; + class SurfaceParams { public: /// Creates SurfaceCachedParams from a texture configuration. @@ -50,17 +53,12 @@ public: std::size_t GetHostSizeInBytes() const { std::size_t host_size_in_bytes; - if (IsPixelFormatASTC(pixel_format)) { + if (GetCompressionType() == SurfaceCompression::Converted) { constexpr std::size_t rgb8_bpp = 4ULL; // ASTC is uncompressed in software, in emulated as RGBA8 host_size_in_bytes = 0; for (u32 level = 0; level < num_levels; ++level) { - const std::size_t width = - Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); - const std::size_t height = - Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); - const std::size_t depth = is_layered ? this->depth : GetMipDepth(level); - host_size_in_bytes += width * height * depth * rgb8_bpp; + host_size_in_bytes += GetConvertedMipmapSize(level); } } else { host_size_in_bytes = GetInnerMemorySize(true, false, false); @@ -93,6 +91,12 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; + u32 GetRowAlignment(u32 level) const { + const u32 bpp = + GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); + return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); + } + // Helper used for out of class size calculations static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, const u32 block_depth) { @@ -106,12 +110,16 @@ public: /// Returns the offset in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapLevelOffset(u32 level) const; + std::size_t GetConvertedMipmapOffset(u32 level) const; + /// Returns the size in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapSize(u32 level) const; /// Returns the size in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapSize(u32 level) const; + std::size_t GetConvertedMipmapSize(u32 level) const; + /// Returns the size of a layer in bytes in guest memory. std::size_t GetGuestLayerSize() const; @@ -141,6 +149,10 @@ public: /// Returns true if the pixel format is a depth and/or stencil format. bool IsPixelFormatZeta() const; + SurfaceCompression GetCompressionType() const { + return VideoCore::Surface::GetFormatCompressionType(pixel_format); + } + std::string TargetName() const; bool is_tiled; @@ -154,7 +166,6 @@ public: u32 height; u32 depth; u32 pitch; - u32 unaligned_height; u32 num_levels; VideoCore::Surface::PixelFormat pixel_format; VideoCore::Surface::ComponentType component_type; diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp index 82050bd51..f3efa7eb0 100644 --- a/src/video_core/textures/convert.cpp +++ b/src/video_core/textures/convert.cpp @@ -62,19 +62,19 @@ static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { SwapS8Z24ToZ24S8(data, width, height); } -void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, - bool convert_astc, bool convert_s8z24) { +void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width, + u32 height, u32 depth, bool convert_astc, bool convert_s8z24) { if (convert_astc && IsPixelFormatASTC(pixel_format)) { // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. u32 block_width{}; u32 block_height{}; std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); - const std::vector rgba8_data = - Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); - std::copy(rgba8_data.begin(), rgba8_data.end(), data); + const std::vector rgba8_data = Tegra::Texture::ASTC::Decompress( + in_data, width, height, depth, block_width, block_height); + std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { - Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height); + Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); } } @@ -90,4 +90,4 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h } } -} // namespace Tegra::Texture \ No newline at end of file +} // namespace Tegra::Texture diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h index 12542e71c..d5d6c77bb 100644 --- a/src/video_core/textures/convert.h +++ b/src/video_core/textures/convert.h @@ -12,10 +12,11 @@ enum class PixelFormat; namespace Tegra::Texture { -void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, - u32 height, u32 depth, bool convert_astc, bool convert_s8z24); +void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format, + u32 width, u32 height, u32 depth, bool convert_astc, + bool convert_s8z24); void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, u32 height, u32 depth, bool convert_astc, bool convert_s8z24); -} // namespace Tegra::Texture \ No newline at end of file +} // namespace Tegra::Texture -- cgit v1.2.3