From 2f83d9a61bca42d9ef24074beb2b11b19bd4cecd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 25 Mar 2021 16:53:51 -0400 Subject: astc_decoder: Refactor for style and more efficient memory use --- .../renderer_opengl/gl_texture_cache.cpp | 11 +-- src/video_core/renderer_opengl/util_shaders.cpp | 96 +++++++++------------- src/video_core/renderer_opengl/util_shaders.h | 8 +- 3 files changed, 46 insertions(+), 69 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 29105ecad..623b43d8a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -307,7 +307,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array swizzles) { - if (IsPixelFormatASTC(image.info.format)) { - return util_shaders.ASTCDecode(image, map, swizzles); - } switch (image.info.type) { case ImageType::e2D: - return util_shaders.BlockLinearUpload2D(image, map, swizzles); + if (IsPixelFormatASTC(image.info.format)) { + return util_shaders.ASTCDecode(image, map, swizzles); + } else { + return util_shaders.BlockLinearUpload2D(image, map, swizzles); + } case ImageType::e3D: return util_shaders.BlockLinearUpload3D(image, map, swizzles); case ImageType::Linear: diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 85722c54a..47fddcb6e 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -2,11 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include -#include #include -#include -#include #include #include @@ -24,7 +20,6 @@ #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" -#include "video_core/surface.h" #include "video_core/texture_cache/accelerated_swizzle.h" #include "video_core/texture_cache/types.h" #include "video_core/texture_cache/util.h" @@ -36,6 +31,7 @@ namespace OpenGL { using namespace HostShaders; using namespace Tegra::Texture::ASTC; +using VideoCommon::Extent2D; using VideoCommon::Extent3D; using VideoCommon::ImageCopy; using VideoCommon::ImageType; @@ -69,33 +65,15 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)), copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { - MakeBuffers(); -} - -UtilShaders::~UtilShaders() = default; - -void UtilShaders::MakeBuffers() { const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); swizzle_table_buffer.Create(); + astc_buffer.Create(); glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); - - astc_encodings_buffer.Create(); - glNamedBufferStorage(astc_encodings_buffer.handle, sizeof(EncodingsValues), &EncodingsValues, - 0); - replicate_6_to_8_buffer.Create(); - glNamedBufferStorage(replicate_6_to_8_buffer.handle, sizeof(REPLICATE_6_BIT_TO_8_TABLE), - &REPLICATE_6_BIT_TO_8_TABLE, 0); - replicate_7_to_8_buffer.Create(); - glNamedBufferStorage(replicate_7_to_8_buffer.handle, sizeof(REPLICATE_7_BIT_TO_8_TABLE), - &REPLICATE_7_BIT_TO_8_TABLE, 0); - replicate_8_to_8_buffer.Create(); - glNamedBufferStorage(replicate_8_to_8_buffer.handle, sizeof(REPLICATE_8_BIT_TO_8_TABLE), - &REPLICATE_8_BIT_TO_8_TABLE, 0); - replicate_byte_to_16_buffer.Create(); - glNamedBufferStorage(replicate_byte_to_16_buffer.handle, sizeof(REPLICATE_BYTE_TO_16_TABLE), - &REPLICATE_BYTE_TO_16_TABLE, 0); + glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_BUFFER_DATA), &ASTC_BUFFER_DATA, 0); } +UtilShaders::~UtilShaders() = default; + void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, std::span swizzles) { static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; @@ -108,47 +86,51 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - static constexpr GLuint LOC_NUM_IMAGE_BLOCKS = 0; - static constexpr GLuint LOC_BLOCK_DIMS = 1; - const Extent3D tile_size = { - VideoCore::Surface::DefaultBlockWidth(image.info.format), - VideoCore::Surface::DefaultBlockHeight(image.info.format), + const Extent2D tile_size{ + .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), + .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; program_manager.BindHostCompute(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_encodings_buffer.handle); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_6_TO_8_BUFFER, - replicate_6_to_8_buffer.handle); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_7_TO_8_BUFFER, - replicate_7_to_8_buffer.handle); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, - replicate_8_to_8_buffer.handle); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, - replicate_byte_to_16_buffer.handle); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle, + offsetof(AstcBufferData, encoding_values), + sizeof(AstcBufferData::encoding_values)); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_6_TO_8_BUFFER, astc_buffer.handle, + offsetof(AstcBufferData, replicate_6_to_8), + sizeof(AstcBufferData::replicate_6_to_8)); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_7_TO_8_BUFFER, astc_buffer.handle, + offsetof(AstcBufferData, replicate_7_to_8), + sizeof(AstcBufferData::replicate_7_to_8)); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle, + offsetof(AstcBufferData, replicate_8_to_8), + sizeof(AstcBufferData::replicate_8_to_8)); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle, + offsetof(AstcBufferData, replicate_byte_to_16), + sizeof(AstcBufferData::replicate_byte_to_16)); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - glUniform2ui(LOC_BLOCK_DIMS, tile_size.width, tile_size.height); + glUniform2ui(1, tile_size.width, tile_size.height); + // Ensure buffer data is valid before dispatching + glFlush(); for (const SwizzleParameters& swizzle : swizzles) { - glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, - GL_WRITE_ONLY, GL_RGBA8); const size_t input_offset = swizzle.buffer_offset + map.offset; - const auto num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); - const auto num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); - - glUniform2ui(LOC_NUM_IMAGE_BLOCKS, swizzle.num_tiles.width, swizzle.num_tiles.height); + const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); + const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); - // To unswizzle the ASTC data const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); - glUniform3uiv(2, 1, params.origin.data()); - glUniform3iv(3, 1, params.destination.data()); - glUniform1ui(4, params.bytes_per_block_log2); - glUniform1ui(5, params.layer_stride); - glUniform1ui(6, params.block_size); - glUniform1ui(7, params.x_shift); - glUniform1ui(8, params.block_height); - glUniform1ui(9, params.block_height_mask); + ASSERT(params.origin == (std::array{0, 0, 0})); + ASSERT(params.destination == (std::array{0, 0, 0})); + glUniform1ui(2, params.bytes_per_block_log2); + glUniform1ui(3, params.layer_stride); + glUniform1ui(4, params.block_size); + glUniform1ui(5, params.x_shift); + glUniform1ui(6, params.block_height); + glUniform1ui(7, params.block_height_mask); + + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, + GL_WRITE_ONLY, GL_RGBA8); // ASTC texture data glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 08a1cb9b2..53d65f368 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -40,8 +40,6 @@ public: explicit UtilShaders(ProgramManager& program_manager); ~UtilShaders(); - void MakeBuffers(); - void ASTCDecode(Image& image, const ImageBufferMap& map, std::span swizzles); @@ -64,11 +62,7 @@ private: ProgramManager& program_manager; OGLBuffer swizzle_table_buffer; - OGLBuffer astc_encodings_buffer; - OGLBuffer replicate_6_to_8_buffer; - OGLBuffer replicate_7_to_8_buffer; - OGLBuffer replicate_8_to_8_buffer; - OGLBuffer replicate_byte_to_16_buffer; + OGLBuffer astc_buffer; OGLProgram astc_decoder_program; OGLProgram block_linear_unswizzle_2d_program; -- cgit v1.2.3