From c439fc9be994583801418743ab202fb63d1c83a0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 31 Jul 2021 23:55:20 -0400 Subject: astc_decoder: Reduce workgroup size This reduces the amount of over dispatching when there are odd dimensions (i.e. ASTC 8x5), which rarely evenly divide into 32x32. --- src/video_core/host_shaders/astc_decoder.comp | 2 +- src/video_core/renderer_opengl/util_shaders.cpp | 4 ++-- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 74ce058a9..f34c5f5d9 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -22,7 +22,7 @@ #endif -layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; BEGIN_PUSH_CONSTANTS UNIFORM(1) uvec2 block_dims; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 4e6f7cb00..333f35a1c 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -82,8 +82,8 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, glFlush(); for (const SwizzleParameters& swizzle : swizzles) { const size_t input_offset = swizzle.buffer_offset + map.offset; - const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); - const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); + const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U); + const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U); const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index d13d58e8c..3e96c0f60 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -358,8 +358,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, }); for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { const size_t input_offset = swizzle.buffer_offset + map.offset; - const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); - const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); + const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U); + const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U); const u32 num_dispatches_z = image.info.resources.layers; update_descriptor_queue.Acquire(); -- cgit v1.2.3