From ac09cc3504fd9c2e256377f75e7ecb187c4bb6f7 Mon Sep 17 00:00:00 2001 From: Ameer J <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 Jul 2023 12:26:48 -0400 Subject: weights refactor --- src/video_core/host_shaders/astc_decoder.comp | 48 ++++++++++++--------------- 1 file changed, 22 insertions(+), 26 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index a814ef483..b84ddd67d 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -116,8 +116,6 @@ const uint REPLICATE_7_BIT_TO_8_TABLE[128] = 237, 239, 241, 243, 245, 247, 249, 251, 253, 255); // Input ASTC texture globals -uint current_index = 0; -int bitsread = 0; int total_bitsread = 0; uvec4 local_buff; @@ -144,13 +142,6 @@ int texel_vector_index = 0; uint unquantized_texel_weights[2][144]; -uint SwizzleOffset(uvec2 pos) { - uint x = pos.x; - uint y = pos.y; - return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + - (y % 2) * 16 + (x % 16); -} - // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] // is the same as [(num_bits - 1):0] and repeats all the way down. uint Replicate(uint val, uint num_bits, uint to_bit) { @@ -1224,33 +1215,40 @@ void DecompressBlock(ivec3 coord) { uint local_partition = 0; if (num_partitions > 1) { local_partition = Select2DPartition(partition_index, i, j, num_partitions, - (block_dims.y * block_dims.x) < 32); + (block_dims.y * block_dims.x) < 32); } - vec4 p; - uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]); - uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]); - uvec4 plane_vec = uvec4(0); - uvec4 weight_vec = uvec4(0); - for (uint c = 0; c < 4; c++) { - if (params.dual_plane && (((plane_index + 1) & 3) == c)) { - plane_vec[c] = 1; + const uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]); + const uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]); + const uint weight_offset = (j * block_dims.x + i); + const uint primary_weight = unquantized_texel_weights[weight_offset][0]; + uvec4 weight_vec = uvec4(primary_weight); + if (params.dual_plane) { + const uint secondary_weight = unquantized_texel_weights[weight_offset][1]; + for (uint c = 0; c < 4; c++) { + const bool is_secondary = ((plane_index + 1u) & 3u) == c; + weight_vec[c] = is_secondary ? secondary_weight : primary_weight; } - weight_vec[c] = unquantized_texel_weights[plane_vec[c]][j * block_dims.x + i]; } - vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); - p = (Cf / 65535.0); + const vec4 Cf = + vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); + const vec4 p = (Cf / 65535.0); imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); } } } + +uint SwizzleOffset(uvec2 pos) { + uint x = pos.x; + uint y = pos.y; + return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + + (y % 2) * 16 + (x % 16); +} + void main() { uvec3 pos = gl_GlobalInvocationID; pos.x <<= BYTES_PER_BLOCK_LOG2; - - // Read as soon as possible due to its latency const uint swizzle = SwizzleOffset(pos.xy); - const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; uint offset = 0; @@ -1264,8 +1262,6 @@ void main() { if (any(greaterThanEqual(coord, imageSize(dest_image)))) { return; } - current_index = 0; - bitsread = 0; local_buff = astc_data[offset / 16]; DecompressBlock(coord); } -- cgit v1.2.3