diff options
Diffstat (limited to '')
-rw-r--r-- | src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 60 | ||||
-rw-r--r-- | src/video_core/texture_cache/decode_bc.cpp | 50 | ||||
-rw-r--r-- | src/video_core/texture_cache/decode_bc.h | 2 | ||||
-rw-r--r-- | src/video_core/texture_cache/util.cpp | 16 |
4 files changed, 73 insertions, 55 deletions
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index ed023fcfe..89ebab08e 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -96,9 +96,9 @@ Id ImageType(EmitContext& ctx, const ImageDescriptor& desc, Id sampled_type) { } Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin, - spv::StorageClass storage_class) { + spv::StorageClass storage_class, std::optional<Id> initializer = std::nullopt) { const Id pointer_type{ctx.TypePointer(storage_class, type)}; - const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)}; + const Id id{ctx.AddGlobalVariable(pointer_type, storage_class, initializer)}; if (builtin) { ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin); } @@ -144,11 +144,12 @@ Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, } Id DefineOutput(EmitContext& ctx, Id type, std::optional<u32> invocations, - std::optional<spv::BuiltIn> builtin = std::nullopt) { + std::optional<spv::BuiltIn> builtin = std::nullopt, + std::optional<Id> initializer = std::nullopt) { if (invocations && ctx.stage == Stage::TessellationControl) { type = ctx.TypeArray(type, ctx.Const(*invocations)); } - return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); + return DefineVariable(ctx, type, builtin, spv::StorageClass::Output, initializer); } void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invocations) { @@ -811,10 +812,14 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { labels.push_back(OpLabel()); } if (info.stores.ClipDistances()) { - literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2); - labels.push_back(OpLabel()); - literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2); - labels.push_back(OpLabel()); + if (profile.max_user_clip_distances >= 4) { + literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2); + labels.push_back(OpLabel()); + } + if (profile.max_user_clip_distances >= 8) { + literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2); + labels.push_back(OpLabel()); + } } OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); OpSwitch(compare_index, default_label, literals, labels); @@ -843,17 +848,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { ++label_index; } if (info.stores.ClipDistances()) { - AddLabel(labels[label_index]); - const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; - OpStore(pointer, store_value); - OpReturn(); - ++label_index; - AddLabel(labels[label_index]); - const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))}; - const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)}; - OpStore(pointer2, store_value); - OpReturn(); - ++label_index; + if (profile.max_user_clip_distances >= 4) { + AddLabel(labels[label_index]); + const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; + OpStore(pointer, store_value); + OpReturn(); + ++label_index; + } + if (profile.max_user_clip_distances >= 8) { + AddLabel(labels[label_index]); + const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))}; + const Id pointer{OpAccessChain(output_f32, clip_distances, fixed_index)}; + OpStore(pointer, store_value); + OpReturn(); + ++label_index; + } } AddLabel(end_block); OpUnreachable(); @@ -1532,9 +1541,16 @@ void EmitContext::DefineOutputs(const IR::Program& program) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } - const Id type{TypeArray( - F32[1], Const(std::min(info.used_clip_distances, profile.max_user_clip_distances)))}; - clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); + if (profile.max_user_clip_distances > 0) { + const u32 used{std::min(profile.max_user_clip_distances, 8u)}; + const std::array<Id, 8> zero{f32_zero_value, f32_zero_value, f32_zero_value, + f32_zero_value, f32_zero_value, f32_zero_value, + f32_zero_value, f32_zero_value}; + const Id type{TypeArray(F32[1], Const(used))}; + const Id initializer{ConstantComposite(type, std::span(zero).subspan(0, used))}; + clip_distances = + DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance, initializer); + } } if (info.stores[IR::Attribute::Layer] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp index 3e26474a3..a018c6df4 100644 --- a/src/video_core/texture_cache/decode_bc.cpp +++ b/src/video_core/texture_cache/decode_bc.cpp @@ -60,66 +60,72 @@ u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) { } template <auto decompress, PixelFormat pixel_format> -void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent, +void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy, bool is_signed = false) { const u32 out_bpp = ConvertedBytesPerBlock(pixel_format); - const u32 block_width = std::min(extent.width, BLOCK_SIZE); - const u32 block_height = std::min(extent.height, BLOCK_SIZE); - const u32 pitch = extent.width * out_bpp; + const u32 block_size = BlockSize(pixel_format); + const u32 width = copy.image_extent.width; + const u32 height = copy.image_extent.height * copy.image_subresource.num_layers; + const u32 depth = copy.image_extent.depth; + const u32 block_width = std::min(width, BLOCK_SIZE); + const u32 block_height = std::min(height, BLOCK_SIZE); + const u32 pitch = width * out_bpp; size_t input_offset = 0; size_t output_offset = 0; - for (u32 slice = 0; slice < extent.depth; ++slice) { - for (u32 y = 0; y < extent.height; y += block_height) { - size_t row_offset = 0; - for (u32 x = 0; x < extent.width; - x += block_width, row_offset += block_width * out_bpp) { - const u8* src = input.data() + input_offset; - u8* const dst = output.data() + output_offset + row_offset; + for (u32 slice = 0; slice < depth; ++slice) { + for (u32 y = 0; y < height; y += block_height) { + size_t src_offset = input_offset; + size_t dst_offset = output_offset; + for (u32 x = 0; x < width; x += block_width) { + const u8* src = input.data() + src_offset; + u8* const dst = output.data() + dst_offset; if constexpr (IsSigned(pixel_format)) { - decompress(src, dst, x, y, extent.width, extent.height, is_signed); + decompress(src, dst, x, y, width, height, is_signed); } else { - decompress(src, dst, x, y, extent.width, extent.height); + decompress(src, dst, x, y, width, height); } - input_offset += BlockSize(pixel_format); + src_offset += block_size; + dst_offset += block_width * out_bpp; } + input_offset += copy.buffer_row_length * block_size / block_width; output_offset += block_height * pitch; } } } -void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, +void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy, VideoCore::Surface::PixelFormat pixel_format) { switch (pixel_format) { case PixelFormat::BC1_RGBA_UNORM: case PixelFormat::BC1_RGBA_SRGB: - DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent); + DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, copy); break; case PixelFormat::BC2_UNORM: case PixelFormat::BC2_SRGB: - DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent); + DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, copy); break; case PixelFormat::BC3_UNORM: case PixelFormat::BC3_SRGB: - DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent); + DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, copy); break; case PixelFormat::BC4_SNORM: case PixelFormat::BC4_UNORM: DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>( - input, output, extent, pixel_format == PixelFormat::BC4_SNORM); + input, output, copy, pixel_format == PixelFormat::BC4_SNORM); break; case PixelFormat::BC5_SNORM: case PixelFormat::BC5_UNORM: DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>( - input, output, extent, pixel_format == PixelFormat::BC5_SNORM); + input, output, copy, pixel_format == PixelFormat::BC5_SNORM); break; case PixelFormat::BC6H_SFLOAT: case PixelFormat::BC6H_UFLOAT: DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>( - input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT); + input, output, copy, pixel_format == PixelFormat::BC6H_SFLOAT); break; case PixelFormat::BC7_SRGB: case PixelFormat::BC7_UNORM: - DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent); + DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, copy); break; default: LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format); diff --git a/src/video_core/texture_cache/decode_bc.h b/src/video_core/texture_cache/decode_bc.h index 41d1ec0a3..4e3b9b8ac 100644 --- a/src/video_core/texture_cache/decode_bc.h +++ b/src/video_core/texture_cache/decode_bc.h @@ -13,7 +13,7 @@ namespace VideoCommon { [[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format); -void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, +void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy, VideoCore::Surface::PixelFormat pixel_format); } // namespace VideoCommon diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 15596c925..fcf70068e 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -837,6 +837,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory std::span<u8> output) { const size_t guest_size_bytes = input.size_bytes(); const u32 bpp_log2 = BytesPerBlockLog2(info.format); + const Extent2D tile_size = DefaultBlockSize(info.format); const Extent3D size = info.size; if (info.type == ImageType::Linear) { @@ -847,7 +848,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory return {{ .buffer_offset = 0, .buffer_size = guest_size_bytes, - .buffer_row_length = info.pitch >> bpp_log2, + .buffer_row_length = info.pitch * tile_size.width >> bpp_log2, .buffer_image_height = size.height, .image_subresource = { @@ -862,7 +863,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory const LevelInfo level_info = MakeLevelInfo(info); const s32 num_layers = info.resources.layers; const s32 num_levels = info.resources.levels; - const Extent2D tile_size = DefaultBlockSize(info.format); const std::array level_sizes = CalculateLevelSizes(level_info, num_levels); const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing); const u32 layer_size = CalculateLevelBytes(level_sizes, num_levels); @@ -926,8 +926,6 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 const auto input_offset = input.subspan(copy.buffer_offset); copy.buffer_offset = output_offset; - copy.buffer_row_length = mip_size.width; - copy.buffer_image_height = mip_size.height; const auto recompression_setting = Settings::values.astc_recompression.GetValue(); const bool astc = IsPixelFormatASTC(info.format); @@ -972,16 +970,14 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 bpp_div; output_offset += static_cast<u32>(copy.buffer_size); } else { - const Extent3D image_extent{ - .width = copy.image_extent.width, - .height = copy.image_extent.height * copy.image_subresource.num_layers, - .depth = copy.image_extent.depth, - }; - DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format); + DecompressBCn(input_offset, output.subspan(output_offset), copy, info.format); output_offset += copy.image_extent.width * copy.image_extent.height * copy.image_subresource.num_layers * ConvertedBytesPerBlock(info.format); } + + copy.buffer_row_length = mip_size.width; + copy.buffer_image_height = mip_size.height; } } |