summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 60
-rw-r--r-- src/video_core/texture_cache/decode_bc.cpp | 50
-rw-r--r-- src/video_core/texture_cache/decode_bc.h | 2
-rw-r--r-- src/video_core/texture_cache/util.cpp | 16
4 files changed, 73 insertions, 55 deletions
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index ed023fcfe..89ebab08e 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -96,9 +96,9 @@ Id ImageType(EmitContext& ctx, const ImageDescriptor& desc, Id sampled_type) {
}
Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin,
- spv::StorageClass storage_class) {
+ spv::StorageClass storage_class, std::optional<Id> initializer = std::nullopt) {
const Id pointer_type{ctx.TypePointer(storage_class, type)};
- const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)};
+ const Id id{ctx.AddGlobalVariable(pointer_type, storage_class, initializer)};
if (builtin) {
ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin);
}
@@ -144,11 +144,12 @@ Id DefineInput(EmitContext& ctx, Id type, bool per_invocation,
}
Id DefineOutput(EmitContext& ctx, Id type, std::optional<u32> invocations,
- std::optional<spv::BuiltIn> builtin = std::nullopt) {
+ std::optional<spv::BuiltIn> builtin = std::nullopt,
+ std::optional<Id> initializer = std::nullopt) {
if (invocations && ctx.stage == Stage::TessellationControl) {
type = ctx.TypeArray(type, ctx.Const(*invocations));
}
- return DefineVariable(ctx, type, builtin, spv::StorageClass::Output);
+ return DefineVariable(ctx, type, builtin, spv::StorageClass::Output, initializer);
}
void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invocations) {
@@ -811,10 +812,14 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
labels.push_back(OpLabel());
}
if (info.stores.ClipDistances()) {
- literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2);
- labels.push_back(OpLabel());
- literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2);
- labels.push_back(OpLabel());
+ if (profile.max_user_clip_distances >= 4) {
+ literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2);
+ labels.push_back(OpLabel());
+ }
+ if (profile.max_user_clip_distances >= 8) {
+ literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2);
+ labels.push_back(OpLabel());
+ }
}
OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
OpSwitch(compare_index, default_label, literals, labels);
@@ -843,17 +848,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
++label_index;
}
if (info.stores.ClipDistances()) {
- AddLabel(labels[label_index]);
- const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)};
- OpStore(pointer, store_value);
- OpReturn();
- ++label_index;
- AddLabel(labels[label_index]);
- const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))};
- const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)};
- OpStore(pointer2, store_value);
- OpReturn();
- ++label_index;
+ if (profile.max_user_clip_distances >= 4) {
+ AddLabel(labels[label_index]);
+ const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)};
+ OpStore(pointer, store_value);
+ OpReturn();
+ ++label_index;
+ }
+ if (profile.max_user_clip_distances >= 8) {
+ AddLabel(labels[label_index]);
+ const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))};
+ const Id pointer{OpAccessChain(output_f32, clip_distances, fixed_index)};
+ OpStore(pointer, store_value);
+ OpReturn();
+ ++label_index;
+ }
}
AddLabel(end_block);
OpUnreachable();
@@ -1532,9 +1541,16 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
if (stage == Stage::Fragment) {
throw NotImplementedException("Storing ClipDistance in fragment stage");
}
- const Id type{TypeArray(
- F32[1], Const(std::min(info.used_clip_distances, profile.max_user_clip_distances)))};
- clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance);
+ if (profile.max_user_clip_distances > 0) {
+ const u32 used{std::min(profile.max_user_clip_distances, 8u)};
+ const std::array<Id, 8> zero{f32_zero_value, f32_zero_value, f32_zero_value,
+ f32_zero_value, f32_zero_value, f32_zero_value,
+ f32_zero_value, f32_zero_value};
+ const Id type{TypeArray(F32[1], Const(used))};
+ const Id initializer{ConstantComposite(type, std::span(zero).subspan(0, used))};
+ clip_distances =
+ DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance, initializer);
+ }
}
if (info.stores[IR::Attribute::Layer] &&
(profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) {
diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp
index 3e26474a3..a018c6df4 100644
--- a/src/video_core/texture_cache/decode_bc.cpp
+++ b/src/video_core/texture_cache/decode_bc.cpp
@@ -60,66 +60,72 @@ u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) {
}
template <auto decompress, PixelFormat pixel_format>
-void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent,
+void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
bool is_signed = false) {
const u32 out_bpp = ConvertedBytesPerBlock(pixel_format);
- const u32 block_width = std::min(extent.width, BLOCK_SIZE);
- const u32 block_height = std::min(extent.height, BLOCK_SIZE);
- const u32 pitch = extent.width * out_bpp;
+ const u32 block_size = BlockSize(pixel_format);
+ const u32 width = copy.image_extent.width;
+ const u32 height = copy.image_extent.height * copy.image_subresource.num_layers;
+ const u32 depth = copy.image_extent.depth;
+ const u32 block_width = std::min(width, BLOCK_SIZE);
+ const u32 block_height = std::min(height, BLOCK_SIZE);
+ const u32 pitch = width * out_bpp;
size_t input_offset = 0;
size_t output_offset = 0;
- for (u32 slice = 0; slice < extent.depth; ++slice) {
- for (u32 y = 0; y < extent.height; y += block_height) {
- size_t row_offset = 0;
- for (u32 x = 0; x < extent.width;
- x += block_width, row_offset += block_width * out_bpp) {
- const u8* src = input.data() + input_offset;
- u8* const dst = output.data() + output_offset + row_offset;
+ for (u32 slice = 0; slice < depth; ++slice) {
+ for (u32 y = 0; y < height; y += block_height) {
+ size_t src_offset = input_offset;
+ size_t dst_offset = output_offset;
+ for (u32 x = 0; x < width; x += block_width) {
+ const u8* src = input.data() + src_offset;
+ u8* const dst = output.data() + dst_offset;
if constexpr (IsSigned(pixel_format)) {
- decompress(src, dst, x, y, extent.width, extent.height, is_signed);
+ decompress(src, dst, x, y, width, height, is_signed);
} else {
- decompress(src, dst, x, y, extent.width, extent.height);
+ decompress(src, dst, x, y, width, height);
}
- input_offset += BlockSize(pixel_format);
+ src_offset += block_size;
+ dst_offset += block_width * out_bpp;
}
+ input_offset += copy.buffer_row_length * block_size / block_width;
output_offset += block_height * pitch;
}
}
}
-void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent,
+void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
VideoCore::Surface::PixelFormat pixel_format) {
switch (pixel_format) {
case PixelFormat::BC1_RGBA_UNORM:
case PixelFormat::BC1_RGBA_SRGB:
- DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent);
+ DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, copy);
break;
case PixelFormat::BC2_UNORM:
case PixelFormat::BC2_SRGB:
- DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent);
+ DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, copy);
break;
case PixelFormat::BC3_UNORM:
case PixelFormat::BC3_SRGB:
- DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent);
+ DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, copy);
break;
case PixelFormat::BC4_SNORM:
case PixelFormat::BC4_UNORM:
DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>(
- input, output, extent, pixel_format == PixelFormat::BC4_SNORM);
+ input, output, copy, pixel_format == PixelFormat::BC4_SNORM);
break;
case PixelFormat::BC5_SNORM:
case PixelFormat::BC5_UNORM:
DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>(
- input, output, extent, pixel_format == PixelFormat::BC5_SNORM);
+ input, output, copy, pixel_format == PixelFormat::BC5_SNORM);
break;
case PixelFormat::BC6H_SFLOAT:
case PixelFormat::BC6H_UFLOAT:
DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>(
- input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT);
+ input, output, copy, pixel_format == PixelFormat::BC6H_SFLOAT);
break;
case PixelFormat::BC7_SRGB:
case PixelFormat::BC7_UNORM:
- DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent);
+ DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, copy);
break;
default:
LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format);
diff --git a/src/video_core/texture_cache/decode_bc.h b/src/video_core/texture_cache/decode_bc.h
index 41d1ec0a3..4e3b9b8ac 100644
--- a/src/video_core/texture_cache/decode_bc.h
+++ b/src/video_core/texture_cache/decode_bc.h
@@ -13,7 +13,7 @@ namespace VideoCommon {
[[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format);
-void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent,
+void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
VideoCore::Surface::PixelFormat pixel_format);
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 15596c925..fcf70068e 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -837,6 +837,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
std::span<u8> output) {
const size_t guest_size_bytes = input.size_bytes();
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
+ const Extent2D tile_size = DefaultBlockSize(info.format);
const Extent3D size = info.size;
if (info.type == ImageType::Linear) {
@@ -847,7 +848,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
return {{
.buffer_offset = 0,
.buffer_size = guest_size_bytes,
- .buffer_row_length = info.pitch >> bpp_log2,
+ .buffer_row_length = info.pitch * tile_size.width >> bpp_log2,
.buffer_image_height = size.height,
.image_subresource =
{
@@ -862,7 +863,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
const LevelInfo level_info = MakeLevelInfo(info);
const s32 num_layers = info.resources.layers;
const s32 num_levels = info.resources.levels;
- const Extent2D tile_size = DefaultBlockSize(info.format);
const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);
const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);
const u32 layer_size = CalculateLevelBytes(level_sizes, num_levels);
@@ -926,8 +926,6 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
const auto input_offset = input.subspan(copy.buffer_offset);
copy.buffer_offset = output_offset;
- copy.buffer_row_length = mip_size.width;
- copy.buffer_image_height = mip_size.height;
const auto recompression_setting = Settings::values.astc_recompression.GetValue();
const bool astc = IsPixelFormatASTC(info.format);
@@ -972,16 +970,14 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
bpp_div;
output_offset += static_cast<u32>(copy.buffer_size);
} else {
- const Extent3D image_extent{
- .width = copy.image_extent.width,
- .height = copy.image_extent.height * copy.image_subresource.num_layers,
- .depth = copy.image_extent.depth,
- };
- DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format);
+ DecompressBCn(input_offset, output.subspan(output_offset), copy, info.format);
output_offset += copy.image_extent.width * copy.image_extent.height *
copy.image_subresource.num_layers *
ConvertedBytesPerBlock(info.format);
}
+
+ copy.buffer_row_length = mip_size.width;
+ copy.buffer_image_height = mip_size.height;
}
}