summaryrefslogtreecommitdiffstats
path: root/src/video_core/texture_cache/util.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/texture_cache/util.cpp239
1 files changed, 192 insertions, 47 deletions
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 03acc68d9..f781cb7a0 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -18,6 +18,8 @@
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
+#include "common/scratch_buffer.h"
+#include "common/settings.h"
#include "video_core/compatible_formats.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
@@ -28,6 +30,7 @@
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/astc.h"
+#include "video_core/textures/bcn.h"
#include "video_core/textures/decoders.h"
namespace VideoCommon {
@@ -120,7 +123,9 @@ template <u32 GOB_EXTENT>
return {
.width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
.height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
- .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
+ .depth = level == 0
+ ? block_size.depth
+ : AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
};
}
@@ -162,6 +167,13 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
+ if (level == 0) {
+ return Extent3D{
+ .width = info.block.width,
+ .height = info.block.height,
+ .depth = info.block.depth,
+ };
+ }
const Extent3D blocks = NumLevelBlocks(info, level);
return Extent3D{
.width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
@@ -317,13 +329,13 @@ template <u32 GOB_EXTENT>
[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
- const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info);
+ const auto slice_offsets = CalculateSliceOffsets(new_info);
const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
const auto it = std::ranges::find(slice_offsets, diff);
if (it == slice_offsets.end()) {
return std::nullopt;
}
- const std::vector subresources = CalculateSliceSubresources(new_info);
+ const auto subresources = CalculateSliceSubresources(new_info);
const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
const ImageInfo& info = overlap.info;
if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
@@ -573,10 +585,6 @@ u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept {
if (info.type == ImageType::Buffer) {
return info.size.width * BytesPerBlock(info.format);
}
- if (info.num_samples > 1) {
- // Multisample images can't be uploaded or downloaded to the host
- return 0;
- }
if (info.type == ImageType::Linear) {
return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
}
@@ -589,6 +597,21 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
return info.size.width * BytesPerBlock(info.format);
}
static constexpr Extent2D TILE_SIZE{1, 1};
+ if (IsPixelFormatASTC(info.format) && Settings::values.astc_recompression.GetValue() !=
+ Settings::AstcRecompression::Uncompressed) {
+ const u32 bpp_div =
+ Settings::values.astc_recompression.GetValue() == Settings::AstcRecompression::Bc1 ? 2
+ : 1;
+ // NumBlocksPerLayer doesn't account for this correctly, so we have to do it manually.
+ u32 output_size = 0;
+ for (s32 i = 0; i < info.resources.levels; i++) {
+ const auto mip_size = AdjustMipSize(info.size, i);
+ const u32 plane_dim =
+ Common::AlignUp(mip_size.width, 4U) * Common::AlignUp(mip_size.height, 4U);
+ output_size += (plane_dim * info.size.depth * info.resources.layers) / bpp_div;
+ }
+ return output_size;
+ }
return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
}
@@ -632,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
return sizes;
}
-std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
+boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D);
- std::vector<u32> offsets;
+ boost::container::small_vector<u32, 16> offsets;
offsets.reserve(NumSlices(info));
const LevelInfo level_info = MakeLevelInfo(info);
@@ -656,9 +679,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
return offsets;
}
-std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) {
+boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
+ const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D);
- std::vector<SubresourceBase> subresources;
+ boost::container::small_vector<SubresourceBase, 16> subresources;
subresources.reserve(NumSlices(info));
for (s32 level = 0; level < info.resources.levels; ++level) {
const s32 depth = AdjustMipSize(info.size.depth, level);
@@ -700,10 +724,11 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
}
}
-std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
- SubresourceBase base, u32 up_scale, u32 down_shift) {
+boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst,
+ const ImageInfo& src,
+ SubresourceBase base,
+ u32 up_scale, u32 down_shift) {
ASSERT(dst.resources.levels >= src.resources.levels);
- ASSERT(dst.num_samples == src.num_samples);
const bool is_dst_3d = dst.type == ImageType::e3D;
if (is_dst_3d) {
@@ -711,7 +736,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
ASSERT(src.resources.levels == 1);
}
const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D};
- std::vector<ImageCopy> copies;
+ boost::container::small_vector<ImageCopy, 16> copies;
copies.reserve(src.resources.levels);
for (s32 level = 0; level < src.resources.levels; ++level) {
ImageCopy& copy = copies.emplace_back();
@@ -748,6 +773,45 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
return copies;
}
+boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src,
+ u32 up_scale,
+ u32 down_shift) {
+ boost::container::small_vector<ImageCopy, 16> copies;
+ copies.reserve(src.resources.levels);
+ const bool is_3d = src.type == ImageType::e3D;
+ for (s32 level = 0; level < src.resources.levels; ++level) {
+ ImageCopy& copy = copies.emplace_back();
+ copy.src_subresource = SubresourceLayers{
+ .base_level = level,
+ .base_layer = 0,
+ .num_layers = src.resources.layers,
+ };
+ copy.dst_subresource = SubresourceLayers{
+ .base_level = level,
+ .base_layer = 0,
+ .num_layers = src.resources.layers,
+ };
+ copy.src_offset = Offset3D{
+ .x = 0,
+ .y = 0,
+ .z = 0,
+ };
+ copy.dst_offset = Offset3D{
+ .x = 0,
+ .y = 0,
+ .z = 0,
+ };
+ const Extent3D mip_size = AdjustMipSize(src.size, level);
+ copy.extent = AdjustSamplesSize(mip_size, src.num_samples);
+ if (is_3d) {
+ copy.extent.depth = src.size.depth;
+ }
+ copy.extent.width = std::max<u32>((copy.extent.width * up_scale) >> down_shift, 1);
+ copy.extent.height = std::max<u32>((copy.extent.height * up_scale) >> down_shift, 1);
+ }
+ return copies;
+}
+
bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
const GPUVAddr address = config.Address();
if (address == 0) {
@@ -764,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
}
-std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
- const ImageInfo& info, std::span<const u8> input,
- std::span<u8> output) {
+boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
+ GPUVAddr gpu_addr,
+ const ImageInfo& info,
+ std::span<const u8> input,
+ std::span<u8> output) {
const size_t guest_size_bytes = input.size_bytes();
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
const Extent3D size = info.size;
@@ -801,7 +867,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
info.tile_width_spacing);
size_t guest_offset = 0;
u32 host_offset = 0;
- std::vector<BufferImageCopy> copies(num_levels);
+ boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
@@ -852,6 +918,7 @@ BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
std::span<BufferImageCopy> copies) {
u32 output_offset = 0;
+ Common::ScratchBuffer<u8> decode_scratch;
const Extent2D tile_size = DefaultBlockSize(info.format);
for (BufferImageCopy& copy : copies) {
@@ -862,26 +929,62 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
ASSERT(copy.image_extent == mip_size);
ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
- if (IsPixelFormatASTC(info.format)) {
- ASSERT(copy.image_extent.depth == 1);
- Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset),
- copy.image_extent.width, copy.image_extent.height,
- copy.image_subresource.num_layers, tile_size.width,
- tile_size.height, output.subspan(output_offset));
- } else {
- DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
- output.subspan(output_offset));
- }
+
+ const auto input_offset = input.subspan(copy.buffer_offset);
copy.buffer_offset = output_offset;
copy.buffer_row_length = mip_size.width;
copy.buffer_image_height = mip_size.height;
- output_offset += copy.image_extent.width * copy.image_extent.height *
- copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
+ const auto recompression_setting = Settings::values.astc_recompression.GetValue();
+ const bool astc = IsPixelFormatASTC(info.format);
+
+ if (astc && recompression_setting == Settings::AstcRecompression::Uncompressed) {
+ Tegra::Texture::ASTC::Decompress(
+ input_offset, copy.image_extent.width, copy.image_extent.height,
+ copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width,
+ tile_size.height, output.subspan(output_offset));
+
+ output_offset += copy.image_extent.width * copy.image_extent.height *
+ copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
+ } else if (astc) {
+ // BC1 uses 0.5 bytes per texel
+ // BC3 uses 1 byte per texel
+ const auto compress = recompression_setting == Settings::AstcRecompression::Bc1
+ ? Tegra::Texture::BCN::CompressBC1
+ : Tegra::Texture::BCN::CompressBC3;
+ const auto bpp_div = recompression_setting == Settings::AstcRecompression::Bc1 ? 2 : 1;
+
+ const u32 plane_dim = copy.image_extent.width * copy.image_extent.height;
+ const u32 level_size = plane_dim * copy.image_extent.depth *
+ copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
+ decode_scratch.resize_destructive(level_size);
+
+ Tegra::Texture::ASTC::Decompress(
+ input_offset, copy.image_extent.width, copy.image_extent.height,
+ copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width,
+ tile_size.height, decode_scratch);
+
+ compress(decode_scratch, copy.image_extent.width, copy.image_extent.height,
+ copy.image_subresource.num_layers * copy.image_extent.depth,
+ output.subspan(output_offset));
+
+ const u32 aligned_plane_dim = Common::AlignUp(copy.image_extent.width, 4) *
+ Common::AlignUp(copy.image_extent.height, 4);
+
+ copy.buffer_size =
+ (aligned_plane_dim * copy.image_extent.depth * copy.image_subresource.num_layers) /
+ bpp_div;
+ output_offset += static_cast<u32>(copy.buffer_size);
+ } else {
+ DecompressBC4(input_offset, copy.image_extent, output.subspan(output_offset));
+
+ output_offset += copy.image_extent.width * copy.image_extent.height *
+ copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
+ }
}
}
-std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
+boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
const Extent3D size = info.size;
const u32 bytes_per_block = BytesPerBlock(info.format);
if (info.type == ImageType::Linear) {
@@ -909,7 +1012,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
u32 host_offset = 0;
- std::vector<BufferImageCopy> copies(num_levels);
+ boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
@@ -945,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
return AdjustMipBlockSize(num_tiles, level_info.block, level);
}
-std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
+boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) {
const Extent2D tile_size = DefaultBlockSize(info.format);
if (info.type == ImageType::Linear) {
- return std::vector{SwizzleParameters{
+ return {SwizzleParameters{
.num_tiles = AdjustTileSize(info.size, tile_size),
.block = {},
.buffer_offset = 0,
@@ -960,7 +1063,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
const s32 num_levels = info.resources.levels;
u32 guest_offset = 0;
- std::vector<SwizzleParameters> params(num_levels);
+ boost::container::small_vector<SwizzleParameters, 16> params(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
@@ -1004,6 +1107,20 @@ bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32
}
}
+bool IsBlockLinearSizeCompatibleBPPRelaxed(const ImageInfo& lhs, const ImageInfo& rhs,
+ u32 lhs_level, u32 rhs_level) noexcept {
+ ASSERT(lhs.type != ImageType::Linear);
+ ASSERT(rhs.type != ImageType::Linear);
+ const auto lhs_bpp = BytesPerBlock(lhs.format);
+ const auto rhs_bpp = BytesPerBlock(rhs.format);
+ const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level);
+ const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level);
+ return Common::AlignUpLog2(lhs_size.width * lhs_bpp, GOB_SIZE_X_SHIFT) ==
+ Common::AlignUpLog2(rhs_size.width * rhs_bpp, GOB_SIZE_X_SHIFT) &&
+ Common::AlignUpLog2(lhs_size.height, GOB_SIZE_Y_SHIFT) ==
+ Common::AlignUpLog2(rhs_size.height, GOB_SIZE_Y_SHIFT);
+}
+
bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept {
ASSERT(lhs.type == ImageType::Linear);
ASSERT(rhs.type == ImageType::Linear);
@@ -1078,7 +1195,8 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const
// Format checking is relaxed, but we still have to check for matching bytes per block.
// This avoids creating a view for blits on UE4 titles where formats with different bytes
// per block are aliased.
- if (BytesPerBlock(existing.format) != BytesPerBlock(candidate.format)) {
+ if (BytesPerBlock(existing.format) != BytesPerBlock(candidate.format) &&
+ False(options & RelaxedOptions::FormatBpp)) {
return std::nullopt;
}
} else {
@@ -1093,10 +1211,8 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const
if (existing.type != candidate.type) {
return std::nullopt;
}
- if (False(options & RelaxedOptions::Samples)) {
- if (existing.num_samples != candidate.num_samples) {
- return std::nullopt;
- }
+ if (False(options & RelaxedOptions::Samples) && existing.num_samples != candidate.num_samples) {
+ return std::nullopt;
}
if (existing.resources.levels < candidate.resources.levels + base->level) {
return std::nullopt;
@@ -1106,14 +1222,16 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const
if (mip_depth < candidate.size.depth + base->layer) {
return std::nullopt;
}
- } else {
- if (existing.resources.layers < candidate.resources.layers + base->layer) {
- return std::nullopt;
- }
+ } else if (existing.resources.layers < candidate.resources.layers + base->layer) {
+ return std::nullopt;
}
const bool strict_size = False(options & RelaxedOptions::Size);
if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) {
- return std::nullopt;
+ if (False(options & RelaxedOptions::FormatBpp)) {
+ return std::nullopt;
+ } else if (!IsBlockLinearSizeCompatibleBPPRelaxed(existing, candidate, base->level, 0)) {
+ return std::nullopt;
+ }
}
// TODO: compare block sizes
return base;
@@ -1125,6 +1243,31 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr
.has_value();
}
+bool IsSubCopy(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr) {
+ const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
+ if (!base) {
+ return false;
+ }
+ const ImageInfo& existing = image.info;
+ if (existing.resources.levels < candidate.resources.levels + base->level) {
+ return false;
+ }
+ if (existing.type == ImageType::e3D) {
+ const u32 mip_depth = std::max(1U, existing.size.depth << base->level);
+ if (mip_depth < candidate.size.depth + base->layer) {
+ return false;
+ }
+ } else {
+ if (existing.resources.layers < candidate.resources.layers + base->layer) {
+ return false;
+ }
+ }
+ if (!IsBlockLinearSizeCompatibleBPPRelaxed(existing, candidate, base->level, 0)) {
+ return false;
+ }
+ return true;
+}
+
void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
const ImageBase* src) {
const auto original_dst_format = dst_info.format;
@@ -1160,7 +1303,9 @@ u32 MapSizeBytes(const ImageBase& image) {
static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0}, 0) ==
0x7f8000);
-static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
+static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x40000);
+
+static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0}, 0) == 0x40000);
static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) ==
0x2afc00);