diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 45 | ||||
-rw-r--r-- | src/video_core/compatible_formats.cpp | 49 | ||||
-rw-r--r-- | src/video_core/compatible_formats.h | 5 | ||||
-rw-r--r-- | src/video_core/host_shaders/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/video_core/host_shaders/opengl_copy_bgra.comp | 15 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 20 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 5 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/util_shaders.cpp | 76 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/util_shaders.h | 22 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_command_pool.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 5 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_base.cpp | 5 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_view_base.cpp | 2 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 15 | ||||
-rw-r--r-- | src/video_core/texture_cache/util.cpp | 13 | ||||
-rw-r--r-- | src/video_core/texture_cache/util.h | 9 |
17 files changed, 250 insertions, 42 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2a6844ab1..4de1e37e5 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -9,6 +9,7 @@ #include <deque> #include <memory> #include <mutex> +#include <numeric> #include <span> #include <unordered_map> #include <vector> @@ -91,7 +92,7 @@ class BufferCache { }; public: - static constexpr u32 SKIP_CACHE_SIZE = 4096; + static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4096; explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -240,9 +241,9 @@ private: template <bool insert> void ChangeRegister(BufferId buffer_id); - void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); + bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); - void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); + bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, std::span<BufferCopy> copies); @@ -297,6 +298,11 @@ private: std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; + std::array<u32, 16> uniform_cache_hits{}; + std::array<u32, 16> uniform_cache_shots{}; + + u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; + bool has_deleted_buffers = false; std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> @@ -328,6 +334,19 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, template <class P> void BufferCache<P>::TickFrame() { + // Calculate hits and shots and move hit bits to the right + const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); + const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); + std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, + uniform_cache_hits.begin() + 1); + std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, + uniform_cache_shots.begin() + 1); + uniform_cache_hits[0] = 0; + uniform_cache_shots[0] = 0; + + const bool skip_preferred = hits * 256 < shots * 251; + uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; + delayed_destruction_ring.Tick(); } @@ -671,7 +690,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const VAddr cpu_addr = binding.cpu_addr; const u32 size = binding.size; Buffer& buffer = slot_buffers[binding.buffer_id]; - if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { + if (size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size)) { if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia @@ -692,7 +711,12 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 return; } // Classic cached path - SynchronizeBuffer(buffer, cpu_addr, size); + const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size); + if (sync_cached) { + ++uniform_cache_hits[0]; + } + ++uniform_cache_shots[0]; + if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { // Skip binding if it's not needed and if the bound buffer is not the fast version // This exists to avoid instances where the fast buffer is bound and a GPU write happens @@ -1106,15 +1130,15 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { } template <class P> -void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { +bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { if (buffer.CpuAddr() == 0) { - return; + return true; } - SynchronizeBufferImpl(buffer, cpu_addr, size); + return SynchronizeBufferImpl(buffer, cpu_addr, size); } template <class P> -void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) { +bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) { boost::container::small_vector<BufferCopy, 4> copies; u64 total_size_bytes = 0; u64 largest_copy = 0; @@ -1128,10 +1152,11 @@ void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s largest_copy = std::max(largest_copy, range_size); }); if (total_size_bytes == 0) { - return; + return true; } const std::span<BufferCopy> copies_span(copies.data(), copies.size()); UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); + return false; } template <class P> diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp index acf2668dc..8317d0636 100644 --- a/src/video_core/compatible_formats.cpp +++ b/src/video_core/compatible_formats.cpp @@ -48,6 +48,15 @@ constexpr std::array VIEW_CLASS_32_BITS{ PixelFormat::A2B10G10R10_UINT, }; +constexpr std::array VIEW_CLASS_32_BITS_NO_BGR{ + PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT, + PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT, + PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM, + PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM, + PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::A8B8G8R8_UINT, + PixelFormat::A8B8G8R8_SINT, PixelFormat::A2B10G10R10_UINT, +}; + // TODO: How should we handle 24 bits? constexpr std::array VIEW_CLASS_16_BITS{ @@ -205,7 +214,6 @@ constexpr Table MakeViewTable() { EnableRange(view, VIEW_CLASS_128_BITS); EnableRange(view, VIEW_CLASS_96_BITS); EnableRange(view, VIEW_CLASS_64_BITS); - EnableRange(view, VIEW_CLASS_32_BITS); EnableRange(view, VIEW_CLASS_16_BITS); EnableRange(view, VIEW_CLASS_8_BITS); EnableRange(view, VIEW_CLASS_RGTC1_RED); @@ -231,20 +239,47 @@ constexpr Table MakeCopyTable() { EnableRange(copy, COPY_CLASS_64_BITS); return copy; } + +constexpr Table MakeNativeBgrViewTable() { + Table copy = MakeViewTable(); + EnableRange(copy, VIEW_CLASS_32_BITS); + return copy; +} + +constexpr Table MakeNonNativeBgrViewTable() { + Table copy = MakeViewTable(); + EnableRange(copy, VIEW_CLASS_32_BITS_NO_BGR); + return copy; +} + +constexpr Table MakeNativeBgrCopyTable() { + Table copy = MakeCopyTable(); + EnableRange(copy, VIEW_CLASS_32_BITS); + return copy; +} + +constexpr Table MakeNonNativeBgrCopyTable() { + Table copy = MakeCopyTable(); + EnableRange(copy, VIEW_CLASS_32_BITS); + return copy; +} } // Anonymous namespace -bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views) { +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views, + bool native_bgr) { if (broken_views) { // If format views are broken, only accept formats that are identical. return format_a == format_b; } - static constexpr Table TABLE = MakeViewTable(); - return IsSupported(TABLE, format_a, format_b); + static constexpr Table BGR_TABLE = MakeNativeBgrViewTable(); + static constexpr Table NO_BGR_TABLE = MakeNonNativeBgrViewTable(); + return IsSupported(native_bgr ? BGR_TABLE : NO_BGR_TABLE, format_a, format_b); } -bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) { - static constexpr Table TABLE = MakeCopyTable(); - return IsSupported(TABLE, format_a, format_b); +bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b, bool native_bgr) { + static constexpr Table BGR_TABLE = MakeNativeBgrCopyTable(); + static constexpr Table NO_BGR_TABLE = MakeNonNativeBgrCopyTable(); + return IsSupported(native_bgr ? BGR_TABLE : NO_BGR_TABLE, format_a, format_b); } } // namespace VideoCore::Surface diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h index 9a0522988..55745e042 100644 --- a/src/video_core/compatible_formats.h +++ b/src/video_core/compatible_formats.h @@ -8,8 +8,9 @@ namespace VideoCore::Surface { -bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views); +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views, + bool native_bgr); -bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b); +bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b, bool native_bgr); } // namespace VideoCore::Surface diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 970120acc..3494318ca 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -5,6 +5,7 @@ set(SHADER_FILES convert_float_to_depth.frag full_screen_triangle.vert opengl_copy_bc4.comp + opengl_copy_bgra.comp opengl_present.frag opengl_present.vert pitch_unswizzle.comp diff --git a/src/video_core/host_shaders/opengl_copy_bgra.comp b/src/video_core/host_shaders/opengl_copy_bgra.comp new file mode 100644 index 000000000..2571a4abf --- /dev/null +++ b/src/video_core/host_shaders/opengl_copy_bgra.comp @@ -0,0 +1,15 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 430 core + +layout (local_size_x = 4, local_size_y = 4) in; + +layout(binding = 0, rgba8) readonly uniform image2DArray bgr_input; +layout(binding = 1, rgba8) writeonly uniform image2DArray bgr_output; + +void main() { + vec4 color = imageLoad(bgr_input, ivec3(gl_GlobalInvocationID)); + imageStore(bgr_output, ivec3(gl_GlobalInvocationID), color.bgra); +} diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 6da3906a4..c225d1fc9 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -73,7 +73,8 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_) for (auto& stage_uniforms : fast_uniforms) { for (OGLBuffer& buffer : stage_uniforms) { buffer.Create(); - glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); + glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr, + GL_STREAM_DRAW); } } for (auto& stage_uniforms : copy_uniforms) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 12434db67..e028677e9 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -96,7 +96,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT @@ -125,7 +125,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB @@ -396,6 +396,17 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } } +[[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) { + switch (format) { + case PixelFormat::B5G6R5_UNORM: + case PixelFormat::B8G8R8A8_UNORM: + case PixelFormat::B8G8R8A8_SRGB: + return true; + default: + return false; + } +} + } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { @@ -512,6 +523,9 @@ bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { return false; } + if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { + return false; + } return true; } @@ -520,6 +534,8 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { ASSERT(src.info.type == ImageType::e3D); util_shaders.CopyBC4(dst, src, copies); + } else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { + util_shaders.CopyBGR(dst, src, copies); } else { UNREACHABLE(); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index a6172f009..3fbaa102f 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -86,6 +86,11 @@ public: FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; + bool HasNativeBgr() const noexcept { + // OpenGL does not have native support for the BGR internal format + return false; + } + bool HasBrokenTextureViewFormats() const noexcept { return has_broken_texture_view_formats; } diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 31ec68505..2fe4799bc 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -14,6 +14,7 @@ #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" #include "video_core/host_shaders/opengl_copy_bc4_comp.h" +#include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" @@ -48,6 +49,11 @@ OGLProgram MakeProgram(std::string_view source) { return program; } +size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { + return static_cast<size_t>(copy.extent.width * copy.extent.height * + copy.src_subresource.num_layers); +} + } // Anonymous namespace UtilShaders::UtilShaders(ProgramManager& program_manager_) @@ -55,6 +61,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), + copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)), copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); swizzle_table_buffer.Create(); @@ -205,6 +212,43 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im program_manager.RestoreGuestCompute(); } +void UtilShaders::CopyBGR(Image& dst_image, Image& src_image, + std::span<const VideoCommon::ImageCopy> copies) { + static constexpr GLuint BINDING_INPUT_IMAGE = 0; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; + static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; + const u32 bytes_per_block = BytesPerBlock(dst_image.info.format); + switch (bytes_per_block) { + case 2: + // BGR565 copy + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_offset == zero_offset); + ASSERT(copy.dst_offset == zero_offset); + bgr_copy_pass.Execute(dst_image, src_image, copy); + } + break; + case 4: { + // BGRA8 copy + program_manager.BindHostCompute(copy_bgra_program.handle); + constexpr GLenum FORMAT = GL_RGBA8; + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_offset == zero_offset); + ASSERT(copy.dst_offset == zero_offset); + glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), + copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT); + glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), + copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT); + glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); + } + program_manager.RestoreGuestCompute(); + break; + } + default: + UNREACHABLE(); + break; + } +} + GLenum StoreFormat(u32 bytes_per_block) { switch (bytes_per_block) { case 1: @@ -222,4 +266,36 @@ GLenum StoreFormat(u32 bytes_per_block) { return GL_R8UI; } +void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image, + const ImageCopy& copy) { + if (CopyBufferCreationNeeded(copy)) { + CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565); + } + // Copy from source to PBO + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); + glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle); + glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, + copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, + static_cast<GLsizei>(bgr16_pbo_size), nullptr); + + // Copy from PBO to destination in reverse order + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle); + glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, + copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, + nullptr); +} + +bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) { + return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16); +} + +void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) { + bgr16_pbo.Create(); + bgr16_pbo_size = NumPixelsInCopy(copy) * sizeof(u16); + glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 7b1d16b09..93b009743 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -19,6 +19,22 @@ class ProgramManager; struct ImageBufferMap; +class Bgr565CopyPass { +public: + Bgr565CopyPass() = default; + ~Bgr565CopyPass() = default; + + void Execute(const Image& dst_image, const Image& src_image, + const VideoCommon::ImageCopy& copy); + +private: + [[nodiscard]] bool CopyBufferCreationNeeded(const VideoCommon::ImageCopy& copy); + void CreateNewCopyBuffer(const VideoCommon::ImageCopy& copy, GLenum target, GLuint format); + + OGLBuffer bgr16_pbo; + size_t bgr16_pbo_size{}; +}; + class UtilShaders { public: explicit UtilShaders(ProgramManager& program_manager); @@ -36,6 +52,9 @@ public: void CopyBC4(Image& dst_image, Image& src_image, std::span<const VideoCommon::ImageCopy> copies); + void CopyBGR(Image& dst_image, Image& src_image, + std::span<const VideoCommon::ImageCopy> copies); + private: ProgramManager& program_manager; @@ -44,7 +63,10 @@ private: OGLProgram block_linear_unswizzle_2d_program; OGLProgram block_linear_unswizzle_3d_program; OGLProgram pitch_unswizzle_program; + OGLProgram copy_bgra_program; OGLProgram copy_bc4_program; + + Bgr565CopyPass bgr_copy_pass; }; GLenum StoreFormat(u32 bytes_per_block); diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp index a99df9323..d8e92ac0e 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp @@ -10,7 +10,7 @@ namespace Vulkan { -constexpr size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; +constexpr size_t COMMAND_BUFFER_POOL_SIZE = 4; struct CommandPool::Pool { vk::CommandPool handle; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index b08c23459..3aee27ce0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -93,6 +93,11 @@ struct TextureCacheRuntime { // No known Vulkan driver has broken image views return false; } + + bool HasNativeBgr() const noexcept { + // All known Vulkan drivers can natively handle BGR textures + return true; + } }; class Image : public VideoCommon::ImageBase { diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 959b3f115..9914926b3 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -120,9 +120,10 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i if (lhs.info.type == ImageType::Linear) { base = SubresourceBase{.level = 0, .layer = 0}; } else { - // We are passing relaxed formats as an option, having broken views or not won't matter + // We are passing relaxed formats as an option, having broken views/bgr or not won't matter static constexpr bool broken_views = false; - base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views); + static constexpr bool native_bgr = true; + base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views, native_bgr); } if (!base) { LOG_ERROR(HW_GPU, "Image alias should have been flipped"); diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index 18f72e508..f89a40b4c 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -24,7 +24,7 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i .height = std::max(image_info.size.height >> range.base.level, 1u), .depth = std::max(image_info.size.depth >> range.base.level, 1u), } { - ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false), + ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false, true), "Image view format {} is incompatible with image format {}", info.format, image_info.format); const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b1da69971..98e33c3a0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -876,6 +876,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, return ImageId{}; } const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool native_bgr = runtime.HasNativeBgr(); ImageId image_id; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { @@ -885,11 +886,12 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && existing.pitch == info.pitch && IsPitchLinearSameSize(existing, info, strict_size) && - IsViewCompatible(existing.format, info.format, broken_views)) { + IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { image_id = existing_image_id; return true; } - } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) { + } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, + native_bgr)) { image_id = existing_image_id; return true; } @@ -920,6 +922,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA ImageInfo new_info = info; const size_t size_bytes = CalculateGuestSizeInBytes(new_info); const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool native_bgr = runtime.HasNativeBgr(); std::vector<ImageId> overlap_ids; std::vector<ImageId> left_aliased_ids; std::vector<ImageId> right_aliased_ids; @@ -935,8 +938,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA return; } static constexpr bool strict_size = true; - const std::optional<OverlapResult> solution = - ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views); + const std::optional<OverlapResult> solution = ResolveOverlap( + new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); if (solution) { gpu_addr = solution->gpu_addr; cpu_addr = solution->cpu_addr; @@ -946,10 +949,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); - if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) { + if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { left_aliased_ids.push_back(overlap_id); } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, - broken_views)) { + broken_views, native_bgr)) { right_aliased_ids.push_back(overlap_id); } }); diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index a0bc1f7b6..2c42d1449 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1035,13 +1035,13 @@ bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool stri std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, VAddr cpu_addr, const ImageBase& overlap, - bool strict_size, bool broken_views) { + bool strict_size, bool broken_views, bool native_bgr) { ASSERT(new_info.type != ImageType::Linear); ASSERT(overlap.info.type != ImageType::Linear); if (!IsLayerStrideCompatible(new_info, overlap.info)) { return std::nullopt; } - if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) { + if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views, native_bgr)) { return std::nullopt; } if (gpu_addr == overlap.gpu_addr) { @@ -1085,14 +1085,14 @@ bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) { std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, RelaxedOptions options, - bool broken_views) { + bool broken_views, bool native_bgr) { const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr); if (!base) { return std::nullopt; } const ImageInfo& existing = image.info; if (False(options & RelaxedOptions::Format)) { - if (!IsViewCompatible(existing.format, candidate.format, broken_views)) { + if (!IsViewCompatible(existing.format, candidate.format, broken_views, native_bgr)) { return std::nullopt; } } @@ -1129,8 +1129,9 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const } bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, - RelaxedOptions options, bool broken_views) { - return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value(); + RelaxedOptions options, bool broken_views, bool native_bgr) { + return FindSubresource(candidate, image, candidate_addr, options, broken_views, native_bgr) + .has_value(); } void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 52a9207d6..4d0072867 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -87,7 +87,8 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima [[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, VAddr cpu_addr, const ImageBase& overlap, - bool strict_size, bool broken_views); + bool strict_size, bool broken_views, + bool native_bgr); [[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs); @@ -95,11 +96,11 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima const ImageBase& image, GPUVAddr candidate_addr, RelaxedOptions options, - bool broken_views); + bool broken_views, bool native_bgr); [[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, - GPUVAddr candidate_addr, RelaxedOptions options, - bool broken_views); + GPUVAddr candidate_addr, RelaxedOptions options, bool broken_views, + bool native_bgr); void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, const ImageBase* src); |