summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h 45
-rw-r--r-- src/video_core/compatible_formats.cpp 49
-rw-r--r-- src/video_core/compatible_formats.h 5
-rw-r--r-- src/video_core/host_shaders/CMakeLists.txt 1
-rw-r--r-- src/video_core/host_shaders/opengl_copy_bgra.comp 15
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 3
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp 20
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.h 5
-rw-r--r-- src/video_core/renderer_opengl/util_shaders.cpp 76
-rw-r--r-- src/video_core/renderer_opengl/util_shaders.h 22
-rw-r--r-- src/video_core/renderer_vulkan/vk_command_pool.cpp 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.h 5
-rw-r--r-- src/video_core/texture_cache/image_base.cpp 5
-rw-r--r-- src/video_core/texture_cache/image_view_base.cpp 2
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 15
-rw-r--r-- src/video_core/texture_cache/util.cpp 13
-rw-r--r-- src/video_core/texture_cache/util.h 9
17 files changed, 250 insertions, 42 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2a6844ab1..4de1e37e5 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -9,6 +9,7 @@
#include <deque>
#include <memory>
#include <mutex>
+#include <numeric>
#include <span>
#include <unordered_map>
#include <vector>
@@ -91,7 +92,7 @@ class BufferCache {
};
public:
- static constexpr u32 SKIP_CACHE_SIZE = 4096;
+ static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4096;
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -240,9 +241,9 @@ private:
template <bool insert>
void ChangeRegister(BufferId buffer_id);
- void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
+ bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
- void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
+ bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies);
@@ -297,6 +298,11 @@ private:
std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
+ std::array<u32, 16> uniform_cache_hits{};
+ std::array<u32, 16> uniform_cache_shots{};
+
+ u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
+
bool has_deleted_buffers = false;
std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
@@ -328,6 +334,19 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
template <class P>
void BufferCache<P>::TickFrame() {
+    // Sum the uniform buffer statistics over the whole history window. "Shots" counts every
+    // synchronization on the cached path; "hits" counts the ones that reported true.
+    const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
+    const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
+    // Age the history by one frame: move every slot one position to the right and drop the
+    // oldest entry. The destination starts inside the source range, so the elements have to be
+    // copied back-to-front; std::copy_n gives no ordering guarantee for overlapping ranges and a
+    // front-to-back copy would smear slot 0 over the whole window.
+    std::copy_backward(uniform_cache_hits.begin(), uniform_cache_hits.end() - 1,
+                       uniform_cache_hits.end());
+    std::copy_backward(uniform_cache_shots.begin(), uniform_cache_shots.end() - 1,
+                       uniform_cache_shots.end());
+    // Slot 0 accumulates the statistics of the frame that starts now.
+    uniform_cache_hits[0] = 0;
+    uniform_cache_shots[0] = 0;
+
+    // Keep the skip-cache path enabled while fewer than 251 out of 256 shots were hits;
+    // otherwise (including when nothing was recorded) set the size limit to 0, which disables it.
+    const bool skip_preferred = hits * 256 < shots * 251;
+    uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
+
delayed_destruction_ring.Tick();
}
@@ -671,7 +690,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
const VAddr cpu_addr = binding.cpu_addr;
const u32 size = binding.size;
Buffer& buffer = slot_buffers[binding.buffer_id];
- if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
+ if (size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size)) {
if constexpr (IS_OPENGL) {
if (runtime.HasFastBufferSubData()) {
// Fast path for Nvidia
@@ -692,7 +711,12 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
return;
}
// Classic cached path
- SynchronizeBuffer(buffer, cpu_addr, size);
+ const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size);
+ if (sync_cached) {
+ ++uniform_cache_hits[0];
+ }
+ ++uniform_cache_shots[0];
+
if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
// Skip binding if it's not needed and if the bound buffer is not the fast version
// This exists to avoid instances where the fast buffer is bound and a GPU write happens
@@ -1106,15 +1130,15 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
}
template <class P>
-void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
+bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
if (buffer.CpuAddr() == 0) {
- return;
+ return true;
}
- SynchronizeBufferImpl(buffer, cpu_addr, size);
+ return SynchronizeBufferImpl(buffer, cpu_addr, size);
}
template <class P>
-void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
+bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
boost::container::small_vector<BufferCopy, 4> copies;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
@@ -1128,10 +1152,11 @@ void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
largest_copy = std::max(largest_copy, range_size);
});
if (total_size_bytes == 0) {
- return;
+ return true;
}
const std::span<BufferCopy> copies_span(copies.data(), copies.size());
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
+ return false;
}
template <class P>
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index acf2668dc..8317d0636 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -48,6 +48,15 @@ constexpr std::array VIEW_CLASS_32_BITS{
PixelFormat::A2B10G10R10_UINT,
};
+// 32-bit view class enabled by MakeNonNativeBgrViewTable, i.e. the one selected when the caller
+// reports that the host has no native BGR support.
+constexpr std::array VIEW_CLASS_32_BITS_NO_BGR{
+    PixelFormat::R16G16_FLOAT,
+    PixelFormat::B10G11R11_FLOAT,
+    PixelFormat::R32_FLOAT,
+    PixelFormat::A2B10G10R10_UNORM,
+    PixelFormat::R16G16_UINT,
+    PixelFormat::R32_UINT,
+    PixelFormat::R16G16_SINT,
+    PixelFormat::R32_SINT,
+    PixelFormat::A8B8G8R8_UNORM,
+    PixelFormat::R16G16_UNORM,
+    PixelFormat::A8B8G8R8_SNORM,
+    PixelFormat::R16G16_SNORM,
+    PixelFormat::A8B8G8R8_SRGB,
+    PixelFormat::E5B9G9R9_FLOAT,
+    PixelFormat::A8B8G8R8_UINT,
+    PixelFormat::A8B8G8R8_SINT,
+    PixelFormat::A2B10G10R10_UINT,
+};
+
+
// TODO: How should we handle 24 bits?
constexpr std::array VIEW_CLASS_16_BITS{
@@ -205,7 +214,6 @@ constexpr Table MakeViewTable() {
EnableRange(view, VIEW_CLASS_128_BITS);
EnableRange(view, VIEW_CLASS_96_BITS);
EnableRange(view, VIEW_CLASS_64_BITS);
- EnableRange(view, VIEW_CLASS_32_BITS);
EnableRange(view, VIEW_CLASS_16_BITS);
EnableRange(view, VIEW_CLASS_8_BITS);
EnableRange(view, VIEW_CLASS_RGTC1_RED);
@@ -231,20 +239,47 @@ constexpr Table MakeCopyTable() {
EnableRange(copy, COPY_CLASS_64_BITS);
return copy;
}
+
+// Builds the view compatibility table used when native_bgr is true: the base view table with
+// every format of VIEW_CLASS_32_BITS enabled on top.
+constexpr Table MakeNativeBgrViewTable() {
+    Table view_table = MakeViewTable();
+    EnableRange(view_table, VIEW_CLASS_32_BITS);
+    return view_table;
+}
+
+// Builds the view compatibility table used when native_bgr is false: the base view table with
+// only the formats of VIEW_CLASS_32_BITS_NO_BGR enabled on top.
+constexpr Table MakeNonNativeBgrViewTable() {
+    Table view_table = MakeViewTable();
+    EnableRange(view_table, VIEW_CLASS_32_BITS_NO_BGR);
+    return view_table;
+}
+
+// Builds the copy compatibility table used when native_bgr is true: the base copy table with
+// every format of VIEW_CLASS_32_BITS enabled on top.
+constexpr Table MakeNativeBgrCopyTable() {
+    Table copy_table = MakeCopyTable();
+    EnableRange(copy_table, VIEW_CLASS_32_BITS);
+    return copy_table;
+}
+
+// Builds the copy compatibility table used when native_bgr is false: the base copy table with
+// every format of VIEW_CLASS_32_BITS enabled on top.
+// NOTE(review): this enables the same range as MakeNativeBgrCopyTable rather than
+// VIEW_CLASS_32_BITS_NO_BGR - presumably because BGR copies are emulated instead of rejected;
+// confirm this is intentional.
+constexpr Table MakeNonNativeBgrCopyTable() {
+    Table copy_table = MakeCopyTable();
+    EnableRange(copy_table, VIEW_CLASS_32_BITS);
+    return copy_table;
+}
} // Anonymous namespace
-bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views) {
+// Reports whether an image of format_a may be viewed as format_b.
+// broken_views: when true, only identical formats are accepted.
+// native_bgr: selects between the table that enables VIEW_CLASS_32_BITS (true) and the one
+// that enables VIEW_CLASS_32_BITS_NO_BGR (false).
+bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views,
+                      bool native_bgr) {
if (broken_views) {
// If format views are broken, only accept formats that are identical.
return format_a == format_b;
}
- static constexpr Table TABLE = MakeViewTable();
- return IsSupported(TABLE, format_a, format_b);
+    // Both tables are constant-initialized; the flag only picks which one is consulted.
+    static constexpr Table TABLE_WITH_BGR = MakeNativeBgrViewTable();
+    static constexpr Table TABLE_WITHOUT_BGR = MakeNonNativeBgrViewTable();
+    if (native_bgr) {
+        return IsSupported(TABLE_WITH_BGR, format_a, format_b);
+    }
+    return IsSupported(TABLE_WITHOUT_BGR, format_a, format_b);
}
-bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) {
- static constexpr Table TABLE = MakeCopyTable();
- return IsSupported(TABLE, format_a, format_b);
+// Reports whether format_a and format_b are marked as supported in the copy table chosen by
+// native_bgr (MakeNativeBgrCopyTable when true, MakeNonNativeBgrCopyTable when false).
+bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b, bool native_bgr) {
+    // Both tables are constant-initialized; the flag only picks which one is consulted.
+    static constexpr Table TABLE_WITH_BGR = MakeNativeBgrCopyTable();
+    static constexpr Table TABLE_WITHOUT_BGR = MakeNonNativeBgrCopyTable();
+    if (native_bgr) {
+        return IsSupported(TABLE_WITH_BGR, format_a, format_b);
+    }
+    return IsSupported(TABLE_WITHOUT_BGR, format_a, format_b);
}
} // namespace VideoCore::Surface
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
index 9a0522988..55745e042 100644
--- a/src/video_core/compatible_formats.h
+++ b/src/video_core/compatible_formats.h
@@ -8,8 +8,9 @@
namespace VideoCore::Surface {
-bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views);
+bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views,
+ bool native_bgr);
-bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);
+bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b, bool native_bgr);
} // namespace VideoCore::Surface
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 970120acc..3494318ca 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -5,6 +5,7 @@ set(SHADER_FILES
convert_float_to_depth.frag
full_screen_triangle.vert
opengl_copy_bc4.comp
+ opengl_copy_bgra.comp
opengl_present.frag
opengl_present.vert
pitch_unswizzle.comp
diff --git a/src/video_core/host_shaders/opengl_copy_bgra.comp b/src/video_core/host_shaders/opengl_copy_bgra.comp
new file mode 100644
index 000000000..2571a4abf
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_copy_bgra.comp
@@ -0,0 +1,15 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 430 core
+
+layout (local_size_x = 4, local_size_y = 4) in;
+
+layout(binding = 0, rgba8) readonly uniform image2DArray bgr_input;
+layout(binding = 1, rgba8) writeonly uniform image2DArray bgr_output;
+
+// Each invocation handles the texel addressed by its global invocation ID: it loads that texel
+// from bgr_input and stores it to the same coordinate of bgr_output with the red and blue
+// components exchanged.
+void main() {
+    ivec3 texel = ivec3(gl_GlobalInvocationID);
+    vec4 source_color = imageLoad(bgr_input, texel);
+    imageStore(bgr_output, texel, source_color.bgra);
+}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 6da3906a4..c225d1fc9 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -73,7 +73,8 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
for (auto& stage_uniforms : fast_uniforms) {
for (OGLBuffer& buffer : stage_uniforms) {
buffer.Create();
- glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
+ glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr,
+ GL_STREAM_DRAW);
}
}
for (auto& stage_uniforms : copy_uniforms) {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 12434db67..e028677e9 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -96,7 +96,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
- {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
{GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
{GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
@@ -125,7 +125,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
- {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
@@ -396,6 +396,17 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
}
}
+// Returns true for the three formats handled as BGR here: B5G6R5_UNORM, B8G8R8A8_UNORM and
+// B8G8R8A8_SRGB. Every other format yields false.
+[[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) {
+    return format == PixelFormat::B5G6R5_UNORM || format == PixelFormat::B8G8R8A8_UNORM ||
+           format == PixelFormat::B8G8R8A8_SRGB;
+}
+
+
} // Anonymous namespace
ImageBufferMap::~ImageBufferMap() {
@@ -512,6 +523,9 @@ bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) {
if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
return false;
}
+ if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) {
+ return false;
+ }
return true;
}
@@ -520,6 +534,8 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
ASSERT(src.info.type == ImageType::e3D);
util_shaders.CopyBC4(dst, src, copies);
+ } else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) {
+ util_shaders.CopyBGR(dst, src, copies);
} else {
UNREACHABLE();
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index a6172f009..3fbaa102f 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -86,6 +86,11 @@ public:
FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
+ bool HasNativeBgr() const noexcept {
+ // OpenGL does not have native support for the BGR internal format
+ return false;
+ }
+
bool HasBrokenTextureViewFormats() const noexcept {
return has_broken_texture_view_formats;
}
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 31ec68505..2fe4799bc 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -14,6 +14,7 @@
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
+#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -48,6 +49,11 @@ OGLProgram MakeProgram(std::string_view source) {
return program;
}
+// Returns the number of pixels covered by a copy: extent width * extent height * number of
+// source layers. The extent depth is not part of the product.
+size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
+    // Widen before multiplying so the product is evaluated in size_t instead of being computed
+    // in the operands' narrower type and only converted afterwards.
+    const size_t width = static_cast<size_t>(copy.extent.width);
+    const size_t height = static_cast<size_t>(copy.extent.height);
+    const size_t layers = static_cast<size_t>(copy.src_subresource.num_layers);
+    return width * height * layers;
+}
+
+
} // Anonymous namespace
UtilShaders::UtilShaders(ProgramManager& program_manager_)
@@ -55,6 +61,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
+ copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)),
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
swizzle_table_buffer.Create();
@@ -205,6 +212,43 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
program_manager.RestoreGuestCompute();
}
+// Performs the emulated copies requested by EmulateCopyImage for BGR pixel formats. The method
+// is chosen from the destination format's bytes per block:
+//   2 - every copy is handed to bgr_copy_pass;
+//   4 - every copy is run through the copy_bgra_program compute shader;
+//   anything else is treated as unreachable.
+// Each copy is asserted to start at offset zero in both the source and the destination.
+void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
+                          std::span<const VideoCommon::ImageCopy> copies) {
+    static constexpr GLuint BINDING_INPUT_IMAGE = 0;
+    static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
+    static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0};
+    const u32 bytes_per_block = BytesPerBlock(dst_image.info.format);
+    if (bytes_per_block == 2) {
+        // BGR565 copy
+        for (const ImageCopy& copy : copies) {
+            ASSERT(copy.src_offset == zero_offset);
+            ASSERT(copy.dst_offset == zero_offset);
+            bgr_copy_pass.Execute(dst_image, src_image, copy);
+        }
+        return;
+    }
+    if (bytes_per_block != 4) {
+        UNREACHABLE();
+        return;
+    }
+    // BGRA8 copy
+    program_manager.BindHostCompute(copy_bgra_program.handle);
+    constexpr GLenum FORMAT = GL_RGBA8;
+    for (const ImageCopy& copy : copies) {
+        ASSERT(copy.src_offset == zero_offset);
+        ASSERT(copy.dst_offset == zero_offset);
+        // NOTE(review): both images are bound with layered = GL_FALSE while the shader declares
+        // them as image2DArray - confirm this combination is intended.
+        glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
+                           copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT);
+        glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
+                           copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT);
+        // NOTE(review): the extent in texels is passed as the work group count although
+        // opengl_copy_bgra.comp declares a 4x4 local size - confirm the over-dispatch is wanted.
+        glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
+    }
+    program_manager.RestoreGuestCompute();
+}
+
+
GLenum StoreFormat(u32 bytes_per_block) {
switch (bytes_per_block) {
case 1:
@@ -222,4 +266,36 @@ GLenum StoreFormat(u32 bytes_per_block) {
return GL_R8UI;
}
+// Copies one region from src_image to dst_image through bgr16_pbo: the source is read into the
+// buffer as GL_RGB / GL_UNSIGNED_SHORT_5_6_5 and the same bytes are then uploaded to the
+// destination as GL_RGB / GL_UNSIGNED_SHORT_5_6_5_REV.
+// Both transfers address mip level 0 at offset (0, 0, 0); the subresources' base level and base
+// layer are not consulted. The pixel pack/unpack buffer bindings and pixel-store parameters are
+// left as set here when the function returns.
+void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image,
+                             const ImageCopy& copy) {
+    // Grow the staging buffer first when it cannot hold this copy.
+    if (CopyBufferCreationNeeded(copy)) {
+        CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565);
+    }
+    const auto width = copy.extent.width;
+    const auto height = copy.extent.height;
+    const auto pbo_handle = bgr16_pbo.handle;
+
+    // Copy from source to PBO
+    glPixelStorei(GL_PACK_ALIGNMENT, 1);
+    glPixelStorei(GL_PACK_ROW_LENGTH, width);
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_handle);
+    glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, width, height,
+                         copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
+                         static_cast<GLsizei>(bgr16_pbo_size), nullptr);
+
+    // Copy from PBO to destination in reverse order
+    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+    glPixelStorei(GL_UNPACK_ROW_LENGTH, width);
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_handle);
+    glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, width, height,
+                        copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV,
+                        nullptr);
+}
+
+
+// Returns true when the recorded size of bgr16_pbo is smaller than the bytes this copy needs
+// (two bytes per pixel).
+bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) {
+    const size_t required_bytes = NumPixelsInCopy(copy) * sizeof(u16);
+    return required_bytes > bgr16_pbo_size;
+}
+
+
+// Sizes bgr16_pbo for the given copy (two bytes per pixel), records that size in bgr16_pbo_size
+// and allocates the storage with GL_STREAM_COPY usage. The target and format parameters are
+// not used by this function.
+void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) {
+    const size_t required_bytes = NumPixelsInCopy(copy) * sizeof(u16);
+    bgr16_pbo.Create();
+    bgr16_pbo_size = required_bytes;
+    glNamedBufferData(bgr16_pbo.handle, required_bytes, nullptr, GL_STREAM_COPY);
+}
+
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 7b1d16b09..93b009743 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -19,6 +19,22 @@ class ProgramManager;
struct ImageBufferMap;
+/// Copy helper that owns a buffer object (bgr16_pbo) together with the size recorded for it.
+class Bgr565CopyPass {
+public:
+    Bgr565CopyPass() = default;
+    ~Bgr565CopyPass() = default;
+
+    /// Runs a single copy from src_image into dst_image as described by copy.
+    void Execute(const Image& dst_image, const Image& src_image,
+                 const VideoCommon::ImageCopy& copy);
+
+private:
+    /// Tells whether the buffer has to be (re)created before the given copy can run.
+    [[nodiscard]] bool CopyBufferCreationNeeded(const VideoCommon::ImageCopy& copy);
+
+    /// Creates the buffer storage needed for the given copy.
+    void CreateNewCopyBuffer(const VideoCommon::ImageCopy& copy, GLenum target, GLuint format);
+
+    OGLBuffer bgr16_pbo;
+    size_t bgr16_pbo_size = 0; ///< Size in bytes recorded for bgr16_pbo, zero until first use
+};
+
+
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);
@@ -36,6 +52,9 @@ public:
void CopyBC4(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
+ void CopyBGR(Image& dst_image, Image& src_image,
+ std::span<const VideoCommon::ImageCopy> copies);
+
private:
ProgramManager& program_manager;
@@ -44,7 +63,10 @@ private:
OGLProgram block_linear_unswizzle_2d_program;
OGLProgram block_linear_unswizzle_3d_program;
OGLProgram pitch_unswizzle_program;
+ OGLProgram copy_bgra_program;
OGLProgram copy_bc4_program;
+
+ Bgr565CopyPass bgr_copy_pass;
};
GLenum StoreFormat(u32 bytes_per_block);
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp
index a99df9323..d8e92ac0e 100644
--- a/src/video_core/renderer_vulkan/vk_command_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp
@@ -10,7 +10,7 @@
namespace Vulkan {
-constexpr size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
+constexpr size_t COMMAND_BUFFER_POOL_SIZE = 4;
struct CommandPool::Pool {
vk::CommandPool handle;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index b08c23459..3aee27ce0 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -93,6 +93,11 @@ struct TextureCacheRuntime {
// No known Vulkan driver has broken image views
return false;
}
+
+ bool HasNativeBgr() const noexcept {
+ // All known Vulkan drivers can natively handle BGR textures
+ return true;
+ }
};
class Image : public VideoCommon::ImageBase {
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 959b3f115..9914926b3 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -120,9 +120,10 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
if (lhs.info.type == ImageType::Linear) {
base = SubresourceBase{.level = 0, .layer = 0};
} else {
- // We are passing relaxed formats as an option, having broken views or not won't matter
+ // We are passing relaxed formats as an option, having broken views/bgr or not won't matter
static constexpr bool broken_views = false;
- base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views);
+ static constexpr bool native_bgr = true;
+ base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views, native_bgr);
}
if (!base) {
LOG_ERROR(HW_GPU, "Image alias should have been flipped");
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index 18f72e508..f89a40b4c 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -24,7 +24,7 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
.height = std::max(image_info.size.height >> range.base.level, 1u),
.depth = std::max(image_info.size.depth >> range.base.level, 1u),
} {
- ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false),
+ ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false, true),
"Image view format {} is incompatible with image format {}", info.format,
image_info.format);
const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index b1da69971..98e33c3a0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -876,6 +876,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
return ImageId{};
}
const bool broken_views = runtime.HasBrokenTextureViewFormats();
+ const bool native_bgr = runtime.HasNativeBgr();
ImageId image_id;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
@@ -885,11 +886,12 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
existing.pitch == info.pitch &&
IsPitchLinearSameSize(existing, info, strict_size) &&
- IsViewCompatible(existing.format, info.format, broken_views)) {
+ IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) {
image_id = existing_image_id;
return true;
}
- } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) {
+ } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views,
+ native_bgr)) {
image_id = existing_image_id;
return true;
}
@@ -920,6 +922,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
ImageInfo new_info = info;
const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
const bool broken_views = runtime.HasBrokenTextureViewFormats();
+ const bool native_bgr = runtime.HasNativeBgr();
std::vector<ImageId> overlap_ids;
std::vector<ImageId> left_aliased_ids;
std::vector<ImageId> right_aliased_ids;
@@ -935,8 +938,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
return;
}
static constexpr bool strict_size = true;
- const std::optional<OverlapResult> solution =
- ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views);
+ const std::optional<OverlapResult> solution = ResolveOverlap(
+ new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
if (solution) {
gpu_addr = solution->gpu_addr;
cpu_addr = solution->cpu_addr;
@@ -946,10 +949,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
}
static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
- if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) {
+ if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
left_aliased_ids.push_back(overlap_id);
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
- broken_views)) {
+ broken_views, native_bgr)) {
right_aliased_ids.push_back(overlap_id);
}
});
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index a0bc1f7b6..2c42d1449 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -1035,13 +1035,13 @@ bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool stri
std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
VAddr cpu_addr, const ImageBase& overlap,
- bool strict_size, bool broken_views) {
+ bool strict_size, bool broken_views, bool native_bgr) {
ASSERT(new_info.type != ImageType::Linear);
ASSERT(overlap.info.type != ImageType::Linear);
if (!IsLayerStrideCompatible(new_info, overlap.info)) {
return std::nullopt;
}
- if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) {
+ if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views, native_bgr)) {
return std::nullopt;
}
if (gpu_addr == overlap.gpu_addr) {
@@ -1085,14 +1085,14 @@ bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) {
std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
GPUVAddr candidate_addr, RelaxedOptions options,
- bool broken_views) {
+ bool broken_views, bool native_bgr) {
const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
if (!base) {
return std::nullopt;
}
const ImageInfo& existing = image.info;
if (False(options & RelaxedOptions::Format)) {
- if (!IsViewCompatible(existing.format, candidate.format, broken_views)) {
+ if (!IsViewCompatible(existing.format, candidate.format, broken_views, native_bgr)) {
return std::nullopt;
}
}
@@ -1129,8 +1129,9 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const
}
bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr,
- RelaxedOptions options, bool broken_views) {
- return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value();
+ RelaxedOptions options, bool broken_views, bool native_bgr) {
+ return FindSubresource(candidate, image, candidate_addr, options, broken_views, native_bgr)
+ .has_value();
}
void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 52a9207d6..4d0072867 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -87,7 +87,8 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima
[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
GPUVAddr gpu_addr, VAddr cpu_addr,
const ImageBase& overlap,
- bool strict_size, bool broken_views);
+ bool strict_size, bool broken_views,
+ bool native_bgr);
[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);
@@ -95,11 +96,11 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima
const ImageBase& image,
GPUVAddr candidate_addr,
RelaxedOptions options,
- bool broken_views);
+ bool broken_views, bool native_bgr);
[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
- GPUVAddr candidate_addr, RelaxedOptions options,
- bool broken_views);
+ GPUVAddr candidate_addr, RelaxedOptions options, bool broken_views,
+ bool native_bgr);
void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
const ImageBase* src);