summary refs log tree commit diff stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h 24
-rw-r--r-- src/video_core/gpu_thread.cpp 2
-rw-r--r-- src/video_core/renderer_opengl/gl_fence_manager.cpp 4
-rw-r--r-- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp 5
-rw-r--r-- src/video_core/renderer_opengl/gl_resource_manager.cpp 10
-rw-r--r-- src/video_core/renderer_opengl/gl_resource_manager.h 3
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp 17
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.cpp 11
10 files changed, 55 insertions, 27 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2a150ccdc..1f656ffa8 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -383,7 +383,8 @@ private:
void NotifyBufferDeletion();
- [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, bool is_written = false) const;
+ [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
+ bool is_written = false) const;
[[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
PixelFormat format);
@@ -802,7 +803,7 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
const auto& cbufs = maxwell3d->state.shader_stages[stage];
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
- storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
+ storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
}
template <class P>
@@ -842,7 +843,7 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
const auto& cbufs = launch_desc.const_buffer_config;
const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
- compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
+ compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
}
template <class P>
@@ -1988,11 +1989,26 @@ void BufferCache<P>::NotifyBufferDeletion() {
template <class P>
typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr,
+ u32 cbuf_index,
bool is_written) const {
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
- const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
+ const auto size = [&]() {
+ const bool is_nvn_cbuf = cbuf_index == 0;
+ // The NVN driver buffer (index 0) is known to pack the SSBO address followed by its size.
+ if (is_nvn_cbuf) {
+ return gpu_memory->Read<u32>(ssbo_addr + 8);
+ }
+ // Other titles (notably Doom Eternal) may use STG/LDG on buffer addresses in custom defined
+ // cbufs, which do not store the sizes adjacent to the addresses, so use the fully
+ // mapped buffer size for now.
+ const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr));
+ LOG_INFO(HW_GPU, "Binding storage buffer for cbuf index {}, MemoryLayoutSize 0x{:X}",
+ cbuf_index, memory_layout_size);
+ return memory_layout_size;
+ }();
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr || size == 0) {
+ LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
return NULL_BINDING;
}
const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 9c103c0d4..050b11874 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -25,7 +25,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
SCOPE_EXIT({ MicroProfileOnThreadExit(); });
Common::SetCurrentThreadName(name.c_str());
- Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+ Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);
system.RegisterHostThread();
auto current_context = context.Acquire();
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 91463f854..5326172af 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -27,9 +27,7 @@ bool GLInnerFence::IsSignaled() const {
return true;
}
ASSERT(sync_object.handle != 0);
- GLint sync_status;
- glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status);
- return sync_status == GL_SIGNALED;
+ return sync_object.IsSignaled();
}
void GLInnerFence::Wait() {
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index 29491e762..89000d6e0 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -621,10 +621,7 @@ bool GraphicsPipeline::IsBuilt() noexcept {
if (built_fence.handle == 0) {
return false;
}
- // Timeout of zero means this is non-blocking
- const auto sync_status = glClientWaitSync(built_fence.handle, 0, 0);
- ASSERT(sync_status != GL_WAIT_FAILED);
- is_built = sync_status != GL_TIMEOUT_EXPIRED;
+ is_built = built_fence.IsSignaled();
return is_built;
}
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 3a664fdec..eae8fd110 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -3,6 +3,7 @@
#include <string_view>
#include <glad/glad.h>
+#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
@@ -158,6 +159,15 @@ void OGLSync::Release() {
handle = 0;
}
+bool OGLSync::IsSignaled() const noexcept {
+ // At least on Nvidia, glClientWaitSync with a timeout of 0
+ // is faster than glGetSynciv of GL_SYNC_STATUS.
+ // Timeout of 0 means this check is non-blocking.
+ const auto sync_status = glClientWaitSync(handle, 0, 0);
+ ASSERT(sync_status != GL_WAIT_FAILED);
+ return sync_status != GL_TIMEOUT_EXPIRED;
+}
+
void OGLFramebuffer::Create() {
if (handle != 0)
return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index bc05ba4bd..77362acd2 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -263,6 +263,9 @@ public:
/// Deletes the internal OpenGL resource
void Release();
+ /// Checks if the sync has been signaled
+ bool IsSignaled() const noexcept;
+
GLsync handle = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index b047e7b3d..9b99125e5 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -112,13 +112,17 @@ GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
return GL_NONE;
}
-GLenum TextureMode(PixelFormat format, bool is_first) {
+GLenum TextureMode(PixelFormat format, std::array<SwizzleSource, 4> swizzle) {
+ bool any_r =
+ std::ranges::any_of(swizzle, [](SwizzleSource s) { return s == SwizzleSource::R; });
switch (format) {
case PixelFormat::D24_UNORM_S8_UINT:
case PixelFormat::D32_FLOAT_S8_UINT:
- return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
+ // R = depth, G = stencil
+ return any_r ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
case PixelFormat::S8_UINT_D24_UNORM:
- return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+ // R = stencil, G = depth
+ return any_r ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
default:
ASSERT(false);
return GL_DEPTH_COMPONENT;
@@ -208,8 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
case PixelFormat::D32_FLOAT_S8_UINT:
case PixelFormat::S8_UINT_D24_UNORM:
UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G);
- glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
- TextureMode(format, swizzle[0] == SwizzleSource::R));
+ glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, TextureMode(format, swizzle));
std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
break;
case PixelFormat::A5B5G5R1_UNORM: {
@@ -714,9 +717,7 @@ std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req
continue;
}
if (syncs[index].handle != 0) {
- GLint status;
- glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status);
- if (status != GL_SIGNALED) {
+ if (!syncs[index].IsSignaled()) {
continue;
}
syncs[index].Release();
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index b0153a502..9cbcb3c8f 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -238,7 +238,7 @@ private:
return indices;
}
- void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) {
+ void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) override {
switch (index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
@@ -278,7 +278,7 @@ private:
return indices;
}
- void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) {
+ void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) override {
switch (index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f085d53a1..25965b684 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -1294,7 +1294,7 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re
LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
enabled = false;
}
- scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBoundsTestEnableEXT(enable);
});
}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 8a204f93f..e013d1c60 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -189,13 +189,16 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
if (info.IsRenderTarget()) {
return ImageAspectMask(info.format);
}
- const bool is_first = info.Swizzle()[0] == SwizzleSource::R;
+ bool any_r =
+ std::ranges::any_of(info.Swizzle(), [](SwizzleSource s) { return s == SwizzleSource::R; });
switch (info.format) {
case PixelFormat::D24_UNORM_S8_UINT:
case PixelFormat::D32_FLOAT_S8_UINT:
- return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
+ // R = depth, G = stencil
+ return any_r ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
case PixelFormat::S8_UINT_D24_UNORM:
- return is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
+ // R = stencil, G = depth
+ return any_r ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
case PixelFormat::D16_UNORM:
case PixelFormat::D32_FLOAT:
return VK_IMAGE_ASPECT_DEPTH_BIT;
@@ -1769,7 +1772,7 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
.borderColor =
- arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
+ arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color),
.unnormalizedCoordinates = VK_FALSE,
});
}