summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h46
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h4
-rw-r--r--src/video_core/engines/draw_manager.cpp24
-rw-r--r--src/video_core/engines/draw_manager.h2
-rw-r--r--src/video_core/host1x/codecs/codec.cpp10
-rw-r--r--src/video_core/host_shaders/convert_d24s8_to_abgr8.frag8
-rw-r--r--src/video_core/host_shaders/convert_s8d24_to_abgr8.frag8
-rw-r--r--src/video_core/renderer_base.h3
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp14
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h2
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h1
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp4
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp5
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_present_manager.cpp67
-rw-r--r--src/video_core/renderer_vulkan/vk_present_manager.h10
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp16
-rw-r--r--src/video_core/renderer_vulkan/vk_render_pass_cache.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp33
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp45
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h2
-rw-r--r--src/video_core/surface.cpp3
-rw-r--r--src/video_core/surface.h4
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp6
-rw-r--r--src/video_core/texture_cache/formatter.cpp8
-rw-r--r--src/video_core/texture_cache/formatter.h2
-rw-r--r--src/video_core/texture_cache/image_view_base.cpp1
-rw-r--r--src/video_core/texture_cache/samples_helper.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h1
-rw-r--r--src/video_core/texture_cache/util.cpp36
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp56
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.cpp29
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.h14
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h3
37 files changed, 323 insertions, 159 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 9e90c587c..9b2698fad 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -544,7 +544,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
it++;
}
- boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
+ boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
for (const IntervalSet& intervals : committed_ranges) {
@@ -914,6 +914,11 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
const u32 offset = buffer.Offset(binding.cpu_addr);
const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
+
+ if (is_written) {
+ MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+ }
+
if constexpr (NEEDS_BIND_STORAGE_INDEX) {
runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
++binding_index;
@@ -931,6 +936,11 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
+ const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0;
+ if (is_written) {
+ MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+ }
+
const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format;
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -962,6 +972,8 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
+ MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+
const u32 offset = buffer.Offset(binding.cpu_addr);
host_bindings.buffers.push_back(&buffer);
host_bindings.offsets.push_back(offset);
@@ -1011,6 +1023,11 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
const u32 offset = buffer.Offset(binding.cpu_addr);
const bool is_written =
((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
+
+ if (is_written) {
+ MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+ }
+
if constexpr (NEEDS_BIND_STORAGE_INDEX) {
runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
++binding_index;
@@ -1028,6 +1045,12 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
+ const bool is_written =
+ ((channel_state->written_compute_texture_buffers >> index) & 1) != 0;
+ if (is_written) {
+ MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+ }
+
const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format;
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -1201,16 +1224,11 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
template <class P>
void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
- const u32 written_mask = channel_state->written_storage_buffers[stage];
ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
// Resolve buffer
Binding& binding = channel_state->storage_buffers[stage][index];
const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
binding.buffer_id = buffer_id;
- // Mark buffer as written if needed
- if (((written_mask >> index) & 1) != 0) {
- MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
- }
});
}
@@ -1219,10 +1237,6 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
Binding& binding = channel_state->texture_buffers[stage][index];
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
- // Mark buffer as written if needed
- if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) {
- MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
- }
});
}
@@ -1252,7 +1266,6 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
.size = size,
.buffer_id = buffer_id,
};
- MarkWrittenBuffer(buffer_id, *cpu_addr, size);
}
template <class P>
@@ -1279,10 +1292,6 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
// Resolve buffer
Binding& binding = channel_state->compute_storage_buffers[index];
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
- // Mark as written if needed
- if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) {
- MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
- }
});
}
@@ -1291,18 +1300,11 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
Binding& binding = channel_state->compute_texture_buffers[index];
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
- // Mark as written if needed
- if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) {
- MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
- }
});
}
template <class P>
void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
- if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) {
- SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size);
- }
memory_tracker.MarkRegionAsGpuModified(cpu_addr, size);
const IntervalType base_interval{cpu_addr, cpu_addr + size};
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index c4f6e8d12..eed267361 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -62,7 +62,11 @@ using BufferId = SlotId;
using VideoCore::Surface::PixelFormat;
using namespace Common::Literals;
+#ifdef __APPLE__
+constexpr u32 NUM_VERTEX_BUFFERS = 16;
+#else
constexpr u32 NUM_VERTEX_BUFFERS = 32;
+#endif
constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index f34090791..d77ff455b 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -48,8 +48,14 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
SetInlineIndexBuffer(regs.inline_index_4x8.index3);
break;
case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
+ DrawArrayInstanced(regs.vertex_array_instance_first.topology.Value(),
+ regs.vertex_array_instance_first.start.Value(),
+ regs.vertex_array_instance_first.count.Value(), false);
+ break;
case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): {
- LOG_WARNING(HW_GPU, "(STUBBED) called");
+ DrawArrayInstanced(regs.vertex_array_instance_subsequent.topology.Value(),
+ regs.vertex_array_instance_subsequent.start.Value(),
+ regs.vertex_array_instance_subsequent.count.Value(), true);
break;
}
case MAXWELL3D_REG_INDEX(draw_texture.src_y0): {
@@ -84,6 +90,22 @@ void DrawManager::DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 ve
ProcessDraw(false, num_instances);
}
+void DrawManager::DrawArrayInstanced(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
+ bool subsequent) {
+ draw_state.topology = topology;
+ draw_state.vertex_buffer.first = vertex_first;
+ draw_state.vertex_buffer.count = vertex_count;
+
+ if (!subsequent) {
+ draw_state.instance_count = 1;
+ }
+
+ draw_state.base_instance = draw_state.instance_count - 1;
+ draw_state.draw_mode = DrawMode::Instance;
+ draw_state.instance_count++;
+ ProcessDraw(false, 1);
+}
+
void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count,
u32 base_index, u32 base_instance, u32 num_instances) {
const auto& regs{maxwell3d->regs};
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h
index 18d959143..cfc8127fc 100644
--- a/src/video_core/engines/draw_manager.h
+++ b/src/video_core/engines/draw_manager.h
@@ -66,6 +66,8 @@ public:
void DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
u32 base_instance, u32 num_instances);
+ void DrawArrayInstanced(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
+ bool subsequent);
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
u32 base_instance, u32 num_instances);
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index 8d7da50fc..dbcf508e5 100644
--- a/src/video_core/host1x/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -137,16 +137,6 @@ bool Codec::CreateGpuAvDevice() {
break;
}
if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
-#if defined(__unix__)
- // Some linux decoding backends are reported to crash with this config method
- // TODO(ameerj): Properly support this method
- if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) {
- // skip zero-copy decoders, we don't currently support them
- LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.",
- av_hwdevice_get_type_name(type), config->methods);
- continue;
- }
-#endif
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
av_codec_ctx->pix_fmt = config->pix_fmt;
return true;
diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
index d33131d7c..b81a54056 100644
--- a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
+++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
@@ -3,16 +3,16 @@
#version 450
+precision mediump int;
+precision highp float;
+
layout(binding = 0) uniform sampler2D depth_tex;
-layout(binding = 1) uniform isampler2D stencil_tex;
+layout(binding = 1) uniform usampler2D stencil_tex;
layout(location = 0) out vec4 color;
void main() {
ivec2 coord = ivec2(gl_FragCoord.xy);
- uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
- uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
-
highp uint depth_val =
uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r;
diff --git a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
index 31db7d426..6a457981d 100644
--- a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
+++ b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
@@ -3,16 +3,16 @@
#version 450
+precision mediump int;
+precision highp float;
+
layout(binding = 0) uniform sampler2D depth_tex;
-layout(binding = 1) uniform isampler2D stencil_tex;
+layout(binding = 1) uniform usampler2D stencil_tex;
layout(location = 0) out vec4 color;
void main() {
ivec2 coord = ivec2(gl_FragCoord.xy);
- uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
- uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
-
highp uint depth_val =
uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r;
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 3e12a8813..78ea5208b 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -89,9 +89,6 @@ public:
void RequestScreenshot(void* data, std::function<void(bool)> callback,
const Layout::FramebufferLayout& layout);
- /// This is called to notify the rendering backend of a surface change
- virtual void NotifySurfaceChanged() {}
-
protected:
Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
std::unique_ptr<Core::Frontend::GraphicsContext> context;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 9cafd2983..512eef575 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -1048,6 +1048,10 @@ void Image::Scale(bool up_scale) {
}
bool Image::ScaleUp(bool ignore) {
+ const auto& resolution = runtime->resolution;
+ if (!resolution.active) {
+ return false;
+ }
if (True(flags & ImageFlagBits::Rescaled)) {
return false;
}
@@ -1060,9 +1064,6 @@ bool Image::ScaleUp(bool ignore) {
return false;
}
flags |= ImageFlagBits::Rescaled;
- if (!runtime->resolution.active) {
- return false;
- }
has_scaled = true;
if (ignore) {
current_texture = upscaled_backup.handle;
@@ -1073,13 +1074,14 @@ bool Image::ScaleUp(bool ignore) {
}
bool Image::ScaleDown(bool ignore) {
- if (False(flags & ImageFlagBits::Rescaled)) {
+ const auto& resolution = runtime->resolution;
+ if (!resolution.active) {
return false;
}
- flags &= ~ImageFlagBits::Rescaled;
- if (!runtime->resolution.active) {
+ if (False(flags & ImageFlagBits::Rescaled)) {
return false;
}
+ flags &= ~ImageFlagBits::Rescaled;
if (ignore) {
current_texture = texture.handle;
return true;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 3676eaaa9..e71b87e99 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -118,6 +118,8 @@ public:
void InsertUploadMemoryBarrier();
+ void TransitionImageLayout(Image& image) {}
+
FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
bool HasNativeBgr() const noexcept {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index c7dc7e0a1..5ea9e2378 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -116,6 +116,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
+ {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT_24_8}, // X8_D24_UNORM
{GL_STENCIL_INDEX8, GL_STENCIL, GL_UNSIGNED_BYTE}, // S8_UINT
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 1a40a4d05..c3db09424 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -627,6 +627,8 @@ void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool
const VkPipelineLayout layout = *clear_color_pipeline_layout;
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf) {
+ constexpr std::array blend_constants{0.0f, 0.0f, 0.0f, 0.0f};
+ cmdbuf.SetBlendConstants(blend_constants.data());
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
BindBlitState(cmdbuf, dst_region);
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth);
@@ -883,7 +885,7 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline(
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .depthTestEnable = VK_FALSE,
+ .depthTestEnable = key.depth_clear,
.depthWriteEnable = key.depth_clear,
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
.depthBoundsTestEnable = VK_FALSE,
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 208e88533..a08f2f67f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -214,8 +214,9 @@ struct FormatTuple {
{VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT
// Depth formats
- {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
- {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM
+ {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
+ {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM
+ {VK_FORMAT_X8_D24_UNORM_PACK32, Attachable}, // X8_D24_UNORM
// Stencil formats
{VK_FORMAT_S8_UINT, Attachable}, // S8_UINT
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 590bc1c64..14e257cf7 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -56,10 +56,6 @@ public:
return device.GetDriverName();
}
- void NotifySurfaceChanged() override {
- present_manager.NotifySurfaceChanged();
- }
-
private:
void Report() const;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 31928bb94..52fc142d1 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -96,6 +96,7 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
switch (framebuffer.pixel_format) {
case Service::android::PixelFormat::Rgba8888:
+ case Service::android::PixelFormat::Rgbx8888:
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
case Service::android::PixelFormat::Rgb565:
return VK_FORMAT_R5G6B5_UNORM_PACK16;
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp
index d681bd22a..2ef36583b 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp
@@ -103,8 +103,7 @@ PresentManager::PresentManager(const vk::Instance& instance_,
surface{surface_}, blit_supported{CanBlitToSwapchain(device.GetPhysical(),
swapchain.GetImageViewFormat())},
use_present_thread{Settings::values.async_presentation.GetValue()},
- image_count{swapchain.GetImageCount()}, last_render_surface{
- render_window_.GetWindowInfo().render_surface} {
+ image_count{swapchain.GetImageCount()} {
auto& dld = device.GetLogical();
cmdpool = dld.CreateCommandPool({
@@ -289,44 +288,36 @@ void PresentManager::PresentThread(std::stop_token token) {
}
}
-void PresentManager::NotifySurfaceChanged() {
-#ifdef ANDROID
- std::scoped_lock lock{recreate_surface_mutex};
- recreate_surface_cv.notify_one();
-#endif
+void PresentManager::RecreateSwapchain(Frame* frame) {
+ swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb);
+ image_count = swapchain.GetImageCount();
}
void PresentManager::CopyToSwapchain(Frame* frame) {
- MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain);
-
- const auto recreate_swapchain = [&] {
- swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb);
- image_count = swapchain.GetImageCount();
- };
-
-#ifdef ANDROID
- std::unique_lock lock{recreate_surface_mutex};
-
- const auto needs_recreation = [&] {
- if (last_render_surface != render_window.GetWindowInfo().render_surface) {
- return true;
- }
- if (swapchain.NeedsRecreation(frame->is_srgb)) {
- return true;
+ bool requires_recreation = false;
+
+ while (true) {
+ try {
+ // Recreate surface and swapchain if needed.
+ if (requires_recreation) {
+ surface = CreateSurface(instance, render_window.GetWindowInfo());
+ RecreateSwapchain(frame);
+ }
+
+ // Draw to swapchain.
+ return CopyToSwapchainImpl(frame);
+ } catch (const vk::Exception& except) {
+ if (except.GetResult() != VK_ERROR_SURFACE_LOST_KHR) {
+ throw;
+ }
+
+ requires_recreation = true;
}
- return false;
- };
-
- recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400),
- [&]() { return !needs_recreation(); });
-
- // If the frontend recreated the surface, recreate the renderer surface and swapchain.
- if (last_render_surface != render_window.GetWindowInfo().render_surface) {
- last_render_surface = render_window.GetWindowInfo().render_surface;
- surface = CreateSurface(instance, render_window.GetWindowInfo());
- recreate_swapchain();
}
-#endif
+}
+
+void PresentManager::CopyToSwapchainImpl(Frame* frame) {
+ MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain);
// If the size or colorspace of the incoming frames has changed, recreate the swapchain
// to account for that.
@@ -334,11 +325,11 @@ void PresentManager::CopyToSwapchain(Frame* frame) {
const bool size_changed =
swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height;
if (srgb_changed || size_changed) {
- recreate_swapchain();
+ RecreateSwapchain(frame);
}
while (swapchain.AcquireNextImage()) {
- recreate_swapchain();
+ RecreateSwapchain(frame);
}
const vk::CommandBuffer cmdbuf{frame->cmdbuf};
@@ -488,4 +479,4 @@ void PresentManager::CopyToSwapchain(Frame* frame) {
swapchain.Present(render_semaphore);
}
-} // namespace Vulkan \ No newline at end of file
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h
index 83e859416..a3d825fe6 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.h
+++ b/src/video_core/renderer_vulkan/vk_present_manager.h
@@ -54,14 +54,15 @@ public:
/// Waits for the present thread to finish presenting all queued frames.
void WaitPresent();
- /// This is called to notify the rendering backend of a surface change
- void NotifySurfaceChanged();
-
private:
void PresentThread(std::stop_token token);
void CopyToSwapchain(Frame* frame);
+ void CopyToSwapchainImpl(Frame* frame);
+
+ void RecreateSwapchain(Frame* frame);
+
private:
const vk::Instance& instance;
Core::Frontend::EmuWindow& render_window;
@@ -76,16 +77,13 @@ private:
std::queue<Frame*> free_queue;
std::condition_variable_any frame_cv;
std::condition_variable free_cv;
- std::condition_variable recreate_surface_cv;
std::mutex swapchain_mutex;
- std::mutex recreate_surface_mutex;
std::mutex queue_mutex;
std::mutex free_mutex;
std::jthread present_thread;
bool blit_supported;
bool use_present_thread;
std::size_t image_count{};
- void* last_render_surface{};
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 2edaafa7e..66c03bf17 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -1436,6 +1436,7 @@ void QueryCacheRuntime::Barriers(bool is_prebarrier) {
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
+ impl->scheduler.RequestOutsideRenderPassOperationContext();
if (is_prebarrier) {
impl->scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1628d76d6..61d03daae 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -422,7 +422,8 @@ void RasterizerVulkan::Clear(u32 layer_count) {
return;
}
- if (use_stencil && regs.stencil_front_mask != 0xFF && regs.stencil_front_mask != 0) {
+ if (use_stencil && framebuffer->HasAspectStencilBit() && regs.stencil_front_mask != 0xFF &&
+ regs.stencil_front_mask != 0) {
Region2D dst_region = {
Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width),
@@ -974,6 +975,19 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
if (!state_tracker.TouchScissors()) {
return;
}
+ if (!regs.viewport_scale_offset_enabled) {
+ const auto x = static_cast<float>(regs.surface_clip.x);
+ const auto y = static_cast<float>(regs.surface_clip.y);
+ const auto width = static_cast<float>(regs.surface_clip.width);
+ const auto height = static_cast<float>(regs.surface_clip.height);
+ VkRect2D scissor;
+ scissor.offset.x = static_cast<u32>(x);
+ scissor.offset.y = static_cast<u32>(y);
+ scissor.extent.width = static_cast<u32>(width != 0.0f ? width : 1.0f);
+ scissor.extent.height = static_cast<u32>(height != 0.0f ? height : 1.0f);
+ scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissor); });
+ return;
+ }
u32 up_scale = 1;
u32 down_shift = 0;
if (texture_cache.IsRescaling()) {
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
index ae9f1de64..7746a88d3 100644
--- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -19,7 +19,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat
VkSampleCountFlagBits samples) {
using MaxwellToVK::SurfaceFormat;
return {
- .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
+ .flags = {},
.format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
.samples = samples,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index ce92f66ab..b278614e6 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -24,25 +24,38 @@ using namespace Common::Literals;
// Maximum potential alignment of a Vulkan buffer
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
-// Maximum size to put elements in the stream buffer
-constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
// Stream buffer size in bytes
-constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
-constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
+constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB;
-size_t Region(size_t iterator) noexcept {
- return iterator / REGION_SIZE;
+size_t GetStreamBufferSize(const Device& device) {
+ VkDeviceSize size{0};
+ if (device.HasDebuggingToolAttached()) {
+ ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) {
+ size = std::max(size, heap.size);
+ });
+ // If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be
+ // loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue
+ // as the heap will be much larger.
+ if (size <= 256_MiB) {
+ size = size * 40 / 100;
+ }
+ } else {
+ size = MAX_STREAM_BUFFER_SIZE;
+ }
+ return std::min(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE);
}
} // Anonymous namespace
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
- : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
+ : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
+ stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size /
+ StagingBufferPool::NUM_SYNCS} {
VkBufferCreateInfo stream_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .size = STREAM_BUFFER_SIZE,
+ .size = stream_buffer_size,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
@@ -63,7 +76,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
StagingBufferPool::~StagingBufferPool() = default;
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
- if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
+ if (!deferred && usage == MemoryUsage::Upload && size <= region_size) {
return GetStreamBuffer(size);
}
return GetStagingBuffer(size, usage, deferred);
@@ -101,7 +114,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
used_iterator = iterator;
free_iterator = std::max(free_iterator, iterator + size);
- if (iterator + size >= STREAM_BUFFER_SIZE) {
+ if (iterator + size >= stream_buffer_size) {
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
current_tick);
used_iterator = 0;
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 5f69f08b1..d3deb9072 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -90,6 +90,9 @@ private:
void ReleaseCache(MemoryUsage usage);
void ReleaseLevel(StagingBuffersCache& cache, size_t log2);
+ size_t Region(size_t iter) const noexcept {
+ return iter / region_size;
+ }
const Device& device;
MemoryAllocator& memory_allocator;
@@ -97,6 +100,8 @@ private:
vk::Buffer stream_buffer;
std::span<u8> stream_pointer;
+ VkDeviceSize stream_buffer_size;
+ VkDeviceSize region_size;
size_t iterator = 0;
size_t used_iterator = 0;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index cdc41816f..80efd9517 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -238,6 +238,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return any_r ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
case PixelFormat::D16_UNORM:
case PixelFormat::D32_FLOAT:
+ case PixelFormat::X8_D24_UNORM:
return VK_IMAGE_ASPECT_DEPTH_BIT;
case PixelFormat::S8_UINT:
return VK_IMAGE_ASPECT_STENCIL_BIT;
@@ -1550,15 +1551,15 @@ bool Image::IsRescaled() const noexcept {
}
bool Image::ScaleUp(bool ignore) {
+ const auto& resolution = runtime->resolution;
+ if (!resolution.active) {
+ return false;
+ }
if (True(flags & ImageFlagBits::Rescaled)) {
return false;
}
ASSERT(info.type != ImageType::Linear);
flags |= ImageFlagBits::Rescaled;
- const auto& resolution = runtime->resolution;
- if (!resolution.active) {
- return false;
- }
has_scaled = true;
if (!scaled_image) {
const bool is_2d = info.type == ImageType::e2D;
@@ -1587,15 +1588,15 @@ bool Image::ScaleUp(bool ignore) {
}
bool Image::ScaleDown(bool ignore) {
+ const auto& resolution = runtime->resolution;
+ if (!resolution.active) {
+ return false;
+ }
if (False(flags & ImageFlagBits::Rescaled)) {
return false;
}
ASSERT(info.type != ImageType::Linear);
flags &= ~ImageFlagBits::Rescaled;
- const auto& resolution = runtime->resolution;
- if (!resolution.active) {
- return false;
- }
current_image = *original_image;
if (ignore) {
return true;
@@ -2033,4 +2034,32 @@ void TextureCacheRuntime::AccelerateImageUpload(
ASSERT(false);
}
+void TextureCacheRuntime::TransitionImageLayout(Image& image) {
+ if (!image.ExchangeInitialization()) {
+ VkImageMemoryBarrier barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_NONE,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image.Handle(),
+ .subresourceRange{
+ .aspectMask = image.AspectMask(),
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ };
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([barrier = barrier](vk::CommandBuffer cmdbuf) {
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier);
+ });
+ }
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index d6c5a15cc..7a0807709 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -92,6 +92,8 @@ public:
void InsertUploadMemoryBarrier() {}
+ void TransitionImageLayout(Image& image);
+
bool HasBrokenTextureViewFormats() const noexcept {
// No known Vulkan driver has broken image views
return false;
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index e16cd5e73..5b3c7aa5a 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -85,6 +85,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
return PixelFormat::S8_UINT;
case Tegra::DepthFormat::Z32_FLOAT_X24S8_UINT:
return PixelFormat::D32_FLOAT_S8_UINT;
+ case Tegra::DepthFormat::X8Z24_UNORM:
+ return PixelFormat::X8_D24_UNORM;
default:
UNIMPLEMENTED_MSG("Unimplemented format={}", format);
return PixelFormat::S8_UINT_D24_UNORM;
@@ -202,6 +204,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
PixelFormat PixelFormatFromGPUPixelFormat(Service::android::PixelFormat format) {
switch (format) {
case Service::android::PixelFormat::Rgba8888:
+ case Service::android::PixelFormat::Rgbx8888:
return PixelFormat::A8B8G8R8_UNORM;
case Service::android::PixelFormat::Rgb565:
return PixelFormat::R5G6B5_UNORM;
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 9b9c4d9bc..a5e8e2f62 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -115,6 +115,7 @@ enum class PixelFormat {
// Depth formats
D32_FLOAT = MaxColorFormat,
D16_UNORM,
+ X8_D24_UNORM,
MaxDepthFormat,
@@ -251,6 +252,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
1, // E5B9G9R9_FLOAT
1, // D32_FLOAT
1, // D16_UNORM
+ 1, // X8_D24_UNORM
1, // S8_UINT
1, // D24_UNORM_S8_UINT
1, // S8_UINT_D24_UNORM
@@ -360,6 +362,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
1, // E5B9G9R9_FLOAT
1, // D32_FLOAT
1, // D16_UNORM
+ 1, // X8_D24_UNORM
1, // S8_UINT
1, // D24_UNORM_S8_UINT
1, // S8_UINT_D24_UNORM
@@ -469,6 +472,7 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
32, // E5B9G9R9_FLOAT
32, // D32_FLOAT
16, // D16_UNORM
+ 32, // X8_D24_UNORM
8, // S8_UINT
32, // D24_UNORM_S8_UINT
32, // S8_UINT_D24_UNORM
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 56307d030..8c774f512 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -138,10 +138,16 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::E5B9G9R9_FLOAT;
case Hash(TextureFormat::Z32, FLOAT):
return PixelFormat::D32_FLOAT;
+ case Hash(TextureFormat::Z32, FLOAT, UINT, UINT, UINT, LINEAR):
+ return PixelFormat::D32_FLOAT;
case Hash(TextureFormat::Z16, UNORM):
return PixelFormat::D16_UNORM;
case Hash(TextureFormat::Z16, UNORM, UINT, UINT, UINT, LINEAR):
return PixelFormat::D16_UNORM;
+ case Hash(TextureFormat::X8Z24, UNORM):
+ return PixelFormat::X8_D24_UNORM;
+ case Hash(TextureFormat::X8Z24, UNORM, UINT, UINT, UINT, LINEAR):
+ return PixelFormat::X8_D24_UNORM;
case Hash(TextureFormat::Z24S8, UINT, UNORM, UNORM, UNORM, LINEAR):
return PixelFormat::S8_UINT_D24_UNORM;
case Hash(TextureFormat::Z24S8, UINT, UNORM, UINT, UINT, LINEAR):
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index 6279d8e9e..2b7e0df72 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -10,19 +10,23 @@
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/render_targets.h"
+#include "video_core/texture_cache/samples_helper.h"
namespace VideoCommon {
std::string Name(const ImageBase& image) {
const GPUVAddr gpu_addr = image.gpu_addr;
const ImageInfo& info = image.info;
- const u32 width = info.size.width;
- const u32 height = info.size.height;
+ u32 width = info.size.width;
+ u32 height = info.size.height;
const u32 depth = info.size.depth;
const u32 num_layers = image.info.resources.layers;
const u32 num_levels = image.info.resources.levels;
std::string resource;
if (image.info.num_samples > 1) {
+ const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(image.info.num_samples);
+ width >>= samples_x;
+ height >>= samples_y;
resource += fmt::format(":{}xMSAA", image.info.num_samples);
}
if (num_layers > 1) {
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index 9ee57a076..cabbfcb2d 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -211,6 +211,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "D32_FLOAT";
case PixelFormat::D16_UNORM:
return "D16_UNORM";
+ case PixelFormat::X8_D24_UNORM:
+ return "X8_D24_UNORM";
case PixelFormat::S8_UINT:
return "S8_UINT";
case PixelFormat::D24_UNORM_S8_UINT:
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index 0c5f4450d..18b9250f9 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -85,6 +85,7 @@ bool ImageViewBase::SupportsAnisotropy() const noexcept {
// Depth formats
case PixelFormat::D32_FLOAT:
case PixelFormat::D16_UNORM:
+ case PixelFormat::X8_D24_UNORM:
// Stencil formats
case PixelFormat::S8_UINT:
// DepthStencil formats
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h
index 203ac1b11..2ee2f8312 100644
--- a/src/video_core/texture_cache/samples_helper.h
+++ b/src/video_core/texture_cache/samples_helper.h
@@ -24,7 +24,7 @@ namespace VideoCommon {
return {2, 2};
}
ASSERT_MSG(false, "Invalid number of samples={}", num_samples);
- return {1, 1};
+ return {0, 0};
}
[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 1bdb0def5..d575c57ca 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1016,6 +1016,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) {
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
+ runtime.TransitionImageLayout(image);
return;
}
if (True(image.flags & ImageFlagBits::AsynchronousDecode)) {
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 0a86ce139..15596c925 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -68,6 +68,7 @@ struct LevelInfo {
Extent2D tile_size;
u32 bpp_log2;
u32 tile_width_spacing;
+ u32 num_levels;
};
[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
@@ -118,11 +119,11 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size,
- u32 level) {
+ u32 level, u32 num_levels) {
return {
.width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
.height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
- .depth = level == 0
+ .depth = level == 0 && num_levels == 1
? block_size.depth
: AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
};
@@ -166,7 +167,7 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
- if (level == 0) {
+ if (level == 0 && info.num_levels == 1) {
return Extent3D{
.width = info.block.width,
.height = info.block.height,
@@ -257,7 +258,7 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
- u32 tile_width_spacing) {
+ u32 tile_width_spacing, u32 num_levels) {
const u32 bytes_per_block = BytesPerBlock(format);
return {
.size =
@@ -270,16 +271,18 @@ template <u32 GOB_EXTENT>
.tile_size = DefaultBlockSize(format),
.bpp_log2 = BytesPerBlockLog2(bytes_per_block),
.tile_width_spacing = tile_width_spacing,
+ .num_levels = num_levels,
};
}
[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
- return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing);
+ return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing,
+ info.resources.levels);
}
[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
u32 tile_width_spacing, u32 level) {
- const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing);
+ const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing, level);
u32 offset = 0;
for (u32 current_level = 0; current_level < level; ++current_level) {
offset += CalculateLevelSize(info, current_level);
@@ -466,7 +469,7 @@ template <u32 GOB_EXTENT>
};
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing);
- const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0);
+ const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0, info.resources.levels);
return Extent3D{
.width = Common::AlignUpLog2(num_tiles.width, alignment),
.height = Common::AlignUpLog2(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
@@ -533,7 +536,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
UNIMPLEMENTED_IF(copy.image_extent != level_size);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
- const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
+ const Extent3D block =
+ AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels);
size_t host_offset = copy.buffer_offset;
@@ -698,7 +702,7 @@ u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) {
const Extent2D tile_size = DefaultBlockSize(info.format);
const Extent3D level_size = AdjustMipSize(info.size, level);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
- const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level);
+ const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level, info.resources.levels);
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing);
}
@@ -887,7 +891,8 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
.image_extent = level_size,
};
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
- const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
+ const Extent3D block =
+ AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels);
const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2);
size_t guest_layer_offset = 0;
@@ -1041,7 +1046,7 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
const Extent2D tile_size = DefaultBlockSize(info.format);
const Extent3D level_size = AdjustMipSize(info.size, level);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
- return AdjustMipBlockSize(num_tiles, level_info.block, level);
+ return AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels);
}
boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) {
@@ -1063,7 +1068,8 @@ boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const I
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
- const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
+ const Extent3D block =
+ AdjustMipBlockSize(num_tiles, level_info.block, level, level_info.num_levels);
params[level] = SwizzleParameters{
.num_tiles = num_tiles,
.block = block,
@@ -1292,11 +1298,11 @@ u32 MapSizeBytes(const ImageBase& image) {
}
}
-static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0}, 0) ==
+static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0, 1}, 0) ==
0x7f8000);
-static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x40000);
+static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0, 1}, 0) == 0x40000);
-static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0}, 0) == 0x40000);
+static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0, 1}, 0) == 0x40000);
static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) ==
0x2afc00);
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 3960b135a..876cec2e8 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -84,9 +84,12 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
} // namespace Alternatives
enum class NvidiaArchitecture {
- AmpereOrNewer,
+ KeplerOrOlder,
+ Maxwell,
+ Pascal,
+ Volta,
Turing,
- VoltaOrOlder,
+ AmpereOrNewer,
};
template <typename T>
@@ -200,6 +203,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
VK_FORMAT_BC7_UNORM_BLOCK,
VK_FORMAT_D16_UNORM,
VK_FORMAT_D16_UNORM_S8_UINT,
+ VK_FORMAT_X8_D24_UNORM_PACK32,
VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_D32_SFLOAT,
VK_FORMAT_D32_SFLOAT_S8_UINT,
@@ -321,13 +325,38 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
physical.GetProperties2(physical_properties);
if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) {
// Only Ampere and newer support this feature
+ // TODO: Find a way to differentiate Ampere and Ada
return NvidiaArchitecture::AmpereOrNewer;
}
- }
- if (exts.contains(VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) {
return NvidiaArchitecture::Turing;
}
- return NvidiaArchitecture::VoltaOrOlder;
+
+ if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) {
+ VkPhysicalDeviceBlendOperationAdvancedPropertiesEXT advanced_blending_props{};
+ advanced_blending_props.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_PROPERTIES_EXT;
+ VkPhysicalDeviceProperties2 physical_properties{};
+ physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+ physical_properties.pNext = &advanced_blending_props;
+ physical.GetProperties2(physical_properties);
+ if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) {
+ return NvidiaArchitecture::Maxwell;
+ }
+
+ if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) {
+ VkPhysicalDeviceConservativeRasterizationPropertiesEXT conservative_raster_props{};
+ conservative_raster_props.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT;
+ physical_properties.pNext = &conservative_raster_props;
+ physical.GetProperties2(physical_properties);
+ if (conservative_raster_props.degenerateLinesRasterized) {
+ return NvidiaArchitecture::Volta;
+ }
+ return NvidiaArchitecture::Pascal;
+ }
+ }
+
+ return NvidiaArchitecture::KeplerOrOlder;
}
std::vector<const char*> ExtensionListForVulkan(
@@ -504,19 +533,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
if (is_nvidia) {
const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff;
const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
- switch (arch) {
- case NvidiaArchitecture::AmpereOrNewer:
+ if (arch >= NvidiaArchitecture::AmpereOrNewer) {
LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math");
features.shader_float16_int8.shaderFloat16 = false;
- break;
- case NvidiaArchitecture::Turing:
- break;
- case NvidiaArchitecture::VoltaOrOlder:
+ } else if (arch <= NvidiaArchitecture::Volta) {
if (nv_major_version < 527) {
LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor");
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
}
- break;
}
if (nv_major_version >= 510) {
LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits");
@@ -661,7 +685,15 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
"ANV drivers 22.3.0 to 23.1.0 have broken VK_KHR_push_descriptor");
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
}
+ } else if (extensions.push_descriptor && is_nvidia) {
+ const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
+ if (arch <= NvidiaArchitecture::Pascal) {
+ LOG_WARNING(Render_Vulkan,
+ "Pascal and older architectures have broken VK_KHR_push_descriptor");
+ RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
+ }
}
+
if (is_mvk) {
LOG_WARNING(Render_Vulkan,
"MVK driver breaks when using more than 16 vertex attributes/bindings");
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index 3ef381a38..8dd1667f3 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -9,6 +9,7 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/literals.h"
#include "common/logging/log.h"
#include "common/polyfill_ranges.h"
#include "video_core/vulkan_common/vma.h"
@@ -65,12 +66,12 @@ struct Range {
switch (usage) {
case MemoryUsage::Upload:
case MemoryUsage::Stream:
- return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+ return VMA_ALLOCATION_CREATE_MAPPED_BIT |
+ VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
case MemoryUsage::Download:
- return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
+ return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
case MemoryUsage::DeviceLocal:
- return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
- VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT;
+ return {};
}
return {};
}
@@ -212,7 +213,20 @@ MemoryAllocator::MemoryAllocator(const Device& device_)
: device{device_}, allocator{device.GetAllocator()},
properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
buffer_image_granularity{
- device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {}
+ device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {
+ // GPUs not supporting rebar may only have a region with less than 256MB host visible/device
+ // local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to
+ // the heap running out of memory. With RenderDoc attached and only a small host/device region,
+ // only allow the stream buffer in this memory heap.
+ if (device.HasDebuggingToolAttached()) {
+ using namespace Common::Literals;
+ ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) {
+ if (heap.size <= 256_MiB) {
+ valid_memory_types &= ~(1u << index);
+ }
+ });
+ }
+}
MemoryAllocator::~MemoryAllocator() = default;
@@ -239,12 +253,11 @@ vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const {
const VmaAllocationCreateInfo alloc_ci = {
- .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT |
- MemoryUsageVmaFlags(usage),
+ .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
.usage = MemoryUsageVma(usage),
.requiredFlags = 0,
.preferredFlags = MemoryUsagePreferedVmaFlags(usage),
- .memoryTypeBits = 0,
+ .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
.priority = 0.f,
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index f449bc8d0..38a182bcb 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -7,6 +7,7 @@
#include <span>
#include <vector>
#include "common/common_types.h"
+#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
VK_DEFINE_HANDLE(VmaAllocator)
@@ -26,6 +27,18 @@ enum class MemoryUsage {
Stream, ///< Requests device local host visible buffer, falling back host memory.
};
+template <typename F>
+void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) {
+ auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties;
+ for (size_t i = 0; i < memory_props.memoryTypeCount; i++) {
+ auto& memory_type = memory_props.memoryTypes[i];
+ if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
+ (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
+ f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]);
+ }
+ }
+}
+
/// Ownership handle of a memory commitment.
/// Points to a subregion of a memory allocation.
class MemoryCommit {
@@ -124,6 +137,7 @@ private:
std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
// and optimal images
+ u32 valid_memory_types{~0u};
};
} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 1e3c0fa64..0487cd3b6 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -117,6 +117,9 @@ public:
virtual ~Exception() = default;
const char* what() const noexcept override;
+ VkResult GetResult() const noexcept {
+ return result;
+ }
private:
VkResult result;