diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 5 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 5 | ||||
-rw-r--r-- | src/video_core/gpu.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 9 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 8 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/blit_image.cpp | 26 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/blit_image.h | 10 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/renderer_vulkan.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 38 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 5 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 57 | ||||
-rw-r--r-- | src/video_core/texture_cache/types.h | 7 |
13 files changed, 112 insertions, 62 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 32dcbd693..de971041f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -690,7 +690,10 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const VAddr cpu_addr = binding.cpu_addr; const u32 size = binding.size; Buffer& buffer = slot_buffers[binding.buffer_id]; - if (size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size)) { + const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && + size <= uniform_buffer_skip_cache_size && + !buffer.IsRegionGpuModified(cpu_addr, size); + if (use_fast_buffer) { if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index a38024242..37f7b24e1 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -13,6 +13,7 @@ #include "core/frontend/emu_window.h" #include "core/hardware_interrupt_manager.h" #include "core/memory.h" +#include "core/perf_stats.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_memory.h" @@ -191,6 +192,10 @@ u64 GPU::GetTicks() const { return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; } +void GPU::RendererFrameEndNotify() { + system.GetPerfStats().EndGameFrame(); +} + void GPU::FlushCommands() { rasterizer->FlushCommands(); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 8669e9940..29a867863 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -247,6 +247,8 @@ public: return use_nvdec; } + void RendererFrameEndNotify(); + enum class FenceOperation : u32 { Acquire = 0, Increment = 1, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 623b43d8a..ffe9edc1b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -543,8 +543,7 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, } void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, - const std::array<Offset2D, 2>& dst_region, - const std::array<Offset2D, 2>& src_region, + const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation) { state_tracker.NotifyScissor0(); @@ -560,9 +559,9 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, const GLbitfield buffer_bits = dst->BufferBits(); const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0; const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear; - glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y, - src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y, - dst_region[1].x, dst_region[1].y, buffer_bits, + glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region.start.x, src_region.start.y, + src_region.end.x, src_region.end.y, dst_region.start.x, + dst_region.start.y, dst_region.end.x, dst_region.end.y, buffer_bits, is_linear ? GL_LINEAR : GL_NEAREST); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3c871541b..df8be12ff 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -28,7 +28,7 @@ using VideoCommon::ImageId; using VideoCommon::ImageViewId; using VideoCommon::ImageViewType; using VideoCommon::NUM_RT; -using VideoCommon::Offset2D; +using VideoCommon::Region2D; using VideoCommon::RenderTargets; struct ImageBufferMap { @@ -73,10 +73,8 @@ public: void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); - void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, - const std::array<Offset2D, 2>& dst_region, - const std::array<Offset2D, 2>& src_region, - Tegra::Engines::Fermi2D::Filter filter, + void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, const Region2D& dst_region, + const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); void AccelerateImageUpload(Image& image, const ImageBufferMap& map, diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index cc2e499f9..a718bff7a 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -155,6 +155,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { ++m_current_frame; + gpu.RendererFrameEndNotify(); rasterizer.TickFrame(); context->SwapBuffers(); diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 1f6a169ae..b7f5b8bc2 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -289,16 +289,15 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr); } -void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, - const std::array<Offset2D, 2>& dst_region, - const std::array<Offset2D, 2>& src_region) { +void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, + const Region2D& src_region) { const VkOffset2D offset{ - .x = std::min(dst_region[0].x, dst_region[1].x), - .y = std::min(dst_region[0].y, dst_region[1].y), + .x = std::min(dst_region.start.x, dst_region.end.x), + .y = std::min(dst_region.start.y, dst_region.end.y), }; const VkExtent2D extent{ - .width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)), - .height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)), + .width = static_cast<u32>(std::abs(dst_region.end.x - dst_region.start.x)), + .height = static_cast<u32>(std::abs(dst_region.end.y - dst_region.start.y)), }; const VkViewport viewport{ .x = static_cast<float>(offset.x), @@ -313,11 +312,12 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, .offset = offset, .extent = extent, }; - const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x); - const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y); + const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x); + const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y); const PushConstants push_constants{ .tex_scale = {scale_x, scale_y}, - .tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)}, + .tex_offset = {static_cast<float>(src_region.start.x), + static_cast<float>(src_region.start.y)}, }; cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); @@ -353,8 +353,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, BlitImageHelper::~BlitImageHelper() = default; void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, - const std::array<Offset2D, 2>& dst_region, - const std::array<Offset2D, 2>& src_region, + const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation) { const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear; @@ -383,8 +382,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, VkImageView src_stencil_view, - const std::array<Offset2D, 2>& dst_region, - const std::array<Offset2D, 2>& src_region, + const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation) { ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 43fd3d737..0d81a06ed 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -13,7 +13,7 @@ namespace Vulkan { -using VideoCommon::Offset2D; +using VideoCommon::Region2D; class Device; class Framebuffer; @@ -35,15 +35,13 @@ public: ~BlitImageHelper(); void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, - const std::array<Offset2D, 2>& dst_region, - const std::array<Offset2D, 2>& src_region, + const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, - VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region, - const std::array<Offset2D, 2>& src_region, - Tegra::Engines::Fermi2D::Filter filter, + VkImageView src_stencil_view, const Region2D& dst_region, + const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 2e0cf4232..3986eb172 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -154,6 +154,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (swapchain.Present(render_semaphore)) { blit_screen.Recreate(); } + gpu.RendererFrameEndNotify(); rasterizer.TickFrame(); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 017348e05..bdd0ce8bc 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -490,8 +490,7 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im write_barrier); } -[[nodiscard]] VkImageBlit MakeImageBlit(const std::array<Offset2D, 2>& dst_region, - const std::array<Offset2D, 2>& src_region, +[[nodiscard]] VkImageBlit MakeImageBlit(const Region2D& dst_region, const Region2D& src_region, const VkImageSubresourceLayers& dst_layers, const VkImageSubresourceLayers& src_layers) { return VkImageBlit{ @@ -499,13 +498,13 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im .srcOffsets = { { - .x = src_region[0].x, - .y = src_region[0].y, + .x = src_region.start.x, + .y = src_region.start.y, .z = 0, }, { - .x = src_region[1].x, - .y = src_region[1].y, + .x = src_region.end.x, + .y = src_region.end.y, .z = 1, }, }, @@ -513,42 +512,42 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im .dstOffsets = { { - .x = dst_region[0].x, - .y = dst_region[0].y, + .x = dst_region.start.x, + .y = dst_region.start.y, .z = 0, }, { - .x = dst_region[1].x, - .y = dst_region[1].y, + .x = dst_region.end.x, + .y = dst_region.end.y, .z = 1, }, }, }; } -[[nodiscard]] VkImageResolve MakeImageResolve(const std::array<Offset2D, 2>& dst_region, - const std::array<Offset2D, 2>& src_region, +[[nodiscard]] VkImageResolve MakeImageResolve(const Region2D& dst_region, + const Region2D& src_region, const VkImageSubresourceLayers& dst_layers, const VkImageSubresourceLayers& src_layers) { return VkImageResolve{ .srcSubresource = src_layers, .srcOffset = { - .x = src_region[0].x, - .y = src_region[0].y, + .x = src_region.start.x, + .y = src_region.start.y, .z = 0, }, .dstSubresource = dst_layers, .dstOffset = { - .x = dst_region[0].x, - .y = dst_region[0].y, + .x = dst_region.start.x, + .y = dst_region.start.y, .z = 0, }, .extent = { - .width = static_cast<u32>(dst_region[1].x - dst_region[0].x), - .height = static_cast<u32>(dst_region[1].y - dst_region[0].y), + .width = static_cast<u32>(dst_region.end.x - dst_region.start.x), + .height = static_cast<u32>(dst_region.end.y - dst_region.start.y), .depth = 1, }, }; @@ -602,8 +601,7 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { } void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, - const std::array<Offset2D, 2>& dst_region, - const std::array<Offset2D, 2>& src_region, + const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation) { const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 628785d5e..4a57d378b 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -16,7 +16,7 @@ namespace Vulkan { using VideoCommon::ImageId; using VideoCommon::NUM_RT; -using VideoCommon::Offset2D; +using VideoCommon::Region2D; using VideoCommon::RenderTargets; using VideoCore::Surface::PixelFormat; @@ -71,8 +71,7 @@ struct TextureCacheRuntime { [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, - const std::array<Offset2D, 2>& dst_region, - const std::array<Offset2D, 2>& src_region, + const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 98e33c3a0..59b7c678b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -148,7 +148,9 @@ public: /// Blit an image with the given parameters void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy); + const Tegra::Engines::Fermi2D::Config& copy, + std::optional<Region2D> src_region_override = {}, + std::optional<Region2D> dst_region_override = {}); /// Invalidate the contents of the color buffer index /// These contents become unspecified, the cache can assume aggressive optimizations. @@ -615,7 +617,9 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { template <class P> void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy) { + const Tegra::Engines::Fermi2D::Config& copy, + std::optional<Region2D> src_override, + std::optional<Region2D> dst_override) { const BlitImages images = GetBlitImages(dst, src); const ImageId dst_id = images.dst_id; const ImageId src_id = images.src_id; @@ -631,20 +635,42 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); - const std::array src_region{ - Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, - Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, + + // out of bounds texture blit checking + const bool use_override = src_override.has_value(); + const s32 src_x0 = copy.src_x0 >> src_samples_x; + s32 src_x1 = use_override ? src_override->end.x : copy.src_x1 >> src_samples_x; + const s32 src_y0 = copy.src_y0 >> src_samples_y; + const s32 src_y1 = copy.src_y1 >> src_samples_y; + + const auto src_width = static_cast<s32>(src_image.info.size.width); + const bool width_oob = src_x1 > src_width; + const auto width_diff = width_oob ? src_x1 - src_width : 0; + if (width_oob) { + src_x1 = src_width; + } + + const Region2D src_dimensions{ + Offset2D{.x = src_x0, .y = src_y0}, + Offset2D{.x = src_x1, .y = src_y1}, }; + const auto src_region = use_override ? *src_override : src_dimensions; const std::optional src_base = src_image.TryFindBase(src.Address()); const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); - const std::array dst_region{ - Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, - Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, + + const s32 dst_x0 = copy.dst_x0 >> dst_samples_x; + const s32 dst_x1 = copy.dst_x1 >> dst_samples_x; + const s32 dst_y0 = copy.dst_y0 >> dst_samples_y; + const s32 dst_y1 = copy.dst_y1 >> dst_samples_y; + const Region2D dst_dimensions{ + Offset2D{.x = dst_x0, .y = dst_y0}, + Offset2D{.x = dst_x1 - width_diff, .y = dst_y1}, }; + const auto dst_region = use_override ? *dst_override : dst_dimensions; // Always call this after src_framebuffer_id was queried, as the address might be invalidated. Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; @@ -661,6 +687,21 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, copy.operation); } + + if (width_oob) { + // Continue copy of the oob region of the texture on the next row + auto oob_src = src; + oob_src.height++; + const Region2D src_region_override{ + Offset2D{.x = 0, .y = src_y0 + 1}, + Offset2D{.x = width_diff, .y = src_y1 + 1}, + }; + const Region2D dst_region_override{ + Offset2D{.x = dst_x1 - width_diff, .y = dst_y0}, + Offset2D{.x = dst_x1, .y = dst_y1}, + }; + BlitImage(dst, oob_src, copy, src_region_override, dst_region_override); + } } template <class P> diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 2ad2d72a6..c9571f7e4 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -64,6 +64,13 @@ struct Offset3D { s32 z; }; +struct Region2D { + constexpr auto operator<=>(const Region2D&) const noexcept = default; + + Offset2D start; + Offset2D end; +}; + struct Extent2D { constexpr auto operator<=>(const Extent2D&) const noexcept = default; |