author: ameerj <52414509+ameerj@users.noreply.github.com> 2021-05-08 04:14:21 +0200
committer: ameerj <52414509+ameerj@users.noreply.github.com> 2021-05-08 04:14:21 +0200
commit: 3671fd0a97351c1e5b2ea691d85ab45d5f83288e (patch)
tree: a4723be2513e3d5dbd7de423c865c2ff78c06843
parent: Merge pull request #6279 from ogniK5377/nvhost-prof (diff)
download: yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.tar
yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.tar.gz
yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.tar.bz2
yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.tar.lz
yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.tar.xz
yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.tar.zst
yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.zip
8 files changed, 99 insertions, 61 deletions
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 623b43d8a..ffe9edc1b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -543,8 +543,7 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
 }
 
 void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
-                                          const std::array<Offset2D, 2>& dst_region,
-                                          const std::array<Offset2D, 2>& src_region,
+                                          const Region2D& dst_region, const Region2D& src_region,
                                           Tegra::Engines::Fermi2D::Filter filter,
                                           Tegra::Engines::Fermi2D::Operation operation) {
     state_tracker.NotifyScissor0();
@@ -560,9 +559,9 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
     const GLbitfield buffer_bits = dst->BufferBits();
     const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0;
     const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
-    glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y,
-                           src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y,
-                           dst_region[1].x, dst_region[1].y, buffer_bits,
+    glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region.start.x, src_region.start.y,
+                           src_region.end.x, src_region.end.y, dst_region.start.x,
+                           dst_region.start.y, dst_region.end.x, dst_region.end.y, buffer_bits,
                            is_linear ? GL_LINEAR : GL_NEAREST);
 }
 
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 3c871541b..df8be12ff 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -28,7 +28,7 @@ using VideoCommon::ImageId;
 using VideoCommon::ImageViewId;
 using VideoCommon::ImageViewType;
 using VideoCommon::NUM_RT;
-using VideoCommon::Offset2D;
+using VideoCommon::Region2D;
 using VideoCommon::RenderTargets;
 
 struct ImageBufferMap {
@@ -73,10 +73,8 @@ public:
 
     void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
 
-    void BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
-                         const std::array<Offset2D, 2>& dst_region,
-                         const std::array<Offset2D, 2>& src_region,
-                         Tegra::Engines::Fermi2D::Filter filter,
+    void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, const Region2D& dst_region,
+                         const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
                          Tegra::Engines::Fermi2D::Operation operation);
 
     void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 1f6a169ae..b7f5b8bc2 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -289,16 +289,15 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri
     device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr);
 }
 
-void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout,
-                   const std::array<Offset2D, 2>& dst_region,
-                   const std::array<Offset2D, 2>& src_region) {
+void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region,
+                   const Region2D& src_region) {
     const VkOffset2D offset{
-        .x = std::min(dst_region[0].x, dst_region[1].x),
-        .y = std::min(dst_region[0].y, dst_region[1].y),
+        .x = std::min(dst_region.start.x, dst_region.end.x),
+        .y = std::min(dst_region.start.y, dst_region.end.y),
     };
     const VkExtent2D extent{
-        .width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)),
-        .height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)),
+        .width = static_cast<u32>(std::abs(dst_region.end.x - dst_region.start.x)),
+        .height = static_cast<u32>(std::abs(dst_region.end.y - dst_region.start.y)),
     };
     const VkViewport viewport{
         .x = static_cast<float>(offset.x),
@@ -313,11 +312,12 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout,
         .offset = offset,
         .extent = extent,
     };
-    const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x);
-    const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y);
+    const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x);
+    const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y);
     const PushConstants push_constants{
         .tex_scale = {scale_x, scale_y},
-        .tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)},
+        .tex_offset = {static_cast<float>(src_region.start.x),
+                       static_cast<float>(src_region.start.y)},
     };
     cmdbuf.SetViewport(0, viewport);
     cmdbuf.SetScissor(0, scissor);
@@ -353,8 +353,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
 BlitImageHelper::~BlitImageHelper() = default;
 
 void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
-                                const std::array<Offset2D, 2>& dst_region,
-                                const std::array<Offset2D, 2>& src_region,
+                                const Region2D& dst_region, const Region2D& src_region,
                                 Tegra::Engines::Fermi2D::Filter filter,
                                 Tegra::Engines::Fermi2D::Operation operation) {
     const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
@@ -383,8 +382,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV
 
 void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
                                        VkImageView src_depth_view, VkImageView src_stencil_view,
-                                       const std::array<Offset2D, 2>& dst_region,
-                                       const std::array<Offset2D, 2>& src_region,
+                                       const Region2D& dst_region, const Region2D& src_region,
                                        Tegra::Engines::Fermi2D::Filter filter,
                                        Tegra::Engines::Fermi2D::Operation operation) {
     ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 43fd3d737..0d81a06ed 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -13,7 +13,7 @@
 
 namespace Vulkan {
 
-using VideoCommon::Offset2D;
+using VideoCommon::Region2D;
 
 class Device;
 class Framebuffer;
@@ -35,15 +35,13 @@ public:
     ~BlitImageHelper();
 
     void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
-                   const std::array<Offset2D, 2>& dst_region,
-                   const std::array<Offset2D, 2>& src_region,
+                   const Region2D& dst_region, const Region2D& src_region,
                    Tegra::Engines::Fermi2D::Filter filter,
                    Tegra::Engines::Fermi2D::Operation operation);
 
     void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
-                          VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region,
-                          const std::array<Offset2D, 2>& src_region,
-                          Tegra::Engines::Fermi2D::Filter filter,
+                          VkImageView src_stencil_view, const Region2D& dst_region,
+                          const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
                           Tegra::Engines::Fermi2D::Operation operation);
 
     void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 017348e05..bdd0ce8bc 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -490,8 +490,7 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
                            write_barrier);
 }
 
-[[nodiscard]] VkImageBlit MakeImageBlit(const std::array<Offset2D, 2>& dst_region,
-                                        const std::array<Offset2D, 2>& src_region,
+[[nodiscard]] VkImageBlit MakeImageBlit(const Region2D& dst_region, const Region2D& src_region,
                                         const VkImageSubresourceLayers& dst_layers,
                                         const VkImageSubresourceLayers& src_layers) {
     return VkImageBlit{
@@ -499,13 +498,13 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
         .srcOffsets =
             {
                 {
-                    .x = src_region[0].x,
-                    .y = src_region[0].y,
+                    .x = src_region.start.x,
+                    .y = src_region.start.y,
                     .z = 0,
                 },
                 {
-                    .x = src_region[1].x,
-                    .y = src_region[1].y,
+                    .x = src_region.end.x,
+                    .y = src_region.end.y,
                     .z = 1,
                 },
             },
@@ -513,42 +512,42 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
         .dstOffsets =
             {
                 {
-                    .x = dst_region[0].x,
-                    .y = dst_region[0].y,
+                    .x = dst_region.start.x,
+                    .y = dst_region.start.y,
                     .z = 0,
                 },
                 {
-                    .x = dst_region[1].x,
-                    .y = dst_region[1].y,
+                    .x = dst_region.end.x,
+                    .y = dst_region.end.y,
                     .z = 1,
                 },
             },
     };
 }
 
-[[nodiscard]] VkImageResolve MakeImageResolve(const std::array<Offset2D, 2>& dst_region,
-                                              const std::array<Offset2D, 2>& src_region,
+[[nodiscard]] VkImageResolve MakeImageResolve(const Region2D& dst_region,
+                                              const Region2D& src_region,
                                               const VkImageSubresourceLayers& dst_layers,
                                               const VkImageSubresourceLayers& src_layers) {
     return VkImageResolve{
         .srcSubresource = src_layers,
         .srcOffset =
             {
-                .x = src_region[0].x,
-                .y = src_region[0].y,
+                .x = src_region.start.x,
+                .y = src_region.start.y,
                 .z = 0,
             },
         .dstSubresource = dst_layers,
         .dstOffset =
             {
-                .x = dst_region[0].x,
-                .y = dst_region[0].y,
+                .x = dst_region.start.x,
+                .y = dst_region.start.y,
                 .z = 0,
             },
         .extent =
             {
-                .width = static_cast<u32>(dst_region[1].x - dst_region[0].x),
-                .height = static_cast<u32>(dst_region[1].y - dst_region[0].y),
+                .width = static_cast<u32>(dst_region.end.x - dst_region.start.x),
+                .height = static_cast<u32>(dst_region.end.y - dst_region.start.y),
                 .depth = 1,
             },
     };
@@ -602,8 +601,7 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
 }
 
 void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
-                                    const std::array<Offset2D, 2>& dst_region,
-                                    const std::array<Offset2D, 2>& src_region,
+                                    const Region2D& dst_region, const Region2D& src_region,
                                     Tegra::Engines::Fermi2D::Filter filter,
                                     Tegra::Engines::Fermi2D::Operation operation) {
     const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 628785d5e..4a57d378b 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -16,7 +16,7 @@ namespace Vulkan {
 
 using VideoCommon::ImageId;
 using VideoCommon::NUM_RT;
-using VideoCommon::Offset2D;
+using VideoCommon::Region2D;
 using VideoCommon::RenderTargets;
 using VideoCore::Surface::PixelFormat;
 
@@ -71,8 +71,7 @@ struct TextureCacheRuntime {
     [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
 
     void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
-                   const std::array<Offset2D, 2>& dst_region,
-                   const std::array<Offset2D, 2>& src_region,
+                   const Region2D& dst_region, const Region2D& src_region,
                    Tegra::Engines::Fermi2D::Filter filter,
                    Tegra::Engines::Fermi2D::Operation operation);
 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 98e33c3a0..59b7c678b 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -148,7 +148,9 @@ public:
     /// Blit an image with the given parameters
     void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                    const Tegra::Engines::Fermi2D::Surface& src,
-                   const Tegra::Engines::Fermi2D::Config& copy);
+                   const Tegra::Engines::Fermi2D::Config& copy,
+                   std::optional<Region2D> src_region_override = {},
+                   std::optional<Region2D> dst_region_override = {});
 
     /// Invalidate the contents of the color buffer index
     /// These contents become unspecified, the cache can assume aggressive optimizations.
@@ -615,7 +617,9 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
 template <class P>
 void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                                 const Tegra::Engines::Fermi2D::Surface& src,
-                                const Tegra::Engines::Fermi2D::Config& copy) {
+                                const Tegra::Engines::Fermi2D::Config& copy,
+                                std::optional<Region2D> src_override,
+                                std::optional<Region2D> dst_override) {
     const BlitImages images = GetBlitImages(dst, src);
     const ImageId dst_id = images.dst_id;
     const ImageId src_id = images.src_id;
@@ -631,20 +635,42 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
     const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
     const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
     const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
-    const std::array src_region{
-        Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
-        Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
+
+    // out of bounds texture blit checking
+    const bool use_override = src_override.has_value();
+    const s32 src_x0 = copy.src_x0 >> src_samples_x;
+    s32 src_x1 = use_override ? src_override->end.x : copy.src_x1 >> src_samples_x;
+    const s32 src_y0 = copy.src_y0 >> src_samples_y;
+    const s32 src_y1 = copy.src_y1 >> src_samples_y;
+
+    const auto src_width = static_cast<s32>(src_image.info.size.width);
+    const bool width_oob = src_x1 > src_width;
+    const auto width_diff = width_oob ? src_x1 - src_width : 0;
+    if (width_oob) {
+        src_x1 = src_width;
+    }
+
+    const Region2D src_dimensions{
+        Offset2D{.x = src_x0, .y = src_y0},
+        Offset2D{.x = src_x1, .y = src_y1},
     };
+    const auto src_region = use_override ? *src_override : src_dimensions;
 
     const std::optional src_base = src_image.TryFindBase(src.Address());
     const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
     const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
     const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
     const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
-    const std::array dst_region{
-        Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
-        Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
+
+    const s32 dst_x0 = copy.dst_x0 >> dst_samples_x;
+    const s32 dst_x1 = copy.dst_x1 >> dst_samples_x;
+    const s32 dst_y0 = copy.dst_y0 >> dst_samples_y;
+    const s32 dst_y1 = copy.dst_y1 >> dst_samples_y;
+    const Region2D dst_dimensions{
+        Offset2D{.x = dst_x0, .y = dst_y0},
+        Offset2D{.x = dst_x1 - width_diff, .y = dst_y1},
     };
+    const auto dst_region = use_override ? *dst_override : dst_dimensions;
 
     // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
     Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
@@ -661,6 +687,21 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
         runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
                           copy.operation);
     }
+
+    if (width_oob) {
+        // Continue copy of the oob region of the texture on the next row
+        auto oob_src = src;
+        oob_src.height++;
+        const Region2D src_region_override{
+            Offset2D{.x = 0, .y = src_y0 + 1},
+            Offset2D{.x = width_diff, .y = src_y1 + 1},
+        };
+        const Region2D dst_region_override{
+            Offset2D{.x = dst_x1 - width_diff, .y = dst_y0},
+            Offset2D{.x = dst_x1, .y = dst_y1},
+        };
+        BlitImage(dst, oob_src, copy, src_region_override, dst_region_override);
+    }
 }
 
 template <class P>
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index 2ad2d72a6..c9571f7e4 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -64,6 +64,13 @@ struct Offset3D {
     s32 z;
 };
 
+struct Region2D {
+    constexpr auto operator<=>(const Region2D&) const noexcept = default;
+
+    Offset2D start;
+    Offset2D end;
+};
+
 struct Extent2D {
     constexpr auto operator<=>(const Extent2D&) const noexcept = default;
author	ameerj <52414509+ameerj@users.noreply.github.com>	2021-05-08 04:14:21 +0200
committer	ameerj <52414509+ameerj@users.noreply.github.com>	2021-05-08 04:14:21 +0200
commit	3671fd0a97351c1e5b2ea691d85ab45d5f83288e (patch)
tree	a4723be2513e3d5dbd7de423c865c2ff78c06843
parent	Merge pull request #6279 from ogniK5377/nvhost-prof (diff)
download	yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.tar yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.tar.gz yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.tar.bz2 yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.tar.lz yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.tar.xz yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.tar.zst yuzu-3671fd0a97351c1e5b2ea691d85ab45d5f83288e.zip