author: liamwhite <liamwhite@users.noreply.github.com> 2023-07-02 04:38:18 +0200
committer: GitHub <noreply@github.com> 2023-07-02 04:38:18 +0200
commit: 971b89b979cb3b903263234f3a6fdd2bceb03cbe (patch)
tree: d32c8012765d9d94c57292ddfac3f84ec247a6e1 /src/video_core
parent: Merge pull request #10966 from Morph1984/heap-corruption (diff)
parent: parcel: Optimize small_vector sizes (diff)
download: yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.tar
yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.tar.gz
yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.tar.bz2
yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.tar.lz
yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.tar.xz
yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.tar.zst
yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.zip
9 files changed, 31 insertions, 23 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a290d6ea7..f8598fd98 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -174,8 +174,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
     src_operand.address = regs.offset_in;
 
     DMA::BufferOperand dst_operand;
-    u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out));
-    dst_operand.pitch = abs_pitch_out;
+    dst_operand.pitch = static_cast<u32>(std::abs(regs.pitch_out));
     dst_operand.width = regs.line_length_in;
     dst_operand.height = regs.line_count;
     dst_operand.address = regs.offset_out;
@@ -222,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
     const size_t src_size =
         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
 
-    const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count;
+    const size_t dst_size = dst_operand.pitch * regs.line_count;
     read_buffer.resize_destructive(src_size);
     write_buffer.resize_destructive(dst_size);
 
@@ -231,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
 
     UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
                      src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
-                     abs_pitch_out);
+                     dst_operand.pitch);
 
     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
 }
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index cd6a3a9b8..da07a556f 100644
--- a/src/video_core/host1x/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -290,7 +290,7 @@ void Codec::Decode() {
             return vp9_decoder->GetFrameBytes();
         default:
             ASSERT(false);
-            return std::vector<u8>{};
+            return std::span<const u8>{};
         }
     }();
     AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index ce827eb6c..862904e39 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -29,15 +29,15 @@ H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {}
 
 H264::~H264() = default;
 
-const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
-                                          bool is_first_frame) {
+std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
+                                       bool is_first_frame) {
     H264DecoderContext context;
     host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context,
                                      sizeof(H264DecoderContext));
 
     const s64 frame_number = context.h264_parameter_set.frame_number.Value();
     if (!is_first_frame && frame_number != 0) {
-        frame.resize(context.stream_len);
+        frame.resize_destructive(context.stream_len);
         host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
         return frame;
     }
@@ -135,14 +135,14 @@ const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegist
     for (s32 index = 0; index < 6; index++) {
         writer.WriteBit(true);
         std::span<const u8> matrix{context.weight_scale};
-        writer.WriteScalingList(matrix, index * 16, 16);
+        writer.WriteScalingList(scan, matrix, index * 16, 16);
     }
 
     if (context.h264_parameter_set.transform_8x8_mode_flag) {
         for (s32 index = 0; index < 2; index++) {
             writer.WriteBit(true);
             std::span<const u8> matrix{context.weight_scale_8x8};
-            writer.WriteScalingList(matrix, index * 64, 64);
+            writer.WriteScalingList(scan, matrix, index * 64, 64);
         }
     }
 
@@ -188,8 +188,8 @@ void H264BitWriter::WriteBit(bool state) {
     WriteBits(state ? 1 : 0, 1);
 }
 
-void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
-    static Common::ScratchBuffer<u8> scan{};
+void H264BitWriter::WriteScalingList(Common::ScratchBuffer<u8>& scan, std::span<const u8> list,
+                                     s32 start, s32 count) {
     scan.resize_destructive(count);
     if (count == 16) {
         std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
diff --git a/src/video_core/host1x/codecs/h264.h b/src/video_core/host1x/codecs/h264.h
index 5cc86454e..d6b556322 100644
--- a/src/video_core/host1x/codecs/h264.h
+++ b/src/video_core/host1x/codecs/h264.h
@@ -5,9 +5,11 @@
 
 #include <span>
 #include <vector>
+
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "common/scratch_buffer.h"
 #include "video_core/host1x/nvdec_common.h"
 
 namespace Tegra {
@@ -37,7 +39,8 @@ public:
 
     /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
     /// Writes the scaling matrices of the sream
-    void WriteScalingList(std::span<const u8> list, s32 start, s32 count);
+    void WriteScalingList(Common::ScratchBuffer<u8>& scan, std::span<const u8> list, s32 start,
+                          s32 count);
 
     /// Return the bitstream as a vector.
     [[nodiscard]] std::vector<u8>& GetByteArray();
@@ -63,11 +66,12 @@ public:
     ~H264();
 
     /// Compose the H264 frame for FFmpeg decoding
-    [[nodiscard]] const std::vector<u8>& ComposeFrame(
-        const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
+    [[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
+                                                   bool is_first_frame = false);
 
 private:
-    std::vector<u8> frame;
+    Common::ScratchBuffer<u8> frame;
+    Common::ScratchBuffer<u8> scan;
     Host1x::Host1x& host1x;
 
     struct H264ParameterSet {
diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp
index 28fb12cb8..ee6392ff9 100644
--- a/src/video_core/host1x/codecs/vp8.cpp
+++ b/src/video_core/host1x/codecs/vp8.cpp
@@ -12,7 +12,7 @@ VP8::VP8(Host1x::Host1x& host1x_) : host1x{host1x_} {}
 
 VP8::~VP8() = default;
 
-const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
+std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
     VP8PictureInfo info;
     host1x.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
 
diff --git a/src/video_core/host1x/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h
index 5bf07ecab..7926b73f3 100644
--- a/src/video_core/host1x/codecs/vp8.h
+++ b/src/video_core/host1x/codecs/vp8.h
@@ -4,10 +4,11 @@
 #pragma once
 
 #include <array>
-#include <vector>
+#include <span>
 
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "common/scratch_buffer.h"
 #include "video_core/host1x/nvdec_common.h"
 
 namespace Tegra {
@@ -24,11 +25,11 @@ public:
     ~VP8();
 
     /// Compose the VP8 frame for FFmpeg decoding
-    [[nodiscard]] const std::vector<u8>& ComposeFrame(
+    [[nodiscard]] std::span<const u8> ComposeFrame(
         const Host1x::NvdecCommon::NvdecRegisters& state);
 
 private:
-    std::vector<u8> frame;
+    Common::ScratchBuffer<u8> frame;
     Host1x::Host1x& host1x;
 
     struct VP8PictureInfo {
diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp
index cf40c9012..306c3d0e8 100644
--- a/src/video_core/host1x/codecs/vp9.cpp
+++ b/src/video_core/host1x/codecs/vp9.cpp
@@ -3,6 +3,7 @@
 
 #include <algorithm> // for std::copy
 #include <numeric>
+
 #include "common/assert.h"
 #include "video_core/host1x/codecs/vp9.h"
 #include "video_core/host1x/host1x.h"
diff --git a/src/video_core/host1x/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h
index d4083e8d3..f1ed19508 100644
--- a/src/video_core/host1x/codecs/vp9.h
+++ b/src/video_core/host1x/codecs/vp9.h
@@ -4,9 +4,11 @@
 #pragma once
 
 #include <array>
+#include <span>
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/scratch_buffer.h"
 #include "common/stream.h"
 #include "video_core/host1x/codecs/vp9_types.h"
 #include "video_core/host1x/nvdec_common.h"
@@ -128,8 +130,8 @@ public:
         return !current_frame_info.show_frame;
     }
 
-    /// Returns a const reference to the composed frame data.
-    [[nodiscard]] const std::vector<u8>& GetFrameBytes() const {
+    /// Returns a const span to the composed frame data.
+    [[nodiscard]] std::span<const u8> GetFrameBytes() const {
         return frame;
     }
 
@@ -181,7 +183,7 @@ private:
     [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
 
     Host1x::Host1x& host1x;
-    std::vector<u8> frame;
+    Common::ScratchBuffer<u8> frame;
 
     std::array<s8, 4> loop_filter_ref_deltas{};
     std::array<s8, 2> loop_filter_mode_deltas{};
diff --git a/src/video_core/host1x/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h
index adad8ed7e..cc9b25690 100644
--- a/src/video_core/host1x/codecs/vp9_types.h
+++ b/src/video_core/host1x/codecs/vp9_types.h
@@ -5,6 +5,7 @@
 
 #include <array>
 #include <vector>
+
 #include "common/common_funcs.h"
 #include "common/common_types.h"
author	liamwhite <liamwhite@users.noreply.github.com>	2023-07-02 04:38:18 +0200
committer	GitHub <noreply@github.com>	2023-07-02 04:38:18 +0200
commit	971b89b979cb3b903263234f3a6fdd2bceb03cbe (patch)
tree	d32c8012765d9d94c57292ddfac3f84ec247a6e1 /src/video_core
parent	Merge pull request #10966 from Morph1984/heap-corruption (diff)
parent	parcel: Optimize small_vector sizes (diff)
download	yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.tar yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.tar.gz yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.tar.bz2 yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.tar.lz yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.tar.xz yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.tar.zst yuzu-971b89b979cb3b903263234f3a6fdd2bceb03cbe.zip