summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 5
-rw-r--r-- src/video_core/gpu.cpp 15
-rw-r--r-- src/video_core/gpu.h 6
-rw-r--r-- src/video_core/gpu_asynch.cpp 4
-rw-r--r-- src/video_core/gpu_asynch.h 1
-rw-r--r-- src/video_core/gpu_synch.h 1
-rw-r--r-- src/video_core/gpu_thread.cpp 19
-rw-r--r-- src/video_core/gpu_thread.h 9
-rw-r--r-- src/video_core/macro_interpreter.cpp 71
-rw-r--r-- src/video_core/macro_interpreter.h 80
-rw-r--r-- src/video_core/morton.cpp 22
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 1
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 3
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 64
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp 13
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 5
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp 44
-rw-r--r-- src/video_core/shader/ast.cpp 85
-rw-r--r-- src/video_core/shader/ast.h 6
-rw-r--r-- src/video_core/shader/control_flow.cpp 4
-rw-r--r-- src/video_core/shader/control_flow.h 4
-rw-r--r-- src/video_core/shader/node.h 2
-rw-r--r-- src/video_core/surface.cpp 34
-rw-r--r-- src/video_core/surface.h 222
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 139
25 files changed, 523 insertions, 336 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7802fd808..59976943a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -101,7 +101,8 @@ void Maxwell3D::InitializeRegisterDefaults() {
#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
void Maxwell3D::InitDirtySettings() {
- const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
+ const auto set_block = [this](const std::size_t start, const std::size_t range,
+ const u8 position) {
const auto start_itr = dirty_pointers.begin() + start;
const auto end_itr = start_itr + range;
std::fill(start_itr, end_itr, position);
@@ -478,7 +479,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) {
}
void Maxwell3D::FlushMMEInlineDraw() {
- LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+ LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 76cfe8107..095660115 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include "common/assert.h"
+#include "common/microprofile.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/memory.h"
@@ -17,6 +18,8 @@
namespace Tegra {
+MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
+
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
: system{system}, renderer{renderer}, is_async{is_async} {
auto& rasterizer{renderer.Rasterizer()};
@@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}
+void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
+ // Synced GPU, is always in sync
+ if (!is_async) {
+ return;
+ }
+ MICROPROFILE_SCOPE(GPU_wait);
+ while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
+ }
+}
+
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
syncpoints[syncpoint_id]++;
std::lock_guard lock{sync_mutex};
@@ -326,7 +339,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
- block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
+ block.timestamp = system.CoreTiming().GetTicks();
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
sizeof(block));
} else {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 29fa8e95b..dbca19f35 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -177,6 +177,12 @@ public:
/// Returns a reference to the GPU DMA pusher.
Tegra::DmaPusher& DmaPusher();
+ // Waits for the GPU to finish working
+ virtual void WaitIdle() const = 0;
+
+ /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
+ void WaitFence(u32 syncpoint_id, u32 value) const;
+
void IncrementSyncPoint(u32 syncpoint_id);
u32 GetSyncpointValue(u32 syncpoint_id) const;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index f2a3a390e..04222d060 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
}
+void GPUAsynch::WaitIdle() const {
+ gpu_thread.WaitIdle();
+}
+
} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index a12f9bac4..1241ade1d 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -25,6 +25,7 @@ public:
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+ void WaitIdle() const override;
protected:
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 5eb1c461c..c71baee89 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -24,6 +24,7 @@ public:
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+ void WaitIdle() const override {}
protected:
void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 5f039e4fd..758a37f14 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,8 +5,6 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h"
-#include "core/core_timing.h"
-#include "core/core_timing_util.h"
#include "core/frontend/scope_acquire_window_context.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
@@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() {
void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
- synchronization_event = system.CoreTiming().RegisterEvent(
- "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
- const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
- const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})};
- system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
+ PushCommand(SubmitListCommand(std::move(entries)));
}
void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
InvalidateRegion(addr, size);
}
+void ThreadManager::WaitIdle() const {
+ while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) {
+ }
+}
+
u64 ThreadManager::PushCommand(CommandData&& command_data) {
const u64 fence{++state.last_fence};
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
return fence;
}
-MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-void SynchState::WaitForSynchronization(u64 fence) {
- while (signaled_fence.load() < fence)
- ;
-}
-
} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 3ae0ec9f3..08dc96bb3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -21,9 +21,6 @@ class DmaPusher;
namespace Core {
class System;
-namespace Timing {
-struct EventType;
-} // namespace Timing
} // namespace Core
namespace VideoCommon::GPUThread {
@@ -89,8 +86,6 @@ struct CommandDataContainer {
struct SynchState final {
std::atomic_bool is_running{true};
- void WaitForSynchronization(u64 fence);
-
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
CommandQueue queue;
u64 last_fence{};
@@ -121,6 +116,9 @@ public:
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
+ // Wait until the gpu thread is idle.
+ void WaitIdle() const;
+
private:
/// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data);
@@ -128,7 +126,6 @@ private:
private:
SynchState state;
Core::System& system;
- Core::Timing::EventType* synchronization_event{};
std::thread thread;
std::thread::id thread_id;
};
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index dbaeac6db..42031d80a 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -11,6 +11,77 @@
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
namespace Tegra {
+namespace {
+enum class Operation : u32 {
+ ALU = 0,
+ AddImmediate = 1,
+ ExtractInsert = 2,
+ ExtractShiftLeftImmediate = 3,
+ ExtractShiftLeftRegister = 4,
+ Read = 5,
+ Unused = 6, // This operation doesn't seem to be a valid encoding.
+ Branch = 7,
+};
+} // Anonymous namespace
+
+enum class MacroInterpreter::ALUOperation : u32 {
+ Add = 0,
+ AddWithCarry = 1,
+ Subtract = 2,
+ SubtractWithBorrow = 3,
+ // Operations 4-7 don't seem to be valid encodings.
+ Xor = 8,
+ Or = 9,
+ And = 10,
+ AndNot = 11,
+ Nand = 12
+};
+
+enum class MacroInterpreter::ResultOperation : u32 {
+ IgnoreAndFetch = 0,
+ Move = 1,
+ MoveAndSetMethod = 2,
+ FetchAndSend = 3,
+ MoveAndSend = 4,
+ FetchAndSetMethod = 5,
+ MoveAndSetMethodFetchAndSend = 6,
+ MoveAndSetMethodSend = 7
+};
+
+enum class MacroInterpreter::BranchCondition : u32 {
+ Zero = 0,
+ NotZero = 1,
+};
+
+union MacroInterpreter::Opcode {
+ u32 raw;
+ BitField<0, 3, Operation> operation;
+ BitField<4, 3, ResultOperation> result_operation;
+ BitField<4, 1, BranchCondition> branch_condition;
+ // If set on a branch, then the branch doesn't have a delay slot.
+ BitField<5, 1, u32> branch_annul;
+ BitField<7, 1, u32> is_exit;
+ BitField<8, 3, u32> dst;
+ BitField<11, 3, u32> src_a;
+ BitField<14, 3, u32> src_b;
+ // The signed immediate overlaps the second source operand and the alu operation.
+ BitField<14, 18, s32> immediate;
+
+ BitField<17, 5, ALUOperation> alu_operation;
+
+ // Bitfield instructions data
+ BitField<17, 5, u32> bf_src_bit;
+ BitField<22, 5, u32> bf_size;
+ BitField<27, 5, u32> bf_dst_bit;
+
+ u32 GetBitfieldMask() const {
+ return (1 << bf_size) - 1;
+ }
+
+ s32 GetBranchTarget() const {
+ return static_cast<s32>(immediate * sizeof(u32));
+ }
+};
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index 76b6a895b..631146d89 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -6,7 +6,6 @@
#include <array>
#include <optional>
-#include <vector>
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -28,75 +27,11 @@ public:
void Execute(u32 offset, std::size_t num_parameters, const u32* parameters);
private:
- enum class Operation : u32 {
- ALU = 0,
- AddImmediate = 1,
- ExtractInsert = 2,
- ExtractShiftLeftImmediate = 3,
- ExtractShiftLeftRegister = 4,
- Read = 5,
- Unused = 6, // This operation doesn't seem to be a valid encoding.
- Branch = 7,
- };
-
- enum class ALUOperation : u32 {
- Add = 0,
- AddWithCarry = 1,
- Subtract = 2,
- SubtractWithBorrow = 3,
- // Operations 4-7 don't seem to be valid encodings.
- Xor = 8,
- Or = 9,
- And = 10,
- AndNot = 11,
- Nand = 12
- };
-
- enum class ResultOperation : u32 {
- IgnoreAndFetch = 0,
- Move = 1,
- MoveAndSetMethod = 2,
- FetchAndSend = 3,
- MoveAndSend = 4,
- FetchAndSetMethod = 5,
- MoveAndSetMethodFetchAndSend = 6,
- MoveAndSetMethodSend = 7
- };
+ enum class ALUOperation : u32;
+ enum class BranchCondition : u32;
+ enum class ResultOperation : u32;
- enum class BranchCondition : u32 {
- Zero = 0,
- NotZero = 1,
- };
-
- union Opcode {
- u32 raw;
- BitField<0, 3, Operation> operation;
- BitField<4, 3, ResultOperation> result_operation;
- BitField<4, 1, BranchCondition> branch_condition;
- BitField<5, 1, u32>
- branch_annul; // If set on a branch, then the branch doesn't have a delay slot.
- BitField<7, 1, u32> is_exit;
- BitField<8, 3, u32> dst;
- BitField<11, 3, u32> src_a;
- BitField<14, 3, u32> src_b;
- // The signed immediate overlaps the second source operand and the alu operation.
- BitField<14, 18, s32> immediate;
-
- BitField<17, 5, ALUOperation> alu_operation;
-
- // Bitfield instructions data
- BitField<17, 5, u32> bf_src_bit;
- BitField<22, 5, u32> bf_size;
- BitField<27, 5, u32> bf_dst_bit;
-
- u32 GetBitfieldMask() const {
- return (1 << bf_size) - 1;
- }
-
- s32 GetBranchTarget() const {
- return static_cast<s32>(immediate * sizeof(u32));
- }
- };
+ union Opcode;
union MethodAddress {
u32 raw;
@@ -149,9 +84,10 @@ private:
Engines::Maxwell3D& maxwell3d;
- u32 pc; ///< Current program counter
- std::optional<u32>
- delayed_pc; ///< Program counter to execute at after the delay slot is executed.
+ /// Current program counter
+ u32 pc;
+ /// Program counter to execute at after the delay slot is executed.
+ std::optional<u32> delayed_pc;
static constexpr std::size_t NumMacroRegisters = 8;
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index ab71870ab..fe5f08ace 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -93,6 +93,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::DXT23_SRGB>,
MortonCopy<true, PixelFormat::DXT45_SRGB>,
MortonCopy<true, PixelFormat::BC7U_SRGB>,
+ MortonCopy<true, PixelFormat::R4G4B4A4U>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
@@ -101,6 +102,16 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_6X6>,
+ MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X10>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_12X12>,
+ MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X6>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_6X5>,
+ MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
MortonCopy<true, PixelFormat::Z32F>,
MortonCopy<true, PixelFormat::Z16>,
MortonCopy<true, PixelFormat::Z24S8>,
@@ -162,6 +173,17 @@ static constexpr ConversionArray linear_to_morton_fns = {
MortonCopy<false, PixelFormat::DXT23_SRGB>,
MortonCopy<false, PixelFormat::DXT45_SRGB>,
MortonCopy<false, PixelFormat::BC7U_SRGB>,
+ MortonCopy<false, PixelFormat::R4G4B4A4U>,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
nullptr,
nullptr,
nullptr,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a85f730a8..cbcf81414 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
}
void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
+ std::lock_guard lock{pages_mutex};
const u64 page_start{addr >> Memory::PAGE_BITS};
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9c10ebda3..c24a02d71 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -9,6 +9,7 @@
#include <cstddef>
#include <map>
#include <memory>
+#include <mutex>
#include <optional>
#include <tuple>
#include <utility>
@@ -230,6 +231,8 @@ private:
using CachedPageMap = boost::icl::interval_map<u64, int>;
CachedPageMap cached_pages;
+
+ std::mutex pages_mutex;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index bb972bf37..baec66ff0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1148,7 +1148,7 @@ private:
for (const auto& variant : extras) {
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
expr += GenerateTextureArgument(*argument);
- } else if (std::get_if<TextureAoffi>(&variant)) {
+ } else if (std::holds_alternative<TextureAoffi>(variant)) {
expr += GenerateTextureAoffi(meta->aoffi);
} else {
UNREACHABLE();
@@ -1158,8 +1158,8 @@ private:
return expr + ')';
}
- std::string GenerateTextureArgument(TextureArgument argument) {
- const auto [type, operand] = argument;
+ std::string GenerateTextureArgument(const TextureArgument& argument) {
+ const auto& [type, operand] = argument;
if (operand == nullptr) {
return {};
}
@@ -1235,7 +1235,7 @@ private:
std::string BuildImageValues(Operation operation) {
constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"};
- const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
const std::size_t values_count{meta.values.size()};
std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
@@ -1780,14 +1780,14 @@ private:
return {"0", Type::Int};
}
- const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
Type::Uint};
}
Expression ImageStore(Operation operation) {
- const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
BuildIntegerCoordinates(operation), BuildImageValues(operation));
return {};
@@ -1795,7 +1795,7 @@ private:
template <const std::string_view& opname>
Expression AtomicImage(Operation operation) {
- const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
ASSERT(meta.values.size() == 1);
return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
@@ -2246,7 +2246,7 @@ private:
code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex());
}
- std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const {
+ std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
return fmt::format("{}_{}_{}", name, index, suffix);
}
@@ -2271,17 +2271,15 @@ private:
ShaderWriter code;
};
-static constexpr std::string_view flow_var = "flow_var_";
-
std::string GetFlowVariable(u32 i) {
- return fmt::format("{}{}", flow_var, i);
+ return fmt::format("flow_var_{}", i);
}
class ExprDecompiler {
public:
explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
- void operator()(VideoCommon::Shader::ExprAnd& expr) {
+ void operator()(const ExprAnd& expr) {
inner += "( ";
std::visit(*this, *expr.operand1);
inner += " && ";
@@ -2289,7 +2287,7 @@ public:
inner += ')';
}
- void operator()(VideoCommon::Shader::ExprOr& expr) {
+ void operator()(const ExprOr& expr) {
inner += "( ";
std::visit(*this, *expr.operand1);
inner += " || ";
@@ -2297,17 +2295,17 @@ public:
inner += ')';
}
- void operator()(VideoCommon::Shader::ExprNot& expr) {
+ void operator()(const ExprNot& expr) {
inner += '!';
std::visit(*this, *expr.operand1);
}
- void operator()(VideoCommon::Shader::ExprPredicate& expr) {
+ void operator()(const ExprPredicate& expr) {
const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
inner += decomp.GetPredicate(pred);
}
- void operator()(VideoCommon::Shader::ExprCondCode& expr) {
+ void operator()(const ExprCondCode& expr) {
const Node cc = decomp.ir.GetConditionCode(expr.cc);
std::string target;
@@ -2332,15 +2330,15 @@ public:
inner += target;
}
- void operator()(VideoCommon::Shader::ExprVar& expr) {
+ void operator()(const ExprVar& expr) {
inner += GetFlowVariable(expr.var_index);
}
- void operator()(VideoCommon::Shader::ExprBoolean& expr) {
+ void operator()(const ExprBoolean& expr) {
inner += expr.value ? "true" : "false";
}
- std::string& GetResult() {
+ const std::string& GetResult() const {
return inner;
}
@@ -2353,7 +2351,7 @@ class ASTDecompiler {
public:
explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
- void operator()(VideoCommon::Shader::ASTProgram& ast) {
+ void operator()(const ASTProgram& ast) {
ASTNode current = ast.nodes.GetFirst();
while (current) {
Visit(current);
@@ -2361,7 +2359,7 @@ public:
}
}
- void operator()(VideoCommon::Shader::ASTIfThen& ast) {
+ void operator()(const ASTIfThen& ast) {
ExprDecompiler expr_parser{decomp};
std::visit(expr_parser, *ast.condition);
decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
@@ -2375,7 +2373,7 @@ public:
decomp.code.AddLine("}}");
}
- void operator()(VideoCommon::Shader::ASTIfElse& ast) {
+ void operator()(const ASTIfElse& ast) {
decomp.code.AddLine("else {{");
decomp.code.scope++;
ASTNode current = ast.nodes.GetFirst();
@@ -2387,29 +2385,29 @@ public:
decomp.code.AddLine("}}");
}
- void operator()(VideoCommon::Shader::ASTBlockEncoded& ast) {
+ void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
UNREACHABLE();
}
- void operator()(VideoCommon::Shader::ASTBlockDecoded& ast) {
+ void operator()(const ASTBlockDecoded& ast) {
decomp.VisitBlock(ast.nodes);
}
- void operator()(VideoCommon::Shader::ASTVarSet& ast) {
+ void operator()(const ASTVarSet& ast) {
ExprDecompiler expr_parser{decomp};
std::visit(expr_parser, *ast.condition);
decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());
}
- void operator()(VideoCommon::Shader::ASTLabel& ast) {
+ void operator()(const ASTLabel& ast) {
decomp.code.AddLine("// Label_{}:", ast.index);
}
- void operator()(VideoCommon::Shader::ASTGoto& ast) {
+ void operator()([[maybe_unused]] const ASTGoto& ast) {
UNREACHABLE();
}
- void operator()(VideoCommon::Shader::ASTDoWhile& ast) {
+ void operator()(const ASTDoWhile& ast) {
ExprDecompiler expr_parser{decomp};
std::visit(expr_parser, *ast.condition);
decomp.code.AddLine("do {{");
@@ -2423,7 +2421,7 @@ public:
decomp.code.AddLine("}} while({});", expr_parser.GetResult());
}
- void operator()(VideoCommon::Shader::ASTReturn& ast) {
+ void operator()(const ASTReturn& ast) {
const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
if (!is_true) {
ExprDecompiler expr_parser{decomp};
@@ -2443,7 +2441,7 @@ public:
}
}
- void operator()(VideoCommon::Shader::ASTBreak& ast) {
+ void operator()(const ASTBreak& ast) {
const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
if (!is_true) {
ExprDecompiler expr_parser{decomp};
@@ -2458,7 +2456,7 @@ public:
}
}
- void Visit(VideoCommon::Shader::ASTNode& node) {
+ void Visit(const ASTNode& node) {
std::visit(*this, *node->GetInnerData());
}
@@ -2471,9 +2469,9 @@ void GLSLDecompiler::DecompileAST() {
for (u32 i = 0; i < num_flow_variables; i++) {
code.AddLine("bool {} = false;", GetFlowVariable(i));
}
+
ASTDecompiler decompiler{*this};
- VideoCommon::Shader::ASTNode program = ir.GetASTProgram();
- decompiler.Visit(program);
+ decompiler.Visit(ir.GetASTProgram());
}
} // Anonymous namespace
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 173b76c4e..2f9bfd7e4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -111,7 +111,8 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT45_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // BC7U_SRGB
+ true}, // BC7U_SRGB
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, ComponentType::UNorm, false}, // R4G4B4A4U
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
@@ -120,6 +121,16 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X6
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X6_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X10
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X10_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_12X12
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_12X12_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5_SRGB
// Depth formats
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 1e6ef66ab..4bbd17b12 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
RendererOpenGL::~RendererOpenGL() = default;
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
- system.GetPerfStats().EndSystemFrame();
-
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
state.AllDirty();
@@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
render_window.PollEvents();
- system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
- system.GetPerfStats().BeginSystemFrame();
-
// Restore the rasterizer state
prev_state.AllDirty();
prev_state.Apply();
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 4fb1ca372..0d943a826 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1648,32 +1648,32 @@ class ExprDecompiler {
public:
explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {}
- Id operator()(VideoCommon::Shader::ExprAnd& expr) {
+ Id operator()(const ExprAnd& expr) {
const Id type_def = decomp.GetTypeDefinition(Type::Bool);
const Id op1 = Visit(expr.operand1);
const Id op2 = Visit(expr.operand2);
return decomp.Emit(decomp.OpLogicalAnd(type_def, op1, op2));
}
- Id operator()(VideoCommon::Shader::ExprOr& expr) {
+ Id operator()(const ExprOr& expr) {
const Id type_def = decomp.GetTypeDefinition(Type::Bool);
const Id op1 = Visit(expr.operand1);
const Id op2 = Visit(expr.operand2);
return decomp.Emit(decomp.OpLogicalOr(type_def, op1, op2));
}
- Id operator()(VideoCommon::Shader::ExprNot& expr) {
+ Id operator()(const ExprNot& expr) {
const Id type_def = decomp.GetTypeDefinition(Type::Bool);
const Id op1 = Visit(expr.operand1);
return decomp.Emit(decomp.OpLogicalNot(type_def, op1));
}
- Id operator()(VideoCommon::Shader::ExprPredicate& expr) {
+ Id operator()(const ExprPredicate& expr) {
const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred)));
}
- Id operator()(VideoCommon::Shader::ExprCondCode& expr) {
+ Id operator()(const ExprCondCode& expr) {
const Node cc = decomp.ir.GetConditionCode(expr.cc);
Id target;
@@ -1696,15 +1696,15 @@ public:
return decomp.Emit(decomp.OpLoad(decomp.t_bool, target));
}
- Id operator()(VideoCommon::Shader::ExprVar& expr) {
+ Id operator()(const ExprVar& expr) {
return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index)));
}
- Id operator()(VideoCommon::Shader::ExprBoolean& expr) {
+ Id operator()(const ExprBoolean& expr) {
return expr.value ? decomp.v_true : decomp.v_false;
}
- Id Visit(VideoCommon::Shader::Expr& node) {
+ Id Visit(const Expr& node) {
return std::visit(*this, *node);
}
@@ -1716,7 +1716,7 @@ class ASTDecompiler {
public:
explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {}
- void operator()(VideoCommon::Shader::ASTProgram& ast) {
+ void operator()(const ASTProgram& ast) {
ASTNode current = ast.nodes.GetFirst();
while (current) {
Visit(current);
@@ -1724,7 +1724,7 @@ public:
}
}
- void operator()(VideoCommon::Shader::ASTIfThen& ast) {
+ void operator()(const ASTIfThen& ast) {
ExprDecompiler expr_parser{decomp};
const Id condition = expr_parser.Visit(ast.condition);
const Id then_label = decomp.OpLabel();
@@ -1741,33 +1741,33 @@ public:
decomp.Emit(endif_label);
}
- void operator()(VideoCommon::Shader::ASTIfElse& ast) {
+ void operator()([[maybe_unused]] const ASTIfElse& ast) {
UNREACHABLE();
}
- void operator()(VideoCommon::Shader::ASTBlockEncoded& ast) {
+ void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
UNREACHABLE();
}
- void operator()(VideoCommon::Shader::ASTBlockDecoded& ast) {
+ void operator()(const ASTBlockDecoded& ast) {
decomp.VisitBasicBlock(ast.nodes);
}
- void operator()(VideoCommon::Shader::ASTVarSet& ast) {
+ void operator()(const ASTVarSet& ast) {
ExprDecompiler expr_parser{decomp};
const Id condition = expr_parser.Visit(ast.condition);
decomp.Emit(decomp.OpStore(decomp.flow_variables.at(ast.index), condition));
}
- void operator()(VideoCommon::Shader::ASTLabel& ast) {
+ void operator()([[maybe_unused]] const ASTLabel& ast) {
// Do nothing
}
- void operator()(VideoCommon::Shader::ASTGoto& ast) {
+ void operator()([[maybe_unused]] const ASTGoto& ast) {
UNREACHABLE();
}
- void operator()(VideoCommon::Shader::ASTDoWhile& ast) {
+ void operator()(const ASTDoWhile& ast) {
const Id loop_label = decomp.OpLabel();
const Id endloop_label = decomp.OpLabel();
const Id loop_start_block = decomp.OpLabel();
@@ -1790,7 +1790,7 @@ public:
decomp.Emit(endloop_label);
}
- void operator()(VideoCommon::Shader::ASTReturn& ast) {
+ void operator()(const ASTReturn& ast) {
if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
ExprDecompiler expr_parser{decomp};
const Id condition = expr_parser.Visit(ast.condition);
@@ -1820,7 +1820,7 @@ public:
}
}
- void operator()(VideoCommon::Shader::ASTBreak& ast) {
+ void operator()(const ASTBreak& ast) {
if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
ExprDecompiler expr_parser{decomp};
const Id condition = expr_parser.Visit(ast.condition);
@@ -1840,7 +1840,7 @@ public:
}
}
- void Visit(VideoCommon::Shader::ASTNode& node) {
+ void Visit(const ASTNode& node) {
std::visit(*this, *node->GetInnerData());
}
@@ -1856,9 +1856,11 @@ void SPIRVDecompiler::DecompileAST() {
Name(id, fmt::format("flow_var_{}", i));
flow_variables.emplace(i, AddGlobalVariable(id));
}
+
+ const ASTNode program = ir.GetASTProgram();
ASTDecompiler decompiler{*this};
- VideoCommon::Shader::ASTNode program = ir.GetASTProgram();
decompiler.Visit(program);
+
const Id next_block = OpLabel();
Emit(OpBranch(next_block));
Emit(next_block);
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
index 436d45f4b..e43aecc18 100644
--- a/src/video_core/shader/ast.cpp
+++ b/src/video_core/shader/ast.cpp
@@ -3,6 +3,9 @@
// Refer to the license.txt file included.
#include <string>
+#include <string_view>
+
+#include <fmt/format.h>
#include "common/assert.h"
#include "common/common_types.h"
@@ -229,7 +232,8 @@ public:
return inner;
}
- std::string inner{};
+private:
+ std::string inner;
};
class ASTPrinter {
@@ -249,7 +253,7 @@ public:
void operator()(const ASTIfThen& ast) {
ExprPrinter expr_parser{};
std::visit(expr_parser, *ast.condition);
- inner += Ident() + "if (" + expr_parser.GetResult() + ") {\n";
+ inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult());
scope++;
ASTNode current = ast.nodes.GetFirst();
while (current) {
@@ -257,11 +261,13 @@ public:
current = current->GetNext();
}
scope--;
- inner += Ident() + "}\n";
+ inner += fmt::format("{}}}\n", Indent());
}
void operator()(const ASTIfElse& ast) {
- inner += Ident() + "else {\n";
+ inner += Indent();
+ inner += "else {\n";
+
scope++;
ASTNode current = ast.nodes.GetFirst();
while (current) {
@@ -269,40 +275,41 @@ public:
current = current->GetNext();
}
scope--;
- inner += Ident() + "}\n";
+
+ inner += Indent();
+ inner += "}\n";
}
void operator()(const ASTBlockEncoded& ast) {
- inner += Ident() + "Block(" + std::to_string(ast.start) + ", " + std::to_string(ast.end) +
- ");\n";
+ inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
}
- void operator()(const ASTBlockDecoded& ast) {
- inner += Ident() + "Block;\n";
+ void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
+ inner += Indent();
+ inner += "Block;\n";
}
void operator()(const ASTVarSet& ast) {
ExprPrinter expr_parser{};
std::visit(expr_parser, *ast.condition);
- inner +=
- Ident() + "V" + std::to_string(ast.index) + " := " + expr_parser.GetResult() + ";\n";
+ inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult());
}
void operator()(const ASTLabel& ast) {
- inner += "Label_" + std::to_string(ast.index) + ":\n";
+ inner += fmt::format("Label_{}:\n", ast.index);
}
void operator()(const ASTGoto& ast) {
ExprPrinter expr_parser{};
std::visit(expr_parser, *ast.condition);
- inner += Ident() + "(" + expr_parser.GetResult() + ") -> goto Label_" +
- std::to_string(ast.label) + ";\n";
+ inner +=
+ fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label);
}
void operator()(const ASTDoWhile& ast) {
ExprPrinter expr_parser{};
std::visit(expr_parser, *ast.condition);
- inner += Ident() + "do {\n";
+ inner += fmt::format("{}do {{\n", Indent());
scope++;
ASTNode current = ast.nodes.GetFirst();
while (current) {
@@ -310,32 +317,23 @@ public:
current = current->GetNext();
}
scope--;
- inner += Ident() + "} while (" + expr_parser.GetResult() + ");\n";
+ inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult());
}
void operator()(const ASTReturn& ast) {
ExprPrinter expr_parser{};
std::visit(expr_parser, *ast.condition);
- inner += Ident() + "(" + expr_parser.GetResult() + ") -> " +
- (ast.kills ? "discard" : "exit") + ";\n";
+ inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(),
+ ast.kills ? "discard" : "exit");
}
void operator()(const ASTBreak& ast) {
ExprPrinter expr_parser{};
std::visit(expr_parser, *ast.condition);
- inner += Ident() + "(" + expr_parser.GetResult() + ") -> break;\n";
+ inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult());
}
- std::string& Ident() {
- if (memo_scope == scope) {
- return tabs_memo;
- }
- tabs_memo = tabs.substr(0, scope * 2);
- memo_scope = scope;
- return tabs_memo;
- }
-
- void Visit(ASTNode& node) {
+ void Visit(const ASTNode& node) {
std::visit(*this, *node->GetInnerData());
}
@@ -344,16 +342,29 @@ public:
}
private:
+ std::string_view Indent() {
+ if (space_segment_scope == scope) {
+ return space_segment;
+ }
+
+ // Ensure that we don't exceed our view.
+ ASSERT(scope * 2 < spaces.size());
+
+ space_segment = spaces.substr(0, scope * 2);
+ space_segment_scope = scope;
+ return space_segment;
+ }
+
std::string inner{};
- u32 scope{};
+ std::string_view space_segment;
- std::string tabs_memo{};
- u32 memo_scope{};
+ u32 scope{};
+ u32 space_segment_scope{};
- static constexpr std::string_view tabs{" "};
+ static constexpr std::string_view spaces{" "};
};
-std::string ASTManager::Print() {
+std::string ASTManager::Print() const {
ASTPrinter printer{};
printer.Visit(main_node);
return printer.GetResult();
@@ -549,13 +560,13 @@ bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) co
return min->GetParent() == max->GetParent();
}
-void ASTManager::ShowCurrentState(std::string_view state) {
+void ASTManager::ShowCurrentState(std::string_view state) const {
LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print());
SanityCheck();
}
-void ASTManager::SanityCheck() {
- for (auto& label : labels) {
+void ASTManager::SanityCheck() const {
+ for (const auto& label : labels) {
if (!label->GetParent()) {
LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
}
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
index d7bf11821..a2f0044ba 100644
--- a/src/video_core/shader/ast.h
+++ b/src/video_core/shader/ast.h
@@ -328,13 +328,13 @@ public:
void InsertReturn(Expr condition, bool kills);
- std::string Print();
+ std::string Print() const;
void Decompile();
- void ShowCurrentState(std::string_view state);
+ void ShowCurrentState(std::string_view state) const;
- void SanityCheck();
+ void SanityCheck() const;
void Clear();
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 268d1aed0..9d21f45de 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -473,8 +473,8 @@ void DecompileShader(CFGRebuildState& state) {
state.manager->Decompile();
}
-std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size,
- u32 start_address,
+std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
+ std::size_t program_size, u32 start_address,
const CompilerSettings& settings) {
auto result_out = std::make_unique<ShaderCharacteristics>();
if (settings.depth == CompileDepth::BruteForce) {
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 74e54a5c7..37e987d62 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -76,8 +76,8 @@ struct ShaderCharacteristics {
CompilerSettings settings{};
};
-std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size,
- u32 start_address,
+std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
+ std::size_t program_size, u32 start_address,
const CompilerSettings& settings);
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 338bab17c..447fb5c1d 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -410,7 +410,7 @@ public:
explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
explicit OperationNode(OperationCode code, Meta meta)
- : OperationNode(code, meta, std::vector<Node>{}) {}
+ : OperationNode(code, std::move(meta), std::vector<Node>{}) {}
explicit OperationNode(OperationCode code, std::vector<Node> operands)
: OperationNode(code, Meta{}, std::move(operands)) {}
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 250afc6d6..9a3c05288 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -212,6 +212,14 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
break;
}
break;
+ case Tegra::Texture::TextureFormat::A4B4G4R4:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::R4G4B4A4U;
+ default:
+ break;
+ }
+ break;
case Tegra::Texture::TextureFormat::R8:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
@@ -252,6 +260,7 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
default:
break;
}
+ break;
case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
@@ -350,6 +359,16 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5;
case Tegra::Texture::TextureFormat::ASTC_2D_10X8:
return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8;
+ case Tegra::Texture::TextureFormat::ASTC_2D_6X6:
+ return is_srgb ? PixelFormat::ASTC_2D_6X6_SRGB : PixelFormat::ASTC_2D_6X6;
+ case Tegra::Texture::TextureFormat::ASTC_2D_10X10:
+ return is_srgb ? PixelFormat::ASTC_2D_10X10_SRGB : PixelFormat::ASTC_2D_10X10;
+ case Tegra::Texture::TextureFormat::ASTC_2D_12X12:
+ return is_srgb ? PixelFormat::ASTC_2D_12X12_SRGB : PixelFormat::ASTC_2D_12X12;
+ case Tegra::Texture::TextureFormat::ASTC_2D_8X6:
+ return is_srgb ? PixelFormat::ASTC_2D_8X6_SRGB : PixelFormat::ASTC_2D_8X6;
+ case Tegra::Texture::TextureFormat::ASTC_2D_6X5:
+ return is_srgb ? PixelFormat::ASTC_2D_6X5_SRGB : PixelFormat::ASTC_2D_6X5;
case Tegra::Texture::TextureFormat::R16_G16:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
@@ -510,6 +529,16 @@ bool IsPixelFormatASTC(PixelFormat format) {
case PixelFormat::ASTC_2D_8X5_SRGB:
case PixelFormat::ASTC_2D_10X8:
case PixelFormat::ASTC_2D_10X8_SRGB:
+ case PixelFormat::ASTC_2D_6X6:
+ case PixelFormat::ASTC_2D_6X6_SRGB:
+ case PixelFormat::ASTC_2D_10X10:
+ case PixelFormat::ASTC_2D_10X10_SRGB:
+ case PixelFormat::ASTC_2D_12X12:
+ case PixelFormat::ASTC_2D_12X12_SRGB:
+ case PixelFormat::ASTC_2D_8X6:
+ case PixelFormat::ASTC_2D_8X6_SRGB:
+ case PixelFormat::ASTC_2D_6X5:
+ case PixelFormat::ASTC_2D_6X5_SRGB:
return true;
default:
return false;
@@ -530,6 +559,11 @@ bool IsPixelFormatSRGB(PixelFormat format) {
case PixelFormat::ASTC_2D_5X4_SRGB:
case PixelFormat::ASTC_2D_5X5_SRGB:
case PixelFormat::ASTC_2D_10X8_SRGB:
+ case PixelFormat::ASTC_2D_6X6_SRGB:
+ case PixelFormat::ASTC_2D_10X10_SRGB:
+ case PixelFormat::ASTC_2D_12X12_SRGB:
+ case PixelFormat::ASTC_2D_8X6_SRGB:
+ case PixelFormat::ASTC_2D_6X5_SRGB:
return true;
default:
return false;
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 1e1c432a5..97668f802 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -67,27 +67,38 @@ enum class PixelFormat {
DXT23_SRGB = 49,
DXT45_SRGB = 50,
BC7U_SRGB = 51,
- ASTC_2D_4X4_SRGB = 52,
- ASTC_2D_8X8_SRGB = 53,
- ASTC_2D_8X5_SRGB = 54,
- ASTC_2D_5X4_SRGB = 55,
- ASTC_2D_5X5 = 56,
- ASTC_2D_5X5_SRGB = 57,
- ASTC_2D_10X8 = 58,
- ASTC_2D_10X8_SRGB = 59,
+ R4G4B4A4U = 52,
+ ASTC_2D_4X4_SRGB = 53,
+ ASTC_2D_8X8_SRGB = 54,
+ ASTC_2D_8X5_SRGB = 55,
+ ASTC_2D_5X4_SRGB = 56,
+ ASTC_2D_5X5 = 57,
+ ASTC_2D_5X5_SRGB = 58,
+ ASTC_2D_10X8 = 59,
+ ASTC_2D_10X8_SRGB = 60,
+ ASTC_2D_6X6 = 61,
+ ASTC_2D_6X6_SRGB = 62,
+ ASTC_2D_10X10 = 63,
+ ASTC_2D_10X10_SRGB = 64,
+ ASTC_2D_12X12 = 65,
+ ASTC_2D_12X12_SRGB = 66,
+ ASTC_2D_8X6 = 67,
+ ASTC_2D_8X6_SRGB = 68,
+ ASTC_2D_6X5 = 69,
+ ASTC_2D_6X5_SRGB = 70,
MaxColorFormat,
// Depth formats
- Z32F = 60,
- Z16 = 61,
+ Z32F = 71,
+ Z16 = 72,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 62,
- S8Z24 = 63,
- Z32FS8 = 64,
+ Z24S8 = 73,
+ S8Z24 = 74,
+ Z32FS8 = 75,
MaxDepthStencilFormat,
@@ -177,6 +188,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
2, // DXT23_SRGB
2, // DXT45_SRGB
2, // BC7U_SRGB
+ 0, // R4G4B4A4U
2, // ASTC_2D_4X4_SRGB
2, // ASTC_2D_8X8_SRGB
2, // ASTC_2D_8X5_SRGB
@@ -185,6 +197,16 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
2, // ASTC_2D_5X5_SRGB
2, // ASTC_2D_10X8
2, // ASTC_2D_10X8_SRGB
+ 2, // ASTC_2D_6X6
+ 2, // ASTC_2D_6X6_SRGB
+ 2, // ASTC_2D_10X10
+ 2, // ASTC_2D_10X10_SRGB
+ 2, // ASTC_2D_12X12
+ 2, // ASTC_2D_12X12_SRGB
+ 2, // ASTC_2D_8X6
+ 2, // ASTC_2D_8X6_SRGB
+ 2, // ASTC_2D_6X5
+ 2, // ASTC_2D_6X5_SRGB
0, // Z32F
0, // Z16
0, // Z24S8
@@ -261,6 +283,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
4, // DXT23_SRGB
4, // DXT45_SRGB
4, // BC7U_SRGB
+ 1, // R4G4B4A4U
4, // ASTC_2D_4X4_SRGB
8, // ASTC_2D_8X8_SRGB
8, // ASTC_2D_8X5_SRGB
@@ -269,6 +292,16 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
5, // ASTC_2D_5X5_SRGB
10, // ASTC_2D_10X8
10, // ASTC_2D_10X8_SRGB
+ 6, // ASTC_2D_6X6
+ 6, // ASTC_2D_6X6_SRGB
+ 10, // ASTC_2D_10X10
+ 10, // ASTC_2D_10X10_SRGB
+ 12, // ASTC_2D_12X12
+ 12, // ASTC_2D_12X12_SRGB
+ 8, // ASTC_2D_8X6
+ 8, // ASTC_2D_8X6_SRGB
+ 6, // ASTC_2D_6X5
+ 6, // ASTC_2D_6X5_SRGB
1, // Z32F
1, // Z16
1, // Z24S8
@@ -285,71 +318,82 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
}
constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
- 1, // ABGR8U
- 1, // ABGR8S
- 1, // ABGR8UI
- 1, // B5G6R5U
- 1, // A2B10G10R10U
- 1, // A1B5G5R5U
- 1, // R8U
- 1, // R8UI
- 1, // RGBA16F
- 1, // RGBA16U
- 1, // RGBA16UI
- 1, // R11FG11FB10F
- 1, // RGBA32UI
- 4, // DXT1
- 4, // DXT23
- 4, // DXT45
- 4, // DXN1
- 4, // DXN2UNORM
- 4, // DXN2SNORM
- 4, // BC7U
- 4, // BC6H_UF16
- 4, // BC6H_SF16
- 4, // ASTC_2D_4X4
- 1, // BGRA8
- 1, // RGBA32F
- 1, // RG32F
- 1, // R32F
- 1, // R16F
- 1, // R16U
- 1, // R16S
- 1, // R16UI
- 1, // R16I
- 1, // RG16
- 1, // RG16F
- 1, // RG16UI
- 1, // RG16I
- 1, // RG16S
- 1, // RGB32F
- 1, // RGBA8_SRGB
- 1, // RG8U
- 1, // RG8S
- 1, // RG32UI
- 1, // RGBX16F
- 1, // R32UI
- 8, // ASTC_2D_8X8
- 5, // ASTC_2D_8X5
- 4, // ASTC_2D_5X4
- 1, // BGRA8_SRGB
- 4, // DXT1_SRGB
- 4, // DXT23_SRGB
- 4, // DXT45_SRGB
- 4, // BC7U_SRGB
- 4, // ASTC_2D_4X4_SRGB
- 8, // ASTC_2D_8X8_SRGB
- 5, // ASTC_2D_8X5_SRGB
- 4, // ASTC_2D_5X4_SRGB
- 5, // ASTC_2D_5X5
- 5, // ASTC_2D_5X5_SRGB
- 8, // ASTC_2D_10X8
- 8, // ASTC_2D_10X8_SRGB
- 1, // Z32F
- 1, // Z16
- 1, // Z24S8
- 1, // S8Z24
- 1, // Z32FS8
+ 1, // ABGR8U
+ 1, // ABGR8S
+ 1, // ABGR8UI
+ 1, // B5G6R5U
+ 1, // A2B10G10R10U
+ 1, // A1B5G5R5U
+ 1, // R8U
+ 1, // R8UI
+ 1, // RGBA16F
+ 1, // RGBA16U
+ 1, // RGBA16UI
+ 1, // R11FG11FB10F
+ 1, // RGBA32UI
+ 4, // DXT1
+ 4, // DXT23
+ 4, // DXT45
+ 4, // DXN1
+ 4, // DXN2UNORM
+ 4, // DXN2SNORM
+ 4, // BC7U
+ 4, // BC6H_UF16
+ 4, // BC6H_SF16
+ 4, // ASTC_2D_4X4
+ 1, // BGRA8
+ 1, // RGBA32F
+ 1, // RG32F
+ 1, // R32F
+ 1, // R16F
+ 1, // R16U
+ 1, // R16S
+ 1, // R16UI
+ 1, // R16I
+ 1, // RG16
+ 1, // RG16F
+ 1, // RG16UI
+ 1, // RG16I
+ 1, // RG16S
+ 1, // RGB32F
+ 1, // RGBA8_SRGB
+ 1, // RG8U
+ 1, // RG8S
+ 1, // RG32UI
+ 1, // RGBX16F
+ 1, // R32UI
+ 8, // ASTC_2D_8X8
+ 5, // ASTC_2D_8X5
+ 4, // ASTC_2D_5X4
+ 1, // BGRA8_SRGB
+ 4, // DXT1_SRGB
+ 4, // DXT23_SRGB
+ 4, // DXT45_SRGB
+ 4, // BC7U_SRGB
+ 1, // R4G4B4A4U
+ 4, // ASTC_2D_4X4_SRGB
+ 8, // ASTC_2D_8X8_SRGB
+ 5, // ASTC_2D_8X5_SRGB
+ 4, // ASTC_2D_5X4_SRGB
+ 5, // ASTC_2D_5X5
+ 5, // ASTC_2D_5X5_SRGB
+ 8, // ASTC_2D_10X8
+ 8, // ASTC_2D_10X8_SRGB
+ 6, // ASTC_2D_6X6
+ 6, // ASTC_2D_6X6_SRGB
+ 10, // ASTC_2D_10X10
+ 10, // ASTC_2D_10X10_SRGB
+ 12, // ASTC_2D_12X12
+ 12, // ASTC_2D_12X12_SRGB
+ 6, // ASTC_2D_8X6
+ 6, // ASTC_2D_8X6_SRGB
+ 5, // ASTC_2D_6X5
+ 5, // ASTC_2D_6X5_SRGB
+ 1, // Z32F
+ 1, // Z16
+ 1, // Z24S8
+ 1, // S8Z24
+ 1, // Z32FS8
}};
static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
@@ -413,6 +457,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
128, // DXT23_SRGB
128, // DXT45_SRGB
128, // BC7U_SRGB
+ 16, // R4G4B4A4U
128, // ASTC_2D_4X4_SRGB
128, // ASTC_2D_8X8_SRGB
128, // ASTC_2D_8X5_SRGB
@@ -421,6 +466,16 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
128, // ASTC_2D_5X5_SRGB
128, // ASTC_2D_10X8
128, // ASTC_2D_10X8_SRGB
+ 128, // ASTC_2D_6X6
+ 128, // ASTC_2D_6X6_SRGB
+ 128, // ASTC_2D_10X10
+ 128, // ASTC_2D_10X10_SRGB
+ 128, // ASTC_2D_12X12
+ 128, // ASTC_2D_12X12_SRGB
+ 128, // ASTC_2D_8X6
+ 128, // ASTC_2D_8X6_SRGB
+ 128, // ASTC_2D_6X5
+ 128, // ASTC_2D_6X5_SRGB
32, // Z32F
16, // Z16
32, // Z24S8
@@ -504,6 +559,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
SurfaceCompression::Compressed, // DXT23_SRGB
SurfaceCompression::Compressed, // DXT45_SRGB
SurfaceCompression::Compressed, // BC7U_SRGB
+ SurfaceCompression::None, // R4G4B4A4U
SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB
SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB
SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB
@@ -512,6 +568,16 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB
SurfaceCompression::Converted, // ASTC_2D_10X8
SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB
+ SurfaceCompression::Converted, // ASTC_2D_6X6
+ SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB
+ SurfaceCompression::Converted, // ASTC_2D_10X10
+ SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB
+ SurfaceCompression::Converted, // ASTC_2D_12X12
+ SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB
+ SurfaceCompression::Converted, // ASTC_2D_8X6
+ SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB
+ SurfaceCompression::Converted, // ASTC_2D_6X5
+ SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB
SurfaceCompression::None, // Z32F
SurfaceCompression::None, // Z16
SurfaceCompression::None, // Z24S8
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index ca2da8f97..6a92b22d3 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -62,10 +62,10 @@ public:
}
}
- /***
- * `Guard` guarantees that rendertargets don't unregister themselves if the
+ /**
+ * Guarantees that rendertargets don't unregister themselves if they
* collide. Protection is currently only done on 3D slices.
- ***/
+ */
void GuardRenderTargets(bool new_guard) {
guard_render_targets = new_guard;
}
@@ -287,7 +287,7 @@ protected:
const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
// Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
- // and reading it from a sepparate buffer.
+ // and reading it from a separate buffer.
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
void ManageRenderTargetUnregister(TSurface& surface) {
@@ -386,12 +386,13 @@ private:
};
/**
- * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
- * @param overlaps, the overlapping surfaces registered in the cache.
- * @param params, the paremeters on the new surface.
- * @param gpu_addr, the starting address of the new surface.
- * @param untopological, tells the recycler that the texture has no way to match the overlaps
- * due to topological reasons.
+ * Takes care of selecting a proper strategy to deal with a texture recycle.
+ *
+ * @param overlaps The overlapping surfaces registered in the cache.
+ * @param params The parameters on the new surface.
+ * @param gpu_addr The starting address of the new surface.
+ * @param untopological Indicates to the recycler that the texture has no way
+ * to match the overlaps due to topological reasons.
**/
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
@@ -402,7 +403,7 @@ private:
if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
- for (auto s : overlaps) {
+ for (const auto& s : overlaps) {
const auto& s_params = s->GetSurfaceParams();
if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
@@ -419,16 +420,19 @@ private:
}
/**
- * `RecycleSurface` es a method we use to decide what to do with textures we can't resolve in
- *the cache It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the
- *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the
- *new surface from that data.
- * @param overlaps, the overlapping surfaces registered in the cache.
- * @param params, the paremeters on the new surface.
- * @param gpu_addr, the starting address of the new surface.
- * @param preserve_contents, tells if the new surface should be loaded from meory or left blank
- * @param untopological, tells the recycler that the texture has no way to match the overlaps
- * due to topological reasons.
+ * Used to decide what to do with textures we can't resolve in the cache. It has 2 implemented
+ * strategies: Ignore and Flush.
+ *
+ * - Ignore: Just unregisters all the overlaps and loads the new texture.
+ * - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
+ *
+ * @param overlaps The overlapping surfaces registered in the cache.
+ * @param params The parameters for the new surface.
+ * @param gpu_addr The starting address of the new surface.
+ * @param preserve_contents Indicates that the new surface should be loaded from memory or left
+ * blank.
+ * @param untopological Indicates to the recycler that the texture has no way to match the
+ * overlaps due to topological reasons.
**/
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
const SurfaceParams& params, const GPUVAddr gpu_addr,
@@ -465,10 +469,12 @@ private:
}
/**
- * `RebuildSurface` this method takes a single surface and recreates into another that
- * may differ in format, target or width alingment.
- * @param current_surface, the registered surface in the cache which we want to convert.
- * @param params, the new surface params which we'll use to recreate the surface.
+ * Takes a single surface and recreates it into another that may differ in
+ * format, target or width alignment.
+ *
+ * @param current_surface The registered surface in the cache which we want to convert.
+ * @param params The new surface params which we'll use to recreate the surface.
+ * @param is_render Whether or not the surface is a render target.
**/
std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
bool is_render) {
@@ -502,12 +508,14 @@ private:
}
/**
- * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's
- * params if it's an exact match, we return the main view of the registered surface. If it's
- * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats
+ * Takes a single surface and checks it against the new surface's params. If it's an exact
+ * match, we return the main view of the registered surface. If its formats don't
+ * match, we rebuild the surface. We call this last method a `Mirage`. If formats
* match but the targets don't, we create an overview View of the registered surface.
- * @param current_surface, the registered surface in the cache which we want to convert.
- * @param params, the new surface params which we want to check.
+ *
+ * @param current_surface The registered surface in the cache which we want to convert.
+ * @param params The new surface params which we want to check.
+ * @param is_render Whether or not the surface is a render target.
**/
std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
const SurfaceParams& params, bool is_render) {
@@ -529,13 +537,14 @@ private:
}
/**
- * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface
- * matches the candidate in some way, we got no guarantess here. We try to see if the overlaps
- * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface
- * for them, else we return nothing.
- * @param overlaps, the overlapping surfaces registered in the cache.
- * @param params, the paremeters on the new surface.
- * @param gpu_addr, the starting address of the new surface.
+ * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate
+ * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
+ * of the new surface, if they all match we end up recreating a surface for them,
+ * else we return nothing.
+ *
+ * @param overlaps The overlapping surfaces registered in the cache.
+ * @param params The parameters on the new surface.
+ * @param gpu_addr The starting address of the new surface.
**/
std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
const SurfaceParams& params,
@@ -575,7 +584,7 @@ private:
} else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
return {};
}
- for (auto surface : overlaps) {
+ for (const auto& surface : overlaps) {
Unregister(surface);
}
new_surface->MarkAsModified(modified, Tick());
@@ -584,19 +593,27 @@ private:
}
/**
- * `GetSurface` gets the starting address and parameters of a candidate surface and tries
- * to find a matching surface within the cache. This is done in 3 big steps. The first is to
- * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
- * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from
- * memory else we move to step 3. Step 3 consists on figuring the relationship between the
- * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many
- * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the
- * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to
- * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface
- * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface.
- * @param gpu_addr, the starting address of the candidate surface.
- * @param params, the paremeters on the candidate surface.
- * @param preserve_contents, tells if the new surface should be loaded from meory or left blank.
+ * Gets the starting address and parameters of a candidate surface and tries
+ * to find a matching surface within the cache. This is done in 3 big steps:
+ *
+ * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
+ *
+ * 2. Check if there are any overlaps at all, if there are none, we just load the texture from
+ * memory else we move to step 3.
+ *
+ * 3. Consists of figuring out the relationship between the candidate texture and the
+ * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If
+ * there's many, we just try to reconstruct a new surface out of them based on the
+ * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we
+ * have to check if the candidate is a view (layer/mipmap) of the overlap or if the
+ * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
+ * a new surface.
+ *
+ * @param gpu_addr The starting address of the candidate surface.
+ * @param params The parameters on the candidate surface.
+ * @param preserve_contents Indicates that the new surface should be loaded from memory or
+ * left blank.
+ * @param is_render Whether or not the surface is a render target.
**/
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
bool preserve_contents, bool is_render) {
@@ -651,7 +668,7 @@ private:
// Step 3
// Now we need to figure the relationship between the texture and its overlaps
// we do a topological test to ensure we can find some relationship. If it fails
- // inmediatly recycle the texture
+ // immediately recycle the texture
for (const auto& surface : overlaps) {
const auto topological_result = surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
@@ -720,12 +737,13 @@ private:
}
/**
- * `DeduceSurface` gets the starting address and parameters of a candidate surface and tries
- * to find a matching surface within the cache that's similar to it. If there are many textures
+ * Gets the starting address and parameters of a candidate surface and tries to find a
+ * matching surface within the cache that's similar to it. If there are many textures
* or the texture found is entirely incompatible, it will fail. If no texture is found, the
* blit will be unsuccessful.
- * @param gpu_addr, the starting address of the candidate surface.
- * @param params, the paremeters on the candidate surface.
+ *
+ * @param gpu_addr The starting address of the candidate surface.
+ * @param params The parameters on the candidate surface.
**/
Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
@@ -777,11 +795,14 @@ private:
}
/**
- * `DeduceBestBlit` gets the a source and destination starting address and parameters,
+ * Gets a source and destination starting address and parameters,
* and tries to deduce if they are supposed to be depth textures. If so, their
* parameters are modified and fixed accordingly.
- * @param gpu_addr, the starting address of the candidate surface.
- * @param params, the parameters on the candidate surface.
+ *
+ * @param src_params The parameters of the candidate surface.
+ * @param dst_params The parameters of the destination surface.
+ * @param src_gpu_addr The starting address of the candidate surface.
+ * @param dst_gpu_addr The starting address of the destination surface.
**/
void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {