summary refs log tree commit diff stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/CMakeLists.txt | 2
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 3
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 2
-rw-r--r-- src/video_core/engines/maxwell_3d.h | 8
-rw-r--r-- src/video_core/macro/macro.cpp | 35
-rw-r--r-- src/video_core/macro/macro.h | 19
-rw-r--r-- src/video_core/macro/macro_hle.cpp | 113
-rw-r--r-- src/video_core/macro/macro_hle.h | 44
-rw-r--r-- src/video_core/macro/macro_interpreter.cpp | 3
-rw-r--r-- src/video_core/macro/macro_jit_x64.cpp | 65
-rw-r--r-- src/video_core/macro/macro_jit_x64.h | 1
-rw-r--r-- src/video_core/memory_manager.cpp | 40
-rw-r--r-- src/video_core/memory_manager.h | 12
-rw-r--r-- src/video_core/query_cache.h | 10
-rw-r--r-- src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 63
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h | 5
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 50
-rw-r--r-- src/video_core/renderer_opengl/maxwell_to_gl.h | 82
-rw-r--r-- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 32
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 6
-rw-r--r-- src/video_core/renderer_vulkan/vk_sampler_cache.cpp | 6
-rw-r--r-- src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 36
-rw-r--r-- src/video_core/renderer_vulkan/vk_update_descriptor.h | 32
-rw-r--r-- src/video_core/renderer_vulkan/wrapper.cpp | 3
-rw-r--r-- src/video_core/renderer_vulkan/wrapper.h | 2
-rw-r--r-- src/video_core/shader/memory_util.cpp | 4
-rw-r--r-- src/video_core/shader_cache.h | 10
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 2
30 files changed, 468 insertions, 228 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 099bb446e..2dc752aa9 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -27,6 +27,8 @@ add_library(video_core STATIC
engines/shader_type.h
macro/macro.cpp
macro/macro.h
+ macro/macro_hle.cpp
+ macro/macro_hle.h
macro/macro_interpreter.cpp
macro/macro_interpreter.h
macro/macro_jit_x64.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 308d8b55f..bae1d527c 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -47,7 +47,7 @@ public:
bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex};
- const auto& memory_manager = system.GPU().MemoryManager();
+ auto& memory_manager = system.GPU().MemoryManager();
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
if (!cpu_addr_opt) {
return {GetEmptyBuffer(size), 0};
@@ -59,7 +59,6 @@ public:
constexpr std::size_t max_stream_size = 0x800;
if (use_fast_cbuf || size < max_stream_size) {
if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
- auto& memory_manager = system.GPU().MemoryManager();
const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
if (use_fast_cbuf) {
u8* dest;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index ea3c8a963..c01436295 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -128,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
// Execute the current macro.
- macro_engine->Execute(macro_positions[entry], parameters);
+ macro_engine->Execute(*this, macro_positions[entry], parameters);
if (mme_draw.current_mode != MMEDrawMode::Undefined) {
FlushMMEInlineDraw();
}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d5fe25065..ef1618990 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1418,6 +1418,14 @@ public:
return execute_on;
}
+ /// Returns a mutable reference to the `rasterizer` member.
+ VideoCore::RasterizerInterface& GetRasterizer() {
+ return rasterizer;
+ }
+
+ /// Returns a read-only reference to the `rasterizer` member.
+ const VideoCore::RasterizerInterface& GetRasterizer() const {
+ return rasterizer;
+ }
+
/// Notify a memory write has happened.
void OnMemoryWrite() {
dirty.flags |= dirty.on_write_stores;
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 89077a2d8..ef7dad349 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -2,23 +2,37 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <boost/container_hash/hash.hpp>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/settings.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/macro/macro.h"
+#include "video_core/macro/macro_hle.h"
#include "video_core/macro/macro_interpreter.h"
#include "video_core/macro/macro_jit_x64.h"
namespace Tegra {
+/// Creates the HLEMacro helper (bound to `maxwell3d`) that Execute asks for a high-level
+/// replacement the first time a macro is run.
+MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
+ : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
+
+// Defaulted in this file, which includes macro_hle.h; macro.h only forward-declares HLEMacro,
+// the type owned by the `hle_macros` unique_ptr.
+MacroEngine::~MacroEngine() = default;
+
void MacroEngine::AddCode(u32 method, u32 data) {
uploaded_macro_code[method].push_back(data);
}
-void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
+void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
+ const std::vector<u32>& parameters) {
auto compiled_macro = macro_cache.find(method);
if (compiled_macro != macro_cache.end()) {
- compiled_macro->second->Execute(parameters, method);
+ const auto& cache_info = compiled_macro->second;
+ if (cache_info.has_hle_program) {
+ cache_info.hle_program->Execute(parameters, method);
+ } else {
+ cache_info.lle_program->Execute(parameters, method);
+ }
} else {
// Macro not compiled, check if it's uploaded and if so, compile it
auto macro_code = uploaded_macro_code.find(method);
@@ -26,8 +40,21 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
return;
}
- macro_cache[method] = Compile(macro_code->second);
- macro_cache[method]->Execute(parameters, method);
+ auto& cache_info = macro_cache[method];
+ cache_info.hash = boost::hash_value(macro_code->second);
+ cache_info.lle_program = Compile(macro_code->second);
+
+ auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
+ if (hle_program.has_value()) {
+ cache_info.has_hle_program = true;
+ cache_info.hle_program = std::move(hle_program.value());
+ }
+
+ if (cache_info.has_hle_program) {
+ cache_info.hle_program->Execute(parameters, method);
+ } else {
+ cache_info.lle_program->Execute(parameters, method);
+ }
}
}
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
index b76ed891f..4d00b84b0 100644
--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -11,9 +11,11 @@
#include "common/common_types.h"
namespace Tegra {
+
namespace Engines {
class Maxwell3D;
}
+
namespace Macro {
constexpr std::size_t NUM_MACRO_REGISTERS = 8;
enum class Operation : u32 {
@@ -94,6 +96,8 @@ union MethodAddress {
} // namespace Macro
+class HLEMacro;
+
class CachedMacro {
public:
virtual ~CachedMacro() = default;
@@ -107,20 +111,29 @@ public:
class MacroEngine {
public:
- virtual ~MacroEngine() = default;
+ explicit MacroEngine(Engines::Maxwell3D& maxwell3d);
+ virtual ~MacroEngine();
// Store the uploaded macro code to compile them when they're called.
void AddCode(u32 method, u32 data);
// Compiles the macro if its not in the cache, and executes the compiled macro
- void Execute(u32 method, const std::vector<u32>& parameters);
+ void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);
protected:
virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;
private:
- std::unordered_map<u32, std::unique_ptr<CachedMacro>> macro_cache;
+ /// Cache entry stored per macro method in `macro_cache`.
+ struct CacheInfo {
+ /// Program returned by Compile() for the uploaded macro code.
+ std::unique_ptr<CachedMacro> lle_program{};
+ /// Replacement obtained from HLEMacro::GetHLEProgram; left empty when none is registered.
+ std::unique_ptr<CachedMacro> hle_program{};
+ /// boost::hash_value of the uploaded macro code; used as the HLE lookup key.
+ u64 hash{};
+ /// Set when `hle_program` was filled in; Execute then runs it instead of `lle_program`.
+ bool has_hle_program{};
+ };
+
+ std::unordered_map<u32, CacheInfo> macro_cache;
std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
+ std::unique_ptr<HLEMacro> hle_macros;
};
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
new file mode 100644
index 000000000..410f99018
--- /dev/null
+++ b/src/video_core/macro/macro_hle.cpp
@@ -0,0 +1,113 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <vector>
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/macro/macro_hle.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace Tegra {
+
+namespace {
+// HLE'd functions
+/// HLE replacement for the macro whose uploaded code hashes to 0x771BB18C62444DA0 (see hle_funcs).
+/// Programs the indexed-draw registers from `parameters`, issues the draw through the rasterizer
+/// and then clears the transient draw state.
+///
+/// Reads parameters[0..5]. NOTE(review): the vector length is not checked here; presumably the
+/// guest always passes at least six words - confirm against the caller.
+static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d,
+                                 const std::vector<u32>& parameters) {
+    const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
+
+    // Only the low 26 bits of the first parameter are used as the topology. The mask is written
+    // as an unsigned literal: building it by shifting 0x3ffffff left by 26 would shift a signed
+    // 32-bit int past its width, which is undefined behavior.
+    constexpr u32 topology_mask = 0x3ffffffU;
+    maxwell3d.regs.draw.topology.Assign(
+        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] &
+                                                                        topology_mask));
+    maxwell3d.regs.vb_base_instance = parameters[5];
+    maxwell3d.mme_draw.instance_count = instance_count;
+    maxwell3d.regs.vb_element_base = parameters[3];
+    maxwell3d.regs.index_array.count = parameters[1];
+    maxwell3d.regs.index_array.first = parameters[4];
+
+    // NOTE(review): the two flags presumably mean (is_indexed, is_instanced) - confirm against
+    // RasterizerInterface::Draw.
+    if (maxwell3d.ShouldExecute()) {
+        maxwell3d.GetRasterizer().Draw(true, true);
+    }
+
+    // Reset the per-draw state. Leaving current_mode as Undefined keeps
+    // Maxwell3D::CallMacroMethod from flushing an inline MME draw after this macro returns.
+    maxwell3d.regs.index_array.count = 0;
+    maxwell3d.mme_draw.instance_count = 0;
+    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+}
+
+/// HLE replacement for the macro whose uploaded code hashes to 0x0D61FC9FAAC9FCAD (see hle_funcs).
+/// Loads the vertex-buffer draw state from parameters[0..4], draws through the rasterizer and
+/// then clears the transient draw state again.
+static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d,
+                                 const std::vector<u32>& parameters) {
+    using Maxwell = Tegra::Engines::Maxwell3D;
+    auto& regs = maxwell3d.regs;
+    auto& draw_state = maxwell3d.mme_draw;
+
+    const u32 masked_instances = maxwell3d.GetRegisterValue(0xD1B) & parameters[2];
+    const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
+
+    regs.vertex_buffer.first = parameters[3];
+    regs.vertex_buffer.count = parameters[1];
+    regs.vb_base_instance = parameters[4];
+    regs.draw.topology.Assign(topology);
+    draw_state.instance_count = masked_instances;
+
+    if (maxwell3d.ShouldExecute()) {
+        maxwell3d.GetRasterizer().Draw(false, true);
+    }
+
+    // Clear the per-draw state once the draw has been submitted.
+    regs.vertex_buffer.count = 0;
+    draw_state.instance_count = 0;
+    draw_state.current_mode = Maxwell::MMEDrawMode::Undefined;
+}
+
+/// HLE replacement for the macro whose uploaded code hashes to 0x0217920100488FF7 (see hle_funcs).
+/// Sets up an indexed draw from parameters[0..5], forwards the element base and base instance
+/// through methods 0x8e3-0x8e5, draws, and finally resets everything it touched to zero.
+static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d,
+                                 const std::vector<u32>& parameters) {
+    using Maxwell = Tegra::Engines::Maxwell3D;
+    auto& regs = maxwell3d.regs;
+    auto& draw_state = maxwell3d.mme_draw;
+
+    // Sends the same three-method sequence used both before and after the draw.
+    const auto send_base_values = [&maxwell3d](u32 first_value, u32 second_value) {
+        maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+        maxwell3d.CallMethodFromMME(0x8e4, first_value);
+        maxwell3d.CallMethodFromMME(0x8e5, second_value);
+    };
+
+    const u32 masked_instances = maxwell3d.GetRegisterValue(0xD1B) & parameters[2];
+    const u32 first_element = parameters[4];
+    const u32 first_instance = parameters[5];
+
+    regs.index_array.first = parameters[3];
+    regs.reg_array[0x446] = first_element; // vertex id base?
+    regs.index_array.count = parameters[1];
+    regs.vb_element_base = first_element;
+    regs.vb_base_instance = first_instance;
+    draw_state.instance_count = masked_instances;
+    send_base_values(first_element, first_instance);
+    regs.draw.topology.Assign(static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]));
+
+    if (maxwell3d.ShouldExecute()) {
+        maxwell3d.GetRasterizer().Draw(true, true);
+    }
+
+    // Undo every piece of state written above, in the same order it was set.
+    regs.reg_array[0x446] = 0x0; // vertex id base?
+    regs.index_array.count = 0;
+    regs.vb_element_base = 0x0;
+    regs.vb_base_instance = 0x0;
+    draw_state.instance_count = 0;
+    send_base_values(0x0, 0x0);
+    draw_state.current_mode = Maxwell::MMEDrawMode::Undefined;
+}
+} // namespace
+
+/// Lookup table from the hash of an uploaded macro's code to the function that replaces it.
+/// HLEMacro::GetHLEProgram searches it linearly.
+constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
+    {0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
+    {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD},
+    {0x0217920100488FF7, &HLE_0217920100488FF7},
+}};
+
+/// Stores the engine reference that is handed to every HLEMacroImpl created by GetHLEProgram.
+HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+HLEMacro::~HLEMacro() = default;
+
+/// Looks `hash` up in hle_funcs.
+/// @returns a new HLEMacroImpl wrapping the matching function, or std::nullopt when the hash is
+///          not in the table.
+std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
+    for (const auto& [known_hash, function] : hle_funcs) {
+        if (known_hash == hash) {
+            return std::make_unique<HLEMacroImpl>(maxwell3d, function);
+        }
+    }
+    return std::nullopt;
+}
+
+HLEMacroImpl::~HLEMacroImpl() = default;
+
+/// Binds `func` to the engine reference that Execute will pass to it.
+HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func)
+ : maxwell3d(maxwell3d), func(func) {}
+
+/// Calls the bound HLE function with the stored engine reference and `parameters`.
+/// `method` is not used by this implementation.
+void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
+ func(maxwell3d, parameters);
+}
+
+} // namespace Tegra
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
new file mode 100644
index 000000000..37af875a0
--- /dev/null
+++ b/src/video_core/macro/macro_hle.h
@@ -0,0 +1,44 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/macro/macro.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+}
+
+/// Signature of an HLE macro replacement: receives the 3D engine and the macro's parameters.
+using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
+
+/// Factory that hands out HLE replacements for uploaded macros, identified by the hash of
+/// their code.
+class HLEMacro {
+public:
+ explicit HLEMacro(Engines::Maxwell3D& maxwell3d);
+ ~HLEMacro();
+
+ /// Returns a CachedMacro wrapping the HLE function registered for `hash`, or std::nullopt
+ /// when no function is registered for it.
+ std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
+
+private:
+ /// Engine passed on to every HLE program this factory creates.
+ Engines::Maxwell3D& maxwell3d;
+};
+
+/// CachedMacro implementation that forwards execution to a single HLEFunction.
+class HLEMacroImpl : public CachedMacro {
+public:
+ explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
+ ~HLEMacroImpl();
+
+ /// Invokes `func` with the stored engine and `parameters`; `method` is ignored.
+ void Execute(const std::vector<u32>& parameters, u32 method) override;
+
+private:
+ /// Engine handed to `func` on every Execute call.
+ Engines::Maxwell3D& maxwell3d;
+ /// HLE function this object wraps.
+ HLEFunction func;
+};
+
+} // namespace Tegra
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index 5edff27aa..aa5256419 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -11,7 +11,8 @@
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
namespace Tegra {
-MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d)
+ : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index d4a97ec7b..07292702f 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -28,7 +28,8 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
BRANCH_HOLDER,
});
-MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d)
+ : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
@@ -54,13 +55,15 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
const bool is_a_zero = opcode.src_a == 0;
const bool is_b_zero = opcode.src_b == 0;
const bool valid_operation = !is_a_zero && !is_b_zero;
- const bool is_move_operation = !is_a_zero && is_b_zero;
+ [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero;
const bool has_zero_register = is_a_zero || is_b_zero;
+ const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry ||
+ opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow;
Xbyak::Reg32 src_a;
Xbyak::Reg32 src_b;
- if (!optimizer.zero_reg_skip) {
+ if (!optimizer.zero_reg_skip || no_zero_reg_skip) {
src_a = Compile_GetRegister(opcode.src_a, RESULT);
src_b = Compile_GetRegister(opcode.src_b, eax);
} else {
@@ -71,7 +74,6 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
src_b = Compile_GetRegister(opcode.src_b, eax);
}
}
- Xbyak::Label skip_carry{};
bool has_emitted = false;
@@ -183,7 +185,8 @@ void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
if (next_opcode.has_value()) {
const auto next = *next_opcode;
- if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
+ if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod &&
+ opcode.dst == next.dst) {
return;
}
}
@@ -237,10 +240,10 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
}
void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
- auto dst = Compile_GetRegister(opcode.src_a, eax);
- auto src = Compile_GetRegister(opcode.src_b, RESULT);
+ const auto dst = Compile_GetRegister(opcode.src_a, ecx);
+ const auto src = Compile_GetRegister(opcode.src_b, RESULT);
- shr(src, al);
+ shr(src, dst.cvt8());
if (opcode.bf_size != 0 && opcode.bf_size != 31) {
and_(src, opcode.GetBitfieldMask());
} else if (opcode.bf_size == 0) {
@@ -256,8 +259,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
}
void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
- auto dst = Compile_GetRegister(opcode.src_a, eax);
- auto src = Compile_GetRegister(opcode.src_b, RESULT);
+ const auto dst = Compile_GetRegister(opcode.src_a, ecx);
+ const auto src = Compile_GetRegister(opcode.src_b, RESULT);
if (opcode.bf_src_bit != 0) {
shr(src, opcode.bf_src_bit);
@@ -266,16 +269,9 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
if (opcode.bf_size != 31) {
and_(src, opcode.GetBitfieldMask());
}
- shl(src, al);
- Compile_ProcessResult(opcode.result_operation, opcode.dst);
-}
+ shl(src, dst.cvt8());
-static u32 Read(Engines::Maxwell3D* maxwell3d, u32 method) {
- return maxwell3d->GetRegisterValue(method);
-}
-
-static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
- maxwell3d->CallMethodFromMME(method_address.address, value);
+ Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
@@ -295,15 +291,27 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
sub(result, opcode.immediate * -1);
}
}
- Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
- mov(Common::X64::ABI_PARAM1, qword[STATE]);
- mov(Common::X64::ABI_PARAM2, RESULT);
- Common::X64::CallFarFunction(*this, &Read);
- Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
- mov(RESULT, Common::X64::ABI_RETURN.cvt32());
+
+ // Equivalent to Engines::Maxwell3D::GetRegisterValue:
+ if (optimizer.enable_asserts) {
+ Xbyak::Label pass_range_check;
+ cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS));
+ jb(pass_range_check);
+ int3();
+ L(pass_range_check);
+ }
+ mov(rax, qword[STATE]);
+ mov(RESULT,
+ dword[rax + offsetof(Engines::Maxwell3D, regs) +
+ offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);
+
Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
+static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
+ maxwell3d->CallMethodFromMME(method_address.address, value);
+}
+
void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(Common::X64::ABI_PARAM1, qword[STATE]);
@@ -435,6 +443,9 @@ void MacroJITx64Impl::Compile() {
// one if our register isn't "dirty"
optimizer.optimize_for_method_move = true;
+ // Enable run-time assertions in JITted code
+ optimizer.enable_asserts = false;
+
// Check to see if we can skip emitting certain instructions
Optimizer_ScanFlags();
@@ -543,7 +554,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
}
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
- auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) {
+ const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) {
// Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
// register.
if (reg == 0) {
@@ -551,7 +562,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
}
mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
};
- auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); };
+ const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); };
switch (operation) {
case Macro::ResultOperation::IgnoreAndFetch:
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index 51ec090b8..a180e7428 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -76,6 +76,7 @@ private:
bool zero_reg_skip{};
bool skip_dummy_addimmediate{};
bool optimize_for_method_move{};
+ bool enable_asserts{};
};
OptimizerState optimizer{};
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index dbee9f634..ff5505d12 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -210,10 +210,11 @@ bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t si
return range == inner_size;
}
-void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const {
+void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer,
+ const std::size_t size) const {
std::size_t remaining_size{size};
- std::size_t page_index{src_addr >> page_bits};
- std::size_t page_offset{src_addr & page_mask};
+ std::size_t page_index{gpu_src_addr >> page_bits};
+ std::size_t page_offset{gpu_src_addr & page_mask};
auto& memory = system.Memory();
@@ -234,11 +235,11 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
}
}
-void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
+void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
const std::size_t size) const {
std::size_t remaining_size{size};
- std::size_t page_index{src_addr >> page_bits};
- std::size_t page_offset{src_addr & page_mask};
+ std::size_t page_index{gpu_src_addr >> page_bits};
+ std::size_t page_offset{gpu_src_addr & page_mask};
auto& memory = system.Memory();
@@ -259,10 +260,11 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
}
}
-void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) {
+void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
+ const std::size_t size) {
std::size_t remaining_size{size};
- std::size_t page_index{dest_addr >> page_bits};
- std::size_t page_offset{dest_addr & page_mask};
+ std::size_t page_index{gpu_dest_addr >> page_bits};
+ std::size_t page_offset{gpu_dest_addr & page_mask};
auto& memory = system.Memory();
@@ -283,11 +285,11 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
}
}
-void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
+void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
const std::size_t size) {
std::size_t remaining_size{size};
- std::size_t page_index{dest_addr >> page_bits};
- std::size_t page_offset{dest_addr & page_mask};
+ std::size_t page_index{gpu_dest_addr >> page_bits};
+ std::size_t page_offset{gpu_dest_addr & page_mask};
auto& memory = system.Memory();
@@ -306,16 +308,18 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
}
}
-void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
+void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
+ const std::size_t size) {
std::vector<u8> tmp_buffer(size);
- ReadBlock(src_addr, tmp_buffer.data(), size);
- WriteBlock(dest_addr, tmp_buffer.data(), size);
+ ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
+ WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
}
-void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
+void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
+ const std::size_t size) {
std::vector<u8> tmp_buffer(size);
- ReadBlockUnsafe(src_addr, tmp_buffer.data(), size);
- WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
+ ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size);
+ WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
}
bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 0ddd52d5a..87658e87a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -79,9 +79,9 @@ public:
* in the Host Memory counterpart. Note: This functions cause Host GPU Memory
* Flushes and Invalidations, respectively to each operation.
*/
- void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
- void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
- void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+ void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+ void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+ void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
/**
* ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -93,9 +93,9 @@ public:
* WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
* being flushed.
*/
- void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
- void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
- void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+ void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+ void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+ void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
/**
* IsGranularRange checks if a gpu region can be simply read with a pointer
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 2f75f8801..e12dab899 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -220,8 +220,8 @@ private:
return cache_begin < addr_end && addr_begin < cache_end;
};
- const u64 page_end = addr_end >> PAGE_SHIFT;
- for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
+ const u64 page_end = addr_end >> PAGE_BITS;
+ for (u64 page = addr_begin >> PAGE_BITS; page <= page_end; ++page) {
const auto& it = cached_queries.find(page);
if (it == std::end(cached_queries)) {
continue;
@@ -242,14 +242,14 @@ private:
/// Registers the passed parameters as cached and returns a pointer to the stored cached query.
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
- const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
+ const u64 page = static_cast<u64>(cpu_addr) >> PAGE_BITS;
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
host_ptr);
}
/// Tries to a get a cached query. Returns nullptr on failure.
CachedQuery* TryGet(VAddr addr) {
- const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
+ const u64 page = static_cast<u64>(addr) >> PAGE_BITS;
const auto it = cached_queries.find(page);
if (it == std::end(cached_queries)) {
return nullptr;
@@ -268,7 +268,7 @@ private:
}
static constexpr std::uintptr_t PAGE_SIZE = 4096;
- static constexpr unsigned PAGE_SHIFT = 12;
+ static constexpr unsigned PAGE_BITS = 12;
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index 1e96b0310..eb5158407 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -281,14 +281,14 @@ private:
template <const std::string_view& op>
std::string Unary(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
return temporary;
}
template <const std::string_view& op>
std::string Binary(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
Visit(operation[1]));
return temporary;
@@ -296,7 +296,7 @@ private:
template <const std::string_view& op>
std::string Trinary(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
Visit(operation[1]), Visit(operation[2]));
return temporary;
@@ -304,7 +304,7 @@ private:
template <const std::string_view& op, bool unordered>
std::string FloatComparison(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
AddLine("MOV.S {}, 0;", temporary);
AddLine("MOV.S {} (NE.x), -1;", temporary);
@@ -331,7 +331,7 @@ private:
template <const std::string_view& op, bool is_nan>
std::string HalfComparison(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
+ std::string tmp1 = AllocVectorTemporary();
const std::string tmp2 = AllocVectorTemporary();
const std::string op_a = Visit(operation[0]);
const std::string op_b = Visit(operation[1]);
@@ -367,15 +367,14 @@ private:
AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
}
- const std::string result = coord;
- AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, result, value, coord,
+ AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
image_id, ImageType(meta.image.type));
- return fmt::format("{}.x", result);
+ return fmt::format("{}.x", coord);
}
template <const std::string_view& op, const std::string_view& type>
std::string Atomic(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
std::string address;
std::string_view opname;
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
@@ -396,7 +395,7 @@ private:
template <char type>
std::string Negate(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
if constexpr (type == 'F') {
AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
} else {
@@ -407,7 +406,7 @@ private:
template <char type>
std::string Absolute(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
return temporary;
}
@@ -1156,20 +1155,20 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
}
std::string ARBDecompiler::VisitExpression(const Expr& node) {
- const std::string result = AllocTemporary();
if (const auto expr = std::get_if<ExprAnd>(&*node)) {
+ std::string result = AllocTemporary();
AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
VisitExpression(expr->operand2));
return result;
}
if (const auto expr = std::get_if<ExprOr>(&*node)) {
- const std::string result = AllocTemporary();
+ std::string result = AllocTemporary();
AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
VisitExpression(expr->operand2));
return result;
}
if (const auto expr = std::get_if<ExprNot>(&*node)) {
- const std::string result = AllocTemporary();
+ std::string result = AllocTemporary();
AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
return result;
}
@@ -1186,7 +1185,7 @@ std::string ARBDecompiler::VisitExpression(const Expr& node) {
return expr->value ? "0xffffffff" : "0";
}
if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
- const std::string result = AllocTemporary();
+ std::string result = AllocTemporary();
AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
return result;
}
@@ -1231,13 +1230,13 @@ std::string ARBDecompiler::Visit(const Node& node) {
}
if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
return temporary;
}
if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
switch (const auto index = predicate->GetIndex(); index) {
case Tegra::Shader::Pred::UnusedIndex:
AddLine("MOV.S {}, -1;", temporary);
@@ -1333,13 +1332,13 @@ std::string ARBDecompiler::Visit(const Node& node) {
} else {
offset_string = Visit(offset);
}
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
return temporary;
}
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
Visit(gmem->GetBaseAddress()));
AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
@@ -1348,14 +1347,14 @@ std::string ARBDecompiler::Visit(const Node& node) {
}
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- const std::string temporary = Visit(lmem->GetAddress());
+ std::string temporary = Visit(lmem->GetAddress());
AddLine("SHR.U {}, {}, 2;", temporary, temporary);
AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
return temporary;
}
if (const auto smem = std::get_if<SmemNode>(&*node)) {
- const std::string temporary = Visit(smem->GetAddress());
+ std::string temporary = Visit(smem->GetAddress());
AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
return temporary;
}
@@ -1535,7 +1534,7 @@ std::string ARBDecompiler::Assign(Operation operation) {
}
std::string ARBDecompiler::Select(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
Visit(operation[2]));
return temporary;
@@ -1545,12 +1544,12 @@ std::string ARBDecompiler::FClamp(Operation operation) {
// 1.0f in hex, replace with std::bit_cast on C++20
static constexpr u32 POSITIVE_ONE = 0x3f800000;
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
const Node& value = operation[0];
const Node& low = operation[1];
const Node& high = operation[2];
- const auto imm_low = std::get_if<ImmediateNode>(&*low);
- const auto imm_high = std::get_if<ImmediateNode>(&*high);
+ const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
+ const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
} else {
@@ -1574,7 +1573,7 @@ std::string ARBDecompiler::FCastHalf1(Operation operation) {
}
std::string ARBDecompiler::FSqrt(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
AddLine("RCP.F32 {}, {};", temporary, temporary);
return temporary;
@@ -1588,7 +1587,7 @@ std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
return fmt::format("{}.x", temporary);
}
- const std::string lut = AllocVectorTemporary();
+
AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
@@ -1766,21 +1765,21 @@ std::string ARBDecompiler::LogicalAssign(Operation operation) {
}
std::string ARBDecompiler::LogicalPick2(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
return temporary;
}
std::string ARBDecompiler::LogicalAnd2(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
const std::string op = Visit(operation[0]);
AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
return temporary;
}
std::string ARBDecompiler::FloatOrdered(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
AddLine("MOV.S {}, -1;", temporary);
@@ -1790,7 +1789,7 @@ std::string ARBDecompiler::FloatOrdered(Operation operation) {
}
std::string ARBDecompiler::FloatUnordered(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
AddLine("MOV.S {}, 0;", temporary);
@@ -1800,7 +1799,7 @@ std::string ARBDecompiler::FloatUnordered(Operation operation) {
}
std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
- const std::string temporary = AllocTemporary();
+ std::string temporary = AllocTemporary();
AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
AddLine("MOV.S {}, 0;", temporary);
AddLine("IF CF.x;");
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index b31d604e4..1011c7738 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -216,6 +216,7 @@ Device::Device()
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
+ has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
has_astc = IsASTCSupported();
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
@@ -245,6 +246,7 @@ Device::Device(std::nullptr_t) {
has_shader_ballot = true;
has_vertex_viewport_layer = true;
has_image_load_formatted = true;
+ has_texture_shadow_lod = true;
has_variable_aoffi = true;
}
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 145347943..c86e709b1 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -68,6 +68,10 @@ public:
return has_image_load_formatted;
}
+ bool HasTextureShadowLod() const {
+ return has_texture_shadow_lod;
+ }
+
bool HasASTC() const {
return has_astc;
}
@@ -110,6 +114,7 @@ private:
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
bool has_image_load_formatted{};
+ bool has_texture_shadow_lod{};
bool has_astc{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index d6e30b321..2c49aeaac 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
+using Tegra::Shader::TextureType;
using VideoCommon::Shader::BuildTransformFeedback;
using VideoCommon::Shader::Registry;
@@ -526,6 +527,9 @@ private:
if (device.HasImageLoadFormatted()) {
code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
}
+ if (device.HasTextureShadowLod()) {
+ code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
+ }
if (device.HasWarpIntrinsics()) {
code.AddLine("#extension GL_NV_gpu_shader5 : require");
code.AddLine("#extension GL_NV_shader_thread_group : require");
@@ -909,13 +913,13 @@ private:
return "samplerBuffer";
}
switch (sampler.type) {
- case Tegra::Shader::TextureType::Texture1D:
+ case TextureType::Texture1D:
return "sampler1D";
- case Tegra::Shader::TextureType::Texture2D:
+ case TextureType::Texture2D:
return "sampler2D";
- case Tegra::Shader::TextureType::Texture3D:
+ case TextureType::Texture3D:
return "sampler3D";
- case Tegra::Shader::TextureType::TextureCube:
+ case TextureType::TextureCube:
return "samplerCube";
default:
UNREACHABLE();
@@ -1380,8 +1384,19 @@ private:
const std::size_t count = operation.GetOperandsCount();
const bool has_array = meta->sampler.is_array;
const bool has_shadow = meta->sampler.is_shadow;
+ const bool workaround_lod_array_shadow_as_grad =
+ !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
+ ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
+ meta->sampler.type == TextureType::TextureCube);
+
+ std::string expr = "texture";
+
+ if (workaround_lod_array_shadow_as_grad) {
+ expr += "Grad";
+ } else {
+ expr += function_suffix;
+ }
- std::string expr = "texture" + function_suffix;
if (!meta->aoffi.empty()) {
expr += "Offset";
} else if (!meta->ptp.empty()) {
@@ -1415,6 +1430,16 @@ private:
expr += ')';
}
+ if (workaround_lod_array_shadow_as_grad) {
+ switch (meta->sampler.type) {
+ case TextureType::Texture2D:
+ return expr + ", vec2(0.0), vec2(0.0))";
+ case TextureType::TextureCube:
+ return expr + ", vec3(0.0), vec3(0.0))";
+ }
+ UNREACHABLE();
+ }
+
for (const auto& variant : extras) {
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
expr += GenerateTextureArgument(*argument);
@@ -2041,8 +2066,19 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
- std::string expr = GenerateTexture(
- operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
+ std::string expr{};
+
+ if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
+ ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
+ meta->sampler.type == TextureType::TextureCube)) {
+ LOG_ERROR(Render_OpenGL,
+ "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
+ expr = GenerateTexture(operation, "Lod", {});
+ } else {
+ expr = GenerateTexture(operation, "Lod",
+ {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
+ }
+
if (meta->sampler.is_shadow) {
expr = "vec4(" + expr + ')';
}
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 994ae98eb..35e329240 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -46,10 +46,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_UNSIGNED_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
}
+ break;
case Maxwell::VertexAttribute::Type::SignedInt:
case Maxwell::VertexAttribute::Type::SignedNorm:
switch (attrib.size) {
@@ -70,10 +68,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_INT_2_10_10_10_REV;
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
}
+ break;
case Maxwell::VertexAttribute::Type::Float:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_16:
@@ -86,10 +82,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_FLOAT;
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
}
+ break;
case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
@@ -102,10 +96,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_16_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return GL_UNSIGNED_SHORT;
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
}
+ break;
case Maxwell::VertexAttribute::Type::SignedScaled:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
@@ -118,14 +110,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_16_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return GL_SHORT;
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
}
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
- return {};
+ break;
}
+ UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(),
+ attrib.SizeString());
+ return {};
}
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
@@ -137,8 +127,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
case Maxwell::IndexFormat::UnsignedInt:
return GL_UNSIGNED_INT;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
- UNREACHABLE();
+ UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format));
return {};
}
@@ -180,33 +169,32 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
}
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
- Tegra::Texture::TextureMipmapFilter mip_filter_mode) {
+ Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) {
switch (filter_mode) {
- case Tegra::Texture::TextureFilter::Linear: {
- switch (mip_filter_mode) {
+ case Tegra::Texture::TextureFilter::Nearest:
+ switch (mipmap_filter_mode) {
case Tegra::Texture::TextureMipmapFilter::None:
- return GL_LINEAR;
+ return GL_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Nearest:
- return GL_LINEAR_MIPMAP_NEAREST;
+ return GL_NEAREST_MIPMAP_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
- return GL_LINEAR_MIPMAP_LINEAR;
+ return GL_NEAREST_MIPMAP_LINEAR;
}
break;
- }
- case Tegra::Texture::TextureFilter::Nearest: {
- switch (mip_filter_mode) {
+ case Tegra::Texture::TextureFilter::Linear:
+ switch (mipmap_filter_mode) {
case Tegra::Texture::TextureMipmapFilter::None:
- return GL_NEAREST;
+ return GL_LINEAR;
case Tegra::Texture::TextureMipmapFilter::Nearest:
- return GL_NEAREST_MIPMAP_NEAREST;
+ return GL_LINEAR_MIPMAP_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
- return GL_NEAREST_MIPMAP_LINEAR;
+ return GL_LINEAR_MIPMAP_LINEAR;
}
break;
}
- }
- LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode));
- return GL_LINEAR;
+ UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}",
+ static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode));
+ return GL_NEAREST;
}
inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
@@ -229,10 +217,9 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
} else {
return GL_MIRROR_CLAMP_TO_EDGE;
}
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
- return GL_REPEAT;
}
+ UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
+ return GL_REPEAT;
}
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
@@ -254,8 +241,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
case Tegra::Texture::DepthCompareFunc::Always:
return GL_ALWAYS;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}",
- static_cast<u32>(func));
+ UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func));
return GL_GREATER;
}
@@ -277,7 +263,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
case Maxwell::Blend::Equation::MaxGL:
return GL_MAX;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
+ UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
return GL_FUNC_ADD;
}
@@ -341,7 +327,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
return GL_ONE_MINUS_CONSTANT_ALPHA;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
+ UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
return GL_ZERO;
}
@@ -361,7 +347,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
case Tegra::Texture::SwizzleSource::OneFloat:
return GL_ONE;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
+ UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source));
return GL_ZERO;
}
@@ -392,7 +378,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
case Maxwell::ComparisonOp::AlwaysOld:
return GL_ALWAYS;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));
+ UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
return GL_ALWAYS;
}
@@ -423,7 +409,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
case Maxwell::StencilOp::DecrWrapOGL:
return GL_DECR_WRAP;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil));
+ UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil));
return GL_KEEP;
}
@@ -434,7 +420,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) {
case Maxwell::FrontFace::CounterClockWise:
return GL_CCW;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
+ UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face));
return GL_CCW;
}
@@ -447,7 +433,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) {
case Maxwell::CullFace::FrontAndBack:
return GL_FRONT_AND_BACK;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
+ UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
return GL_BACK;
}
@@ -486,7 +472,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
case Maxwell::LogicOperation::Set:
return GL_SET;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation));
+ UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation));
return GL_COPY;
}
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 62e950d31..1f2b6734b 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -21,29 +21,29 @@ namespace Sampler {
VkFilter Filter(Tegra::Texture::TextureFilter filter) {
switch (filter) {
- case Tegra::Texture::TextureFilter::Linear:
- return VK_FILTER_LINEAR;
case Tegra::Texture::TextureFilter::Nearest:
return VK_FILTER_NEAREST;
+ case Tegra::Texture::TextureFilter::Linear:
+ return VK_FILTER_LINEAR;
}
- UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
+ UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter));
return {};
}
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
switch (mipmap_filter) {
case Tegra::Texture::TextureMipmapFilter::None:
- // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
- // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
- // use an image view with a single mipmap level to emulate this.
- return VK_SAMPLER_MIPMAP_MODE_LINEAR;
- ;
- case Tegra::Texture::TextureMipmapFilter::Linear:
- return VK_SAMPLER_MIPMAP_MODE_LINEAR;
+ // There are no Vulkan filter modes that directly correspond to OpenGL minification filters
+ // of GL_LINEAR or GL_NEAREST, but they can be emulated using
+ // VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, and using minFilter =
+ // VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively.
+ return VK_SAMPLER_MIPMAP_MODE_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Nearest:
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
+ case Tegra::Texture::TextureMipmapFilter::Linear:
+ return VK_SAMPLER_MIPMAP_MODE_LINEAR;
}
- UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
+ UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
return {};
}
@@ -78,10 +78,9 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
case Tegra::Texture::WrapMode::MirrorOnceBorder:
UNIMPLEMENTED();
return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
- default:
- UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
- return {};
}
+ UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
+ return {};
}
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
@@ -288,10 +287,9 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::Patches:
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
- default:
- UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
- return {};
}
+ UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
+ return {};
}
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 59b441943..cd9673d1f 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -13,6 +13,7 @@
#include <fmt/format.h>
#include "common/dynamic_library.h"
+#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/telemetry.h"
#include "core/core.h"
@@ -76,7 +77,8 @@ Common::DynamicLibrary OpenVulkanLibrary() {
char* libvulkan_env = getenv("LIBVULKAN_PATH");
if (!libvulkan_env || !library.Open(libvulkan_env)) {
// Use the libvulkan.dylib from the application bundle.
- std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+ const std::string filename =
+ FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
library.Open(filename.c_str());
}
#else
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 184b2238a..29001953c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -870,7 +870,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
const GPUVAddr gpu_addr = binding.Address();
- const std::size_t size = binding.buffer_size;
+ const auto size = static_cast<VkDeviceSize>(binding.buffer_size);
const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
@@ -1154,7 +1154,7 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
const auto sampler = sampler_cache.GetSampler(texture.tsc);
update_descriptor_queue.AddSampledImage(sampler, image_view);
- const auto image_layout = update_descriptor_queue.GetLastImageLayout();
+ VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
*image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
sampled_views.push_back(ImageView{std::move(view), image_layout});
}
@@ -1180,7 +1180,7 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
update_descriptor_queue.AddImage(image_view);
- const auto image_layout = update_descriptor_queue.GetLastImageLayout();
+ VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
*image_layout = VK_IMAGE_LAYOUT_GENERAL;
image_views.push_back(ImageView{std::move(view), image_layout});
}
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index e6f2fa553..616eacc36 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -9,6 +9,8 @@
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/textures/texture.h"
+using Tegra::Texture::TextureMipmapFilter;
+
namespace Vulkan {
namespace {
@@ -63,8 +65,8 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c
ci.maxAnisotropy = tsc.GetMaxAnisotropy();
ci.compareEnable = tsc.depth_compare_enabled;
ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
- ci.minLod = tsc.GetMinLod();
- ci.maxLod = tsc.GetMaxLod();
+ ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod();
+ ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod();
ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
ci.unnormalizedCoordinates = VK_FALSE;
return device.GetLogical().CreateSampler(ci);
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 681ecde98..351c048d2 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -24,35 +24,25 @@ void VKUpdateDescriptorQueue::TickFrame() {
}
void VKUpdateDescriptorQueue::Acquire() {
- entries.clear();
-}
+ // Minimum number of entries required.
+ // This is the maximum number of entries a single draw call might use.
+ static constexpr std::size_t MIN_ENTRIES = 0x400;
-void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
- VkDescriptorSet set) {
- if (payload.size() + entries.size() >= payload.max_size()) {
+ if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
payload.clear();
}
+ upload_start = payload.data() + payload.size(); // first free slot, no end() dereference
+}
- // TODO(Rodrigo): Rework to write the payload directly
- const auto payload_start = payload.data() + payload.size();
- for (const auto& entry : entries) {
- if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) {
- payload.push_back(*image);
- } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) {
- payload.push_back(*buffer);
- } else if (const auto texel = std::get_if<VkBufferView>(&entry)) {
- payload.push_back(*texel);
- } else {
- UNREACHABLE();
- }
- }
-
- scheduler.Record(
- [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) {
- logical->UpdateDescriptorSet(set, update_template, payload_start);
- });
+void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
+ VkDescriptorSet set) {
+ const void* const data = upload_start;
+ const vk::Device* const logical = &device.GetLogical();
+ scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
+ logical->UpdateDescriptorSet(set, update_template, data);
+ });
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index cc7e3dff4..945320c72 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -15,17 +15,13 @@ namespace Vulkan {
class VKDevice;
class VKScheduler;
-class DescriptorUpdateEntry {
-public:
- explicit DescriptorUpdateEntry() {}
-
- DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {}
+struct DescriptorUpdateEntry {
+ DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
- DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {}
+ DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
- DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {}
+ DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
-private:
union {
VkDescriptorImageInfo image;
VkDescriptorBufferInfo buffer;
@@ -45,32 +41,34 @@ public:
void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
void AddSampledImage(VkSampler sampler, VkImageView image_view) {
- entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
+ payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
}
void AddImage(VkImageView image_view) {
- entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
+ payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
}
void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
- entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
+ payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
}
void AddTexelBuffer(VkBufferView texel_buffer) {
- entries.emplace_back(texel_buffer);
+ payload.emplace_back(texel_buffer);
}
- VkImageLayout* GetLastImageLayout() {
- return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout;
+ VkImageLayout* LastImageLayout() {
+ return &payload.back().image.imageLayout;
}
-private:
- using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>;
+ const VkImageLayout* LastImageLayout() const {
+ return &payload.back().image.imageLayout;
+ }
+private:
const VKDevice& device;
VKScheduler& scheduler;
- boost::container::static_vector<Variant, 0x400> entries;
+ const DescriptorUpdateEntry* upload_start = nullptr;
boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
};
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 2ce9b0626..42eff85d3 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -725,8 +725,7 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s
return supported == VK_TRUE;
}
-VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const
- noexcept {
+VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const {
VkSurfaceCapabilitiesKHR capabilities;
Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));
return capabilities;
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 98937a77a..da42ca88e 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -779,7 +779,7 @@ public:
bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const;
- VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept;
+ VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const;
std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const;
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
index 074f21691..5071c83ca 100644
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -66,12 +66,12 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_add
u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
const ProgramCode& code_b) {
- u64 unique_identifier = boost::hash_value(code);
+ size_t unique_identifier = boost::hash_value(code);
if (is_a) {
// VertexA programs include two programs
boost::hash_combine(unique_identifier, boost::hash_value(code_b));
}
- return unique_identifier;
+ return static_cast<u64>(unique_identifier);
}
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index a23c23886..2dd270e99 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -19,7 +19,7 @@ namespace VideoCommon {
template <class T>
class ShaderCache {
- static constexpr u64 PAGE_SHIFT = 14;
+ static constexpr u64 PAGE_BITS = 14;
struct Entry {
VAddr addr_start;
@@ -87,8 +87,8 @@ protected:
const VAddr addr_end = addr + size;
Entry* const entry = NewEntry(addr, addr_end, data.get());
- const u64 page_end = addr_end >> PAGE_SHIFT;
- for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+ const u64 page_end = addr_end >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
invalidation_cache[page].push_back(entry);
}
@@ -108,8 +108,8 @@ private:
/// @pre invalidation_mutex is locked
void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
const VAddr addr_end = addr + size;
- const u64 page_end = addr_end >> PAGE_SHIFT;
- for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+ const u64 page_end = addr_end >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
const auto it = invalidation_cache.find(page);
if (it == invalidation_cache.end()) {
continue;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index b543fc8c0..85075e868 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1053,7 +1053,7 @@ private:
void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
- auto deduced_dst = DeduceSurface(src_gpu_addr, src_params);
+ auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
if (deduced_src.Failed() || deduced_dst.Failed()) {
return;
}