summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/engines/fermi_2d.cpp3
-rw-r--r--src/video_core/engines/fermi_2d.h8
-rw-r--r--src/video_core/engines/kepler_compute.h10
-rw-r--r--src/video_core/engines/kepler_memory.cpp4
-rw-r--r--src/video_core/engines/kepler_memory.h7
-rw-r--r--src/video_core/engines/maxwell_3d.cpp7
-rw-r--r--src/video_core/engines/maxwell_3d.h8
-rw-r--r--src/video_core/engines/maxwell_dma.cpp4
-rw-r--r--src/video_core/engines/maxwell_dma.h9
-rw-r--r--src/video_core/gpu_asynch.cpp2
-rw-r--r--src/video_core/gpu_thread.cpp43
-rw-r--r--src/video_core/gpu_thread.h71
-rw-r--r--src/video_core/macro_interpreter.cpp20
-rw-r--r--src/video_core/memory_manager.cpp51
-rw-r--r--src/video_core/memory_manager.h9
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp36
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp46
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h11
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp210
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h92
37 files changed, 503 insertions, 204 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 14b76680f..242a0d1cd 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -128,7 +128,9 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_stream_buffer.cpp
- renderer_vulkan/vk_stream_buffer.h)
+ renderer_vulkan/vk_stream_buffer.h
+ renderer_vulkan/vk_swapchain.cpp
+ renderer_vulkan/vk_swapchain.h)
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
@@ -137,4 +139,4 @@ endif()
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad lz4_static)
+target_link_libraries(video_core PRIVATE glad)
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 03b7ee5d8..55966eef1 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -6,12 +6,13 @@
#include "common/logging/log.h"
#include "common/math_util.h"
#include "video_core/engines/fermi_2d.h"
+#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace Tegra::Engines {
Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
- : memory_manager(memory_manager), rasterizer{rasterizer} {}
+ : rasterizer{rasterizer}, memory_manager{memory_manager} {}
void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 80523e320..2e51b7f13 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -10,7 +10,10 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+
+namespace Tegra {
+class MemoryManager;
+}
namespace VideoCore {
class RasterizerInterface;
@@ -115,10 +118,9 @@ public:
};
} regs{};
- MemoryManager& memory_manager;
-
private:
VideoCore::RasterizerInterface& rasterizer;
+ MemoryManager& memory_manager;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 6575afd0f..fb6cdf432 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -9,7 +9,10 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+
+namespace Tegra {
+class MemoryManager;
+}
namespace Tegra::Engines {
@@ -40,10 +43,11 @@ public:
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
"KeplerCompute Regs has wrong size");
- MemoryManager& memory_manager;
-
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
+
+private:
+ MemoryManager& memory_manager;
};
#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index e259bf46b..cd51a31d7 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -5,9 +5,9 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
@@ -15,7 +15,7 @@ namespace Tegra::Engines {
KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
- : system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {}
+ : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
KeplerMemory::~KeplerMemory() = default;
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 9181e9d80..78b6c3e45 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,12 +10,15 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
namespace Core {
class System;
}
+namespace Tegra {
+class MemoryManager;
+}
+
namespace VideoCore {
class RasterizerInterface;
}
@@ -82,8 +85,8 @@ public:
private:
Core::System& system;
- MemoryManager& memory_manager;
VideoCore::RasterizerInterface& rasterizer;
+ MemoryManager& memory_manager;
void ProcessData(u32 data);
};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index defcfbd3f..3c3ac8f81 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,11 +7,10 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
-#include "core/memory.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"
#include "video_core/textures/texture.h"
namespace Tegra::Engines {
@@ -21,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
- : memory_manager(memory_manager), system{system}, rasterizer{rasterizer},
- macro_interpreter(*this) {
+ : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
+ *this} {
InitializeRegisterDefaults();
}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 7fbf1026e..b352060a1 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -16,13 +16,16 @@
#include "common/math_util.h"
#include "video_core/gpu.h"
#include "video_core/macro_interpreter.h"
-#include "video_core/memory_manager.h"
#include "video_core/textures/texture.h"
namespace Core {
class System;
}
+namespace Tegra {
+class MemoryManager;
+}
+
namespace VideoCore {
class RasterizerInterface;
}
@@ -1093,7 +1096,6 @@ public:
};
State state{};
- MemoryManager& memory_manager;
struct DirtyFlags {
std::bitset<8> color_buffer{0xFF};
@@ -1141,6 +1143,8 @@ private:
VideoCore::RasterizerInterface& rasterizer;
+ MemoryManager& memory_manager;
+
/// Start offsets of each macro in macro_memory
std::unordered_map<u32, u32> macro_offsets;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 5cca5c29a..2426d0067 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -5,9 +5,9 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
+#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
@@ -16,7 +16,7 @@ namespace Tegra::Engines {
MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
- : memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {}
+ : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 34c369320..c6b649842 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -10,12 +10,15 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
namespace Core {
class System;
}
+namespace Tegra {
+class MemoryManager;
+}
+
namespace VideoCore {
class RasterizerInterface;
}
@@ -139,13 +142,13 @@ public:
};
} regs{};
- MemoryManager& memory_manager;
-
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
+ MemoryManager& memory_manager;
+
/// Performs the copy from the source buffer to the destination buffer as configured in the
/// registers.
void HandleCopy();
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 8b355cf7b..db507cf04 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -9,7 +9,7 @@
namespace VideoCommon {
GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
- : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
+ : Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {}
GPUAsynch::~GPUAsynch() = default;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c5dc199c5..cc56cf467 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,9 @@
#include "common/assert.h"
#include "common/microprofile.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/core_timing_util.h"
#include "core/frontend/scope_acquire_window_context.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
@@ -36,7 +39,6 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
- state.DecrementFramesCounter();
renderer.SwapBuffers(std::move(data->framebuffer));
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
@@ -47,13 +49,18 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
} else {
UNREACHABLE();
}
+ state.signaled_fence = next.fence;
+ state.TrySynchronize();
}
}
}
-ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
- : renderer{renderer}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher),
- std::ref(state)} {}
+ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
+ Tegra::DmaPusher& dma_pusher)
+ : system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} {
+ synchronization_event = system.CoreTiming().RegisterEvent(
+ "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
+}
ThreadManager::~ThreadManager() {
// Notify GPU thread that a shutdown is pending
@@ -62,14 +69,14 @@ ThreadManager::~ThreadManager() {
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
- PushCommand(SubmitListCommand(std::move(entries)));
+ const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
+ const s64 synchronization_ticks{Core::Timing::usToCycles(9000)};
+ system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
}
void ThreadManager::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- state.IncrementFramesCounter();
PushCommand(SwapBuffersCommand(std::move(framebuffer)));
- state.WaitForFrames();
}
void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
@@ -79,7 +86,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
if (state.queue.Empty()) {
// It's quicker to invalidate a single region on the CPU if the queue is already empty
- renderer.Rasterizer().InvalidateRegion(addr, size);
+ system.Renderer().Rasterizer().InvalidateRegion(addr, size);
} else {
PushCommand(InvalidateRegionCommand(addr, size));
}
@@ -90,9 +97,25 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
InvalidateRegion(addr, size);
}
-void ThreadManager::PushCommand(CommandData&& command_data) {
- state.queue.Push(CommandDataContainer(std::move(command_data)));
+u64 ThreadManager::PushCommand(CommandData&& command_data) {
+ const u64 fence{++state.last_fence};
+ state.queue.Push(CommandDataContainer(std::move(command_data), fence));
state.SignalCommands();
+ return fence;
+}
+
+MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
+void SynchState::WaitForSynchronization(u64 fence) {
+ if (signaled_fence >= fence) {
+ return;
+ }
+
+ // Wait for the GPU to be idle (all commands to be executed)
+ {
+ MICROPROFILE_SCOPE(GPU_wait);
+ std::unique_lock<std::mutex> lock{synchronization_mutex};
+ synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
+ }
}
} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 70acb2e79..62bcea5bb 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -19,9 +19,12 @@ struct FramebufferConfig;
class DmaPusher;
} // namespace Tegra
-namespace VideoCore {
-class RendererBase;
-} // namespace VideoCore
+namespace Core {
+class System;
+namespace Timing {
+struct EventType;
+} // namespace Timing
+} // namespace Core
namespace VideoCommon::GPUThread {
@@ -75,63 +78,47 @@ using CommandData =
struct CommandDataContainer {
CommandDataContainer() = default;
- CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
+ CommandDataContainer(CommandData&& data, u64 next_fence)
+ : data{std::move(data)}, fence{next_fence} {}
CommandDataContainer& operator=(const CommandDataContainer& t) {
data = std::move(t.data);
+ fence = t.fence;
return *this;
}
CommandData data;
+ u64 fence{};
};
/// Struct used to synchronize the GPU thread
struct SynchState final {
std::atomic_bool is_running{true};
std::atomic_int queued_frame_count{};
- std::mutex frames_mutex;
+ std::mutex synchronization_mutex;
std::mutex commands_mutex;
std::condition_variable commands_condition;
- std::condition_variable frames_condition;
+ std::condition_variable synchronization_condition;
- void IncrementFramesCounter() {
- std::lock_guard lock{frames_mutex};
- ++queued_frame_count;
+ /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
+ /// synchronized. This is entirely empirical.
+ bool IsSynchronized() const {
+ constexpr std::size_t max_queue_gap{5};
+ return queue.Size() <= max_queue_gap;
}
- void DecrementFramesCounter() {
- {
- std::lock_guard lock{frames_mutex};
- --queued_frame_count;
-
- if (queued_frame_count) {
- return;
- }
+ void TrySynchronize() {
+ if (IsSynchronized()) {
+ std::lock_guard<std::mutex> lock{synchronization_mutex};
+ synchronization_condition.notify_one();
}
- frames_condition.notify_one();
}
- void WaitForFrames() {
- {
- std::lock_guard lock{frames_mutex};
- if (!queued_frame_count) {
- return;
- }
- }
-
- // Wait for the GPU to be idle (all commands to be executed)
- {
- std::unique_lock lock{frames_mutex};
- frames_condition.wait(lock, [this] { return !queued_frame_count; });
- }
- }
+ void WaitForSynchronization(u64 fence);
void SignalCommands() {
- {
- std::unique_lock lock{commands_mutex};
- if (queue.Empty()) {
- return;
- }
+ if (queue.Empty()) {
+ return;
}
commands_condition.notify_one();
@@ -144,12 +131,15 @@ struct SynchState final {
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
CommandQueue queue;
+ u64 last_fence{};
+ std::atomic<u64> signaled_fence{};
};
/// Class used to manage the GPU thread
class ThreadManager final {
public:
- explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+ explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
+ Tegra::DmaPusher& dma_pusher);
~ThreadManager();
/// Push GPU command entries to be processed
@@ -170,11 +160,12 @@ public:
private:
/// Pushes a command to be executed by the GPU thread
- void PushCommand(CommandData&& command_data);
+ u64 PushCommand(CommandData&& command_data);
private:
SynchState state;
- VideoCore::RendererBase& renderer;
+ Core::System& system;
+ Core::Timing::EventType* synchronization_event{};
std::thread thread;
std::thread::id thread_id;
};
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 64f75db43..524d9ea5a 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -223,27 +223,21 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res
}
u32 MacroInterpreter::FetchParameter() {
- ASSERT(next_parameter_index < parameters.size());
- return parameters[next_parameter_index++];
+ return parameters.at(next_parameter_index++);
}
u32 MacroInterpreter::GetRegister(u32 register_id) const {
- // Register 0 is supposed to always return 0.
- if (register_id == 0)
- return 0;
-
- ASSERT(register_id < registers.size());
- return registers[register_id];
+ return registers.at(register_id);
}
void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
- // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
- // register.
- if (register_id == 0)
+ // Register 0 is hardwired as the zero register.
+ // Ensure no writes to it actually occur.
+ if (register_id == 0) {
return;
+ }
- ASSERT(register_id < registers.size());
- registers[register_id] = value;
+ registers.at(register_id) = value;
}
void MacroInterpreter::SetMethodAddress(u32 address) {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index e76b59842..8417324ff 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -77,16 +77,17 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
return gpu_addr;
}
-GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) {
+GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) const {
// Find the first Free VMA.
- const VMAHandle vma_handle{std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) {
- if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
- return false;
- }
+ const VMAHandle vma_handle{
+ std::find_if(vma_map.begin(), vma_map.end(), [region_start, size](const auto& vma) {
+ if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
+ return false;
+ }
- const VAddr vma_end{vma.second.base + vma.second.size};
- return vma_end > region_start && vma_end >= region_start + size;
- })};
+ const VAddr vma_end{vma.second.base + vma.second.size};
+ return vma_end > region_start && vma_end >= region_start + size;
+ })};
if (vma_handle == vma_map.end()) {
return {};
@@ -99,12 +100,12 @@ bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
return (addr >> page_bits) < page_table.pointers.size();
}
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) {
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) const {
if (!IsAddressValid(addr)) {
return {};
}
- VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
+ const VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
if (cpu_addr) {
return cpu_addr + (addr & page_mask);
}
@@ -113,7 +114,7 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) {
}
template <typename T>
-T MemoryManager::Read(GPUVAddr addr) {
+T MemoryManager::Read(GPUVAddr addr) const {
if (!IsAddressValid(addr)) {
return {};
}
@@ -165,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
}
}
-template u8 MemoryManager::Read<u8>(GPUVAddr addr);
-template u16 MemoryManager::Read<u16>(GPUVAddr addr);
-template u32 MemoryManager::Read<u32>(GPUVAddr addr);
-template u64 MemoryManager::Read<u64>(GPUVAddr addr);
+template u8 MemoryManager::Read<u8>(GPUVAddr addr) const;
+template u16 MemoryManager::Read<u16>(GPUVAddr addr) const;
+template u32 MemoryManager::Read<u32>(GPUVAddr addr) const;
+template u64 MemoryManager::Read<u64>(GPUVAddr addr) const;
template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data);
template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
@@ -179,8 +180,22 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
return {};
}
- u8* page_pointer{page_table.pointers[addr >> page_bits]};
- if (page_pointer) {
+ u8* const page_pointer{page_table.pointers[addr >> page_bits]};
+ if (page_pointer != nullptr) {
+ return page_pointer + (addr & page_mask);
+ }
+
+ LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
+ return {};
+}
+
+const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
+ if (!IsAddressValid(addr)) {
+ return {};
+ }
+
+ const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
+ if (page_pointer != nullptr) {
return page_pointer + (addr & page_mask);
}
@@ -188,7 +203,7 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
return {};
}
-void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
+void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const {
std::memcpy(dest_buffer, GetPointer(src_addr), size);
}
void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 34744bb27..178e2f655 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -50,17 +50,18 @@ public:
GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
- std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr);
+ std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
template <typename T>
- T Read(GPUVAddr addr);
+ T Read(GPUVAddr addr) const;
template <typename T>
void Write(GPUVAddr addr, T data);
u8* GetPointer(GPUVAddr addr);
+ const u8* GetPointer(GPUVAddr addr) const;
- void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
+ void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
@@ -127,7 +128,7 @@ private:
void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
/// Finds a free (unmapped region) of the specified size starting at the specified address.
- GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size);
+ GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size) const;
private:
static constexpr u64 page_bits{16};
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index fd091c84c..7989ec11b 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,7 +7,6 @@
#include "common/alignment.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index da9326253..5842d6213 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -4,7 +4,6 @@
#include <glad/glad.h>
-#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index 2bcbd3da2..c3e94d917 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -7,7 +7,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
-#include "core/memory.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
index 0e2e7dc36..4e87ce4d6 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.h
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h
@@ -4,11 +4,9 @@
#pragma once
-#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
-#include "video_core/memory_manager.h"
namespace OpenGL {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 046fc935b..7ff1e6737 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -17,7 +17,6 @@
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "core/core.h"
-#include "core/frontend/emu_window.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
@@ -26,7 +25,6 @@
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
-#include "video_core/video_core.h"
namespace OpenGL {
@@ -318,7 +316,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
GLShader::MaxwellUniformData ubo{};
- ubo.SetFromRegs(gpu.state.shader_stages[stage]);
+ ubo.SetFromRegs(gpu, stage);
const GLintptr offset = buffer_cache.UploadHostMemory(
&ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4de565321..54fbf48aa 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -12,15 +12,12 @@
#include <optional>
#include <tuple>
#include <utility>
-#include <vector>
#include <boost/icl/interval_map.hpp>
-#include <boost/range/iterator_range.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -29,10 +26,8 @@
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
namespace Core {
class System;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index aba6ce731..7a3280620 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -13,7 +13,6 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
-#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/morton.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index e8073579f..ad4fd3ad2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -5,10 +5,9 @@
#pragma once
#include <array>
-#include <map>
#include <memory>
#include <string>
-#include <unordered_set>
+#include <tuple>
#include <vector>
#include "common/alignment.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 290e654bc..7030db365 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -6,13 +6,11 @@
#include "common/assert.h"
#include "common/hash.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/shader_ir.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index fd1c85115..0cf8e0b3d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,21 +5,20 @@
#pragma once
#include <array>
+#include <atomic>
#include <memory>
#include <set>
#include <tuple>
#include <unordered_map>
+#include <vector>
#include <glad/glad.h>
-#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/rasterizer_cache.h"
-#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace Core {
class System;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 11d1169f0..a1a51f226 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -69,10 +69,10 @@ public:
shader_source += '\n';
}
- std::string GenerateTemporal() {
- std::string temporal = "tmp";
- temporal += std::to_string(temporal_index++);
- return temporal;
+ std::string GenerateTemporary() {
+ std::string temporary = "tmp";
+ temporary += std::to_string(temporary_index++);
+ return temporary;
}
std::string GetResult() {
@@ -87,7 +87,7 @@ private:
}
std::string shader_source;
- u32 temporal_index = 1;
+ u32 temporary_index = 1;
};
/// Generates code to use for a swizzle operation.
@@ -426,9 +426,14 @@ private:
std::string Visit(Node node) {
if (const auto operation = std::get_if<OperationNode>(node)) {
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
+ if (operation_index >= operation_decompilers.size()) {
+ UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
+ return {};
+ }
const auto decompiler = operation_decompilers[operation_index];
if (decompiler == nullptr) {
- UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
+ UNREACHABLE_MSG("Undefined operation: {}", operation_index);
+ return {};
}
return (this->*decompiler)(*operation);
@@ -540,7 +545,7 @@ private:
} else if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
- const std::string final_offset = code.GenerateTemporal();
+ const std::string final_offset = code.GenerateTemporary();
code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " +
std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';');
return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
@@ -587,9 +592,9 @@ private:
// There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
const std::string precise = stage != ShaderStage::Fragment ? "precise " : "";
- const std::string temporal = code.GenerateTemporal();
- code.AddLine(precise + "float " + temporal + " = " + value + ';');
- return temporal;
+ const std::string temporary = code.GenerateTemporary();
+ code.AddLine(precise + "float " + temporary + " = " + value + ';');
+ return temporary;
}
std::string VisitOperand(Operation operation, std::size_t operand_index) {
@@ -601,9 +606,9 @@ private:
return Visit(operand);
}
- const std::string temporal = code.GenerateTemporal();
- code.AddLine("float " + temporal + " = " + Visit(operand) + ';');
- return temporal;
+ const std::string temporary = code.GenerateTemporary();
+ code.AddLine("float " + temporary + " = " + Visit(operand) + ';');
+ return temporary;
}
std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
@@ -1196,11 +1201,12 @@ private:
switch (meta->element) {
case 0:
case 1:
- return "textureSize(" + sampler + ", " + lod + ')' + GetSwizzle(meta->element);
+ return "itof(int(textureSize(" + sampler + ", " + lod + ')' +
+ GetSwizzle(meta->element) + "))";
case 2:
return "0";
case 3:
- return "textureQueryLevels(" + sampler + ')';
+ return "itof(textureQueryLevels(" + sampler + "))";
}
UNREACHABLE();
return "0";
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 72aca4938..4e04ab2f8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,7 +5,6 @@
#pragma once
#include <array>
-#include <set>
#include <string>
#include <utility>
#include <vector>
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 82fc4d44b..d2d979997 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -4,13 +4,13 @@
#include <cstring>
#include <fmt/format.h>
-#include <lz4.h>
#include "common/assert.h"
#include "common/common_paths.h"
#include "common/common_types.h"
#include "common/file_util.h"
#include "common/logging/log.h"
+#include "common/lz4_compression.h"
#include "common/scm_rev.h"
#include "core/core.h"
@@ -49,39 +49,6 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
return hash;
}
-template <typename T>
-std::vector<u8> CompressData(const T* source, std::size_t source_size) {
- if (source_size > LZ4_MAX_INPUT_SIZE) {
- // Source size exceeds LZ4 maximum input size
- return {};
- }
- const auto source_size_int = static_cast<int>(source_size);
- const int max_compressed_size = LZ4_compressBound(source_size_int);
- std::vector<u8> compressed(max_compressed_size);
- const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source),
- reinterpret_cast<char*>(compressed.data()),
- source_size_int, max_compressed_size);
- if (compressed_size <= 0) {
- // Compression failed
- return {};
- }
- compressed.resize(compressed_size);
- return compressed;
-}
-
-std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) {
- std::vector<u8> uncompressed(uncompressed_size);
- const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()),
- reinterpret_cast<char*>(uncompressed.data()),
- static_cast<int>(compressed.size()),
- static_cast<int>(uncompressed.size()));
- if (static_cast<int>(uncompressed_size) != size_check) {
- // Decompression failed
- return {};
- }
- return uncompressed;
-}
-
} // namespace
ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
@@ -292,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
- dump.binary = DecompressData(compressed_binary, binary_length);
+ dump.binary = Common::Compression::DecompressDataLZ4(compressed_binary, binary_length);
if (dump.binary.empty()) {
return {};
}
@@ -321,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
return {};
}
- const std::vector<u8> code = DecompressData(compressed_code, code_size);
+ const std::vector<u8> code = Common::Compression::DecompressDataLZ4(compressed_code, code_size);
if (code.empty()) {
return {};
}
@@ -507,7 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
if (!IsUsable())
return;
- const std::vector<u8> compressed_code{CompressData(code.data(), code.size())};
+ const std::vector<u8> compressed_code{Common::Compression::CompressDataLZ4HC(
+ reinterpret_cast<const u8*>(code.data()), code.size(), 9)};
if (compressed_code.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
unique_identifier);
@@ -537,7 +505,9 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
- const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size());
+ const std::vector<u8> compressed_binary =
+ Common::Compression::CompressDataLZ4HC(binary.data(), binary.size(), 9);
+
if (compressed_binary.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
usage.unique_identifier);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 7d96649af..8763d9c71 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include <fmt/format.h>
-#include "common/assert.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index fba8e681b..fad346b48 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -4,12 +4,9 @@
#pragma once
-#include <array>
-#include <string>
#include <vector>
#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 6a30c28d2..eaf3e03a0 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,15 +2,15 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "core/core.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL::GLShader {
-void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
- const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
- const auto& regs = gpu.regs;
- const auto& state = gpu.state;
+using Tegra::Engines::Maxwell3D;
+
+void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
+ const auto& regs = maxwell.regs;
+ const auto& state = maxwell.state;
// TODO(bunnei): Support more than one viewport
viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
@@ -18,7 +18,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
u32 func = static_cast<u32>(regs.alpha_test_func);
// Normalize the gl variants of opCompare to be the same as the normal variants
- u32 op_gl_variant_base = static_cast<u32>(Tegra::Engines::Maxwell3D::Regs::ComparisonOp::Never);
+ const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
if (func >= op_gl_variant_base) {
func = func - op_gl_variant_base + 1U;
}
@@ -31,8 +31,9 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
// Assign in which stage the position has to be flipped
// (the last stage before the fragment shader).
- if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) {
- flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
+ constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
+ if (maxwell.regs.shader_config[geometry_index].enable) {
+ flip_stage = geometry_index;
} else {
flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 4970aafed..8eef2a920 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -12,14 +12,13 @@
namespace OpenGL::GLShader {
-using Tegra::Engines::Maxwell3D;
-
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
-// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
-// Not following that rule will cause problems on some AMD drivers.
+/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
+/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
+/// Not following that rule will cause problems on some AMD drivers.
struct MaxwellUniformData {
- void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
+ void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
+
alignas(16) GLvec4 viewport_flip;
struct alignas(16) {
GLuint instance_id;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a01efeb05..d69cba9c3 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,7 +5,6 @@
#include <algorithm>
#include <cstddef>
#include <cstdlib>
-#include <cstring>
#include <memory>
#include <glad/glad.h>
#include "common/assert.h"
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 388b5ffd5..02a9f5ecb 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -10,6 +10,7 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "core/memory.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
new file mode 100644
index 000000000..08279e562
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -0,0 +1,210 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/frontend/framebuffer_layout.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_swapchain.h"
+
+namespace Vulkan {
+
+namespace {
+vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats) {
+ if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
+ return {vk::Format::eB8G8R8A8Unorm, vk::ColorSpaceKHR::eSrgbNonlinear};
+ }
+ const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
+ return format.format == vk::Format::eB8G8R8A8Unorm &&
+ format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear;
+ });
+ return found != formats.end() ? *found : formats[0];
+}
+
+vk::PresentModeKHR ChooseSwapPresentMode(const std::vector<vk::PresentModeKHR>& modes) {
+ // Mailbox doesn't lock the application like fifo (vsync), prefer it
+ const auto& found = std::find_if(modes.begin(), modes.end(), [](const auto& mode) {
+ return mode == vk::PresentModeKHR::eMailbox;
+ });
+ return found != modes.end() ? *found : vk::PresentModeKHR::eFifo;
+}
+
+vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
+ u32 height) {
+ constexpr auto undefined_size{std::numeric_limits<u32>::max()};
+ if (capabilities.currentExtent.width != undefined_size) {
+ return capabilities.currentExtent;
+ }
+ vk::Extent2D extent = {width, height};
+ extent.width = std::max(capabilities.minImageExtent.width,
+ std::min(capabilities.maxImageExtent.width, extent.width));
+ extent.height = std::max(capabilities.minImageExtent.height,
+ std::min(capabilities.maxImageExtent.height, extent.height));
+ return extent;
+}
+} // namespace
+
+VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device)
+ : surface{surface}, device{device} {}
+
+VKSwapchain::~VKSwapchain() = default;
+
+void VKSwapchain::Create(u32 width, u32 height) {
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ const auto physical_device = device.GetPhysical();
+
+ const vk::SurfaceCapabilitiesKHR capabilities{
+ physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
+ if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
+ return;
+ }
+
+ dev.waitIdle(dld);
+ Destroy();
+
+ CreateSwapchain(capabilities, width, height);
+ CreateSemaphores();
+ CreateImageViews();
+
+ fences.resize(image_count, nullptr);
+}
+
+void VKSwapchain::AcquireNextImage() {
+ const auto dev{device.GetLogical()};
+ const auto& dld{device.GetDispatchLoader()};
+ dev.acquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
+ *present_semaphores[frame_index], {}, &image_index, dld);
+
+ if (auto& fence = fences[image_index]; fence) {
+ fence->Wait();
+ fence->Release();
+ fence = nullptr;
+ }
+}
+
+bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) {
+ const vk::Semaphore present_semaphore{*present_semaphores[frame_index]};
+ const std::array<vk::Semaphore, 2> semaphores{present_semaphore, render_semaphore};
+ const u32 wait_semaphore_count{render_semaphore ? 2U : 1U};
+ const auto& dld{device.GetDispatchLoader()};
+ const auto present_queue{device.GetPresentQueue()};
+ bool recreated = false;
+
+ const vk::PresentInfoKHR present_info(wait_semaphore_count, semaphores.data(), 1,
+ &swapchain.get(), &image_index, {});
+ switch (const auto result = present_queue.presentKHR(&present_info, dld); result) {
+ case vk::Result::eSuccess:
+ break;
+ case vk::Result::eErrorOutOfDateKHR:
+ if (current_width > 0 && current_height > 0) {
+ Create(current_width, current_height);
+ recreated = true;
+ }
+ break;
+ default:
+ LOG_CRITICAL(Render_Vulkan, "Vulkan failed to present swapchain due to {}!",
+ vk::to_string(result));
+ UNREACHABLE();
+ }
+
+ ASSERT(fences[image_index] == nullptr);
+ fences[image_index] = &fence;
+ frame_index = (frame_index + 1) % image_count;
+ return recreated;
+}
+
+bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const {
+ // TODO(Rodrigo): Handle framebuffer pixel format changes
+ return framebuffer.width != current_width || framebuffer.height != current_height;
+}
+
+void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
+ u32 height) {
+ const auto dev{device.GetLogical()};
+ const auto& dld{device.GetDispatchLoader()};
+ const auto physical_device{device.GetPhysical()};
+
+ const std::vector<vk::SurfaceFormatKHR> formats{
+ physical_device.getSurfaceFormatsKHR(surface, dld)};
+
+ const std::vector<vk::PresentModeKHR> present_modes{
+ physical_device.getSurfacePresentModesKHR(surface, dld)};
+
+ const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
+ const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
+ extent = ChooseSwapExtent(capabilities, width, height);
+
+ current_width = extent.width;
+ current_height = extent.height;
+
+ u32 requested_image_count{capabilities.minImageCount + 1};
+ if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
+ requested_image_count = capabilities.maxImageCount;
+ }
+
+ vk::SwapchainCreateInfoKHR swapchain_ci(
+ {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace,
+ extent, 1, vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {},
+ capabilities.currentTransform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false,
+ {});
+
+ const u32 graphics_family{device.GetGraphicsFamily()};
+ const u32 present_family{device.GetPresentFamily()};
+ const std::array<u32, 2> queue_indices{graphics_family, present_family};
+ if (graphics_family != present_family) {
+ swapchain_ci.imageSharingMode = vk::SharingMode::eConcurrent;
+ swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
+ swapchain_ci.pQueueFamilyIndices = queue_indices.data();
+ } else {
+ swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
+ }
+
+ swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
+
+ images = dev.getSwapchainImagesKHR(*swapchain, dld);
+ image_count = static_cast<u32>(images.size());
+ image_format = surface_format.format;
+}
+
+void VKSwapchain::CreateSemaphores() {
+ const auto dev{device.GetLogical()};
+ const auto& dld{device.GetDispatchLoader()};
+
+ present_semaphores.resize(image_count);
+ for (std::size_t i = 0; i < image_count; i++) {
+ present_semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld);
+ }
+}
+
+void VKSwapchain::CreateImageViews() {
+ const auto dev{device.GetLogical()};
+ const auto& dld{device.GetDispatchLoader()};
+
+ image_views.resize(image_count);
+ for (std::size_t i = 0; i < image_count; i++) {
+ const vk::ImageViewCreateInfo image_view_ci({}, images[i], vk::ImageViewType::e2D,
+ image_format, {},
+ {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1});
+ image_views[i] = dev.createImageViewUnique(image_view_ci, nullptr, dld);
+ }
+}
+
+void VKSwapchain::Destroy() {
+ frame_index = 0;
+ present_semaphores.clear();
+ framebuffers.clear();
+ image_views.clear();
+ swapchain.reset();
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
new file mode 100644
index 000000000..2ad84f185
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -0,0 +1,92 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Layout {
+struct FramebufferLayout;
+}
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+
+class VKSwapchain {
+public:
+ explicit VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device);
+ ~VKSwapchain();
+
+ /// Creates (or recreates) the swapchain with a given size.
+ void Create(u32 width, u32 height);
+
+ /// Acquires the next image in the swapchain, waits as needed.
+ void AcquireNextImage();
+
+ /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be
+ /// recreated. Takes responsability for the ownership of fence.
+ bool Present(vk::Semaphore render_semaphore, VKFence& fence);
+
+ /// Returns true when the framebuffer layout has changed.
+ bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
+
+ const vk::Extent2D& GetSize() const {
+ return extent;
+ }
+
+ u32 GetImageCount() const {
+ return image_count;
+ }
+
+ u32 GetImageIndex() const {
+ return image_index;
+ }
+
+ vk::Image GetImageIndex(u32 index) const {
+ return images[index];
+ }
+
+ vk::ImageView GetImageViewIndex(u32 index) const {
+ return *image_views[index];
+ }
+
+ vk::Format GetImageFormat() const {
+ return image_format;
+ }
+
+private:
+ void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height);
+ void CreateSemaphores();
+ void CreateImageViews();
+
+ void Destroy();
+
+ const vk::SurfaceKHR surface;
+ const VKDevice& device;
+
+ UniqueSwapchainKHR swapchain;
+
+ u32 image_count{};
+ std::vector<vk::Image> images;
+ std::vector<UniqueImageView> image_views;
+ std::vector<UniqueFramebuffer> framebuffers;
+ std::vector<VKFence*> fences;
+ std::vector<UniqueSemaphore> present_semaphores;
+
+ u32 image_index{};
+ u32 frame_index{};
+
+ vk::Format image_format{};
+ vk::Extent2D extent{};
+
+ u32 current_width{};
+ u32 current_height{};
+};
+
+} // namespace Vulkan