Diffstat
32 files changed, 724 insertions(+), 267 deletions(-)
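The largest functional change in the diff below is a high-level-emulation (HLE) path for GPU macros (macro.cpp / macro_hle.cpp): uploaded macro code is hashed once when it is first compiled, and if the hash matches a known hand-written replacement, later executions run that HLE routine instead of the interpreted or JIT-compiled (LLE) program. The following is a minimal, self-contained sketch of that lookup-and-dispatch flow; MacroEngineSketch, MacroFn, HashCode, CompileLle and hle_table are illustrative stand-ins, not the real interfaces (the actual code hashes with boost::hash_value and asks HLEMacro::GetHLEProgram for a CachedMacro replacement).

// Minimal sketch of the HLE-vs-LLE macro dispatch introduced in macro.cpp below.
#include <cstdint>
#include <functional>
#include <unordered_map>
#include <vector>

using MacroFn = std::function<void(const std::vector<std::uint32_t>&)>;

struct CacheInfo {
    MacroFn lle_program;          // interpreter/JIT result
    MacroFn hle_program;          // hand-written replacement, if any
    std::uint64_t hash = 0;
    bool has_hle_program = false;
};

// Toy stand-in for boost::hash_value over the uploaded macro code words (FNV-1a).
std::uint64_t HashCode(const std::vector<std::uint32_t>& code) {
    std::uint64_t h = 0xcbf29ce484222325ull;
    for (const std::uint32_t word : code) {
        h = (h ^ word) * 0x100000001b3ull;
    }
    return h;
}

class MacroEngineSketch {
public:
    void AddCode(std::uint32_t method, std::vector<std::uint32_t> code) {
        uploaded_code[method] = std::move(code);
    }

    void Execute(std::uint32_t method, const std::vector<std::uint32_t>& params) {
        const auto [it, inserted] = cache.try_emplace(method);
        CacheInfo& info = it->second;
        if (inserted) {
            // First execution: compile the uploaded code, hash it, and check
            // whether a hand-written HLE routine is registered for that hash.
            const auto& code = uploaded_code.at(method);
            info.hash = HashCode(code);
            info.lle_program = CompileLle(code);
            if (const auto hle = hle_table.find(info.hash); hle != hle_table.end()) {
                info.hle_program = hle->second;
                info.has_hle_program = true;
            }
        }
        (info.has_hle_program ? info.hle_program : info.lle_program)(params);
    }

private:
    static MacroFn CompileLle(const std::vector<std::uint32_t>&) {
        return [](const std::vector<std::uint32_t>&) { /* interpret or JIT here */ };
    }

    std::unordered_map<std::uint32_t, CacheInfo> cache;
    std::unordered_map<std::uint32_t, std::vector<std::uint32_t>> uploaded_code;
    std::unordered_map<std::uint64_t, MacroFn> hle_table; // hash -> HLE routine
};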
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index c55d900e2..6fbee7efa 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -566,6 +566,14 @@ void Controller_NPad::DisconnectNPad(u32 npad_id) { connected_controllers[NPadIdToIndex(npad_id)].is_connected = false; } +void Controller_NPad::SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode) { + gyroscope_zero_drift_mode = drift_mode; +} + +Controller_NPad::GyroscopeZeroDriftMode Controller_NPad::GetGyroscopeZeroDriftMode() const { + return gyroscope_zero_drift_mode; +} + void Controller_NPad::StartLRAssignmentMode() { // Nothing internally is used for lr assignment mode. Since we have the ability to set the // controller types from boot, it doesn't really matter about showing a selection screen diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index 931f03430..5d4c58a43 100644 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h @@ -58,6 +58,12 @@ public: }; static_assert(sizeof(Vibration) == 0x10, "Vibration is an invalid size"); + enum class GyroscopeZeroDriftMode : u32 { + Loose = 0, + Standard = 1, + Tight = 2, + }; + enum class NpadHoldType : u64 { Vertical = 0, Horizontal = 1, @@ -117,6 +123,8 @@ public: void ConnectNPad(u32 npad_id); void DisconnectNPad(u32 npad_id); + void SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode); + GyroscopeZeroDriftMode GetGyroscopeZeroDriftMode() const; LedPattern GetLedPattern(u32 npad_id); void SetVibrationEnabled(bool can_vibrate); bool IsVibrationEnabled() const; @@ -324,8 +332,8 @@ private: std::array<Kernel::EventPair, 10> styleset_changed_events; Vibration last_processed_vibration{}; std::array<ControllerHolder, 10> connected_controllers{}; + GyroscopeZeroDriftMode gyroscope_zero_drift_mode{GyroscopeZeroDriftMode::Standard}; bool can_controllers_vibrate{true}; - std::array<ControllerPad, 10> npad_pad_states{}; bool is_in_lr_assignment_mode{false}; Core::System& system; diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 72a050de2..57d5edea7 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -185,8 +185,8 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) { {77, nullptr, "GetAccelerometerPlayMode"}, {78, nullptr, "ResetAccelerometerPlayMode"}, {79, &Hid::SetGyroscopeZeroDriftMode, "SetGyroscopeZeroDriftMode"}, - {80, nullptr, "GetGyroscopeZeroDriftMode"}, - {81, nullptr, "ResetGyroscopeZeroDriftMode"}, + {80, &Hid::GetGyroscopeZeroDriftMode, "GetGyroscopeZeroDriftMode"}, + {81, &Hid::ResetGyroscopeZeroDriftMode, "ResetGyroscopeZeroDriftMode"}, {82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"}, {83, nullptr, "IsFirmwareUpdateAvailableForSixAxisSensor"}, {91, &Hid::ActivateGesture, "ActivateGesture"}, @@ -230,15 +230,15 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) { {211, nullptr, "IsVibrationDeviceMounted"}, {300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"}, {301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"}, - {302, nullptr, "StopConsoleSixAxisSensor"}, - {303, nullptr, "ActivateSevenSixAxisSensor"}, - {304, nullptr, "StartSevenSixAxisSensor"}, + {302, &Hid::StopConsoleSixAxisSensor, "StopConsoleSixAxisSensor"}, + {303, &Hid::ActivateSevenSixAxisSensor, "ActivateSevenSixAxisSensor"}, + 
{304, &Hid::StartSevenSixAxisSensor, "StartSevenSixAxisSensor"}, {305, &Hid::StopSevenSixAxisSensor, "StopSevenSixAxisSensor"}, {306, &Hid::InitializeSevenSixAxisSensor, "InitializeSevenSixAxisSensor"}, - {307, nullptr, "FinalizeSevenSixAxisSensor"}, + {307, &Hid::FinalizeSevenSixAxisSensor, "FinalizeSevenSixAxisSensor"}, {308, nullptr, "SetSevenSixAxisSensorFusionStrength"}, {309, nullptr, "GetSevenSixAxisSensorFusionStrength"}, - {310, nullptr, "ResetSevenSixAxisSensorTimestamp"}, + {310, &Hid::ResetSevenSixAxisSensorTimestamp, "ResetSevenSixAxisSensorTimestamp"}, {400, nullptr, "IsUsbFullKeyControllerEnabled"}, {401, nullptr, "EnableUsbFullKeyController"}, {402, nullptr, "IsUsbFullKeyControllerConnected"}, @@ -374,6 +374,15 @@ void Hid::ActivateKeyboard(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } +void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto flags{rp.Pop<u32>()}; + LOG_WARNING(Service_HID, "(STUBBED) called. flags={}", flags); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto unknown{rp.Pop<u32>()}; @@ -413,15 +422,59 @@ void Hid::StartSixAxisSensor(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } +void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto handle{rp.Pop<u32>()}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + + LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle, + applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto handle{rp.Pop<u32>()}; const auto drift_mode{rp.Pop<u32>()}; const auto applet_resource_user_id{rp.Pop<u64>()}; - LOG_WARNING(Service_HID, - "(STUBBED) called, handle={}, drift_mode={}, applet_resource_user_id={}", handle, - drift_mode, applet_resource_user_id); + applet_resource->GetController<Controller_NPad>(HidController::NPad) + .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode{drift_mode}); + + LOG_DEBUG(Service_HID, "called, handle={}, drift_mode={}, applet_resource_user_id={}", handle, + drift_mode, applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + +void Hid::GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto handle{rp.Pop<u32>()}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + + LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle, + applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push<u32>( + static_cast<u32>(applet_resource->GetController<Controller_NPad>(HidController::NPad) + .GetGyroscopeZeroDriftMode())); +} + +void Hid::ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto handle{rp.Pop<u32>()}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + + applet_resource->GetController<Controller_NPad>(HidController::NPad) + .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode::Standard); + + LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle, + applet_resource_user_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); @@ -832,33 +885,35 @@ void Hid::StartConsoleSixAxisSensor(Kernel::HLERequestContext& 
ctx) { rb.Push(RESULT_SUCCESS); } -void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) { +void Hid::StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto handle{rp.Pop<u32>()}; + const auto applet_resource_user_id{rp.Pop<u64>()}; - LOG_WARNING(Service_HID, "(STUBBED) called, handle={}", handle); + LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle, + applet_resource_user_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); } -void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) { +void Hid::ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto applet_resource_user_id{rp.Pop<u64>()}; - const auto unknown{rp.Pop<u32>()}; - LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}, unknown={}", - applet_resource_user_id, unknown); + LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}", + applet_resource_user_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); } -void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) { +void Hid::StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; - const auto unknown{rp.Pop<u32>()}; + const auto applet_resource_user_id{rp.Pop<u64>()}; - LOG_WARNING(Service_HID, "(STUBBED) called, unknown={}", unknown); + LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}", + applet_resource_user_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); @@ -882,10 +937,46 @@ void Hid::InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } -void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) { +void Hid::FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; - const auto flags{rp.Pop<u32>()}; - LOG_WARNING(Service_HID, "(STUBBED) called. 
flags={}", flags); + const auto applet_resource_user_id{rp.Pop<u64>()}; + + LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}", + applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + +void Hid::ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + + LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}", + applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + +void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto applet_resource_user_id{rp.Pop<u64>()}; + const auto is_palma_all_connectable{rp.Pop<bool>()}; + + LOG_WARNING(Service_HID, + "(STUBBED) called, applet_resource_user_id={}, is_palma_all_connectable={}", + applet_resource_user_id, is_palma_all_connectable); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + +void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto palma_boost_mode{rp.Pop<bool>()}; + + LOG_WARNING(Service_HID, "(STUBBED) called, palma_boost_mode={}", palma_boost_mode); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index d481a75f8..6fb048360 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h @@ -91,10 +91,14 @@ private: void ActivateTouchScreen(Kernel::HLERequestContext& ctx); void ActivateMouse(Kernel::HLERequestContext& ctx); void ActivateKeyboard(Kernel::HLERequestContext& ctx); + void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx); void ActivateGesture(Kernel::HLERequestContext& ctx); void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx); void StartSixAxisSensor(Kernel::HLERequestContext& ctx); + void StopSixAxisSensor(Kernel::HLERequestContext& ctx); void SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx); + void GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx); + void ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx); void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx); void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx); void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx); @@ -126,12 +130,15 @@ private: void IsVibrationPermitted(Kernel::HLERequestContext& ctx); void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); - void StopSixAxisSensor(Kernel::HLERequestContext& ctx); - void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); - void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); + void StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); + void ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx); + void StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx); void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx); void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx); - void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx); + void FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx); + void ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx); + void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); + void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); std::shared_ptr<IAppletResource> applet_resource; Core::System& system; diff --git a/src/video_core/CMakeLists.txt 
b/src/video_core/CMakeLists.txt index 099bb446e..2dc752aa9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -27,6 +27,8 @@ add_library(video_core STATIC engines/shader_type.h macro/macro.cpp macro/macro.h + macro/macro_hle.cpp + macro/macro_hle.h macro/macro_interpreter.cpp macro/macro_interpreter.h macro/macro_jit_x64.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index bae1d527c..cf8bdd021 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -41,7 +41,11 @@ class BufferCache { static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; public: - using BufferInfo = std::pair<BufferType, u64>; + struct BufferInfo { + BufferType handle; + u64 offset; + u64 address; + }; BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, bool is_written = false, bool use_fast_cbuf = false) { @@ -50,7 +54,7 @@ public: auto& memory_manager = system.GPU().MemoryManager(); const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); if (!cpu_addr_opt) { - return {GetEmptyBuffer(size), 0}; + return GetEmptyBuffer(size); } const VAddr cpu_addr = *cpu_addr_opt; @@ -88,7 +92,7 @@ public: Buffer* const block = GetBlock(cpu_addr, size); MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); if (!map) { - return {GetEmptyBuffer(size), 0}; + return GetEmptyBuffer(size); } if (is_written) { map->MarkAsModified(true, GetModifiedTicks()); @@ -101,7 +105,7 @@ public: } } - return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))}; + return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()}; } /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. 
@@ -254,27 +258,17 @@ public: committed_flushes.pop_front(); } - virtual BufferType GetEmptyBuffer(std::size_t size) = 0; + virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; protected: explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - std::unique_ptr<StreamBuffer> stream_buffer_) - : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)}, - stream_buffer_handle{stream_buffer->Handle()} {} + std::unique_ptr<StreamBuffer> stream_buffer) + : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {} ~BufferCache() = default; virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; - virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) = 0; - - virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) = 0; - - virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) = 0; - virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { return {}; } @@ -336,11 +330,11 @@ private: const VAddr cpu_addr_end = cpu_addr + size; if (memory_manager.IsGranularRange(gpu_addr, size)) { u8* host_ptr = memory_manager.GetPointer(gpu_addr); - UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr); + block->Upload(block->Offset(cpu_addr), size, host_ptr); } else { staging_buffer.resize(size); memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); - UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data()); + block->Upload(block->Offset(cpu_addr), size, staging_buffer.data()); } return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); } @@ -399,7 +393,7 @@ private: } staging_buffer.resize(size); system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); - UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data()); + block->Upload(block->Offset(interval.lower()), size, staging_buffer.data()); } } @@ -436,7 +430,7 @@ private: const std::size_t size = map->end - map->start; staging_buffer.resize(size); - DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data()); + block->Download(block->Offset(map->start), size, staging_buffer.data()); system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); map->MarkAsModified(false, 0); } @@ -449,7 +443,7 @@ private: buffer_ptr += size; buffer_offset += size; - return {stream_buffer_handle, uploaded_offset}; + return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()}; } void AlignBuffer(std::size_t alignment) { @@ -464,7 +458,7 @@ private: const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; const VAddr cpu_addr = buffer->CpuAddr(); std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size); - CopyBlock(*buffer, *new_buffer, 0, 0, old_size); + new_buffer->CopyFrom(*buffer, 0, 0, old_size); QueueDestruction(std::move(buffer)); const VAddr cpu_addr_end = cpu_addr + new_size - 1; @@ -486,8 +480,8 @@ private: const std::size_t new_size = size_1 + size_2; std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size); - CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1); - CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2); + new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1); + new_buffer->CopyFrom(*second, 0, 
new_buffer->Offset(second_addr), size_2); QueueDestruction(std::move(first)); QueueDestruction(std::move(second)); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index ea3c8a963..c01436295 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -128,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); // Execute the current macro. - macro_engine->Execute(macro_positions[entry], parameters); + macro_engine->Execute(*this, macro_positions[entry], parameters); if (mme_draw.current_mode != MMEDrawMode::Undefined) { FlushMMEInlineDraw(); } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d5fe25065..ef1618990 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1418,6 +1418,14 @@ public: return execute_on; } + VideoCore::RasterizerInterface& GetRasterizer() { + return rasterizer; + } + + const VideoCore::RasterizerInterface& GetRasterizer() const { + return rasterizer; + } + /// Notify a memory write has happened. void OnMemoryWrite() { dirty.flags |= dirty.on_write_stores; diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index 89077a2d8..ef7dad349 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -2,23 +2,37 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <boost/container_hash/hash.hpp> #include "common/assert.h" #include "common/logging/log.h" #include "core/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" +#include "video_core/macro/macro_hle.h" #include "video_core/macro/macro_interpreter.h" #include "video_core/macro/macro_jit_x64.h" namespace Tegra { +MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) + : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {} + +MacroEngine::~MacroEngine() = default; + void MacroEngine::AddCode(u32 method, u32 data) { uploaded_macro_code[method].push_back(data); } -void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { +void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, + const std::vector<u32>& parameters) { auto compiled_macro = macro_cache.find(method); if (compiled_macro != macro_cache.end()) { - compiled_macro->second->Execute(parameters, method); + const auto& cache_info = compiled_macro->second; + if (cache_info.has_hle_program) { + cache_info.hle_program->Execute(parameters, method); + } else { + cache_info.lle_program->Execute(parameters, method); + } } else { // Macro not compiled, check if it's uploaded and if so, compile it auto macro_code = uploaded_macro_code.find(method); @@ -26,8 +40,21 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); return; } - macro_cache[method] = Compile(macro_code->second); - macro_cache[method]->Execute(parameters, method); + auto& cache_info = macro_cache[method]; + cache_info.hash = boost::hash_value(macro_code->second); + cache_info.lle_program = Compile(macro_code->second); + + auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); + if (hle_program.has_value()) { + cache_info.has_hle_program = true; + cache_info.hle_program = std::move(hle_program.value()); + } + + if (cache_info.has_hle_program) { + 
cache_info.hle_program->Execute(parameters, method); + } else { + cache_info.lle_program->Execute(parameters, method); + } } } diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index b76ed891f..4d00b84b0 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -11,9 +11,11 @@ #include "common/common_types.h" namespace Tegra { + namespace Engines { class Maxwell3D; } + namespace Macro { constexpr std::size_t NUM_MACRO_REGISTERS = 8; enum class Operation : u32 { @@ -94,6 +96,8 @@ union MethodAddress { } // namespace Macro +class HLEMacro; + class CachedMacro { public: virtual ~CachedMacro() = default; @@ -107,20 +111,29 @@ public: class MacroEngine { public: - virtual ~MacroEngine() = default; + explicit MacroEngine(Engines::Maxwell3D& maxwell3d); + virtual ~MacroEngine(); // Store the uploaded macro code to compile them when they're called. void AddCode(u32 method, u32 data); // Compiles the macro if its not in the cache, and executes the compiled macro - void Execute(u32 method, const std::vector<u32>& parameters); + void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters); protected: virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; private: - std::unordered_map<u32, std::unique_ptr<CachedMacro>> macro_cache; + struct CacheInfo { + std::unique_ptr<CachedMacro> lle_program{}; + std::unique_ptr<CachedMacro> hle_program{}; + u64 hash{}; + bool has_hle_program{}; + }; + + std::unordered_map<u32, CacheInfo> macro_cache; std::unordered_map<u32, std::vector<u32>> uploaded_macro_code; + std::unique_ptr<HLEMacro> hle_macros; }; std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d); diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp new file mode 100644 index 000000000..410f99018 --- /dev/null +++ b/src/video_core/macro/macro_hle.cpp @@ -0,0 +1,113 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <array> +#include <vector> +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro_hle.h" +#include "video_core/rasterizer_interface.h" + +namespace Tegra { + +namespace { +// HLE'd functions +static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, + const std::vector<u32>& parameters) { + const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); + + maxwell3d.regs.draw.topology.Assign( + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & + ~(0x3ffffff << 26))); + maxwell3d.regs.vb_base_instance = parameters[5]; + maxwell3d.mme_draw.instance_count = instance_count; + maxwell3d.regs.vb_element_base = parameters[3]; + maxwell3d.regs.index_array.count = parameters[1]; + maxwell3d.regs.index_array.first = parameters[4]; + + if (maxwell3d.ShouldExecute()) { + maxwell3d.GetRasterizer().Draw(true, true); + } + maxwell3d.regs.index_array.count = 0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} + +static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, + const std::vector<u32>& parameters) { + const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + + maxwell3d.regs.vertex_buffer.first = parameters[3]; + maxwell3d.regs.vertex_buffer.count = parameters[1]; + maxwell3d.regs.vb_base_instance = parameters[4]; + maxwell3d.regs.draw.topology.Assign( + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); + maxwell3d.mme_draw.instance_count = count; + + if (maxwell3d.ShouldExecute()) { + maxwell3d.GetRasterizer().Draw(false, true); + } + maxwell3d.regs.vertex_buffer.count = 0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} + +static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, + const std::vector<u32>& parameters) { + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.index_array.first = parameters[3]; + maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base? + maxwell3d.regs.index_array.count = parameters[1]; + maxwell3d.regs.vb_element_base = element_base; + maxwell3d.regs.vb_base_instance = base_instance; + maxwell3d.mme_draw.instance_count = instance_count; + maxwell3d.CallMethodFromMME(0x8e3, 0x640); + maxwell3d.CallMethodFromMME(0x8e4, element_base); + maxwell3d.CallMethodFromMME(0x8e5, base_instance); + maxwell3d.regs.draw.topology.Assign( + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); + if (maxwell3d.ShouldExecute()) { + maxwell3d.GetRasterizer().Draw(true, true); + } + maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base? 
+ maxwell3d.regs.index_array.count = 0; + maxwell3d.regs.vb_element_base = 0x0; + maxwell3d.regs.vb_base_instance = 0x0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.CallMethodFromMME(0x8e3, 0x640); + maxwell3d.CallMethodFromMME(0x8e4, 0x0); + maxwell3d.CallMethodFromMME(0x8e5, 0x0); + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} +} // namespace + +constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ + std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0), + std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD), + std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7), +}}; + +HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +HLEMacro::~HLEMacro() = default; + +std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { + const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), + [hash](const auto& pair) { return pair.first == hash; }); + if (it == hle_funcs.end()) { + return std::nullopt; + } + return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); +} + +HLEMacroImpl::~HLEMacroImpl() = default; + +HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func) + : maxwell3d(maxwell3d), func(func) {} + +void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { + func(maxwell3d, parameters); +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h new file mode 100644 index 000000000..37af875a0 --- /dev/null +++ b/src/video_core/macro/macro_hle.h @@ -0,0 +1,44 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <memory> +#include <optional> +#include <vector> +#include "common/common_types.h" +#include "video_core/macro/macro.h" + +namespace Tegra { + +namespace Engines { +class Maxwell3D; +} + +using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); + +class HLEMacro { +public: + explicit HLEMacro(Engines::Maxwell3D& maxwell3d); + ~HLEMacro(); + + std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; + +private: + Engines::Maxwell3D& maxwell3d; +}; + +class HLEMacroImpl : public CachedMacro { +public: + explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func); + ~HLEMacroImpl(); + + void Execute(const std::vector<u32>& parameters, u32 method) override; + +private: + Engines::Maxwell3D& maxwell3d; + HLEFunction func; +}; + +} // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index 5edff27aa..aa5256419 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -11,7 +11,8 @@ MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) + : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 30abb66e5..07292702f 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -28,7 +28,8 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ BRANCH_HOLDER, }); -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) + : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { return std::make_unique<MacroJITx64Impl>(maxwell3d, code); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index ad0577a4f..d9f7b4cc6 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -22,21 +22,46 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); + glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); + } } Buffer::~Buffer() = default; +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const { + glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), + data); +} + +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const { + 
MICROPROFILE_SCOPE(OpenGL_Buffer_Download); + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + glGetNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), + data); +} + +void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) const { + glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), + static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); +} + OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, - const Device& device, std::size_t stream_size) - : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { + const Device& device_, std::size_t stream_size) + : GenericBufferCache{rasterizer, system, + std::make_unique<OGLStreamBuffer>(device_, stream_size, true)}, + device{device_} { if (!device.HasFastBufferSubData()) { return; } - static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); + static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); for (const GLuint cbuf : cbufs) { glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); @@ -48,39 +73,20 @@ OGLBufferCache::~OGLBufferCache() { } std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared<Buffer>(cpu_addr, size); + return std::make_shared<Buffer>(device, cpu_addr, size); } -GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { - return 0; -} - -void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) { - glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), - static_cast<GLsizeiptr>(size), data); -} - -void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) { - MICROPROFILE_SCOPE(OpenGL_Buffer_Download); - glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), - static_cast<GLsizeiptr>(size), data); -} - -void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) { - glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset), - static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); +OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { + return {0, 0, 0}; } OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, std::size_t size) { DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); const GLuint cbuf = cbufs[cbuf_cursor++]; + glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); - return {cbuf, 0}; + return {cbuf, 0, 0}; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a49aaf9c4..59d95adbc 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -25,15 +25,27 @@ class RasterizerOpenGL; class Buffer : public VideoCommon::BufferBlock { public: - explicit Buffer(VAddr cpu_addr, const std::size_t size); + explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); ~Buffer(); - GLuint Handle() const { + void Upload(std::size_t offset, std::size_t size, const u8* data) const; + + void Download(std::size_t offset, 
std::size_t size, u8* data) const; + + void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) const; + + GLuint Handle() const noexcept { return gl_buffer.handle; } + u64 Address() const noexcept { + return gpu_address; + } + private: OGLBuffer gl_buffer; + u64 gpu_address = 0; }; using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; @@ -43,7 +55,7 @@ public: const Device& device, std::size_t stream_size); ~OGLBufferCache(); - GLuint GetEmptyBuffer(std::size_t) override; + BufferInfo GetEmptyBuffer(std::size_t) override; void Acquire() noexcept { cbuf_cursor = 0; @@ -52,22 +64,16 @@ public: protected: std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; - void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) override; - - void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) override; - - void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) override; - BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; private: + static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * + Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; + + const Device& device; + std::size_t cbuf_cursor = 0; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * - Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram> - cbufs; + std::array<GLuint, NUM_CBUFS> cbufs{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b31d604e4..b6b6659c1 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -178,7 +178,7 @@ bool IsASTCSupported() { for (const GLenum format : formats) { for (const GLenum support : required_support) { GLint value; - glGetInternalformativ(GL_TEXTURE_2D, format, support, 1, &value); + glGetInternalformativ(target, format, support, 1, &value); if (value != GL_FULL_SUPPORT) { return false; } @@ -188,16 +188,32 @@ bool IsASTCSupported() { return true; } +/// @brief Returns true when a GL_RENDERER is a Turing GPU +/// @param renderer GL_RENDERER string +bool IsTuring(std::string_view renderer) { + static constexpr std::array<std::string_view, 12> TURING_GPUS = { + "GTX 1650", "GTX 1660", "RTX 2060", "RTX 2070", + "RTX 2080", "TITAN RTX", "Quadro RTX 3000", "Quadro RTX 4000", + "Quadro RTX 5000", "Quadro RTX 6000", "Quadro RTX 8000", "Tesla T4", + }; + return std::any_of(TURING_GPUS.begin(), TURING_GPUS.end(), + [renderer](std::string_view candidate) { + return renderer.find(candidate) != std::string_view::npos; + }); +} + } // Anonymous namespace Device::Device() : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); + const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); const std::vector extensions = GetExtensions(); const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_amd = vendor == "ATI Technologies Inc."; + const bool is_turing = is_nvidia && IsTuring(renderer); bool disable_fast_buffer_sub_data = false; if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { @@ -216,12 +232,21 @@ 
Device::Device() has_shader_ballot = GLAD_GL_ARB_shader_ballot; has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); + has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod"); has_astc = IsASTCSupported(); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); - has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + + // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive + // uniform buffers as "push constants" + has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; + + // Nvidia's driver on Turing GPUs randomly crashes when the buffer is made resident, or on + // DeleteBuffers. Disable unified memory on these devices. + has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory && !is_turing; + use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; @@ -245,6 +270,7 @@ Device::Device(std::nullptr_t) { has_shader_ballot = true; has_vertex_viewport_layer = true; has_image_load_formatted = true; + has_texture_shadow_lod = true; has_variable_aoffi = true; } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 145347943..e1d811966 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -68,6 +68,14 @@ public: return has_image_load_formatted; } + bool HasTextureShadowLod() const { + return has_texture_shadow_lod; + } + + bool HasVertexBufferUnifiedMemory() const { + return has_vertex_buffer_unified_memory; + } + bool HasASTC() const { return has_astc; } @@ -110,6 +118,8 @@ private: bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; + bool has_texture_shadow_lod{}; + bool has_vertex_buffer_unified_memory{}; bool has_astc{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d6c11320..362457ffe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -61,7 +61,8 @@ constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; -constexpr std::size_t NumSupportedVertexAttributes = 16; +constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; +constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, @@ -193,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() { // avoid OpenGL errors. // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't // assume every shader uses them all. 
- for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { + for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { if (!flags[Dirty::VertexFormat0 + index]) { continue; } @@ -231,9 +232,11 @@ void RasterizerOpenGL::SetupVertexBuffer() { MICROPROFILE_SCOPE(OpenGL_VB); + const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); + // Upload all guest vertex arrays sequentially to our buffer const auto& regs = gpu.regs; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { if (!flags[Dirty::VertexBuffer0 + index]) { continue; } @@ -246,16 +249,25 @@ void RasterizerOpenGL::SetupVertexBuffer() { const GPUVAddr start = vertex_array.StartAddress(); const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); - ASSERT(end >= start); + + const GLuint gl_index = static_cast<GLuint>(index); const u64 size = end - start; if (size == 0) { - glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride); + glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); + if (use_unified_memory) { + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0); + } continue; } - const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); - glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, - vertex_array.stride); + const auto info = buffer_cache.UploadMemory(start, size); + if (use_unified_memory) { + glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, + info.address + info.offset, size); + } else { + glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride); + } } } @@ -268,7 +280,7 @@ void RasterizerOpenGL::SetupVertexInstances() { flags[Dirty::VertexInstances] = false; const auto& regs = gpu.regs; - for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { + for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { if (!flags[Dirty::VertexInstance0 + index]) { continue; } @@ -285,9 +297,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { MICROPROFILE_SCOPE(OpenGL_Index); const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); - const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); - return offset; + const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); + return info.offset; } void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { @@ -643,9 +655,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { if (!device.UseAssemblyShaders()) { MaxwellUniformData ubo; ubo.SetFromRegs(gpu); - const auto [buffer, offset] = + const auto info = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, + glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, static_cast<GLsizeiptr>(sizeof(ubo))); } @@ -956,8 +968,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, if (device.UseAssemblyShaders()) { glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, - 
buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float)); } return; } @@ -970,24 +981,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); const GPUVAddr gpu_addr = buffer.address; - auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); + auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); if (device.UseAssemblyShaders()) { UNIMPLEMENTED_IF(use_unified); - if (offset != 0) { + if (info.offset != 0) { const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; - glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); - cbuf = staging_cbuf; - offset = 0; + glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); + info.handle = staging_cbuf; + info.offset = 0; } - glBindBufferRangeNV(stage, binding, cbuf, offset, size); + glBindBufferRangeNV(stage, binding, info.handle, info.offset, size); return; } if (use_unified) { - glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); + glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, + unified_offset, size); } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size); } } @@ -1023,9 +1035,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size) { const auto alignment{device.GetShaderStorageBufferAlignment()}; - const auto [ssbo, buffer_offset] = - buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset, + const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, static_cast<GLsizeiptr>(size)); } @@ -1712,8 +1723,9 @@ void RasterizerOpenGL::EndTransformFeedback() { const GLuint handle = transform_feedback_buffers[index].handle; const GPUVAddr gpu_addr = binding.Address(); const std::size_t size = binding.buffer_size; - const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); + const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, + static_cast<GLsizeiptr>(size)); } } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d6e30b321..2c49aeaac 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; using Tegra::Shader::PixelImap; using Tegra::Shader::Register; +using Tegra::Shader::TextureType; using VideoCommon::Shader::BuildTransformFeedback; using VideoCommon::Shader::Registry; @@ -526,6 +527,9 @@ private: if (device.HasImageLoadFormatted()) { code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); } + if (device.HasTextureShadowLod()) { + code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); + } if 
(device.HasWarpIntrinsics()) { code.AddLine("#extension GL_NV_gpu_shader5 : require"); code.AddLine("#extension GL_NV_shader_thread_group : require"); @@ -909,13 +913,13 @@ private: return "samplerBuffer"; } switch (sampler.type) { - case Tegra::Shader::TextureType::Texture1D: + case TextureType::Texture1D: return "sampler1D"; - case Tegra::Shader::TextureType::Texture2D: + case TextureType::Texture2D: return "sampler2D"; - case Tegra::Shader::TextureType::Texture3D: + case TextureType::Texture3D: return "sampler3D"; - case Tegra::Shader::TextureType::TextureCube: + case TextureType::TextureCube: return "samplerCube"; default: UNREACHABLE(); @@ -1380,8 +1384,19 @@ private: const std::size_t count = operation.GetOperandsCount(); const bool has_array = meta->sampler.is_array; const bool has_shadow = meta->sampler.is_shadow; + const bool workaround_lod_array_shadow_as_grad = + !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && + ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || + meta->sampler.type == TextureType::TextureCube); + + std::string expr = "texture"; + + if (workaround_lod_array_shadow_as_grad) { + expr += "Grad"; + } else { + expr += function_suffix; + } - std::string expr = "texture" + function_suffix; if (!meta->aoffi.empty()) { expr += "Offset"; } else if (!meta->ptp.empty()) { @@ -1415,6 +1430,16 @@ private: expr += ')'; } + if (workaround_lod_array_shadow_as_grad) { + switch (meta->sampler.type) { + case TextureType::Texture2D: + return expr + ", vec2(0.0), vec2(0.0))"; + case TextureType::TextureCube: + return expr + ", vec3(0.0), vec3(0.0))"; + } + UNREACHABLE(); + } + for (const auto& variant : extras) { if (const auto argument = std::get_if<TextureArgument>(&variant)) { expr += GenerateTextureArgument(*argument); @@ -2041,8 +2066,19 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture( - operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); + std::string expr{}; + + if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && + ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || + meta->sampler.type == TextureType::TextureCube)) { + LOG_ERROR(Render_OpenGL, + "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); + expr = GenerateTexture(operation, "Lod", {}); + } else { + expr = GenerateTexture(operation, "Lod", + {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); + } + if (meta->sampler.is_shadow) { expr = "vec4(" + expr + ')'; } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 932a2f69e..3655ff629 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -2,11 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include <deque> +#include <tuple> #include <vector> + #include "common/alignment.h" #include "common/assert.h" #include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", @@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, - bool use_persistent) +OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) : buffer_size(size) { gl_buffer.Create(); @@ -29,23 +30,19 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p allocate_size *= 2; } - if (use_persistent) { - persistent = true; - coherent = prefer_coherent; - const GLbitfield flags = - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); - glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); - mapped_ptr = static_cast<u8*>(glMapNamedBufferRange( - gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); - } else { - glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW); + static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; + glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); + mapped_ptr = static_cast<u8*>( + glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); + + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } } OGLStreamBuffer::~OGLStreamBuffer() { - if (persistent) { - glUnmapNamedBuffer(gl_buffer.handle); - } + glUnmapNamedBuffer(gl_buffer.handle); gl_buffer.Release(); } @@ -60,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a bool invalidate = false; if (buffer_pos + size > buffer_size) { + MICROPROFILE_SCOPE(OpenGL_StreamBuffer); + glInvalidateBufferData(gl_buffer.handle); + buffer_pos = 0; invalidate = true; - - if (persistent) { - glUnmapNamedBuffer(gl_buffer.handle); - } } - if (invalidate || !persistent) { - MICROPROFILE_SCOPE(OpenGL_StreamBuffer); - GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | - (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | - (invalidate ? 
GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); - mapped_ptr = static_cast<u8*>( - glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags)); - mapped_offset = buffer_pos; - } - - return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); + return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); } void OGLStreamBuffer::Unmap(GLsizeiptr size) { ASSERT(size <= mapped_size); - if (!coherent && size > 0) { - glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); - } - - if (!persistent) { - glUnmapNamedBuffer(gl_buffer.handle); + if (size > 0) { + glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size); } buffer_pos += size; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 866da3594..307a67113 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -11,10 +11,11 @@ namespace OpenGL { +class Device; + class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, - bool use_persistent = true); + explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); ~OGLStreamBuffer(); /* @@ -33,19 +34,20 @@ public: return gl_buffer.handle; } - GLsizeiptr Size() const { + u64 Address() const { + return gpu_address; + } + + GLsizeiptr Size() const noexcept { return buffer_size; } private: OGLBuffer gl_buffer; - bool coherent = false; - bool persistent = false; - + GLuint64EXT gpu_address = 0; GLintptr buffer_pos = 0; GLsizeiptr buffer_size = 0; - GLintptr mapped_offset = 0; GLsizeiptr mapped_size = 0; u8* mapped_ptr = nullptr; }; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6214fcbc3..c40adb6e7 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } } void RendererOpenGL::AddTelemetryFields() { @@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { offsetof(ScreenRectVertex, tex_coord)); glVertexAttribBinding(PositionLocation, 0); glVertexAttribBinding(TexCoordLocation, 0); - glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + if (device.HasVertexBufferUnifiedMemory()) { + glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, + sizeof(vertices)); + } else { + glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + } glBindTextureUnit(0, screen_info.display_texture); glBindSampler(0, 0); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 61bf507f4..8b18d32e6 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ 
@@ -107,6 +107,9 @@ private:
     OGLPipeline pipeline;
     OGLFramebuffer screenshot_framebuffer;
 
+    // GPU address of the vertex buffer
+    GLuint64EXT vertex_buffer_address = 0;
+
     /// Display information for Switch screen
     ScreenInfo screen_info;
 
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1fde38328..f10f96cd8 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -37,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
 
 } // Anonymous namespace
 
-Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr,
-               std::size_t size)
-    : VideoCommon::BufferBlock{cpu_addr, size} {
+Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
+               VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
+    : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
     VkBufferCreateInfo ci;
     ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
     ci.pNext = nullptr;
@@ -56,40 +56,15 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cp
 
 Buffer::~Buffer() = default;
 
-VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
-                             const VKDevice& device, VKMemoryManager& memory_manager,
-                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
-    : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
-                                                                 CreateStreamBuffer(device,
-                                                                                    scheduler)},
-      device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
-                                                                                staging_pool} {}
-
-VKBufferCache::~VKBufferCache() = default;
-
-std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
-    return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size);
-}
-
-VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
-    size = std::max(size, std::size_t(4));
-    const auto& empty = staging_pool.GetUnusedBuffer(size, false);
-    scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
-        cmdbuf.FillBuffer(buffer, 0, size, 0);
-    });
-    return *empty.handle;
-}
-
-void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
-                                    const u8* data) {
+void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
     const auto& staging = staging_pool.GetUnusedBuffer(size, true);
     std::memcpy(staging.commit->Map(size), data, size);
 
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset,
-                      size](vk::CommandBuffer cmdbuf) {
-        cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
+
+    const VkBuffer handle = Handle();
+    scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
+        cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
 
         VkBufferMemoryBarrier barrier;
         barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@@ -98,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
         barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
         barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
         barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = buffer;
+        barrier.buffer = handle;
         barrier.offset = offset;
         barrier.size = size;
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
@@ -106,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
     });
 }
 
-void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
-                                      u8* data) {
+void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
     const auto& staging = staging_pool.GetUnusedBuffer(size, true);
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset,
-                      size](vk::CommandBuffer cmdbuf) {
+
+    const VkBuffer handle = Handle();
+    scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
         VkBufferMemoryBarrier barrier;
         barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
         barrier.pNext = nullptr;
@@ -119,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
         barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
         barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
         barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = buffer;
+        barrier.buffer = handle;
         barrier.offset = offset;
         barrier.size = size;
 
@@ -127,17 +102,19 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
                                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                                    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
-        cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size});
+        cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
     });
     scheduler.Finish();
 
     std::memcpy(data, staging.commit->Map(size), size);
 }
 
-void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
-                              std::size_t dst_offset, std::size_t size) {
+void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
+                      std::size_t size) const {
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset,
+
+    const VkBuffer dst_buffer = Handle();
+    scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
                       size](vk::CommandBuffer cmdbuf) {
         cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
 
@@ -165,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
     });
 }
 
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+                             const VKDevice& device, VKMemoryManager& memory_manager,
+                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
+    : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
+                                                                 CreateStreamBuffer(device,
+                                                                                    scheduler)},
+      device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
+                                                                                staging_pool} {}
+
+VKBufferCache::~VKBufferCache() = default;
+
+std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+    return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
+                                    size);
+}
+
+VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
+    size = std::max(size, std::size_t(4));
+    const auto& empty = staging_pool.GetUnusedBuffer(size, false);
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
+        cmdbuf.FillBuffer(buffer, 0, size, 0);
+    });
+    return {*empty.handle, 0, 0};
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 9ebbef835..3630aca77 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -25,15 +25,29 @@ class VKScheduler;
 
 class Buffer final : public VideoCommon::BufferBlock {
 public:
-    explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr,
-                    std::size_t size);
+    explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
+                    VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
     ~Buffer();
 
+    void Upload(std::size_t offset, std::size_t size, const u8* data) const;
+
+    void Download(std::size_t offset, std::size_t size, u8* data) const;
+
+    void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
+                  std::size_t size) const;
+
     VkBuffer Handle() const {
         return *buffer.handle;
     }
 
+    u64 Address() const {
+        return 0;
+    }
+
 private:
+    VKScheduler& scheduler;
+    VKStagingBufferPool& staging_pool;
+
     VKBuffer buffer;
 };
 
@@ -44,20 +58,11 @@ public:
                            VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
     ~VKBufferCache();
 
-    VkBuffer GetEmptyBuffer(std::size_t size) override;
+    BufferInfo GetEmptyBuffer(std::size_t size) override;
 
 protected:
     std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
 
-    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
-                         const u8* data) override;
-
-    void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
-                           u8* data) override;
-
-    void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
-                   std::size_t dst_offset, std::size_t size) override;
-
 private:
     const VKDevice& device;
     VKMemoryManager& memory_manager;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 29001953c..e3714ee6d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -870,10 +870,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
     UNIMPLEMENTED_IF(binding.buffer_offset != 0);
 
     const GPUVAddr gpu_addr = binding.Address();
-    const auto size = static_cast<VkDeviceSize>(binding.buffer_size);
-    const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+    const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
+    const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
 
-    scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
         cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
         cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
     });
@@ -925,8 +925,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
             buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
             continue;
         }
-        const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
-        buffer_bindings.AddVertexBinding(buffer, offset);
+        const auto info = buffer_cache.UploadMemory(start, size);
+        buffer_bindings.AddVertexBinding(info.handle, info.offset);
     }
 }
 
@@ -948,7 +948,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
             break;
         }
         const GPUVAddr gpu_addr = regs.index_array.IndexStart();
-        auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+        const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+        VkBuffer buffer = info.handle;
+        u64 offset = info.offset;
 
         std::tie(buffer, offset) = quad_indexed_pass.Assemble(
             regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
@@ -962,7 +964,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
             break;
         }
         const GPUVAddr gpu_addr = regs.index_array.IndexStart();
-        auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+        const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+        VkBuffer buffer = info.handle;
+        u64 offset = info.offset;
 
         auto format = regs.index_array.format;
         const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
@@ -1109,10 +1113,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
         Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
     ASSERT(size <= MaxConstbufferSize);
 
-    const auto [buffer_handle, offset] =
+    const auto info =
         buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
-
-    update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
+    update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
 }
 
 void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
@@ -1126,14 +1129,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
         // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
         // default buffer.
         static constexpr std::size_t dummy_size = 4;
-        const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
-        update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
+        const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
+        update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
         return;
     }
 
-    const auto [buffer, offset] = buffer_cache.UploadMemory(
+    const auto info = buffer_cache.UploadMemory(
         actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
-    update_descriptor_queue.AddBuffer(buffer, offset, size);
+    update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
 }
 
 void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index c765c60a0..689f0d276 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -35,10 +35,14 @@ public:
     /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
     void Unmap(u64 size);
 
-    VkBuffer Handle() const {
+    VkBuffer Handle() const noexcept {
         return *buffer;
     }
 
+    u64 Address() const noexcept {
+        return 0;
+    }
+
 private:
     struct Watch final {
         VKFenceWatch fence;
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 60b6ad72a..07778dc3e 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -97,6 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
         break;
     case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
+    case TextureFormat::BF10GF11RF11:
         if (component == 0) {
             return descriptor.b_type;
         }
@@ -119,7 +120,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
         }
         break;
     }
-    UNIMPLEMENTED_MSG("texture format not implement={}", format);
+    UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
     return ComponentType::FLOAT;
 }
 
@@ -191,6 +192,14 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
             return 6;
         }
         return 0;
+    case TextureFormat::BF10GF11RF11:
+        if (component == 1 || component == 2) {
+            return 11;
+        }
+        if (component == 0) {
+            return 10;
+        }
+        return 0;
     case TextureFormat::G8R24:
         if (component == 0) {
             return 8;
@@ -211,10 +220,9 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
         return (component == 0 || component == 1) ? 8 : 0;
     case TextureFormat::G4R4:
         return (component == 0 || component == 1) ? 4 : 0;
-    default:
-        UNIMPLEMENTED_MSG("texture format not implement={}", format);
-        return 0;
     }
+    UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+    return 0;
 }
 
 std::size_t GetImageComponentMask(TextureFormat format) {
@@ -235,6 +243,7 @@ std::size_t GetImageComponentMask(TextureFormat format) {
     case TextureFormat::R32_B24G8:
     case TextureFormat::B5G6R5:
     case TextureFormat::B6G5R5:
+    case TextureFormat::BF10GF11RF11:
         return std::size_t{R | G | B};
     case TextureFormat::R32_G32:
     case TextureFormat::R16_G16:
@@ -248,10 +257,9 @@ std::size_t GetImageComponentMask(TextureFormat format) {
     case TextureFormat::R8:
     case TextureFormat::R1:
         return std::size_t{R};
-    default:
-        UNIMPLEMENTED_MSG("texture format not implement={}", format);
-        return std::size_t{R | G | B | A};
     }
+    UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+    return std::size_t{R | G | B | A};
 }
 
 std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
@@ -299,7 +307,7 @@ std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type,
         return {std::move(original_value), true};
     }
     default:
-        UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
+        UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
         return {std::move(original_value), true};
     }
 }
@@ -459,7 +467,7 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
     default:
         break;
     }
-    UNIMPLEMENTED_MSG("Unimplemented operation={} type={}",
+    UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
                       static_cast<u64>(instr.suatom_d.operation.Value()),
                       static_cast<u64>(instr.suatom_d.operation_type.Value()));
     return OperationCode::AtomicImageAdd;
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 059b96e70..4dedb2549 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -56,6 +56,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include <QShortcut>
 #include <QStatusBar>
 #include <QSysInfo>
+#include <QUrl>
 #include <QtConcurrent/QtConcurrent>
 #include <fmt/format.h>
 
@@ -836,6 +837,7 @@ void GMainWindow::ConnectMenuEvents() {
     connect(ui.action_Stop, &QAction::triggered, this, &GMainWindow::OnStopGame);
     connect(ui.action_Report_Compatibility, &QAction::triggered, this,
             &GMainWindow::OnMenuReportCompatibility);
+    connect(ui.action_Open_Mods_Page, &QAction::triggered, this, &GMainWindow::OnOpenModsPage);
     connect(ui.action_Restart, &QAction::triggered, this, [this] { BootGame(QString(game_path)); });
     connect(ui.action_Configure, &QAction::triggered, this, &GMainWindow::OnConfigure);
 
@@ -1807,6 +1809,16 @@ void GMainWindow::OnMenuReportCompatibility() {
     }
 }
 
+void GMainWindow::OnOpenModsPage() {
+    const auto mods_page_url = QStringLiteral("https://github.com/yuzu-emu/yuzu/wiki/Switch-Mods");
+    const QUrl mods_page(mods_page_url);
+    const bool open = QDesktopServices::openUrl(mods_page);
+    if (!open) {
+        QMessageBox::warning(this, tr("Error opening URL"),
+                             tr("Unable to open the URL \"%1\".").arg(mods_page_url));
+    }
+}
+
 void GMainWindow::ToggleFullscreen() {
     if (!emulation_running) {
         return;
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 4f4c8ddbe..d55e55cc6 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -181,6 +181,7 @@ private slots:
     void OnPauseGame();
     void OnStopGame();
     void OnMenuReportCompatibility();
+    void OnOpenModsPage();
     /// Called whenever a user selects a game in the game list widget.
     void OnGameListLoadFile(QString game_path);
     void OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path);
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index 97c90f50b..b5745dfd5 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -113,6 +113,7 @@
      <string>&Help</string>
     </property>
     <addaction name="action_Report_Compatibility"/>
+    <addaction name="action_Open_Mods_Page"/>
     <addaction name="separator"/>
     <addaction name="action_About"/>
    </widget>
@@ -256,6 +257,11 @@
     <bool>false</bool>
    </property>
   </action>
+  <action name="action_Open_Mods_Page">
+   <property name="text">
+    <string>Open Mods Page</string>
+   </property>
+  </action>
  <action name="action_Open_yuzu_Folder">
   <property name="text">
    <string>Open yuzu Folder</string>
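
Editor's note: for readers skimming this changeset, the OpenGL stream-buffer hunks above reduce to one pattern: a single immutable buffer kept persistently mapped for writes with explicit flushes, plus an optional NV_vertex_buffer_unified_memory path that binds vertex data by GPU address rather than by buffer object. The sketch below restates that pattern in isolation; it is not yuzu code, and the names (CreateStream, WriteStream, BindForDraw, has_unified_memory, stream_pos) are invented for illustration. A real implementation also needs fencing so regions still in flight on the GPU are not overwritten; the diff relies on orphaning via glInvalidateBufferData plus per-range flushes.

// Minimal sketch, assuming a GL 4.5 context with DSA and the NV extensions loaded (e.g. via glad).
#include <cstdint>
#include <cstring>
#include <glad/glad.h>   // assumption: loader exposing GL 4.5 + NV_vertex_buffer_unified_memory

static GLuint stream_buffer = 0;
static GLuint64EXT stream_gpu_address = 0;  // queried GPU address for the unified-memory path
static std::uint8_t* stream_map = nullptr;
static GLsizeiptr stream_size = 0;
static GLintptr stream_pos = 0;             // illustrative write cursor

void CreateStream(GLsizeiptr size, bool has_unified_memory) {
    stream_size = size;
    glCreateBuffers(1, &stream_buffer);

    // Immutable storage, persistently mapped for writes with explicit flushes.
    constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
    glNamedBufferStorage(stream_buffer, size, nullptr, flags);
    stream_map = static_cast<std::uint8_t*>(
        glMapNamedBufferRange(stream_buffer, 0, size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));

    if (has_unified_memory) {
        // Make the buffer resident and remember its GPU address so draws can bind by address.
        glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
        glMakeNamedBufferResidentNV(stream_buffer, GL_READ_ONLY);
        glGetNamedBufferParameterui64vNV(stream_buffer, GL_BUFFER_GPU_ADDRESS_NV,
                                         &stream_gpu_address);
    }
}

GLintptr WriteStream(const void* data, GLsizeiptr bytes) {
    if (stream_pos + bytes > stream_size) {
        // Out of space: invalidate the contents and wrap, as OGLStreamBuffer::Map does above.
        glInvalidateBufferData(stream_buffer);
        stream_pos = 0;
    }
    std::memcpy(stream_map + stream_pos, data, bytes);
    glFlushMappedNamedBufferRange(stream_buffer, stream_pos, bytes);  // explicit flush of the write
    const GLintptr offset = stream_pos;
    stream_pos += bytes;
    return offset;
}

void BindForDraw(GLsizei stride, bool has_unified_memory) {
    if (has_unified_memory) {
        // Bind a null buffer purely to carry the stride, then point attribute fetches at the address.
        glBindVertexBuffer(0, 0, 0, stride);
        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, stream_gpu_address,
                               stream_size);
    } else {
        glBindVertexBuffer(0, stream_buffer, 0, stride);
    }
}

The BindForDraw branch mirrors the DrawScreen hunk in renderer_opengl.cpp: on drivers reporting the extension, the vertex buffer stays resident and draws reference it by address, avoiding a buffer-object rebind per draw, while the else branch is the portable fallback.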