diff options
Diffstat (limited to 'src')
128 files changed, 3058 insertions, 1970 deletions
diff --git a/src/audio_core/renderer/command/resample/upsample.cpp b/src/audio_core/renderer/command/resample/upsample.cpp index 6c3ff31f7..5f7db12ca 100644 --- a/src/audio_core/renderer/command/resample/upsample.cpp +++ b/src/audio_core/renderer/command/resample/upsample.cpp @@ -20,25 +20,25 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input, const u32 target_sample_count, const u32 source_sample_count, UpsamplerState* state) { constexpr u32 WindowSize = 10; - constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow1{ - 51.93359375f, -18.80078125f, 9.73046875f, -5.33203125f, 2.84375f, - -1.41015625f, 0.62109375f, -0.2265625f, 0.0625f, -0.00390625f, + constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc1{ + 0.95376587f, -0.12872314f, 0.060028076f, -0.032470703f, 0.017669678f, + -0.009124756f, 0.004272461f, -0.001739502f, 0.000579834f, -0.000091552734f, }; - constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow2{ - 105.35546875f, -24.52734375f, 11.9609375f, -6.515625f, 3.52734375f, - -1.796875f, 0.828125f, -0.32421875f, 0.1015625f, -0.015625f, + constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc2{ + 0.8230896f, -0.19161987f, 0.093444824f, -0.05090332f, 0.027557373f, + -0.014038086f, 0.0064697266f, -0.002532959f, 0.00079345703f, -0.00012207031f, }; - constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow3{ - 122.08203125f, -16.47656250f, 7.68359375f, -4.15625000f, 2.26171875f, - -1.16796875f, 0.54687500f, -0.22265625f, 0.07421875f, -0.01171875f, + constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc3{ + 0.6298828f, -0.19274902f, 0.09725952f, -0.05319214f, 0.028625488f, + -0.014373779f, 0.006500244f, -0.0024719238f, 0.0007324219f, -0.000091552734f, }; - constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow4{ - 23.73437500f, -9.62109375f, 5.07812500f, -2.78125000f, 1.46875000f, - -0.71484375f, 0.30859375f, -0.10546875f, 0.02734375f, 0.00000000f, + constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc4{ + 0.4057312f, -0.1468811f, 0.07601929f, -0.041656494f, 0.022216797f, + -0.011016846f, 0.004852295f, -0.0017700195f, 0.00048828125f, -0.000030517578f, }; - constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow5{ - 80.62500000f, -24.67187500f, 12.44921875f, -6.80859375f, 3.66406250f, - -1.83984375f, 0.83203125f, -0.31640625f, 0.09375000f, -0.01171875f, + constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc5{ + 0.1854248f, -0.075164795f, 0.03967285f, -0.021728516f, 0.011474609f, + -0.005584717f, 0.0024108887f, -0.0008239746f, 0.00021362305f, 0.0f, }; if (!state->initialized) { @@ -91,52 +91,31 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input, static_cast<u16>((state->history_output_index + 1) % UpsamplerState::HistorySize); }; - auto calculate_sample = [&state](std::span<const Common::FixedPoint<24, 8>> coeffs1, - std::span<const Common::FixedPoint<24, 8>> coeffs2) -> s32 { + auto calculate_sample = [&state](std::span<const Common::FixedPoint<17, 15>> coeffs1, + std::span<const Common::FixedPoint<17, 15>> coeffs2) -> s32 { auto output_index{state->history_output_index}; - auto start_pos{output_index - state->history_start_index + 1U}; - auto end_pos{10U}; + u64 result{0}; - if (start_pos < 10) { - end_pos = start_pos; - } - - u64 prev_contrib{0}; - u32 coeff_index{0}; - for (; coeff_index < end_pos; coeff_index++, output_index--) { - prev_contrib += static_cast<u64>(state->history[output_index].to_raw()) * - coeffs1[coeff_index].to_raw(); - } + for (u32 coeff_index = 0; coeff_index < 10; coeff_index++) { + result += static_cast<u64>(state->history[output_index].to_raw()) * + coeffs1[coeff_index].to_raw(); - auto end_index{state->history_end_index}; - for (; start_pos < 9; start_pos++, coeff_index++, end_index--) { - prev_contrib += static_cast<u64>(state->history[end_index].to_raw()) * - coeffs1[coeff_index].to_raw(); + output_index = output_index == state->history_start_index ? state->history_end_index + : output_index - 1; } output_index = static_cast<u16>((state->history_output_index + 1) % UpsamplerState::HistorySize); - start_pos = state->history_end_index - output_index + 1U; - end_pos = 10U; - if (start_pos < 10) { - end_pos = start_pos; - } - - u64 next_contrib{0}; - coeff_index = 0; - for (; coeff_index < end_pos; coeff_index++, output_index++) { - next_contrib += static_cast<u64>(state->history[output_index].to_raw()) * - coeffs2[coeff_index].to_raw(); - } + for (u32 coeff_index = 0; coeff_index < 10; coeff_index++) { + result += static_cast<u64>(state->history[output_index].to_raw()) * + coeffs2[coeff_index].to_raw(); - auto start_index{state->history_start_index}; - for (; start_pos < 9; start_pos++, start_index++, coeff_index++) { - next_contrib += static_cast<u64>(state->history[start_index].to_raw()) * - coeffs2[coeff_index].to_raw(); + output_index = output_index == state->history_end_index ? state->history_start_index + : output_index + 1; } - return static_cast<s32>(((prev_contrib >> 15) + (next_contrib >> 15)) >> 8); + return static_cast<s32>(result >> (8 + 15)); }; switch (state->ratio.to_int_floor()) { @@ -150,23 +129,23 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input, break; case 1: - output[write_index] = calculate_sample(SincWindow3, SincWindow4); + output[write_index] = calculate_sample(WindowedSinc1, WindowedSinc5); break; case 2: - output[write_index] = calculate_sample(SincWindow2, SincWindow1); + output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4); break; case 3: - output[write_index] = calculate_sample(SincWindow5, SincWindow5); + output[write_index] = calculate_sample(WindowedSinc3, WindowedSinc3); break; case 4: - output[write_index] = calculate_sample(SincWindow1, SincWindow2); + output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2); break; case 5: - output[write_index] = calculate_sample(SincWindow4, SincWindow3); + output[write_index] = calculate_sample(WindowedSinc5, WindowedSinc1); break; } state->sample_index = static_cast<u8>((state->sample_index + 1) % 6); @@ -183,11 +162,11 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input, break; case 1: - output[write_index] = calculate_sample(SincWindow2, SincWindow1); + output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4); break; case 2: - output[write_index] = calculate_sample(SincWindow1, SincWindow2); + output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2); break; } state->sample_index = static_cast<u8>((state->sample_index + 1) % 3); @@ -204,12 +183,12 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input, break; case 1: - output[write_index] = calculate_sample(SincWindow1, SincWindow2); + output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2); break; case 2: increment(); - output[write_index] = calculate_sample(SincWindow2, SincWindow1); + output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4); break; } state->sample_index = static_cast<u8>((state->sample_index + 1) % 3); diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 45332cf95..bd6ac6716 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -38,6 +38,8 @@ add_library(common STATIC common_precompiled_headers.h common_types.h concepts.h + demangle.cpp + demangle.h div_ceil.h dynamic_library.cpp dynamic_library.h @@ -175,7 +177,7 @@ endif() create_target_directory_groups(common) target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile Threads::Threads) -target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd) +target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd demangle) if (YUZU_USE_PRECOMPILED_HEADERS) target_precompile_headers(common PRIVATE precompiled_headers.h) diff --git a/src/common/demangle.cpp b/src/common/demangle.cpp new file mode 100644 index 000000000..f4246f666 --- /dev/null +++ b/src/common/demangle.cpp @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/demangle.h" +#include "common/scope_exit.h" + +namespace llvm { +char* itaniumDemangle(const char* mangled_name, char* buf, size_t* n, int* status); +} + +namespace Common { + +std::string DemangleSymbol(const std::string& mangled) { + auto is_itanium = [](const std::string& name) -> bool { + // A valid Itanium encoding requires 1-4 leading underscores, followed by 'Z'. + auto pos = name.find_first_not_of('_'); + return pos > 0 && pos <= 4 && pos < name.size() && name[pos] == 'Z'; + }; + + if (mangled.empty()) { + return mangled; + } + + char* demangled = nullptr; + SCOPE_EXIT({ std::free(demangled); }); + + if (is_itanium(mangled)) { + demangled = llvm::itaniumDemangle(mangled.c_str(), nullptr, nullptr, nullptr); + } + + if (!demangled) { + return mangled; + } + return demangled; +} + +} // namespace Common diff --git a/src/common/demangle.h b/src/common/demangle.h new file mode 100644 index 000000000..f072d22f3 --- /dev/null +++ b/src/common/demangle.h @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <string> + +namespace Common { + +std::string DemangleSymbol(const std::string& mangled); + +} // namespace Common diff --git a/src/common/input.h b/src/common/input.h index fc14fd7bf..d27b1d772 100644 --- a/src/common/input.h +++ b/src/common/input.h @@ -292,9 +292,6 @@ class InputDevice { public: virtual ~InputDevice() = default; - // Request input device to update if necessary - virtual void SoftUpdate() {} - // Force input device to update data regardless of the current state virtual void ForceUpdate() {} diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 149e621f9..b1a2aa8b2 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -129,6 +129,10 @@ void UpdateRescalingInfo() { info.up_scale = 1; info.down_shift = 0; break; + case ResolutionSetup::Res3_2X: + info.up_scale = 3; + info.down_shift = 1; + break; case ResolutionSetup::Res2X: info.up_scale = 2; info.down_shift = 0; @@ -149,6 +153,14 @@ void UpdateRescalingInfo() { info.up_scale = 6; info.down_shift = 0; break; + case ResolutionSetup::Res7X: + info.up_scale = 7; + info.down_shift = 0; + break; + case ResolutionSetup::Res8X: + info.up_scale = 8; + info.down_shift = 0; + break; default: ASSERT(false); info.up_scale = 1; @@ -185,6 +197,7 @@ void RestoreGlobalState(bool is_powered_on) { // Renderer values.fsr_sharpening_slider.SetGlobal(true); values.renderer_backend.SetGlobal(true); + values.renderer_force_max_clock.SetGlobal(true); values.vulkan_device.SetGlobal(true); values.aspect_ratio.SetGlobal(true); values.max_anisotropy.SetGlobal(true); @@ -200,6 +213,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); values.use_pessimistic_flushes.SetGlobal(true); + values.use_vulkan_driver_pipeline_cache.SetGlobal(true); values.bg_red.SetGlobal(true); values.bg_green.SetGlobal(true); values.bg_blue.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 5017951c5..80b2eeabc 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -56,11 +56,14 @@ enum class ResolutionSetup : u32 { Res1_2X = 0, Res3_4X = 1, Res1X = 2, - Res2X = 3, - Res3X = 4, - Res4X = 5, - Res5X = 6, - Res6X = 7, + Res3_2X = 3, + Res2X = 4, + Res3X = 5, + Res4X = 6, + Res5X = 7, + Res6X = 8, + Res7X = 9, + Res8X = 10, }; enum class ScalingFilter : u32 { @@ -415,6 +418,7 @@ struct Values { // Renderer SwitchableSetting<RendererBackend, true> renderer_backend{ RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"}; + SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"}; Setting<bool> renderer_debug{false, "debug"}; Setting<bool> renderer_shader_feedback{false, "shader_feedback"}; Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"}; @@ -451,6 +455,8 @@ struct Values { SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"}; + SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true, + "use_vulkan_driver_pipeline_cache"}; SwitchableSetting<u8> bg_red{0, "bg_red"}; SwitchableSetting<u8> bg_green{0, "bg_green"}; diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 0252c8c31..5afdeb5ff 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -226,6 +226,7 @@ add_library(core STATIC hle/kernel/k_page_buffer.h hle/kernel/k_page_heap.cpp hle/kernel/k_page_heap.h + hle/kernel/k_page_group.cpp hle/kernel/k_page_group.h hle/kernel/k_page_table.cpp hle/kernel/k_page_table.h diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp index 2df7b0ee8..8aa7b9641 100644 --- a/src/core/arm/arm_interface.cpp +++ b/src/core/arm/arm_interface.cpp @@ -1,14 +1,12 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#ifndef _MSC_VER -#include <cxxabi.h> -#endif - #include <map> #include <optional> + #include "common/bit_field.h" #include "common/common_types.h" +#include "common/demangle.h" #include "common/logging/log.h" #include "core/arm/arm_interface.h" #include "core/arm/symbols.h" @@ -71,20 +69,8 @@ void ARM_Interface::SymbolicateBacktrace(Core::System& system, std::vector<Backt const auto symbol_set = symbols.find(entry.module); if (symbol_set != symbols.end()) { const auto symbol = Symbols::GetSymbolName(symbol_set->second, entry.offset); - if (symbol.has_value()) { -#ifdef _MSC_VER - // TODO(DarkLordZach): Add demangling of symbol names. - entry.name = *symbol; -#else - int status{-1}; - char* demangled{abi::__cxa_demangle(symbol->c_str(), nullptr, nullptr, &status)}; - if (status == 0 && demangled != nullptr) { - entry.name = demangled; - std::free(demangled); - } else { - entry.name = *symbol; - } -#endif + if (symbol) { + entry.name = Common::DemangleSymbol(*symbol); } } } diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 947747d36..2a7570073 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -229,7 +229,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* config.enable_cycle_counting = true; // Code cache size +#ifdef ARCHITECTURE_arm64 + config.code_cache_size = 128_MiB; +#else config.code_cache_size = 512_MiB; +#endif // Allow memory fault handling to work if (system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 3df943df7..7229fdc2a 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -288,7 +288,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* config.enable_cycle_counting = true; // Code cache size +#ifdef ARCHITECTURE_arm64 + config.code_cache_size = 128_MiB; +#else config.code_cache_size = 512_MiB; +#endif // Allow memory fault handling to work if (system.DebuggerEnabled()) { diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 0e7b5f943..6bac6722f 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -142,16 +142,24 @@ void CoreTiming::ScheduleLoopingEvent(std::chrono::nanoseconds start_time, } void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, - std::uintptr_t user_data) { - std::scoped_lock scope{basic_lock}; - const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { - return e.type.lock().get() == event_type.get() && e.user_data == user_data; - }); - - // Removing random items breaks the invariant so we have to re-establish it. - if (itr != event_queue.end()) { - event_queue.erase(itr, event_queue.end()); - std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); + std::uintptr_t user_data, bool wait) { + { + std::scoped_lock lk{basic_lock}; + const auto itr = + std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { + return e.type.lock().get() == event_type.get() && e.user_data == user_data; + }); + + // Removing random items breaks the invariant so we have to re-establish it. + if (itr != event_queue.end()) { + event_queue.erase(itr, event_queue.end()); + std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); + } + } + + // Force any in-progress events to finish + if (wait) { + std::scoped_lock lk{advance_lock}; } } @@ -190,20 +198,6 @@ u64 CoreTiming::GetClockTicks() const { return CpuCyclesToClockCycles(ticks); } -void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) { - std::scoped_lock lock{basic_lock}; - - const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { - return e.type.lock().get() == event_type.get(); - }); - - // Removing random items breaks the invariant so we have to re-establish it. - if (itr != event_queue.end()) { - event_queue.erase(itr, event_queue.end()); - std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); - } -} - std::optional<s64> CoreTiming::Advance() { std::scoped_lock lock{advance_lock, basic_lock}; global_timer = GetGlobalTimeNs().count(); diff --git a/src/core/core_timing.h b/src/core/core_timing.h index b5925193c..da366637b 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -98,10 +98,13 @@ public: const std::shared_ptr<EventType>& event_type, std::uintptr_t user_data = 0, bool absolute_time = false); - void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, std::uintptr_t user_data); + void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, std::uintptr_t user_data, + bool wait = true); - /// We only permit one event of each type in the queue at a time. - void RemoveEvent(const std::shared_ptr<EventType>& event_type); + void UnscheduleEventWithoutWait(const std::shared_ptr<EventType>& event_type, + std::uintptr_t user_data) { + UnscheduleEvent(event_type, user_data, false); + } void AddTicks(u64 ticks_to_add); diff --git a/src/core/debugger/gdbstub.cpp b/src/core/debugger/gdbstub.cpp index a64a9ac64..9c02b7b31 100644 --- a/src/core/debugger/gdbstub.cpp +++ b/src/core/debugger/gdbstub.cpp @@ -11,6 +11,7 @@ #include "common/hex_util.h" #include "common/logging/log.h" #include "common/scope_exit.h" +#include "common/settings.h" #include "core/arm/arm_interface.h" #include "core/core.h" #include "core/debugger/gdbstub.h" @@ -731,7 +732,25 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) { auto* process = system.CurrentProcess(); auto& page_table = process->PageTable(); - if (command_str == "get info") { + const char* commands = "Commands:\n" + " get fastmem\n" + " get info\n" + " get mappings\n"; + + if (command_str == "get fastmem") { + if (Settings::IsFastmemEnabled()) { + const auto& impl = page_table.PageTableImpl(); + const auto region = reinterpret_cast<uintptr_t>(impl.fastmem_arena); + const auto region_bits = impl.current_address_space_width_in_bits; + const auto region_size = 1ULL << region_bits; + + reply = fmt::format("Region bits: {}\n" + "Host address: {:#x} - {:#x}\n", + region_bits, region, region + region_size - 1); + } else { + reply = "Fastmem is not enabled.\n"; + } + } else if (command_str == "get info") { Loader::AppLoader::Modules modules; system.GetAppLoader().ReadNSOModules(modules); @@ -787,9 +806,10 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) { cur_addr = next_address; } } else if (command_str == "help") { - reply = "Commands:\n get info\n get mappings\n"; + reply = commands; } else { - reply = "Unknown command.\nCommands:\n get info\n get mappings\n"; + reply = "Unknown command.\n"; + reply += commands; } std::span<const u8> reply_span{reinterpret_cast<u8*>(&reply.front()), reply.size()}; diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp index 71364c323..a959c9db9 100644 --- a/src/core/hid/emulated_controller.cpp +++ b/src/core/hid/emulated_controller.cpp @@ -10,6 +10,7 @@ namespace Core::HID { constexpr s32 HID_JOYSTICK_MAX = 0x7fff; +constexpr s32 HID_JOYSTICK_MIN = 0x7ffe; constexpr s32 HID_TRIGGER_MAX = 0x7fff; // Use a common UUID for TAS and Virtual Gamepad constexpr Common::UUID TAS_UUID = @@ -798,9 +799,16 @@ void EmulatedController::SetStick(const Common::Input::CallbackStatus& callback, return; } + const auto FloatToShort = [](float a) { + if (a > 0) { + return static_cast<s32>(a * HID_JOYSTICK_MAX); + } + return static_cast<s32>(a * HID_JOYSTICK_MIN); + }; + const AnalogStickState stick{ - .x = static_cast<s32>(controller.stick_values[index].x.value * HID_JOYSTICK_MAX), - .y = static_cast<s32>(controller.stick_values[index].y.value * HID_JOYSTICK_MAX), + .x = FloatToShort(controller.stick_values[index].x.value), + .y = FloatToShort(controller.stick_values[index].y.value), }; switch (index) { @@ -1434,16 +1442,6 @@ AnalogSticks EmulatedController::GetSticks() const { return {}; } - // Some drivers like stick from buttons need constant refreshing - for (auto& device : stick_devices) { - if (!device) { - continue; - } - lock.unlock(); - device->SoftUpdate(); - lock.lock(); - } - return controller.analog_stick_state; } diff --git a/src/core/hle/kernel/k_code_memory.cpp b/src/core/hle/kernel/k_code_memory.cpp index 4b1c134d4..d9da1e600 100644 --- a/src/core/hle/kernel/k_code_memory.cpp +++ b/src/core/hle/kernel/k_code_memory.cpp @@ -27,13 +27,13 @@ Result KCodeMemory::Initialize(Core::DeviceMemory& device_memory, VAddr addr, si auto& page_table = m_owner->PageTable(); // Construct the page group. - m_page_group = {}; + m_page_group.emplace(kernel, page_table.GetBlockInfoManager()); // Lock the memory. - R_TRY(page_table.LockForCodeMemory(&m_page_group, addr, size)) + R_TRY(page_table.LockForCodeMemory(std::addressof(*m_page_group), addr, size)) // Clear the memory. - for (const auto& block : m_page_group.Nodes()) { + for (const auto& block : *m_page_group) { std::memset(device_memory.GetPointer<void>(block.GetAddress()), 0xFF, block.GetSize()); } @@ -51,12 +51,13 @@ Result KCodeMemory::Initialize(Core::DeviceMemory& device_memory, VAddr addr, si void KCodeMemory::Finalize() { // Unlock. if (!m_is_mapped && !m_is_owner_mapped) { - const size_t size = m_page_group.GetNumPages() * PageSize; - m_owner->PageTable().UnlockForCodeMemory(m_address, size, m_page_group); + const size_t size = m_page_group->GetNumPages() * PageSize; + m_owner->PageTable().UnlockForCodeMemory(m_address, size, *m_page_group); } // Close the page group. - m_page_group = {}; + m_page_group->Close(); + m_page_group->Finalize(); // Close our reference to our owner. m_owner->Close(); @@ -64,7 +65,7 @@ void KCodeMemory::Finalize() { Result KCodeMemory::Map(VAddr address, size_t size) { // Validate the size. - R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); + R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); // Lock ourselves. KScopedLightLock lk(m_lock); @@ -74,7 +75,7 @@ Result KCodeMemory::Map(VAddr address, size_t size) { // Map the memory. R_TRY(kernel.CurrentProcess()->PageTable().MapPages( - address, m_page_group, KMemoryState::CodeOut, KMemoryPermission::UserReadWrite)); + address, *m_page_group, KMemoryState::CodeOut, KMemoryPermission::UserReadWrite)); // Mark ourselves as mapped. m_is_mapped = true; @@ -84,13 +85,13 @@ Result KCodeMemory::Map(VAddr address, size_t size) { Result KCodeMemory::Unmap(VAddr address, size_t size) { // Validate the size. - R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); + R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); // Lock ourselves. KScopedLightLock lk(m_lock); // Unmap the memory. - R_TRY(kernel.CurrentProcess()->PageTable().UnmapPages(address, m_page_group, + R_TRY(kernel.CurrentProcess()->PageTable().UnmapPages(address, *m_page_group, KMemoryState::CodeOut)); // Mark ourselves as unmapped. @@ -101,7 +102,7 @@ Result KCodeMemory::Unmap(VAddr address, size_t size) { Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission perm) { // Validate the size. - R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); + R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); // Lock ourselves. KScopedLightLock lk(m_lock); @@ -125,7 +126,7 @@ Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission // Map the memory. R_TRY( - m_owner->PageTable().MapPages(address, m_page_group, KMemoryState::GeneratedCode, k_perm)); + m_owner->PageTable().MapPages(address, *m_page_group, KMemoryState::GeneratedCode, k_perm)); // Mark ourselves as mapped. m_is_owner_mapped = true; @@ -135,13 +136,13 @@ Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission Result KCodeMemory::UnmapFromOwner(VAddr address, size_t size) { // Validate the size. - R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); + R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize); // Lock ourselves. KScopedLightLock lk(m_lock); // Unmap the memory. - R_TRY(m_owner->PageTable().UnmapPages(address, m_page_group, KMemoryState::GeneratedCode)); + R_TRY(m_owner->PageTable().UnmapPages(address, *m_page_group, KMemoryState::GeneratedCode)); // Mark ourselves as unmapped. m_is_owner_mapped = false; diff --git a/src/core/hle/kernel/k_code_memory.h b/src/core/hle/kernel/k_code_memory.h index 2e7e1436a..5b260b385 100644 --- a/src/core/hle/kernel/k_code_memory.h +++ b/src/core/hle/kernel/k_code_memory.h @@ -3,6 +3,8 @@ #pragma once +#include <optional> + #include "common/common_types.h" #include "core/device_memory.h" #include "core/hle/kernel/k_auto_object.h" @@ -49,11 +51,11 @@ public: return m_address; } size_t GetSize() const { - return m_is_initialized ? m_page_group.GetNumPages() * PageSize : 0; + return m_is_initialized ? m_page_group->GetNumPages() * PageSize : 0; } private: - KPageGroup m_page_group{}; + std::optional<KPageGroup> m_page_group{}; KProcess* m_owner{}; VAddr m_address{}; KLightLock m_lock; diff --git a/src/core/hle/kernel/k_hardware_timer.cpp b/src/core/hle/kernel/k_hardware_timer.cpp index 6bba79ea0..4dcd53821 100644 --- a/src/core/hle/kernel/k_hardware_timer.cpp +++ b/src/core/hle/kernel/k_hardware_timer.cpp @@ -18,7 +18,8 @@ void KHardwareTimer::Initialize() { } void KHardwareTimer::Finalize() { - this->DisableInterrupt(); + m_kernel.System().CoreTiming().UnscheduleEvent(m_event_type, reinterpret_cast<uintptr_t>(this)); + m_wakeup_time = std::numeric_limits<s64>::max(); m_event_type.reset(); } @@ -59,7 +60,8 @@ void KHardwareTimer::EnableInterrupt(s64 wakeup_time) { } void KHardwareTimer::DisableInterrupt() { - m_kernel.System().CoreTiming().UnscheduleEvent(m_event_type, reinterpret_cast<uintptr_t>(this)); + m_kernel.System().CoreTiming().UnscheduleEventWithoutWait(m_event_type, + reinterpret_cast<uintptr_t>(this)); m_wakeup_time = std::numeric_limits<s64>::max(); } diff --git a/src/core/hle/kernel/k_memory_manager.cpp b/src/core/hle/kernel/k_memory_manager.cpp index bd33571da..cd6ea388e 100644 --- a/src/core/hle/kernel/k_memory_manager.cpp +++ b/src/core/hle/kernel/k_memory_manager.cpp @@ -223,7 +223,7 @@ Result KMemoryManager::AllocatePageGroupImpl(KPageGroup* out, size_t num_pages, // Ensure that we don't leave anything un-freed. ON_RESULT_FAILURE { - for (const auto& it : out->Nodes()) { + for (const auto& it : *out) { auto& manager = this->GetManager(it.GetAddress()); const size_t node_num_pages = std::min<u64>( it.GetNumPages(), (manager.GetEndAddress() - it.GetAddress()) / PageSize); @@ -285,7 +285,7 @@ Result KMemoryManager::AllocateAndOpen(KPageGroup* out, size_t num_pages, u32 op m_has_optimized_process[static_cast<size_t>(pool)], true)); // Open the first reference to the pages. - for (const auto& block : out->Nodes()) { + for (const auto& block : *out) { PAddr cur_address = block.GetAddress(); size_t remaining_pages = block.GetNumPages(); while (remaining_pages > 0) { @@ -335,7 +335,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32 // Perform optimized memory tracking, if we should. if (optimized) { // Iterate over the allocated blocks. - for (const auto& block : out->Nodes()) { + for (const auto& block : *out) { // Get the block extents. const PAddr block_address = block.GetAddress(); const size_t block_pages = block.GetNumPages(); @@ -391,7 +391,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32 } } else { // Set all the allocated memory. - for (const auto& block : out->Nodes()) { + for (const auto& block : *out) { std::memset(m_system.DeviceMemory().GetPointer<void>(block.GetAddress()), fill_pattern, block.GetSize()); } diff --git a/src/core/hle/kernel/k_page_group.cpp b/src/core/hle/kernel/k_page_group.cpp new file mode 100644 index 000000000..d8c644a33 --- /dev/null +++ b/src/core/hle/kernel/k_page_group.cpp @@ -0,0 +1,121 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "core/hle/kernel/k_dynamic_resource_manager.h" +#include "core/hle/kernel/k_memory_manager.h" +#include "core/hle/kernel/k_page_group.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/svc_results.h" + +namespace Kernel { + +void KPageGroup::Finalize() { + KBlockInfo* cur = m_first_block; + while (cur != nullptr) { + KBlockInfo* next = cur->GetNext(); + m_manager->Free(cur); + cur = next; + } + + m_first_block = nullptr; + m_last_block = nullptr; +} + +void KPageGroup::CloseAndReset() { + auto& mm = m_kernel.MemoryManager(); + + KBlockInfo* cur = m_first_block; + while (cur != nullptr) { + KBlockInfo* next = cur->GetNext(); + mm.Close(cur->GetAddress(), cur->GetNumPages()); + m_manager->Free(cur); + cur = next; + } + + m_first_block = nullptr; + m_last_block = nullptr; +} + +size_t KPageGroup::GetNumPages() const { + size_t num_pages = 0; + + for (const auto& it : *this) { + num_pages += it.GetNumPages(); + } + + return num_pages; +} + +Result KPageGroup::AddBlock(KPhysicalAddress addr, size_t num_pages) { + // Succeed immediately if we're adding no pages. + R_SUCCEED_IF(num_pages == 0); + + // Check for overflow. + ASSERT(addr < addr + num_pages * PageSize); + + // Try to just append to the last block. + if (m_last_block != nullptr) { + R_SUCCEED_IF(m_last_block->TryConcatenate(addr, num_pages)); + } + + // Allocate a new block. + KBlockInfo* new_block = m_manager->Allocate(); + R_UNLESS(new_block != nullptr, ResultOutOfResource); + + // Initialize the block. + new_block->Initialize(addr, num_pages); + + // Add the block to our list. + if (m_last_block != nullptr) { + m_last_block->SetNext(new_block); + } else { + m_first_block = new_block; + } + m_last_block = new_block; + + R_SUCCEED(); +} + +void KPageGroup::Open() const { + auto& mm = m_kernel.MemoryManager(); + + for (const auto& it : *this) { + mm.Open(it.GetAddress(), it.GetNumPages()); + } +} + +void KPageGroup::OpenFirst() const { + auto& mm = m_kernel.MemoryManager(); + + for (const auto& it : *this) { + mm.OpenFirst(it.GetAddress(), it.GetNumPages()); + } +} + +void KPageGroup::Close() const { + auto& mm = m_kernel.MemoryManager(); + + for (const auto& it : *this) { + mm.Close(it.GetAddress(), it.GetNumPages()); + } +} + +bool KPageGroup::IsEquivalentTo(const KPageGroup& rhs) const { + auto lit = this->begin(); + auto rit = rhs.begin(); + auto lend = this->end(); + auto rend = rhs.end(); + + while (lit != lend && rit != rend) { + if (*lit != *rit) { + return false; + } + + ++lit; + ++rit; + } + + return lit == lend && rit == rend; +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_page_group.h b/src/core/hle/kernel/k_page_group.h index 316f172f2..c07f17663 100644 --- a/src/core/hle/kernel/k_page_group.h +++ b/src/core/hle/kernel/k_page_group.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -13,24 +13,23 @@ namespace Kernel { +class KBlockInfoManager; +class KernelCore; class KPageGroup; class KBlockInfo { -private: - friend class KPageGroup; - public: - constexpr KBlockInfo() = default; + constexpr explicit KBlockInfo() : m_next(nullptr) {} - constexpr void Initialize(PAddr addr, size_t np) { + constexpr void Initialize(KPhysicalAddress addr, size_t np) { ASSERT(Common::IsAligned(addr, PageSize)); ASSERT(static_cast<u32>(np) == np); - m_page_index = static_cast<u32>(addr) / PageSize; + m_page_index = static_cast<u32>(addr / PageSize); m_num_pages = static_cast<u32>(np); } - constexpr PAddr GetAddress() const { + constexpr KPhysicalAddress GetAddress() const { return m_page_index * PageSize; } constexpr size_t GetNumPages() const { @@ -39,10 +38,10 @@ public: constexpr size_t GetSize() const { return this->GetNumPages() * PageSize; } - constexpr PAddr GetEndAddress() const { + constexpr KPhysicalAddress GetEndAddress() const { return (m_page_index + m_num_pages) * PageSize; } - constexpr PAddr GetLastAddress() const { + constexpr KPhysicalAddress GetLastAddress() const { return this->GetEndAddress() - 1; } @@ -62,8 +61,8 @@ public: return !(*this == rhs); } - constexpr bool IsStrictlyBefore(PAddr addr) const { - const PAddr end = this->GetEndAddress(); + constexpr bool IsStrictlyBefore(KPhysicalAddress addr) const { + const KPhysicalAddress end = this->GetEndAddress(); if (m_page_index != 0 && end == 0) { return false; @@ -72,11 +71,11 @@ public: return end < addr; } - constexpr bool operator<(PAddr addr) const { + constexpr bool operator<(KPhysicalAddress addr) const { return this->IsStrictlyBefore(addr); } - constexpr bool TryConcatenate(PAddr addr, size_t np) { + constexpr bool TryConcatenate(KPhysicalAddress addr, size_t np) { if (addr != 0 && addr == this->GetEndAddress()) { m_num_pages += static_cast<u32>(np); return true; @@ -90,96 +89,118 @@ private: } private: + friend class KPageGroup; + KBlockInfo* m_next{}; u32 m_page_index{}; u32 m_num_pages{}; }; static_assert(sizeof(KBlockInfo) <= 0x10); -class KPageGroup final { +class KPageGroup { public: - class Node final { + class Iterator { public: - constexpr Node(u64 addr_, std::size_t num_pages_) : addr{addr_}, num_pages{num_pages_} {} + using iterator_category = std::forward_iterator_tag; + using value_type = const KBlockInfo; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + constexpr explicit Iterator(pointer n) : m_node(n) {} + + constexpr bool operator==(const Iterator& rhs) const { + return m_node == rhs.m_node; + } + constexpr bool operator!=(const Iterator& rhs) const { + return !(*this == rhs); + } - constexpr u64 GetAddress() const { - return addr; + constexpr pointer operator->() const { + return m_node; + } + constexpr reference operator*() const { + return *m_node; } - constexpr std::size_t GetNumPages() const { - return num_pages; + constexpr Iterator& operator++() { + m_node = m_node->GetNext(); + return *this; } - constexpr std::size_t GetSize() const { - return GetNumPages() * PageSize; + constexpr Iterator operator++(int) { + const Iterator it{*this}; + ++(*this); + return it; } private: - u64 addr{}; - std::size_t num_pages{}; + pointer m_node{}; }; -public: - KPageGroup() = default; - KPageGroup(u64 address, u64 num_pages) { - ASSERT(AddBlock(address, num_pages).IsSuccess()); + explicit KPageGroup(KernelCore& kernel, KBlockInfoManager* m) + : m_kernel{kernel}, m_manager{m} {} + ~KPageGroup() { + this->Finalize(); } - constexpr std::list<Node>& Nodes() { - return nodes; - } + void CloseAndReset(); + void Finalize(); - constexpr const std::list<Node>& Nodes() const { - return nodes; + Iterator begin() const { + return Iterator{m_first_block}; + } + Iterator end() const { + return Iterator{nullptr}; + } + bool empty() const { + return m_first_block == nullptr; } - std::size_t GetNumPages() const { - std::size_t num_pages = 0; - for (const Node& node : nodes) { - num_pages += node.GetNumPages(); - } - return num_pages; - } - - bool IsEqual(KPageGroup& other) const { - auto this_node = nodes.begin(); - auto other_node = other.nodes.begin(); - while (this_node != nodes.end() && other_node != other.nodes.end()) { - if (this_node->GetAddress() != other_node->GetAddress() || - this_node->GetNumPages() != other_node->GetNumPages()) { - return false; - } - this_node = std::next(this_node); - other_node = std::next(other_node); - } + Result AddBlock(KPhysicalAddress addr, size_t num_pages); + void Open() const; + void OpenFirst() const; + void Close() const; + + size_t GetNumPages() const; + + bool IsEquivalentTo(const KPageGroup& rhs) const; + + bool operator==(const KPageGroup& rhs) const { + return this->IsEquivalentTo(rhs); + } - return this_node == nodes.end() && other_node == other.nodes.end(); + bool operator!=(const KPageGroup& rhs) const { + return !(*this == rhs); } - Result AddBlock(u64 address, u64 num_pages) { - if (!num_pages) { - return ResultSuccess; +private: + KernelCore& m_kernel; + KBlockInfo* m_first_block{}; + KBlockInfo* m_last_block{}; + KBlockInfoManager* m_manager{}; +}; + +class KScopedPageGroup { +public: + explicit KScopedPageGroup(const KPageGroup* gp) : m_pg(gp) { + if (m_pg) { + m_pg->Open(); } - if (!nodes.empty()) { - const auto node = nodes.back(); - if (node.GetAddress() + node.GetNumPages() * PageSize == address) { - address = node.GetAddress(); - num_pages += node.GetNumPages(); - nodes.pop_back(); - } + } + explicit KScopedPageGroup(const KPageGroup& gp) : KScopedPageGroup(std::addressof(gp)) {} + ~KScopedPageGroup() { + if (m_pg) { + m_pg->Close(); } - nodes.push_back({address, num_pages}); - return ResultSuccess; } - bool Empty() const { - return nodes.empty(); + void CancelClose() { + m_pg = nullptr; } - void Finalize() {} - private: - std::list<Node> nodes; + const KPageGroup* m_pg{}; }; } // namespace Kernel diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp index 612fc76fa..9c7ac22dc 100644 --- a/src/core/hle/kernel/k_page_table.cpp +++ b/src/core/hle/kernel/k_page_table.cpp @@ -100,7 +100,7 @@ constexpr size_t GetAddressSpaceWidthFromType(FileSys::ProgramAddressSpaceType a KPageTable::KPageTable(Core::System& system_) : m_general_lock{system_.Kernel()}, - m_map_physical_memory_lock{system_.Kernel()}, m_system{system_} {} + m_map_physical_memory_lock{system_.Kernel()}, m_system{system_}, m_kernel{system_.Kernel()} {} KPageTable::~KPageTable() = default; @@ -373,7 +373,7 @@ Result KPageTable::MapProcessCode(VAddr addr, size_t num_pages, KMemoryState sta m_memory_block_slab_manager); // Allocate and open. - KPageGroup pg; + KPageGroup pg{m_kernel, m_block_info_manager}; R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen( &pg, num_pages, KMemoryManager::EncodeOption(KMemoryManager::Pool::Application, m_allocation_option))); @@ -432,7 +432,7 @@ Result KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, size_t si const size_t num_pages = size / PageSize; // Create page groups for the memory being mapped. - KPageGroup pg; + KPageGroup pg{m_kernel, m_block_info_manager}; AddRegionToPages(src_address, num_pages, pg); // Reprotect the source as kernel-read/not mapped. @@ -593,7 +593,7 @@ Result KPageTable::MakePageGroup(KPageGroup& pg, VAddr addr, size_t num_pages) { const size_t size = num_pages * PageSize; // We're making a new group, not adding to an existing one. - R_UNLESS(pg.Empty(), ResultInvalidCurrentMemory); + R_UNLESS(pg.empty(), ResultInvalidCurrentMemory); // Begin traversal. Common::PageTable::TraversalContext context; @@ -640,11 +640,10 @@ Result KPageTable::MakePageGroup(KPageGroup& pg, VAddr addr, size_t num_pages) { R_SUCCEED(); } -bool KPageTable::IsValidPageGroup(const KPageGroup& pg_ll, VAddr addr, size_t num_pages) { +bool KPageTable::IsValidPageGroup(const KPageGroup& pg, VAddr addr, size_t num_pages) { ASSERT(this->IsLockedByCurrentThread()); const size_t size = num_pages * PageSize; - const auto& pg = pg_ll.Nodes(); const auto& memory_layout = m_system.Kernel().MemoryLayout(); // Empty groups are necessarily invalid. @@ -942,9 +941,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add ON_RESULT_FAILURE { if (cur_mapped_addr != dst_addr) { - // HACK: Manually close the pages. - HACK_ClosePages(dst_addr, (cur_mapped_addr - dst_addr) / PageSize); - ASSERT(Operate(dst_addr, (cur_mapped_addr - dst_addr) / PageSize, KMemoryPermission::None, OperationType::Unmap) .IsSuccess()); @@ -1020,9 +1016,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add // Map the page. R_TRY(Operate(cur_mapped_addr, 1, test_perm, OperationType::Map, start_partial_page)); - // HACK: Manually open the pages. - HACK_OpenPages(start_partial_page, 1); - // Update tracking extents. cur_mapped_addr += PageSize; cur_block_addr += PageSize; @@ -1051,9 +1044,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add R_TRY(Operate(cur_mapped_addr, cur_block_size / PageSize, test_perm, OperationType::Map, cur_block_addr)); - // HACK: Manually open the pages. - HACK_OpenPages(cur_block_addr, cur_block_size / PageSize); - // Update tracking extents. cur_mapped_addr += cur_block_size; cur_block_addr = next_entry.phys_addr; @@ -1073,9 +1063,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add R_TRY(Operate(cur_mapped_addr, last_block_size / PageSize, test_perm, OperationType::Map, cur_block_addr)); - // HACK: Manually open the pages. - HACK_OpenPages(cur_block_addr, last_block_size / PageSize); - // Update tracking extents. cur_mapped_addr += last_block_size; cur_block_addr += last_block_size; @@ -1107,9 +1094,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add // Map the page. R_TRY(Operate(cur_mapped_addr, 1, test_perm, OperationType::Map, end_partial_page)); - - // HACK: Manually open the pages. - HACK_OpenPages(end_partial_page, 1); } // Update memory blocks to reflect our changes @@ -1211,9 +1195,6 @@ Result KPageTable::CleanupForIpcServer(VAddr address, size_t size, KMemoryState const size_t aligned_size = aligned_end - aligned_start; const size_t aligned_num_pages = aligned_size / PageSize; - // HACK: Manually close the pages. - HACK_ClosePages(aligned_start, aligned_num_pages); - // Unmap the pages. R_TRY(Operate(aligned_start, aligned_num_pages, KMemoryPermission::None, OperationType::Unmap)); @@ -1501,17 +1482,6 @@ void KPageTable::CleanupForIpcClientOnServerSetupFailure([[maybe_unused]] PageLi } } -void KPageTable::HACK_OpenPages(PAddr phys_addr, size_t num_pages) { - m_system.Kernel().MemoryManager().OpenFirst(phys_addr, num_pages); -} - -void KPageTable::HACK_ClosePages(VAddr virt_addr, size_t num_pages) { - for (size_t index = 0; index < num_pages; ++index) { - const auto paddr = GetPhysicalAddr(virt_addr + (index * PageSize)); - m_system.Kernel().MemoryManager().Close(paddr, 1); - } -} - Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { // Lock the physical memory lock. KScopedLightLock phys_lk(m_map_physical_memory_lock); @@ -1572,7 +1542,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached); // Allocate pages for the new memory. - KPageGroup pg; + KPageGroup pg{m_kernel, m_block_info_manager}; R_TRY(m_system.Kernel().MemoryManager().AllocateForProcess( &pg, (size - mapped_size) / PageSize, m_allocate_option, 0, 0)); @@ -1650,7 +1620,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { KScopedPageTableUpdater updater(this); // Prepare to iterate over the memory. - auto pg_it = pg.Nodes().begin(); + auto pg_it = pg.begin(); PAddr pg_phys_addr = pg_it->GetAddress(); size_t pg_pages = pg_it->GetNumPages(); @@ -1680,9 +1650,6 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { last_unmap_address + 1 - cur_address) / PageSize; - // HACK: Manually close the pages. - HACK_ClosePages(cur_address, cur_pages); - // Unmap. ASSERT(Operate(cur_address, cur_pages, KMemoryPermission::None, OperationType::Unmap) @@ -1703,7 +1670,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { // Release any remaining unmapped memory. m_system.Kernel().MemoryManager().OpenFirst(pg_phys_addr, pg_pages); m_system.Kernel().MemoryManager().Close(pg_phys_addr, pg_pages); - for (++pg_it; pg_it != pg.Nodes().end(); ++pg_it) { + for (++pg_it; pg_it != pg.end(); ++pg_it) { m_system.Kernel().MemoryManager().OpenFirst(pg_it->GetAddress(), pg_it->GetNumPages()); m_system.Kernel().MemoryManager().Close(pg_it->GetAddress(), @@ -1731,7 +1698,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { // Check if we're at the end of the physical block. if (pg_pages == 0) { // Ensure there are more pages to map. - ASSERT(pg_it != pg.Nodes().end()); + ASSERT(pg_it != pg.end()); // Advance our physical block. ++pg_it; @@ -1742,10 +1709,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) { // Map whatever we can. const size_t cur_pages = std::min(pg_pages, map_pages); R_TRY(Operate(cur_address, cur_pages, KMemoryPermission::UserReadWrite, - OperationType::Map, pg_phys_addr)); - - // HACK: Manually open the pages. - HACK_OpenPages(pg_phys_addr, cur_pages); + OperationType::MapFirst, pg_phys_addr)); // Advance. cur_address += cur_pages * PageSize; @@ -1888,9 +1852,6 @@ Result KPageTable::UnmapPhysicalMemory(VAddr address, size_t size) { last_address + 1 - cur_address) / PageSize; - // HACK: Manually close the pages. - HACK_ClosePages(cur_address, cur_pages); - // Unmap. ASSERT(Operate(cur_address, cur_pages, KMemoryPermission::None, OperationType::Unmap) .IsSuccess()); @@ -1955,7 +1916,7 @@ Result KPageTable::MapMemory(VAddr dst_address, VAddr src_address, size_t size) R_TRY(dst_allocator_result); // Map the memory. - KPageGroup page_linked_list; + KPageGroup page_linked_list{m_kernel, m_block_info_manager}; const size_t num_pages{size / PageSize}; const KMemoryPermission new_src_perm = static_cast<KMemoryPermission>( KMemoryPermission::KernelRead | KMemoryPermission::NotMapped); @@ -2022,14 +1983,14 @@ Result KPageTable::UnmapMemory(VAddr dst_address, VAddr src_address, size_t size num_dst_allocator_blocks); R_TRY(dst_allocator_result); - KPageGroup src_pages; - KPageGroup dst_pages; + KPageGroup src_pages{m_kernel, m_block_info_manager}; + KPageGroup dst_pages{m_kernel, m_block_info_manager}; const size_t num_pages{size / PageSize}; AddRegionToPages(src_address, num_pages, src_pages); AddRegionToPages(dst_address, num_pages, dst_pages); - R_UNLESS(dst_pages.IsEqual(src_pages), ResultInvalidMemoryRegion); + R_UNLESS(dst_pages.IsEquivalentTo(src_pages), ResultInvalidMemoryRegion); { auto block_guard = detail::ScopeExit([&] { MapPages(dst_address, dst_pages, dst_perm); }); @@ -2060,7 +2021,7 @@ Result KPageTable::MapPages(VAddr addr, const KPageGroup& page_linked_list, VAddr cur_addr{addr}; - for (const auto& node : page_linked_list.Nodes()) { + for (const auto& node : page_linked_list) { if (const auto result{ Operate(cur_addr, node.GetNumPages(), perm, OperationType::Map, node.GetAddress())}; result.IsError()) { @@ -2160,7 +2121,7 @@ Result KPageTable::UnmapPages(VAddr addr, const KPageGroup& page_linked_list) { VAddr cur_addr{addr}; - for (const auto& node : page_linked_list.Nodes()) { + for (const auto& node : page_linked_list) { if (const auto result{Operate(cur_addr, node.GetNumPages(), KMemoryPermission::None, OperationType::Unmap)}; result.IsError()) { @@ -2527,13 +2488,13 @@ Result KPageTable::SetHeapSize(VAddr* out, size_t size) { R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached); // Allocate pages for the heap extension. - KPageGroup pg; + KPageGroup pg{m_kernel, m_block_info_manager}; R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen( &pg, allocation_size / PageSize, KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option))); // Clear all the newly allocated pages. - for (const auto& it : pg.Nodes()) { + for (const auto& it : pg) { std::memset(m_system.DeviceMemory().GetPointer<void>(it.GetAddress()), m_heap_fill_value, it.GetSize()); } @@ -2610,11 +2571,23 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(size_t needed_num_pages, size_ if (is_map_only) { R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr)); } else { - KPageGroup page_group; - R_TRY(m_system.Kernel().MemoryManager().AllocateForProcess( - &page_group, needed_num_pages, - KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option), 0, 0)); - R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup)); + // Create a page group tohold the pages we allocate. + KPageGroup pg{m_kernel, m_block_info_manager}; + + R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen( + &pg, needed_num_pages, + KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option))); + + // Ensure that the page group is closed when we're done working with it. + SCOPE_EXIT({ pg.Close(); }); + + // Clear all pages. + for (const auto& it : pg) { + std::memset(m_system.DeviceMemory().GetPointer<void>(it.GetAddress()), + m_heap_fill_value, it.GetSize()); + } + + R_TRY(Operate(addr, needed_num_pages, pg, OperationType::MapGroup)); } // Update the blocks. @@ -2795,19 +2768,28 @@ Result KPageTable::Operate(VAddr addr, size_t num_pages, const KPageGroup& page_ ASSERT(num_pages > 0); ASSERT(num_pages == page_group.GetNumPages()); - for (const auto& node : page_group.Nodes()) { - const size_t size{node.GetNumPages() * PageSize}; + switch (operation) { + case OperationType::MapGroup: { + // We want to maintain a new reference to every page in the group. + KScopedPageGroup spg(page_group); + + for (const auto& node : page_group) { + const size_t size{node.GetNumPages() * PageSize}; - switch (operation) { - case OperationType::MapGroup: + // Map the pages. m_system.Memory().MapMemoryRegion(*m_page_table_impl, addr, size, node.GetAddress()); - break; - default: - ASSERT(false); - break; + + addr += size; } - addr += size; + // We succeeded! We want to persist the reference to the pages. + spg.CancelClose(); + + break; + } + default: + ASSERT(false); + break; } R_SUCCEED(); @@ -2822,13 +2804,29 @@ Result KPageTable::Operate(VAddr addr, size_t num_pages, KMemoryPermission perm, ASSERT(ContainsPages(addr, num_pages)); switch (operation) { - case OperationType::Unmap: + case OperationType::Unmap: { + // Ensure that any pages we track close on exit. + KPageGroup pages_to_close{m_kernel, this->GetBlockInfoManager()}; + SCOPE_EXIT({ pages_to_close.CloseAndReset(); }); + + this->AddRegionToPages(addr, num_pages, pages_to_close); m_system.Memory().UnmapRegion(*m_page_table_impl, addr, num_pages * PageSize); break; + } + case OperationType::MapFirst: case OperationType::Map: { ASSERT(map_addr); ASSERT(Common::IsAligned(map_addr, PageSize)); m_system.Memory().MapMemoryRegion(*m_page_table_impl, addr, num_pages * PageSize, map_addr); + + // Open references to pages, if we should. + if (IsHeapPhysicalAddress(m_kernel.MemoryLayout(), map_addr)) { + if (operation == OperationType::MapFirst) { + m_kernel.MemoryManager().OpenFirst(map_addr, num_pages); + } else { + m_kernel.MemoryManager().Open(map_addr, num_pages); + } + } break; } case OperationType::Separate: { diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h index f1ca785d7..0a454b05b 100644 --- a/src/core/hle/kernel/k_page_table.h +++ b/src/core/hle/kernel/k_page_table.h @@ -107,6 +107,10 @@ public: return *m_page_table_impl; } + KBlockInfoManager* GetBlockInfoManager() { + return m_block_info_manager; + } + bool CanContain(VAddr addr, size_t size, KMemoryState state) const; protected: @@ -261,10 +265,6 @@ private: void CleanupForIpcClientOnServerSetupFailure(PageLinkedList* page_list, VAddr address, size_t size, KMemoryPermission prot_perm); - // HACK: These will be removed once we automatically manage page reference counts. - void HACK_OpenPages(PAddr phys_addr, size_t num_pages); - void HACK_ClosePages(VAddr virt_addr, size_t num_pages); - mutable KLightLock m_general_lock; mutable KLightLock m_map_physical_memory_lock; @@ -488,6 +488,7 @@ private: std::unique_ptr<Common::PageTable> m_page_table_impl; Core::System& m_system; + KernelCore& m_kernel; }; } // namespace Kernel diff --git a/src/core/hle/kernel/k_shared_memory.cpp b/src/core/hle/kernel/k_shared_memory.cpp index 0aa68103c..3cf2b5d91 100644 --- a/src/core/hle/kernel/k_shared_memory.cpp +++ b/src/core/hle/kernel/k_shared_memory.cpp @@ -13,10 +13,7 @@ namespace Kernel { KSharedMemory::KSharedMemory(KernelCore& kernel_) : KAutoObjectWithSlabHeapAndContainer{kernel_} {} - -KSharedMemory::~KSharedMemory() { - kernel.GetSystemResourceLimit()->Release(LimitableResource::PhysicalMemoryMax, size); -} +KSharedMemory::~KSharedMemory() = default; Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* owner_process_, Svc::MemoryPermission owner_permission_, @@ -49,7 +46,8 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o R_UNLESS(physical_address != 0, ResultOutOfMemory); //! Insert the result into our page group. - page_group.emplace(physical_address, num_pages); + page_group.emplace(kernel, &kernel.GetSystemSystemResource().GetBlockInfoManager()); + page_group->AddBlock(physical_address, num_pages); // Commit our reservation. memory_reservation.Commit(); @@ -62,7 +60,7 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o is_initialized = true; // Clear all pages in the memory. - for (const auto& block : page_group->Nodes()) { + for (const auto& block : *page_group) { std::memset(device_memory_.GetPointer<void>(block.GetAddress()), 0, block.GetSize()); } @@ -71,13 +69,8 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o void KSharedMemory::Finalize() { // Close and finalize the page group. - // page_group->Close(); - // page_group->Finalize(); - - //! HACK: Manually close. - for (const auto& block : page_group->Nodes()) { - kernel.MemoryManager().Close(block.GetAddress(), block.GetNumPages()); - } + page_group->Close(); + page_group->Finalize(); // Release the memory reservation. resource_limit->Release(LimitableResource::PhysicalMemoryMax, size); diff --git a/src/core/hle/kernel/memory_types.h b/src/core/hle/kernel/memory_types.h index 3975507bd..92b8b37ac 100644 --- a/src/core/hle/kernel/memory_types.h +++ b/src/core/hle/kernel/memory_types.h @@ -14,4 +14,7 @@ constexpr std::size_t PageSize{1 << PageBits}; using Page = std::array<u8, PageSize>; +using KPhysicalAddress = PAddr; +using KProcessAddress = VAddr; + } // namespace Kernel diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 788ee2160..aca442196 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1485,7 +1485,7 @@ static Result MapProcessMemory(Core::System& system, VAddr dst_address, Handle p ResultInvalidMemoryRegion); // Create a new page group. - KPageGroup pg; + KPageGroup pg{system.Kernel(), dst_pt.GetBlockInfoManager()}; R_TRY(src_pt.MakeAndOpenPageGroup( std::addressof(pg), src_address, size / PageSize, KMemoryState::FlagCanMapProcess, KMemoryState::FlagCanMapProcess, KMemoryPermission::None, KMemoryPermission::None, diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index d1cbadde4..f4416f5b2 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -312,8 +312,6 @@ void NVFlinger::Compose() { } s64 NVFlinger::GetNextTicks() const { - static constexpr s64 max_hertz = 120LL; - const auto& settings = Settings::values; auto speed_scale = 1.f; if (settings.use_multi_core.GetValue()) { @@ -327,9 +325,11 @@ s64 NVFlinger::GetNextTicks() const { } } - const auto next_ticks = ((1000000000 * (1LL << swap_interval)) / max_hertz); + // As an extension, treat nonpositive swap interval as framerate multiplier. + const f32 effective_fps = swap_interval <= 0 ? 120.f * static_cast<f32>(1 - swap_interval) + : 60.f / static_cast<f32>(swap_interval); - return static_cast<s64>(speed_scale * static_cast<float>(next_ticks)); + return static_cast<s64>(speed_scale * (1000000000.f / effective_fps)); } } // namespace Service::NVFlinger diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 9b22397db..3828cf272 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -133,7 +133,7 @@ private: /// layers. u32 next_buffer_queue_id = 1; - u32 swap_interval = 1; + s32 swap_interval = 1; /// Event that handles screen composition. std::shared_ptr<Core::Timing::EventType> multi_composition_event; diff --git a/src/core/internal_network/network.cpp b/src/core/internal_network/network.cpp index 447fbffaa..282ea1ff9 100644 --- a/src/core/internal_network/network.cpp +++ b/src/core/internal_network/network.cpp @@ -117,6 +117,8 @@ Errno TranslateNativeError(int e) { return Errno::NETUNREACH; case WSAEMSGSIZE: return Errno::MSGSIZE; + case WSAETIMEDOUT: + return Errno::TIMEDOUT; default: UNIMPLEMENTED_MSG("Unimplemented errno={}", e); return Errno::OTHER; @@ -211,6 +213,8 @@ Errno TranslateNativeError(int e) { return Errno::NETUNREACH; case EMSGSIZE: return Errno::MSGSIZE; + case ETIMEDOUT: + return Errno::TIMEDOUT; default: UNIMPLEMENTED_MSG("Unimplemented errno={}", e); return Errno::OTHER; @@ -226,7 +230,7 @@ Errno GetAndLogLastError() { int e = errno; #endif const Errno err = TranslateNativeError(e); - if (err == Errno::AGAIN) { + if (err == Errno::AGAIN || err == Errno::TIMEDOUT) { return err; } LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e)); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 26be74df4..4e605fae4 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -383,6 +383,10 @@ struct Memory::Impl { return; } + if (Settings::IsFastmemEnabled()) { + system.DeviceMemory().buffer.Protect(vaddr, size, !debug, !debug); + } + // Iterate over a contiguous CPU address space, marking/unmarking the region. // The region is at a granularity of CPU pages. @@ -436,7 +440,7 @@ struct Memory::Impl { } if (Settings::IsFastmemEnabled()) { - const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; + const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached; system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); } diff --git a/src/input_common/drivers/sdl_driver.cpp b/src/input_common/drivers/sdl_driver.cpp index 4818bb744..9835d99d2 100644 --- a/src/input_common/drivers/sdl_driver.cpp +++ b/src/input_common/drivers/sdl_driver.cpp @@ -40,25 +40,26 @@ public: } void EnableMotion() { - if (sdl_controller) { - SDL_GameController* controller = sdl_controller.get(); - has_accel = SDL_GameControllerHasSensor(controller, SDL_SENSOR_ACCEL) == SDL_TRUE; - has_gyro = SDL_GameControllerHasSensor(controller, SDL_SENSOR_GYRO) == SDL_TRUE; - if (has_accel) { - SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_ACCEL, SDL_TRUE); - } - if (has_gyro) { - SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); - } + if (!sdl_controller) { + return; + } + SDL_GameController* controller = sdl_controller.get(); + if (HasMotion()) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_ACCEL, SDL_FALSE); + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_FALSE); + } + has_accel = SDL_GameControllerHasSensor(controller, SDL_SENSOR_ACCEL) == SDL_TRUE; + has_gyro = SDL_GameControllerHasSensor(controller, SDL_SENSOR_GYRO) == SDL_TRUE; + if (has_accel) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_ACCEL, SDL_TRUE); + } + if (has_gyro) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); } } - bool HasGyro() const { - return has_gyro; - } - - bool HasAccel() const { - return has_accel; + bool HasMotion() const { + return has_gyro || has_accel; } bool UpdateMotion(SDL_ControllerSensorEvent event) { @@ -85,6 +86,20 @@ public: if (time_difference == 0) { return false; } + + // Motion data is invalid + if (motion.accel_x == 0 && motion.gyro_x == 0 && motion.accel_y == 0 && + motion.gyro_y == 0 && motion.accel_z == 0 && motion.gyro_z == 0) { + if (motion_error_count++ < 200) { + return false; + } + // Try restarting the sensor + motion_error_count = 0; + EnableMotion(); + return false; + } + + motion_error_count = 0; motion.delta_timestamp = time_difference * 1000; return true; } @@ -250,6 +265,7 @@ private: mutable std::mutex mutex; u64 last_motion_update{}; + std::size_t motion_error_count{}; bool has_gyro{false}; bool has_accel{false}; bool has_vibration{false}; @@ -942,18 +958,18 @@ MotionMapping SDLDriver::GetMotionMappingForDevice(const Common::ParamPackage& p MotionMapping mapping = {}; joystick->EnableMotion(); - if (joystick->HasGyro() || joystick->HasAccel()) { + if (joystick->HasMotion()) { mapping.insert_or_assign(Settings::NativeMotion::MotionRight, BuildMotionParam(joystick->GetPort(), joystick->GetGUID())); } if (params.Has("guid2")) { joystick2->EnableMotion(); - if (joystick2->HasGyro() || joystick2->HasAccel()) { + if (joystick2->HasMotion()) { mapping.insert_or_assign(Settings::NativeMotion::MotionLeft, BuildMotionParam(joystick2->GetPort(), joystick2->GetGUID())); } } else { - if (joystick->HasGyro() || joystick->HasAccel()) { + if (joystick->HasMotion()) { mapping.insert_or_assign(Settings::NativeMotion::MotionLeft, BuildMotionParam(joystick->GetPort(), joystick->GetGUID())); } diff --git a/src/input_common/drivers/tas_input.cpp b/src/input_common/drivers/tas_input.cpp index f3ade90da..f3cb14c56 100644 --- a/src/input_common/drivers/tas_input.cpp +++ b/src/input_common/drivers/tas_input.cpp @@ -156,10 +156,12 @@ void Tas::RecordInput(u64 buttons, TasAnalog left_axis, TasAnalog right_axis) { }; } -std::tuple<TasState, size_t, size_t> Tas::GetStatus() const { +std::tuple<TasState, size_t, std::array<size_t, PLAYER_NUMBER>> Tas::GetStatus() const { TasState state; + std::array<size_t, PLAYER_NUMBER> lengths{0}; if (is_recording) { - return {TasState::Recording, 0, record_commands.size()}; + lengths[0] = record_commands.size(); + return {TasState::Recording, record_commands.size(), lengths}; } if (is_running) { @@ -168,7 +170,11 @@ std::tuple<TasState, size_t, size_t> Tas::GetStatus() const { state = TasState::Stopped; } - return {state, current_command, script_length}; + for (size_t i = 0; i < PLAYER_NUMBER; i++) { + lengths[i] = commands[i].size(); + } + + return {state, current_command, lengths}; } void Tas::UpdateThread() { diff --git a/src/input_common/drivers/tas_input.h b/src/input_common/drivers/tas_input.h index 38a27a230..5be66d142 100644 --- a/src/input_common/drivers/tas_input.h +++ b/src/input_common/drivers/tas_input.h @@ -124,7 +124,7 @@ public: * Current playback progress ; * Total length of script file currently loaded or being recorded */ - std::tuple<TasState, size_t, size_t> GetStatus() const; + std::tuple<TasState, size_t, std::array<size_t, PLAYER_NUMBER>> GetStatus() const; private: enum class TasAxis : u8; diff --git a/src/input_common/helpers/stick_from_buttons.cpp b/src/input_common/helpers/stick_from_buttons.cpp index 82aa6ac2f..f3a0b3419 100644 --- a/src/input_common/helpers/stick_from_buttons.cpp +++ b/src/input_common/helpers/stick_from_buttons.cpp @@ -13,11 +13,11 @@ class Stick final : public Common::Input::InputDevice { public: using Button = std::unique_ptr<Common::Input::InputDevice>; - Stick(Button up_, Button down_, Button left_, Button right_, Button modifier_, + Stick(Button up_, Button down_, Button left_, Button right_, Button modifier_, Button updater_, float modifier_scale_, float modifier_angle_) : up(std::move(up_)), down(std::move(down_)), left(std::move(left_)), - right(std::move(right_)), modifier(std::move(modifier_)), modifier_scale(modifier_scale_), - modifier_angle(modifier_angle_) { + right(std::move(right_)), modifier(std::move(modifier_)), updater(std::move(updater_)), + modifier_scale(modifier_scale_), modifier_angle(modifier_angle_) { up->SetCallback({ .on_change = [this](const Common::Input::CallbackStatus& callback_) { @@ -48,6 +48,9 @@ public: UpdateModButtonStatus(callback_); }, }); + updater->SetCallback({ + .on_change = [this](const Common::Input::CallbackStatus& callback_) { SoftUpdate(); }, + }); last_x_axis_value = 0.0f; last_y_axis_value = 0.0f; } @@ -248,7 +251,7 @@ public: modifier->ForceUpdate(); } - void SoftUpdate() override { + void SoftUpdate() { Common::Input::CallbackStatus status{ .type = Common::Input::InputType::Stick, .stick_status = GetStatus(), @@ -308,6 +311,7 @@ private: Button left; Button right; Button modifier; + Button updater; float modifier_scale{}; float modifier_angle{}; float angle{}; @@ -331,11 +335,12 @@ std::unique_ptr<Common::Input::InputDevice> StickFromButton::Create( auto left = Common::Input::CreateInputDeviceFromString(params.Get("left", null_engine)); auto right = Common::Input::CreateInputDeviceFromString(params.Get("right", null_engine)); auto modifier = Common::Input::CreateInputDeviceFromString(params.Get("modifier", null_engine)); + auto updater = Common::Input::CreateInputDeviceFromString("engine:updater,button:0"); auto modifier_scale = params.Get("modifier_scale", 0.5f); auto modifier_angle = params.Get("modifier_angle", 5.5f); return std::make_unique<Stick>(std::move(up), std::move(down), std::move(left), - std::move(right), std::move(modifier), modifier_scale, - modifier_angle); + std::move(right), std::move(modifier), std::move(updater), + modifier_scale, modifier_angle); } } // namespace InputCommon diff --git a/src/input_common/input_mapping.cpp b/src/input_common/input_mapping.cpp index edd5287c1..d6e49d2c5 100644 --- a/src/input_common/input_mapping.cpp +++ b/src/input_common/input_mapping.cpp @@ -76,7 +76,7 @@ void MappingFactory::RegisterButton(const MappingData& data) { break; case EngineInputType::Analog: // Ignore mouse axis when mapping buttons - if (data.engine == "mouse") { + if (data.engine == "mouse" && data.index != 4) { return; } new_input.Set("axis", data.index); diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp index 4dc92f482..e0b2131ed 100644 --- a/src/input_common/main.cpp +++ b/src/input_common/main.cpp @@ -28,6 +28,28 @@ namespace InputCommon { +/// Dummy engine to get periodic updates +class UpdateEngine final : public InputEngine { +public: + explicit UpdateEngine(std::string input_engine_) : InputEngine(std::move(input_engine_)) { + PreSetController(identifier); + } + + void PumpEvents() { + SetButton(identifier, 0, last_state); + last_state = !last_state; + } + +private: + static constexpr PadIdentifier identifier = { + .guid = Common::UUID{}, + .port = 0, + .pad = 0, + }; + + bool last_state{}; +}; + struct InputSubsystem::Impl { template <typename Engine> void RegisterEngine(std::string name, std::shared_ptr<Engine>& engine) { @@ -45,6 +67,7 @@ struct InputSubsystem::Impl { void Initialize() { mapping_factory = std::make_shared<MappingFactory>(); + RegisterEngine("updater", update_engine); RegisterEngine("keyboard", keyboard); RegisterEngine("mouse", mouse); RegisterEngine("touch", touch_screen); @@ -74,6 +97,7 @@ struct InputSubsystem::Impl { } void Shutdown() { + UnregisterEngine(update_engine); UnregisterEngine(keyboard); UnregisterEngine(mouse); UnregisterEngine(touch_screen); @@ -252,6 +276,7 @@ struct InputSubsystem::Impl { } void PumpEvents() const { + update_engine->PumpEvents(); #ifdef HAVE_SDL2 sdl->PumpEvents(); #endif @@ -263,6 +288,7 @@ struct InputSubsystem::Impl { std::shared_ptr<MappingFactory> mapping_factory; + std::shared_ptr<UpdateEngine> update_engine; std::shared_ptr<Keyboard> keyboard; std::shared_ptr<Mouse> mouse; std::shared_ptr<TouchScreen> touch_screen; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index db9c94ce8..0cd87a48f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::PositionY: case IR::Attribute::PositionZ: case IR::Attribute::PositionW: - return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, - ctx.Const(element))); + return ctx.OpLoad( + ctx.F32[1], + ctx.need_input_position_indirect + ? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value, + ctx.Const(element)) + : AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element))); case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 2c90f2368..c5db19d09 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value); } -Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { - const Id thirty_two{ctx.Const(32u)}; - const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; - const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; - return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); +Id AddPartitionBase(EmitContext& ctx, Id thread_id) { + const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))}; + const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))}; + return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base); } } // Anonymous namespace @@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) { Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; - const Id thread_id{GetThreadId(ctx)}; - if (ctx.profile.warp_size_potentially_larger_than_guest) { - const Id thirty_two{ctx.Const(32u)}; - const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)}; - const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)}; - const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; - index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index); - clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); - } + const Id thread_id{EmitLaneId(ctx)}; const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; - const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; + Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + src_thread_id = AddPartitionBase(ctx, src_thread_id); + } + SetInBoundsFlag(inst, in_range); return SelectValue(ctx, in_range, value, src_thread_id); } Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{GetThreadId(ctx)}; - if (ctx.profile.warp_size_potentially_larger_than_guest) { - clamp = GetUpperClamp(ctx, thread_id, clamp); - } + const Id thread_id{EmitLaneId(ctx)}; const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; - const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; + Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + src_thread_id = AddPartitionBase(ctx, src_thread_id); + } + SetInBoundsFlag(inst, in_range); return SelectValue(ctx, in_range, value, src_thread_id); } Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{GetThreadId(ctx)}; - if (ctx.profile.warp_size_potentially_larger_than_guest) { - clamp = GetUpperClamp(ctx, thread_id, clamp); - } + const Id thread_id{EmitLaneId(ctx)}; const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; - const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; + Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + src_thread_id = AddPartitionBase(ctx, src_thread_id); + } + SetInBoundsFlag(inst, in_range); return SelectValue(ctx, in_range, value, src_thread_id); } Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{GetThreadId(ctx)}; - if (ctx.profile.warp_size_potentially_larger_than_guest) { - clamp = GetUpperClamp(ctx, thread_id, clamp); - } + const Id thread_id{EmitLaneId(ctx)}; const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; - const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; + Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + src_thread_id = AddPartitionBase(ctx, src_thread_id); + } + SetInBoundsFlag(inst, in_range); return SelectValue(ctx, in_range, value, src_thread_id); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index ecb2db494..a0c155fdb 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -544,7 +544,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { U16 = Name(TypeInt(16, false), "u16"); S16 = Name(TypeInt(16, true), "s16"); } - if (info.uses_int64) { + if (info.uses_int64 && profile.support_int64) { AddCapability(spv::Capability::Int64); U64 = Name(TypeInt(64, false), "u64"); } @@ -721,9 +721,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { size_t label_index{0}; if (info.loads.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); - const Id pointer{is_array - ? OpAccessChain(input_f32, input_position, vertex, masked_index) - : OpAccessChain(input_f32, input_position, masked_index)}; + const Id pointer{[&]() { + if (need_input_position_indirect) { + if (is_array) + return OpAccessChain(input_f32, input_position, vertex, u32_zero_value, + masked_index); + else + return OpAccessChain(input_f32, input_position, u32_zero_value, + masked_index); + } else { + if (is_array) + return OpAccessChain(input_f32, input_position, vertex, masked_index); + else + return OpAccessChain(input_f32, input_position, masked_index); + } + }()}; const Id result{OpLoad(F32[1], pointer)}; OpReturnValue(result); ++label_index; @@ -1367,12 +1379,25 @@ void EmitContext::DefineInputs(const IR::Program& program) { Decorate(layer, spv::Decoration::Flat); } if (loads.AnyComponent(IR::Attribute::PositionX)) { - const bool is_fragment{stage != Stage::Fragment}; - const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; - input_position = DefineInput(*this, F32[4], true, built_in); - if (profile.support_geometry_shader_passthrough) { - if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { - Decorate(input_position, spv::Decoration::PassthroughNV); + const bool is_fragment{stage == Stage::Fragment}; + if (!is_fragment && profile.has_broken_spirv_position_input) { + need_input_position_indirect = true; + + const Id input_position_struct = TypeStruct(F32[4]); + input_position = DefineInput(*this, input_position_struct, true); + + MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn, + static_cast<unsigned>(spv::BuiltIn::Position)); + Decorate(input_position_struct, spv::Decoration::Block); + } else { + const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord + : spv::BuiltIn::Position}; + input_position = DefineInput(*this, F32[4], true, built_in); + + if (profile.support_geometry_shader_passthrough) { + if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + Decorate(input_position, spv::Decoration::PassthroughNV); + } } } } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 4414a5169..dbc5c55b9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -280,6 +280,7 @@ public: Id write_global_func_u32x2{}; Id write_global_func_u32x4{}; + bool need_input_position_indirect{}; Id input_position{}; std::array<Id, 32> input_generics{}; diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index ac159d24b..a42453e90 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings( } return mapping; } + +void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program, + const Shader::VaryingState& passthrough_mask, + bool passthrough_position, + std::optional<IR::Attribute> passthrough_layer_attr) { + for (u32 i = 0; i < program.output_vertices; i++) { + // Assign generics from input + for (u32 j = 0; j < 32; j++) { + if (!passthrough_mask.Generic(j)) { + continue; + } + + const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4); + ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); + } + + if (passthrough_position) { + // Assign position from input + const IR::Attribute attr = IR::Attribute::PositionX; + ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); + } + + if (passthrough_layer_attr) { + // Assign layer + ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr), + ir.Imm32(0)); + } + + // Emit vertex + ir.EmitVertex(ir.Imm32(0)); + } + ir.EndPrimitive(ir.Imm32(0)); +} + +u32 GetOutputTopologyVertices(OutputTopology output_topology) { + switch (output_topology) { + case OutputTopology::PointList: + return 1; + case OutputTopology::LineStrip: + return 2; + default: + return 3; + } +} + +void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + EmitGeometryPassthrough( + ir, program, program.info.passthrough, + program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {}); + } + } + } +} + } // Anonymous namespace IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, @@ -195,9 +259,14 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; if (program.is_geometry_passthrough) { const auto& mask{env.GpPassthroughMask()}; - for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { + for (size_t i = 0; i < mask.size() * 32; ++i) { program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; } + + if (!host_info.support_geometry_shader_passthrough) { + program.output_vertices = GetOutputTopologyVertices(program.output_topology); + LowerGeometryPassthrough(program, host_info); + } } break; } @@ -223,7 +292,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo Optimization::PositionPass(env, program); - Optimization::GlobalMemoryToStorageBufferPass(program); + Optimization::GlobalMemoryToStorageBufferPass(program, host_info); Optimization::TexturePass(env, program, host_info); if (Settings::values.resolution_info.active) { @@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool, IR::Program program; program.stage = Stage::Geometry; program.output_topology = output_topology; - switch (output_topology) { - case OutputTopology::PointList: - program.output_vertices = 1; - break; - case OutputTopology::LineStrip: - program.output_vertices = 2; - break; - default: - program.output_vertices = 3; - break; - } + program.output_vertices = GetOutputTopologyVertices(output_topology); program.is_geometry_passthrough = false; program.info.loads.mask = source_program.info.stores.mask; @@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool, node.data.block = current_block; IR::IREmitter ir{*current_block}; - for (u32 i = 0; i < program.output_vertices; i++) { - // Assign generics from input - for (u32 j = 0; j < 32; j++) { - if (!program.info.stores.Generic(j)) { - continue; - } - - const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4); - ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); - } - - // Assign position from input - const IR::Attribute attr = IR::Attribute::PositionX; - ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); - - // Assign layer - ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer), - ir.Imm32(0)); - - // Emit vertex - ir.EmitVertex(ir.Imm32(0)); - } - ir.EndPrimitive(ir.Imm32(0)); + EmitGeometryPassthrough(ir, program, program.info.stores, true, + source_program.info.emulated_layer); IR::Block* return_block{block_pool.Create(inst_pool)}; IR::IREmitter{*return_block}.Epilogue(); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index d5d279554..55fc48768 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -15,6 +15,9 @@ struct HostTranslateInfo { bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS + u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs + bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry + ///< passthrough shaders }; } // namespace Shader diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 336338e62..9101722ba 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -11,6 +11,7 @@ #include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { @@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) } /// Returns the offset in indices (not bytes) for an equivalent storage instruction -IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { +IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::U32 offset; if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { @@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer } // Subtract the least significant 32 bits from the guest offset. The result is the storage // buffer offset in bytes. - const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; + IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; + + // Align the offset base to match the host alignment requirements + low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))); return ir.ISub(offset, low_cbuf); } @@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, } } // Anonymous namespace -void GlobalMemoryToStorageBufferPass(IR::Program& program) { +void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) { StorageInfo info; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { @@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; IR::Block* const block{storage_inst.block}; IR::Inst* const inst{storage_inst.inst}; - const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; + const IR::U32 offset{ + StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)}; Replace(*block, *inst, index, offset); } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 1f8f2ba95..4ffad1172 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -15,7 +15,7 @@ namespace Shader::Optimization { void CollectShaderInfoPass(Environment& env, IR::Program& program); void ConstantPropagationPass(Environment& env, IR::Program& program); void DeadCodeEliminationPass(IR::Program& program); -void GlobalMemoryToStorageBufferPass(IR::Program& program); +void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info); void IdentityRemovalPass(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); void LowerInt64ToInt32(IR::Program& program); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index b8841a536..253e0d0bd 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -55,6 +55,8 @@ struct Profile { /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; + /// The Position builtin needs to be wrapped in a struct when used as an input + bool has_broken_spirv_position_input{}; /// Offset image operands with an unsigned type do not work bool has_broken_unsigned_image_offsets{}; /// Signed instructions with unsigned data types are misinterpreted diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 44236b6b1..f93181e1e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -65,6 +65,8 @@ enum class Interpolation { struct ConstantBufferDescriptor { u32 index; u32 count; + + auto operator<=>(const ConstantBufferDescriptor&) const = default; }; struct StorageBufferDescriptor { @@ -72,6 +74,8 @@ struct StorageBufferDescriptor { u32 cbuf_offset; u32 count; bool is_written; + + auto operator<=>(const StorageBufferDescriptor&) const = default; }; struct TextureBufferDescriptor { @@ -84,6 +88,8 @@ struct TextureBufferDescriptor { u32 secondary_shift_left; u32 count; u32 size_shift; + + auto operator<=>(const TextureBufferDescriptor&) const = default; }; using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>; @@ -95,6 +101,8 @@ struct ImageBufferDescriptor { u32 cbuf_offset; u32 count; u32 size_shift; + + auto operator<=>(const ImageBufferDescriptor&) const = default; }; using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>; @@ -110,6 +118,8 @@ struct TextureDescriptor { u32 secondary_shift_left; u32 count; u32 size_shift; + + auto operator<=>(const TextureDescriptor&) const = default; }; using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; @@ -122,6 +132,8 @@ struct ImageDescriptor { u32 cbuf_offset; u32 count; u32 size_shift; + + auto operator<=>(const ImageDescriptor&) const = default; }; using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp index 734dbf4b6..1275cca24 100644 --- a/src/tests/video_core/buffer_base.cpp +++ b/src/tests/video_core/buffer_base.cpp @@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") { int num = 0; buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); - REQUIRE(num == 0); + REQUIRE(num == 1); REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); buffer.FlushCachedWrites(); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index aa271a377..b474eb363 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -85,6 +85,7 @@ add_library(video_core STATIC gpu.h gpu_thread.cpp gpu_thread.h + invalidation_accumulator.h memory_manager.cpp memory_manager.h precompiled_headers.h @@ -99,6 +100,8 @@ add_library(video_core STATIC renderer_null/null_rasterizer.h renderer_null/renderer_null.cpp renderer_null/renderer_null.h + renderer_opengl/blit_image.cpp + renderer_opengl/blit_image.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_compute_pipeline.cpp @@ -190,6 +193,8 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_cache.cpp renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_texture_cache_base.cpp + renderer_vulkan/vk_turbo_mode.cpp + renderer_vulkan/vk_turbo_mode.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h shader_cache.cpp diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 92d77eef2..c47b7d866 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -430,7 +430,7 @@ private: if (query_begin >= SizeBytes() || size < 0) { return; } - u64* const untracked_words = Array<Type::Untracked>(); + [[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>(); u64* const state_words = Array<type>(); const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; @@ -483,7 +483,7 @@ private: NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); } // Exclude CPU modified pages when visiting GPU pages - const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); + const u64 word = current_word; u64 page = page_begin; page_begin = 0; @@ -531,7 +531,7 @@ private: [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { static_assert(type != Type::Untracked); - const u64* const untracked_words = Array<Type::Untracked>(); + [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>(); const u64* const state_words = Array<type>(); const u64 num_query_words = size / BYTES_PER_WORD + 1; const u64 word_begin = offset / BYTES_PER_WORD; @@ -539,8 +539,7 @@ private: const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { - const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; - const u64 word = state_words[word_index] & ~off_word; + const u64 word = state_words[word_index]; if (word == 0) { continue; } @@ -564,7 +563,7 @@ private: [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { static_assert(type != Type::Untracked); - const u64* const untracked_words = Array<Type::Untracked>(); + [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>(); const u64* const state_words = Array<type>(); const u64 num_query_words = size / BYTES_PER_WORD + 1; const u64 word_begin = offset / BYTES_PER_WORD; @@ -574,8 +573,7 @@ private: u64 begin = std::numeric_limits<u64>::max(); u64 end = 0; for (u64 word_index = word_begin; word_index < word_end; ++word_index) { - const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; - const u64 word = state_words[word_index] & ~off_word; + const u64 word = state_words[word_index]; if (word == 0) { continue; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 06fd40851..627917ab6 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1938,14 +1938,21 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s bool is_written) const { const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8); - const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); + const u32 alignment = runtime.GetStorageBufferAlignment(); + + const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); + const u32 aligned_size = + Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment); + + const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); if (!cpu_addr || size == 0) { return NULL_BINDING; } - const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); + + const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE); const Binding binding{ .cpu_addr = *cpu_addr, - .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), + .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr), .buffer_id = BufferId{}, }; return binding; diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 2437121ce..1d22d25f1 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -51,6 +51,10 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) { LOG_WARNING(HW_GPU, "(STUBBED) called"); break; } + case MAXWELL3D_REG_INDEX(draw_texture.src_y0): { + DrawTexture(); + break; + } default: break; } @@ -179,6 +183,33 @@ void DrawManager::DrawIndexSmall(u32 argument) { ProcessDraw(true, 1); } +void DrawManager::DrawTexture() { + const auto& regs{maxwell3d->regs}; + draw_texture_state.dst_x0 = static_cast<float>(regs.draw_texture.dst_x0) / 4096.f; + draw_texture_state.dst_y0 = static_cast<float>(regs.draw_texture.dst_y0) / 4096.f; + const auto dst_width = static_cast<float>(regs.draw_texture.dst_width) / 4096.f; + const auto dst_height = static_cast<float>(regs.draw_texture.dst_height) / 4096.f; + const bool lower_left{regs.window_origin.mode != + Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft}; + if (lower_left) { + draw_texture_state.dst_y0 -= dst_height; + } + draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width; + draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height; + draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f; + draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f; + draw_texture_state.src_x1 = + (static_cast<float>(regs.draw_texture.dx_du) / 4294967296.f) * dst_width + + draw_texture_state.src_x0; + draw_texture_state.src_y1 = + (static_cast<float>(regs.draw_texture.dy_dv) / 4294967296.f) * dst_height + + draw_texture_state.src_y0; + draw_texture_state.src_sampler = regs.draw_texture.src_sampler; + draw_texture_state.src_texture = regs.draw_texture.src_texture; + + maxwell3d->rasterizer->DrawTexture(); +} + void DrawManager::UpdateTopology() { const auto& regs{maxwell3d->regs}; switch (regs.primitive_topology_control) { diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 58d1b2d59..7c22c49f1 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -32,6 +32,19 @@ public: std::vector<u8> inline_index_draw_indexes; }; + struct DrawTextureState { + f32 dst_x0; + f32 dst_y0; + f32 dst_x1; + f32 dst_y1; + f32 src_x0; + f32 src_y0; + f32 src_x1; + f32 src_y1; + u32 src_sampler; + u32 src_texture; + }; + struct IndirectParams { bool is_indexed; bool include_count; @@ -64,6 +77,10 @@ public: return draw_state; } + const DrawTextureState& GetDrawTextureState() const { + return draw_texture_state; + } + IndirectParams& GetIndirectParams() { return indirect_state; } @@ -81,6 +98,8 @@ private: void DrawIndexSmall(u32 argument); + void DrawTexture(); + void UpdateTopology(); void ProcessDraw(bool draw_indexed, u32 instance_count); @@ -89,6 +108,7 @@ private: Maxwell3D* maxwell3d{}; State draw_state{}; + DrawTextureState draw_texture_state{}; IndirectParams indirect_state{}; }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index cea1dd8b0..7f5a0c29d 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) { regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, x_elements, regs.line_count, regs.dest.BlockHeight(), regs.dest.BlockDepth(), regs.line_length_in); - memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); + memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size); } } diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index e655e7254..a126c359c 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/sw_blitter/blitter.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" #include "video_core/textures/decoders.h" @@ -20,8 +21,8 @@ namespace Tegra::Engines { using namespace Texture; -Fermi2D::Fermi2D(MemoryManager& memory_manager_) { - sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_); +Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} { + sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager); // Nvidia's OpenGL driver seems to assume these values regs.src.depth = 1; regs.dst.depth = 1; @@ -104,6 +105,7 @@ void Fermi2D::Blit() { config.src_x0 = 0; } + memory_manager.FlushCaching(); if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { sw_blitter->Blit(src, regs.dst, config); } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 523fbdec2..705b323e1 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -305,6 +305,7 @@ public: private: VideoCore::RasterizerInterface* rasterizer = nullptr; std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter; + MemoryManager& memory_manager; /// Performs the copy from the source surface to the destination surface as configured in the /// registers. diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index fbfd1ddd2..ae9da6290 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -149,6 +149,7 @@ bool Maxwell3D::IsMethodExecutable(u32 method) { case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): case MAXWELL3D_REG_INDEX(vertex_array_instance_first): case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): + case MAXWELL3D_REG_INDEX(draw_texture.src_y0): case MAXWELL3D_REG_INDEX(wait_for_idle): case MAXWELL3D_REG_INDEX(shadow_ram_control): case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr): @@ -467,7 +468,7 @@ void Maxwell3D::ProcessMacroBind(u32 data) { } void Maxwell3D::ProcessFirmwareCall4() { - LOG_WARNING(HW_GPU, "(STUBBED) called"); + LOG_DEBUG(HW_GPU, "(STUBBED) called"); // Firmware call 4 is a blob that changes some registers depending on its parameters. // These registers don't affect emulation and so are stubbed by setting 0xd00 to 1. @@ -485,11 +486,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { } void Maxwell3D::ProcessQueryGet() { - // TODO(Subv): Support the other query units. - if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) { - LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented"); - } - switch (regs.report_semaphore.query.operation) { case Regs::ReportSemaphore::Operation::Release: if (regs.report_semaphore.query.short_query != 0) { @@ -649,7 +645,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) { const GPUVAddr address{buffer_address + regs.const_buffer.offset}; const size_t copy_size = amount * sizeof(u32); - memory_manager.WriteBlock(address, start_base, copy_size); + memory_manager.WriteBlockCached(address, start_base, copy_size); // Increment the current buffer position. regs.const_buffer.offset += static_cast<u32>(copy_size); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0b2fd2928..c89969bb4 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1599,6 +1599,20 @@ public: }; static_assert(sizeof(TIRModulationCoeff) == 0x4); + struct DrawTexture { + s32 dst_x0; + s32 dst_y0; + s32 dst_width; + s32 dst_height; + s64 dx_du; + s64 dy_dv; + u32 src_sampler; + u32 src_texture; + s32 src_x0; + s32 src_y0; + }; + static_assert(sizeof(DrawTexture) == 0x30); + struct ReduceColorThreshold { union { BitField<0, 8, u32> all_hit_once; @@ -2751,7 +2765,7 @@ public: u32 reserved_sw_method2; ///< 0x102C std::array<TIRModulationCoeff, 5> tir_modulation_coeff; ///< 0x1030 std::array<u32, 15> spare_nop; ///< 0x1044 - INSERT_PADDING_BYTES_NOINIT(0x30); + DrawTexture draw_texture; ///< 0x1080 std::array<u32, 7> reserved_sw_method3_to_7; ///< 0x10B0 ReduceColorThreshold reduce_color_thresholds_unorm8; ///< 0x10CC std::array<u32, 4> reserved_sw_method10_to_13; ///< 0x10D0 diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 01f70ea9e..7762c7d96 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -69,7 +69,7 @@ void MaxwellDMA::Launch() { if (launch.multi_line_enable) { const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; - + memory_manager.FlushCaching(); if (!is_src_pitch && !is_dst_pitch) { // If both the source and the destination are in block layout, assert. CopyBlockLinearToBlockLinear(); @@ -104,6 +104,7 @@ void MaxwellDMA::Launch() { reinterpret_cast<u8*>(tmp_buffer.data()), regs.line_length_in * sizeof(u32)); } else { + memory_manager.FlushCaching(); const auto convert_linear_2_blocklinear_addr = [](u64 address) { return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | ((address & 0x180) >> 1) | ((address & 0x20) << 3); @@ -121,8 +122,8 @@ void MaxwellDMA::Launch() { memory_manager.ReadBlockUnsafe( convert_linear_2_blocklinear_addr(regs.offset_in + offset), tmp_buffer.data(), tmp_buffer.size()); - memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(), - tmp_buffer.size()); + memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), + tmp_buffer.size()); } } else if (is_src_pitch && !is_dst_pitch) { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); @@ -132,7 +133,7 @@ void MaxwellDMA::Launch() { for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), tmp_buffer.size()); - memory_manager.WriteBlock( + memory_manager.WriteBlockCached( convert_linear_2_blocklinear_addr(regs.offset_out + offset), tmp_buffer.data(), tmp_buffer.size()); } @@ -141,8 +142,8 @@ void MaxwellDMA::Launch() { std::vector<u8> tmp_buffer(regs.line_length_in); memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); - memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), - regs.line_length_in); + memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), + regs.line_length_in); } } } @@ -204,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, regs.pitch_out); - memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::CopyPitchToBlockLinear() { @@ -256,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() { dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, regs.pitch_in); - memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::FastCopyBlockLinearToPitch() { @@ -287,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { regs.src_params.block_size.height, regs.src_params.block_size.depth, regs.pitch_out); - memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::CopyBlockLinearToBlockLinear() { @@ -347,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, dst.block_size.height, dst.block_size.depth, pitch); - memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::ReleaseSemaphore() { diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index e6dc24f22..e968ae220 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -11,6 +11,7 @@ set(GLSL_INCLUDES set(SHADER_FILES astc_decoder.comp + blit_color_float.frag block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp convert_abgr8_to_d24s8.frag @@ -36,7 +37,6 @@ set(SHADER_FILES smaa_blending_weight_calculation.frag smaa_neighborhood_blending.vert smaa_neighborhood_blending.frag - vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag vulkan_fidelityfx_fsr_easu_fp16.comp vulkan_fidelityfx_fsr_easu_fp32.comp @@ -47,6 +47,7 @@ set(SHADER_FILES vulkan_present_scaleforce_fp16.frag vulkan_present_scaleforce_fp32.frag vulkan_quad_indexed.comp + vulkan_turbo_mode.comp vulkan_uint8.comp ) diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/blit_color_float.frag index c0c832296..c0c832296 100644 --- a/src/video_core/host_shaders/vulkan_blit_color_float.frag +++ b/src/video_core/host_shaders/blit_color_float.frag diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert index 2c976b19f..d16d98995 100644 --- a/src/video_core/host_shaders/full_screen_triangle.vert +++ b/src/video_core/host_shaders/full_screen_triangle.vert @@ -4,13 +4,20 @@ #version 450 #ifdef VULKAN +#define VERTEX_ID gl_VertexIndex #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { #define END_PUSH_CONSTANTS }; #define UNIFORM(n) +#define FLIPY 1 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv +#define VERTEX_ID gl_VertexID #define BEGIN_PUSH_CONSTANTS #define END_PUSH_CONSTANTS +#define FLIPY -1 #define UNIFORM(n) layout (location = n) uniform +out gl_PerVertex { + vec4 gl_Position; +}; #endif BEGIN_PUSH_CONSTANTS @@ -21,8 +28,8 @@ END_PUSH_CONSTANTS layout(location = 0) out vec2 texcoord; void main() { - float x = float((gl_VertexIndex & 1) << 2); - float y = float((gl_VertexIndex & 2) << 1); - gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); + float x = float((VERTEX_ID & 1) << 2); + float y = float((VERTEX_ID & 2) << 1); + gl_Position = vec4(x - 1.0, FLIPY * (y - 1.0), 0.0, 1.0); texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset); } diff --git a/src/video_core/host_shaders/vulkan_turbo_mode.comp b/src/video_core/host_shaders/vulkan_turbo_mode.comp new file mode 100644 index 000000000..d651001d9 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_turbo_mode.comp @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core + +layout (local_size_x = 16, local_size_y = 8, local_size_z = 1) in; + +layout (binding = 0) buffer ThreadData { + uint data[]; +}; + +uint xorshift32(uint x) { + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return x; +} + +uint getGlobalIndex() { + return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * gl_WorkGroupSize.y * gl_NumWorkGroups.y; +} + +void main() { + uint myIndex = xorshift32(getGlobalIndex()); + uint otherIndex = xorshift32(myIndex); + + uint otherValue = atomicAdd(data[otherIndex % data.length()], 0) + 1; + atomicAdd(data[myIndex % data.length()], otherValue); +} diff --git a/src/video_core/invalidation_accumulator.h b/src/video_core/invalidation_accumulator.h new file mode 100644 index 000000000..2c2aaf7bb --- /dev/null +++ b/src/video_core/invalidation_accumulator.h @@ -0,0 +1,79 @@ +// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <utility> +#include <vector> + +#include "common/common_types.h" + +namespace VideoCommon { + +class InvalidationAccumulator { +public: + InvalidationAccumulator() = default; + ~InvalidationAccumulator() = default; + + void Add(GPUVAddr address, size_t size) { + const auto reset_values = [&]() { + if (has_collected) { + buffer.emplace_back(start_address, accumulated_size); + } + start_address = address; + accumulated_size = size; + last_collection = start_address + size; + }; + if (address >= start_address && address + size <= last_collection) [[likely]] { + return; + } + size = ((address + size + atomicity_size_mask) & atomicity_mask) - address; + address = address & atomicity_mask; + if (!has_collected) [[unlikely]] { + reset_values(); + has_collected = true; + return; + } + if (address != last_collection) [[unlikely]] { + reset_values(); + return; + } + accumulated_size += size; + last_collection += size; + } + + void Clear() { + buffer.clear(); + start_address = 0; + last_collection = 0; + has_collected = false; + } + + bool AnyAccumulated() const { + return has_collected; + } + + template <typename Func> + void Callback(Func&& func) { + if (!has_collected) { + return; + } + buffer.emplace_back(start_address, accumulated_size); + for (auto& [address, size] : buffer) { + func(address, size); + } + } + +private: + static constexpr size_t atomicity_bits = 5; + static constexpr size_t atomicity_size = 1ULL << atomicity_bits; + static constexpr size_t atomicity_size_mask = atomicity_size - 1; + static constexpr size_t atomicity_mask = ~atomicity_size_mask; + GPUVAddr start_address{}; + GPUVAddr last_collection{}; + size_t accumulated_size{}; + bool has_collected{}; + std::vector<std::pair<VAddr, size_t>> buffer; +}; + +} // namespace VideoCommon diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index a5476e795..6272a4652 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -50,38 +50,6 @@ protected: Maxwell3D& maxwell3d; }; -class HLE_DrawArrays final : public HLEMacroImpl { -public: - explicit HLE_DrawArrays(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - - auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); - maxwell3d.draw_manager->DrawArray(topology, parameters[1], parameters[2], - maxwell3d.regs.global_base_instance_index, 1); - } -}; - -class HLE_DrawIndexed final : public HLEMacroImpl { -public: - explicit HLE_DrawIndexed(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - maxwell3d.regs.index_buffer.start_addr_high = parameters[1]; - maxwell3d.regs.index_buffer.start_addr_low = parameters[2]; - maxwell3d.regs.index_buffer.format = - static_cast<Engines::Maxwell3D::Regs::IndexFormat>(parameters[3]); - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - - auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); - maxwell3d.draw_manager->DrawIndex(topology, 0, parameters[4], - maxwell3d.regs.global_base_vertex_index, - maxwell3d.regs.global_base_instance_index, 1); - } -}; - /* * @note: these macros have two versions, a normal and extended version, with the extended version * also assigning the base vertex/instance. @@ -497,11 +465,6 @@ public: } // Anonymous namespace HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { - builders.emplace(0xDD6A7FA92A7D2674ULL, - std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { - return std::make_unique<HLE_DrawArrays>(maxwell3d__); - })); builders.emplace(0x0D61FC9FAAC9FCADULL, std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { @@ -512,11 +475,6 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__); })); - builders.emplace(0x2DB33AADB741839CULL, - std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { - return std::make_unique<HLE_DrawIndexed>(maxwell3d__); - })); builders.emplace(0x771BB18C62444DA0ULL, std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 3a5cdeb39..3bcae3503 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -6,11 +6,13 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" +#include "common/settings.h" #include "core/core.h" #include "core/device_memory.h" #include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_process.h" #include "core/memory.h" +#include "video_core/invalidation_accumulator.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" @@ -26,7 +28,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits != big_page_bits ? page_bits : 0}, kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( - 1, std::memory_order_acq_rel)} { + 1, std::memory_order_acq_rel)}, + accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} { address_space_size = 1ULL << address_space_bits; page_size = 1ULL << page_bits; page_mask = page_size - 1ULL; @@ -43,6 +46,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_table_cpu.resize(big_page_table_size); big_page_continous.resize(big_page_table_size / continous_bits, 0); entries.resize(page_table_size / 32, 0); + if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) { + fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); + } else { + fastmem_arena = nullptr; + } } MemoryManager::~MemoryManager() = default; @@ -185,15 +193,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { if (size == 0) { return; } - const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); - - for (const auto& [map_addr, map_size] : submapped_ranges) { - // Flush and invalidate through the GPU interface, to be asynchronous if possible. - const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr); - ASSERT(cpu_addr); + GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash); - rasterizer->UnmapMemory(*cpu_addr, map_size); + for (const auto& [map_addr, map_size] : page_stash) { + rasterizer->UnmapMemory(map_addr, map_size); } + page_stash.clear(); BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); @@ -355,7 +360,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si } } -template <bool is_safe> +template <bool is_safe, bool use_fastmem> void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, [[maybe_unused]] VideoCommon::CacheType which) const { auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, @@ -369,8 +374,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: if constexpr (is_safe) { rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } - u8* physical = memory.GetPointer(cpu_addr_base); - std::memcpy(dest_buffer, physical, copy_amount); + if constexpr (use_fastmem) { + std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); + } else { + u8* physical = memory.GetPointer(cpu_addr_base); + std::memcpy(dest_buffer, physical, copy_amount); + } dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -379,11 +388,15 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: if constexpr (is_safe) { rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } - if (!IsBigPageContinous(page_index)) [[unlikely]] { - memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); + if constexpr (use_fastmem) { + std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); } else { - u8* physical = memory.GetPointer(cpu_addr_base); - std::memcpy(dest_buffer, physical, copy_amount); + if (!IsBigPageContinous(page_index)) [[unlikely]] { + memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); + } else { + u8* physical = memory.GetPointer(cpu_addr_base); + std::memcpy(dest_buffer, physical, copy_amount); + } } dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; }; @@ -397,12 +410,20 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, VideoCommon::CacheType which) const { - ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which); + if (fastmem_arena) [[likely]] { + ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which); + return; + } + ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which); } void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, const std::size_t size) const { - ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); + if (fastmem_arena) [[likely]] { + ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); + return; + } + ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); } template <bool is_safe> @@ -454,6 +475,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); } +void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, + std::size_t size) { + WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); + accumulator->Add(gpu_dest_addr, size); +} + void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, VideoCommon::CacheType which) const { auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, @@ -663,7 +690,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( GPUVAddr gpu_addr, std::size_t size) const { std::vector<std::pair<GPUVAddr, std::size_t>> result{}; - std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; + GetSubmappedRangeImpl<true>(gpu_addr, size, result); + return result; +} + +template <bool is_gpu_address> +void MemoryManager::GetSubmappedRangeImpl( + GPUVAddr gpu_addr, std::size_t size, + std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& + result) const { + std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> + last_segment{}; std::optional<VAddr> old_page_addr{}; const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, @@ -685,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( } old_page_addr = {cpu_addr_base + copy_amount}; if (!last_segment) { - const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; - last_segment = {new_base_addr, copy_amount}; + if constexpr (is_gpu_address) { + const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; + last_segment = {new_base_addr, copy_amount}; + } else { + last_segment = {cpu_addr_base, copy_amount}; + } } else { last_segment->second += copy_amount; } @@ -703,8 +744,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( } old_page_addr = {cpu_addr_base + copy_amount}; if (!last_segment) { - const GPUVAddr new_base_addr = (page_index << page_bits) + offset; - last_segment = {new_base_addr, copy_amount}; + if constexpr (is_gpu_address) { + const GPUVAddr new_base_addr = (page_index << page_bits) + offset; + last_segment = {new_base_addr, copy_amount}; + } else { + last_segment = {cpu_addr_base, copy_amount}; + } } else { last_segment->second += copy_amount; } @@ -715,7 +760,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( }; MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages); split(0, 0, 0); - return result; +} + +void MemoryManager::FlushCaching() { + if (!accumulator->AnyAccumulated()) { + return; + } + accumulator->Callback([this](GPUVAddr addr, size_t size) { + GetSubmappedRangeImpl<false>(addr, size, page_stash); + }); + rasterizer->InnerInvalidation(page_stash); + page_stash.clear(); + accumulator->Clear(); } } // namespace Tegra diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 828e13439..2936364f0 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -19,6 +19,10 @@ namespace VideoCore { class RasterizerInterface; } +namespace VideoCommon { +class InvalidationAccumulator; +} + namespace Core { class DeviceMemory; namespace Memory { @@ -80,6 +84,7 @@ public: */ void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); + void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); /** * Checks if a gpu region can be simply read with a pointer. @@ -129,12 +134,14 @@ public: size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size = std::numeric_limits<size_t>::max()) const; + void FlushCaching(); + private: template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const; - template <bool is_safe> + template <bool is_safe, bool use_fastmem> void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, VideoCommon::CacheType which) const; @@ -154,6 +161,12 @@ private: inline bool IsBigPageContinous(size_t big_page_index) const; inline void SetBigPageContinous(size_t big_page_index, bool value); + template <bool is_gpu_address> + void GetSubmappedRangeImpl( + GPUVAddr gpu_addr, std::size_t size, + std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& + result) const; + Core::System& system; Core::Memory::Memory& memory; Core::DeviceMemory& device_memory; @@ -201,10 +214,13 @@ private: Common::VirtualBuffer<u32> big_page_table_cpu; std::vector<u64> big_page_continous; + std::vector<std::pair<VAddr, std::size_t>> page_stash{}; + u8* fastmem_arena{}; constexpr static size_t continous_bits = 64; const size_t unique_identifier; + std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; static std::atomic<size_t> unique_identifier_generator; }; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index f44c7df50..33e2610bc 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -6,6 +6,7 @@ #include <functional> #include <optional> #include <span> +#include <utility> #include "common/common_types.h" #include "common/polyfill_thread.h" #include "video_core/cache_types.h" @@ -46,6 +47,9 @@ public: /// Dispatches an indirect draw invocation virtual void DrawIndirect() {} + /// Dispatches an draw texture invocation + virtual void DrawTexture() = 0; + /// Clear the current framebuffer virtual void Clear(u32 layer_count) = 0; @@ -95,6 +99,12 @@ public: virtual void InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; + virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { + for (const auto& [cpu_addr, size] : sequences) { + InvalidateRegion(cpu_addr, size); + } + } + /// Notify rasterizer that any caches of the specified region are desync with guest virtual void OnCPUWrite(VAddr addr, u64 size) = 0; diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 2c11345d7..2b5c7defa 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -21,6 +21,7 @@ RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gp RasterizerNull::~RasterizerNull() = default; void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} +void RasterizerNull::DrawTexture() {} void RasterizerNull::Clear(u32 layer_count) {} void RasterizerNull::DispatchCompute() {} void RasterizerNull::ResetCounter(VideoCore::QueryType type) {} diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 2112aa70e..51f896e43 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -31,6 +31,7 @@ public: ~RasterizerNull() override; void Draw(bool is_indexed, u32 instance_count) override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; diff --git a/src/video_core/renderer_opengl/blit_image.cpp b/src/video_core/renderer_opengl/blit_image.cpp new file mode 100644 index 000000000..9a560a73b --- /dev/null +++ b/src/video_core/renderer_opengl/blit_image.cpp @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <algorithm> + +#include "video_core/host_shaders/blit_color_float_frag.h" +#include "video_core/host_shaders/full_screen_triangle_vert.h" +#include "video_core/renderer_opengl/blit_image.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" + +namespace OpenGL { + +BlitImageHelper::BlitImageHelper(ProgramManager& program_manager_) + : program_manager(program_manager_), + full_screen_vert(CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER)), + blit_color_to_color_frag( + CreateProgram(HostShaders::BLIT_COLOR_FLOAT_FRAG, GL_FRAGMENT_SHADER)) {} + +BlitImageHelper::~BlitImageHelper() = default; + +void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size) { + glEnable(GL_CULL_FACE); + glDisable(GL_COLOR_LOGIC_OP); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_RASTERIZER_DISCARD); + glDisable(GL_ALPHA_TEST); + glDisablei(GL_BLEND, 0); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glCullFace(GL_BACK); + glFrontFace(GL_CW); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDepthRangeIndexed(0, 0.0, 0.0); + + program_manager.BindPresentPrograms(full_screen_vert.handle, blit_color_to_color_frag.handle); + glProgramUniform2f(full_screen_vert.handle, 0, + static_cast<float>(src_region.end.x - src_region.start.x) / + static_cast<float>(src_size.width), + static_cast<float>(src_region.end.y - src_region.start.y) / + static_cast<float>(src_size.height)); + glProgramUniform2f(full_screen_vert.handle, 1, + static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width), + static_cast<float>(src_region.start.y) / + static_cast<float>(src_size.height)); + glViewport(std::min(dst_region.start.x, dst_region.end.x), + std::min(dst_region.start.y, dst_region.end.y), + std::abs(dst_region.end.x - dst_region.start.x), + std::abs(dst_region.end.y - dst_region.start.y)); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer); + glBindSampler(0, src_sampler); + glBindTextureUnit(0, src_image_view); + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLES, 0, 3); +} +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/blit_image.h b/src/video_core/renderer_opengl/blit_image.h new file mode 100644 index 000000000..5a2b12d16 --- /dev/null +++ b/src/video_core/renderer_opengl/blit_image.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <glad/glad.h> + +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/texture_cache/types.h" + +namespace OpenGL { + +using VideoCommon::Extent3D; +using VideoCommon::Offset2D; +using VideoCommon::Region2D; + +class ProgramManager; +class Framebuffer; +class ImageView; + +class BlitImageHelper { +public: + explicit BlitImageHelper(ProgramManager& program_manager); + ~BlitImageHelper(); + + void BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size); + +private: + ProgramManager& program_manager; + + OGLProgram full_screen_vert; + OGLProgram blit_color_to_color_frag; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a8c3f8b67..bb1962073 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -160,6 +160,10 @@ public: return device.CanReportMemoryUsage(); } + u32 GetStorageBufferAlignment() const { + return static_cast<u32>(device.GetShaderStorageBufferAlignment()); + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index cee5c3247..22ed16ebf 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -166,6 +166,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; + has_draw_texture = GLAD_GL_NV_draw_texture; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; need_fastmath_off = is_nvidia; can_report_memory = GLAD_GL_NVX_gpu_memory_info; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2a72d84be..3ff8cad83 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -4,6 +4,8 @@ #pragma once #include <cstddef> +#include <string> + #include "common/common_types.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/stage.h" @@ -146,6 +148,10 @@ public: return has_sparse_texture_2; } + bool HasDrawTexture() const { + return has_draw_texture; + } + bool IsWarpSizePotentiallyLargerThanGuest() const { return warp_size_potentially_larger_than_guest; } @@ -216,6 +222,7 @@ private: bool has_shader_int64{}; bool has_amd_shader_half_float{}; bool has_sparse_texture_2{}; + bool has_draw_texture{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; bool has_cbuf_ftou_bug{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7d48af8e1..7bced675c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -64,7 +64,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), query_cache(*this), accelerate_dma(buffer_cache), - fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), + blit_image(program_manager_) {} RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -139,6 +140,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_load void RasterizerOpenGL::Clear(u32 layer_count) { MICROPROFILE_SCOPE(OpenGL_Clears); + gpu_memory->FlushCaching(); const auto& regs = maxwell3d->regs; bool use_color{}; bool use_depth{}; @@ -207,6 +209,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { MICROPROFILE_SCOPE(OpenGL_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); + gpu_memory->FlushCaching(); query_cache.UpdateCounters(); GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; @@ -318,7 +321,49 @@ void RasterizerOpenGL::DrawIndirect() { buffer_cache.SetDrawIndirect(nullptr); } +void RasterizerOpenGL::DrawTexture() { + MICROPROFILE_SCOPE(OpenGL_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + query_cache.UpdateCounters(); + + texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.UpdateRenderTargets(false); + + SyncState(); + + const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); + const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); + + if (device.HasDrawTexture()) { + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + + glDrawTextureNV(texture.DefaultHandle(), sampler->Handle(), draw_texture_state.dst_x0, + draw_texture_state.dst_y0, draw_texture_state.dst_x1, + draw_texture_state.dst_y1, 0, + draw_texture_state.src_x0 / static_cast<float>(texture.size.width), + draw_texture_state.src_y0 / static_cast<float>(texture.size.height), + draw_texture_state.src_x1 / static_cast<float>(texture.size.width), + draw_texture_state.src_y1 / static_cast<float>(texture.size.height)); + } else { + Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0), + .y = static_cast<s32>(draw_texture_state.dst_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1), + .y = static_cast<s32>(draw_texture_state.dst_y1)}}; + Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0), + .y = static_cast<s32>(draw_texture_state.src_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1), + .y = static_cast<s32>(draw_texture_state.src_y1)}}; + blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(), + sampler->Handle(), dst_region, src_region, texture.size); + } + + ++num_queued_commands; +} + void RasterizerOpenGL::DispatchCompute() { + gpu_memory->FlushCaching(); ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; if (!pipeline) { return; @@ -526,6 +571,7 @@ void RasterizerOpenGL::TickFrame() { } bool RasterizerOpenGL::AccelerateConditionalRendering() { + gpu_memory->FlushCaching(); if (Settings::IsGPULevelHigh()) { // Reimplement Host conditional rendering. return false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index be4f76c18..0c45832ae 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -16,6 +16,7 @@ #include "video_core/engines/maxwell_dma.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/blit_image.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_fence_manager.h" @@ -70,6 +71,7 @@ public: void Draw(bool is_indexed, u32 instance_count) override; void DrawIndirect() override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; @@ -224,6 +226,8 @@ private: AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; + BlitImageHelper blit_image; + boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 03b6314ff..7dd854e0f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -236,6 +236,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .needs_demote_reorder = device.IsAmd(), .support_snorm_render_buffer = false, .support_viewport_index_layer = device.HasVertexViewportLayer(), + .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()), + .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index d9c29d8b7..98841ae65 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,2 +1,123 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later + +#include <glad/glad.h> + +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, +}; + +ProgramManager::ProgramManager(const Device& device) { + glCreateProgramPipelines(1, &pipeline.handle); + if (device.UseAssemblyShaders()) { + glEnable(GL_COMPUTE_PROGRAM_NV); + } +} + +void ProgramManager::BindComputeProgram(GLuint program) { + glUseProgram(program); + is_compute_bound = true; +} + +void ProgramManager::BindComputeAssemblyProgram(GLuint program) { + if (current_assembly_compute_program != program) { + current_assembly_compute_program = program; + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); + } + UnbindPipeline(); +} + +void ProgramManager::BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { + static constexpr std::array<GLenum, 5> stage_enums{ + GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, + GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, + }; + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); + } + } + BindPipeline(); +} + +void ProgramManager::BindPresentPrograms(GLuint vertex, GLuint fragment) { + if (current_programs[0] != vertex) { + current_programs[0] = vertex; + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); + } + if (current_programs[4] != fragment) { + current_programs[4] = fragment; + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); + } + glUseProgramStages( + pipeline.handle, + GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); + current_programs[1] = 0; + current_programs[2] = 0; + current_programs[3] = 0; + + if (current_stage_mask != 0) { + current_stage_mask = 0; + for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { + glDisable(program_type); + } + } + BindPipeline(); +} + +void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs, + u32 stage_mask) { + const u32 changed_mask = current_stage_mask ^ stage_mask; + current_stage_mask = stage_mask; + + if (changed_mask != 0) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (((changed_mask >> stage) & 1) != 0) { + if (((stage_mask >> stage) & 1) != 0) { + glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } else { + glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } + } + } + } + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); + } + } + UnbindPipeline(); +} + +void ProgramManager::RestoreGuestCompute() {} + +void ProgramManager::BindPipeline() { + if (!is_pipeline_bound) { + is_pipeline_bound = true; + glBindProgramPipeline(pipeline.handle); + } + UnbindCompute(); +} + +void ProgramManager::UnbindPipeline() { + if (is_pipeline_bound) { + is_pipeline_bound = false; + glBindProgramPipeline(0); + } + UnbindCompute(); +} + +void ProgramManager::UnbindCompute() { + if (is_compute_bound) { + is_compute_bound = false; + glUseProgram(0); + } +} +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index a84f5aeb3..07ffab77f 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -6,8 +6,6 @@ #include <array> #include <span> -#include <glad/glad.h> - #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -16,121 +14,28 @@ namespace OpenGL { class ProgramManager { static constexpr size_t NUM_STAGES = 5; - static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ - GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, - GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, - }; - public: - explicit ProgramManager(const Device& device) { - glCreateProgramPipelines(1, &pipeline.handle); - if (device.UseAssemblyShaders()) { - glEnable(GL_COMPUTE_PROGRAM_NV); - } - } - - void BindComputeProgram(GLuint program) { - glUseProgram(program); - is_compute_bound = true; - } - - void BindComputeAssemblyProgram(GLuint program) { - if (current_assembly_compute_program != program) { - current_assembly_compute_program = program; - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); - } - UnbindPipeline(); - } - - void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { - static constexpr std::array<GLenum, 5> stage_enums{ - GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, - GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, - }; - for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (current_programs[stage] != programs[stage].handle) { - current_programs[stage] = programs[stage].handle; - glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); - } - } - BindPipeline(); - } - - void BindPresentPrograms(GLuint vertex, GLuint fragment) { - if (current_programs[0] != vertex) { - current_programs[0] = vertex; - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); - } - if (current_programs[4] != fragment) { - current_programs[4] = fragment; - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); - } - glUseProgramStages( - pipeline.handle, - GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); - current_programs[1] = 0; - current_programs[2] = 0; - current_programs[3] = 0; - - if (current_stage_mask != 0) { - current_stage_mask = 0; - for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { - glDisable(program_type); - } - } - BindPipeline(); - } + explicit ProgramManager(const Device& device); + + void BindComputeProgram(GLuint program); + + void BindComputeAssemblyProgram(GLuint program); + + void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs); + + void BindPresentPrograms(GLuint vertex, GLuint fragment); void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs, - u32 stage_mask) { - const u32 changed_mask = current_stage_mask ^ stage_mask; - current_stage_mask = stage_mask; - - if (changed_mask != 0) { - for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (((changed_mask >> stage) & 1) != 0) { - if (((stage_mask >> stage) & 1) != 0) { - glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); - } else { - glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); - } - } - } - } - for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (current_programs[stage] != programs[stage].handle) { - current_programs[stage] = programs[stage].handle; - glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); - } - } - UnbindPipeline(); - } - - void RestoreGuestCompute() {} + u32 stage_mask); + + void RestoreGuestCompute(); private: - void BindPipeline() { - if (!is_pipeline_bound) { - is_pipeline_bound = true; - glBindProgramPipeline(pipeline.handle); - } - UnbindCompute(); - } - - void UnbindPipeline() { - if (is_pipeline_bound) { - is_pipeline_bound = false; - glBindProgramPipeline(0); - } - UnbindCompute(); - } - - void UnbindCompute() { - if (is_compute_bound) { - is_compute_bound = false; - glUseProgram(0); - } - } + void BindPipeline(); + + void UnbindPipeline(); + + void UnbindCompute(); OGLPipeline pipeline; bool is_pipeline_bound{}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index bc75680f0..de95f2634 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -442,7 +442,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glBindTextureUnit(0, screen_info.display_texture); - const auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); + auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); + if (anti_aliasing > Settings::AntiAliasing::LastAA) { + LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing); + anti_aliasing = Settings::AntiAliasing::None; + Settings::values.anti_aliasing.SetValue(anti_aliasing); + } + if (anti_aliasing != Settings::AntiAliasing::None) { glEnablei(GL_SCISSOR_TEST, 0); auto viewport_width = screen_info.texture.width; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 3f2b139e0..dd00d3edf 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -4,13 +4,13 @@ #include <algorithm> #include "common/settings.h" +#include "video_core/host_shaders/blit_color_float_frag_spv.h" #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" #include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" -#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" @@ -303,7 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri } void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, - const Region2D& src_region) { + const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) { const VkOffset2D offset{ .x = std::min(dst_region.start.x, dst_region.end.x), .y = std::min(dst_region.start.y, dst_region.end.y), @@ -325,12 +325,15 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi .offset = offset, .extent = extent, }; - const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x); - const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y); + const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) / + static_cast<float>(src_size.width); + const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) / + static_cast<float>(src_size.height); const PushConstants push_constants{ .tex_scale = {scale_x, scale_y}, - .tex_offset = {static_cast<float>(src_region.start.x), - static_cast<float>(src_region.start.y)}, + .tex_offset = {static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width), + static_cast<float>(src_region.start.y) / + static_cast<float>(src_size.height)}, }; cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); @@ -347,6 +350,51 @@ VkExtent2D GetConversionExtent(const ImageView& src_image_view) { .height = is_rescaled ? resolution.ScaleUp(height) : height, }; } + +void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, + VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL) { + constexpr VkFlags flags{VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT}; + const VkImageMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = flags, + .dstAccessMask = flags, + .oldLayout = source_layout, + .newLayout = target_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, barrier); +} + +void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) { + const VkRenderPass render_pass = framebuffer->RenderPass(); + const VkFramebuffer framebuffer_handle = framebuffer->Handle(); + const VkExtent2D render_area = framebuffer->RenderArea(); + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = render_pass, + .framebuffer = framebuffer_handle, + .renderArea{ + .offset{}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); +} } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, @@ -365,7 +413,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( PipelineLayoutCreateInfo(two_textures_set_layout.address()))), full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), - blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), + blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)), blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)), convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), @@ -404,6 +452,32 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView scheduler.InvalidateState(); } +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, + VkImage src_image, VkSampler src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size) { + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = Tegra::Engines::Fermi2D::Operation::SrcCopy, + }; + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([this, dst_framebuffer, src_image_view, src_image, src_sampler, dst_region, + src_region, src_size, pipeline, layout](vk::CommandBuffer cmdbuf) { + TransitionImageLayout(cmdbuf, src_image, VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL); + BeginRenderPass(cmdbuf, dst_framebuffer); + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + BindBlitState(cmdbuf, layout, dst_region, src_region, src_size); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + }); +} + void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, VkImageView src_stencil_view, const Region2D& dst_region, const Region2D& src_region, diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 5df679fb4..be8a9a2f6 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -10,6 +10,8 @@ namespace Vulkan { +using VideoCommon::Extent3D; +using VideoCommon::Offset2D; using VideoCommon::Region2D; class Device; @@ -36,6 +38,10 @@ public: Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); + void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, + VkImage src_image, VkSampler src_sampler, const Region2D& dst_region, + const Region2D& src_region, const Extent3D& src_size); + void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, VkImageView src_stencil_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 3d328a250..f8398b511 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -148,7 +148,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe }); } if (!extended_dynamic_state_2_extra) { - dynamic_state.Refresh2(regs, topology, extended_dynamic_state_2); + dynamic_state.Refresh2(regs, topology_, extended_dynamic_state_2); } if (!extended_dynamic_state_3_blend) { if (maxwell3d.dirty.flags[Dirty::Blending]) { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index f502a7d09..2a8d9e377 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -60,24 +60,13 @@ std::string GetDriverVersion(const Device& device) { return GetReadableVersion(version); } -std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) { - std::sort(std::begin(available_extensions), std::end(available_extensions)); - - static constexpr std::size_t AverageExtensionSize = 64; - std::string separated_extensions; - separated_extensions.reserve(available_extensions.size() * AverageExtensionSize); - - const auto end = std::end(available_extensions); - for (auto extension = std::begin(available_extensions); extension != end; ++extension) { - if (const bool is_last = extension + 1 == end; is_last) { - separated_extensions += *extension; - } else { - separated_extensions += fmt::format("{},", *extension); - } - } - return separated_extensions; +std::string BuildCommaSeparatedExtensions( + const std::set<std::string, std::less<>>& available_extensions) { + return fmt::format("{}", fmt::join(available_extensions, ",")); } +} // Anonymous namespace + Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, VkSurfaceKHR surface) { const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); @@ -89,7 +78,6 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl const vk::PhysicalDevice physical_device(devices[device_index], dld); return Device(*instance, physical_device, surface, dld); } -} // Anonymous namespace RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, @@ -98,7 +86,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, - true, Settings::values.renderer_debug.GetValue())), + Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), @@ -109,6 +97,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, screen_info), rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, state_tracker, scheduler) { + if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { + turbo_mode.emplace(instance, dld); + scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); }); + } Report(); } catch (const vk::Exception& exception) { LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); @@ -116,6 +108,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, } RendererVulkan::~RendererVulkan() { + scheduler.RegisterOnSubmit([] {}); void(device.GetLogical().WaitIdle()); } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index e7bfecb20..009e75e0d 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -13,6 +13,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/vk_turbo_mode.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -31,6 +32,9 @@ class GPU; namespace Vulkan { +Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, + VkSurfaceKHR surface); + class RendererVulkan final : public VideoCore::RendererBase { public: explicit RendererVulkan(Core::TelemetrySession& telemtry_session, @@ -74,6 +78,7 @@ private: Swapchain swapchain; BlitScreen blit_screen; RasterizerVulkan rasterizer; + std::optional<TurboMode> turbo_mode; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 487d8b416..1cfb4c2ff 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -330,12 +330,19 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const { return device.CanReportMemoryUsage(); } +u32 BufferCacheRuntime::GetStorageBufferAlignment() const { + return static_cast<u32>(device.GetStorageBufferAlignment()); +} + void BufferCacheRuntime::Finish() { scheduler.Finish(); } void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, std::span<const VideoCommon::BufferCopy> copies, bool barrier) { + if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) { + return; + } static constexpr VkMemoryBarrier READ_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .pNext = nullptr, @@ -394,6 +401,9 @@ void BufferCacheRuntime::PostCopyBarrier() { } void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) { + if (dest_buffer == VK_NULL_HANDLE) { + return; + } static constexpr VkMemoryBarrier READ_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .pNext = nullptr, @@ -473,6 +483,11 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); }); } else { + if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) { + ReserveNullBuffer(); + buffer = *null_buffer; + offset = 0; + } scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { cmdbuf.BindVertexBuffer(index, buffer, offset); }); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 183b33632..06539c733 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -73,6 +73,8 @@ public: bool CanReportMemoryUsage() const; + u32 GetStorageBufferAlignment() const; + [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 04a3a861e..2a0f0dbf0 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -24,13 +24,15 @@ using Shader::ImageBufferDescriptor; using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; using Tegra::Texture::TexturePair; -ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, +ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_, + DescriptorPool& descriptor_pool, UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, PipelineStatistics* pipeline_statistics, VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, vk::ShaderModule spv_module_) - : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, + : device{device_}, pipeline_cache(pipeline_cache_), + update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { if (shader_notify) { shader_notify->MarkShaderBuilding(); @@ -56,23 +58,27 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript if (device.IsKhrPipelineExecutablePropertiesEnabled()) { flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; } - pipeline = device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = flags, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, + pipeline = device.GetLogical().CreateComputePipeline( + { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = flags, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = + device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }); + *pipeline_cache); + if (pipeline_statistics) { pipeline_statistics->Collect(*pipeline); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index d70837fc5..78d77027f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -28,7 +28,8 @@ class Scheduler; class ComputePipeline { public: - explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, + explicit ComputePipeline(const Device& device, vk::PipelineCache& pipeline_cache, + DescriptorPool& descriptor_pool, UpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* thread_worker, PipelineStatistics* pipeline_statistics, @@ -46,6 +47,7 @@ public: private: const Device& device; + vk::PipelineCache& pipeline_cache; UpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d11383bf1..f91bb5a1d 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -234,13 +234,14 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m GraphicsPipeline::GraphicsPipeline( Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, - VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, + vk::PipelineCache& pipeline_cache_, VideoCore::ShaderNotify* shader_notify, + const Device& device_, DescriptorPool& descriptor_pool, UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages, const std::array<const Shader::Info*, NUM_STAGES>& infos) - : key{key_}, device{device_}, texture_cache{texture_cache_}, - buffer_cache{buffer_cache_}, scheduler{scheduler_}, + : key{key_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + pipeline_cache(pipeline_cache_), scheduler{scheduler_}, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { if (shader_notify) { shader_notify->MarkShaderBuilding(); @@ -644,12 +645,15 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pNext = nullptr, .flags = 0, .topology = input_assembly_topology, - .primitiveRestartEnable = dynamic.primitive_restart_enable != 0 && - ((input_assembly_topology != VK_PRIMITIVE_TOPOLOGY_PATCH_LIST && - device.IsTopologyListPrimitiveRestartSupported()) || - SupportsPrimitiveRestart(input_assembly_topology) || - (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST && - device.IsPatchListPrimitiveRestartSupported())), + .primitiveRestartEnable = + dynamic.primitive_restart_enable != 0 && + ((input_assembly_topology != VK_PRIMITIVE_TOPOLOGY_PATCH_LIST && + device.IsTopologyListPrimitiveRestartSupported()) || + SupportsPrimitiveRestart(input_assembly_topology) || + (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST && + device.IsPatchListPrimitiveRestartSupported())) + ? VK_TRUE + : VK_FALSE, }; const VkPipelineTessellationStateCreateInfo tessellation_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, @@ -699,7 +703,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .cullMode = static_cast<VkCullModeFlags>( dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = (dynamic.depth_bias_enable == 0 ? VK_TRUE : VK_FALSE), + .depthBiasEnable = (dynamic.depth_bias_enable != 0 ? VK_TRUE : VK_FALSE), .depthBiasConstantFactor = 0.0f, .depthBiasClamp = 0.0f, .depthBiasSlopeFactor = 0.0f, @@ -894,27 +898,29 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { if (device.IsKhrPipelineExecutablePropertiesEnabled()) { flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; } - pipeline = device.GetLogical().CreateGraphicsPipeline({ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = flags, - .stageCount = static_cast<u32>(shader_stages.size()), - .pStages = shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = &tessellation_ci, - .pViewportState = &viewport_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisample_ci, - .pDepthStencilState = &depth_stencil_ci, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *pipeline_layout, - .renderPass = render_pass, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }); + pipeline = device.GetLogical().CreateGraphicsPipeline( + { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = flags, + .stageCount = static_cast<u32>(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = &tessellation_ci, + .pViewportState = &viewport_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisample_ci, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = render_pass, + .subpass = 0, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }, + *pipeline_cache); } void GraphicsPipeline::Validate() { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 1ed2967be..67c657d0e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -70,16 +70,14 @@ class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; public: - explicit GraphicsPipeline(Scheduler& scheduler, BufferCache& buffer_cache, - TextureCache& texture_cache, VideoCore::ShaderNotify* shader_notify, - const Device& device, DescriptorPool& descriptor_pool, - UpdateDescriptorQueue& update_descriptor_queue, - Common::ThreadWorker* worker_thread, - PipelineStatistics* pipeline_statistics, - RenderPassCache& render_pass_cache, - const GraphicsPipelineCacheKey& key, - std::array<vk::ShaderModule, NUM_STAGES> stages, - const std::array<const Shader::Info*, NUM_STAGES>& infos); + explicit GraphicsPipeline( + Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, + vk::PipelineCache& pipeline_cache, VideoCore::ShaderNotify* shader_notify, + const Device& device, DescriptorPool& descriptor_pool, + UpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, + PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages, + const std::array<const Shader::Info*, NUM_STAGES>& infos); GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; @@ -133,6 +131,7 @@ private: const Device& device; TextureCache& texture_cache; BufferCache& buffer_cache; + vk::PipelineCache& pipeline_cache; Scheduler& scheduler; UpdateDescriptorQueue& update_descriptor_queue; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3046b72ab..7e69b11d8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -55,6 +55,7 @@ using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; constexpr u32 CACHE_VERSION = 10; +constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'}; template <typename Container> auto MakeSpan(Container& container) { @@ -284,6 +285,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, + use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), serialization_thread(1, "VkPipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; @@ -329,6 +331,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .need_declared_frag_colors = false, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, + .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, .has_broken_unsigned_image_offsets = false, .has_broken_signed_operations = false, .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, @@ -341,6 +344,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, .support_snorm_render_buffer = true, .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), + .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()), + .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), }; if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) { @@ -362,7 +367,12 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device }; } -PipelineCache::~PipelineCache() = default; +PipelineCache::~PipelineCache() { + if (use_vulkan_pipeline_cache && !vulkan_pipeline_cache_filename.empty()) { + SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache, + CACHE_VERSION); + } +} GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); @@ -418,6 +428,12 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } pipeline_cache_filename = base_dir / "vulkan.bin"; + if (use_vulkan_pipeline_cache) { + vulkan_pipeline_cache_filename = base_dir / "vulkan_pipelines.bin"; + vulkan_pipeline_cache = + LoadVulkanPipelineCache(vulkan_pipeline_cache_filename, CACHE_VERSION); + } + struct { std::mutex mutex; size_t total{}; @@ -496,6 +512,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.WaitForRequests(stop_loading); + if (use_vulkan_pipeline_cache) { + SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache, + CACHE_VERSION); + } + if (state.statistics) { state.statistics->Report(); } @@ -616,10 +637,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - return std::make_unique<GraphicsPipeline>(scheduler, buffer_cache, texture_cache, - &shader_notify, device, descriptor_pool, - update_descriptor_queue, thread_worker, statistics, - render_pass_cache, key, std::move(modules), infos); + return std::make_unique<GraphicsPipeline>( + scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device, + descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key, + std::move(modules), infos); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); @@ -689,13 +710,108 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue, - thread_worker, statistics, &shader_notify, - program.info, std::move(spv_module)); + return std::make_unique<ComputePipeline>(device, vulkan_pipeline_cache, descriptor_pool, + update_descriptor_queue, thread_worker, statistics, + &shader_notify, program.info, std::move(spv_module)); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); return nullptr; } +void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename, + const vk::PipelineCache& pipeline_cache, + u32 cache_version) try { + std::ofstream file(filename, std::ios::binary); + file.exceptions(std::ifstream::failbit); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open Vulkan driver pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + return; + } + file.write(VULKAN_CACHE_MAGIC_NUMBER.data(), VULKAN_CACHE_MAGIC_NUMBER.size()) + .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version)); + + size_t cache_size = 0; + std::vector<char> cache_data; + if (pipeline_cache) { + pipeline_cache.Read(&cache_size, nullptr); + cache_data.resize(cache_size); + pipeline_cache.Read(&cache_size, cache_data.data()); + } + file.write(cache_data.data(), cache_size); + + LOG_INFO(Render_Vulkan, "Vulkan driver pipelines cached at: {}", + Common::FS::PathToUTF8String(filename)); + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete Vulkan driver pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem::path& filename, + u32 expected_cache_version) { + const auto create_pipeline_cache = [this](size_t data_size, const void* data) { + VkPipelineCacheCreateInfo pipeline_cache_ci = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .initialDataSize = data_size, + .pInitialData = data}; + return device.GetLogical().CreatePipelineCache(pipeline_cache_ci); + }; + try { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return create_pipeline_cache(0, nullptr); + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + + std::array<char, 8> magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version)); + if (magic_number != VULKAN_CACHE_MAGIC_NUMBER || cache_version != expected_cache_version) { + file.close(); + if (Common::FS::RemoveFile(filename)) { + if (magic_number != VULKAN_CACHE_MAGIC_NUMBER) { + LOG_ERROR(Common_Filesystem, "Invalid Vulkan driver pipeline cache file"); + } + if (cache_version != expected_cache_version) { + LOG_INFO(Common_Filesystem, "Deleting old Vulkan driver pipeline cache"); + } + } else { + LOG_ERROR(Common_Filesystem, + "Invalid Vulkan pipeline cache file and failed to delete it in \"{}\"", + Common::FS::PathToUTF8String(filename)); + } + return create_pipeline_cache(0, nullptr); + } + + static constexpr size_t header_size = magic_number.size() + sizeof(cache_version); + const size_t cache_size = static_cast<size_t>(end) - header_size; + std::vector<char> cache_data(cache_size); + file.read(cache_data.data(), cache_size); + + LOG_INFO(Render_Vulkan, + "Loaded Vulkan driver pipeline cache: ", Common::FS::PathToUTF8String(filename)); + + return create_pipeline_cache(cache_size, cache_data.data()); + + } catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete Vulkan driver pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } + + return create_pipeline_cache(0, nullptr); + } +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index b4f593ef5..5171912d7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -135,6 +135,12 @@ private: PipelineStatistics* statistics, bool build_in_parallel); + void SerializeVulkanPipelineCache(const std::filesystem::path& filename, + const vk::PipelineCache& pipeline_cache, u32 cache_version); + + vk::PipelineCache LoadVulkanPipelineCache(const std::filesystem::path& filename, + u32 expected_cache_version); + const Device& device; Scheduler& scheduler; DescriptorPool& descriptor_pool; @@ -144,6 +150,7 @@ private: TextureCache& texture_cache; VideoCore::ShaderNotify& shader_notify; bool use_asynchronous_shaders{}; + bool use_vulkan_pipeline_cache{}; GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; @@ -158,6 +165,9 @@ private: std::filesystem::path pipeline_cache_filename; + std::filesystem::path vulkan_pipeline_cache_filename; + vk::PipelineCache vulkan_pipeline_cache; + Common::ThreadWorker workers; Common::ThreadWorker serialization_thread; DynamicFeatures dynamic_features; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 242bf9602..86ef0daeb 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -186,6 +186,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { SCOPE_EXIT({ gpu.TickWork(); }); FlushWork(); + gpu_memory->FlushCaching(); query_cache.UpdateCounters(); @@ -265,10 +266,40 @@ void RasterizerVulkan::DrawIndirect() { buffer_cache.SetDrawIndirect(nullptr); } +void RasterizerVulkan::DrawTexture() { + MICROPROFILE_SCOPE(Vulkan_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + FlushWork(); + + query_cache.UpdateCounters(); + + texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.UpdateRenderTargets(false); + + UpdateDynamicStates(); + + const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); + const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); + Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0), + .y = static_cast<s32>(draw_texture_state.dst_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1), + .y = static_cast<s32>(draw_texture_state.dst_y1)}}; + Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0), + .y = static_cast<s32>(draw_texture_state.src_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1), + .y = static_cast<s32>(draw_texture_state.src_y1)}}; + blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(), + texture.ImageHandle(), sampler->Handle(), dst_region, src_region, + texture.size); +} + void RasterizerVulkan::Clear(u32 layer_count) { MICROPROFILE_SCOPE(Vulkan_Clearing); FlushWork(); + gpu_memory->FlushCaching(); query_cache.UpdateCounters(); @@ -393,6 +424,7 @@ void RasterizerVulkan::Clear(u32 layer_count) { void RasterizerVulkan::DispatchCompute() { FlushWork(); + gpu_memory->FlushCaching(); ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; if (!pipeline) { @@ -481,6 +513,27 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache } } +void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { + { + std::scoped_lock lock{texture_cache.mutex}; + for (const auto& [addr, size] : sequences) { + texture_cache.WriteMemory(addr, size); + } + } + { + std::scoped_lock lock{buffer_cache.mutex}; + for (const auto& [addr, size] : sequences) { + buffer_cache.WriteMemory(addr, size); + } + } + { + for (const auto& [addr, size] : sequences) { + query_cache.InvalidateRegion(addr, size); + pipeline_cache.InvalidateRegion(addr, size); + } + } +} + void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; @@ -605,6 +658,7 @@ void RasterizerVulkan::TickFrame() { } bool RasterizerVulkan::AccelerateConditionalRendering() { + gpu_memory->FlushCaching(); if (Settings::IsGPULevelHigh()) { // TODO(Blinkhawk): Reimplement Host conditional rendering. return false; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c661e5b19..a0508b57c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -66,6 +66,7 @@ public: void Draw(bool is_indexed, u32 instance_count) override; void DrawIndirect() override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; @@ -79,6 +80,7 @@ public: VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; void OnCPUWrite(VAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index c2e53a5d5..e03685af1 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -213,6 +213,11 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s .signalSemaphoreCount = num_signal_semaphores, .pSignalSemaphores = signal_semaphores.data(), }; + + if (on_submit) { + on_submit(); + } + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { case VK_SUCCESS: break; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3858c506c..bd4cb0f7e 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -5,6 +5,7 @@ #include <condition_variable> #include <cstddef> +#include <functional> #include <memory> #include <thread> #include <utility> @@ -66,6 +67,11 @@ public: query_cache = &query_cache_; } + // Registers a callback to perform on queue submission. + void RegisterOnSubmit(std::function<void()>&& func) { + on_submit = std::move(func); + } + /// Send work to a separate thread. template <typename T> void Record(T&& command) { @@ -216,6 +222,7 @@ private: vk::CommandBuffer current_cmdbuf; std::unique_ptr<CommandChunk> chunk; + std::function<void()> on_submit; State state; diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp new file mode 100644 index 000000000..c42594149 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp @@ -0,0 +1,222 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/literals.h" +#include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_turbo_mode.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { + +using namespace Common::Literals; + +TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) + : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} { + { + std::scoped_lock lk{m_submission_lock}; + m_submission_time = std::chrono::steady_clock::now(); + } + m_thread = std::jthread([&](auto stop_token) { Run(stop_token); }); +} + +TurboMode::~TurboMode() = default; + +void TurboMode::QueueSubmitted() { + std::scoped_lock lk{m_submission_lock}; + m_submission_time = std::chrono::steady_clock::now(); + m_submission_cv.notify_one(); +} + +void TurboMode::Run(std::stop_token stop_token) { + auto& dld = m_device.GetLogical(); + + // Allocate buffer. 2MiB should be sufficient. + auto buffer = dld.CreateBuffer(VkBufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = 2_MiB, + .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); + + // Commit some device local memory for the buffer. + auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal); + + // Create the descriptor pool to contain our descriptor. + constexpr VkDescriptorPoolSize pool_size{ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + }; + + auto descriptor_pool = dld.CreateDescriptorPool(VkDescriptorPoolCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = &pool_size, + }); + + // Create the descriptor set layout from the pool. + constexpr VkDescriptorSetLayoutBinding layout_binding{ + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }; + + auto descriptor_set_layout = dld.CreateDescriptorSetLayout(VkDescriptorSetLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = 1, + .pBindings = &layout_binding, + }); + + // Actually create the descriptor set. + auto descriptor_set = descriptor_pool.Allocate(VkDescriptorSetAllocateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = *descriptor_pool, + .descriptorSetCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + }); + + // Create the shader. + auto shader = BuildShader(m_device, VULKAN_TURBO_MODE_COMP_SPV); + + // Create the pipeline layout. + auto pipeline_layout = dld.CreatePipelineLayout(VkPipelineLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); + + // Actually create the pipeline. + const VkPipelineShaderStageCreateInfo shader_stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }; + + auto pipeline = dld.CreateComputePipeline(VkComputePipelineCreateInfo{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = shader_stage, + .layout = *pipeline_layout, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); + + // Create a fence to wait on. + auto fence = dld.CreateFence(VkFenceCreateInfo{ + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }); + + // Create a command pool to allocate a command buffer from. + auto command_pool = dld.CreateCommandPool(VkCommandPoolCreateInfo{ + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = + VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = m_device.GetGraphicsFamily(), + }); + + // Create a single command buffer. + auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY); + auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()}; + + while (!stop_token.stop_requested()) { + // Reset the fence. + fence.Reset(); + + // Update descriptor set. + const VkDescriptorBufferInfo buffer_info{ + .buffer = *buffer, + .offset = 0, + .range = VK_WHOLE_SIZE, + }; + + const VkWriteDescriptorSet buffer_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_set[0], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pImageInfo = nullptr, + .pBufferInfo = &buffer_info, + .pTexelBufferView = nullptr, + }; + + dld.UpdateDescriptorSets(std::array{buffer_write}, {}); + + // Set up the command buffer. + cmdbuf.Begin(VkCommandBufferBeginInfo{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); + + // Clear the buffer. + cmdbuf.FillBuffer(*buffer, 0, VK_WHOLE_SIZE, 0); + + // Bind descriptor set. + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_set, {}); + + // Bind the pipeline. + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + + // Dispatch. + cmdbuf.Dispatch(64, 64, 1); + + // Finish. + cmdbuf.End(); + + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = 0, + .pWaitSemaphores = nullptr, + .pWaitDstStageMask = nullptr, + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = 0, + .pSignalSemaphores = nullptr, + }; + + m_device.GetGraphicsQueue().Submit(std::array{submit_info}, *fence); + + // Wait for completion. + fence.Wait(); + + // Wait for the next graphics queue submission if necessary. + std::unique_lock lk{m_submission_lock}; + Common::CondvarWait(m_submission_cv, lk, stop_token, [this] { + return (std::chrono::steady_clock::now() - m_submission_time) <= + std::chrono::milliseconds{100}; + }); + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h new file mode 100644 index 000000000..99b5ac50b --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <chrono> +#include <mutex> + +#include "common/polyfill_thread.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class TurboMode { +public: + explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld); + ~TurboMode(); + + void QueueSubmitted(); + +private: + void Run(std::stop_token stop_token); + + Device m_device; + MemoryAllocator m_allocator; + std::mutex m_submission_lock; + std::condition_variable_any m_submission_cv; + std::chrono::time_point<std::chrono::steady_clock> m_submission_time{}; + + std::jthread m_thread; +}; + +} // namespace Vulkan diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 87152c8e9..1b01990a4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -149,6 +149,13 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { } template <class P> +typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept { + const auto image_view_id = VisitImageView(channel_state->graphics_image_table, + channel_state->graphics_image_view_ids, index); + return slot_image_views[image_view_id]; +} + +template <class P> void TextureCache<P>::MarkModification(ImageId id) noexcept { MarkModification(slot_images[id]); } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4eea1f609..485eaabaa 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -129,6 +129,9 @@ public: /// Return a reference to the given image view id [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + /// Get the imageview from the graphics descriptor table in the specified index + [[nodiscard]] ImageView& GetImageView(u32 index) noexcept; + /// Mark an image as modified from the GPU void MarkModification(ImageId id) noexcept; diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 85f1d13e0..5fa0d9620 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -57,7 +57,7 @@ NsightAftermathTracker::NsightAftermathTracker() { if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps( GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan, GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback, - ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) { + ShaderDebugInfoCallback, CrashDumpDescriptionCallback, nullptr, this))) { LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed"); return; } @@ -83,7 +83,7 @@ void NsightAftermathTracker::SaveShader(std::span<const u32> spirv) const { std::scoped_lock lock{mutex}; - GFSDK_Aftermath_ShaderHash hash; + GFSDK_Aftermath_ShaderBinaryHash hash; if (!GFSDK_Aftermath_SUCCEED( GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) { LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module"); @@ -121,8 +121,8 @@ void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump, u32 json_size = 0; if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON( decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO, - GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr, - this, &json_size))) { + GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, this, + &json_size))) { LOG_ERROR(Render_Vulkan, "Failed to generate JSON"); return; } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 5c5bfa18d..23d922e5d 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -74,30 +74,6 @@ enum class NvidiaArchitecture { VoltaOrOlder, }; -constexpr std::array REQUIRED_EXTENSIONS{ - VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, - VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, -#ifdef _WIN32 - VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, -#endif -#ifdef __unix__ - VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, -#endif -}; - -constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_2{ - VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, - VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, - VK_KHR_8BIT_STORAGE_EXTENSION_NAME, - VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, - VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, - VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, -}; - -constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_3{ - VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, -}; - template <typename T> void SetNext(void**& next, T& data) { *next = &data; @@ -286,24 +262,9 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica return format_properties; } -std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) { - const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - std::vector<std::string> supported_extensions; - supported_extensions.reserve(extensions.size()); - for (const auto& extension : extensions) { - supported_extensions.emplace_back(extension.extensionName); - } - return supported_extensions; -} - -bool IsExtensionSupported(std::span<const std::string> supported_extensions, - std::string_view extension) { - return std::ranges::find(supported_extensions, extension) != supported_extensions.end(); -} - NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, - std::span<const std::string> exts) { - if (IsExtensionSupported(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { + const std::set<std::string, std::less<>>& exts) { + if (exts.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{}; shading_rate_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; @@ -316,423 +277,55 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, return NvidiaArchitecture::AmpereOrNewer; } } - if (IsExtensionSupported(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) { + if (exts.contains(VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) { return NvidiaArchitecture::Turing; } return NvidiaArchitecture::VoltaOrOlder; } + +std::vector<const char*> ExtensionListForVulkan( + const std::set<std::string, std::less<>>& extensions) { + std::vector<const char*> output; + for (const auto& extension : extensions) { + output.push_back(extension.c_str()); + } + return output; +} + } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) - : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, - instance_version{properties.apiVersion}, supported_extensions{GetSupportedExtensions( - physical)}, + : instance{instance_}, dld{dld_}, physical{physical_}, format_properties(GetFormatProperties(physical)) { - CheckSuitability(surface != nullptr); - SetupFamilies(surface); - SetupFeatures(); - SetupProperties(); - - const auto queue_cis = GetDeviceQueueCreateInfos(); - const std::vector extensions = LoadExtensions(surface != nullptr); - - VkPhysicalDeviceFeatures2 features2{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, - .pNext = nullptr, - .features{ - .robustBufferAccess = true, - .fullDrawIndexUint32 = false, - .imageCubeArray = true, - .independentBlend = true, - .geometryShader = true, - .tessellationShader = true, - .sampleRateShading = true, - .dualSrcBlend = true, - .logicOp = true, - .multiDrawIndirect = true, - .drawIndirectFirstInstance = true, - .depthClamp = true, - .depthBiasClamp = true, - .fillModeNonSolid = true, - .depthBounds = is_depth_bounds_supported, - .wideLines = true, - .largePoints = true, - .alphaToOne = false, - .multiViewport = true, - .samplerAnisotropy = true, - .textureCompressionETC2 = false, - .textureCompressionASTC_LDR = is_optimal_astc_supported, - .textureCompressionBC = false, - .occlusionQueryPrecise = true, - .pipelineStatisticsQuery = false, - .vertexPipelineStoresAndAtomics = true, - .fragmentStoresAndAtomics = true, - .shaderTessellationAndGeometryPointSize = false, - .shaderImageGatherExtended = true, - .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = is_shader_storage_image_multisample, - .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, - .shaderStorageImageWriteWithoutFormat = true, - .shaderUniformBufferArrayDynamicIndexing = false, - .shaderSampledImageArrayDynamicIndexing = false, - .shaderStorageBufferArrayDynamicIndexing = false, - .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = true, - .shaderCullDistance = true, - .shaderFloat64 = is_shader_float64_supported, - .shaderInt64 = is_shader_int64_supported, - .shaderInt16 = is_shader_int16_supported, - .shaderResourceResidency = false, - .shaderResourceMinLod = false, - .sparseBinding = false, - .sparseResidencyBuffer = false, - .sparseResidencyImage2D = false, - .sparseResidencyImage3D = false, - .sparseResidency2Samples = false, - .sparseResidency4Samples = false, - .sparseResidency8Samples = false, - .sparseResidency16Samples = false, - .sparseResidencyAliased = false, - .variableMultisampleRate = false, - .inheritedQueries = false, - }, - }; - const void* first_next = &features2; - void** next = &features2.pNext; - - VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, - .pNext = nullptr, - .timelineSemaphore = true, - }; - SetNext(next, timeline_semaphore); - - VkPhysicalDevice16BitStorageFeatures bit16_storage{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES, - .pNext = nullptr, - .storageBuffer16BitAccess = true, - .uniformAndStorageBuffer16BitAccess = true, - .storagePushConstant16 = false, - .storageInputOutput16 = false, - }; - SetNext(next, bit16_storage); - - VkPhysicalDevice8BitStorageFeatures bit8_storage{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES, - .pNext = nullptr, - .storageBuffer8BitAccess = true, - .uniformAndStorageBuffer8BitAccess = true, - .storagePushConstant8 = false, - }; - SetNext(next, bit8_storage); - - VkPhysicalDeviceRobustness2FeaturesEXT robustness2{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, - .pNext = nullptr, - .robustBufferAccess2 = true, - .robustImageAccess2 = true, - .nullDescriptor = true, - }; - SetNext(next, robustness2); - - VkPhysicalDeviceHostQueryResetFeatures host_query_reset{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES, - .pNext = nullptr, - .hostQueryReset = true, - }; - SetNext(next, host_query_reset); - - VkPhysicalDeviceVariablePointerFeatures variable_pointers{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES, - .pNext = nullptr, - .variablePointersStorageBuffer = VK_TRUE, - .variablePointers = VK_TRUE, - }; - SetNext(next, variable_pointers); - - VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES, - .pNext = nullptr, - .shaderDemoteToHelperInvocation = true, - }; - SetNext(next, demote); - - VkPhysicalDeviceShaderDrawParametersFeatures draw_parameters{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES, - .pNext = nullptr, - .shaderDrawParameters = true, - }; - SetNext(next, draw_parameters); - - VkPhysicalDeviceShaderFloat16Int8Features float16_int8; - if (is_int8_supported || is_float16_supported) { - float16_int8 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES, - .pNext = nullptr, - .shaderFloat16 = is_float16_supported, - .shaderInt8 = is_int8_supported, - }; - SetNext(next, float16_int8); - } - if (!is_float16_supported) { - LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); - } - if (!is_int8_supported) { - LOG_INFO(Render_Vulkan, "Device doesn't support int8 natively"); - } - - if (!nv_viewport_swizzle) { - LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); - } - - if (!nv_viewport_array2) { - LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); - } - - if (!nv_geometry_shader_passthrough) { - LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); - } + // Get suitability and device properties. + const bool is_suitable = GetSuitability(surface != nullptr); - VkPhysicalDeviceUniformBufferStandardLayoutFeatures std430_layout; - if (khr_uniform_buffer_standard_layout) { - std430_layout = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES, - .pNext = nullptr, - .uniformBufferStandardLayout = true, - }; - SetNext(next, std430_layout); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); - } - - VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; - if (ext_index_type_uint8) { - index_type_uint8 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT, - .pNext = nullptr, - .indexTypeUint8 = true, - }; - SetNext(next, index_type_uint8); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); - } - - VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart; - if (is_topology_list_restart_supported || is_patch_list_restart_supported) { - primitive_topology_list_restart = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT, - .pNext = nullptr, - .primitiveTopologyListRestart = is_topology_list_restart_supported, - .primitiveTopologyPatchListRestart = is_patch_list_restart_supported, - }; - SetNext(next, primitive_topology_list_restart); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support list topology primitive restart"); - } - - VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; - if (ext_transform_feedback) { - transform_feedback = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT, - .pNext = nullptr, - .transformFeedback = true, - .geometryStreams = true, - }; - SetNext(next, transform_feedback); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); - } - - VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border; - if (ext_custom_border_color) { - custom_border = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT, - .pNext = nullptr, - .customBorderColors = VK_TRUE, - .customBorderColorWithoutFormat = VK_TRUE, - }; - SetNext(next, custom_border); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors"); - } - - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; - if (ext_extended_dynamic_state) { - dynamic_state = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT, - .pNext = nullptr, - .extendedDynamicState = VK_TRUE, - }; - SetNext(next, dynamic_state); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); - } - - VkPhysicalDeviceExtendedDynamicState2FeaturesEXT dynamic_state_2; - if (ext_extended_dynamic_state_2) { - dynamic_state_2 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT, - .pNext = nullptr, - .extendedDynamicState2 = VK_TRUE, - .extendedDynamicState2LogicOp = ext_extended_dynamic_state_2_extra ? VK_TRUE : VK_FALSE, - .extendedDynamicState2PatchControlPoints = VK_FALSE, - }; - SetNext(next, dynamic_state_2); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state 2"); - } - - VkPhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3; - if (ext_extended_dynamic_state_3) { - dynamic_state_3 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT, - .pNext = nullptr, - .extendedDynamicState3TessellationDomainOrigin = VK_FALSE, - .extendedDynamicState3DepthClampEnable = - ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, - .extendedDynamicState3PolygonMode = VK_FALSE, - .extendedDynamicState3RasterizationSamples = VK_FALSE, - .extendedDynamicState3SampleMask = VK_FALSE, - .extendedDynamicState3AlphaToCoverageEnable = VK_FALSE, - .extendedDynamicState3AlphaToOneEnable = VK_FALSE, - .extendedDynamicState3LogicOpEnable = - ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorBlendEnable = - ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorBlendEquation = - ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, - .extendedDynamicState3ColorWriteMask = - ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE, - .extendedDynamicState3RasterizationStream = VK_FALSE, - .extendedDynamicState3ConservativeRasterizationMode = VK_FALSE, - .extendedDynamicState3ExtraPrimitiveOverestimationSize = VK_FALSE, - .extendedDynamicState3DepthClipEnable = VK_FALSE, - .extendedDynamicState3SampleLocationsEnable = VK_FALSE, - .extendedDynamicState3ColorBlendAdvanced = VK_FALSE, - .extendedDynamicState3ProvokingVertexMode = VK_FALSE, - .extendedDynamicState3LineRasterizationMode = VK_FALSE, - .extendedDynamicState3LineStippleEnable = VK_FALSE, - .extendedDynamicState3DepthClipNegativeOneToOne = VK_FALSE, - .extendedDynamicState3ViewportWScalingEnable = VK_FALSE, - .extendedDynamicState3ViewportSwizzle = VK_FALSE, - .extendedDynamicState3CoverageToColorEnable = VK_FALSE, - .extendedDynamicState3CoverageToColorLocation = VK_FALSE, - .extendedDynamicState3CoverageModulationMode = VK_FALSE, - .extendedDynamicState3CoverageModulationTableEnable = VK_FALSE, - .extendedDynamicState3CoverageModulationTable = VK_FALSE, - .extendedDynamicState3CoverageReductionMode = VK_FALSE, - .extendedDynamicState3RepresentativeFragmentTestEnable = VK_FALSE, - .extendedDynamicState3ShadingRateImageEnable = VK_FALSE, - }; - SetNext(next, dynamic_state_3); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state 3"); - } - - VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; - if (ext_line_rasterization) { - line_raster = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT, - .pNext = nullptr, - .rectangularLines = VK_TRUE, - .bresenhamLines = VK_FALSE, - .smoothLines = VK_TRUE, - .stippledRectangularLines = VK_FALSE, - .stippledBresenhamLines = VK_FALSE, - .stippledSmoothLines = VK_FALSE, - }; - SetNext(next, line_raster); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines"); - } - - if (!ext_conservative_rasterization) { - LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); - } - - VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; - if (ext_provoking_vertex) { - provoking_vertex = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, - .pNext = nullptr, - .provokingVertexLast = VK_TRUE, - .transformFeedbackPreservesProvokingVertex = VK_TRUE, - }; - SetNext(next, provoking_vertex); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); - } - - VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic; - if (ext_vertex_input_dynamic_state) { - vertex_input_dynamic = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT, - .pNext = nullptr, - .vertexInputDynamicState = VK_TRUE, - }; - SetNext(next, vertex_input_dynamic); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state"); - } - - VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; - if (ext_shader_atomic_int64) { - atomic_int64 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES, - .pNext = nullptr, - .shaderBufferInt64Atomics = VK_TRUE, - .shaderSharedInt64Atomics = VK_TRUE, - }; - SetNext(next, atomic_int64); - } + const VkDriverId driver_id = properties.driver.driverID; + const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; + const bool is_amd_driver = + driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; + const bool is_amd = is_amd_driver || is_radv; + const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS; + const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; + const bool is_nvidia = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY; + const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK; - VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; - if (khr_workgroup_memory_explicit_layout && is_shader_int16_supported) { - workgroup_layout = { - .sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR, - .pNext = nullptr, - .workgroupMemoryExplicitLayout = VK_TRUE, - .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE, - .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE, - .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE, - }; - SetNext(next, workgroup_layout); - } else if (khr_workgroup_memory_explicit_layout) { - // TODO(lat9nq): Find a proper fix for this - LOG_WARNING(Render_Vulkan, "Disabling VK_KHR_workgroup_memory_explicit_layout due to a " - "yuzu bug when host driver does not support 16-bit integers"); - khr_workgroup_memory_explicit_layout = false; + if (is_mvk && !is_suitable) { + LOG_WARNING(Render_Vulkan, "Unsuitable driver is MoltenVK, continuing anyway"); + } else if (!is_suitable) { + throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); } - VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties; - if (khr_pipeline_executable_properties) { - LOG_INFO(Render_Vulkan, "Enabling shader feedback, expect slower shader build times"); - executable_properties = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR, - .pNext = nullptr, - .pipelineExecutableInfo = VK_TRUE, - }; - SetNext(next, executable_properties); - } - - if (!ext_depth_range_unrestricted) { - LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); - } + SetupFamilies(surface); + const auto queue_cis = GetDeviceQueueCreateInfos(); - VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features; - if (ext_depth_clip_control) { - depth_clip_control_features = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT, - .pNext = nullptr, - .depthClipControl = VK_TRUE, - }; - SetNext(next, depth_clip_control_features); - } + // GetSuitability has already configured the linked list of features for us. + // Reuse it here. + const void* first_next = &features2; - VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; - if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { + VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv{}; + if (Settings::values.enable_nsight_aftermath && extensions.device_diagnostics_config) { nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>(); diagnostics_nv = { @@ -744,33 +337,39 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; first_next = &diagnostics_nv; } - logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); - is_integrated = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; - is_virtual = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU; - is_non_gpu = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER || - properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU; + is_blit_depth_stencil_supported = TestDepthStencilBlits(); + is_optimal_astc_supported = ComputeIsOptimalAstcSupported(); + is_warp_potentially_bigger = !extensions.subgroup_size_control || + properties.subgroup_size_control.maxSubgroupSize > GuestWarpSize; + + is_integrated = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; + is_virtual = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU; + is_non_gpu = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER || + properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU; + + supports_d24_depth = + IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal); CollectPhysicalMemoryInfo(); - CollectTelemetryParameters(); CollectToolingInfo(); - if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { - const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff; - + if (is_nvidia) { + const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; const auto arch = GetNvidiaArchitecture(physical, supported_extensions); switch (arch) { case NvidiaArchitecture::AmpereOrNewer: - LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); - is_float16_supported = false; + LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math"); + features.shader_float16_int8.shaderFloat16 = false; break; case NvidiaArchitecture::Turing: break; case NvidiaArchitecture::VoltaOrOlder: if (nv_major_version < 527) { - LOG_WARNING(Render_Vulkan, - "Blacklisting Volta and older from VK_KHR_push_descriptor"); - khr_push_descriptor = false; + LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor"); + extensions.push_descriptor = false; + loaded_extensions.erase(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); } break; } @@ -779,75 +378,75 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR cant_blit_msaa = true; } } - const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; - if (ext_extended_dynamic_state && is_radv) { + if (extensions.extended_dynamic_state && is_radv) { // Mask driver version variant - const u32 version = (properties.driverVersion << 3) >> 3; + const u32 version = (properties.properties.driverVersion << 3) >> 3; if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) { LOG_WARNING(Render_Vulkan, "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state"); - ext_extended_dynamic_state = false; + extensions.extended_dynamic_state = false; + loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); } } - if (ext_vertex_input_dynamic_state && is_radv) { + if (extensions.extended_dynamic_state2 && is_radv) { + const u32 version = (properties.properties.driverVersion << 3) >> 3; + if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) { + LOG_WARNING( + Render_Vulkan, + "RADV versions older than 22.3.1 have broken VK_EXT_extended_dynamic_state2"); + features.extended_dynamic_state2.extendedDynamicState2 = false; + features.extended_dynamic_state2.extendedDynamicState2LogicOp = false; + features.extended_dynamic_state2.extendedDynamicState2PatchControlPoints = false; + extensions.extended_dynamic_state2 = false; + loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); + } + } + if (extensions.vertex_input_dynamic_state && is_radv) { // TODO(ameerj): Blacklist only offending driver versions // TODO(ameerj): Confirm if RDNA1 is affected const bool is_rdna2 = - IsExtensionSupported(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME); + supported_extensions.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME); if (is_rdna2) { LOG_WARNING(Render_Vulkan, "RADV has broken VK_EXT_vertex_input_dynamic_state on RDNA2 hardware"); - ext_vertex_input_dynamic_state = false; + extensions.vertex_input_dynamic_state = false; + loaded_extensions.erase(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); } } - if (ext_extended_dynamic_state_2 && is_radv) { - const u32 version = (properties.driverVersion << 3) >> 3; - if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) { - LOG_WARNING( - Render_Vulkan, - "RADV versions older than 22.3.1 have broken VK_EXT_extended_dynamic_state2"); - ext_extended_dynamic_state_2 = false; - ext_extended_dynamic_state_2_extra = false; - } - } - sets_per_pool = 64; - const bool is_amd = - driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; - if (is_amd) { + sets_per_pool = 64; + if (is_amd_driver) { // AMD drivers need a higher amount of Sets per Pool in certain circunstances like in XC2. sets_per_pool = 96; // Disable VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT on AMD GCN4 and lower as it is broken. - if (!is_float16_supported) { - LOG_WARNING( - Render_Vulkan, - "AMD GCN4 and earlier do not properly support VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"); + if (!features.shader_float16_int8.shaderFloat16) { + LOG_WARNING(Render_Vulkan, + "AMD GCN4 and earlier have broken VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"); has_broken_cube_compatibility = true; } } - const bool is_amd_or_radv = is_amd || is_radv; - if (ext_sampler_filter_minmax && is_amd_or_radv) { + if (extensions.sampler_filter_minmax && is_amd) { // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. - if (!is_float16_supported) { + if (!features.shader_float16_int8.shaderFloat16) { LOG_WARNING(Render_Vulkan, - "Blacklisting AMD GCN4 and earlier for VK_EXT_sampler_filter_minmax"); - ext_sampler_filter_minmax = false; + "AMD GCN4 and earlier have broken VK_EXT_sampler_filter_minmax"); + extensions.sampler_filter_minmax = false; + loaded_extensions.erase(VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME); } } - const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS; - const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; - if (ext_vertex_input_dynamic_state && is_intel_windows) { - const u32 version = (properties.driverVersion << 3) >> 3; + if (extensions.vertex_input_dynamic_state && is_intel_windows) { + const u32 version = (properties.properties.driverVersion << 3) >> 3; if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) { - LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); - ext_vertex_input_dynamic_state = false; + LOG_WARNING(Render_Vulkan, "Intel has broken VK_EXT_vertex_input_dynamic_state"); + extensions.vertex_input_dynamic_state = false; + loaded_extensions.erase(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); } } - if (is_float16_supported && is_intel_windows) { + if (features.shader_float16_int8.shaderFloat16 && is_intel_windows) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. - LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - is_float16_supported = false; + LOG_WARNING(Render_Vulkan, "Intel has broken float16 math"); + features.shader_float16_int8.shaderFloat16 = false; } if (is_intel_windows) { LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); @@ -857,10 +456,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_WARNING(Render_Vulkan, "ANV driver does not support native BGR format"); must_emulate_bgr565 = true; } + if (is_mvk) { + LOG_WARNING(Render_Vulkan, + "MVK driver breaks when using more than 16 vertex attributes/bindings"); + properties.properties.limits.maxVertexInputAttributes = + std::min(properties.properties.limits.maxVertexInputAttributes, 16U); + properties.properties.limits.maxVertexInputBindings = + std::min(properties.properties.limits.maxVertexInputBindings, 16U); + } - supports_d24_depth = - IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal); + logical = vk::Device::Create(physical, queue_cis, ExtensionListForVulkan(loaded_extensions), + first_next, dld); graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); @@ -915,7 +521,7 @@ void Device::SaveShader(std::span<const u32> spirv) const { } } -bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { +bool Device::ComputeIsOptimalAstcSupported() const { // Disable for now to avoid converting ASTC twice. static constexpr std::array astc_formats = { VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, @@ -933,7 +539,7 @@ bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) co VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK, VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, }; - if (!features.textureCompressionASTC_LDR) { + if (!features.features.textureCompressionASTC_LDR) { return false; } const auto format_feature_usage{ @@ -971,7 +577,7 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want } std::string Device::GetDriverName() const { - switch (driver_id) { + switch (properties.driver.driverID) { case VK_DRIVER_ID_AMD_PROPRIETARY: return "AMD"; case VK_DRIVER_ID_AMD_OPEN_SOURCE: @@ -987,507 +593,336 @@ std::string Device::GetDriverName() const { case VK_DRIVER_ID_MESA_LLVMPIPE: return "LAVAPIPE"; default: - return vendor_name; + return properties.driver.driverName; } } -static std::vector<const char*> ExtensionsRequiredForInstanceVersion(u32 available_version) { - std::vector<const char*> extensions{REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()}; +bool Device::ShouldBoostClocks() const { + const auto driver_id = properties.driver.driverID; + const auto vendor_id = properties.properties.vendorID; + const auto device_id = properties.properties.deviceID; - if (available_version < VK_API_VERSION_1_2) { - extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_2.begin(), - REQUIRED_EXTENSIONS_BEFORE_1_2.end()); - } + const bool validated_driver = + driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE || + driver_id == VK_DRIVER_ID_MESA_RADV || driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY || + driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS || + driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; - if (available_version < VK_API_VERSION_1_3) { - extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_3.begin(), - REQUIRED_EXTENSIONS_BEFORE_1_3.end()); - } + const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; - return extensions; + return validated_driver && !is_steam_deck; } -void Device::CheckSuitability(bool requires_swapchain) const { - std::vector<const char*> required_extensions = - ExtensionsRequiredForInstanceVersion(instance_version); - std::vector<const char*> available_extensions; +bool Device::GetSuitability(bool requires_swapchain) { + // Assume we will be suitable. + bool suitable = true; - if (requires_swapchain) { - required_extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); - } + // Configure properties. + properties.properties = physical.GetProperties(); + + // Set instance version. + instance_version = properties.properties.apiVersion; + // Minimum of API version 1.1 is required. (This is well-supported.) + ASSERT(instance_version >= VK_API_VERSION_1_1); + + // Get available extensions. auto extension_properties = physical.EnumerateDeviceExtensionProperties(); + // Get the set of supported extensions. + supported_extensions.clear(); for (const VkExtensionProperties& property : extension_properties) { - available_extensions.push_back(property.extensionName); + supported_extensions.insert(property.extensionName); } - bool has_all_required_extensions = true; - for (const char* requirement_name : required_extensions) { - const bool found = - std::ranges::any_of(available_extensions, [&](const char* extension_name) { - return std::strcmp(requirement_name, extension_name) == 0; - }); + // Generate list of extensions to load. + loaded_extensions.clear(); - if (!found) { - LOG_ERROR(Render_Vulkan, "Missing required extension: {}", requirement_name); - has_all_required_extensions = false; - } +#define EXTENSION(prefix, macro_name, var_name) \ + if (supported_extensions.contains(VK_##prefix##_##macro_name##_EXTENSION_NAME)) { \ + loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \ + extensions.var_name = true; \ } - - if (!has_all_required_extensions) { - throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); +#define FEATURE_EXTENSION(prefix, struct_name, macro_name, var_name) \ + if (supported_extensions.contains(VK_##prefix##_##macro_name##_EXTENSION_NAME)) { \ + loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \ + extensions.var_name = true; \ } - struct LimitTuple { - u32 minimum; - u32 value; - const char* name; - }; - const VkPhysicalDeviceLimits& limits{properties.limits}; - const std::array limits_report{ - LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"}, - LimitTuple{16, limits.maxViewports, "maxViewports"}, - LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"}, - LimitTuple{8, limits.maxClipDistances, "maxClipDistances"}, - }; - for (const auto& tuple : limits_report) { - if (tuple.value < tuple.minimum) { - LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name, - tuple.minimum, tuple.value); - throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); - } + if (instance_version < VK_API_VERSION_1_2) { + FOR_EACH_VK_FEATURE_1_2(FEATURE_EXTENSION); + } + if (instance_version < VK_API_VERSION_1_3) { + FOR_EACH_VK_FEATURE_1_3(FEATURE_EXTENSION); } - VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{}; - demote.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES; - demote.pNext = nullptr; - VkPhysicalDeviceVariablePointerFeatures variable_pointers{}; - variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES; - variable_pointers.pNext = &demote; + FOR_EACH_VK_FEATURE_EXT(FEATURE_EXTENSION); + FOR_EACH_VK_EXTENSION(EXTENSION); +#ifdef _WIN32 + FOR_EACH_VK_EXTENSION_WIN32(EXTENSION); +#endif - VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; - robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - robustness2.pNext = &variable_pointers; +#undef FEATURE_EXTENSION +#undef EXTENSION - VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore{}; - timeline_semaphore.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES; - timeline_semaphore.pNext = &robustness2; + // Some extensions are mandatory. Check those. +#define CHECK_EXTENSION(extension_name) \ + if (!loaded_extensions.contains(extension_name)) { \ + LOG_ERROR(Render_Vulkan, "Missing required extension {}", extension_name); \ + suitable = false; \ + } - VkPhysicalDevice16BitStorageFeatures bit16_storage{}; - bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES; - bit16_storage.pNext = &timeline_semaphore; +#define LOG_EXTENSION(extension_name) \ + if (!loaded_extensions.contains(extension_name)) { \ + LOG_INFO(Render_Vulkan, "Device doesn't support extension {}", extension_name); \ + } - VkPhysicalDevice8BitStorageFeatures bit8_storage{}; - bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES; - bit8_storage.pNext = &bit16_storage; + FOR_EACH_VK_RECOMMENDED_EXTENSION(LOG_EXTENSION); + FOR_EACH_VK_MANDATORY_EXTENSION(CHECK_EXTENSION); +#ifdef _WIN32 + FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(CHECK_EXTENSION); +#else + FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(CHECK_EXTENSION); +#endif - VkPhysicalDeviceHostQueryResetFeatures host_query_reset{}; - host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES; - host_query_reset.pNext = &bit8_storage; + if (requires_swapchain) { + CHECK_EXTENSION(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + } - VkPhysicalDeviceShaderDrawParametersFeatures draw_parameters{}; - draw_parameters.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES; - draw_parameters.pNext = &host_query_reset; +#undef LOG_EXTENSION +#undef CHECK_EXTENSION - VkPhysicalDeviceFeatures2 features2{}; + // Generate the linked list of features to test. features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - features2.pNext = &draw_parameters; - physical.GetFeatures2(features2); + // Set next pointer. + void** next = &features2.pNext; - const VkPhysicalDeviceFeatures& features{features2.features}; - std::array feature_report{ - std::make_pair(features.robustBufferAccess, "robustBufferAccess"), - std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), - std::make_pair(features.imageCubeArray, "imageCubeArray"), - std::make_pair(features.independentBlend, "independentBlend"), - std::make_pair(features.multiDrawIndirect, "multiDrawIndirect"), - std::make_pair(features.drawIndirectFirstInstance, "drawIndirectFirstInstance"), - std::make_pair(features.depthClamp, "depthClamp"), - std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), - std::make_pair(features.largePoints, "largePoints"), - std::make_pair(features.multiViewport, "multiViewport"), - std::make_pair(features.depthBiasClamp, "depthBiasClamp"), - std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), - std::make_pair(features.wideLines, "wideLines"), - std::make_pair(features.geometryShader, "geometryShader"), - std::make_pair(features.tessellationShader, "tessellationShader"), - std::make_pair(features.sampleRateShading, "sampleRateShading"), - std::make_pair(features.dualSrcBlend, "dualSrcBlend"), - std::make_pair(features.logicOp, "logicOp"), - std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), - std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), - std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), - std::make_pair(features.shaderStorageImageWriteWithoutFormat, - "shaderStorageImageWriteWithoutFormat"), - std::make_pair(features.shaderClipDistance, "shaderClipDistance"), - std::make_pair(features.shaderCullDistance, "shaderCullDistance"), - std::make_pair(variable_pointers.variablePointers, "variablePointers"), - std::make_pair(variable_pointers.variablePointersStorageBuffer, - "variablePointersStorageBuffer"), - std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), - std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), - std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), - std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), - std::make_pair(timeline_semaphore.timelineSemaphore, "timelineSemaphore"), - std::make_pair(bit16_storage.storageBuffer16BitAccess, "storageBuffer16BitAccess"), - std::make_pair(bit16_storage.uniformAndStorageBuffer16BitAccess, - "uniformAndStorageBuffer16BitAccess"), - std::make_pair(bit8_storage.storageBuffer8BitAccess, "storageBuffer8BitAccess"), - std::make_pair(bit8_storage.uniformAndStorageBuffer8BitAccess, - "uniformAndStorageBuffer8BitAccess"), - std::make_pair(host_query_reset.hostQueryReset, "hostQueryReset"), - std::make_pair(draw_parameters.shaderDrawParameters, "shaderDrawParameters"), - }; + // Test all features we know about. If the feature is not available in core at our + // current API version, and was not enabled by an extension, skip testing the feature. + // We set the structure sType explicitly here as it is zeroed by the constructor. +#define FEATURE(prefix, struct_name, macro_name, var_name) \ + features.var_name.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##macro_name##_FEATURES; \ + SetNext(next, features.var_name); - bool has_all_required_features = true; - for (const auto& [is_supported, name] : feature_report) { - if (!is_supported) { - LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name); - has_all_required_features = false; - } +#define EXT_FEATURE(prefix, struct_name, macro_name, var_name) \ + if (extensions.var_name) { \ + features.var_name.sType = \ + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##macro_name##_FEATURES_##prefix; \ + SetNext(next, features.var_name); \ } - if (!has_all_required_features) { - throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); + FOR_EACH_VK_FEATURE_1_1(FEATURE); + FOR_EACH_VK_FEATURE_EXT(EXT_FEATURE); + if (instance_version >= VK_API_VERSION_1_2) { + FOR_EACH_VK_FEATURE_1_2(FEATURE); + } else { + FOR_EACH_VK_FEATURE_1_2(EXT_FEATURE); } -} - -std::vector<const char*> Device::LoadExtensions(bool requires_surface) { - std::vector<const char*> extensions = ExtensionsRequiredForInstanceVersion(instance_version); - if (requires_surface) { - extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + if (instance_version >= VK_API_VERSION_1_3) { + FOR_EACH_VK_FEATURE_1_3(FEATURE); + } else { + FOR_EACH_VK_FEATURE_1_3(EXT_FEATURE); } - bool has_khr_shader_float16_int8{}; - bool has_khr_workgroup_memory_explicit_layout{}; - bool has_khr_pipeline_executable_properties{}; - bool has_khr_image_format_list{}; - bool has_khr_swapchain_mutable_format{}; - bool has_ext_subgroup_size_control{}; - bool has_ext_transform_feedback{}; - bool has_ext_custom_border_color{}; - bool has_ext_extended_dynamic_state{}; - bool has_ext_extended_dynamic_state_2{}; - bool has_ext_extended_dynamic_state_3{}; - bool has_ext_shader_atomic_int64{}; - bool has_ext_provoking_vertex{}; - bool has_ext_vertex_input_dynamic_state{}; - bool has_ext_line_rasterization{}; - bool has_ext_primitive_topology_list_restart{}; - bool has_ext_depth_clip_control{}; - for (const std::string& extension : supported_extensions) { - const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, - bool push) { - if (extension != name) { - return; - } - if (push) { - extensions.push_back(name); - } - if (status) { - status->get() = true; - } - }; - test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); - test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); - test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, - true); - test(khr_uniform_buffer_standard_layout, - VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); - test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); - test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); - test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); - test(khr_draw_indirect_count, VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME, true); - test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); - test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); - test(has_ext_primitive_topology_list_restart, - VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME, true); - test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true); - test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, - true); - test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); - test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); - test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, - true); - test(has_ext_depth_clip_control, VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME, false); - test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); - test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); - test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); - test(has_ext_extended_dynamic_state_2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME, - false); - test(has_ext_extended_dynamic_state_3, VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME, - false); - test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, true); - test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); - test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME, - false); - test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); - test(has_khr_workgroup_memory_explicit_layout, - VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); - test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false); - test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, - false); - test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); - test(ext_memory_budget, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, true); - if (Settings::values.enable_nsight_aftermath) { - test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, - true); - } - if (Settings::values.renderer_shader_feedback) { - test(has_khr_pipeline_executable_properties, - VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME, false); - } - } - VkPhysicalDeviceFeatures2 features{}; - features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - - VkPhysicalDeviceProperties2 physical_properties{}; - physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - - if (has_khr_shader_float16_int8) { - VkPhysicalDeviceShaderFloat16Int8Features float16_int8_features; - float16_int8_features.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES; - float16_int8_features.pNext = nullptr; - features.pNext = &float16_int8_features; - - physical.GetFeatures2(features); - is_float16_supported = float16_int8_features.shaderFloat16; - is_int8_supported = float16_int8_features.shaderInt8; - extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); - } - if (has_ext_subgroup_size_control) { - VkPhysicalDeviceSubgroupSizeControlFeatures subgroup_features; - subgroup_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES; - subgroup_features.pNext = nullptr; - features.pNext = &subgroup_features; - physical.GetFeatures2(features); - - VkPhysicalDeviceSubgroupSizeControlProperties subgroup_properties; - subgroup_properties.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES; - subgroup_properties.pNext = nullptr; - physical_properties.pNext = &subgroup_properties; - physical.GetProperties2(physical_properties); +#undef EXT_FEATURE +#undef FEATURE - is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize; + // Perform the feature test. + physical.GetFeatures2(features2); + features.features = features2.features; - if (subgroup_features.subgroupSizeControl && - subgroup_properties.minSubgroupSize <= GuestWarpSize && - subgroup_properties.maxSubgroupSize >= GuestWarpSize) { - extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); - guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; - ext_subgroup_size_control = true; - } - } else { - is_warp_potentially_bigger = true; + // Some features are mandatory. Check those. +#define CHECK_FEATURE(feature, name) \ + if (!features.feature.name) { \ + LOG_ERROR(Render_Vulkan, "Missing required feature {}", #name); \ + suitable = false; \ } - if (has_ext_provoking_vertex) { - VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; - provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT; - provoking_vertex.pNext = nullptr; - features.pNext = &provoking_vertex; - physical.GetFeatures2(features); - - if (provoking_vertex.provokingVertexLast && - provoking_vertex.transformFeedbackPreservesProvokingVertex) { - extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); - ext_provoking_vertex = true; - } - } - if (has_ext_vertex_input_dynamic_state) { - VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input; - vertex_input.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT; - vertex_input.pNext = nullptr; - features.pNext = &vertex_input; - physical.GetFeatures2(features); - - if (vertex_input.vertexInputDynamicState) { - extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); - ext_vertex_input_dynamic_state = true; - } - } - if (has_ext_shader_atomic_int64) { - VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; - atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES; - atomic_int64.pNext = nullptr; - features.pNext = &atomic_int64; - physical.GetFeatures2(features); - - if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) { - extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); - ext_shader_atomic_int64 = true; - } - } - if (has_ext_transform_feedback) { - VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; - tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; - tfb_features.pNext = nullptr; - features.pNext = &tfb_features; - physical.GetFeatures2(features); - - VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties; - tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; - tfb_properties.pNext = nullptr; - physical_properties.pNext = &tfb_properties; - physical.GetProperties2(physical_properties); - if (tfb_features.transformFeedback && tfb_features.geometryStreams && - tfb_properties.maxTransformFeedbackStreams >= 4 && - tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries && - tfb_properties.transformFeedbackDraw) { - extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); - ext_transform_feedback = true; - } - } - if (has_ext_custom_border_color) { - VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features; - border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; - border_features.pNext = nullptr; - features.pNext = &border_features; - physical.GetFeatures2(features); - - if (border_features.customBorderColors && border_features.customBorderColorWithoutFormat) { - extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); - ext_custom_border_color = true; - } - } - if (has_ext_extended_dynamic_state) { - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state; - extended_dynamic_state.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; - extended_dynamic_state.pNext = nullptr; - features.pNext = &extended_dynamic_state; - physical.GetFeatures2(features); - - if (extended_dynamic_state.extendedDynamicState) { - extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); - ext_extended_dynamic_state = true; - } - } - if (has_ext_extended_dynamic_state_2) { - VkPhysicalDeviceExtendedDynamicState2FeaturesEXT extended_dynamic_state_2; - extended_dynamic_state_2.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT; - extended_dynamic_state_2.pNext = nullptr; - features.pNext = &extended_dynamic_state_2; - physical.GetFeatures2(features); - - if (extended_dynamic_state_2.extendedDynamicState2) { - extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); - ext_extended_dynamic_state_2 = true; - ext_extended_dynamic_state_2_extra = - extended_dynamic_state_2.extendedDynamicState2LogicOp; - } +#define LOG_FEATURE(feature, name) \ + if (!features.feature.name) { \ + LOG_INFO(Render_Vulkan, "Device doesn't support feature {}", #name); \ } - if (has_ext_extended_dynamic_state_3) { - VkPhysicalDeviceExtendedDynamicState3FeaturesEXT extended_dynamic_state_3; - extended_dynamic_state_3.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT; - extended_dynamic_state_3.pNext = nullptr; - features.pNext = &extended_dynamic_state_3; - physical.GetFeatures2(features); - - ext_extended_dynamic_state_3_blend = - extended_dynamic_state_3.extendedDynamicState3ColorBlendEnable && - extended_dynamic_state_3.extendedDynamicState3ColorBlendEquation && - extended_dynamic_state_3.extendedDynamicState3ColorWriteMask; - - ext_extended_dynamic_state_3_enables = - extended_dynamic_state_3.extendedDynamicState3DepthClampEnable && - extended_dynamic_state_3.extendedDynamicState3LogicOpEnable; - - ext_extended_dynamic_state_3 = - ext_extended_dynamic_state_3_blend || ext_extended_dynamic_state_3_enables; - if (ext_extended_dynamic_state_3) { - extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - } + + FOR_EACH_VK_RECOMMENDED_FEATURE(LOG_FEATURE); + FOR_EACH_VK_MANDATORY_FEATURE(CHECK_FEATURE); + +#undef LOG_FEATURE +#undef CHECK_FEATURE + + // Generate linked list of properties. + properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + + // Set next pointer. + next = &properties2.pNext; + + // Get driver info. + properties.driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; + SetNext(next, properties.driver); + + // Retrieve relevant extension properties. + if (extensions.shader_float_controls) { + properties.float_controls.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES; + SetNext(next, properties.float_controls); } - if (has_ext_line_rasterization) { - VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; - line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT; - line_raster.pNext = nullptr; - features.pNext = &line_raster; - physical.GetFeatures2(features); - if (line_raster.rectangularLines && line_raster.smoothLines) { - extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME); - ext_line_rasterization = true; - } + if (extensions.push_descriptor) { + properties.push_descriptor.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; + SetNext(next, properties.push_descriptor); } - if (has_ext_depth_clip_control) { - VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features; - depth_clip_control_features.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT; - depth_clip_control_features.pNext = nullptr; - features.pNext = &depth_clip_control_features; - physical.GetFeatures2(features); - - if (depth_clip_control_features.depthClipControl) { - extensions.push_back(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); - ext_depth_clip_control = true; - } + if (extensions.subgroup_size_control) { + properties.subgroup_size_control.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES; + SetNext(next, properties.subgroup_size_control); } - if (has_khr_workgroup_memory_explicit_layout) { - VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; - layout.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR; - layout.pNext = nullptr; - features.pNext = &layout; - physical.GetFeatures2(features); - - if (layout.workgroupMemoryExplicitLayout && - layout.workgroupMemoryExplicitLayout8BitAccess && - layout.workgroupMemoryExplicitLayout16BitAccess && - layout.workgroupMemoryExplicitLayoutScalarBlockLayout) { - extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); - khr_workgroup_memory_explicit_layout = true; - } + if (extensions.transform_feedback) { + properties.transform_feedback.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; + SetNext(next, properties.transform_feedback); } - if (has_khr_pipeline_executable_properties) { - VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties; - executable_properties.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR; - executable_properties.pNext = nullptr; - features.pNext = &executable_properties; - physical.GetFeatures2(features); - - if (executable_properties.pipelineExecutableInfo) { - extensions.push_back(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME); - khr_pipeline_executable_properties = true; + + // Perform the property fetch. + physical.GetProperties2(properties2); + properties.properties = properties2.properties; + + // Unload extensions if feature support is insufficient. + RemoveUnsuitableExtensions(); + + // Check limits. + struct Limit { + u32 minimum; + u32 value; + const char* name; + }; + + const VkPhysicalDeviceLimits& limits{properties.properties.limits}; + const std::array limits_report{ + Limit{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"}, + Limit{16, limits.maxViewports, "maxViewports"}, + Limit{8, limits.maxColorAttachments, "maxColorAttachments"}, + Limit{8, limits.maxClipDistances, "maxClipDistances"}, + }; + + for (const auto& [min, value, name] : limits_report) { + if (value < min) { + LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", name, min, value); + suitable = false; } } - if (has_ext_primitive_topology_list_restart) { - VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart{}; - primitive_topology_list_restart.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT; - primitive_topology_list_restart.pNext = nullptr; - features.pNext = &primitive_topology_list_restart; - physical.GetFeatures2(features); - - is_topology_list_restart_supported = - primitive_topology_list_restart.primitiveTopologyListRestart; - is_patch_list_restart_supported = - primitive_topology_list_restart.primitiveTopologyPatchListRestart; - } - if (has_khr_image_format_list && has_khr_swapchain_mutable_format) { - extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); - extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); - khr_swapchain_mutable_format = true; - } - if (khr_push_descriptor) { - VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; - push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; - push_descriptor.pNext = nullptr; - physical_properties.pNext = &push_descriptor; - physical.GetProperties2(physical_properties); + // Return whether we were suitable. + return suitable; +} - max_push_descriptors = push_descriptor.maxPushDescriptors; +void Device::RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name) { + if (loaded_extensions.contains(extension_name) && !is_suitable) { + LOG_WARNING(Render_Vulkan, "Removing unsuitable extension {}", extension_name); + loaded_extensions.erase(extension_name); } - return extensions; +} + +void Device::RemoveUnsuitableExtensions() { + // VK_EXT_custom_border_color + extensions.custom_border_color = features.custom_border_color.customBorderColors && + features.custom_border_color.customBorderColorWithoutFormat; + RemoveExtensionIfUnsuitable(extensions.custom_border_color, + VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); + + // VK_EXT_depth_clip_control + extensions.depth_clip_control = features.depth_clip_control.depthClipControl; + RemoveExtensionIfUnsuitable(extensions.depth_clip_control, + VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); + + // VK_EXT_extended_dynamic_state + extensions.extended_dynamic_state = features.extended_dynamic_state.extendedDynamicState; + RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state, + VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); + + // VK_EXT_extended_dynamic_state2 + extensions.extended_dynamic_state2 = features.extended_dynamic_state2.extendedDynamicState2; + RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state2, + VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); + + // VK_EXT_extended_dynamic_state3 + dynamic_state3_blending = + features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable && + features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation && + features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask; + dynamic_state3_enables = + features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable && + features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable; + + extensions.extended_dynamic_state3 = dynamic_state3_blending || dynamic_state3_enables; + dynamic_state3_blending = dynamic_state3_blending && extensions.extended_dynamic_state3; + dynamic_state3_enables = dynamic_state3_enables && extensions.extended_dynamic_state3; + RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state3, + VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); + + // VK_EXT_provoking_vertex + extensions.provoking_vertex = + features.provoking_vertex.provokingVertexLast && + features.provoking_vertex.transformFeedbackPreservesProvokingVertex; + RemoveExtensionIfUnsuitable(extensions.provoking_vertex, + VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + + // VK_KHR_shader_atomic_int64 + extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics && + features.shader_atomic_int64.shaderSharedInt64Atomics; + RemoveExtensionIfUnsuitable(extensions.shader_atomic_int64, + VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); + + // VK_EXT_shader_demote_to_helper_invocation + extensions.shader_demote_to_helper_invocation = + features.shader_demote_to_helper_invocation.shaderDemoteToHelperInvocation; + RemoveExtensionIfUnsuitable(extensions.shader_demote_to_helper_invocation, + VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME); + + // VK_EXT_subgroup_size_control + extensions.subgroup_size_control = + features.subgroup_size_control.subgroupSizeControl && + properties.subgroup_size_control.minSubgroupSize <= GuestWarpSize && + properties.subgroup_size_control.maxSubgroupSize >= GuestWarpSize; + RemoveExtensionIfUnsuitable(extensions.subgroup_size_control, + VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); + + // VK_EXT_transform_feedback + extensions.transform_feedback = + features.transform_feedback.transformFeedback && + features.transform_feedback.geometryStreams && + properties.transform_feedback.maxTransformFeedbackStreams >= 4 && + properties.transform_feedback.maxTransformFeedbackBuffers > 0 && + properties.transform_feedback.transformFeedbackQueries && + properties.transform_feedback.transformFeedbackDraw; + RemoveExtensionIfUnsuitable(extensions.transform_feedback, + VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); + + // VK_EXT_vertex_input_dynamic_state + extensions.vertex_input_dynamic_state = + features.vertex_input_dynamic_state.vertexInputDynamicState; + RemoveExtensionIfUnsuitable(extensions.vertex_input_dynamic_state, + VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + + // VK_KHR_pipeline_executable_properties + if (Settings::values.renderer_shader_feedback.GetValue()) { + extensions.pipeline_executable_properties = + features.pipeline_executable_properties.pipelineExecutableInfo; + RemoveExtensionIfUnsuitable(extensions.pipeline_executable_properties, + VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME); + } else { + extensions.pipeline_executable_properties = false; + loaded_extensions.erase(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME); + } + + // VK_KHR_workgroup_memory_explicit_layout + extensions.workgroup_memory_explicit_layout = + features.features.shaderInt16 && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout; + RemoveExtensionIfUnsuitable(extensions.workgroup_memory_explicit_layout, + VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); } void Device::SetupFamilies(VkSurfaceKHR surface) { @@ -1517,55 +952,12 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { LOG_ERROR(Render_Vulkan, "Device lacks a present queue"); throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } - graphics_family = *graphics; - present_family = *present; -} - -void Device::SetupFeatures() { - const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; - is_depth_bounds_supported = features.depthBounds; - is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; - is_shader_float64_supported = features.shaderFloat64; - is_shader_int64_supported = features.shaderInt64; - is_shader_int16_supported = features.shaderInt16; - is_shader_storage_image_multisample = features.shaderStorageImageMultisample; - is_blit_depth_stencil_supported = TestDepthStencilBlits(); - is_optimal_astc_supported = IsOptimalAstcSupported(features); - - const VkPhysicalDeviceLimits& limits{properties.limits}; - max_vertex_input_attributes = limits.maxVertexInputAttributes; - max_vertex_input_bindings = limits.maxVertexInputBindings; -} - -void Device::SetupProperties() { - float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES; - - VkPhysicalDeviceProperties2KHR properties2{}; - properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - properties2.pNext = &float_controls; - - physical.GetProperties2(properties2); -} - -void Device::CollectTelemetryParameters() { - VkPhysicalDeviceDriverProperties driver{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, - .pNext = nullptr, - .driverID = {}, - .driverName = {}, - .driverInfo = {}, - .conformanceVersion = {}, - }; - - VkPhysicalDeviceProperties2 device_properties{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, - .pNext = &driver, - .properties = {}, - }; - physical.GetProperties2(device_properties); - - driver_id = driver.driverID; - vendor_name = driver.driverName; + if (graphics) { + graphics_family = *graphics; + } + if (present) { + present_family = *present; + } } u64 Device::GetDeviceMemoryUsage() const { @@ -1583,7 +975,8 @@ u64 Device::GetDeviceMemoryUsage() const { void Device::CollectPhysicalMemoryInfo() { VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; - const auto mem_info = physical.GetMemoryProperties(ext_memory_budget ? &budget : nullptr); + const auto mem_info = + physical.GetMemoryProperties(extensions.memory_budget ? &budget : nullptr); const auto& mem_properties = mem_info.memoryProperties; const size_t num_properties = mem_properties.memoryHeapCount; device_access_memory = 0; @@ -1599,7 +992,7 @@ void Device::CollectPhysicalMemoryInfo() { if (is_heap_local) { local_memory += mem_properties.memoryHeaps[element].size; } - if (ext_memory_budget) { + if (extensions.memory_budget) { device_initial_usage += budget.heapUsage[element]; device_access_memory += budget.heapBudget[element]; continue; @@ -1615,7 +1008,7 @@ void Device::CollectPhysicalMemoryInfo() { } void Device::CollectToolingInfo() { - if (!ext_tooling_info) { + if (!extensions.tooling_info) { return; } auto tools{physical.GetPhysicalDeviceToolProperties()}; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 920a8f4e3..0662a2d9f 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -3,6 +3,7 @@ #pragma once +#include <set> #include <span> #include <string> #include <unordered_map> @@ -11,6 +12,156 @@ #include "common/common_types.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +// Define all features which may be used by the implementation here. +// Vulkan version in the macro describes the minimum version required for feature availability. +// If the Vulkan version is lower than the required version, the named extension is required. +#define FOR_EACH_VK_FEATURE_1_1(FEATURE) \ + FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control) \ + FEATURE(KHR, 16BitStorage, 16BIT_STORAGE, bit16_storage) \ + FEATURE(KHR, ShaderAtomicInt64, SHADER_ATOMIC_INT64, shader_atomic_int64) \ + FEATURE(KHR, ShaderDrawParameters, SHADER_DRAW_PARAMETERS, shader_draw_parameters) \ + FEATURE(KHR, ShaderFloat16Int8, SHADER_FLOAT16_INT8, shader_float16_int8) \ + FEATURE(KHR, UniformBufferStandardLayout, UNIFORM_BUFFER_STANDARD_LAYOUT, \ + uniform_buffer_standard_layout) \ + FEATURE(KHR, VariablePointer, VARIABLE_POINTERS, variable_pointer) + +#define FOR_EACH_VK_FEATURE_1_2(FEATURE) \ + FEATURE(EXT, HostQueryReset, HOST_QUERY_RESET, host_query_reset) \ + FEATURE(KHR, 8BitStorage, 8BIT_STORAGE, bit8_storage) \ + FEATURE(KHR, TimelineSemaphore, TIMELINE_SEMAPHORE, timeline_semaphore) + +#define FOR_EACH_VK_FEATURE_1_3(FEATURE) \ + FEATURE(EXT, ShaderDemoteToHelperInvocation, SHADER_DEMOTE_TO_HELPER_INVOCATION, \ + shader_demote_to_helper_invocation) + +// Define all features which may be used by the implementation and require an extension here. +#define FOR_EACH_VK_FEATURE_EXT(FEATURE) \ + FEATURE(EXT, CustomBorderColor, CUSTOM_BORDER_COLOR, custom_border_color) \ + FEATURE(EXT, DepthClipControl, DEPTH_CLIP_CONTROL, depth_clip_control) \ + FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \ + FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \ + FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \ + FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \ + FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \ + FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \ + primitive_topology_list_restart) \ + FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \ + FEATURE(EXT, Robustness2, ROBUSTNESS_2, robustness2) \ + FEATURE(EXT, TransformFeedback, TRANSFORM_FEEDBACK, transform_feedback) \ + FEATURE(EXT, VertexInputDynamicState, VERTEX_INPUT_DYNAMIC_STATE, vertex_input_dynamic_state) \ + FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \ + pipeline_executable_properties) \ + FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \ + workgroup_memory_explicit_layout) + +// Define miscellaneous extensions which may be used by the implementation here. +#define FOR_EACH_VK_EXTENSION(EXTENSION) \ + EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization) \ + EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted) \ + EXTENSION(EXT, MEMORY_BUDGET, memory_budget) \ + EXTENSION(EXT, ROBUSTNESS_2, robustness_2) \ + EXTENSION(EXT, SAMPLER_FILTER_MINMAX, sampler_filter_minmax) \ + EXTENSION(EXT, SHADER_STENCIL_EXPORT, shader_stencil_export) \ + EXTENSION(EXT, SHADER_VIEWPORT_INDEX_LAYER, shader_viewport_index_layer) \ + EXTENSION(EXT, TOOLING_INFO, tooling_info) \ + EXTENSION(EXT, VERTEX_ATTRIBUTE_DIVISOR, vertex_attribute_divisor) \ + EXTENSION(KHR, DRAW_INDIRECT_COUNT, draw_indirect_count) \ + EXTENSION(KHR, DRIVER_PROPERTIES, driver_properties) \ + EXTENSION(KHR, EXTERNAL_MEMORY_FD, external_memory_fd) \ + EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor) \ + EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge) \ + EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \ + EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \ + EXTENSION(KHR, SWAPCHAIN, swapchain) \ + EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \ + EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \ + EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \ + EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \ + EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle) + +#define FOR_EACH_VK_EXTENSION_WIN32(EXTENSION) \ + EXTENSION(KHR, EXTERNAL_MEMORY_WIN32, external_memory_win32) + +// Define extensions which must be supported. +#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME) + +#define FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME) + +#define FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(EXTENSION_NAME) \ + EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME) + +// Define extensions where the absence of the extension may result in a degraded experience. +#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \ + EXTENSION_NAME(VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME) \ + EXTENSION_NAME(VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME) \ + EXTENSION_NAME(VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME) + +// Define features which must be supported. +#define FOR_EACH_VK_MANDATORY_FEATURE(FEATURE_NAME) \ + FEATURE_NAME(bit16_storage, storageBuffer16BitAccess) \ + FEATURE_NAME(bit16_storage, uniformAndStorageBuffer16BitAccess) \ + FEATURE_NAME(bit8_storage, storageBuffer8BitAccess) \ + FEATURE_NAME(bit8_storage, uniformAndStorageBuffer8BitAccess) \ + FEATURE_NAME(features, depthBiasClamp) \ + FEATURE_NAME(features, depthClamp) \ + FEATURE_NAME(features, drawIndirectFirstInstance) \ + FEATURE_NAME(features, dualSrcBlend) \ + FEATURE_NAME(features, fillModeNonSolid) \ + FEATURE_NAME(features, fragmentStoresAndAtomics) \ + FEATURE_NAME(features, geometryShader) \ + FEATURE_NAME(features, imageCubeArray) \ + FEATURE_NAME(features, independentBlend) \ + FEATURE_NAME(features, largePoints) \ + FEATURE_NAME(features, logicOp) \ + FEATURE_NAME(features, multiDrawIndirect) \ + FEATURE_NAME(features, multiViewport) \ + FEATURE_NAME(features, occlusionQueryPrecise) \ + FEATURE_NAME(features, robustBufferAccess) \ + FEATURE_NAME(features, samplerAnisotropy) \ + FEATURE_NAME(features, sampleRateShading) \ + FEATURE_NAME(features, shaderClipDistance) \ + FEATURE_NAME(features, shaderCullDistance) \ + FEATURE_NAME(features, shaderImageGatherExtended) \ + FEATURE_NAME(features, shaderStorageImageWriteWithoutFormat) \ + FEATURE_NAME(features, tessellationShader) \ + FEATURE_NAME(features, vertexPipelineStoresAndAtomics) \ + FEATURE_NAME(features, wideLines) \ + FEATURE_NAME(host_query_reset, hostQueryReset) \ + FEATURE_NAME(robustness2, nullDescriptor) \ + FEATURE_NAME(robustness2, robustBufferAccess2) \ + FEATURE_NAME(robustness2, robustImageAccess2) \ + FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation) \ + FEATURE_NAME(shader_draw_parameters, shaderDrawParameters) \ + FEATURE_NAME(timeline_semaphore, timelineSemaphore) \ + FEATURE_NAME(variable_pointer, variablePointers) \ + FEATURE_NAME(variable_pointer, variablePointersStorageBuffer) + +// Define features where the absence of the feature may result in a degraded experience. +#define FOR_EACH_VK_RECOMMENDED_FEATURE(FEATURE_NAME) \ + FEATURE_NAME(custom_border_color, customBorderColors) \ + FEATURE_NAME(extended_dynamic_state, extendedDynamicState) \ + FEATURE_NAME(index_type_uint8, indexTypeUint8) \ + FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \ + FEATURE_NAME(provoking_vertex, provokingVertexLast) \ + FEATURE_NAME(shader_float16_int8, shaderFloat16) \ + FEATURE_NAME(shader_float16_int8, shaderInt8) \ + FEATURE_NAME(transform_feedback, transformFeedback) \ + FEATURE_NAME(uniform_buffer_standard_layout, uniformBufferStandardLayout) \ + FEATURE_NAME(vertex_input_dynamic_state, vertexInputDynamicState) + namespace Vulkan { class NsightAftermathTracker; @@ -88,67 +239,69 @@ public: /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. u32 ApiVersion() const { - return properties.apiVersion; + return properties.properties.apiVersion; } /// Returns the current driver version provided in Vulkan-formatted version numbers. u32 GetDriverVersion() const { - return properties.driverVersion; + return properties.properties.driverVersion; } /// Returns the device name. std::string_view GetModelName() const { - return properties.deviceName; + return properties.properties.deviceName; } /// Returns the driver ID. VkDriverIdKHR GetDriverID() const { - return driver_id; + return properties.driver.driverID; } + bool ShouldBoostClocks() const; + /// Returns uniform buffer alignment requeriment. VkDeviceSize GetUniformBufferAlignment() const { - return properties.limits.minUniformBufferOffsetAlignment; + return properties.properties.limits.minUniformBufferOffsetAlignment; } /// Returns storage alignment requeriment. VkDeviceSize GetStorageBufferAlignment() const { - return properties.limits.minStorageBufferOffsetAlignment; + return properties.properties.limits.minStorageBufferOffsetAlignment; } /// Returns the maximum range for storage buffers. VkDeviceSize GetMaxStorageBufferRange() const { - return properties.limits.maxStorageBufferRange; + return properties.properties.limits.maxStorageBufferRange; } /// Returns the maximum size for push constants. VkDeviceSize GetMaxPushConstantsSize() const { - return properties.limits.maxPushConstantsSize; + return properties.properties.limits.maxPushConstantsSize; } /// Returns the maximum size for shared memory. u32 GetMaxComputeSharedMemorySize() const { - return properties.limits.maxComputeSharedMemorySize; + return properties.properties.limits.maxComputeSharedMemorySize; } /// Returns float control properties of the device. const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { - return float_controls; + return properties.float_controls; } /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { - return is_optimal_astc_supported; + return features.features.textureCompressionASTC_LDR; } /// Returns true if the device supports float16 natively. bool IsFloat16Supported() const { - return is_float16_supported; + return features.shader_float16_int8.shaderFloat16; } /// Returns true if the device supports int8 natively. bool IsInt8Supported() const { - return is_int8_supported; + return features.shader_float16_int8.shaderInt8; } /// Returns true if the device warp size can potentially be bigger than guest's warp size. @@ -158,32 +311,32 @@ public: /// Returns true if the device can be forced to use the guest warp size. bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const { - return guest_warp_stages & stage; + return properties.subgroup_size_control.requiredSubgroupSizeStages & stage; } /// Returns the maximum number of push descriptors. u32 MaxPushDescriptors() const { - return max_push_descriptors; + return properties.push_descriptor.maxPushDescriptors; } /// Returns true if formatless image load is supported. bool IsFormatlessImageLoadSupported() const { - return is_formatless_image_load_supported; + return features.features.shaderStorageImageReadWithoutFormat; } /// Returns true if shader int64 is supported. bool IsShaderInt64Supported() const { - return is_shader_int64_supported; + return features.features.shaderInt64; } /// Returns true if shader int16 is supported. bool IsShaderInt16Supported() const { - return is_shader_int16_supported; + return features.features.shaderInt16; } // Returns true if depth bounds is supported. bool IsDepthBoundsSupported() const { - return is_depth_bounds_supported; + return features.features.depthBounds; } /// Returns true when blitting from and to depth stencil images is supported. @@ -193,151 +346,151 @@ public: /// Returns true if the device supports VK_NV_viewport_swizzle. bool IsNvViewportSwizzleSupported() const { - return nv_viewport_swizzle; + return extensions.viewport_swizzle; } /// Returns true if the device supports VK_NV_viewport_array2. bool IsNvViewportArray2Supported() const { - return nv_viewport_array2; + return extensions.viewport_array2; } /// Returns true if the device supports VK_NV_geometry_shader_passthrough. bool IsNvGeometryShaderPassthroughSupported() const { - return nv_geometry_shader_passthrough; + return extensions.geometry_shader_passthrough; } /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { - return khr_uniform_buffer_standard_layout; + return extensions.uniform_buffer_standard_layout; } /// Returns true if the device supports VK_KHR_push_descriptor. bool IsKhrPushDescriptorSupported() const { - return khr_push_descriptor; + return extensions.push_descriptor; } /// Returns true if VK_KHR_pipeline_executable_properties is enabled. bool IsKhrPipelineExecutablePropertiesEnabled() const { - return khr_pipeline_executable_properties; + return extensions.pipeline_executable_properties; } /// Returns true if VK_KHR_swapchain_mutable_format is enabled. bool IsKhrSwapchainMutableFormatEnabled() const { - return khr_swapchain_mutable_format; + return extensions.swapchain_mutable_format; } /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { - return khr_workgroup_memory_explicit_layout; + return extensions.workgroup_memory_explicit_layout; } /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. bool IsTopologyListPrimitiveRestartSupported() const { - return is_topology_list_restart_supported; + return features.primitive_topology_list_restart.primitiveTopologyListRestart; } /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. bool IsPatchListPrimitiveRestartSupported() const { - return is_patch_list_restart_supported; + return features.primitive_topology_list_restart.primitiveTopologyPatchListRestart; } /// Returns true if the device supports VK_EXT_index_type_uint8. bool IsExtIndexTypeUint8Supported() const { - return ext_index_type_uint8; + return extensions.index_type_uint8; } /// Returns true if the device supports VK_EXT_sampler_filter_minmax. bool IsExtSamplerFilterMinmaxSupported() const { - return ext_sampler_filter_minmax; + return extensions.sampler_filter_minmax; } /// Returns true if the device supports VK_EXT_depth_range_unrestricted. bool IsExtDepthRangeUnrestrictedSupported() const { - return ext_depth_range_unrestricted; + return extensions.depth_range_unrestricted; } /// Returns true if the device supports VK_EXT_depth_clip_control. bool IsExtDepthClipControlSupported() const { - return ext_depth_clip_control; + return extensions.depth_clip_control; } /// Returns true if the device supports VK_EXT_shader_viewport_index_layer. bool IsExtShaderViewportIndexLayerSupported() const { - return ext_shader_viewport_index_layer; + return extensions.shader_viewport_index_layer; } /// Returns true if the device supports VK_EXT_subgroup_size_control. bool IsExtSubgroupSizeControlSupported() const { - return ext_subgroup_size_control; + return extensions.subgroup_size_control; } /// Returns true if the device supports VK_EXT_transform_feedback. bool IsExtTransformFeedbackSupported() const { - return ext_transform_feedback; + return extensions.transform_feedback; } /// Returns true if the device supports VK_EXT_custom_border_color. bool IsExtCustomBorderColorSupported() const { - return ext_custom_border_color; + return extensions.custom_border_color; } /// Returns true if the device supports VK_EXT_extended_dynamic_state. bool IsExtExtendedDynamicStateSupported() const { - return ext_extended_dynamic_state; + return extensions.extended_dynamic_state; } /// Returns true if the device supports VK_EXT_extended_dynamic_state2. bool IsExtExtendedDynamicState2Supported() const { - return ext_extended_dynamic_state_2; + return extensions.extended_dynamic_state2; } bool IsExtExtendedDynamicState2ExtrasSupported() const { - return ext_extended_dynamic_state_2_extra; + return features.extended_dynamic_state2.extendedDynamicState2LogicOp; } /// Returns true if the device supports VK_EXT_extended_dynamic_state3. bool IsExtExtendedDynamicState3Supported() const { - return ext_extended_dynamic_state_3; + return extensions.extended_dynamic_state3; } /// Returns true if the device supports VK_EXT_extended_dynamic_state3. bool IsExtExtendedDynamicState3BlendingSupported() const { - return ext_extended_dynamic_state_3_blend; + return dynamic_state3_blending; } /// Returns true if the device supports VK_EXT_extended_dynamic_state3. bool IsExtExtendedDynamicState3EnablesSupported() const { - return ext_extended_dynamic_state_3_enables; + return dynamic_state3_enables; } /// Returns true if the device supports VK_EXT_line_rasterization. bool IsExtLineRasterizationSupported() const { - return ext_line_rasterization; + return extensions.line_rasterization; } /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. bool IsExtVertexInputDynamicStateSupported() const { - return ext_vertex_input_dynamic_state; + return extensions.vertex_input_dynamic_state; } /// Returns true if the device supports VK_EXT_shader_stencil_export. bool IsExtShaderStencilExportSupported() const { - return ext_shader_stencil_export; + return extensions.shader_stencil_export; } /// Returns true if the device supports VK_EXT_conservative_rasterization. bool IsExtConservativeRasterizationSupported() const { - return ext_conservative_rasterization; + return extensions.conservative_rasterization; } /// Returns true if the device supports VK_EXT_provoking_vertex. bool IsExtProvokingVertexSupported() const { - return ext_provoking_vertex; + return extensions.provoking_vertex; } /// Returns true if the device supports VK_KHR_shader_atomic_int64. bool IsExtShaderAtomicInt64Supported() const { - return ext_shader_atomic_int64; + return extensions.shader_atomic_int64; } /// Returns the minimum supported version of SPIR-V. @@ -345,7 +498,7 @@ public: if (instance_version >= VK_API_VERSION_1_3) { return 0x00010600U; } - if (khr_spirv_1_4) { + if (extensions.spirv_1_4) { return 0x00010400U; } return 0x00010000U; @@ -363,11 +516,11 @@ public: /// Returns the vendor name reported from Vulkan. std::string_view GetVendorName() const { - return vendor_name; + return properties.driver.driverName; } /// Returns the list of available extensions. - const std::vector<std::string>& GetAvailableExtensions() const { + const std::set<std::string, std::less<>>& GetAvailableExtensions() const { return supported_extensions; } @@ -376,7 +529,7 @@ public: } bool CanReportMemoryUsage() const { - return ext_memory_budget; + return extensions.memory_budget; } u64 GetDeviceMemoryUsage() const; @@ -397,33 +550,30 @@ public: return must_emulate_bgr565; } + bool HasNullDescriptor() const { + return features.robustness2.nullDescriptor; + } + u32 GetMaxVertexInputAttributes() const { - return max_vertex_input_attributes; + return properties.properties.limits.maxVertexInputAttributes; } u32 GetMaxVertexInputBindings() const { - return max_vertex_input_bindings; + return properties.properties.limits.maxVertexInputBindings; } private: - /// Checks if the physical device is suitable. - void CheckSuitability(bool requires_swapchain) const; + /// Checks if the physical device is suitable and configures the object state + /// with all necessary info about its properties. + bool GetSuitability(bool requires_swapchain); - /// Loads extensions into a vector and stores available ones in this object. - std::vector<const char*> LoadExtensions(bool requires_surface); + // Remove extensions which have incomplete feature support. + void RemoveUnsuitableExtensions(); + void RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name); /// Sets up queue families. void SetupFamilies(VkSurfaceKHR surface); - /// Sets up device features. - void SetupFeatures(); - - /// Sets up device properties. - void SetupProperties(); - - /// Collects telemetry information from the device. - void CollectTelemetryParameters(); - /// Collects information about attached tools. void CollectToolingInfo(); @@ -434,90 +584,93 @@ private: std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; /// Returns true if ASTC textures are natively supported. - bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; + bool ComputeIsOptimalAstcSupported() const; /// Returns true if the device natively supports blitting depth stencil images. bool TestDepthStencilBlits() const; - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. - VkPhysicalDeviceProperties properties; ///< Device properties. - VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. - u32 graphics_family{}; ///< Main graphics queue family index. - u32 present_family{}; ///< Main present queue family index. - VkDriverIdKHR driver_id{}; ///< Driver ID. - VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. - u64 device_access_memory{}; ///< Total size of device local memory in bytes. - u32 max_push_descriptors{}; ///< Maximum number of push descriptors - u32 sets_per_pool{}; ///< Sets per Description Pool - bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool is_float16_supported{}; ///< Support for float16 arithmetic. - bool is_int8_supported{}; ///< Support for int8 arithmetic. - bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. - bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. - bool is_depth_bounds_supported{}; ///< Support for depth bounds. - bool is_shader_float64_supported{}; ///< Support for float64. - bool is_shader_int64_supported{}; ///< Support for int64. - bool is_shader_int16_supported{}; ///< Support for int16. - bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. - bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. - bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list - ///< topologies. - bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch. - bool is_integrated{}; ///< Is GPU an iGPU. - bool is_virtual{}; ///< Is GPU a virtual GPU. - bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. - bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. - bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. - bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. - bool khr_draw_indirect_count{}; ///< Support for VK_KHR_draw_indirect_count. - bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. - bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. - bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. - bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. - bool khr_pipeline_executable_properties{}; ///< Support for executable properties. - bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format. - bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. - bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. - bool ext_depth_clip_control{}; ///< Support for VK_EXT_depth_clip_control - bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. - bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. - bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. - bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. - bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. - bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. - bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_extended_dynamic_state_2{}; ///< Support for VK_EXT_extended_dynamic_state2. - bool ext_extended_dynamic_state_2_extra{}; ///< Support for VK_EXT_extended_dynamic_state2. - bool ext_extended_dynamic_state_3{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_extended_dynamic_state_3_blend{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_extended_dynamic_state_3_enables{}; ///< Support for VK_EXT_extended_dynamic_state3. - bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. - bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. - bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. - bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. - bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. - bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. - bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget. - bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. - bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - bool supports_d24_depth{}; ///< Supports D24 depth buffers. - bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. - bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. - u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline - u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline +private: + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan instance version. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + + struct Extensions { +#define EXTENSION(prefix, macro_name, var_name) bool var_name{}; +#define FEATURE(prefix, struct_name, macro_name, var_name) bool var_name{}; + + FOR_EACH_VK_FEATURE_1_1(FEATURE); + FOR_EACH_VK_FEATURE_1_2(FEATURE); + FOR_EACH_VK_FEATURE_1_3(FEATURE); + FOR_EACH_VK_FEATURE_EXT(FEATURE); + FOR_EACH_VK_EXTENSION(EXTENSION); + FOR_EACH_VK_EXTENSION_WIN32(EXTENSION); + +#undef EXTENSION +#undef FEATURE + }; + + struct Features { +#define FEATURE_CORE(prefix, struct_name, macro_name, var_name) \ + VkPhysicalDevice##struct_name##Features var_name{}; +#define FEATURE_EXT(prefix, struct_name, macro_name, var_name) \ + VkPhysicalDevice##struct_name##Features##prefix var_name{}; + + FOR_EACH_VK_FEATURE_1_1(FEATURE_CORE); + FOR_EACH_VK_FEATURE_1_2(FEATURE_CORE); + FOR_EACH_VK_FEATURE_1_3(FEATURE_CORE); + FOR_EACH_VK_FEATURE_EXT(FEATURE_EXT); + +#undef FEATURE_CORE +#undef FEATURE_EXT + + VkPhysicalDeviceFeatures features{}; + }; + + struct Properties { + VkPhysicalDeviceDriverProperties driver{}; + VkPhysicalDeviceFloatControlsProperties float_controls{}; + VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{}; + VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{}; + VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{}; + + VkPhysicalDeviceProperties properties{}; + }; + + Extensions extensions{}; + Features features{}; + Properties properties{}; + + VkPhysicalDeviceFeatures2 features2{}; + VkPhysicalDeviceProperties2 properties2{}; + + // Misc features + bool is_optimal_astc_supported{}; ///< Support for all guest ASTC formats. + bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. + bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. + bool is_integrated{}; ///< Is GPU an iGPU. + bool is_virtual{}; ///< Is GPU a virtual GPU. + bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. + bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached + bool supports_d24_depth{}; ///< Supports D24 depth buffers. + bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. + bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. + bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. + bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. + u64 device_access_memory{}; ///< Total size of device local memory in bytes. + u32 sets_per_pool{}; ///< Sets per Description Pool // Telemetry parameters - std::string vendor_name; ///< Device's driver name. - std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions. - std::vector<size_t> valid_heap_memory; ///< Heaps used. + std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions. + std::set<std::string, std::less<>> loaded_extensions; ///< Loaded Vulkan extensions. + std::vector<size_t> valid_heap_memory; ///< Heaps used. /// Format properties dictionary. std::unordered_map<VkFormat, VkFormatProperties> format_properties; diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index 562039b56..b6d83e446 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp @@ -32,7 +32,7 @@ namespace Vulkan { namespace { [[nodiscard]] std::vector<const char*> RequiredExtensions( - Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) { + Core::Frontend::WindowSystemType window_type, bool enable_validation) { std::vector<const char*> extensions; extensions.reserve(6); switch (window_type) { @@ -65,7 +65,7 @@ namespace { if (window_type != Core::Frontend::WindowSystemType::Headless) { extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); } - if (enable_debug_utils) { + if (enable_validation) { extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); @@ -95,9 +95,9 @@ namespace { return true; } -[[nodiscard]] std::vector<const char*> Layers(bool enable_layers) { +[[nodiscard]] std::vector<const char*> Layers(bool enable_validation) { std::vector<const char*> layers; - if (enable_layers) { + if (enable_validation) { layers.push_back("VK_LAYER_KHRONOS_validation"); } return layers; @@ -125,7 +125,7 @@ void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version, Core::Frontend::WindowSystemType window_type, - bool enable_debug_utils, bool enable_layers) { + bool enable_validation) { if (!library.IsOpen()) { LOG_ERROR(Render_Vulkan, "Vulkan library not available"); throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); @@ -138,11 +138,11 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } - const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils); + const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_validation); if (!AreExtensionsSupported(dld, extensions)) { throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); } - std::vector<const char*> layers = Layers(enable_layers); + std::vector<const char*> layers = Layers(enable_validation); RemoveUnavailableLayers(dld, layers); const u32 available_version = vk::AvailableVersion(dld); diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h index 40419d802..b59b92f83 100644 --- a/src/video_core/vulkan_common/vulkan_instance.h +++ b/src/video_core/vulkan_common/vulkan_instance.h @@ -17,8 +17,7 @@ namespace Vulkan { * @param dld Dispatch table to load function pointers into * @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1) * @param window_type Window system type's enabled extension - * @param enable_debug_utils Whether to enable VK_EXT_debug_utils_extension_name or not - * @param enable_layers Whether to enable Vulkan validation layers or not + * @param enable_validation Whether to enable Vulkan validation layers or not * * @return A new Vulkan instance * @throw vk::Exception on failure @@ -26,6 +25,6 @@ namespace Vulkan { [[nodiscard]] vk::Instance CreateInstance( const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version, Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless, - bool enable_debug_utils = false, bool enable_layers = false); + bool enable_validation = false); } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 861767c13..486d4dfaf 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -96,8 +96,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdDrawIndexed); X(vkCmdDrawIndirect); X(vkCmdDrawIndexedIndirect); - X(vkCmdDrawIndirectCountKHR); - X(vkCmdDrawIndexedIndirectCountKHR); + X(vkCmdDrawIndirectCount); + X(vkCmdDrawIndexedIndirectCount); X(vkCmdEndQuery); X(vkCmdEndRenderPass); X(vkCmdEndTransformFeedbackEXT); @@ -152,6 +152,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCreateGraphicsPipelines); X(vkCreateImage); X(vkCreateImageView); + X(vkCreatePipelineCache); X(vkCreatePipelineLayout); X(vkCreateQueryPool); X(vkCreateRenderPass); @@ -171,6 +172,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkDestroyImage); X(vkDestroyImageView); X(vkDestroyPipeline); + X(vkDestroyPipelineCache); X(vkDestroyPipelineLayout); X(vkDestroyQueryPool); X(vkDestroyRenderPass); @@ -188,6 +190,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkGetEventStatus); X(vkGetFenceStatus); X(vkGetImageMemoryRequirements); + X(vkGetPipelineCacheData); X(vkGetMemoryFdKHR); #ifdef _WIN32 X(vkGetMemoryWin32HandleKHR); @@ -218,6 +221,12 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { if (!dld.vkResetQueryPool) { Proc(dld.vkResetQueryPool, dld, "vkResetQueryPoolEXT", device); } + + // Support for draw indirect with count is optional in Vulkan 1.2 + if (!dld.vkCmdDrawIndirectCount) { + Proc(dld.vkCmdDrawIndirectCount, dld, "vkCmdDrawIndirectCountKHR", device); + Proc(dld.vkCmdDrawIndexedIndirectCount, dld, "vkCmdDrawIndexedIndirectCountKHR", device); + } #undef X } @@ -431,6 +440,10 @@ void Destroy(VkDevice device, VkPipeline handle, const DeviceDispatch& dld) noex dld.vkDestroyPipeline(device, handle, nullptr); } +void Destroy(VkDevice device, VkPipelineCache handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyPipelineCache(device, handle, nullptr); +} + void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept { dld.vkDestroyPipelineLayout(device, handle, nullptr); } @@ -651,6 +664,10 @@ void ShaderModule::SetObjectNameEXT(const char* name) const { SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); } +void PipelineCache::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_PIPELINE_CACHE, name); +} + void Semaphore::SetObjectNameEXT(const char* name) const { SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name); } @@ -746,21 +763,29 @@ DescriptorSetLayout Device::CreateDescriptorSetLayout( return DescriptorSetLayout(object, handle, *dld); } +PipelineCache Device::CreatePipelineCache(const VkPipelineCacheCreateInfo& ci) const { + VkPipelineCache cache; + Check(dld->vkCreatePipelineCache(handle, &ci, nullptr, &cache)); + return PipelineCache(cache, handle, *dld); +} + PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const { VkPipelineLayout object; Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object)); return PipelineLayout(object, handle, *dld); } -Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const { +Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci, + VkPipelineCache cache) const { VkPipeline object; - Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object)); + Check(dld->vkCreateGraphicsPipelines(handle, cache, 1, &ci, nullptr, &object)); return Pipeline(object, handle, *dld); } -Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const { +Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci, + VkPipelineCache cache) const { VkPipeline object; - Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object)); + Check(dld->vkCreateComputePipelines(handle, cache, 1, &ci, nullptr, &object)); return Pipeline(object, handle, *dld); } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index accfad8c1..e86f661cb 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -215,8 +215,8 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; PFN_vkCmdDrawIndirect vkCmdDrawIndirect{}; PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{}; - PFN_vkCmdDrawIndirectCountKHR vkCmdDrawIndirectCountKHR{}; - PFN_vkCmdDrawIndexedIndirectCountKHR vkCmdDrawIndexedIndirectCountKHR{}; + PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{}; + PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{}; PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdEndQuery vkCmdEndQuery{}; PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; @@ -270,6 +270,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines{}; PFN_vkCreateImage vkCreateImage{}; PFN_vkCreateImageView vkCreateImageView{}; + PFN_vkCreatePipelineCache vkCreatePipelineCache{}; PFN_vkCreatePipelineLayout vkCreatePipelineLayout{}; PFN_vkCreateQueryPool vkCreateQueryPool{}; PFN_vkCreateRenderPass vkCreateRenderPass{}; @@ -289,6 +290,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkDestroyImage vkDestroyImage{}; PFN_vkDestroyImageView vkDestroyImageView{}; PFN_vkDestroyPipeline vkDestroyPipeline{}; + PFN_vkDestroyPipelineCache vkDestroyPipelineCache{}; PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout{}; PFN_vkDestroyQueryPool vkDestroyQueryPool{}; PFN_vkDestroyRenderPass vkDestroyRenderPass{}; @@ -306,6 +308,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkGetEventStatus vkGetEventStatus{}; PFN_vkGetFenceStatus vkGetFenceStatus{}; PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{}; + PFN_vkGetPipelineCacheData vkGetPipelineCacheData{}; PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{}; #ifdef _WIN32 PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{}; @@ -351,6 +354,7 @@ void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkImageView, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkPipeline, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkPipelineCache, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkPipelineLayout, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkQueryPool, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkRenderPass, const DeviceDispatch&) noexcept; @@ -773,6 +777,18 @@ public: void SetObjectNameEXT(const char* name) const; }; +class PipelineCache : public Handle<VkPipelineCache, VkDevice, DeviceDispatch> { + using Handle<VkPipelineCache, VkDevice, DeviceDispatch>::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + + VkResult Read(size_t* size, void* data) const noexcept { + return dld->vkGetPipelineCacheData(owner, handle, size, data); + } +}; + class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; @@ -844,11 +860,15 @@ public: DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const; + PipelineCache CreatePipelineCache(const VkPipelineCacheCreateInfo& ci) const; + PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const; - Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const; + Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci, + VkPipelineCache cache = nullptr) const; - Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const; + Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci, + VkPipelineCache cache = nullptr) const; Sampler CreateSampler(const VkSamplerCreateInfo& ci) const; @@ -1045,15 +1065,15 @@ public: void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept { - dld->vkCmdDrawIndirectCountKHR(handle, src_buffer, src_offset, count_buffer, count_offset, - draw_count, stride); + dld->vkCmdDrawIndirectCount(handle, src_buffer, src_offset, count_buffer, count_offset, + draw_count, stride); } void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept { - dld->vkCmdDrawIndexedIndirectCountKHR(handle, src_buffer, src_offset, count_buffer, - count_offset, draw_count, stride); + dld->vkCmdDrawIndexedIndirectCount(handle, src_buffer, src_offset, count_buffer, + count_offset, draw_count, stride); } void ClearAttachments(Span<VkClearAttachment> attachments, diff --git a/src/yuzu/Info.plist b/src/yuzu/Info.plist index 0eb377926..f05f3186c 100644 --- a/src/yuzu/Info.plist +++ b/src/yuzu/Info.plist @@ -34,6 +34,8 @@ SPDX-License-Identifier: GPL-2.0-or-later <string></string> <key>CSResourcesFileMapped</key> <true/> + <key>LSApplicationCategoryType</key> + <string>public.app-category.games</string> <key>LSRequiresCarbon</key> <true/> <key>NSHumanReadableCopyright</key> diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 3d560f303..d65991734 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -96,9 +96,9 @@ void EmuThread::run() { m_is_running.store(false); m_is_running.notify_all(); - emit DebugModeEntered(); + EmulationPaused(lk); Common::CondvarWait(m_should_run_cv, lk, stop_token, [&] { return m_should_run; }); - emit DebugModeLeft(); + EmulationResumed(lk); } } @@ -111,6 +111,21 @@ void EmuThread::run() { #endif } +// Unlock while emitting signals so that the main thread can +// continue pumping events. + +void EmuThread::EmulationPaused(std::unique_lock<std::mutex>& lk) { + lk.unlock(); + emit DebugModeEntered(); + lk.lock(); +} + +void EmuThread::EmulationResumed(std::unique_lock<std::mutex>& lk) { + lk.unlock(); + emit DebugModeLeft(); + lk.lock(); +} + #ifdef HAS_OPENGL class OpenGLSharedContext : public Core::Frontend::GraphicsContext { public: diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index eca16b313..092c6206f 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -92,6 +92,10 @@ public: } private: + void EmulationPaused(std::unique_lock<std::mutex>& lk); + void EmulationResumed(std::unique_lock<std::mutex>& lk); + +private: Core::System& m_system; std::stop_source m_stop_source; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index e9425b5bd..fd3bb30e1 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -70,28 +70,28 @@ const std::array<int, 2> Config::default_ringcon_analogs{{ // UISetting::values.shortcuts, which is alphabetically ordered. // clang-format off const std::array<UISettings::Shortcut, 22> Config::default_hotkeys{{ - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Mute/Unmute")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+M"), QStringLiteral("Home+Dpad_Right"), Qt::WindowShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Volume Down")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("-"), QStringLiteral("Home+Dpad_Down"), Qt::ApplicationShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Volume Up")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("="), QStringLiteral("Home+Dpad_Up"), Qt::ApplicationShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Capture Screenshot")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+P"), QStringLiteral("Screenshot"), Qt::WidgetWithChildrenShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Change Adapting Filter")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F8"), QStringLiteral("Home+L"), Qt::ApplicationShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Change Docked Mode")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F10"), QStringLiteral("Home+X"), Qt::ApplicationShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Change GPU Accuracy")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F9"), QStringLiteral("Home+R"), Qt::ApplicationShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Continue/Pause Emulation")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F4"), QStringLiteral("Home+Plus"), Qt::WindowShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Exit Fullscreen")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Esc"), QStringLiteral(""), Qt::WindowShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Exit yuzu")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+Q"), QStringLiteral("Home+Minus"), Qt::WindowShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Fullscreen")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F11"), QStringLiteral("Home+B"), Qt::WindowShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Load File")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+O"), QStringLiteral(""), Qt::WidgetWithChildrenShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Load/Remove Amiibo")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F2"), QStringLiteral("Home+A"), Qt::WidgetWithChildrenShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Restart Emulation")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F6"), QStringLiteral(""), Qt::WindowShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Stop Emulation")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F5"), QStringLiteral(""), Qt::WindowShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Record")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F7"), QStringLiteral(""), Qt::ApplicationShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Reset")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F6"), QStringLiteral(""), Qt::ApplicationShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Start/Stop")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F5"), QStringLiteral(""), Qt::ApplicationShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Filter Bar")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F"), QStringLiteral(""), Qt::WindowShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Framerate Limit")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+U"), QStringLiteral("Home+Y"), Qt::ApplicationShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Mouse Panning")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F9"), QStringLiteral(""), Qt::ApplicationShortcut}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Status Bar")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+S"), QStringLiteral(""), Qt::WindowShortcut}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Mute/Unmute")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+M"), QStringLiteral("Home+Dpad_Right"), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Volume Down")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("-"), QStringLiteral("Home+Dpad_Down"), Qt::ApplicationShortcut, true}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Volume Up")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("="), QStringLiteral("Home+Dpad_Up"), Qt::ApplicationShortcut, true}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Capture Screenshot")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+P"), QStringLiteral("Screenshot"), Qt::WidgetWithChildrenShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Change Adapting Filter")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F8"), QStringLiteral("Home+L"), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Change Docked Mode")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F10"), QStringLiteral("Home+X"), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Change GPU Accuracy")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F9"), QStringLiteral("Home+R"), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Continue/Pause Emulation")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F4"), QStringLiteral("Home+Plus"), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Exit Fullscreen")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Esc"), QStringLiteral(""), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Exit yuzu")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+Q"), QStringLiteral("Home+Minus"), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Fullscreen")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F11"), QStringLiteral("Home+B"), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Load File")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+O"), QStringLiteral(""), Qt::WidgetWithChildrenShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Load/Remove Amiibo")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F2"), QStringLiteral("Home+A"), Qt::WidgetWithChildrenShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Restart Emulation")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F6"), QStringLiteral(""), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Stop Emulation")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("F5"), QStringLiteral(""), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Record")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F7"), QStringLiteral(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Reset")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F6"), QStringLiteral(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Start/Stop")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F5"), QStringLiteral(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Filter Bar")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F"), QStringLiteral(""), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Framerate Limit")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+U"), QStringLiteral("Home+Y"), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Mouse Panning")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+F9"), QStringLiteral(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Status Bar")), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")), {QStringLiteral("Ctrl+S"), QStringLiteral(""), Qt::WindowShortcut, false}}, }}; // clang-format on @@ -690,6 +690,7 @@ void Config::ReadRendererValues() { qt_config->beginGroup(QStringLiteral("Renderer")); ReadGlobalSetting(Settings::values.renderer_backend); + ReadGlobalSetting(Settings::values.renderer_force_max_clock); ReadGlobalSetting(Settings::values.vulkan_device); ReadGlobalSetting(Settings::values.fullscreen_mode); ReadGlobalSetting(Settings::values.aspect_ratio); @@ -709,6 +710,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_asynchronous_shaders); ReadGlobalSetting(Settings::values.use_fast_gpu_time); ReadGlobalSetting(Settings::values.use_pessimistic_flushes); + ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); ReadGlobalSetting(Settings::values.bg_red); ReadGlobalSetting(Settings::values.bg_green); ReadGlobalSetting(Settings::values.bg_blue); @@ -745,7 +747,7 @@ void Config::ReadShortcutValues() { for (const auto& [name, group, shortcut] : default_hotkeys) { qt_config->beginGroup(group); qt_config->beginGroup(name); - // No longer using ReadSetting for shortcut.second as it innacurately returns a value of 1 + // No longer using ReadSetting for shortcut.second as it inaccurately returns a value of 1 // for WidgetWithChildrenShortcut which is a value of 3. Needed to fix shortcuts the open // a file dialog in windowed mode UISettings::values.shortcuts.push_back( @@ -754,7 +756,7 @@ void Config::ReadShortcutValues() { {ReadSetting(QStringLiteral("KeySeq"), shortcut.keyseq).toString(), ReadSetting(QStringLiteral("Controller_KeySeq"), shortcut.controller_keyseq) .toString(), - shortcut.context}}); + shortcut.context, ReadSetting(QStringLiteral("Repeat"), shortcut.repeat).toBool()}}); qt_config->endGroup(); qt_config->endGroup(); } @@ -1305,6 +1307,9 @@ void Config::SaveRendererValues() { static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), static_cast<u32>(Settings::values.renderer_backend.GetDefault()), Settings::values.renderer_backend.UsingGlobal()); + WriteSetting(QString::fromStdString(Settings::values.renderer_force_max_clock.GetLabel()), + static_cast<u32>(Settings::values.renderer_force_max_clock.GetValue(global)), + static_cast<u32>(Settings::values.renderer_force_max_clock.GetDefault())); WriteGlobalSetting(Settings::values.vulkan_device); WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), static_cast<u32>(Settings::values.fullscreen_mode.GetValue(global)), @@ -1348,6 +1353,7 @@ void Config::SaveRendererValues() { WriteGlobalSetting(Settings::values.use_asynchronous_shaders); WriteGlobalSetting(Settings::values.use_fast_gpu_time); WriteGlobalSetting(Settings::values.use_pessimistic_flushes); + WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); WriteGlobalSetting(Settings::values.bg_red); WriteGlobalSetting(Settings::values.bg_green); WriteGlobalSetting(Settings::values.bg_blue); @@ -1387,6 +1393,7 @@ void Config::SaveShortcutValues() { WriteSetting(QStringLiteral("Controller_KeySeq"), shortcut.controller_keyseq, default_hotkey.controller_keyseq); WriteSetting(QStringLiteral("Context"), shortcut.context, default_hotkey.context); + WriteSetting(QStringLiteral("Repeat"), shortcut.repeat, default_hotkey.repeat); qt_config->endGroup(); qt_config->endGroup(); } diff --git a/src/yuzu/configuration/configuration_shared.cpp b/src/yuzu/configuration/configuration_shared.cpp index 97fb664bf..ac42cc7fc 100644 --- a/src/yuzu/configuration/configuration_shared.cpp +++ b/src/yuzu/configuration/configuration_shared.cpp @@ -92,3 +92,13 @@ void ConfigurationShared::InsertGlobalItem(QComboBox* combobox, int global_index combobox->insertItem(ConfigurationShared::USE_GLOBAL_INDEX, use_global_text); combobox->insertSeparator(ConfigurationShared::USE_GLOBAL_SEPARATOR_INDEX); } + +int ConfigurationShared::GetComboboxIndex(int global_setting_index, const QComboBox* combobox) { + if (Settings::IsConfiguringGlobal()) { + return combobox->currentIndex(); + } + if (combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + return global_setting_index; + } + return combobox->currentIndex() - ConfigurationShared::USE_GLOBAL_OFFSET; +} diff --git a/src/yuzu/configuration/configuration_shared.h b/src/yuzu/configuration/configuration_shared.h index e597dcdb5..04c88758c 100644 --- a/src/yuzu/configuration/configuration_shared.h +++ b/src/yuzu/configuration/configuration_shared.h @@ -69,4 +69,7 @@ void SetColoredComboBox(QComboBox* combobox, QWidget* target, int global); // Adds the "Use Global Configuration" selection and separator to the beginning of a QComboBox void InsertGlobalItem(QComboBox* combobox, int global_index); +// Returns the correct index of a QComboBox taking into account global configuration +int GetComboboxIndex(int global_setting_index, const QComboBox* combobox); + } // namespace ConfigurationShared diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index aa02cc63c..bb9910a53 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -366,6 +366,11 @@ </item> <item> <property name="text"> + <string>1.5X (1080p/1620p) [EXPERIMENTAL]</string> + </property> + </item> + <item> + <property name="text"> <string>2X (1440p/2160p)</string> </property> </item> @@ -389,6 +394,16 @@ <string>6X (4320p/6480p)</string> </property> </item> + <item> + <property name="text"> + <string>7X (5040p/7560p)</string> + </property> + </item> + <item> + <property name="text"> + <string>8X (5760p/8640p)</string> + </property> + </item> </widget> </item> </layout> diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 01f074699..cc0155a2c 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -22,13 +22,17 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; void ConfigureGraphicsAdvanced::SetConfiguration() { const bool runtime_lock = !system.IsPoweredOn(); ui->use_vsync->setEnabled(runtime_lock); + ui->renderer_force_max_clock->setEnabled(runtime_lock); ui->use_asynchronous_shaders->setEnabled(runtime_lock); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); + ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue()); ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); ui->use_pessimistic_flushes->setChecked(Settings::values.use_pessimistic_flushes.GetValue()); + ui->use_vulkan_driver_pipeline_cache->setChecked( + Settings::values.use_vulkan_driver_pipeline_cache.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setCurrentIndex( @@ -41,6 +45,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { &Settings::values.max_anisotropy); ConfigurationShared::SetHighlight(ui->label_gpu_accuracy, !Settings::values.gpu_accuracy.UsingGlobal()); + ConfigurationShared::SetHighlight(ui->renderer_force_max_clock, + !Settings::values.renderer_force_max_clock.UsingGlobal()); ConfigurationShared::SetHighlight(ui->af_label, !Settings::values.max_anisotropy.UsingGlobal()); } @@ -48,6 +54,9 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock, + ui->renderer_force_max_clock, + renderer_force_max_clock); ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, ui->anisotropic_filtering_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); @@ -58,6 +67,9 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ui->use_fast_gpu_time, use_fast_gpu_time); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_pessimistic_flushes, ui->use_pessimistic_flushes, use_pessimistic_flushes); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vulkan_driver_pipeline_cache, + ui->use_vulkan_driver_pipeline_cache, + use_vulkan_driver_pipeline_cache); } void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) { @@ -76,18 +88,25 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { // Disable if not global (only happens during game) if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); + ui->renderer_force_max_clock->setEnabled( + Settings::values.renderer_force_max_clock.UsingGlobal()); ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); ui->use_pessimistic_flushes->setEnabled( Settings::values.use_pessimistic_flushes.UsingGlobal()); + ui->use_vulkan_driver_pipeline_cache->setEnabled( + Settings::values.use_vulkan_driver_pipeline_cache.UsingGlobal()); ui->anisotropic_filtering_combobox->setEnabled( Settings::values.max_anisotropy.UsingGlobal()); return; } + ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock, + Settings::values.renderer_force_max_clock, + renderer_force_max_clock); ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, Settings::values.use_asynchronous_shaders, @@ -97,6 +116,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ConfigurationShared::SetColoredTristate(ui->use_pessimistic_flushes, Settings::values.use_pessimistic_flushes, use_pessimistic_flushes); + ConfigurationShared::SetColoredTristate(ui->use_vulkan_driver_pipeline_cache, + Settings::values.use_vulkan_driver_pipeline_cache, + use_vulkan_driver_pipeline_cache); ConfigurationShared::SetColoredComboBox( ui->gpu_accuracy, ui->label_gpu_accuracy, static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 12e816905..df557d585 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -36,10 +36,12 @@ private: std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; + ConfigurationShared::CheckState renderer_force_max_clock; ConfigurationShared::CheckState use_vsync; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; ConfigurationShared::CheckState use_pessimistic_flushes; + ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache; const Core::System& system; }; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 87a121471..061885e30 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -70,6 +70,16 @@ </widget> </item> <item> + <widget class="QCheckBox" name="renderer_force_max_clock"> + <property name="toolTip"> + <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string> + </property> + <property name="text"> + <string>Force maximum clocks (Vulkan only)</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="use_vsync"> <property name="toolTip"> <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string> @@ -110,6 +120,16 @@ </widget> </item> <item> + <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache"> + <property name="toolTip"> + <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string> + </property> + <property name="text"> + <string>Use Vulkan pipeline cache</string> + </property> + </widget> + </item> + <item> <widget class="QWidget" name="af_layout" native="true"> <layout class="QHBoxLayout" name="horizontalLayout_1"> <property name="leftMargin"> diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 183cbe562..c40d980c9 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -1466,6 +1466,12 @@ void ConfigureInputPlayer::mousePressEvent(QMouseEvent* event) { input_subsystem->GetMouse()->PressButton(0, 0, 0, 0, button); } +void ConfigureInputPlayer::wheelEvent(QWheelEvent* event) { + const int x = event->angleDelta().x(); + const int y = event->angleDelta().y(); + input_subsystem->GetMouse()->MouseWheelChange(x, y); +} + void ConfigureInputPlayer::keyPressEvent(QKeyEvent* event) { if (!input_setter || !event) { return; diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h index 6d1876f2b..99a9c875d 100644 --- a/src/yuzu/configuration/configure_input_player.h +++ b/src/yuzu/configuration/configure_input_player.h @@ -116,6 +116,9 @@ private: /// Handle mouse button press events. void mousePressEvent(QMouseEvent* event) override; + /// Handle mouse wheel move events. + void wheelEvent(QWheelEvent* event) override; + /// Handle key press events. void keyPressEvent(QKeyEvent* event) override; diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp index 94049f2f4..9ea4c02da 100644 --- a/src/yuzu/configuration/configure_system.cpp +++ b/src/yuzu/configuration/configure_system.cpp @@ -31,6 +31,9 @@ constexpr std::array<u32, 7> LOCALE_BLOCKLIST{ }; static bool IsValidLocale(u32 region_index, u32 language_index) { + if (region_index >= LOCALE_BLOCKLIST.size()) { + return false; + } return ((LOCALE_BLOCKLIST.at(region_index) >> language_index) & 1) == 0; } @@ -55,8 +58,11 @@ ConfigureSystem::ConfigureSystem(Core::System& system_, QWidget* parent) }); const auto locale_check = [this](int index) { - const bool valid_locale = - IsValidLocale(ui->combo_region->currentIndex(), ui->combo_language->currentIndex()); + const auto region_index = ConfigurationShared::GetComboboxIndex( + Settings::values.region_index.GetValue(true), ui->combo_region); + const auto language_index = ConfigurationShared::GetComboboxIndex( + Settings::values.language_index.GetValue(true), ui->combo_language); + const bool valid_locale = IsValidLocale(region_index, language_index); ui->label_warn_invalid_locale->setVisible(!valid_locale); if (!valid_locale) { ui->label_warn_invalid_locale->setText( diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h index 8f02880a7..a7f086258 100644 --- a/src/yuzu/configuration/configure_system.h +++ b/src/yuzu/configuration/configure_system.h @@ -42,13 +42,7 @@ private: std::unique_ptr<Ui::ConfigureSystem> ui; bool enabled = false; - int language_index = 0; - int region_index = 0; - int time_zone_index = 0; - int sound_index = 0; - ConfigurationShared::CheckState use_rng_seed; - ConfigurationShared::CheckState use_custom_rtc; Core::System& system; }; diff --git a/src/yuzu/hotkeys.cpp b/src/yuzu/hotkeys.cpp index 13723f6e5..6530186c1 100644 --- a/src/yuzu/hotkeys.cpp +++ b/src/yuzu/hotkeys.cpp @@ -21,7 +21,7 @@ void HotkeyRegistry::SaveHotkeys() { {hotkey.first, group.first, UISettings::ContextualShortcut({hotkey.second.keyseq.toString(), hotkey.second.controller_keyseq, - hotkey.second.context})}); + hotkey.second.context, hotkey.second.repeat})}); } } } @@ -47,6 +47,7 @@ void HotkeyRegistry::LoadHotkeys() { hk.controller_shortcut->disconnect(); hk.controller_shortcut->SetKey(hk.controller_keyseq); } + hk.repeat = shortcut.shortcut.repeat; } } @@ -57,8 +58,7 @@ QShortcut* HotkeyRegistry::GetHotkey(const QString& group, const QString& action hk.shortcut = new QShortcut(hk.keyseq, widget, nullptr, nullptr, hk.context); } - hk.shortcut->setAutoRepeat(false); - + hk.shortcut->setAutoRepeat(hk.repeat); return hk.shortcut; } diff --git a/src/yuzu/hotkeys.h b/src/yuzu/hotkeys.h index dc5b7f628..848239c35 100644 --- a/src/yuzu/hotkeys.h +++ b/src/yuzu/hotkeys.h @@ -115,6 +115,7 @@ private: QShortcut* shortcut = nullptr; ControllerShortcut* controller_shortcut = nullptr; Qt::ShortcutContext context = Qt::WindowShortcut; + bool repeat; }; using HotkeyMap = std::map<QString, Hotkey>; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 524650144..571eacf9f 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -1839,9 +1839,11 @@ void GMainWindow::OnEmulationStopTimeExpired() { void GMainWindow::OnEmulationStopped() { shutdown_timer.stop(); - emu_thread->disconnect(); - emu_thread->wait(); - emu_thread = nullptr; + if (emu_thread) { + emu_thread->disconnect(); + emu_thread->wait(); + emu_thread.reset(); + } if (shutdown_dialog) { shutdown_dialog->deleteLater(); @@ -2229,8 +2231,10 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ } switch (target) { - case GameListRemoveTarget::GlShaderCache: case GameListRemoveTarget::VkShaderCache: + RemoveVulkanDriverPipelineCache(program_id); + [[fallthrough]]; + case GameListRemoveTarget::GlShaderCache: RemoveTransferableShaderCache(program_id, target); break; case GameListRemoveTarget::AllShaderCache: @@ -2271,6 +2275,22 @@ void GMainWindow::RemoveTransferableShaderCache(u64 program_id, GameListRemoveTa } } +void GMainWindow::RemoveVulkanDriverPipelineCache(u64 program_id) { + static constexpr std::string_view target_file_name = "vulkan_pipelines.bin"; + + const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); + const auto shader_cache_folder_path = shader_cache_dir / fmt::format("{:016x}", program_id); + const auto target_file = shader_cache_folder_path / target_file_name; + + if (!Common::FS::Exists(target_file)) { + return; + } + if (!Common::FS::RemoveFile(target_file)) { + QMessageBox::warning(this, tr("Error Removing Vulkan Driver Pipeline Cache"), + tr("Failed to remove the driver pipeline cache.")); + } +} + void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) { const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); const auto program_shader_cache_dir = shader_cache_dir / fmt::format("{:016x}", program_id); @@ -3011,6 +3031,8 @@ void GMainWindow::OnStopGame() { if (OnShutdownBegin()) { OnShutdownBeginDialog(); + } else { + OnEmulationStopped(); } } @@ -3708,15 +3730,36 @@ void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_vie } } +std::string GMainWindow::CreateTASFramesString( + std::array<size_t, InputCommon::TasInput::PLAYER_NUMBER> frames) const { + std::string string = ""; + size_t maxPlayerIndex = 0; + for (size_t i = 0; i < frames.size(); i++) { + if (frames[i] != 0) { + if (maxPlayerIndex != 0) + string += ", "; + while (maxPlayerIndex++ != i) + string += "0, "; + string += std::to_string(frames[i]); + } + } + return string; +} + QString GMainWindow::GetTasStateDescription() const { auto [tas_status, current_tas_frame, total_tas_frames] = input_subsystem->GetTas()->GetStatus(); + std::string tas_frames_string = CreateTASFramesString(total_tas_frames); switch (tas_status) { case InputCommon::TasInput::TasState::Running: - return tr("TAS state: Running %1/%2").arg(current_tas_frame).arg(total_tas_frames); + return tr("TAS state: Running %1/%2") + .arg(current_tas_frame) + .arg(QString::fromStdString(tas_frames_string)); case InputCommon::TasInput::TasState::Recording: - return tr("TAS state: Recording %1").arg(total_tas_frames); + return tr("TAS state: Recording %1").arg(total_tas_frames[0]); case InputCommon::TasInput::TasState::Stopped: - return tr("TAS state: Idle %1/%2").arg(current_tas_frame).arg(total_tas_frames); + return tr("TAS state: Idle %1/%2") + .arg(current_tas_frame) + .arg(QString::fromStdString(tas_frames_string)); default: return tr("TAS State: Invalid"); } diff --git a/src/yuzu/main.h b/src/yuzu/main.h index db318485d..0f61abc7a 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -12,6 +12,7 @@ #include "common/announce_multiplayer_room.h" #include "common/common_types.h" +#include "input_common/drivers/tas_input.h" #include "yuzu/compatibility_list.h" #include "yuzu/hotkeys.h" @@ -266,6 +267,9 @@ private: void changeEvent(QEvent* event) override; void closeEvent(QCloseEvent* event) override; + std::string CreateTASFramesString( + std::array<size_t, InputCommon::TasInput::PLAYER_NUMBER> frames) const; + #ifdef __unix__ void SetupSigInterrupts(); static void HandleSigInterrupt(int); @@ -347,6 +351,7 @@ private: void RemoveUpdateContent(u64 program_id, InstalledEntryType type); void RemoveAddOnContent(u64 program_id, InstalledEntryType type); void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target); + void RemoveVulkanDriverPipelineCache(u64 program_id); void RemoveAllTransferableShaderCaches(u64 program_id); void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); diff --git a/src/yuzu/uisettings.h b/src/yuzu/uisettings.h index 2006b883e..db43b7033 100644 --- a/src/yuzu/uisettings.h +++ b/src/yuzu/uisettings.h @@ -22,6 +22,7 @@ struct ContextualShortcut { QString keyseq; QString controller_keyseq; int context; + bool repeat; }; struct Shortcut { diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 1e45e57bc..527017282 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -296,6 +296,7 @@ void Config::ReadValues() { // Renderer ReadSetting("Renderer", Settings::values.renderer_backend); + ReadSetting("Renderer", Settings::values.renderer_force_max_clock); ReadSetting("Renderer", Settings::values.renderer_debug); ReadSetting("Renderer", Settings::values.renderer_shader_feedback); ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); @@ -321,6 +322,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.accelerate_astc); ReadSetting("Renderer", Settings::values.use_fast_gpu_time); ReadSetting("Renderer", Settings::values.use_pessimistic_flushes); + ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache); ReadSetting("Renderer", Settings::values.bg_red); ReadSetting("Renderer", Settings::values.bg_green); |