diff options
37 files changed, 675 insertions, 169 deletions
diff --git a/externals/dynarmic b/externals/dynarmic -Subproject 57b987c185ae6677861cbf781f08ed1649b0543 +Subproject a3cd05577c9b6c51f0f345d0e915b6feab68fe1 diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index c381dbe1d..5ef38a337 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -7,9 +7,12 @@ add_library(audio_core STATIC audio_out.h audio_renderer.cpp audio_renderer.h + behavior_info.cpp + behavior_info.h buffer.h codec.cpp codec.h + common.h null_sink.h sink.h sink_details.cpp diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 7a9dc61d4..d18ef6940 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -6,6 +6,7 @@ #include "audio_core/audio_out.h" #include "audio_core/audio_renderer.h" #include "audio_core/codec.h" +#include "audio_core/common.h" #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" @@ -79,7 +80,7 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory std::size_t instance_number) : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), effects(params.effect_count), memory{memory_} { - + behavior_info.SetUserRevision(params.revision); audio_out = std::make_unique<AudioCore::AudioOut>(); stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, fmt::format("AudioRenderer-Instance{}", instance_number), @@ -109,17 +110,17 @@ Stream::State AudioRenderer::GetStreamState() const { return stream->GetState(); } -static constexpr u32 VersionFromRevision(u32_le rev) { - // "REV7" -> 7 - return ((rev >> 24) & 0xff) - 0x30; -} - -std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) { +ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) { // Copy UpdateDataHeader struct UpdateDataHeader config{}; std::memcpy(&config, input_params.data(), sizeof(UpdateDataHeader)); u32 memory_pool_count = worker_params.effect_count + (worker_params.voice_count * 4); + if (!behavior_info.UpdateInput(input_params, sizeof(UpdateDataHeader))) { + LOG_ERROR(Audio, "Failed to update behavior info input parameters"); + return Audren::ERR_INVALID_PARAMETERS; + } + // Copy MemoryPoolInfo structs std::vector<MemoryPoolInfo> mem_pool_info(memory_pool_count); std::memcpy(mem_pool_info.data(), @@ -173,8 +174,7 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_ // Copy output header UpdateDataHeader response_data{worker_params}; std::vector<u8> output_params(response_data.total_size); - const auto audren_revision = VersionFromRevision(config.revision); - if (audren_revision >= 5) { + if (behavior_info.IsElapsedFrameCountSupported()) { response_data.frame_count = 0x10; response_data.total_size += 0x10; } @@ -200,7 +200,19 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_ sizeof(EffectOutStatus)); effect_out_status_offset += sizeof(EffectOutStatus); } - return output_params; + + // Update behavior info output + const std::size_t behavior_out_status_offset{ + sizeof(UpdateDataHeader) + response_data.memory_pools_size + response_data.voices_size + + response_data.effects_size + response_data.sinks_size + + response_data.performance_manager_size}; + + if (!behavior_info.UpdateOutput(output_params, behavior_out_status_offset)) { + LOG_ERROR(Audio, "Failed to update behavior info output parameters"); + return Audren::ERR_INVALID_PARAMETERS; + } + + return MakeResult(output_params); } void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) { diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index 62faf9f19..b42770fae 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -8,11 +8,13 @@ #include <memory> #include <vector> +#include "audio_core/behavior_info.h" #include "audio_core/stream.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "common/swap.h" #include "core/hle/kernel/object.h" +#include "core/hle/result.h" namespace Core::Timing { class CoreTiming; @@ -226,7 +228,7 @@ public: std::shared_ptr<Kernel::WritableEvent> buffer_event, std::size_t instance_number); ~AudioRenderer(); - std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); + ResultVal<std::vector<u8>> UpdateAudioRenderer(const std::vector<u8>& input_params); void QueueMixedBuffer(Buffer::Tag tag); void ReleaseAndQueueBuffers(); u32 GetSampleRate() const; @@ -237,6 +239,7 @@ public: private: class EffectState; class VoiceState; + BehaviorInfo behavior_info{}; AudioRendererParameter worker_params; std::shared_ptr<Kernel::WritableEvent> buffer_event; diff --git a/src/audio_core/behavior_info.cpp b/src/audio_core/behavior_info.cpp new file mode 100644 index 000000000..94b7a3bf1 --- /dev/null +++ b/src/audio_core/behavior_info.cpp @@ -0,0 +1,100 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> +#include "audio_core/behavior_info.h" +#include "audio_core/common.h" +#include "common/logging/log.h" + +namespace AudioCore { + +BehaviorInfo::BehaviorInfo() : process_revision(CURRENT_PROCESS_REVISION) {} +BehaviorInfo::~BehaviorInfo() = default; + +bool BehaviorInfo::UpdateInput(const std::vector<u8>& buffer, std::size_t offset) { + if (!CanConsumeBuffer(buffer.size(), offset, sizeof(InParams))) { + LOG_ERROR(Audio, "Buffer is an invalid size!"); + return false; + } + InParams params{}; + std::memcpy(¶ms, buffer.data() + offset, sizeof(InParams)); + + if (!IsValidRevision(params.revision)) { + LOG_ERROR(Audio, "Invalid input revision, revision=0x{:08X}", params.revision); + return false; + } + + if (user_revision != params.revision) { + LOG_ERROR(Audio, + "User revision differs from input revision, expecting 0x{:08X} but got 0x{:08X}", + user_revision, params.revision); + return false; + } + + ClearError(); + UpdateFlags(params.flags); + + // TODO(ogniK): Check input params size when InfoUpdater is used + + return true; +} + +bool BehaviorInfo::UpdateOutput(std::vector<u8>& buffer, std::size_t offset) { + if (!CanConsumeBuffer(buffer.size(), offset, sizeof(OutParams))) { + LOG_ERROR(Audio, "Buffer is an invalid size!"); + return false; + } + + OutParams params{}; + std::memcpy(params.errors.data(), errors.data(), sizeof(ErrorInfo) * errors.size()); + params.error_count = static_cast<u32_le>(error_count); + std::memcpy(buffer.data() + offset, ¶ms, sizeof(OutParams)); + return true; +} + +void BehaviorInfo::ClearError() { + error_count = 0; +} + +void BehaviorInfo::UpdateFlags(u64_le dest_flags) { + flags = dest_flags; +} + +void BehaviorInfo::SetUserRevision(u32_le revision) { + user_revision = revision; +} + +bool BehaviorInfo::IsAdpcmLoopContextBugFixed() const { + return IsRevisionSupported(2, user_revision); +} + +bool BehaviorInfo::IsSplitterSupported() const { + return IsRevisionSupported(2, user_revision); +} + +bool BehaviorInfo::IsLongSizePreDelaySupported() const { + return IsRevisionSupported(3, user_revision); +} + +bool BehaviorInfo::IsAudioRenererProcessingTimeLimit80PercentSupported() const { + return IsRevisionSupported(5, user_revision); +} + +bool BehaviorInfo::IsAudioRenererProcessingTimeLimit75PercentSupported() const { + return IsRevisionSupported(4, user_revision); +} + +bool BehaviorInfo::IsAudioRenererProcessingTimeLimit70PercentSupported() const { + return IsRevisionSupported(1, user_revision); +} + +bool BehaviorInfo::IsElapsedFrameCountSupported() const { + return IsRevisionSupported(5, user_revision); +} + +bool BehaviorInfo::IsMemoryPoolForceMappingEnabled() const { + return (flags & 1) != 0; +} + +} // namespace AudioCore diff --git a/src/audio_core/behavior_info.h b/src/audio_core/behavior_info.h new file mode 100644 index 000000000..c5e91ab39 --- /dev/null +++ b/src/audio_core/behavior_info.h @@ -0,0 +1,66 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> + +#include <vector> +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/swap.h" + +namespace AudioCore { +class BehaviorInfo { +public: + explicit BehaviorInfo(); + ~BehaviorInfo(); + + bool UpdateInput(const std::vector<u8>& buffer, std::size_t offset); + bool UpdateOutput(std::vector<u8>& buffer, std::size_t offset); + + void ClearError(); + void UpdateFlags(u64_le dest_flags); + void SetUserRevision(u32_le revision); + + bool IsAdpcmLoopContextBugFixed() const; + bool IsSplitterSupported() const; + bool IsLongSizePreDelaySupported() const; + bool IsAudioRenererProcessingTimeLimit80PercentSupported() const; + bool IsAudioRenererProcessingTimeLimit75PercentSupported() const; + bool IsAudioRenererProcessingTimeLimit70PercentSupported() const; + bool IsElapsedFrameCountSupported() const; + bool IsMemoryPoolForceMappingEnabled() const; + +private: + u32_le process_revision{}; + u32_le user_revision{}; + u64_le flags{}; + + struct ErrorInfo { + u32_le result{}; + INSERT_PADDING_WORDS(1); + u64_le result_info{}; + }; + static_assert(sizeof(ErrorInfo) == 0x10, "ErrorInfo is an invalid size"); + + std::array<ErrorInfo, 10> errors{}; + std::size_t error_count{}; + + struct InParams { + u32_le revision{}; + u32_le padding{}; + u64_le flags{}; + }; + static_assert(sizeof(InParams) == 0x10, "InParams is an invalid size"); + + struct OutParams { + std::array<ErrorInfo, 10> errors{}; + u32_le error_count{}; + INSERT_PADDING_BYTES(12); + }; + static_assert(sizeof(OutParams) == 0xb0, "OutParams is an invalid size"); +}; + +} // namespace AudioCore diff --git a/src/audio_core/common.h b/src/audio_core/common.h new file mode 100644 index 000000000..98478b66b --- /dev/null +++ b/src/audio_core/common.h @@ -0,0 +1,47 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/swap.h" +#include "core/hle/result.h" + +namespace AudioCore { +namespace Audren { +constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41}; +} + +constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8'); + +static constexpr u32 VersionFromRevision(u32_le rev) { + // "REV7" -> 7 + return ((rev >> 24) & 0xff) - 0x30; +} + +static constexpr bool IsRevisionSupported(u32 required, u32_le user_revision) { + const auto base = VersionFromRevision(user_revision); + return required <= base; +} + +static constexpr bool IsValidRevision(u32_le revision) { + const auto base = VersionFromRevision(revision); + constexpr auto max_rev = VersionFromRevision(CURRENT_PROCESS_REVISION); + return base <= max_rev; +} + +static constexpr bool CanConsumeBuffer(std::size_t size, std::size_t offset, std::size_t required) { + if (offset > size) { + return false; + } + if (size < required) { + return false; + } + if ((size - offset) < required) { + return false; + } + return true; +} + +} // namespace AudioCore diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 9add5d363..65cbfe5e6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -20,6 +20,7 @@ #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/svc.h" #include "core/memory.h" +#include "core/settings.h" namespace Core { @@ -144,6 +145,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& config.page_table_address_space_bits = address_space_bits; config.silently_mirror_page_table = false; config.absolute_offset_page_table = true; + config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; + config.only_detect_misalignment_via_page_table_on_page_boundary = true; // Multi-process state config.processor_id = core_index; @@ -159,8 +162,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& // Unpredictable instructions config.define_unpredictable_behaviour = true; - config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; - config.only_detect_misalignment_via_page_table_on_page_boundary = true; + // Optimizations + if (Settings::values.disable_cpu_opt) { + config.enable_optimizations = false; + config.enable_fast_dispatch = false; + } return std::make_shared<Dynarmic::A64::Jit>(config); } diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index d65dae3ae..91d94025c 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp @@ -282,19 +282,19 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) { return RESULT_SUCCESS; } -std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const { +std::vector<u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { std::vector<u8> buffer; - const bool is_buffer_a{BufferDescriptorA().size() > std::size_t(buffer_index) && + const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && BufferDescriptorA()[buffer_index].Size()}; auto& memory = Core::System::GetInstance().Memory(); if (is_buffer_a) { - ASSERT_MSG(BufferDescriptorA().size() > std::size_t(buffer_index), + ASSERT_MSG(BufferDescriptorA().size() > buffer_index, "BufferDescriptorA invalid buffer_index {}", buffer_index); buffer.resize(BufferDescriptorA()[buffer_index].Size()); memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size()); } else { - ASSERT_MSG(BufferDescriptorX().size() > std::size_t(buffer_index), + ASSERT_MSG(BufferDescriptorX().size() > buffer_index, "BufferDescriptorX invalid buffer_index {}", buffer_index); buffer.resize(BufferDescriptorX()[buffer_index].Size()); memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size()); @@ -304,13 +304,13 @@ std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const { } std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size, - int buffer_index) const { + std::size_t buffer_index) const { if (size == 0) { LOG_WARNING(Core, "skip empty buffer write"); return 0; } - const bool is_buffer_b{BufferDescriptorB().size() > std::size_t(buffer_index) && + const bool is_buffer_b{BufferDescriptorB().size() > buffer_index && BufferDescriptorB()[buffer_index].Size()}; const std::size_t buffer_size{GetWriteBufferSize(buffer_index)}; if (size > buffer_size) { @@ -321,13 +321,13 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size, auto& memory = Core::System::GetInstance().Memory(); if (is_buffer_b) { - ASSERT_MSG(BufferDescriptorB().size() > std::size_t(buffer_index), + ASSERT_MSG(BufferDescriptorB().size() > buffer_index, "BufferDescriptorB invalid buffer_index {}", buffer_index); ASSERT_MSG(BufferDescriptorB()[buffer_index].Size() >= size, "BufferDescriptorB buffer_index {} is not large enough", buffer_index); memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size); } else { - ASSERT_MSG(BufferDescriptorC().size() > std::size_t(buffer_index), + ASSERT_MSG(BufferDescriptorC().size() > buffer_index, "BufferDescriptorC invalid buffer_index {}", buffer_index); ASSERT_MSG(BufferDescriptorC()[buffer_index].Size() >= size, "BufferDescriptorC buffer_index {} is not large enough", buffer_index); @@ -337,17 +337,17 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size, return size; } -std::size_t HLERequestContext::GetReadBufferSize(int buffer_index) const { - const bool is_buffer_a{BufferDescriptorA().size() > std::size_t(buffer_index) && +std::size_t HLERequestContext::GetReadBufferSize(std::size_t buffer_index) const { + const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && BufferDescriptorA()[buffer_index].Size()}; if (is_buffer_a) { - ASSERT_MSG(BufferDescriptorA().size() > std::size_t(buffer_index), + ASSERT_MSG(BufferDescriptorA().size() > buffer_index, "BufferDescriptorA invalid buffer_index {}", buffer_index); ASSERT_MSG(BufferDescriptorA()[buffer_index].Size() > 0, "BufferDescriptorA buffer_index {} is empty", buffer_index); return BufferDescriptorA()[buffer_index].Size(); } else { - ASSERT_MSG(BufferDescriptorX().size() > std::size_t(buffer_index), + ASSERT_MSG(BufferDescriptorX().size() > buffer_index, "BufferDescriptorX invalid buffer_index {}", buffer_index); ASSERT_MSG(BufferDescriptorX()[buffer_index].Size() > 0, "BufferDescriptorX buffer_index {} is empty", buffer_index); @@ -355,15 +355,15 @@ std::size_t HLERequestContext::GetReadBufferSize(int buffer_index) const { } } -std::size_t HLERequestContext::GetWriteBufferSize(int buffer_index) const { - const bool is_buffer_b{BufferDescriptorB().size() > std::size_t(buffer_index) && +std::size_t HLERequestContext::GetWriteBufferSize(std::size_t buffer_index) const { + const bool is_buffer_b{BufferDescriptorB().size() > buffer_index && BufferDescriptorB()[buffer_index].Size()}; if (is_buffer_b) { - ASSERT_MSG(BufferDescriptorB().size() > std::size_t(buffer_index), + ASSERT_MSG(BufferDescriptorB().size() > buffer_index, "BufferDescriptorB invalid buffer_index {}", buffer_index); return BufferDescriptorB()[buffer_index].Size(); } else { - ASSERT_MSG(BufferDescriptorC().size() > std::size_t(buffer_index), + ASSERT_MSG(BufferDescriptorC().size() > buffer_index, "BufferDescriptorC invalid buffer_index {}", buffer_index); return BufferDescriptorC()[buffer_index].Size(); } diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h index 050ad8fd7..af3330297 100644 --- a/src/core/hle/kernel/hle_ipc.h +++ b/src/core/hle/kernel/hle_ipc.h @@ -179,10 +179,11 @@ public: } /// Helper function to read a buffer using the appropriate buffer descriptor - std::vector<u8> ReadBuffer(int buffer_index = 0) const; + std::vector<u8> ReadBuffer(std::size_t buffer_index = 0) const; /// Helper function to write a buffer using the appropriate buffer descriptor - std::size_t WriteBuffer(const void* buffer, std::size_t size, int buffer_index = 0) const; + std::size_t WriteBuffer(const void* buffer, std::size_t size, + std::size_t buffer_index = 0) const; /* Helper function to write a buffer using the appropriate buffer descriptor * @@ -194,7 +195,8 @@ public: */ template <typename ContiguousContainer, typename = std::enable_if_t<!std::is_pointer_v<ContiguousContainer>>> - std::size_t WriteBuffer(const ContiguousContainer& container, int buffer_index = 0) const { + std::size_t WriteBuffer(const ContiguousContainer& container, + std::size_t buffer_index = 0) const { using ContiguousType = typename ContiguousContainer::value_type; static_assert(std::is_trivially_copyable_v<ContiguousType>, @@ -205,10 +207,10 @@ public: } /// Helper function to get the size of the input buffer - std::size_t GetReadBufferSize(int buffer_index = 0) const; + std::size_t GetReadBufferSize(std::size_t buffer_index = 0) const; /// Helper function to get the size of the output buffer - std::size_t GetWriteBufferSize(int buffer_index = 0) const; + std::size_t GetWriteBufferSize(std::size_t buffer_index = 0) const; template <typename T> std::shared_ptr<T> GetCopyObject(std::size_t index) { diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 557608e76..3ece2cf3c 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -903,7 +903,7 @@ private: void PopOutData(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called"); - const auto storage = applet->GetBroker().PopNormalDataToGame(); + auto storage = applet->GetBroker().PopNormalDataToGame(); if (storage == nullptr) { LOG_ERROR(Service_AM, "storage is a nullptr. There is no data in the current normal channel"); @@ -934,7 +934,7 @@ private: void PopInteractiveOutData(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called"); - const auto storage = applet->GetBroker().PopInteractiveDataToGame(); + auto storage = applet->GetBroker().PopInteractiveDataToGame(); if (storage == nullptr) { LOG_ERROR(Service_AM, "storage is a nullptr. There is no data in the current interactive channel"); diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 175cabf45..d8359abaa 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -92,11 +92,16 @@ private: } void RequestUpdateImpl(Kernel::HLERequestContext& ctx) { - LOG_WARNING(Service_Audio, "(STUBBED) called"); + LOG_DEBUG(Service_Audio, "(STUBBED) called"); + + auto result = renderer->UpdateAudioRenderer(ctx.ReadBuffer()); + + if (result.Succeeded()) { + ctx.WriteBuffer(result.Unwrap()); + } - ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer())); IPC::ResponseBuilder rb{ctx, 2}; - rb.Push(RESULT_SUCCESS); + rb.Push(result.Code()); } void Start(Kernel::HLERequestContext& ctx) { @@ -252,8 +257,6 @@ private: } void GetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { - IPC::RequestParser rp{ctx}; - const auto device_name_buffer = ctx.ReadBuffer(); const std::string name = Common::StringFromBuffer(device_name_buffer); diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp index df00ae625..86f36915a 100644 --- a/src/core/hle/service/es/es.cpp +++ b/src/core/hle/service/es/es.cpp @@ -76,7 +76,6 @@ private: } void ImportTicket(Kernel::HLERequestContext& ctx) { - IPC::RequestParser rp{ctx}; const auto ticket = ctx.ReadBuffer(); const auto cert = ctx.ReadBuffer(1); diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index 2ccfffc19..c55d900e2 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -502,7 +502,7 @@ void Controller_NPad::SetNpadMode(u32 npad_id, NPadAssignments assignment_mode) void Controller_NPad::VibrateController(const std::vector<u32>& controller_ids, const std::vector<Vibration>& vibrations) { - LOG_WARNING(Service_HID, "(STUBBED) called"); + LOG_DEBUG(Service_HID, "(STUBBED) called"); if (!can_controllers_vibrate) { return; diff --git a/src/core/settings.h b/src/core/settings.h index 79ec01731..c73d1c596 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -464,6 +464,7 @@ struct Values { bool dump_nso; bool reporting_services; bool quest_flag; + bool disable_cpu_opt; // BCAT std::string bcat_backend; diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 713c14182..0b77afc71 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -12,7 +12,7 @@ namespace Tegra { -DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {} +DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} DmaPusher::~DmaPusher() = default; @@ -26,7 +26,7 @@ void DmaPusher::DispatchCalls() { dma_pushbuffer_subindex = 0; - while (Core::System::GetInstance().IsPoweredOn()) { + while (system.IsPoweredOn()) { if (!Step()) { break; } diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 6ab06518f..d6188614a 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -10,6 +10,10 @@ #include "common/bit_field.h" #include "common/common_types.h" +namespace Core { +class System; +} + namespace Tegra { enum class SubmissionMode : u32 { @@ -56,7 +60,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>; */ class DmaPusher { public: - explicit DmaPusher(GPU& gpu); + explicit DmaPusher(Core::System& system, GPU& gpu); ~DmaPusher(); void Push(CommandList&& entries) { @@ -72,8 +76,6 @@ private: void CallMethod(u32 argument) const; - GPU& gpu; - std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed @@ -92,6 +94,9 @@ private: GPUVAddr dma_mget{}; ///< main pushbuffer last read address bool ib_enable{true}; ///< IB mode enabled + + GPU& gpu; + Core::System& system; }; } // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index ba63b44b4..baa74ad4c 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -92,6 +92,10 @@ void Maxwell3D::InitializeRegisterDefaults() { color_mask.A.Assign(1); } + for (auto& format : regs.vertex_attrib_format) { + format.constant.Assign(1); + } + // NVN games expect these values to be enabled at boot regs.rasterize_enable = 1; regs.rt_separate_frag_data = 1; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 8acf2eda2..a606f4abd 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -27,7 +27,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { auto& rasterizer{renderer->Rasterizer()}; memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); - dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); + dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index dd590c38b..04532f8f8 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -42,7 +42,7 @@ #include <vulkan/vulkan_win32.h> #endif -#ifdef __linux__ +#if !defined(_WIN32) && !defined(__APPLE__) #include <X11/Xlib.h> #include <vulkan/vulkan_wayland.h> #include <vulkan/vulkan_xlib.h> @@ -119,7 +119,7 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); break; #endif -#ifdef __linux__ +#if !defined(_WIN32) && !defined(__APPLE__) case Core::Frontend::WindowSystemType::X11: extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); break; @@ -345,7 +345,7 @@ bool RendererVulkan::CreateSurface() { } } #endif -#ifdef __linux__ +#if !defined(_WIN32) && !defined(__APPLE__) if (window_info.type == Core::Frontend::WindowSystemType::X11) { const VkXlibSurfaceCreateInfoKHR xlib_ci{ VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp new file mode 100644 index 000000000..5a472ba9b --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp @@ -0,0 +1,50 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +/* + * Build instructions: + * $ glslangValidator -V quad_indexed.comp -o output.spv + * $ spirv-opt -O --strip-debug output.spv -o optimized.spv + * $ xxd -i optimized.spv + * + * Then copy that bytecode to the C++ file + */ + +#version 460 core + +layout (local_size_x = 1024) in; + +layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { + uint input_indexes[]; +}; + +layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { + uint output_indexes[]; +}; + +layout (push_constant) uniform PushConstants { + uint base_vertex; + int index_shift; // 0: uint8, 1: uint16, 2: uint32 +}; + +void main() { + int primitive = int(gl_GlobalInvocationID.x); + if (primitive * 6 >= output_indexes.length()) { + return; + } + + int index_size = 8 << index_shift; + int flipped_shift = 2 - index_shift; + int mask = (1 << flipped_shift) - 1; + + const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3); + for (uint vertex = 0; vertex < 6; ++vertex) { + int offset = primitive * 4 + quad_swizzle[vertex]; + int int_offset = offset >> flipped_shift; + int bit_offset = (offset & mask) * index_size; + uint packed_input = input_indexes[int_offset]; + uint index = bitfieldExtract(packed_input, bit_offset, index_size); + output_indexes[primitive * 6 + vertex] = index + base_vertex; + } +} diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 878a78755..7b0268033 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -135,11 +135,11 @@ VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEnt return entry; } -VkPushConstantRange BuildQuadArrayPassPushConstantRange() { +VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { VkPushConstantRange range; range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; range.offset = 0; - range.size = sizeof(u32); + range.size = static_cast<u32>(size); return range; } @@ -220,7 +220,130 @@ constexpr u8 uint8_pass[] = { 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; -std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings() { +// Quad indexed SPIR-V module. Generated from the "shaders/" directory. +constexpr u8 QUAD_INDEXED_SPV[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, + 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, + 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, + 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, + 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, + 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00, + 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, + 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, + 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, + 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, + 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, + 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, + 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, + 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, + 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, + 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, + 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, + 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, + 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00, + 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, + 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, + 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00, + 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, + 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, + 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; + +std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { std::array<VkDescriptorSetLayoutBinding, 2> bindings; bindings[0].binding = 0; bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; @@ -235,7 +358,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings( return bindings; } -VkDescriptorUpdateTemplateEntryKHR BuildUint8PassDescriptorUpdateTemplateEntry() { +VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { VkDescriptorUpdateTemplateEntryKHR entry; entry.dstBinding = 0; entry.dstArrayElement = 0; @@ -337,14 +460,14 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, VKUpdateDescriptorQueue& update_descriptor_queue) : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(), BuildQuadArrayPassDescriptorUpdateTemplateEntry(), - BuildQuadArrayPassPushConstantRange(), std::size(quad_array), quad_array), + BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, update_descriptor_queue{update_descriptor_queue} {} QuadArrayPass::~QuadArrayPass() = default; std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { - const u32 num_triangle_vertices = num_vertices * 6 / 4; + const u32 num_triangle_vertices = (num_vertices / 4) * 6; const std::size_t staging_size = num_triangle_vertices * sizeof(u32); auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); @@ -383,8 +506,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, VKUpdateDescriptorQueue& update_descriptor_queue) - : VKComputePass(device, descriptor_pool, BuildUint8PassDescriptorSetBindings(), - BuildUint8PassDescriptorUpdateTemplateEntry(), {}, std::size(uint8_pass), + : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), + BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), uint8_pass), scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, update_descriptor_queue{update_descriptor_queue} {} @@ -425,4 +548,70 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff return {*buffer.handle, 0}; } +QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, + VKDescriptorPool& descriptor_pool, + VKStagingBufferPool& staging_buffer_pool, + VKUpdateDescriptorQueue& update_descriptor_queue) + : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), + BuildInputOutputDescriptorUpdateTemplate(), + BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), + QUAD_INDEXED_SPV), + scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, + update_descriptor_queue{update_descriptor_queue} {} + +QuadIndexedPass::~QuadIndexedPass() = default; + +std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( + Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, + VkBuffer src_buffer, u64 src_offset) { + const u32 index_shift = [index_format] { + switch (index_format) { + case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: + return 0; + case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort: + return 1; + case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt: + return 2; + } + UNREACHABLE(); + return 2; + }(); + const u32 input_size = num_vertices << index_shift; + const u32 num_tri_vertices = (num_vertices / 4) * 6; + + const std::size_t staging_size = num_tri_vertices * sizeof(u32); + auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); + + update_descriptor_queue.Acquire(); + update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); + update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); + const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, + num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { + static constexpr u32 dispatch_size = 1024; + const std::array push_constants = {base_vertex, index_shift}; + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + &push_constants); + cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); + + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = buffer; + barrier.offset = 0; + barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32)); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); + }); + return {*buffer.handle, 0}; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index ec80c8683..26bf834de 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -8,6 +8,7 @@ #include <utility> #include <vector> #include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -73,4 +74,22 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; +class QuadIndexedPass final : public VKComputePass { +public: + explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, + VKDescriptorPool& descriptor_pool, + VKStagingBufferPool& staging_buffer_pool, + VKUpdateDescriptorQueue& update_descriptor_queue); + ~QuadIndexedPass(); + + std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, + u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, + u64 src_offset); + +private: + VKScheduler& scheduler; + VKStagingBufferPool& staging_buffer_pool; + VKUpdateDescriptorQueue& update_descriptor_queue; +}; + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 4ca0febb8..9bf9e1028 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -292,6 +292,7 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind staging_pool(device, memory_manager, scheduler), descriptor_pool(device), update_descriptor_queue(device, scheduler), renderpass_cache(device), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), + quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, staging_pool), @@ -844,18 +845,26 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar bool is_indexed) { const auto& regs = system.GPU().Maxwell3D().regs; switch (regs.draw.topology) { - case Maxwell::PrimitiveTopology::Quads: - if (params.is_indexed) { - UNIMPLEMENTED(); - } else { + case Maxwell::PrimitiveTopology::Quads: { + if (!params.is_indexed) { const auto [buffer, offset] = quad_array_pass.Assemble(params.num_vertices, params.base_vertex); buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); params.base_vertex = 0; params.num_vertices = params.num_vertices * 6 / 4; params.is_indexed = true; + break; } + const GPUVAddr gpu_addr = regs.index_array.IndexStart(); + auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + std::tie(buffer, offset) = quad_indexed_pass.Assemble( + regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); + + buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); + params.num_vertices = (params.num_vertices / 4) * 6; + params.base_vertex = 0; break; + } default: { if (!is_indexed) { break; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 46037860a..d9108f862 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -254,6 +254,7 @@ private: VKUpdateDescriptorQueue update_descriptor_queue; VKRenderPassCache renderpass_cache; QuadArrayPass quad_array_pass; + QuadIndexedPass quad_indexed_pass; Uint8Pass uint8_pass; VKTextureCache texture_cache; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 38a93a01a..868447af2 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <limits> #include <optional> #include <tuple> #include <vector> @@ -22,22 +23,38 @@ namespace { constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; -constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; +constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256 * 1024 * 1024; -std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, - VkMemoryPropertyFlags wanted) { - const auto properties = device.GetPhysical().GetMemoryProperties(); - for (u32 i = 0; i < properties.memoryTypeCount; i++) { - if (!(filter & (1 << i))) { - continue; - } - if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) { +/// Find a memory type with the passed requirements +std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties, + VkMemoryPropertyFlags wanted, + u32 filter = std::numeric_limits<u32>::max()) { + for (u32 i = 0; i < properties.memoryTypeCount; ++i) { + const auto flags = properties.memoryTypes[i].propertyFlags; + if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) { return i; } } return std::nullopt; } +/// Get the preferred host visible memory type. +u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, + u32 filter = std::numeric_limits<u32>::max()) { + // Prefer device local host visible allocations. Both AMD and Nvidia now provide one. + // Otherwise search for a host visible allocation. + static constexpr auto HOST_MEMORY = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + + std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY); + if (!preferred_type) { + preferred_type = FindMemoryType(properties, HOST_MEMORY); + ASSERT_MSG(preferred_type, "No host visible and coherent memory type found"); + } + return preferred_type.value_or(0); +} + } // Anonymous namespace VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, @@ -51,7 +68,7 @@ VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, VKStreamBuffer::~VKStreamBuffer() = default; std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { - ASSERT(size <= STREAM_BUFFER_SIZE); + ASSERT(size <= stream_buffer_size); mapped_size = size; if (alignment > 0) { @@ -61,7 +78,7 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { WaitPendingOperations(offset); bool invalidated = false; - if (offset + size > STREAM_BUFFER_SIZE) { + if (offset + size > stream_buffer_size) { // The buffer would overflow, save the amount of used watches and reset the state. invalidation_mark = current_watch_cursor; current_watch_cursor = 0; @@ -98,40 +115,37 @@ void VKStreamBuffer::Unmap(u64 size) { } void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { + const auto memory_properties = device.GetPhysical().GetMemoryProperties(); + const u32 preferred_type = GetMemoryType(memory_properties); + const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; + + // Substract from the preferred heap size some bytes to avoid getting out of memory. + const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; + const VkDeviceSize allocable_size = heap_size - 4 * 1024 * 1024; + VkBufferCreateInfo buffer_ci; buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; buffer_ci.pNext = nullptr; buffer_ci.flags = 0; - buffer_ci.size = STREAM_BUFFER_SIZE; + buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size); buffer_ci.usage = usage; buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; buffer_ci.queueFamilyIndexCount = 0; buffer_ci.pQueueFamilyIndices = nullptr; - const auto& dev = device.GetLogical(); - buffer = dev.CreateBuffer(buffer_ci); - - const auto& dld = device.GetDispatchLoader(); - const auto requirements = dev.GetBufferMemoryRequirements(*buffer); - // Prefer device local host visible allocations (this should hit AMD's pinned memory). - auto type = - FindMemoryType(device, requirements.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - if (!type) { - // Otherwise search for a host visible allocation. - type = FindMemoryType(device, requirements.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - ASSERT_MSG(type, "No host visible and coherent memory type found"); - } + buffer = device.GetLogical().CreateBuffer(buffer_ci); + + const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer); + const u32 required_flags = requirements.memoryTypeBits; + stream_buffer_size = static_cast<u64>(requirements.size); + VkMemoryAllocateInfo memory_ai; memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; memory_ai.pNext = nullptr; memory_ai.allocationSize = requirements.size; - memory_ai.memoryTypeIndex = *type; + memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags); - memory = dev.AllocateMemory(memory_ai); + memory = device.GetLogical().AllocateMemory(memory_ai); buffer.BindMemory(*memory, 0); } diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 58ce8b973..dfddf7ad6 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -56,8 +56,9 @@ private: const VKDevice& device; ///< Vulkan device manager. VKScheduler& scheduler; ///< Command scheduler. - vk::Buffer buffer; ///< Mapped buffer. - vk::DeviceMemory memory; ///< Memory allocation. + vk::Buffer buffer; ///< Mapped buffer. + vk::DeviceMemory memory; ///< Memory allocation. + u64 stream_buffer_size{}; ///< Stream buffer size. u64 offset{}; ///< Buffer iterator. u64 mapped_size{}; ///< Size reserved for the current copy. diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 224943ad9..513e9bf49 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -76,12 +76,13 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons s64 cursor) { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { // Constant buffer found, test if it's an immediate - const auto offset = cbuf->GetOffset(); + const auto& offset = cbuf->GetOffset(); if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); return {tracked, track}; - } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { + } + if (const auto operation = std::get_if<OperationNode>(&*offset)) { const u32 bound_buffer = registry.GetBoundBuffer(); if (bound_buffer != cbuf->GetIndex()) { return {}; @@ -94,12 +95,12 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); const auto& gpu_driver = registry.AccessGuestDriverProfile(); const u32 bindless_cv = NewCustomVariable(); - const Node op = + Node op = Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); const Node cv_node = GetCustomVariable(bindless_cv); Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); - const std::size_t amend_index = DeclareAmend(amend_op); + const std::size_t amend_index = DeclareAmend(std::move(amend_op)); AmendNodeCv(amend_index, code[cursor]); // TODO Implement Bindless Index custom variable auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), @@ -142,7 +143,7 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co s64 cursor) const { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { // Constant buffer found, test if it's an immediate - const auto offset = cbuf->GetOffset(); + const auto& offset = cbuf->GetOffset(); if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { return {tracked, cbuf->GetIndex(), immediate->GetValue()}; } diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index e151c26c4..25d2ee2e8 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -196,9 +196,9 @@ std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb ComponentType alpha_component) noexcept { const auto format_index = static_cast<std::size_t>(format); const auto red_index = static_cast<std::size_t>(red_component); - const auto green_index = static_cast<std::size_t>(red_component); - const auto blue_index = static_cast<std::size_t>(red_component); - const auto alpha_index = static_cast<std::size_t>(red_component); + const auto green_index = static_cast<std::size_t>(green_component); + const auto blue_index = static_cast<std::size_t>(blue_component); + const auto alpha_index = static_cast<std::size_t>(alpha_component); const std::size_t srgb_index = is_srgb ? 1 : 0; return format_index * PerFormat + diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 3b9ab38dd..7f6dfac84 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -532,6 +532,8 @@ void Config::ReadDebuggingValues() { Settings::values.reporting_services = ReadSetting(QStringLiteral("reporting_services"), false).toBool(); Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool(); + Settings::values.disable_cpu_opt = + ReadSetting(QStringLiteral("disable_cpu_opt"), false).toBool(); qt_config->endGroup(); } @@ -1001,6 +1003,7 @@ void Config::SaveDebuggingValues() { WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false); WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false); WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false); + WriteSetting(QStringLiteral("disable_cpu_opt"), Settings::values.disable_cpu_opt, false); qt_config->endGroup(); } diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 9631059c7..c2026763e 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -36,6 +36,7 @@ void ConfigureDebug::SetConfiguration() { ui->homebrew_args_edit->setText(QString::fromStdString(Settings::values.program_args)); ui->reporting_services->setChecked(Settings::values.reporting_services); ui->quest_flag->setChecked(Settings::values.quest_flag); + ui->disable_cpu_opt->setChecked(Settings::values.disable_cpu_opt); ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn()); ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug); } @@ -48,6 +49,7 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.program_args = ui->homebrew_args_edit->text().toStdString(); Settings::values.reporting_services = ui->reporting_services->isChecked(); Settings::values.quest_flag = ui->quest_flag->isChecked(); + Settings::values.disable_cpu_opt = ui->disable_cpu_opt->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Debugger::ToggleConsole(); Log::Filter filter; diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index e028c4c80..e0d4c4a44 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -215,6 +215,13 @@ </property> </widget> </item> + <item> + <widget class="QCheckBox" name="disable_cpu_opt"> + <property name="text"> + <string>Disable CPU JIT optimizations</string> + </property> + </widget> + </item> </layout> </widget> </item> diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 2c8eb481d..05baec7e1 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -802,10 +802,6 @@ void GMainWindow::ConnectMenuEvents() { connect(ui.action_Load_Folder, &QAction::triggered, this, &GMainWindow::OnMenuLoadFolder); connect(ui.action_Install_File_NAND, &QAction::triggered, this, &GMainWindow::OnMenuInstallToNAND); - connect(ui.action_Select_NAND_Directory, &QAction::triggered, this, - [this] { OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget::NAND); }); - connect(ui.action_Select_SDMC_Directory, &QAction::triggered, this, - [this] { OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget::SDMC); }); connect(ui.action_Exit, &QAction::triggered, this, &QMainWindow::close); connect(ui.action_Load_Amiibo, &QAction::triggered, this, &GMainWindow::OnLoadAmiibo); @@ -940,16 +936,18 @@ bool GMainWindow::LoadROM(const QString& filename) { default: if (static_cast<u32>(result) > static_cast<u32>(Core::System::ResultStatus::ErrorLoader)) { - LOG_CRITICAL(Frontend, "Failed to load ROM!"); const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader); const u16 error_id = static_cast<u16>(result) - loader_id; + const std::string error_code = fmt::format("({:04X}-{:04X})", loader_id, error_id); + LOG_CRITICAL(Frontend, "Failed to load ROM! {}", error_code); QMessageBox::critical( - this, tr("Error while loading ROM!"), + this, + tr("Error while loading ROM! ").append(QString::fromStdString(error_code)), QString::fromStdString(fmt::format( - "While attempting to load the ROM requested, an error occured. Please " - "refer to the yuzu wiki for more information or the yuzu discord for " - "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}", - loader_id, error_id, static_cast<Loader::ResultStatus>(error_id)))); + "{}<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the " + "yuzu quickstart guide</a> to redump your files.<br>You can refer " + "to the yuzu wiki</a> or the yuzu Discord</a> for help.", + static_cast<Loader::ResultStatus>(error_id)))); } else { QMessageBox::critical( this, tr("Error while loading ROM!"), @@ -1663,28 +1661,6 @@ void GMainWindow::OnMenuInstallToNAND() { } } -void GMainWindow::OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target) { - const auto res = QMessageBox::information( - this, tr("Changing Emulated Directory"), - tr("You are about to change the emulated %1 directory of the system. Please note " - "that this does not also move the contents of the previous directory to the " - "new one and you will have to do that yourself.") - .arg(target == EmulatedDirectoryTarget::SDMC ? tr("SD card") : tr("NAND")), - QMessageBox::StandardButtons{QMessageBox::Ok, QMessageBox::Cancel}); - - if (res == QMessageBox::Cancel) - return; - - QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory")); - if (!dir_path.isEmpty()) { - FileUtil::GetUserPath(target == EmulatedDirectoryTarget::SDMC ? FileUtil::UserPath::SDMCDir - : FileUtil::UserPath::NANDDir, - dir_path.toStdString()); - Core::System::GetInstance().GetFileSystemController().CreateFactories(*vfs); - game_list->PopulateAsync(UISettings::values.game_dirs); - } -} - void GMainWindow::OnMenuRecentFile() { QAction* action = qobject_cast<QAction*>(sender()); assert(action); @@ -2095,27 +2071,25 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { QString errors; if (!pdm.HasFuses()) { - errors += tr("- Missing fuses - Cannot derive SBK\n"); + errors += tr("Missing fuses"); } if (!pdm.HasBoot0()) { - errors += tr("- Missing BOOT0 - Cannot derive master keys\n"); + errors += tr(" - Missing BOOT0"); } if (!pdm.HasPackage2()) { - errors += tr("- Missing BCPKG2-1-Normal-Main - Cannot derive general keys\n"); + errors += tr(" - Missing BCPKG2-1-Normal-Main"); } if (!pdm.HasProdInfo()) { - errors += tr("- Missing PRODINFO - Cannot derive title keys\n"); + errors += tr(" - Missing PRODINFO"); } if (!errors.isEmpty()) { QMessageBox::warning( - this, tr("Warning Missing Derivation Components"), - tr("The following are missing from your configuration that may hinder key " - "derivation. It will be attempted but may not complete.<br><br>") + - errors + - tr("<br><br>You can get all of these and dump all of your games easily by " - "following <a href='https://yuzu-emu.org/help/quickstart/'>the " - "quickstart guide</a>. Alternatively, you can use another method of dumping " - "to obtain all of your keys.")); + this, tr("Derivation Components Missing"), + tr("Components are missing that may hinder key derivation from completing. " + "<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the yuzu " + "quickstart guide</a> to get all your keys and " + "games.<br><br><small>(%1)</small>") + .arg(errors)); } QProgressDialog prog; diff --git a/src/yuzu/main.h b/src/yuzu/main.h index a67125567..0b750689d 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -196,8 +196,6 @@ private slots: void OnMenuLoadFile(); void OnMenuLoadFolder(); void OnMenuInstallToNAND(); - /// Called whenever a user select the "File->Select -- Directory" where -- is NAND or SD Card - void OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target); void OnMenuRecentFile(); void OnConfigure(); void OnLoadAmiibo(); diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index a2c9e4547..ae414241e 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui @@ -64,8 +64,6 @@ <addaction name="separator"/> <addaction name="menu_recent_files"/> <addaction name="separator"/> - <addaction name="action_Select_NAND_Directory"/> - <addaction name="action_Select_SDMC_Directory"/> <addaction name="separator"/> <addaction name="action_Load_Amiibo"/> <addaction name="separator"/> @@ -217,22 +215,6 @@ <string>Show Status Bar</string> </property> </action> - <action name="action_Select_NAND_Directory"> - <property name="text"> - <string>Select NAND Directory...</string> - </property> - <property name="toolTip"> - <string>Selects a folder to use as the root of the emulated NAND</string> - </property> - </action> - <action name="action_Select_SDMC_Directory"> - <property name="text"> - <string>Select SD Card Directory...</string> - </property> - <property name="toolTip"> - <string>Selects a folder to use as the root of the emulated SD card</string> - </property> - </action> <action name="action_Fullscreen"> <property name="checkable"> <bool>true</bool> diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index f4cd905c9..80341747f 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -425,6 +425,8 @@ void Config::ReadValues() { Settings::values.reporting_services = sdl2_config->GetBoolean("Debugging", "reporting_services", false); Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false); + Settings::values.disable_cpu_opt = + sdl2_config->GetBoolean("Debugging", "disable_cpu_opt", false); const auto title_list = sdl2_config->Get("AddOns", "title_ids", ""); std::stringstream ss(title_list); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index d63d7a58e..171d16fa0 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -280,6 +280,9 @@ dump_nso=false # Determines whether or not yuzu will report to the game that the emulated console is in Kiosk Mode # false: Retail/Normal Mode (default), true: Kiosk Mode quest_flag = +# Determines whether or not JIT CPU optimizations are enabled +# false: Optimizations Enabled, true: Optimizations Disabled +disable_cpu_opt = [WebService] # Whether or not to enable telemetry |