diff options
Diffstat (limited to 'src')
75 files changed, 1494 insertions, 527 deletions
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index d18ef6940..50846a854 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -17,7 +17,7 @@ namespace AudioCore { constexpr u32 STREAM_SAMPLE_RATE{48000}; constexpr u32 STREAM_NUM_CHANNELS{2}; - +using VoiceChannelHolder = std::array<VoiceResourceInformation*, 6>; class AudioRenderer::VoiceState { public: bool IsPlaying() const { @@ -37,9 +37,10 @@ public: } void SetWaveIndex(std::size_t index); - std::vector<s16> DequeueSamples(std::size_t sample_count, Core::Memory::Memory& memory); + std::vector<s16> DequeueSamples(std::size_t sample_count, Core::Memory::Memory& memory, + const VoiceChannelHolder& voice_resources); void UpdateState(); - void RefreshBuffer(Core::Memory::Memory& memory); + void RefreshBuffer(Core::Memory::Memory& memory, const VoiceChannelHolder& voice_resources); private: bool is_in_use{}; @@ -79,7 +80,7 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory std::shared_ptr<Kernel::WritableEvent> buffer_event, std::size_t instance_number) : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), - effects(params.effect_count), memory{memory_} { + voice_resources(params.voice_count), effects(params.effect_count), memory{memory_} { behavior_info.SetUserRevision(params.revision); audio_out = std::make_unique<AudioCore::AudioOut>(); stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, @@ -127,6 +128,12 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector< input_params.data() + sizeof(UpdateDataHeader) + config.behavior_size, memory_pool_count * sizeof(MemoryPoolInfo)); + // Copy voice resources + const std::size_t voice_resource_offset{sizeof(UpdateDataHeader) + config.behavior_size + + config.memory_pools_size}; + std::memcpy(voice_resources.data(), input_params.data() + voice_resource_offset, + 
sizeof(VoiceResourceInformation) * voice_resources.size()); + // Copy VoiceInfo structs std::size_t voice_offset{sizeof(UpdateDataHeader) + config.behavior_size + config.memory_pools_size + config.voice_resource_size}; @@ -220,14 +227,15 @@ void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) { is_refresh_pending = true; } -std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_count, - Core::Memory::Memory& memory) { +std::vector<s16> AudioRenderer::VoiceState::DequeueSamples( + std::size_t sample_count, Core::Memory::Memory& memory, + const VoiceChannelHolder& voice_resources) { if (!IsPlaying()) { return {}; } if (is_refresh_pending) { - RefreshBuffer(memory); + RefreshBuffer(memory, voice_resources); } const std::size_t max_size{samples.size() - offset}; @@ -271,7 +279,8 @@ void AudioRenderer::VoiceState::UpdateState() { is_in_use = info.is_in_use; } -void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory) { +void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory, + const VoiceChannelHolder& voice_resources) { const auto wave_buffer_address = info.wave_buffer[wave_index].buffer_addr; const auto wave_buffer_size = info.wave_buffer[wave_index].buffer_sz; std::vector<s16> new_samples(wave_buffer_size / sizeof(s16)); @@ -296,17 +305,77 @@ void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory) { } switch (info.channel_count) { - case 1: + case 1: { // 1 channel is upsampled to 2 channel samples.resize(new_samples.size() * 2); + for (std::size_t index = 0; index < new_samples.size(); ++index) { - samples[index * 2] = new_samples[index]; - samples[index * 2 + 1] = new_samples[index]; + auto sample = static_cast<float>(new_samples[index]); + if (voice_resources[0]->in_use) { + sample *= voice_resources[0]->mix_volumes[0]; + } + + samples[index * 2] = static_cast<s16>(sample * info.volume); + samples[index * 2 + 1] = static_cast<s16>(sample * info.volume); } break; + } case 2: 
{ // 2 channel is played as is samples = std::move(new_samples); + const std::size_t sample_count = (samples.size() / 2); + for (std::size_t index = 0; index < sample_count; ++index) { + const std::size_t index_l = index * 2; + const std::size_t index_r = index * 2 + 1; + + auto sample_l = static_cast<float>(samples[index_l]); + auto sample_r = static_cast<float>(samples[index_r]); + + if (voice_resources[0]->in_use) { + sample_l *= voice_resources[0]->mix_volumes[0]; + } + + if (voice_resources[1]->in_use) { + sample_r *= voice_resources[1]->mix_volumes[1]; + } + + samples[index_l] = static_cast<s16>(sample_l * info.volume); + samples[index_r] = static_cast<s16>(sample_r * info.volume); + } + break; + } + case 6: { + samples.resize((new_samples.size() / 6) * 2); + const std::size_t sample_count = samples.size() / 2; + + for (std::size_t index = 0; index < sample_count; ++index) { + auto FL = static_cast<float>(new_samples[index * 6]); + auto FR = static_cast<float>(new_samples[index * 6 + 1]); + auto FC = static_cast<float>(new_samples[index * 6 + 2]); + auto BL = static_cast<float>(new_samples[index * 6 + 4]); + auto BR = static_cast<float>(new_samples[index * 6 + 5]); + + if (voice_resources[0]->in_use) { + FL *= voice_resources[0]->mix_volumes[0]; + } + if (voice_resources[1]->in_use) { + FR *= voice_resources[1]->mix_volumes[1]; + } + if (voice_resources[2]->in_use) { + FC *= voice_resources[2]->mix_volumes[2]; + } + if (voice_resources[4]->in_use) { + BL *= voice_resources[4]->mix_volumes[4]; + } + if (voice_resources[5]->in_use) { + BR *= voice_resources[5]->mix_volumes[5]; + } + + samples[index * 2] = + static_cast<s16>((0.3694f * FL + 0.2612f * FC + 0.3694f * BL) * info.volume); + samples[index * 2 + 1] = + static_cast<s16>((0.3694f * FR + 0.2612f * FC + 0.3694f * BR) * info.volume); + } break; } default: @@ -352,11 +421,17 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { if (!voice.IsPlaying()) { continue; } + VoiceChannelHolder resources{}; + 
for (u32 channel = 0; channel < voice.GetInfo().channel_count; channel++) { + const auto channel_resource_id = voice.GetInfo().voice_channel_resource_ids[channel]; + resources[channel] = &voice_resources[channel_resource_id]; + } std::size_t offset{}; s64 samples_remaining{BUFFER_SIZE}; while (samples_remaining > 0) { - const std::vector<s16> samples{voice.DequeueSamples(samples_remaining, memory)}; + const std::vector<s16> samples{ + voice.DequeueSamples(samples_remaining, memory, resources)}; if (samples.empty()) { break; diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index b42770fae..1f9114c07 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -9,6 +9,7 @@ #include <vector> #include "audio_core/behavior_info.h" +#include "audio_core/common.h" #include "audio_core/stream.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -116,6 +117,14 @@ struct WaveBuffer { }; static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer has wrong size"); +struct VoiceResourceInformation { + s32_le id{}; + std::array<float_le, MAX_MIX_BUFFERS> mix_volumes{}; + bool in_use{}; + INSERT_PADDING_BYTES(11); +}; +static_assert(sizeof(VoiceResourceInformation) == 0x70, "VoiceResourceInformation has wrong size"); + struct VoiceInfo { u32_le id; u32_le node_id; @@ -244,6 +253,7 @@ private: AudioRendererParameter worker_params; std::shared_ptr<Kernel::WritableEvent> buffer_event; std::vector<VoiceState> voices; + std::vector<VoiceResourceInformation> voice_resources; std::vector<EffectState> effects; std::unique_ptr<AudioOut> audio_out; StreamPtr stream; diff --git a/src/audio_core/common.h b/src/audio_core/common.h index 98478b66b..7bb145c53 100644 --- a/src/audio_core/common.h +++ b/src/audio_core/common.h @@ -14,6 +14,7 @@ constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41}; } constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8'); +constexpr std::size_t 
MAX_MIX_BUFFERS = 24; static constexpr u32 VersionFromRevision(u32_le rev) { // "REV7" -> 7 diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index d1ec8ff08..e6769a5f3 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -148,6 +148,8 @@ add_library(common STATIC thread.h thread_queue_list.h threadsafe_queue.h + time_zone.cpp + time_zone.h timer.cpp timer.h uint128.cpp diff --git a/src/common/time_zone.cpp b/src/common/time_zone.cpp new file mode 100644 index 000000000..ce239eb63 --- /dev/null +++ b/src/common/time_zone.cpp @@ -0,0 +1,49 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <chrono> +#include <iomanip> +#include <sstream> + +#include "common/logging/log.h" +#include "common/time_zone.h" + +namespace Common::TimeZone { + +std::string GetDefaultTimeZone() { + return "GMT"; +} + +static std::string GetOsTimeZoneOffset() { + const std::time_t t{std::time(nullptr)}; + const std::tm tm{*std::localtime(&t)}; + + std::stringstream ss; + ss << std::put_time(&tm, "%z"); // Get the current timezone offset, e.g. 
"-400", as a string + + return ss.str(); +} + +static int ConvertOsTimeZoneOffsetToInt(const std::string& timezone) { + try { + return std::stoi(timezone); + } catch (const std::invalid_argument&) { + LOG_CRITICAL(Common, "invalid_argument with {}!", timezone); + return 0; + } catch (const std::out_of_range&) { + LOG_CRITICAL(Common, "out_of_range with {}!", timezone); + return 0; + } +} + +std::chrono::seconds GetCurrentOffsetSeconds() { + const int offset{ConvertOsTimeZoneOffsetToInt(GetOsTimeZoneOffset())}; + + int seconds{(offset / 100) * 60 * 60}; // Convert hour component to seconds + seconds += (offset % 100) * 60; // Convert minute component to seconds + + return std::chrono::seconds{seconds}; +} + +} // namespace Common::TimeZone diff --git a/src/common/time_zone.h b/src/common/time_zone.h new file mode 100644 index 000000000..945daa09c --- /dev/null +++ b/src/common/time_zone.h @@ -0,0 +1,18 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <chrono> +#include <string> + +namespace Common::TimeZone { + +/// Gets the default timezone, i.e. 
"GMT" +std::string GetDefaultTimeZone(); + +/// Gets the offset of the current timezone (from the default), in seconds +std::chrono::seconds GetCurrentOffsetSeconds(); + +} // namespace Common::TimeZone diff --git a/src/core/file_sys/control_metadata.cpp b/src/core/file_sys/control_metadata.cpp index f155a1341..63cd2eead 100644 --- a/src/core/file_sys/control_metadata.cpp +++ b/src/core/file_sys/control_metadata.cpp @@ -95,6 +95,10 @@ u32 NACP::GetSupportedLanguages() const { return raw.supported_languages; } +u64 NACP::GetDeviceSaveDataSize() const { + return raw.device_save_data_size; +} + std::vector<u8> NACP::GetRawBytes() const { std::vector<u8> out(sizeof(RawNACP)); std::memcpy(out.data(), &raw, sizeof(RawNACP)); diff --git a/src/core/file_sys/control_metadata.h b/src/core/file_sys/control_metadata.h index 2d8c251ac..e37b2fadf 100644 --- a/src/core/file_sys/control_metadata.h +++ b/src/core/file_sys/control_metadata.h @@ -113,6 +113,7 @@ public: u32 GetSupportedLanguages() const; std::vector<u8> GetRawBytes() const; bool GetUserAccountSwitchLock() const; + u64 GetDeviceSaveDataSize() const; private: RawNACP raw{}; diff --git a/src/core/file_sys/savedata_factory.cpp b/src/core/file_sys/savedata_factory.cpp index f3def93ab..adfd2c1a4 100644 --- a/src/core/file_sys/savedata_factory.cpp +++ b/src/core/file_sys/savedata_factory.cpp @@ -57,7 +57,8 @@ void PrintSaveDataDescriptorWarnings(SaveDataDescriptor meta) { bool ShouldSaveDataBeAutomaticallyCreated(SaveDataSpaceId space, const SaveDataDescriptor& desc) { return desc.type == SaveDataType::CacheStorage || desc.type == SaveDataType::TemporaryStorage || (space == SaveDataSpaceId::NandUser && ///< Normal Save Data -- Current Title & User - desc.type == SaveDataType::SaveData && desc.title_id == 0 && desc.save_id == 0); + (desc.type == SaveDataType::SaveData || desc.type == SaveDataType::DeviceSaveData) && + desc.title_id == 0 && desc.save_id == 0); } } // Anonymous namespace @@ -139,8 +140,10 @@ std::string 
SaveDataFactory::GetFullPath(SaveDataSpaceId space, SaveDataType typ u128 user_id, u64 save_id) { // According to switchbrew, if a save is of type SaveData and the title id field is 0, it should // be interpreted as the title id of the current process. - if (type == SaveDataType::SaveData && title_id == 0) { - title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID(); + if (type == SaveDataType::SaveData || type == SaveDataType::DeviceSaveData) { + if (title_id == 0) { + title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID(); + } } std::string out = GetSaveDataSpaceIdPath(space); diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp index 6e22f97b0..aa313de66 100644 --- a/src/core/file_sys/system_archive/system_version.cpp +++ b/src/core/file_sys/system_archive/system_version.cpp @@ -12,17 +12,17 @@ namespace SystemVersionData { // This section should reflect the best system version to describe yuzu's HLE api. // TODO(DarkLordZach): Update when HLE gets better. 
-constexpr u8 VERSION_MAJOR = 5; -constexpr u8 VERSION_MINOR = 1; -constexpr u8 VERSION_MICRO = 0; +constexpr u8 VERSION_MAJOR = 10; +constexpr u8 VERSION_MINOR = 0; +constexpr u8 VERSION_MICRO = 2; -constexpr u8 REVISION_MAJOR = 3; +constexpr u8 REVISION_MAJOR = 1; constexpr u8 REVISION_MINOR = 0; constexpr char PLATFORM_STRING[] = "NX"; -constexpr char VERSION_HASH[] = "23f9df53e25709d756e0c76effcb2473bd3447dd"; -constexpr char DISPLAY_VERSION[] = "5.1.0"; -constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 5.1.0-3.0"; +constexpr char VERSION_HASH[] = "f90143fa8bbc061d4f68c35f95f04f8080c0ecdc"; +constexpr char DISPLAY_VERSION[] = "10.0.2"; +constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 10.0.2-1.0"; } // namespace SystemVersionData diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp index eda466a5d..9a081fbd4 100644 --- a/src/core/frontend/emu_window.cpp +++ b/src/core/frontend/emu_window.cpp @@ -46,7 +46,7 @@ private: EmuWindow::EmuWindow() { // TODO: Find a better place to set this. 
config.min_client_area_size = - std::make_pair(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); + std::make_pair(Layout::MinimumSize::Width, Layout::MinimumSize::Height); active_config = config; touch_state = std::make_shared<TouchState>(); Input::RegisterFactory<Input::TouchDevice>("emu_window", touch_state); diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index 15ecfb13d..91ecc30ab 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -8,6 +8,11 @@ namespace Layout { +namespace MinimumSize { +constexpr u32 Width = 640; +constexpr u32 Height = 360; +} // namespace MinimumSize + namespace ScreenUndocked { constexpr u32 Width = 1280; constexpr u32 Height = 720; diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index f6503fe2f..20c331b77 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp @@ -767,7 +767,7 @@ FSP_SRV::FSP_SRV(FileSystemController& fsc, const Core::Reporter& reporter) {1014, nullptr, "OutputMultiProgramTagAccessLog"}, {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"}, - {1200, nullptr, "OpenMultiCommitManager"}, + {1200, &FSP_SRV::OpenMultiCommitManager, "OpenMultiCommitManager"}, {1300, nullptr, "OpenBisWiper"}, }; // clang-format on @@ -988,4 +988,40 @@ void FSP_SRV::GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx) { rb.Push(access_log_program_index); } +class IMultiCommitManager final : public ServiceFramework<IMultiCommitManager> { +public: + explicit IMultiCommitManager() : ServiceFramework("IMultiCommitManager") { + static const FunctionInfo functions[] = { + {1, &IMultiCommitManager::Add, "Add"}, + {2, &IMultiCommitManager::Commit, "Commit"}, + }; + RegisterHandlers(functions); + } + +private: + FileSys::VirtualFile backend; + + void 
Add(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_FS, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + } + + void Commit(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_FS, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + } +}; + +void FSP_SRV::OpenMultiCommitManager(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_FS, "called"); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<IMultiCommitManager>(std::make_shared<IMultiCommitManager>()); +} + } // namespace Service::FileSystem diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h index d52b55999..dfb3e395b 100644 --- a/src/core/hle/service/filesystem/fsp_srv.h +++ b/src/core/hle/service/filesystem/fsp_srv.h @@ -50,6 +50,7 @@ private: void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx); void GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx); + void OpenMultiCommitManager(Kernel::HLERequestContext& ctx); FileSystemController& fsc; diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp index 358cb9329..9a8d354ba 100644 --- a/src/core/hle/service/hid/controllers/keyboard.cpp +++ b/src/core/hle/service/hid/controllers/keyboard.cpp @@ -38,10 +38,11 @@ void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, cur_entry.sampling_number = last_entry.sampling_number + 1; cur_entry.sampling_number2 = cur_entry.sampling_number; + cur_entry.key.fill(0); + cur_entry.modifier = 0; + for (std::size_t i = 0; i < keyboard_keys.size(); ++i) { - for (std::size_t k = 0; k < KEYS_PER_BYTE; ++k) { - cur_entry.key[i / KEYS_PER_BYTE] |= (keyboard_keys[i]->GetStatus() << k); - } + cur_entry.key[i / KEYS_PER_BYTE] |= (keyboard_keys[i]->GetStatus() << (i % KEYS_PER_BYTE)); } for 
(std::size_t i = 0; i < keyboard_mods.size(); ++i) { diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 5559587e3..c84cb1483 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -157,7 +157,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) { {11, &Hid::ActivateTouchScreen, "ActivateTouchScreen"}, {21, &Hid::ActivateMouse, "ActivateMouse"}, {31, &Hid::ActivateKeyboard, "ActivateKeyboard"}, - {32, nullptr, "SendKeyboardLockKeyEvent"}, + {32, &Hid::SendKeyboardLockKeyEvent, "SendKeyboardLockKeyEvent"}, {40, nullptr, "AcquireXpadIdEventHandle"}, {41, nullptr, "ReleaseXpadIdEventHandle"}, {51, &Hid::ActivateXpad, "ActivateXpad"}, @@ -871,6 +871,15 @@ void Hid::InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } +void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto flags{rp.Pop<u32>()}; + LOG_WARNING(Service_HID, "(STUBBED) called. 
flags={}", flags); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + class HidDbg final : public ServiceFramework<HidDbg> { public: explicit HidDbg() : ServiceFramework{"hid:dbg"} { diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index 23552efb1..c8ed4ad8b 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h @@ -130,6 +130,7 @@ private: void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx); void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx); + void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx); std::shared_ptr<IAppletResource> applet_resource; Core::System& system; diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 767158444..01ddcdbd6 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -177,7 +177,8 @@ private: void CreateTemporaryNetworkProfile(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_NIFM, "called"); - ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c, "NetworkProfileData is not the correct size"); + ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c, + "SfNetworkProfileData is not the correct size"); u128 uuid{}; auto buffer = ctx.ReadBuffer(); std::memcpy(&uuid, buffer.data() + 8, sizeof(u128)); diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index f1e3d832a..caca80dde 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -138,9 +138,7 @@ u32 BufferQueue::Query(QueryType type) { switch (type) { case QueryType::NativeWindowFormat: - // TODO(Subv): Use an enum for this - static constexpr u32 FormatABGR8 = 1; - return FormatABGR8; + return static_cast<u32>(PixelFormat::RGBA8888); } UNIMPLEMENTED(); diff --git a/src/core/hle/service/nvflinger/buffer_queue.h 
b/src/core/hle/service/nvflinger/buffer_queue.h index d5f31e567..8a837e5aa 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -66,6 +66,16 @@ public: Rotate270 = 0x07, }; + enum class PixelFormat : u32 { + RGBA8888 = 1, + RGBX8888 = 2, + RGB888 = 3, + RGB565 = 4, + BGRA8888 = 5, + RGBA5551 = 6, + RRGBA4444 = 7, + }; + struct Buffer { enum class Status { Free = 0, Queued = 1, Dequeued = 2, Acquired = 3 }; diff --git a/src/core/hle/service/time/time_manager.cpp b/src/core/hle/service/time/time_manager.cpp index 9d6c55865..b4dfe45e5 100644 --- a/src/core/hle/service/time/time_manager.cpp +++ b/src/core/hle/service/time/time_manager.cpp @@ -5,6 +5,7 @@ #include <chrono> #include <ctime> +#include "common/time_zone.h" #include "core/hle/service/time/ephemeral_network_system_clock_context_writer.h" #include "core/hle/service/time/local_system_clock_context_writer.h" #include "core/hle/service/time/network_system_clock_context_writer.h" @@ -21,8 +22,16 @@ static std::chrono::seconds GetSecondsSinceEpoch() { Settings::values.custom_rtc_differential; } +static s64 GetExternalTimeZoneOffset() { + // With "auto" timezone setting, we use the external system's timezone offset + if (Settings::GetTimeZoneString() == "auto") { + return Common::TimeZone::GetCurrentOffsetSeconds().count(); + } + return 0; +} + static s64 GetExternalRtcValue() { - return GetSecondsSinceEpoch().count(); + return GetSecondsSinceEpoch().count() + GetExternalTimeZoneOffset(); } TimeManager::TimeManager(Core::System& system) diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp index 78d4acd95..c070d6e97 100644 --- a/src/core/hle/service/time/time_zone_content_manager.cpp +++ b/src/core/hle/service/time/time_zone_content_manager.cpp @@ -5,6 +5,7 @@ #include <sstream> #include "common/logging/log.h" +#include "common/time_zone.h" #include "core/core.h" #include 
"core/file_sys/content_archive.h" #include "core/file_sys/nca_metadata.h" @@ -14,6 +15,7 @@ #include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/time/time_manager.h" #include "core/hle/service/time/time_zone_content_manager.h" +#include "core/settings.h" namespace Service::Time::TimeZone { @@ -68,10 +70,22 @@ static std::vector<std::string> BuildLocationNameCache(Core::System& system) { TimeZoneContentManager::TimeZoneContentManager(TimeManager& time_manager, Core::System& system) : system{system}, location_name_cache{BuildLocationNameCache(system)} { - if (FileSys::VirtualFile vfs_file; GetTimeZoneInfoFile("GMT", vfs_file) == RESULT_SUCCESS) { + + std::string location_name; + const auto timezone_setting = Settings::GetTimeZoneString(); + if (timezone_setting == "auto") { + location_name = Common::TimeZone::GetDefaultTimeZone(); + } else if (timezone_setting == "default") { + location_name = location_name; + } else { + location_name = timezone_setting; + } + + if (FileSys::VirtualFile vfs_file; + GetTimeZoneInfoFile(location_name, vfs_file) == RESULT_SUCCESS) { const auto time_point{ time_manager.GetStandardSteadyClockCore().GetCurrentTimePoint(system)}; - time_manager.SetupTimeZoneManager("GMT", time_point, location_name_cache.size(), {}, + time_manager.SetupTimeZoneManager(location_name, time_point, location_name_cache.size(), {}, vfs_file); } else { time_zone_manager.MarkAsInitialized(); @@ -114,6 +128,12 @@ ResultCode TimeZoneContentManager::GetTimeZoneInfoFile(const std::string& locati vfs_file = zoneinfo_dir->GetFile(location_name); if (!vfs_file) { + LOG_ERROR(Service_Time, "{:016X} has no file \"{}\"! 
Using default timezone.", + time_zone_binary_titleid, location_name); + vfs_file = zoneinfo_dir->GetFile(Common::TimeZone::GetDefaultTimeZone()); + } + + if (!vfs_file) { LOG_ERROR(Service_Time, "{:016X} has no file \"{}\"!", time_zone_binary_titleid, location_name); return ERROR_TIME_NOT_FOUND; diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 2b0bdc4d3..4edff9cd8 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -63,6 +63,21 @@ const std::array<const char*, NumMouseButtons> mapping = {{ Values values = {}; +std::string GetTimeZoneString() { + static constexpr std::array<const char*, 46> timezones{{ + "auto", "default", "CET", "CST6CDT", "Cuba", "EET", "Egypt", "Eire", + "EST", "EST5EDT", "GB", "GB-Eire", "GMT", "GMT+0", "GMT-0", "GMT0", + "Greenwich", "Hongkong", "HST", "Iceland", "Iran", "Israel", "Jamaica", "Japan", + "Kwajalein", "Libya", "MET", "MST", "MST7MDT", "Navajo", "NZ", "NZ-CHAT", + "Poland", "Portugal", "PRC", "PST8PDT", "ROC", "ROK", "Singapore", "Turkey", + "UCT", "Universal", "UTC", "W-SU", "WET", "Zulu", + }}; + + ASSERT(Settings::values.time_zone_index < timezones.size()); + + return timezones[Settings::values.time_zone_index]; +} + void Apply() { GDBStub::SetServerPort(values.gdbstub_port); GDBStub::ToggleServer(values.use_gdbstub); @@ -87,6 +102,7 @@ void LogSettings() { LogSetting("System_CurrentUser", Settings::values.current_user); LogSetting("System_LanguageIndex", Settings::values.language_index); LogSetting("System_RegionIndex", Settings::values.region_index); + LogSetting("System_TimeZoneIndex", Settings::values.time_zone_index); LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); @@ -96,6 +112,7 @@ void LogSettings() { LogSetting("Renderer_UseAsynchronousGpuEmulation", Settings::values.use_asynchronous_gpu_emulation); 
LogSetting("Renderer_UseVsync", Settings::values.use_vsync); + LogSetting("Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders); LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy); LogSetting("Audio_OutputEngine", Settings::values.sink_id); LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); diff --git a/src/core/settings.h b/src/core/settings.h index 163900f0b..78eb33737 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -394,6 +394,7 @@ struct Values { s32 current_user; s32 language_index; s32 region_index; + s32 time_zone_index; s32 sound_index; // Controls @@ -445,6 +446,7 @@ struct Values { GPUAccuracy gpu_accuracy; bool use_asynchronous_gpu_emulation; bool use_vsync; + bool use_assembly_shaders; bool force_30fps_mode; bool use_fast_gpu_time; @@ -490,6 +492,9 @@ struct Values { bool IsGPULevelExtreme(); bool IsGPULevelHigh(); +std::string GetTimeZoneString(); + void Apply(); void LogSettings(); + } // namespace Settings diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 1c3b03a1c..c781b3cfc 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -201,6 +201,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", Settings::values.use_asynchronous_gpu_emulation); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); + AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders); AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode); } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index d23c53843..f00c71dae 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,6 +1,7 @@ add_library(video_core STATIC buffer_cache/buffer_block.h buffer_cache/buffer_cache.h + buffer_cache/map_interval.cpp 
buffer_cache/map_interval.h dirty_flags.cpp dirty_flags.h diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 56e570994..d9a4a1b4d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -12,11 +12,12 @@ #include <utility> #include <vector> -#include <boost/icl/interval_map.hpp> +#include <boost/container/small_vector.hpp> #include <boost/icl/interval_set.hpp> -#include <boost/range/iterator_range.hpp> +#include <boost/intrusive/set.hpp> #include "common/alignment.h" +#include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" #include "core/core.h" @@ -29,10 +30,12 @@ namespace VideoCommon { -using MapInterval = std::shared_ptr<MapIntervalBase>; - template <typename OwnerBuffer, typename BufferType, typename StreamBuffer> class BufferCache { + using IntervalSet = boost::icl::interval_set<VAddr>; + using IntervalType = typename IntervalSet::interval_type; + using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; + public: using BufferInfo = std::pair<BufferType, u64>; @@ -40,14 +43,12 @@ public: bool is_written = false, bool use_fast_cbuf = false) { std::lock_guard lock{mutex}; - const std::optional<VAddr> cpu_addr_opt = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); - + const auto& memory_manager = system.GPU().MemoryManager(); + const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); if (!cpu_addr_opt) { return {GetEmptyBuffer(size), 0}; } - - VAddr cpu_addr = *cpu_addr_opt; + const VAddr cpu_addr = *cpu_addr_opt; // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. 
@@ -77,16 +78,19 @@ public: } } - auto block = GetBlock(cpu_addr, size); - auto map = MapAddress(block, gpu_addr, cpu_addr, size); + OwnerBuffer block = GetBlock(cpu_addr, size); + MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); + if (!map) { + return {GetEmptyBuffer(size), 0}; + } if (is_written) { map->MarkAsModified(true, GetModifiedTicks()); if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { MarkForAsyncFlush(map); } - if (!map->IsWritten()) { - map->MarkAsWritten(true); - MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); + if (!map->is_written) { + map->is_written = true; + MarkRegionAsWritten(map->start, map->end - 1); } } @@ -132,12 +136,11 @@ public: void FlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; - std::vector<MapInterval> objects = GetMapsInRange(addr, size); - std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) { - return a->GetModificationTick() < b->GetModificationTick(); - }); - for (auto& object : objects) { - if (object->IsModified() && object->IsRegistered()) { + VectorMapInterval objects = GetMapsInRange(addr, size); + std::sort(objects.begin(), objects.end(), + [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; }); + for (MapInterval* object : objects) { + if (object->is_modified && object->is_registered) { mutex.unlock(); FlushMap(object); mutex.lock(); @@ -148,9 +151,9 @@ public: bool MustFlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; - const std::vector<MapInterval> objects = GetMapsInRange(addr, size); - return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) { - return map->IsModified() && map->IsRegistered(); + const VectorMapInterval objects = GetMapsInRange(addr, size); + return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) { + return map->is_modified && map->is_registered; }); } @@ -158,9 +161,8 @@ public: void 
InvalidateRegion(VAddr addr, u64 size) { std::lock_guard lock{mutex}; - std::vector<MapInterval> objects = GetMapsInRange(addr, size); - for (auto& object : objects) { - if (object->IsRegistered()) { + for (auto& object : GetMapsInRange(addr, size)) { + if (object->is_registered) { Unregister(object); } } @@ -169,10 +171,10 @@ public: void OnCPUWrite(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; - for (const auto& object : GetMapsInRange(addr, size)) { - if (object->IsMemoryMarked() && object->IsRegistered()) { + for (MapInterval* object : GetMapsInRange(addr, size)) { + if (object->is_memory_marked && object->is_registered) { UnmarkMemory(object); - object->SetSyncPending(true); + object->is_sync_pending = true; marked_for_unregister.emplace_back(object); } } @@ -181,9 +183,9 @@ public: void SyncGuestHost() { std::lock_guard lock{mutex}; - for (const auto& object : marked_for_unregister) { - if (object->IsRegistered()) { - object->SetSyncPending(false); + for (auto& object : marked_for_unregister) { + if (object->is_registered) { + object->is_sync_pending = false; Unregister(object); } } @@ -192,9 +194,9 @@ public: void CommitAsyncFlushes() { if (uncommitted_flushes) { - auto commit_list = std::make_shared<std::list<MapInterval>>(); - for (auto& map : *uncommitted_flushes) { - if (map->IsRegistered() && map->IsModified()) { + auto commit_list = std::make_shared<std::list<MapInterval*>>(); + for (MapInterval* map : *uncommitted_flushes) { + if (map->is_registered && map->is_modified) { // TODO(Blinkhawk): Implement backend asynchronous flushing // AsyncFlushMap(map) commit_list->push_back(map); @@ -228,8 +230,8 @@ public: committed_flushes.pop_front(); return; } - for (MapInterval& map : *flush_list) { - if (map->IsRegistered()) { + for (MapInterval* map : *flush_list) { + if (map->is_registered) { // TODO(Blinkhawk): Replace this for reading the asynchronous flush FlushMap(map); } @@ -265,61 +267,60 @@ protected: } /// Register an object into the 
cache - void Register(const MapInterval& new_map, bool inherit_written = false) { - const VAddr cpu_addr = new_map->GetStart(); + MapInterval* Register(MapInterval new_map, bool inherit_written = false) { + const VAddr cpu_addr = new_map.start; if (!cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", - new_map->GetGpuAddress()); - return; + new_map.gpu_addr); + return nullptr; } - const std::size_t size = new_map->GetEnd() - new_map->GetStart(); - new_map->MarkAsRegistered(true); - const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; - mapped_addresses.insert({interval, new_map}); + const std::size_t size = new_map.end - new_map.start; + new_map.is_registered = true; rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - new_map->SetMemoryMarked(true); + new_map.is_memory_marked = true; if (inherit_written) { - MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); - new_map->MarkAsWritten(true); + MarkRegionAsWritten(new_map.start, new_map.end - 1); + new_map.is_written = true; } + MapInterval* const storage = mapped_addresses_allocator.Allocate(); + *storage = new_map; + mapped_addresses.insert(*storage); + return storage; } - void UnmarkMemory(const MapInterval& map) { - if (!map->IsMemoryMarked()) { + void UnmarkMemory(MapInterval* map) { + if (!map->is_memory_marked) { return; } - const std::size_t size = map->GetEnd() - map->GetStart(); - rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); - map->SetMemoryMarked(false); + const std::size_t size = map->end - map->start; + rasterizer.UpdatePagesCachedCount(map->start, size, -1); + map->is_memory_marked = false; } /// Unregisters an object from the cache - void Unregister(const MapInterval& map) { + void Unregister(MapInterval* map) { UnmarkMemory(map); - map->MarkAsRegistered(false); - if (map->IsSyncPending()) { + map->is_registered = false; + if (map->is_sync_pending) { + map->is_sync_pending = false; 
marked_for_unregister.remove(map); - map->SetSyncPending(false); } - if (map->IsWritten()) { - UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); + if (map->is_written) { + UnmarkRegionAsWritten(map->start, map->end - 1); } - const IntervalType delete_interval{map->GetStart(), map->GetEnd()}; - mapped_addresses.erase(delete_interval); + const auto it = mapped_addresses.find(*map); + ASSERT(it != mapped_addresses.end()); + mapped_addresses.erase(it); + mapped_addresses_allocator.Release(map); } private: - MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) { - return std::make_shared<MapIntervalBase>(start, end, gpu_addr); - } - - MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, - const std::size_t size) { - std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size); + MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr, + std::size_t size) { + const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { auto& memory_manager = system.GPU().MemoryManager(); const VAddr cpu_addr_end = cpu_addr + size; - MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr); if (memory_manager.IsGranularRange(gpu_addr, size)) { u8* host_ptr = memory_manager.GetPointer(gpu_addr); UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); @@ -328,13 +329,12 @@ private: memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); } - Register(new_map); - return new_map; + return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); } const VAddr cpu_addr_end = cpu_addr + size; if (overlaps.size() == 1) { - MapInterval& current_map = overlaps[0]; + MapInterval* const current_map = overlaps[0]; if (current_map->IsInside(cpu_addr, cpu_addr_end)) { return current_map; } @@ -344,35 +344,39 @@ private: bool write_inheritance = 
false; bool modified_inheritance = false; // Calculate new buffer parameters - for (auto& overlap : overlaps) { - new_start = std::min(overlap->GetStart(), new_start); - new_end = std::max(overlap->GetEnd(), new_end); - write_inheritance |= overlap->IsWritten(); - modified_inheritance |= overlap->IsModified(); + for (MapInterval* overlap : overlaps) { + new_start = std::min(overlap->start, new_start); + new_end = std::max(overlap->end, new_end); + write_inheritance |= overlap->is_written; + modified_inheritance |= overlap->is_modified; } GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; for (auto& overlap : overlaps) { Unregister(overlap); } UpdateBlock(block, new_start, new_end, overlaps); - MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); + + const MapInterval new_map{new_start, new_end, new_gpu_addr}; + MapInterval* const map = Register(new_map, write_inheritance); + if (!map) { + return nullptr; + } if (modified_inheritance) { - new_map->MarkAsModified(true, GetModifiedTicks()); + map->MarkAsModified(true, GetModifiedTicks()); if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { - MarkForAsyncFlush(new_map); + MarkForAsyncFlush(map); } } - Register(new_map, write_inheritance); - return new_map; + return map; } void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, - std::vector<MapInterval>& overlaps) { + const VectorMapInterval& overlaps) { const IntervalType base_interval{start, end}; IntervalSet interval_set{}; interval_set.add(base_interval); for (auto& overlap : overlaps) { - const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()}; + const IntervalType subtract{overlap->start, overlap->end}; interval_set.subtract(subtract); } for (auto& interval : interval_set) { @@ -386,18 +390,24 @@ private: } } - std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) { + VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) { + VectorMapInterval result; if (size == 
0) { - return {}; + return result; } - std::vector<MapInterval> objects{}; - const IntervalType interval{addr, addr + size}; - for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { - objects.push_back(pair.second); + const VAddr addr_end = addr + size; + auto it = mapped_addresses.lower_bound(addr); + if (it != mapped_addresses.begin()) { + --it; } - - return objects; + while (it != mapped_addresses.end() && it->start < addr_end) { + if (it->Overlaps(addr, addr_end)) { + result.push_back(&*it); + } + ++it; + } + return result; } /// Returns a ticks counter used for tracking when cached objects were last modified @@ -405,12 +415,12 @@ private: return ++modified_ticks; } - void FlushMap(MapInterval map) { - std::size_t size = map->GetEnd() - map->GetStart(); - OwnerBuffer block = blocks[map->GetStart() >> block_page_bits]; + void FlushMap(MapInterval* map) { + const std::size_t size = map->end - map->start; + OwnerBuffer block = blocks[map->start >> block_page_bits]; staging_buffer.resize(size); - DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); - system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); + DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data()); + system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); map->MarkAsModified(false, 0); } @@ -515,7 +525,7 @@ private: } else { written_pages[page_start] = 1; } - page_start++; + ++page_start; } } @@ -531,7 +541,7 @@ private: written_pages.erase(it); } } - page_start++; + ++page_start; } } @@ -542,14 +552,14 @@ private: if (written_pages.count(page_start) > 0) { return true; } - page_start++; + ++page_start; } return false; } - void MarkForAsyncFlush(MapInterval& map) { + void MarkForAsyncFlush(MapInterval* map) { if (!uncommitted_flushes) { - uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>(); + uncommitted_flushes = 
std::make_shared<std::unordered_set<MapInterval*>>(); } uncommitted_flushes->insert(map); } @@ -566,10 +576,9 @@ private: u64 buffer_offset = 0; u64 buffer_offset_base = 0; - using IntervalSet = boost::icl::interval_set<VAddr>; - using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>; - using IntervalType = typename IntervalCache::interval_type; - IntervalCache mapped_addresses; + MapIntervalAllocator mapped_addresses_allocator; + boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> + mapped_addresses; static constexpr u64 write_page_bit = 11; std::unordered_map<u64, u32> written_pages; @@ -583,10 +592,10 @@ private: u64 modified_ticks = 0; std::vector<u8> staging_buffer; - std::list<MapInterval> marked_for_unregister; + std::list<MapInterval*> marked_for_unregister; - std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{}; - std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes; + std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes; + std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes; std::recursive_mutex mutex; }; diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp new file mode 100644 index 000000000..62587e18a --- /dev/null +++ b/src/video_core/buffer_cache/map_interval.cpp @@ -0,0 +1,33 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <array> +#include <cstddef> +#include <memory> + +#include "video_core/buffer_cache/map_interval.h" + +namespace VideoCommon { + +MapIntervalAllocator::MapIntervalAllocator() { + FillFreeList(first_chunk); +} + +MapIntervalAllocator::~MapIntervalAllocator() = default; + +void MapIntervalAllocator::AllocateNewChunk() { + *new_chunk = std::make_unique<Chunk>(); + FillFreeList(**new_chunk); + new_chunk = &(*new_chunk)->next; +} + +void MapIntervalAllocator::FillFreeList(Chunk& chunk) { + const std::size_t old_size = free_list.size(); + free_list.resize(old_size + chunk.data.size()); + std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size, + [](MapInterval& interval) { return &interval; }); +} + +} // namespace VideoCommon diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index 29d8b26f3..fe0bcd1d8 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -4,104 +4,89 @@ #pragma once +#include <array> +#include <cstddef> +#include <memory> +#include <vector> + +#include <boost/intrusive/set_hook.hpp> + #include "common/common_types.h" #include "video_core/gpu.h" namespace VideoCommon { -class MapIntervalBase { -public: - MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) - : start{start}, end{end}, gpu_addr{gpu_addr} {} - - void SetCpuAddress(VAddr new_cpu_addr) { - cpu_addr = new_cpu_addr; - } - - VAddr GetCpuAddress() const { - return cpu_addr; - } - - GPUVAddr GetGpuAddress() const { - return gpu_addr; - } - - bool IsInside(const VAddr other_start, const VAddr other_end) const { - return (start <= other_start && other_end <= end); - } - - bool operator==(const MapIntervalBase& rhs) const { - return std::tie(start, end) == std::tie(rhs.start, rhs.end); - } - - bool operator!=(const MapIntervalBase& rhs) const { - return !operator==(rhs); - } +struct MapInterval : public 
boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> { + MapInterval() = default; - void MarkAsRegistered(const bool registered) { - is_registered = registered; - } + /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {} - bool IsRegistered() const { - return is_registered; - } + explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept + : start{start_}, end{end_}, gpu_addr{gpu_addr_} {} - void SetMemoryMarked(bool is_memory_marked_) { - is_memory_marked = is_memory_marked_; + bool IsInside(VAddr other_start, VAddr other_end) const noexcept { + return start <= other_start && other_end <= end; } - bool IsMemoryMarked() const { - return is_memory_marked; + bool Overlaps(VAddr other_start, VAddr other_end) const noexcept { + return start < other_end && other_start < end; } - void SetSyncPending(bool is_sync_pending_) { - is_sync_pending = is_sync_pending_; - } + void MarkAsModified(bool is_modified_, u64 ticks_) noexcept { + is_modified = is_modified_; + ticks = ticks_; + } + + boost::intrusive::set_member_hook<> member_hook_; + VAddr start = 0; + VAddr end = 0; + GPUVAddr gpu_addr = 0; + u64 ticks = 0; + bool is_written = false; + bool is_modified = false; + bool is_registered = false; + bool is_memory_marked = false; + bool is_sync_pending = false; +}; - bool IsSyncPending() const { - return is_sync_pending; +struct MapIntervalCompare { + constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept { + return lhs.start < rhs.start; } +}; - VAddr GetStart() const { - return start; - } +class MapIntervalAllocator { +public: + MapIntervalAllocator(); + ~MapIntervalAllocator(); - VAddr GetEnd() const { - return end; + MapInterval* Allocate() { + if (free_list.empty()) { + AllocateNewChunk(); + } + MapInterval* const interval = free_list.back(); + free_list.pop_back(); + return interval; } - void MarkAsModified(const bool is_modified_, const u64 tick) { - is_modified = is_modified_; - ticks = 
tick; + void Release(MapInterval* interval) { + free_list.push_back(interval); } - bool IsModified() const { - return is_modified; - } +private: + struct Chunk { + std::unique_ptr<Chunk> next; + std::array<MapInterval, 0x8000> data; + }; - u64 GetModificationTick() const { - return ticks; - } + void AllocateNewChunk(); - void MarkAsWritten(const bool is_written_) { - is_written = is_written_; - } + void FillFreeList(Chunk& chunk); - bool IsWritten() const { - return is_written; - } + std::vector<MapInterval*> free_list; + std::unique_ptr<Chunk>* new_chunk = &first_chunk.next; -private: - VAddr start; - VAddr end; - GPUVAddr gpu_addr; - VAddr cpu_addr{}; - bool is_written{}; - bool is_modified{}; - bool is_registered{}; - bool is_memory_marked{}; - bool is_sync_pending{}; - u64 ticks{}; + Chunk first_chunk; }; } // namespace VideoCommon diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index bdc023d54..f2f96ac33 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -54,9 +54,7 @@ bool DmaPusher::Step() { return true; }); const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; - GPUVAddr dma_get = command_list_header.addr; - GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); - bool non_main = command_list_header.is_non_main; + const GPUVAddr dma_get = command_list_header.addr; if (dma_pushbuffer_subindex >= command_list.size()) { // We've gone through the current list, remove it from the queue @@ -133,11 +131,6 @@ bool DmaPusher::Step() { index++; } - if (!non_main) { - // TODO (degasus): This is dead code, as dma_mget is never read. 
- dma_mget = dma_put; - } - return true; } diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index e8b714e94..efa90d170 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -102,7 +102,6 @@ private: DmaState dma_state{}; bool dma_increment_once{}; - GPUVAddr dma_mget{}; ///< main pushbuffer last read address bool ib_enable{true}; ///< IB mode enabled std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{}; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 8dae754d4..e7cb87589 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -168,18 +168,22 @@ enum class Pred : u64 { }; enum class PredCondition : u64 { - LessThan = 1, - Equal = 2, - LessEqual = 3, - GreaterThan = 4, - NotEqual = 5, - GreaterEqual = 6, - LessThanWithNan = 9, - LessEqualWithNan = 11, - GreaterThanWithNan = 12, - NotEqualWithNan = 13, - GreaterEqualWithNan = 14, - // TODO(Subv): Other condition types + F = 0, // Always false + LT = 1, // Ordered less than + EQ = 2, // Ordered equal + LE = 3, // Ordered less than or equal + GT = 4, // Ordered greater than + NE = 5, // Ordered not equal + GE = 6, // Ordered greater than or equal + NUM = 7, // Ordered + NAN_ = 8, // Unordered + LTU = 9, // Unordered less than + EQU = 10, // Unordered equal + LEU = 11, // Unordered less than or equal + GTU = 12, // Unordered greater than + NEU = 13, // Unordered not equal + GEU = 14, // Unordered greater than or equal + T = 15, // Always true }; enum class PredOperation : u64 { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index d2cab50bd..9964ea894 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/microprofile.h" +#include 
"video_core/buffer_cache/buffer_cache.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index d83dca25a..466a911db 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -13,6 +13,7 @@ #include "common/logging/log.h" #include "common/scope_exit.h" +#include "core/settings.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -183,10 +184,16 @@ Device::Device() : base_bindings{BuildBaseBindings()} { has_precise_bug = TestPreciseBug(); has_broken_compute = is_intel_proprietary; has_fast_buffer_sub_data = is_nvidia; + use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && + GLAD_GL_NV_compute_program5; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); + + if (Settings::values.use_assembly_shaders && !use_assembly_shaders) { + LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); + } } Device::Device(std::nullptr_t) { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index a55050cb5..e915dbd86 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -88,6 +88,10 @@ public: return has_fast_buffer_sub_data; } + bool UseAssemblyShaders() const { + return use_assembly_shaders; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -107,6 +111,7 @@ private: bool has_precise_bug{}; bool has_broken_compute{}; bool has_fast_buffer_sub_data{}; + bool use_assembly_shaders{}; }; } // namespace OpenGL diff 
--git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 99ddcb3f8..ec5421afa 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_fence_manager.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 467891457..8116a5daa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -94,17 +94,30 @@ void oglEnable(GLenum cap, bool state) { } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, - ScreenInfo& info, GLShader::ProgramManager& program_manager, - StateTracker& state_tracker) - : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, + const Device& device, ScreenInfo& info, + ProgramManager& program_manager, StateTracker& state_tracker) + : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device, + state_tracker}, shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { CheckExtensions(); + + if (device.UseAssemblyShaders()) { + glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); + for (const GLuint cbuf : staging_cbufs) { + glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize), + nullptr, 0); + } + } } -RasterizerOpenGL::~RasterizerOpenGL() {} +RasterizerOpenGL::~RasterizerOpenGL() { + if 
(device.UseAssemblyShaders()) { + glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); + } +} void RasterizerOpenGL::CheckExtensions() { if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { @@ -230,6 +243,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); auto& gpu = system.GPU().Maxwell3D(); + std::size_t num_ssbos = 0; u32 clip_distances = 0; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -261,6 +275,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { Shader shader{shader_cache.GetStageProgram(program)}; + if (device.UseAssemblyShaders()) { + // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this + // all stages share the same bindings. + const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size(); + ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage"); + num_ssbos += num_stage_ssbos; + } + // Stage indices are 0 - 5 const std::size_t stage = index == 0 ? 
0 : index - 1; SetupDrawConstBuffers(stage, shader); @@ -526,6 +548,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncFramebufferSRGB(); buffer_cache.Acquire(); + current_cbuf = 0; std::size_t buffer_size = CalculateVertexArraysSize(); @@ -535,9 +558,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } // Uniform space for the 5 shader stages - buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + - (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) * - Maxwell::MaxShaderStage; + buffer_size = + Common::AlignUp<std::size_t>(buffer_size, 4) + + (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage; // Add space for at least 18 constant buffers buffer_size += Maxwell::MaxConstBuffers * @@ -558,12 +581,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } // Setup emulation uniform buffer. - GLShader::MaxwellUniformData ubo; - ubo.SetFromRegs(gpu); - const auto [buffer, offset] = - buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, - static_cast<GLsizeiptr>(sizeof(ubo))); + if (!device.UseAssemblyShaders()) { + MaxwellUniformData ubo; + ubo.SetFromRegs(gpu); + const auto [buffer, offset] = + buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); + glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, + static_cast<GLsizeiptr>(sizeof(ubo))); + } // Setup shaders and their used resources. 
texture_cache.GuardSamplers(true); @@ -635,11 +660,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { } buffer_cache.Acquire(); + current_cbuf = 0; auto kernel = shader_cache.GetComputeKernel(code_addr); SetupComputeTextures(kernel); SetupComputeImages(kernel); - program_manager.BindComputeShader(kernel->GetHandle()); const std::size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * @@ -652,6 +677,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Unmap(); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + program_manager.BindCompute(kernel->GetHandle()); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; } @@ -812,14 +838,20 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, } void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { + static constexpr std::array PARAMETER_LUT = { + GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, + GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, + GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; + MICROPROFILE_SCOPE(OpenGL_UBO); const auto& stages = system.GPU().Maxwell3D().state.shader_stages; const auto& shader_stage = stages[stage_index]; - u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; + u32 binding = + device.UseAssemblyShaders() ? 
0 : device.GetBaseBindings(stage_index).uniform_buffer; for (const auto& entry : shader->GetEntries().const_buffers) { const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; - SetupConstBuffer(binding++, buffer, entry); + SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry); } } @@ -835,16 +867,21 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { buffer.address = config.Address(); buffer.size = config.size; buffer.enabled = mask[entry.GetIndex()]; - SetupConstBuffer(binding++, buffer, entry); + SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry); } } -void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, +void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, + const Tegra::Engines::ConstBufferInfo& buffer, const ConstBufferEntry& entry) { if (!buffer.enabled) { // Set values to zero to unbind buffers - glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, - sizeof(float)); + if (device.UseAssemblyShaders()) { + glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); + } else { + glBindBufferRange(GL_UNIFORM_BUFFER, binding, + buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); + } return; } @@ -853,9 +890,19 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); const auto alignment = device.GetUniformBufferAlignment(); - const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, - device.HasFastBufferSubData()); - glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); + auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, + device.HasFastBufferSubData()); + if (!device.UseAssemblyShaders()) { + glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); + return; + } 
+ if (offset != 0) { + const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; + glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); + cbuf = staging_cbuf; + offset = 0; + } + glBindBufferRangeNV(stage, binding, cbuf, offset, size); } void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { @@ -863,7 +910,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad auto& memory_manager{gpu.MemoryManager()}; const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; - u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; + u32 binding = + device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; for (const auto& entry : shader->GetEntries().global_memory_entries) { const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b94c65907..87f7fe159 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -56,8 +56,8 @@ struct DrawParameters; class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, - ScreenInfo& info, GLShader::ProgramManager& program_manager, - StateTracker& state_tracker); + const Device& device, ScreenInfo& info, + ProgramManager& program_manager, StateTracker& state_tracker); ~RasterizerOpenGL() override; void Draw(bool is_indexed, bool is_instanced) override; @@ -106,7 +106,7 @@ private: void SetupComputeConstBuffers(const Shader& kernel); /// Configures a constant buffer. 
- void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, + void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, const ConstBufferEntry& entry); /// Configures the current global memory entries to use for the draw command. @@ -224,7 +224,7 @@ private: void SetupShaders(GLenum primitive_mode); - const Device device; + const Device& device; TextureCacheOpenGL texture_cache; ShaderCacheOpenGL shader_cache; @@ -236,7 +236,7 @@ private: Core::System& system; ScreenInfo& screen_info; - GLShader::ProgramManager& program_manager; + ProgramManager& program_manager; StateTracker& state_tracker; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; @@ -248,6 +248,12 @@ private: std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> enabled_transform_feedback_buffers; + static constexpr std::size_t NUM_CONSTANT_BUFFERS = + Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * + Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; + std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; + std::size_t current_cbuf = 0; + /// Number of commands queued to the OpenGL driver. Reseted on flush. 
std::size_t num_queued_commands = 0; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 97803d480..a787e27d2 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -125,6 +125,15 @@ void OGLProgram::Release() { handle = 0; } +void OGLAssemblyProgram::Release() { + if (handle == 0) { + return; + } + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteProgramsARB(1, &handle); + handle = 0; +} + void OGLPipeline::Create() { if (handle != 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index de93f4212..f8b322227 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -167,6 +167,22 @@ public: GLuint handle = 0; }; +class OGLAssemblyProgram : private NonCopyable { +public: + OGLAssemblyProgram() = default; + + OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + + ~OGLAssemblyProgram() { + Release(); + } + + /// Deletes the internal OpenGL resource + void Release(); + + GLuint handle = 0; +}; + class OGLPipeline : private NonCopyable { public: OGLPipeline() = default; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9759a7078..4cd0f36cf 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -97,6 +97,24 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { return {}; } +constexpr GLenum AssemblyEnum(ShaderType shader_type) { + switch (shader_type) { + case ShaderType::Vertex: + return GL_VERTEX_PROGRAM_NV; + case ShaderType::TesselationControl: + return GL_TESS_CONTROL_PROGRAM_NV; + case ShaderType::TesselationEval: + return GL_TESS_EVALUATION_PROGRAM_NV; + 
case ShaderType::Geometry: + return GL_GEOMETRY_PROGRAM_NV; + case ShaderType::Fragment: + return GL_FRAGMENT_PROGRAM_NV; + case ShaderType::Compute: + return GL_COMPUTE_PROGRAM_NV; + } + return {}; +} + std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); } @@ -120,18 +138,43 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { return registry; } -std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, - u64 unique_identifier, const ShaderIR& ir, - const Registry& registry, bool hint_retrievable = false) { +ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, + const ShaderIR& ir, const Registry& registry, + bool hint_retrievable = false) { const std::string shader_id = MakeShaderID(unique_identifier, shader_type); LOG_INFO(Render_OpenGL, "{}", shader_id); - const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); - OGLShader shader; - shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); + auto program = std::make_shared<ProgramHandle>(); + + if (device.UseAssemblyShaders()) { + const std::string arb = "Not implemented"; + + GLuint& arb_prog = program->assembly_program.handle; + +// Commented out functions signal OpenGL errors but are compatible with apitrace. +// Use them only to capture and replay on apitrace. 
+#if 0 + glGenProgramsNV(1, &arb_prog); + glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()), + reinterpret_cast<const GLubyte*>(arb.data())); +#else + glGenProgramsARB(1, &arb_prog); + glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, + static_cast<GLsizei>(arb.size()), arb.data()); +#endif + const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + LOG_CRITICAL(Render_OpenGL, "{}", err); + LOG_INFO(Render_OpenGL, "\n{}", arb); + } + } else { + const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); + OGLShader shader; + shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); + + program->source_program.Create(true, hint_retrievable, shader.handle); + } - auto program = std::make_shared<OGLProgram>(); - program->Create(true, hint_retrievable, shader.handle); return program; } @@ -153,15 +196,22 @@ std::unordered_set<GLenum> GetSupportedFormats() { CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, std::shared_ptr<VideoCommon::Shader::Registry> registry, - ShaderEntries entries, std::shared_ptr<OGLProgram> program) + ShaderEntries entries, ProgramSharedPtr program_) : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, - size_in_bytes{size_in_bytes}, program{std::move(program)} {} + size_in_bytes{size_in_bytes}, program{std::move(program_)} { + // Assign either the assembly program or source program. We can't have both. 
+ handle = program->assembly_program.handle; + if (handle == 0) { + handle = program->source_program.handle; + } + ASSERT(handle != 0); +} CachedShader::~CachedShader() = default; GLuint CachedShader::GetHandle() const { DEBUG_ASSERT(registry->IsConsistent()); - return program->handle; + return handle; } Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, @@ -239,7 +289,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } - const std::vector gl_cache = disk_cache.LoadPrecompiled(); + std::vector<ShaderDiskCachePrecompiled> gl_cache; + if (!device.UseAssemblyShaders()) { + // Only load precompiled cache when we are not using assembly shaders + gl_cache = disk_cache.LoadPrecompiled(); + } const auto supported_formats = GetSupportedFormats(); // Track if precompiled cache was altered during loading to know if we have to @@ -278,7 +332,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, auto registry = MakeRegistry(entry); const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); - std::shared_ptr<OGLProgram> program; + ProgramSharedPtr program; if (precompiled_entry) { // If the shader is precompiled, attempt to load it with program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); @@ -332,6 +386,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } + if (device.UseAssemblyShaders()) { + // Don't store precompiled binaries for assembly shaders. 
+ return; + } + // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw // before precompiling them @@ -339,7 +398,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const u64 id = (*transferable)[i].unique_identifier; const auto it = find_precompiled(id); if (it == gl_cache.end()) { - const GLuint program = runtime_cache.at(id).program->handle; + const GLuint program = runtime_cache.at(id).program->source_program.handle; disk_cache.SavePrecompiled(id, program); precompiled_cache_altered = true; } @@ -350,7 +409,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } -std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( +ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, const std::unordered_set<GLenum>& supported_formats) { if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { @@ -358,15 +417,15 @@ std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( return {}; } - auto program = std::make_shared<OGLProgram>(); - program->handle = glCreateProgram(); - glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); - glProgramBinary(program->handle, precompiled_entry.binary_format, - precompiled_entry.binary.data(), + auto program = std::make_shared<ProgramHandle>(); + GLuint& handle = program->source_program.handle; + handle = glCreateProgram(); + glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), static_cast<GLsizei>(precompiled_entry.binary.size())); GLint link_status; - glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); + glGetProgramiv(handle, GL_LINK_STATUS, &link_status); if (link_status == GL_FALSE) { LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); 
return {}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 91690b470..b2ae8d7f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -43,8 +43,14 @@ struct UnspecializedShader; using Shader = std::shared_ptr<CachedShader>; using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct ProgramHandle { + OGLProgram source_program; + OGLAssemblyProgram assembly_program; +}; +using ProgramSharedPtr = std::shared_ptr<ProgramHandle>; + struct PrecompiledShader { - std::shared_ptr<OGLProgram> program; + ProgramSharedPtr program; std::shared_ptr<VideoCommon::Shader::Registry> registry; ShaderEntries entries; }; @@ -87,12 +93,13 @@ public: private: explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, std::shared_ptr<VideoCommon::Shader::Registry> registry, - ShaderEntries entries, std::shared_ptr<OGLProgram> program); + ShaderEntries entries, ProgramSharedPtr program); std::shared_ptr<VideoCommon::Shader::Registry> registry; ShaderEntries entries; std::size_t size_in_bytes = 0; - std::shared_ptr<OGLProgram> program; + ProgramSharedPtr program; + GLuint handle = 0; }; class ShaderCacheOpenGL final : public RasterizerCache<Shader> { @@ -115,7 +122,7 @@ protected: void FlushObjectInner(const Shader& object) override {} private: - std::shared_ptr<OGLProgram> GeneratePrecompiledProgram( + ProgramSharedPtr GeneratePrecompiledProgram( const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, const std::unordered_set<GLenum>& supported_formats); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 99fd4ae2c..c83a08d42 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1840,34 +1840,40 @@ private: Type::HalfFloat}; } - template <Type type> - 
Expression LogicalLessThan(Operation operation) { - return GenerateBinaryInfix(operation, "<", Type::Bool, type, type); - } - - template <Type type> - Expression LogicalEqual(Operation operation) { - return GenerateBinaryInfix(operation, "==", Type::Bool, type, type); - } - - template <Type type> - Expression LogicalLessEqual(Operation operation) { - return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type); - } - - template <Type type> - Expression LogicalGreaterThan(Operation operation) { - return GenerateBinaryInfix(operation, ">", Type::Bool, type, type); + template <const std::string_view& op, Type type, bool unordered = false> + Expression Comparison(Operation operation) { + static_assert(!unordered || type == Type::Float); + + const Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); + + if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { + // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's + // and Nvidia's proprietary stacks. Manually force an ordered comparison. + return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(), + VisitOperand(operation, 0).AsFloat(), + VisitOperand(operation, 1).AsFloat()), + Type::Bool}; + } + if constexpr (!unordered) { + return expr; + } + // Unordered comparisons are always true for NaN operands. 
+ return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(), + VisitOperand(operation, 0).AsFloat(), + VisitOperand(operation, 1).AsFloat()), + Type::Bool}; } - template <Type type> - Expression LogicalNotEqual(Operation operation) { - return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type); + Expression FOrdered(Operation operation) { + return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(), + VisitOperand(operation, 1).AsFloat()), + Type::Bool}; } - template <Type type> - Expression LogicalGreaterEqual(Operation operation) { - return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type); + Expression FUnordered(Operation operation) { + return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(), + VisitOperand(operation, 1).AsFloat()), + Type::Bool}; } Expression LogicalAddCarry(Operation operation) { @@ -2303,6 +2309,18 @@ private: return {"gl_SubGroupInvocationARB", Type::Uint}; } + template <const std::string_view& comparison> + Expression ThreadMask(Operation) { + if (device.HasWarpIntrinsics()) { + return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; + } + if (device.HasShaderBallot()) { + return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; + } + LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); + return {"0U", Type::Uint}; + } + Expression ShuffleIndexed(Operation operation) { std::string value = VisitOperand(operation, 0).AsFloat(); @@ -2324,6 +2342,19 @@ private: Func() = delete; ~Func() = delete; + static constexpr std::string_view LessThan = "<"; + static constexpr std::string_view Equal = "=="; + static constexpr std::string_view LessEqual = "<="; + static constexpr std::string_view GreaterThan = ">"; + static constexpr std::string_view NotEqual = "!="; + static constexpr std::string_view GreaterEqual = ">="; + + static constexpr std::string_view Eq = "Eq"; + static constexpr std::string_view Ge = 
"Ge"; + static constexpr std::string_view Gt = "Gt"; + static constexpr std::string_view Le = "Le"; + static constexpr std::string_view Lt = "Lt"; + static constexpr std::string_view Add = "Add"; static constexpr std::string_view Min = "Min"; static constexpr std::string_view Max = "Max"; @@ -2425,27 +2456,34 @@ private: &GLSLDecompiler::LogicalPick2, &GLSLDecompiler::LogicalAnd2, - &GLSLDecompiler::LogicalLessThan<Type::Float>, - &GLSLDecompiler::LogicalEqual<Type::Float>, - &GLSLDecompiler::LogicalLessEqual<Type::Float>, - &GLSLDecompiler::LogicalGreaterThan<Type::Float>, - &GLSLDecompiler::LogicalNotEqual<Type::Float>, - &GLSLDecompiler::LogicalGreaterEqual<Type::Float>, - &GLSLDecompiler::LogicalFIsNan, - - &GLSLDecompiler::LogicalLessThan<Type::Int>, - &GLSLDecompiler::LogicalEqual<Type::Int>, - &GLSLDecompiler::LogicalLessEqual<Type::Int>, - &GLSLDecompiler::LogicalGreaterThan<Type::Int>, - &GLSLDecompiler::LogicalNotEqual<Type::Int>, - &GLSLDecompiler::LogicalGreaterEqual<Type::Int>, - - &GLSLDecompiler::LogicalLessThan<Type::Uint>, - &GLSLDecompiler::LogicalEqual<Type::Uint>, - &GLSLDecompiler::LogicalLessEqual<Type::Uint>, - &GLSLDecompiler::LogicalGreaterThan<Type::Uint>, - &GLSLDecompiler::LogicalNotEqual<Type::Uint>, - &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>, + &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>, + &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>, + &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>, + &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>, + &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>, + &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>, + &GLSLDecompiler::FOrdered, + &GLSLDecompiler::FUnordered, + &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>, + &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>, + &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>, + 
&GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>, + &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>, + &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>, + + &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>, + &GLSLDecompiler::Comparison<Func::Equal, Type::Int>, + &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>, + &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>, + &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>, + &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>, + + &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>, + &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>, + &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>, + &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>, + &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>, + &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>, &GLSLDecompiler::LogicalAddCarry, @@ -2534,6 +2572,11 @@ private: &GLSLDecompiler::VoteEqual, &GLSLDecompiler::ThreadId, + &GLSLDecompiler::ThreadMask<Func::Eq>, + &GLSLDecompiler::ThreadMask<Func::Ge>, + &GLSLDecompiler::ThreadMask<Func::Gt>, + &GLSLDecompiler::ThreadMask<Func::Le>, + &GLSLDecompiler::ThreadMask<Func::Lt>, &GLSLDecompiler::ShuffleIndexed, &GLSLDecompiler::MemoryBarrierGL, diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 9c7b0adbd..96605db84 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -6,45 +6,105 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_shader_manager.h" -namespace OpenGL::GLShader { +namespace OpenGL { -ProgramManager::ProgramManager() = default; +ProgramManager::ProgramManager(const Device& device) { + use_assembly_programs = device.UseAssemblyShaders(); + if 
(use_assembly_programs) { + glEnable(GL_COMPUTE_PROGRAM_NV); + } else { + graphics_pipeline.Create(); + glBindProgramPipeline(graphics_pipeline.handle); + } +} ProgramManager::~ProgramManager() = default; -void ProgramManager::Create() { - graphics_pipeline.Create(); - glBindProgramPipeline(graphics_pipeline.handle); +void ProgramManager::BindCompute(GLuint program) { + if (use_assembly_programs) { + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); + } else { + is_graphics_bound = false; + glUseProgram(program); + } } void ProgramManager::BindGraphicsPipeline() { - if (!is_graphics_bound) { - is_graphics_bound = true; - glUseProgram(0); + if (use_assembly_programs) { + UpdateAssemblyPrograms(); + } else { + UpdateSourcePrograms(); } +} - // Avoid updating the pipeline when values have no changed - if (old_state == current_state) { - return; +void ProgramManager::BindHostPipeline(GLuint pipeline) { + if (use_assembly_programs) { + if (geometry_enabled) { + geometry_enabled = false; + old_state.geometry = 0; + glDisable(GL_GEOMETRY_PROGRAM_NV); + } } + glBindProgramPipeline(pipeline); +} - // Workaround for AMD bug - static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | - GL_FRAGMENT_SHADER_BIT}; - const GLuint handle = graphics_pipeline.handle; - glUseProgramStages(handle, all_used_stages, 0); - glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); - glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); - glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); +void ProgramManager::RestoreGuestPipeline() { + if (use_assembly_programs) { + glBindProgramPipeline(0); + } else { + glBindProgramPipeline(graphics_pipeline.handle); + } +} + +void ProgramManager::UpdateAssemblyPrograms() { + const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) { + if (current == old) { + return; + } + if (current == 0) { + if (enabled) { + 
enabled = false; + glDisable(stage); + } + return; + } + if (!enabled) { + enabled = true; + glEnable(stage); + } + glBindProgramARB(stage, current); + }; + + update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex); + update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry, + old_state.geometry); + update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment, + old_state.fragment); old_state = current_state; } -void ProgramManager::BindComputeShader(GLuint program) { - is_graphics_bound = false; - glUseProgram(program); +void ProgramManager::UpdateSourcePrograms() { + if (!is_graphics_bound) { + is_graphics_bound = true; + glUseProgram(0); + } + + const GLuint handle = graphics_pipeline.handle; + const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { + if (current == old) { + return; + } + glUseProgramStages(handle, stage, current); + }; + update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); + update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); + update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); + + old_state = current_state; } void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { @@ -54,4 +114,4 @@ void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { y_direction = regs.screen_y_control.y_negate == 0 ? 
1.0f : -1.0f; } -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index d2e47f2a9..0f03b4f12 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -11,7 +11,9 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" -namespace OpenGL::GLShader { +namespace OpenGL { + +class Device; /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at @@ -28,50 +30,58 @@ static_assert(sizeof(MaxwellUniformData) < 16384, class ProgramManager { public: - explicit ProgramManager(); + explicit ProgramManager(const Device& device); ~ProgramManager(); - void Create(); + /// Binds a compute program + void BindCompute(GLuint program); - /// Updates the graphics pipeline and binds it. + /// Updates bound programs. void BindGraphicsPipeline(); - /// Binds a compute shader. - void BindComputeShader(GLuint program); + /// Binds an OpenGL pipeline object unsynchronized with the guest state. + void BindHostPipeline(GLuint pipeline); + + /// Rewinds BindHostPipeline state changes. 
+ void RestoreGuestPipeline(); void UseVertexShader(GLuint program) { - current_state.vertex_shader = program; + current_state.vertex = program; } void UseGeometryShader(GLuint program) { - current_state.geometry_shader = program; + current_state.geometry = program; } void UseFragmentShader(GLuint program) { - current_state.fragment_shader = program; + current_state.fragment = program; } private: struct PipelineState { - bool operator==(const PipelineState& rhs) const noexcept { - return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && - geometry_shader == rhs.geometry_shader; - } - - bool operator!=(const PipelineState& rhs) const noexcept { - return !operator==(rhs); - } - - GLuint vertex_shader = 0; - GLuint fragment_shader = 0; - GLuint geometry_shader = 0; + GLuint vertex = 0; + GLuint geometry = 0; + GLuint fragment = 0; }; + /// Update NV_gpu_program5 programs. + void UpdateAssemblyPrograms(); + + /// Update GLSL programs. + void UpdateSourcePrograms(); + OGLPipeline graphics_pipeline; - OGLPipeline compute_pipeline; + PipelineState current_state; PipelineState old_state; + + bool use_assembly_programs = false; + bool is_graphics_bound = true; + + bool vertex_enabled = false; + bool geometry_enabled = false; + bool fragment_enabled = false; }; -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b2a179746..6b489e6db 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -316,7 +316,7 @@ public: RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, Core::Frontend::GraphicsContext& context) : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, - has_debug_tool{HasDebugTool()} {} + program_manager{device}, has_debug_tool{HasDebugTool()} {} 
RendererOpenGL::~RendererOpenGL() = default; @@ -468,8 +468,9 @@ void RendererOpenGL::InitOpenGLObjects() { vertex_program.Create(true, false, vertex_shader.handle); fragment_program.Create(true, false, fragment_shader.handle); - // Create program pipeline - program_manager.Create(); + pipeline.Create(); + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); // Generate VBO handle for drawing vertex_buffer.Create(); @@ -508,7 +509,7 @@ void RendererOpenGL::CreateRasterizer() { if (rasterizer) { return; } - rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info, + rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info, program_manager, state_tracker); } @@ -620,10 +621,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyClipControl(); state_tracker.NotifyAlphaTest(); - program_manager.UseVertexShader(vertex_program.handle); - program_manager.UseGeometryShader(0); - program_manager.UseFragmentShader(fragment_program.handle); - program_manager.BindGraphicsPipeline(); + program_manager.BindHostPipeline(pipeline.handle); glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { @@ -665,6 +663,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + program_manager.RestoreGuestPipeline(); } bool RendererOpenGL::TryPresent(int timeout_ms) { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 50b647661..61bf507f4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "common/math_util.h" #include "video_core/renderer_base.h" +#include "video_core/renderer_opengl/gl_device.h" 
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" @@ -95,6 +96,7 @@ private: Core::Frontend::EmuWindow& emu_window; Core::System& system; Core::Frontend::GraphicsContext& context; + const Device device; StateTracker state_tracker{system}; @@ -102,13 +104,14 @@ private: OGLBuffer vertex_buffer; OGLProgram vertex_program; OGLProgram fragment_program; + OGLPipeline pipeline; OGLFramebuffer screenshot_framebuffer; /// Display information for Switch screen ScreenInfo screen_info; /// Global dummy shader pipeline - GLShader::ProgramManager program_manager; + ProgramManager program_manager; /// OpenGL framebuffer data std::vector<u8> gl_framebuffer_data; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 6cead3a28..568744e3c 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -93,6 +93,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.operation)); + rasterize_enable.Assign(regs.rasterize_enable != 0 ? 
1 : 0); std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index cecaee48d..31a6398f2 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -164,6 +164,7 @@ struct FixedPipelineState { BitField<23, 1, u32> tessellation_clockwise; BitField<24, 1, u32> logic_op_enable; BitField<25, 4, u32> logic_op; + BitField<29, 1, u32> rasterize_enable; }; // TODO(Rodrigo): Move this to push constants diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 5b494da8c..5f33d9e40 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -7,6 +7,7 @@ #include <memory> #include "core/core.h" +#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 04d07fe6a..043fe7947 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -7,6 +7,7 @@ #include <memory> #include "video_core/fence_manager.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Core { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5beea6a03..69b6bba00 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -281,7 +281,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams&
renderpa rasterization_ci.pNext = nullptr; rasterization_ci.flags = 0; rasterization_ci.depthClampEnable = rs.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE; - rasterization_ci.rasterizerDiscardEnable = VK_FALSE; + rasterization_ci.rasterizerDiscardEnable = rs.rasterize_enable == 0 ? VK_TRUE : VK_FALSE; rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; rasterization_ci.cullMode = rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f118e5990..be5b77fae 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -569,7 +569,9 @@ void RasterizerVulkan::ReleaseFences() { } void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { - FlushRegion(addr, size); + if (Settings::IsGPULevelExtreme()) { + FlushRegion(addr, size); + } InvalidateRegion(addr, size); } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 18678968c..f4ccc9848 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -515,6 +515,16 @@ private: void DeclareCommon() { thread_id = DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); + thread_masks[0] = + DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); + thread_masks[1] = + DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); + thread_masks[2] = + DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); + thread_masks[3] = + DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); + thread_masks[4] = + DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); } void DeclareVertex() { @@ -1618,6 +1628,24 @@ private: return 
{}; } + Expression LogicalFOrdered(Operation operation) { + // Emulate SPIR-V's OpOrdered + const Id op_a = AsFloat(Visit(operation[0])); + const Id op_b = AsFloat(Visit(operation[1])); + const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a); + const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b); + return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool}; + } + + Expression LogicalFUnordered(Operation operation) { + // Emulate SPIR-V's OpUnordered + const Id op_a = AsFloat(Visit(operation[0])); + const Id op_b = AsFloat(Visit(operation[1])); + const Id is_nan_a = OpIsNan(t_bool, op_a); + const Id is_nan_b = OpIsNan(t_bool, op_b); + return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool}; + } + Id GetTextureSampler(Operation operation) { const auto& meta = std::get<MetaTexture>(operation.GetMeta()); ASSERT(!meta.sampler.is_buffer); @@ -2157,6 +2185,13 @@ private: return {OpLoad(t_uint, thread_id), Type::Uint}; } + template <std::size_t index> + Expression ThreadMask(Operation) { + // TODO(Rodrigo): Handle devices with different warp sizes + const Id mask = thread_masks[index]; + return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; + } + Expression ShuffleIndexed(Operation operation) { const Id value = AsFloat(Visit(operation[0])); const Id index = AsUint(Visit(operation[1])); @@ -2511,7 +2546,14 @@ private: &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>, &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>, &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpIsNan, Type::Bool, Type::Float>, + &SPIRVDecompiler::LogicalFOrdered, + &SPIRVDecompiler::LogicalFUnordered, + &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>, + 
&SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>, &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>, &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>, @@ -2614,6 +2656,11 @@ private: &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, &SPIRVDecompiler::ThreadId, + &SPIRVDecompiler::ThreadMask<0>, // Eq + &SPIRVDecompiler::ThreadMask<1>, // Ge + &SPIRVDecompiler::ThreadMask<2>, // Gt + &SPIRVDecompiler::ThreadMask<3>, // Le + &SPIRVDecompiler::ThreadMask<4>, // Lt &SPIRVDecompiler::ShuffleIndexed, &SPIRVDecompiler::MemoryBarrierGL, @@ -2738,6 +2785,7 @@ private: Id workgroup_id{}; Id local_invocation_id{}; Id thread_id{}; + std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt VertexIndices in_indices; VertexIndices out_indices; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index a75a5cc63..eeac328a6 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -255,7 +255,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); Node op_b = Immediate(branch_case.cmp_value); Node condition = - GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); + GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); auto result = Conditional(condition, {n}); bb.push_back(result); global_code.push_back(result); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d4f95b18c..399a455c4 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -109,6 +109,27 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { return 
Operation(OperationCode::WorkGroupIdY); case SystemVariable::CtaIdZ: return Operation(OperationCode::WorkGroupIdZ); + case SystemVariable::EqMask: + case SystemVariable::LtMask: + case SystemVariable::LeMask: + case SystemVariable::GtMask: + case SystemVariable::GeMask: + uses_warps = true; + switch (instr.sys20) { + case SystemVariable::EqMask: + return Operation(OperationCode::ThreadEqMask); + case SystemVariable::LtMask: + return Operation(OperationCode::ThreadLtMask); + case SystemVariable::LeMask: + return Operation(OperationCode::ThreadLeMask); + case SystemVariable::GtMask: + return Operation(OperationCode::ThreadGtMask); + case SystemVariable::GeMask: + return Operation(OperationCode::ThreadGeMask); + default: + UNREACHABLE(); + return Immediate(0u); + } default: UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value())); diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 6191ffba1..c83dc6615 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -97,19 +97,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b); } case Tegra::Shader::XmadMode::CSfu: { - const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a, - op_a, Immediate(0)); - const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b, - op_b, Immediate(0)); + const Node comp_a = + GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); + const Node comp_b = + GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); const Node comp_minus_a = GetPredicateComparisonInteger( - PredCondition::NotEqual, is_signed_a, + PredCondition::NE, is_signed_a, SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, Immediate(0x80000000)), 
Immediate(0)); const Node comp_minus_b = GetPredicateComparisonInteger( - PredCondition::NotEqual, is_signed_b, + PredCondition::NE, is_signed_b, SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, Immediate(0x80000000)), Immediate(0)); diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 601c822d2..cce8aeebe 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -110,13 +110,20 @@ enum class OperationCode { LogicalPick2, /// (bool2 pair, uint index) -> bool LogicalAnd2, /// (bool2 a) -> bool - LogicalFLessThan, /// (float a, float b) -> bool - LogicalFEqual, /// (float a, float b) -> bool - LogicalFLessEqual, /// (float a, float b) -> bool - LogicalFGreaterThan, /// (float a, float b) -> bool - LogicalFNotEqual, /// (float a, float b) -> bool - LogicalFGreaterEqual, /// (float a, float b) -> bool - LogicalFIsNan, /// (float a) -> bool + LogicalFOrdLessThan, /// (float a, float b) -> bool + LogicalFOrdEqual, /// (float a, float b) -> bool + LogicalFOrdLessEqual, /// (float a, float b) -> bool + LogicalFOrdGreaterThan, /// (float a, float b) -> bool + LogicalFOrdNotEqual, /// (float a, float b) -> bool + LogicalFOrdGreaterEqual, /// (float a, float b) -> bool + LogicalFOrdered, /// (float a, float b) -> bool + LogicalFUnordered, /// (float a, float b) -> bool + LogicalFUnordLessThan, /// (float a, float b) -> bool + LogicalFUnordEqual, /// (float a, float b) -> bool + LogicalFUnordLessEqual, /// (float a, float b) -> bool + LogicalFUnordGreaterThan, /// (float a, float b) -> bool + LogicalFUnordNotEqual, /// (float a, float b) -> bool + LogicalFUnordGreaterEqual, /// (float a, float b) -> bool LogicalILessThan, /// (int a, int b) -> bool LogicalIEqual, /// (int a, int b) -> bool @@ -219,6 +226,11 @@ enum class OperationCode { VoteEqual, /// (bool) -> bool ThreadId, /// () -> uint + ThreadEqMask, /// () -> uint + ThreadGeMask, /// () -> uint + ThreadGtMask, /// () -> uint + ThreadLeMask, /// () -> uint + 
ThreadLtMask, /// () -> uint ShuffleIndexed, /// (uint value, uint index) -> uint MemoryBarrierGL, /// () -> void diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 822674926..e322c3402 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -10,6 +10,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" @@ -243,56 +244,44 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { } Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { + if (condition == PredCondition::T) { + return GetPredicate(true); + } else if (condition == PredCondition::F) { + return GetPredicate(false); + } + static constexpr std::array comparison_table{ - std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan}, - std::pair{PredCondition::Equal, OperationCode::LogicalFEqual}, - std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, - std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, - std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, - std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, - std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, - std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, - std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, - std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, - std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}, + OperationCode(0), + OperationCode::LogicalFOrdLessThan, // LT + OperationCode::LogicalFOrdEqual, // EQ + OperationCode::LogicalFOrdLessEqual, // LE + 
OperationCode::LogicalFOrdGreaterThan, // GT + OperationCode::LogicalFOrdNotEqual, // NE + OperationCode::LogicalFOrdGreaterEqual, // GE + OperationCode::LogicalFOrdered, // NUM + OperationCode::LogicalFUnordered, // NAN + OperationCode::LogicalFUnordLessThan, // LTU + OperationCode::LogicalFUnordEqual, // EQU + OperationCode::LogicalFUnordLessEqual, // LEU + OperationCode::LogicalFUnordGreaterThan, // GTU + OperationCode::LogicalFUnordNotEqual, // NEU + OperationCode::LogicalFUnordGreaterEqual, // GEU }; + const std::size_t index = static_cast<std::size_t>(condition); + ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); - - if (condition == PredCondition::LessThanWithNan || - condition == PredCondition::NotEqualWithNan || - condition == PredCondition::LessEqualWithNan || - condition == PredCondition::GreaterThanWithNan || - condition == PredCondition::GreaterEqualWithNan) { - predicate = Operation(OperationCode::LogicalOr, predicate, - Operation(OperationCode::LogicalFIsNan, op_a)); - predicate = Operation(OperationCode::LogicalOr, predicate, - Operation(OperationCode::LogicalFIsNan, op_b)); - } - - return predicate; + return Operation(comparison_table[index], op_a, op_b); } Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, Node op_b) { static constexpr std::array comparison_table{ - std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan}, - std::pair{PredCondition::Equal, OperationCode::LogicalIEqual}, - std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual}, - std::pair{PredCondition::GreaterThan, 
OperationCode::LogicalIGreaterThan}, - std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual}, - std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, - std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan}, - std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual}, - std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual}, - std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan}, - std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}, + std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, + std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, + std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, + std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, + std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, + std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, }; const auto comparison = @@ -301,32 +290,24 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), "Unknown predicate comparison operation"); - Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), - std::move(op_b)); - - UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || - condition == PredCondition::NotEqualWithNan || - condition == PredCondition::LessEqualWithNan || - condition == PredCondition::GreaterThanWithNan || - condition == PredCondition::GreaterEqualWithNan, - "NaN comparisons for integers are not implemented"); - return predicate; + return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), + std::move(op_b)); } Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b) { static constexpr std::array comparison_table{ - std::pair{PredCondition::LessThan, 
OperationCode::Logical2HLessThan}, - std::pair{PredCondition::Equal, OperationCode::Logical2HEqual}, - std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, - std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, - std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, - std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, - std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, - std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, - std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, - std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, - std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}, + std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, + std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, + std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual}, + std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, + std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, + std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, + std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, + std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, + std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, + std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, + std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, }; const auto comparison = @@ -397,7 +378,7 @@ void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc if (!sets_cc) { return; } - Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f)); + Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); SetInternalFlag(bb, 
InternalFlag::Zero, std::move(zerop)); LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); } diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 3d759f77b..1adf8932b 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -150,18 +150,19 @@ public: } void MakeCurrent() override { - if (is_current) { - return; + // We can't track the current state of the underlying context in this wrapper class because + // Qt may make the underlying context not current for one reason or another. In particular, + // the WebBrowser uses GL, so it seems to conflict if we aren't careful. + // Instead of always just making the context current (which does not have any caching to + // check if the underlying context is already current) we can check for the current context + // in the thread local data by calling `currentContext()` and checking if its ours. + if (QOpenGLContext::currentContext() != context.get()) { + context->makeCurrent(surface); } - is_current = context->makeCurrent(surface); } void DoneCurrent() override { - if (!is_current) { - return; - } context->doneCurrent(); - is_current = false; } QOpenGLContext* GetShareContext() { @@ -178,7 +179,6 @@ private: std::unique_ptr<QOpenGLContext> context; std::unique_ptr<QOffscreenSurface> offscreen_surface{}; QSurface* surface; - bool is_current = false; }; class DummyContext : public Core::Frontend::GraphicsContext {}; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 75c6cf20b..b08b87426 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -643,6 +643,8 @@ void Config::ReadRendererValues() { Settings::values.use_asynchronous_gpu_emulation = ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); + Settings::values.use_assembly_shaders = + ReadSetting(QStringLiteral("use_assembly_shaders"), 
false).toBool(); Settings::values.use_fast_gpu_time = ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool(); Settings::values.force_30fps_mode = @@ -687,6 +689,8 @@ void Config::ReadSystemValues() { Settings::values.region_index = ReadSetting(QStringLiteral("region_index"), 1).toInt(); + Settings::values.time_zone_index = ReadSetting(QStringLiteral("time_zone_index"), 0).toInt(); + const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool(); if (rng_seed_enabled) { Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong(); @@ -1088,6 +1092,8 @@ void Config::SaveRendererValues() { WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), Settings::values.use_asynchronous_gpu_emulation, false); WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); + WriteSetting(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders, + false); WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true); WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); @@ -1126,6 +1132,7 @@ void Config::SaveSystemValues() { WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0); WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1); WriteSetting(QStringLiteral("region_index"), Settings::values.region_index, 1); + WriteSetting(QStringLiteral("time_zone_index"), Settings::values.time_zone_index, 0); WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false); WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0); diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 5bb2ae555..37aadf7f8 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ 
-12,6 +12,9 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent) ui->setupUi(this); + // TODO: Remove this after assembly shaders are fully integrated + ui->use_assembly_shaders->setVisible(false); + SetConfiguration(); } @@ -22,6 +25,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); ui->use_vsync->setEnabled(runtime_lock); ui->use_vsync->setChecked(Settings::values.use_vsync); + ui->use_assembly_shaders->setEnabled(runtime_lock); + ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time); ui->force_30fps_mode->setEnabled(runtime_lock); ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); @@ -33,6 +38,7 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); Settings::values.gpu_accuracy = gpu_accuracy; Settings::values.use_vsync = ui->use_vsync->isChecked(); + Settings::values.use_assembly_shaders = ui->use_assembly_shaders->isChecked(); Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked(); Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 770b80c50..0021607ac 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -63,6 +63,16 @@ </widget> </item> <item> + <widget class="QCheckBox" name="use_assembly_shaders"> + <property name="toolTip"> + <string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). 
This feature is experimental.</string> + </property> + <property name="text"> + <string>Use assembly shaders (experimental, Nvidia OpenGL only)</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="force_30fps_mode"> <property name="text"> <string>Force 30 FPS mode</string> diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp index f49cd4c8f..10315e7a6 100644 --- a/src/yuzu/configuration/configure_system.cpp +++ b/src/yuzu/configuration/configure_system.cpp @@ -57,6 +57,7 @@ void ConfigureSystem::SetConfiguration() { ui->combo_language->setCurrentIndex(Settings::values.language_index); ui->combo_region->setCurrentIndex(Settings::values.region_index); + ui->combo_time_zone->setCurrentIndex(Settings::values.time_zone_index); ui->combo_sound->setCurrentIndex(Settings::values.sound_index); ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value()); @@ -84,6 +85,7 @@ void ConfigureSystem::ApplyConfiguration() { Settings::values.language_index = ui->combo_language->currentIndex(); Settings::values.region_index = ui->combo_region->currentIndex(); + Settings::values.time_zone_index = ui->combo_time_zone->currentIndex(); Settings::values.sound_index = ui->combo_sound->currentIndex(); if (ui->rng_seed_checkbox->isChecked()) { diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h index d8fa2d2cc..26d42d5c5 100644 --- a/src/yuzu/configuration/configure_system.h +++ b/src/yuzu/configuration/configure_system.h @@ -37,5 +37,6 @@ private: int language_index = 0; int region_index = 0; + int time_zone_index = 0; int sound_index = 0; }; diff --git a/src/yuzu/configuration/configure_system.ui b/src/yuzu/configuration/configure_system.ui index 4e2c7e76e..9c8cca6dc 100644 --- a/src/yuzu/configuration/configure_system.ui +++ b/src/yuzu/configuration/configure_system.ui @@ -22,14 +22,14 @@ <string>System Settings</string> </property> <layout 
class="QGridLayout" name="gridLayout"> - <item row="2" column="0"> + <item row="3" column="0"> <widget class="QLabel" name="label_sound"> <property name="text"> <string>Sound output mode</string> </property> </widget> </item> - <item row="3" column="0"> + <item row="4" column="0"> <widget class="QLabel" name="label_console_id"> <property name="text"> <string>Console ID:</string> @@ -174,14 +174,255 @@ </item> </widget> </item> - <item row="5" column="0"> + <item row="2" column="0"> + <widget class="QLabel" name="label_timezone"> + <property name="text"> + <string>Time Zone:</string> + </property> + </widget> + </item> + <item row="2" column="1"> + <widget class="QComboBox" name="combo_time_zone"> + <item> + <property name="text"> + <string>Auto</string> + </property> + </item> + <item> + <property name="text"> + <string>Default</string> + </property> + </item> + <item> + <property name="text"> + <string>CET</string> + </property> + </item> + <item> + <property name="text"> + <string>CST6CDT</string> + </property> + </item> + <item> + <property name="text"> + <string>Cuba</string> + </property> + </item> + <item> + <property name="text"> + <string>EET</string> + </property> + </item> + <item> + <property name="text"> + <string>Egypt</string> + </property> + </item> + <item> + <property name="text"> + <string>Eire</string> + </property> + </item> + <item> + <property name="text"> + <string>EST</string> + </property> + </item> + <item> + <property name="text"> + <string>EST5EDT</string> + </property> + </item> + <item> + <property name="text"> + <string>GB</string> + </property> + </item> + <item> + <property name="text"> + <string>GB-Eire</string> + </property> + </item> + <item> + <property name="text"> + <string>GMT</string> + </property> + </item> + <item> + <property name="text"> + <string>GMT+0</string> + </property> + </item> + <item> + <property name="text"> + <string>GMT-0</string> + </property> + </item> + <item> + <property name="text"> + 
<string>GMT0</string> + </property> + </item> + <item> + <property name="text"> + <string>Greenwich</string> + </property> + </item> + <item> + <property name="text"> + <string>Hongkong</string> + </property> + </item> + <item> + <property name="text"> + <string>HST</string> + </property> + </item> + <item> + <property name="text"> + <string>Iceland</string> + </property> + </item> + <item> + <property name="text"> + <string>Iran</string> + </property> + </item> + <item> + <property name="text"> + <string>Israel</string> + </property> + </item> + <item> + <property name="text"> + <string>Jamaica</string> + </property> + </item> + <item> + <property name="text"> + <string>Japan</string> + </property> + </item> + <item> + <property name="text"> + <string>Kwajalein</string> + </property> + </item> + <item> + <property name="text"> + <string>Libya</string> + </property> + </item> + <item> + <property name="text"> + <string>MET</string> + </property> + </item> + <item> + <property name="text"> + <string>MST</string> + </property> + </item> + <item> + <property name="text"> + <string>MST7MDT</string> + </property> + </item> + <item> + <property name="text"> + <string>Navajo</string> + </property> + </item> + <item> + <property name="text"> + <string>NZ</string> + </property> + </item> + <item> + <property name="text"> + <string>NZ-CHAT</string> + </property> + </item> + <item> + <property name="text"> + <string>Poland</string> + </property> + </item> + <item> + <property name="text"> + <string>Portugal</string> + </property> + </item> + <item> + <property name="text"> + <string>PRC</string> + </property> + </item> + <item> + <property name="text"> + <string>PST8PDT</string> + </property> + </item> + <item> + <property name="text"> + <string>ROC</string> + </property> + </item> + <item> + <property name="text"> + <string>ROK</string> + </property> + </item> + <item> + <property name="text"> + <string>Singapore</string> + </property> + </item> + <item> + <property 
name="text"> + <string>Turkey</string> + </property> + </item> + <item> + <property name="text"> + <string>UCT</string> + </property> + </item> + <item> + <property name="text"> + <string>Universal</string> + </property> + </item> + <item> + <property name="text"> + <string>UTC</string> + </property> + </item> + <item> + <property name="text"> + <string>W-SU</string> + </property> + </item> + <item> + <property name="text"> + <string>WET</string> + </property> + </item> + <item> + <property name="text"> + <string>Zulu</string> + </property> + </item> + </widget> + </item> + <item row="6" column="0"> <widget class="QCheckBox" name="rng_seed_checkbox"> <property name="text"> <string>RNG Seed</string> </property> </widget> </item> - <item row="2" column="1"> + <item row="3" column="1"> <widget class="QComboBox" name="combo_sound"> <item> <property name="text"> @@ -207,7 +448,7 @@ </property> </widget> </item> - <item row="3" column="1"> + <item row="4" column="1"> <widget class="QPushButton" name="button_regenerate_console_id"> <property name="sizePolicy"> <sizepolicy hsizetype="Fixed" vsizetype="Fixed"> @@ -223,14 +464,14 @@ </property> </widget> </item> - <item row="4" column="0"> + <item row="5" column="0"> <widget class="QCheckBox" name="custom_rtc_checkbox"> <property name="text"> <string>Custom RTC</string> </property> </widget> </item> - <item row="4" column="1"> + <item row="5" column="1"> <widget class="QDateTimeEdit" name="custom_rtc_edit"> <property name="minimumDate"> <date> @@ -244,7 +485,7 @@ </property> </widget> </item> - <item row="5" column="1"> + <item row="6" column="1"> <widget class="QLineEdit" name="rng_seed_edit"> <property name="sizePolicy"> <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> diff --git a/src/yuzu/discord_impl.cpp b/src/yuzu/discord_impl.cpp index ea0079353..a93733b26 100644 --- a/src/yuzu/discord_impl.cpp +++ b/src/yuzu/discord_impl.cpp @@ -18,7 +18,7 @@ DiscordImpl::DiscordImpl() { // The number is the client ID for yuzu, 
it's used for images and the // application name - Discord_Initialize("471872241299226636", &handlers, 1, nullptr); + Discord_Initialize("712465656758665259", &handlers, 1, nullptr); } DiscordImpl::~DiscordImpl() { diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index dccbabcbf..bfb600df0 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -488,11 +488,11 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, std::string pat auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); navigate_to_gamedb_entry->setVisible(it != compatibility_list.end() && program_id != 0); - connect(open_save_location, &QAction::triggered, [this, program_id]() { - emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData); + connect(open_save_location, &QAction::triggered, [this, program_id, path]() { + emit OpenFolderRequested(GameListOpenTarget::SaveData, path); }); - connect(open_lfs_location, &QAction::triggered, [this, program_id]() { - emit OpenFolderRequested(program_id, GameListOpenTarget::ModData); + connect(open_lfs_location, &QAction::triggered, [this, program_id, path]() { + emit OpenFolderRequested(GameListOpenTarget::ModData, path); }); connect(open_transferable_shader_cache, &QAction::triggered, [this, program_id]() { emit OpenTransferableShaderCacheRequested(program_id); }); diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index 878d94413..a38cb2fc3 100644 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h @@ -73,7 +73,7 @@ public: signals: void GameChosen(QString game_path); void ShouldCancelWorker(); - void OpenFolderRequested(u64 program_id, GameListOpenTarget target); + void OpenFolderRequested(GameListOpenTarget target, const std::string& game_path); void OpenTransferableShaderCacheRequested(u64 program_id); void DumpRomFSRequested(u64 program_id, const std::string& game_path); void CopyTIDRequested(u64 program_id); diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp 
index 2a6483370..ae842306c 100644 --- a/src/yuzu/loading_screen.cpp +++ b/src/yuzu/loading_screen.cpp @@ -19,6 +19,7 @@ #include <QTime> #include <QtConcurrent/QtConcurrentRun> #include "common/logging/log.h" +#include "core/frontend/framebuffer_layout.h" #include "core/loader/loader.h" #include "ui_loading_screen.h" #include "video_core/rasterizer_interface.h" @@ -61,7 +62,7 @@ LoadingScreen::LoadingScreen(QWidget* parent) : QWidget(parent), ui(std::make_unique<Ui::LoadingScreen>()), previous_stage(VideoCore::LoadCallbackStage::Complete) { ui->setupUi(this); - setMinimumSize(1280, 720); + setMinimumSize(Layout::MinimumSize::Width, Layout::MinimumSize::Height); // Create a fade out effect to hide this loading screen widget. // When fading opacity, it will fade to the parent widgets background color, which is why we diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 86e8a1d49..0b291c7d0 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -724,13 +724,13 @@ void GMainWindow::InitializeHotkeys() { } void GMainWindow::SetDefaultUIGeometry() { - // geometry: 55% of the window contents are in the upper screen half, 45% in the lower half + // geometry: 53% of the window contents are in the upper screen half, 47% in the lower half const QRect screenRect = QApplication::desktop()->screenGeometry(this); const int w = screenRect.width() * 2 / 3; - const int h = screenRect.height() / 2; + const int h = screenRect.height() * 2 / 3; const int x = (screenRect.x() + screenRect.width()) / 2 - w / 2; - const int y = (screenRect.y() + screenRect.height()) / 2 - h * 55 / 100; + const int y = (screenRect.y() + screenRect.height()) / 2 - h * 53 / 100; setGeometry(x, y, w, h); } @@ -831,6 +831,7 @@ void GMainWindow::ConnectMenuEvents() { &GMainWindow::OnDisplayTitleBars); connect(ui.action_Show_Filter_Bar, &QAction::triggered, this, &GMainWindow::OnToggleFilterBar); connect(ui.action_Show_Status_Bar, &QAction::triggered, statusBar(), &QStatusBar::setVisible); + 
connect(ui.action_Reset_Window_Size, &QAction::triggered, this, &GMainWindow::ResetWindowSize); // Fullscreen ui.action_Fullscreen->setShortcut( @@ -1154,39 +1155,61 @@ void GMainWindow::OnGameListLoadFile(QString game_path) { BootGame(game_path); } -void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target) { +void GMainWindow::OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path) { std::string path; QString open_target; + + const auto v_file = Core::GetGameFileFromPath(vfs, game_path); + const auto loader = Loader::GetLoader(v_file); + FileSys::NACP control{}; + u64 program_id{}; + + loader->ReadControlData(control); + loader->ReadProgramId(program_id); + + const bool has_user_save{control.GetDefaultNormalSaveSize() > 0}; + const bool has_device_save{control.GetDeviceSaveDataSize() > 0}; + + ASSERT_MSG(has_user_save != has_device_save, "Game uses both user and device savedata?"); + switch (target) { case GameListOpenTarget::SaveData: { open_target = tr("Save Data"); const std::string nand_dir = FileUtil::GetUserPath(FileUtil::UserPath::NANDDir); ASSERT(program_id != 0); - const auto select_profile = [this] { - QtProfileSelectionDialog dialog(this); - dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint | - Qt::WindowSystemMenuHint | Qt::WindowCloseButtonHint); - dialog.setWindowModality(Qt::WindowModal); + if (has_user_save) { + // User save data + const auto select_profile = [this] { + QtProfileSelectionDialog dialog(this); + dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint | + Qt::WindowSystemMenuHint | Qt::WindowCloseButtonHint); + dialog.setWindowModality(Qt::WindowModal); - if (dialog.exec() == QDialog::Rejected) { - return -1; - } + if (dialog.exec() == QDialog::Rejected) { + return -1; + } - return dialog.GetIndex(); - }; + return dialog.GetIndex(); + }; - const auto index = select_profile(); - if (index == -1) { - return; - } + const auto index = 
select_profile(); + if (index == -1) { + return; + } - Service::Account::ProfileManager manager; - const auto user_id = manager.GetUser(static_cast<std::size_t>(index)); - ASSERT(user_id); - path = nand_dir + FileSys::SaveDataFactory::GetFullPath(FileSys::SaveDataSpaceId::NandUser, - FileSys::SaveDataType::SaveData, - program_id, user_id->uuid, 0); + Service::Account::ProfileManager manager; + const auto user_id = manager.GetUser(static_cast<std::size_t>(index)); + ASSERT(user_id); + path = nand_dir + FileSys::SaveDataFactory::GetFullPath( + FileSys::SaveDataSpaceId::NandUser, + FileSys::SaveDataType::SaveData, program_id, user_id->uuid, 0); + } else { + // Device save data + path = nand_dir + FileSys::SaveDataFactory::GetFullPath( + FileSys::SaveDataSpaceId::NandUser, + FileSys::SaveDataType::SaveData, program_id, {}, 0); + } if (!FileUtil::Exists(path)) { FileUtil::CreateFullPath(path); @@ -1829,6 +1852,20 @@ void GMainWindow::ToggleWindowMode() { } } +void GMainWindow::ResetWindowSize() { + const auto aspect_ratio = Layout::EmulationAspectRatio( + static_cast<Layout::AspectRatio>(Settings::values.aspect_ratio), + static_cast<float>(Layout::ScreenUndocked::Height) / Layout::ScreenUndocked::Width); + if (!ui.action_Single_Window_Mode->isChecked()) { + render_window->resize(Layout::ScreenUndocked::Height / aspect_ratio, + Layout::ScreenUndocked::Height); + } else { + resize(Layout::ScreenUndocked::Height / aspect_ratio, + Layout::ScreenUndocked::Height + menuBar()->height() + + (ui.action_Show_Status_Bar->isChecked() ? 
statusBar()->height() : 0)); + } +} + void GMainWindow::OnConfigure() { const auto old_theme = UISettings::values.theme; const bool old_discord_presence = UISettings::values.enable_discord_presence; diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 60b17c54a..4f4c8ddbe 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -183,7 +183,7 @@ private slots: void OnMenuReportCompatibility(); /// Called whenever a user selects a game in the game list widget. void OnGameListLoadFile(QString game_path); - void OnGameListOpenFolder(u64 program_id, GameListOpenTarget target); + void OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path); void OnTransferableShaderCacheOpenFile(u64 program_id); void OnGameListDumpRomFS(u64 program_id, const std::string& game_path); void OnGameListCopyTID(u64 program_id); @@ -208,6 +208,7 @@ private slots: void ShowFullscreen(); void HideFullscreen(); void ToggleWindowMode(); + void ResetWindowSize(); void OnCaptureScreenshot(); void OnCoreError(Core::System::ResultStatus, std::string); void OnReinitializeKeys(ReinitializeKeyBehavior behavior); diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index ae414241e..97c90f50b 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui @@ -6,8 +6,8 @@ <rect> <x>0</x> <y>0</y> - <width>1081</width> - <height>730</height> + <width>1280</width> + <height>720</height> </rect> </property> <property name="windowTitle"> @@ -44,7 +44,7 @@ <rect> <x>0</x> <y>0</y> - <width>1081</width> + <width>1280</width> <height>21</height> </rect> </property> @@ -96,6 +96,7 @@ <addaction name="action_Display_Dock_Widget_Headers"/> <addaction name="action_Show_Filter_Bar"/> <addaction name="action_Show_Status_Bar"/> + <addaction name="action_Reset_Window_Size"/> <addaction name="separator"/> <addaction name="menu_View_Debugging"/> </widget> @@ -215,6 +216,11 @@ <string>Show Status Bar</string> </property> </action> + <action name="action_Reset_Window_Size"> + <property name="text"> + <string>Reset Window 
Size</string> + </property> + </action> <action name="action_Fullscreen"> <property name="checkable"> <bool>true</bool> diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 8476a5a16..c20d48c42 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -367,6 +367,9 @@ void Config::ReadValues() { Settings::values.custom_rtc = std::nullopt; } + Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); + Settings::values.time_zone_index = sdl2_config->GetInteger("System", "time_zone_index", 0); + // Core Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); @@ -394,6 +397,8 @@ void Config::ReadValues() { sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); Settings::values.use_vsync = static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)); + Settings::values.use_assembly_shaders = + sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false); Settings::values.use_fast_gpu_time = sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true); @@ -409,8 +414,6 @@ void Config::ReadValues() { Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1)); - Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); - // Miscellaneous Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Trace"); Settings::values.use_dev_keys = sdl2_config->GetBoolean("Miscellaneous", "use_dev_keys", false); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 60b1a62fa..abc6e6e65 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -134,6 +134,10 @@ max_anisotropy = # 0 (default): Off, 1: On use_vsync = +# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required. 
+# 0 (default): Off, 1: On +use_assembly_shaders = + # Turns on the frame limiter, which will limit frames output to the target game speed # 0: Off, 1: On (default) use_frame_limit = @@ -262,6 +266,10 @@ language_index = # -1: Auto-select (default), 0: Japan, 1: USA, 2: Europe, 3: Australia, 4: China, 5: Korea, 6: Taiwan region_value = +# The system time zone that yuzu will use during emulation +# 0: Auto-select (default), 1: Default (system archive value), Others: Index for specified time zone +time_zone_index = + [Miscellaneous] # A filter which removes logs below a certain logging level. # Examples: *:Debug Kernel.SVC:Trace Service.*:Critical |