diff options
Diffstat (limited to 'src')
78 files changed, 2342 insertions, 1475 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 906c486fd..9c6f1c07c 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -74,10 +74,12 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/decode/xmad.cpp" "${VIDEO_CORE}/shader/ast.cpp" "${VIDEO_CORE}/shader/ast.h" - "${VIDEO_CORE}/shader/control_flow.cpp" - "${VIDEO_CORE}/shader/control_flow.h" "${VIDEO_CORE}/shader/compiler_settings.cpp" "${VIDEO_CORE}/shader/compiler_settings.h" + "${VIDEO_CORE}/shader/const_buffer_locker.cpp" + "${VIDEO_CORE}/shader/const_buffer_locker.h" + "${VIDEO_CORE}/shader/control_flow.cpp" + "${VIDEO_CORE}/shader/control_flow.h" "${VIDEO_CORE}/shader/decode.cpp" "${VIDEO_CORE}/shader/expr.cpp" "${VIDEO_CORE}/shader/expr.h" @@ -95,11 +97,11 @@ add_custom_command(OUTPUT scm_rev.cpp ) add_library(common STATIC + algorithm.h alignment.h assert.h detached_tasks.cpp detached_tasks.h - binary_find.h bit_field.h bit_util.h cityhash.cpp diff --git a/src/common/binary_find.h b/src/common/algorithm.h index 5cc523bf9..e21b1373c 100644 --- a/src/common/binary_find.h +++ b/src/common/algorithm.h @@ -5,6 +5,12 @@ #pragma once #include <algorithm> +#include <functional> + +// Algorithms that operate on iterators, much like the <algorithm> header. +// +// Note: If the algorithm is not general-purpose and/or doesn't operate on iterators, +// it should probably not be placed within this header. namespace Common { diff --git a/src/common/hash.h b/src/common/hash.h index 40194d1ee..ebd4125e2 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -6,6 +6,8 @@ #include <cstddef> #include <cstring> +#include <utility> +#include <boost/functional/hash.hpp> #include "common/cityhash.h" #include "common/common_types.h" @@ -68,4 +70,13 @@ struct HashableStruct { } }; +struct PairHash { + template <class T1, class T2> + std::size_t operator()(const std::pair<T1, T2>& pair) const noexcept { + std::size_t seed = std::hash<T1>()(pair.first); + boost::hash_combine(seed, std::hash<T2>()(pair.second)); + return seed; + } +}; + } // namespace Common diff --git a/src/core/core.cpp b/src/core/core.cpp index d79045eea..eba17218a 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -112,8 +112,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, } struct System::Impl { explicit Impl(System& system) - : kernel{system}, fs_controller{system}, cpu_core_manager{system}, - applet_manager{system}, reporter{system} {} + : kernel{system}, fs_controller{system}, cpu_core_manager{system}, reporter{system}, + applet_manager{system} {} Cpu& CurrentCpuCore() { return cpu_core_manager.GetCurrentCore(); @@ -240,22 +240,27 @@ struct System::Impl { } void Shutdown() { - // Log last frame performance stats - const auto perf_results = GetAndResetPerfStats(); - telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", - perf_results.emulation_speed * 100.0); - telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", - perf_results.game_fps); - telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", - perf_results.frametime * 1000.0); - telemetry_session->AddField(Telemetry::FieldType::Performance, "Mean_Frametime_MS", - perf_stats->GetMeanFrametime()); + // Log last frame performance stats if game was loded + if (perf_stats) { + const auto perf_results = GetAndResetPerfStats(); + telemetry_session->AddField(Telemetry::FieldType::Performance, + "Shutdown_EmulationSpeed", + perf_results.emulation_speed * 100.0); + telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", + perf_results.game_fps); + telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", + perf_results.frametime * 1000.0); + telemetry_session->AddField(Telemetry::FieldType::Performance, "Mean_Frametime_MS", + perf_stats->GetMeanFrametime()); + } lm_manager.Flush(); is_powered_on = false; exit_lock = false; + gpu_core->WaitIdle(); + // Shutdown emulation session renderer.reset(); GDBStub::Shutdown(); diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 941ebc93a..3a32d5b41 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -1140,8 +1140,9 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called, kind={:08X}", static_cast<u8>(kind)); if (kind == LaunchParameterKind::ApplicationSpecific && !launch_popped_application_specific) { - const auto backend = BCAT::CreateBackendFromSettings( - [this](u64 tid) { return system.GetFileSystemController().GetBCATDirectory(tid); }); + const auto backend = BCAT::CreateBackendFromSettings(system, [this](u64 tid) { + return system.GetFileSystemController().GetBCATDirectory(tid); + }); const auto build_id_full = system.GetCurrentProcessBuildID(); u64 build_id{}; std::memcpy(&build_id, build_id_full.data(), sizeof(u64)); diff --git a/src/core/hle/service/apm/controller.cpp b/src/core/hle/service/apm/controller.cpp index 073d0f6fa..25a886238 100644 --- a/src/core/hle/service/apm/controller.cpp +++ b/src/core/hle/service/apm/controller.cpp @@ -2,6 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> +#include <array> +#include <utility> + #include "common/logging/log.h" #include "core/core_timing.h" #include "core/hle/service/apm/controller.h" @@ -9,8 +13,7 @@ namespace Service::APM { -constexpr PerformanceConfiguration DEFAULT_PERFORMANCE_CONFIGURATION = - PerformanceConfiguration::Config7; +constexpr auto DEFAULT_PERFORMANCE_CONFIGURATION = PerformanceConfiguration::Config7; Controller::Controller(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing}, configs{ @@ -22,18 +25,35 @@ Controller::~Controller() = default; void Controller::SetPerformanceConfiguration(PerformanceMode mode, PerformanceConfiguration config) { - static const std::map<PerformanceConfiguration, u32> PCONFIG_TO_SPEED_MAP{ - {PerformanceConfiguration::Config1, 1020}, {PerformanceConfiguration::Config2, 1020}, - {PerformanceConfiguration::Config3, 1224}, {PerformanceConfiguration::Config4, 1020}, - {PerformanceConfiguration::Config5, 1020}, {PerformanceConfiguration::Config6, 1224}, - {PerformanceConfiguration::Config7, 1020}, {PerformanceConfiguration::Config8, 1020}, - {PerformanceConfiguration::Config9, 1020}, {PerformanceConfiguration::Config10, 1020}, - {PerformanceConfiguration::Config11, 1020}, {PerformanceConfiguration::Config12, 1020}, - {PerformanceConfiguration::Config13, 1785}, {PerformanceConfiguration::Config14, 1785}, - {PerformanceConfiguration::Config15, 1020}, {PerformanceConfiguration::Config16, 1020}, - }; - - SetClockSpeed(PCONFIG_TO_SPEED_MAP.find(config)->second); + static constexpr std::array<std::pair<PerformanceConfiguration, u32>, 16> config_to_speed{{ + {PerformanceConfiguration::Config1, 1020}, + {PerformanceConfiguration::Config2, 1020}, + {PerformanceConfiguration::Config3, 1224}, + {PerformanceConfiguration::Config4, 1020}, + {PerformanceConfiguration::Config5, 1020}, + {PerformanceConfiguration::Config6, 1224}, + {PerformanceConfiguration::Config7, 1020}, + {PerformanceConfiguration::Config8, 1020}, + {PerformanceConfiguration::Config9, 1020}, + {PerformanceConfiguration::Config10, 1020}, + {PerformanceConfiguration::Config11, 1020}, + {PerformanceConfiguration::Config12, 1020}, + {PerformanceConfiguration::Config13, 1785}, + {PerformanceConfiguration::Config14, 1785}, + {PerformanceConfiguration::Config15, 1020}, + {PerformanceConfiguration::Config16, 1020}, + }}; + + const auto iter = std::find_if(config_to_speed.cbegin(), config_to_speed.cend(), + [config](const auto& entry) { return entry.first == config; }); + + if (iter == config_to_speed.cend()) { + LOG_ERROR(Service_APM, "Invalid performance configuration value provided: {}", + static_cast<u32>(config)); + return; + } + + SetClockSpeed(iter->second); configs.insert_or_assign(mode, config); } @@ -48,7 +68,7 @@ void Controller::SetFromCpuBoostMode(CpuBoostMode mode) { BOOST_MODE_TO_CONFIG_MAP.at(static_cast<u32>(mode))); } -PerformanceMode Controller::GetCurrentPerformanceMode() { +PerformanceMode Controller::GetCurrentPerformanceMode() const { return Settings::values.use_docked_mode ? PerformanceMode::Docked : PerformanceMode::Handheld; } diff --git a/src/core/hle/service/apm/controller.h b/src/core/hle/service/apm/controller.h index 454caa6eb..af0c4cd34 100644 --- a/src/core/hle/service/apm/controller.h +++ b/src/core/hle/service/apm/controller.h @@ -56,7 +56,7 @@ public: void SetPerformanceConfiguration(PerformanceMode mode, PerformanceConfiguration config); void SetFromCpuBoostMode(CpuBoostMode mode); - PerformanceMode GetCurrentPerformanceMode(); + PerformanceMode GetCurrentPerformanceMode() const; PerformanceConfiguration GetCurrentPerformanceConfiguration(PerformanceMode mode); private: diff --git a/src/core/hle/service/bcat/backend/backend.cpp b/src/core/hle/service/bcat/backend/backend.cpp index 9d6946bc5..b86fda29a 100644 --- a/src/core/hle/service/bcat/backend/backend.cpp +++ b/src/core/hle/service/bcat/backend/backend.cpp @@ -10,8 +10,8 @@ namespace Service::BCAT { -ProgressServiceBackend::ProgressServiceBackend(std::string_view event_name) { - auto& kernel{Core::System::GetInstance().Kernel()}; +ProgressServiceBackend::ProgressServiceBackend(Kernel::KernelCore& kernel, + std::string_view event_name) { event = Kernel::WritableEvent::CreateEventPair( kernel, Kernel::ResetType::Automatic, std::string("ProgressServiceBackend:UpdateEvent:").append(event_name)); diff --git a/src/core/hle/service/bcat/backend/backend.h b/src/core/hle/service/bcat/backend/backend.h index 51dbd3316..ea4b16ad0 100644 --- a/src/core/hle/service/bcat/backend/backend.h +++ b/src/core/hle/service/bcat/backend/backend.h @@ -15,6 +15,14 @@ #include "core/hle/kernel/writable_event.h" #include "core/hle/result.h" +namespace Core { +class System; +} + +namespace Kernel { +class KernelCore; +} + namespace Service::BCAT { struct DeliveryCacheProgressImpl; @@ -88,7 +96,7 @@ public: void FinishDownload(ResultCode result); private: - explicit ProgressServiceBackend(std::string_view event_name); + explicit ProgressServiceBackend(Kernel::KernelCore& kernel, std::string_view event_name); Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent() const; DeliveryCacheProgressImpl& GetImpl(); @@ -145,6 +153,6 @@ public: std::optional<std::vector<u8>> GetLaunchParameter(TitleIDVersion title) override; }; -std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter); +std::unique_ptr<Backend> CreateBackendFromSettings(Core::System& system, DirectoryGetter getter); } // namespace Service::BCAT diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp index 64022982b..918159e11 100644 --- a/src/core/hle/service/bcat/backend/boxcat.cpp +++ b/src/core/hle/service/bcat/backend/boxcat.cpp @@ -104,14 +104,15 @@ std::string GetZIPFilePath(u64 title_id) { // If the error is something the user should know about (build ID mismatch, bad client version), // display an error. -void HandleDownloadDisplayResult(DownloadResult res) { +void HandleDownloadDisplayResult(const AM::Applets::AppletManager& applet_manager, + DownloadResult res) { if (res == DownloadResult::Success || res == DownloadResult::NoResponse || res == DownloadResult::GeneralWebError || res == DownloadResult::GeneralFSError || res == DownloadResult::NoMatchTitleId || res == DownloadResult::InvalidContentType) { return; } - const auto& frontend{Core::System::GetInstance().GetAppletManager().GetAppletFrontendSet()}; + const auto& frontend{applet_manager.GetAppletFrontendSet()}; frontend.error->ShowCustomErrorText( ResultCode(-1), "There was an error while attempting to use Boxcat.", DOWNLOAD_RESULT_LOG_MESSAGES[static_cast<std::size_t>(res)], [] {}); @@ -264,12 +265,13 @@ private: u64 build_id; }; -Boxcat::Boxcat(DirectoryGetter getter) : Backend(std::move(getter)) {} +Boxcat::Boxcat(AM::Applets::AppletManager& applet_manager_, DirectoryGetter getter) + : Backend(std::move(getter)), applet_manager{applet_manager_} {} Boxcat::~Boxcat() = default; -void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, - ProgressServiceBackend& progress, +void SynchronizeInternal(AM::Applets::AppletManager& applet_manager, DirectoryGetter dir_getter, + TitleIDVersion title, ProgressServiceBackend& progress, std::optional<std::string> dir_name = {}) { progress.SetNeedHLELock(true); @@ -295,7 +297,7 @@ void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, FileUtil::Delete(zip_path); } - HandleDownloadDisplayResult(res); + HandleDownloadDisplayResult(applet_manager, res); progress.FinishDownload(ERROR_GENERAL_BCAT_FAILURE); return; } @@ -364,17 +366,24 @@ void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, bool Boxcat::Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) { is_syncing.exchange(true); - std::thread([this, title, &progress] { SynchronizeInternal(dir_getter, title, progress); }) + + std::thread([this, title, &progress] { + SynchronizeInternal(applet_manager, dir_getter, title, progress); + }) .detach(); + return true; } bool Boxcat::SynchronizeDirectory(TitleIDVersion title, std::string name, ProgressServiceBackend& progress) { is_syncing.exchange(true); - std::thread( - [this, title, name, &progress] { SynchronizeInternal(dir_getter, title, progress, name); }) + + std::thread([this, title, name, &progress] { + SynchronizeInternal(applet_manager, dir_getter, title, progress, name); + }) .detach(); + return true; } @@ -420,7 +429,7 @@ std::optional<std::vector<u8>> Boxcat::GetLaunchParameter(TitleIDVersion title) FileUtil::Delete(path); } - HandleDownloadDisplayResult(res); + HandleDownloadDisplayResult(applet_manager, res); return std::nullopt; } } diff --git a/src/core/hle/service/bcat/backend/boxcat.h b/src/core/hle/service/bcat/backend/boxcat.h index 601151189..d65b42e58 100644 --- a/src/core/hle/service/bcat/backend/boxcat.h +++ b/src/core/hle/service/bcat/backend/boxcat.h @@ -9,6 +9,10 @@ #include <optional> #include "core/hle/service/bcat/backend/backend.h" +namespace Service::AM::Applets { +class AppletManager; +} + namespace Service::BCAT { struct EventStatus { @@ -20,12 +24,13 @@ struct EventStatus { /// Boxcat is yuzu's custom backend implementation of Nintendo's BCAT service. It is free to use and /// doesn't require a switch or nintendo account. The content is controlled by the yuzu team. class Boxcat final : public Backend { - friend void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, + friend void SynchronizeInternal(AM::Applets::AppletManager& applet_manager, + DirectoryGetter dir_getter, TitleIDVersion title, ProgressServiceBackend& progress, std::optional<std::string> dir_name); public: - explicit Boxcat(DirectoryGetter getter); + explicit Boxcat(AM::Applets::AppletManager& applet_manager_, DirectoryGetter getter); ~Boxcat() override; bool Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) override; @@ -53,6 +58,7 @@ private: class Client; std::unique_ptr<Client> client; + AM::Applets::AppletManager& applet_manager; }; } // namespace Service::BCAT diff --git a/src/core/hle/service/bcat/module.cpp b/src/core/hle/service/bcat/module.cpp index 4e4aa758b..6d9d1527d 100644 --- a/src/core/hle/service/bcat/module.cpp +++ b/src/core/hle/service/bcat/module.cpp @@ -125,7 +125,11 @@ private: class IBcatService final : public ServiceFramework<IBcatService> { public: explicit IBcatService(Core::System& system_, Backend& backend_) - : ServiceFramework("IBcatService"), system{system_}, backend{backend_} { + : ServiceFramework("IBcatService"), system{system_}, backend{backend_}, + progress{{ + ProgressServiceBackend{system_.Kernel(), "Normal"}, + ProgressServiceBackend{system_.Kernel(), "Directory"}, + }} { // clang-format off static const FunctionInfo functions[] = { {10100, &IBcatService::RequestSyncDeliveryCache, "RequestSyncDeliveryCache"}, @@ -249,10 +253,7 @@ private: Core::System& system; Backend& backend; - std::array<ProgressServiceBackend, static_cast<std::size_t>(SyncType::Count)> progress{ - ProgressServiceBackend{"Normal"}, - ProgressServiceBackend{"Directory"}, - }; + std::array<ProgressServiceBackend, static_cast<std::size_t>(SyncType::Count)> progress; }; void Module::Interface::CreateBcatService(Kernel::HLERequestContext& ctx) { @@ -557,12 +558,12 @@ void Module::Interface::CreateDeliveryCacheStorageServiceWithApplicationId( rb.PushIpcInterface<IDeliveryCacheStorageService>(fsc.GetBCATDirectory(title_id)); } -std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter) { - const auto backend = Settings::values.bcat_backend; - +std::unique_ptr<Backend> CreateBackendFromSettings([[maybe_unused]] Core::System& system, + DirectoryGetter getter) { #ifdef YUZU_ENABLE_BOXCAT - if (backend == "boxcat") - return std::make_unique<Boxcat>(std::move(getter)); + if (Settings::values.bcat_backend == "boxcat") { + return std::make_unique<Boxcat>(system.GetAppletManager(), std::move(getter)); + } #endif return std::make_unique<NullBackend>(std::move(getter)); @@ -571,7 +572,8 @@ std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter) { Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_, FileSystem::FileSystemController& fsc_, const char* name) : ServiceFramework(name), fsc{fsc_}, module{std::move(module_)}, - backend{CreateBackendFromSettings([&fsc_](u64 tid) { return fsc_.GetBCATDirectory(tid); })}, + backend{CreateBackendFromSettings(system_, + [&fsc_](u64 tid) { return fsc_.GetBCATDirectory(tid); })}, system{system_} {} Module::Interface::~Interface() = default; diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index a2b25a796..81bd2f3cb 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -583,36 +583,6 @@ bool Controller_NPad::SwapNpadAssignment(u32 npad_id_1, u32 npad_id_2) { return true; } -bool Controller_NPad::IsControllerSupported(NPadControllerType controller) { - if (controller == NPadControllerType::Handheld) { - // Handheld is not even a supported type, lets stop here - if (std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(), - NPAD_HANDHELD) == supported_npad_id_types.end()) { - return false; - } - // Handheld should not be supported in docked mode - if (Settings::values.use_docked_mode) { - return false; - } - } - switch (controller) { - case NPadControllerType::ProController: - return style.pro_controller; - case NPadControllerType::Handheld: - return style.handheld; - case NPadControllerType::JoyDual: - return style.joycon_dual; - case NPadControllerType::JoyLeft: - return style.joycon_left; - case NPadControllerType::JoyRight: - return style.joycon_right; - case NPadControllerType::Pokeball: - return style.pokeball; - default: - return false; - } -} - Controller_NPad::LedPattern Controller_NPad::GetLedPattern(u32 npad_id) { if (npad_id == npad_id_list.back() || npad_id == npad_id_list[npad_id_list.size() - 2]) { // These are controllers without led patterns @@ -659,25 +629,24 @@ void Controller_NPad::ClearAllConnectedControllers() { } void Controller_NPad::DisconnectAllConnectedControllers() { - std::for_each(connected_controllers.begin(), connected_controllers.end(), - [](ControllerHolder& controller) { controller.is_connected = false; }); + for (ControllerHolder& controller : connected_controllers) { + controller.is_connected = false; + } } void Controller_NPad::ConnectAllDisconnectedControllers() { - std::for_each(connected_controllers.begin(), connected_controllers.end(), - [](ControllerHolder& controller) { - if (controller.type != NPadControllerType::None && !controller.is_connected) { - controller.is_connected = false; - } - }); + for (ControllerHolder& controller : connected_controllers) { + if (controller.type != NPadControllerType::None && !controller.is_connected) { + controller.is_connected = true; + } + } } void Controller_NPad::ClearAllControllers() { - std::for_each(connected_controllers.begin(), connected_controllers.end(), - [](ControllerHolder& controller) { - controller.type = NPadControllerType::None; - controller.is_connected = false; - }); + for (ControllerHolder& controller : connected_controllers) { + controller.type = NPadControllerType::None; + controller.is_connected = false; + } } u32 Controller_NPad::GetAndResetPressState() { @@ -685,10 +654,10 @@ u32 Controller_NPad::GetAndResetPressState() { } bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const { - const bool support_handheld = - std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(), NPAD_HANDHELD) != - supported_npad_id_types.end(); if (controller == NPadControllerType::Handheld) { + const bool support_handheld = + std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(), + NPAD_HANDHELD) != supported_npad_id_types.end(); // Handheld is not even a supported type, lets stop here if (!support_handheld) { return false; @@ -700,6 +669,7 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const return true; } + if (std::any_of(supported_npad_id_types.begin(), supported_npad_id_types.end(), [](u32 npad_id) { return npad_id <= MAX_NPAD_ID; })) { switch (controller) { @@ -717,6 +687,7 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const return false; } } + return false; } @@ -795,6 +766,7 @@ Controller_NPad::NPadControllerType Controller_NPad::DecideBestController( priority_list.push_back(NPadControllerType::JoyLeft); priority_list.push_back(NPadControllerType::JoyRight); priority_list.push_back(NPadControllerType::JoyDual); + break; } const auto iter = std::find_if(priority_list.begin(), priority_list.end(), diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index 1bc3d55d6..16c4caa1f 100644 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h @@ -301,6 +301,11 @@ private: bool is_connected; }; + void InitNewlyAddedControler(std::size_t controller_idx); + bool IsControllerSupported(NPadControllerType controller) const; + NPadControllerType DecideBestController(NPadControllerType priority) const; + void RequestPadStateUpdate(u32 npad_id); + u32 press_state{}; NPadType style{}; @@ -321,12 +326,7 @@ private: std::array<ControllerHolder, 10> connected_controllers{}; bool can_controllers_vibrate{true}; - void InitNewlyAddedControler(std::size_t controller_idx); - bool IsControllerSupported(NPadControllerType controller) const; - NPadControllerType DecideBestController(NPadControllerType priority) const; - void RequestPadStateUpdate(u32 npad_id); std::array<ControllerPad, 10> npad_pad_states{}; - bool IsControllerSupported(NPadControllerType controller); bool is_in_lr_assignment_mode{false}; Core::System& system; }; diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index f764388bc..3f7b8e670 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/core_timing.h" #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" #include "core/hle/service/nvdrv/devices/nvmap.h" #include "core/perf_stats.h" @@ -38,7 +39,10 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 transform, crop_rect}; system.GetPerfStats().EndGameFrame(); + system.GetPerfStats().EndSystemFrame(); system.GPU().SwapBuffers(&framebuffer); + system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); + system.GetPerfStats().BeginSystemFrame(); } } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index eb88fee1b..b27ee0502 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -63,16 +63,26 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& return NvResult::BadParameter; } + u32 event_id = params.value & 0x00FF; + + if (event_id >= MaxNvEvents) { + std::memcpy(output.data(), ¶ms, sizeof(params)); + return NvResult::BadParameter; + } + + auto event = events_interface.events[event_id]; auto& gpu = system.GPU(); // This is mostly to take into account unimplemented features. As synced // gpu is always synced. if (!gpu.IsAsync()) { + event.writable->Signal(); return NvResult::Success; } auto lock = gpu.LockSync(); const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); const s32 diff = current_syncpoint_value - params.threshold; if (diff >= 0) { + event.writable->Signal(); params.value = current_syncpoint_value; std::memcpy(output.data(), ¶ms, sizeof(params)); return NvResult::Success; @@ -88,27 +98,6 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& return NvResult::Timeout; } - u32 event_id; - if (is_async) { - event_id = params.value & 0x00FF; - if (event_id >= MaxNvEvents) { - std::memcpy(output.data(), ¶ms, sizeof(params)); - return NvResult::BadParameter; - } - } else { - if (ctrl.fresh_call) { - const auto result = events_interface.GetFreeEvent(); - if (result) { - event_id = *result; - } else { - LOG_CRITICAL(Service_NVDRV, "No Free Events available!"); - event_id = params.value & 0x00FF; - } - } else { - event_id = ctrl.event_id; - } - } - EventState status = events_interface.status[event_id]; if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { events_interface.SetEventStatus(event_id, EventState::Waiting); @@ -120,7 +109,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; } params.value |= event_id; - events_interface.events[event_id].writable->Clear(); + event.writable->Clear(); gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); if (!is_async && ctrl.fresh_call) { ctrl.must_delay = true; diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index 5e0c23602..68d139cfb 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp @@ -134,7 +134,9 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 3, 1}; rb.Push(RESULT_SUCCESS); if (event_id < MaxNvEvents) { - rb.PushCopyObjects(nvdrv->GetEvent(event_id)); + auto event = nvdrv->GetEvent(event_id); + event->Clear(); + rb.PushCopyObjects(event); rb.Push<u32>(NvResult::Success); } else { rb.Push<u32>(0); diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 307a7e928..7bfb99e34 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -40,8 +40,8 @@ Module::Module(Core::System& system) { auto& kernel = system.Kernel(); for (u32 i = 0; i < MaxNvEvents; i++) { std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); - events_interface.events[i] = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::Automatic, event_label); + events_interface.events[i] = + Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, event_label); events_interface.status[i] = EventState::Free; events_interface.registered[i] = false; } diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index e1a07d3ee..55b68eb0c 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -14,8 +14,8 @@ namespace Service::NVFlinger { -BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) { - auto& kernel = Core::System::GetInstance().Kernel(); +BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id) + : id(id), layer_id(layer_id) { buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, "BufferQueue NativeHandle"); } diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 356bedb81..8f9b18547 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -15,6 +15,10 @@ #include "core/hle/kernel/writable_event.h" #include "core/hle/service/nvdrv/nvdata.h" +namespace Kernel { +class KernelCore; +} + namespace Service::NVFlinger { struct IGBPBuffer { @@ -44,7 +48,7 @@ public: NativeWindowFormat = 2, }; - BufferQueue(u32 id, u64 layer_id); + explicit BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id); ~BufferQueue(); enum class BufferTransformFlags : u32 { diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 2e4d707b9..cc9522aad 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -83,7 +83,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { const u64 layer_id = next_layer_id++; const u32 buffer_queue_id = next_buffer_queue_id++; - buffer_queues.emplace_back(buffer_queue_id, layer_id); + buffer_queues.emplace_back(system.Kernel(), buffer_queue_id, layer_id); display->CreateLayer(layer_id, buffer_queues.back()); return layer_id; } @@ -187,14 +187,18 @@ void NVFlinger::Compose() { MicroProfileFlip(); if (!buffer) { - // There was no queued buffer to draw, render previous frame - system.GetPerfStats().EndGameFrame(); - system.GPU().SwapBuffers({}); continue; } const auto& igbp_buffer = buffer->get().igbp_buffer; + const auto& gpu = system.GPU(); + const auto& multi_fence = buffer->get().multi_fence; + for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { + const auto& fence = multi_fence.fences[fence_id]; + gpu.WaitFence(fence.id, fence.value); + } + // Now send the buffer to the GPU for drawing. // TODO(Subv): Support more than just disp0. The display device selection is probably based // on which display we're drawing (Default, Internal, External, etc) diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index b56cb0627..10821d452 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp @@ -22,7 +22,7 @@ constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF; StandardVmCallbacks::StandardVmCallbacks(const Core::System& system, const CheatProcessMetadata& metadata) - : system(system), metadata(metadata) {} + : metadata(metadata), system(system) {} StandardVmCallbacks::~StandardVmCallbacks() = default; @@ -176,9 +176,8 @@ std::vector<CheatEntry> TextCheatParser::Parse(const Core::System& system, CheatEngine::CheatEngine(Core::System& system, std::vector<CheatEntry> cheats, const std::array<u8, 0x20>& build_id) - : system{system}, core_timing{system.CoreTiming()}, vm{std::make_unique<StandardVmCallbacks>( - system, metadata)}, - cheats(std::move(cheats)) { + : vm{std::make_unique<StandardVmCallbacks>(system, metadata)}, + cheats(std::move(cheats)), core_timing{system.CoreTiming()}, system{system} { metadata.main_nso_build_id = build_id; } diff --git a/src/core/memory/dmnt_cheat_vm.cpp b/src/core/memory/dmnt_cheat_vm.cpp index cc16d15a4..4f4fa5099 100644 --- a/src/core/memory/dmnt_cheat_vm.cpp +++ b/src/core/memory/dmnt_cheat_vm.cpp @@ -1133,8 +1133,8 @@ void DmntCheatVm::Execute(const CheatProcessMetadata& metadata) { case SaveRestoreRegisterOpType::ClearRegs: case SaveRestoreRegisterOpType::Restore: default: - src = registers.data(); - dst = saved_values.data(); + src = saved_values.data(); + dst = registers.data(); break; } for (std::size_t i = 0; i < NumRegisters; i++) { diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index eaa694ff8..cb6eda1b8 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -6,6 +6,7 @@ add_library(video_core STATIC dma_pusher.h debug_utils/debug_utils.cpp debug_utils/debug_utils.h + engines/const_buffer_engine_interface.h engines/const_buffer_info.h engines/engine_upload.cpp engines/engine_upload.h @@ -107,10 +108,12 @@ add_library(video_core STATIC shader/decode/other.cpp shader/ast.cpp shader/ast.h - shader/control_flow.cpp - shader/control_flow.h shader/compiler_settings.cpp shader/compiler_settings.h + shader/const_buffer_locker.cpp + shader/const_buffer_locker.h + shader/control_flow.cpp + shader/control_flow.h shader/decode.cpp shader/expr.cpp shader/expr.h diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h new file mode 100644 index 000000000..ac27b6cbe --- /dev/null +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -0,0 +1,119 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <type_traits> +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/textures/texture.h" + +namespace Tegra::Engines { + +enum class ShaderType : u32 { + Vertex = 0, + TesselationControl = 1, + TesselationEval = 2, + Geometry = 3, + Fragment = 4, + Compute = 5, +}; + +struct SamplerDescriptor { + union { + BitField<0, 20, Tegra::Shader::TextureType> texture_type; + BitField<20, 1, u32> is_array; + BitField<21, 1, u32> is_buffer; + BitField<22, 1, u32> is_shadow; + u32 raw{}; + }; + + bool operator==(const SamplerDescriptor& rhs) const noexcept { + return raw == rhs.raw; + } + + bool operator!=(const SamplerDescriptor& rhs) const noexcept { + return !operator==(rhs); + } + + static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { + SamplerDescriptor result; + switch (tic_texture_type) { + case Tegra::Texture::TextureType::Texture1D: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture2D: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture3D: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::TextureCubemap: + result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture1DArray: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture2DArray: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture1DBuffer: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(0); + result.is_buffer.Assign(1); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture2DNoMipmap: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::TextureCubeArray: + result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + default: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + } +}; +static_assert(std::is_trivially_copyable_v<SamplerDescriptor>); + +class ConstBufferEngineInterface { +public: + virtual ~ConstBufferEngineInterface() = default; + virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; + virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; + virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const = 0; + virtual u32 GetBoundBuffer() const = 0; +}; + +} // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 63d449135..91adef360 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -70,13 +70,31 @@ Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHan GetTSCEntry(tex_handle.tsc_id)}; } -u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const { +u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { + ASSERT(stage == ShaderType::Compute); const auto& buffer = launch_description.const_buffer_config[const_buffer]; u32 result; std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); return result; } +SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { + return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); +} + +SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const { + ASSERT(stage == ShaderType::Compute); + const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; + const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; + const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); + SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); + result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + return result; +} + void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 90cf650d2..8e7182727 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,6 +10,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" @@ -37,7 +38,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final { +class KeplerCompute final : public ConstBufferEngineInterface { public: explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); @@ -201,7 +202,16 @@ public: Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, std::size_t offset) const; - u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const; + u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; + + SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; + + SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const override; + + u32 GetBoundBuffer() const override { + return regs.tex_cb_index; + } private: Core::System& system; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7802fd808..514ed93fa 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -98,10 +98,10 @@ void Maxwell3D::InitializeRegisterDefaults() { mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; } -#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) +#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name)) void Maxwell3D::InitDirtySettings() { - const auto set_block = [this](const u32 start, const u32 range, const u8 position) { + const auto set_block = [this](std::size_t start, std::size_t range, u8 position) { const auto start_itr = dirty_pointers.begin() + start; const auto end_itr = start_itr + range; std::fill(start_itr, end_itr, position); @@ -112,10 +112,10 @@ void Maxwell3D::InitDirtySettings() { constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; - u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); + u8 rt_dirty_reg = DIRTY_REGS_POS(render_target); for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { set_block(rt_reg, registers_per_rt, rt_dirty_reg); - rt_dirty_reg++; + ++rt_dirty_reg; } constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; @@ -129,35 +129,35 @@ void Maxwell3D::InitDirtySettings() { constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; - u32 va_reg = DIRTY_REGS_POS(vertex_array); - u32 vi_reg = DIRTY_REGS_POS(vertex_instance); + u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array); + u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; vertex_reg += vertex_array_size) { - set_block(vertex_reg, 3, va_reg); + set_block(vertex_reg, 3, va_dirty_reg); // The divisor concerns vertex array instances - dirty_pointers[vertex_reg + 3] = vi_reg; - va_reg++; - vi_reg++; + dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg; + ++va_dirty_reg; + ++vi_dirty_reg; } constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; - va_reg = DIRTY_REGS_POS(vertex_array); + va_dirty_reg = DIRTY_REGS_POS(vertex_array); for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; vertex_reg += vertex_limit_size) { - set_block(vertex_reg, vertex_limit_size, va_reg); - va_reg++; + set_block(vertex_reg, vertex_limit_size, va_dirty_reg); + va_dirty_reg++; } constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); constexpr u32 vertex_instance_size = sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); constexpr u32 vertex_instance_end = vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; - vi_reg = DIRTY_REGS_POS(vertex_instance); + vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; vertex_reg += vertex_instance_size) { - set_block(vertex_reg, vertex_instance_size, vi_reg); - vi_reg++; + set_block(vertex_reg, vertex_instance_size, vi_dirty_reg); + vi_dirty_reg++; } set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), DIRTY_REGS_POS(vertex_attrib_format)); @@ -171,7 +171,7 @@ void Maxwell3D::InitDirtySettings() { // State // Viewport - constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); + constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport); constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); set_block(viewport_start, viewport_size, viewport_dirty_reg); @@ -198,7 +198,7 @@ void Maxwell3D::InitDirtySettings() { set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); // Depth Test - constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); + constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; @@ -223,12 +223,12 @@ void Maxwell3D::InitDirtySettings() { dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; // Color Mask - constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); + constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), color_mask_dirty_reg); // Blend State - constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); + constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), blend_state_dirty_reg); dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; @@ -237,12 +237,12 @@ void Maxwell3D::InitDirtySettings() { blend_state_dirty_reg); // Scissor State - constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); + constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), scissor_test_dirty_reg); // Polygon Offset - constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); + constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; @@ -251,7 +251,7 @@ void Maxwell3D::InitDirtySettings() { dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; // Depth bounds - constexpr u32 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values); + constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values); dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg; } @@ -478,7 +478,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { } void Maxwell3D::FlushMMEInlineDraw() { - LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), + LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); @@ -846,7 +846,8 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer.Clear(); } -u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { +u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { + ASSERT(stage != ShaderType::Compute); const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; const auto& buffer = shader_stage.const_buffers[const_buffer]; u32 result; @@ -854,4 +855,22 @@ u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u6 return result; } +SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { + return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); +} + +SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const { + ASSERT(stage != ShaderType::Compute); + const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; + const auto& tex_info_buffer = shader.const_buffers[const_buffer]; + const GPUVAddr tex_info_address = tex_info_buffer.address + offset; + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; + const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); + SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); + result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + return result; +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index e3f1047d5..987ad77b2 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -15,6 +15,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" +#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" @@ -44,7 +45,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final { +class Maxwell3D final : public ConstBufferEngineInterface { public: explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); @@ -1165,6 +1166,8 @@ public: struct DirtyRegs { static constexpr std::size_t NUM_REGS = 256; + static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max()); + union { struct { bool null_dirty; @@ -1257,7 +1260,16 @@ public: /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; - u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; + u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; + + SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; + + SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const override; + + u32 GetBoundBuffer() const override { + return regs.tex_cb_index; + } /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7a6355ce2..d3d05a866 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -574,7 +574,7 @@ enum class ShuffleOperation : u64 { }; union Instruction { - Instruction& operator=(const Instruction& instr) { + constexpr Instruction& operator=(const Instruction& instr) { value = instr.value; return *this; } @@ -1760,22 +1760,22 @@ public: class Matcher { public: - Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type) + constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type) : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {} - const char* GetName() const { + constexpr const char* GetName() const { return name; } - u16 GetMask() const { + constexpr u16 GetMask() const { return mask; } - Id GetId() const { + constexpr Id GetId() const { return id; } - Type GetType() const { + constexpr Type GetType() const { return type; } @@ -1784,7 +1784,7 @@ public: * @param instruction The instruction to test * @returns true if the given instruction matches. */ - bool Matches(u16 instruction) const { + constexpr bool Matches(u16 instruction) const { return (instruction & mask) == expected; } @@ -1818,7 +1818,7 @@ private: * A '0' in a bitstring indicates that a zero must be present at that bit position. * A '1' in a bitstring indicates that a one must be present at that bit position. */ - static auto GetMaskAndExpect(const char* const bitstring) { + static constexpr auto GetMaskAndExpect(const char* const bitstring) { u16 mask = 0, expect = 0; for (std::size_t i = 0; i < opcode_bitsize; i++) { const std::size_t bit_position = opcode_bitsize - i - 1; @@ -1835,15 +1835,15 @@ private: break; } } - return std::make_tuple(mask, expect); + return std::make_pair(mask, expect); } public: /// Creates a matcher that can match and parse instructions based on bitstring. - static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type, - const char* const name) { - const auto mask_expect = GetMaskAndExpect(bitstring); - return Matcher(name, std::get<0>(mask_expect), std::get<1>(mask_expect), op, type); + static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type, + const char* const name) { + const auto [mask, expected] = GetMaskAndExpect(bitstring); + return Matcher(name, mask, expected, op, type); } }; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 76cfe8107..095660115 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "common/assert.h" +#include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" @@ -17,6 +18,8 @@ namespace Tegra { +MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); + GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) : system{system}, renderer{renderer}, is_async{is_async} { auto& rasterizer{renderer.Rasterizer()}; @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::WaitFence(u32 syncpoint_id, u32 value) const { + // Synced GPU, is always in sync + if (!is_async) { + return; + } + MICROPROFILE_SCOPE(GPU_wait); + while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { + } +} + void GPU::IncrementSyncPoint(const u32 syncpoint_id) { syncpoints[syncpoint_id]++; std::lock_guard lock{sync_mutex}; @@ -326,7 +339,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); + block.timestamp = system.CoreTiming().GetTicks(); memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95b..dbca19f35 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -177,6 +177,12 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + // Waits for the GPU to finish working + virtual void WaitIdle() const = 0; + + /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. + void WaitFence(u32 syncpoint_id, u32 value) const; + void IncrementSyncPoint(u32 syncpoint_id); u32 GetSyncpointValue(u32 syncpoint_id) const; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index f2a3a390e..04222d060 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } +void GPUAsynch::WaitIdle() const { + gpu_thread.WaitIdle(); +} + } // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index a12f9bac4..1241ade1d 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -25,6 +25,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override; protected: void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 5eb1c461c..c71baee89 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -24,6 +24,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override {} protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fd..758a37f14 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,8 +5,6 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/frontend/scope_acquire_window_context.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; - synchronization_event = system.CoreTiming().RegisterEvent( - "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { - const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; - const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; - system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); + PushCommand(SubmitListCommand(std::move(entries))); } void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { @@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { InvalidateRegion(addr, size); } +void ThreadManager::WaitIdle() const { + while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { + } +} + u64 ThreadManager::PushCommand(CommandData&& command_data) { const u64 fence{++state.last_fence}; state.queue.Push(CommandDataContainer(std::move(command_data), fence)); return fence; } -MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -void SynchState::WaitForSynchronization(u64 fence) { - while (signaled_fence.load() < fence) - ; -} - } // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f3..08dc96bb3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -21,9 +21,6 @@ class DmaPusher; namespace Core { class System; -namespace Timing { -struct EventType; -} // namespace Timing } // namespace Core namespace VideoCommon::GPUThread { @@ -89,8 +86,6 @@ struct CommandDataContainer { struct SynchState final { std::atomic_bool is_running{true}; - void WaitForSynchronization(u64 fence); - using CommandQueue = Common::SPSCQueue<CommandDataContainer>; CommandQueue queue; u64 last_fence{}; @@ -121,6 +116,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(CacheAddr addr, u64 size); + // Wait until the gpu thread is idle. + void WaitIdle() const; + private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data); @@ -128,7 +126,6 @@ private: private: SynchState state; Core::System& system; - Core::Timing::EventType* synchronization_event{}; std::thread thread; std::thread::id thread_id; }; diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index dbaeac6db..42031d80a 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -11,6 +11,77 @@ MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { +namespace { +enum class Operation : u32 { + ALU = 0, + AddImmediate = 1, + ExtractInsert = 2, + ExtractShiftLeftImmediate = 3, + ExtractShiftLeftRegister = 4, + Read = 5, + Unused = 6, // This operation doesn't seem to be a valid encoding. + Branch = 7, +}; +} // Anonymous namespace + +enum class MacroInterpreter::ALUOperation : u32 { + Add = 0, + AddWithCarry = 1, + Subtract = 2, + SubtractWithBorrow = 3, + // Operations 4-7 don't seem to be valid encodings. + Xor = 8, + Or = 9, + And = 10, + AndNot = 11, + Nand = 12 +}; + +enum class MacroInterpreter::ResultOperation : u32 { + IgnoreAndFetch = 0, + Move = 1, + MoveAndSetMethod = 2, + FetchAndSend = 3, + MoveAndSend = 4, + FetchAndSetMethod = 5, + MoveAndSetMethodFetchAndSend = 6, + MoveAndSetMethodSend = 7 +}; + +enum class MacroInterpreter::BranchCondition : u32 { + Zero = 0, + NotZero = 1, +}; + +union MacroInterpreter::Opcode { + u32 raw; + BitField<0, 3, Operation> operation; + BitField<4, 3, ResultOperation> result_operation; + BitField<4, 1, BranchCondition> branch_condition; + // If set on a branch, then the branch doesn't have a delay slot. + BitField<5, 1, u32> branch_annul; + BitField<7, 1, u32> is_exit; + BitField<8, 3, u32> dst; + BitField<11, 3, u32> src_a; + BitField<14, 3, u32> src_b; + // The signed immediate overlaps the second source operand and the alu operation. + BitField<14, 18, s32> immediate; + + BitField<17, 5, ALUOperation> alu_operation; + + // Bitfield instructions data + BitField<17, 5, u32> bf_src_bit; + BitField<22, 5, u32> bf_size; + BitField<27, 5, u32> bf_dst_bit; + + u32 GetBitfieldMask() const { + return (1 << bf_size) - 1; + } + + s32 GetBranchTarget() const { + return static_cast<s32>(immediate * sizeof(u32)); + } +}; MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h index 76b6a895b..631146d89 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro_interpreter.h @@ -6,7 +6,6 @@ #include <array> #include <optional> -#include <vector> #include "common/bit_field.h" #include "common/common_types.h" @@ -28,75 +27,11 @@ public: void Execute(u32 offset, std::size_t num_parameters, const u32* parameters); private: - enum class Operation : u32 { - ALU = 0, - AddImmediate = 1, - ExtractInsert = 2, - ExtractShiftLeftImmediate = 3, - ExtractShiftLeftRegister = 4, - Read = 5, - Unused = 6, // This operation doesn't seem to be a valid encoding. - Branch = 7, - }; - - enum class ALUOperation : u32 { - Add = 0, - AddWithCarry = 1, - Subtract = 2, - SubtractWithBorrow = 3, - // Operations 4-7 don't seem to be valid encodings. - Xor = 8, - Or = 9, - And = 10, - AndNot = 11, - Nand = 12 - }; - - enum class ResultOperation : u32 { - IgnoreAndFetch = 0, - Move = 1, - MoveAndSetMethod = 2, - FetchAndSend = 3, - MoveAndSend = 4, - FetchAndSetMethod = 5, - MoveAndSetMethodFetchAndSend = 6, - MoveAndSetMethodSend = 7 - }; + enum class ALUOperation : u32; + enum class BranchCondition : u32; + enum class ResultOperation : u32; - enum class BranchCondition : u32 { - Zero = 0, - NotZero = 1, - }; - - union Opcode { - u32 raw; - BitField<0, 3, Operation> operation; - BitField<4, 3, ResultOperation> result_operation; - BitField<4, 1, BranchCondition> branch_condition; - BitField<5, 1, u32> - branch_annul; // If set on a branch, then the branch doesn't have a delay slot. - BitField<7, 1, u32> is_exit; - BitField<8, 3, u32> dst; - BitField<11, 3, u32> src_a; - BitField<14, 3, u32> src_b; - // The signed immediate overlaps the second source operand and the alu operation. - BitField<14, 18, s32> immediate; - - BitField<17, 5, ALUOperation> alu_operation; - - // Bitfield instructions data - BitField<17, 5, u32> bf_src_bit; - BitField<22, 5, u32> bf_size; - BitField<27, 5, u32> bf_dst_bit; - - u32 GetBitfieldMask() const { - return (1 << bf_size) - 1; - } - - s32 GetBranchTarget() const { - return static_cast<s32>(immediate * sizeof(u32)); - } - }; + union Opcode; union MethodAddress { u32 raw; @@ -149,9 +84,10 @@ private: Engines::Maxwell3D& maxwell3d; - u32 pc; ///< Current program counter - std::optional<u32> - delayed_pc; ///< Program counter to execute at after the delay slot is executed. + /// Current program counter + u32 pc; + /// Program counter to execute at after the delay slot is executed. + std::optional<u32> delayed_pc; static constexpr std::size_t NumMacroRegisters = 8; diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index ab71870ab..fe5f08ace 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -93,6 +93,7 @@ static constexpr ConversionArray morton_to_linear_fns = { MortonCopy<true, PixelFormat::DXT23_SRGB>, MortonCopy<true, PixelFormat::DXT45_SRGB>, MortonCopy<true, PixelFormat::BC7U_SRGB>, + MortonCopy<true, PixelFormat::R4G4B4A4U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, @@ -101,6 +102,16 @@ static constexpr ConversionArray morton_to_linear_fns = { MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_10X8>, MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_6X6>, + MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_10X10>, + MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_12X12>, + MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_8X6>, + MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_6X5>, + MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>, MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z24S8>, @@ -162,6 +173,17 @@ static constexpr ConversionArray linear_to_morton_fns = { MortonCopy<false, PixelFormat::DXT23_SRGB>, MortonCopy<false, PixelFormat::DXT45_SRGB>, MortonCopy<false, PixelFormat::BC7U_SRGB>, + MortonCopy<false, PixelFormat::R4G4B4A4U>, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, nullptr, nullptr, nullptr, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a85f730a8..9431d64ac 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { } void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { + std::lock_guard lock{pages_mutex}; const u64 page_start{addr >> Memory::PAGE_BITS}; const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; @@ -974,7 +975,8 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag } const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); + Tegra::Engines::ShaderType shader_type = static_cast<Tegra::Engines::ShaderType>(stage); + tex_handle.raw = maxwell3d.AccessConstBuffer32(shader_type, cbuf.first, cbuf.second); return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); }(); @@ -1004,7 +1006,8 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) } const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); + tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, + cbuf.first, cbuf.second); return compute.GetTextureInfo(tex_handle, entry.GetOffset()); }(); @@ -1049,7 +1052,8 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { } const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); + tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, + cbuf.first, cbuf.second); return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; }(); SetupImage(bindpoint, tic, entry); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9c10ebda3..c24a02d71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -9,6 +9,7 @@ #include <cstddef> #include <map> #include <memory> +#include <mutex> #include <optional> #include <tuple> #include <utility> @@ -230,6 +231,8 @@ private: using CachedPageMap = boost::icl::interval_map<u64, int>; CachedPageMap cached_pages; + + std::mutex pages_mutex; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 42ca3b1bd..f1b89165d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -3,13 +3,16 @@ // Refer to the license.txt file included. #include <mutex> +#include <optional> +#include <string> #include <thread> +#include <unordered_set> #include <boost/functional/hash.hpp> #include "common/assert.h" -#include "common/hash.h" #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" +#include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" @@ -21,18 +24,20 @@ namespace OpenGL { +using Tegra::Engines::ShaderType; +using VideoCommon::Shader::ConstBufferLocker; using VideoCommon::Shader::ProgramCode; +using VideoCommon::Shader::ShaderIR; + +namespace { // One UBO is always reserved for emulation values on staged shaders constexpr u32 STAGE_RESERVED_UBOS = 1; -struct UnspecializedShader { - std::string code; - GLShader::ShaderEntries entries; - ProgramType program_type; -}; +constexpr u32 STAGE_MAIN_OFFSET = 10; +constexpr u32 KERNEL_MAIN_OFFSET = 0; -namespace { +constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; /// Gets the address for the specified shader stage program GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { @@ -41,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) return gpu.regs.code_address.CodeAddress() + shader_config.offset; } +/// Gets if the current instruction offset is a scheduler instruction +constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { + // Sched instructions appear once every 4 instructions. + constexpr std::size_t SchedPeriod = 4; + const std::size_t absolute_offset = offset - main_offset; + return (absolute_offset % SchedPeriod) == 0; +} + +/// Calculates the size of a program stream +std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { + constexpr std::size_t start_offset = 10; + // This is the encoded version of BRA that jumps to itself. All Nvidia + // shaders end with one. + constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; + constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; + std::size_t offset = start_offset; + while (offset < program.size()) { + const u64 instruction = program[offset]; + if (!IsSchedInstruction(offset, start_offset)) { + if ((instruction & mask) == self_jumping_branch) { + // End on Maxwell's "nop" instruction + break; + } + if (instruction == 0) { + break; + } + } + offset++; + } + // The last instruction is included in the program size + return std::min(offset + 1, program.size()); +} + /// Gets the shader program code from memory for the specified address ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, const u8* host_ptr) { @@ -51,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g }); memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), program_code.size() * sizeof(u64)); + program_code.resize(CalculateProgramSize(program_code)); return program_code; } @@ -71,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) { } } -/// Gets if the current instruction offset is a scheduler instruction -constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { - // Sched instructions appear once every 4 instructions. - constexpr std::size_t SchedPeriod = 4; - const std::size_t absolute_offset = offset - main_offset; - return (absolute_offset % SchedPeriod) == 0; -} - /// Describes primitive behavior on geometry shaders constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { switch (primitive_mode) { @@ -121,110 +152,142 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) { return {}; } -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { - constexpr std::size_t start_offset = 10; - // This is the encoded version of BRA that jumps to itself. All Nvidia - // shaders end with one. - constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; - constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; - std::size_t offset = start_offset; - std::size_t size = start_offset * sizeof(u64); - while (offset < program.size()) { - const u64 instruction = program[offset]; - if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & mask) == self_jumping_branch) { - // End on Maxwell's "nop" instruction - break; - } - if (instruction == 0) { - break; - } - } - size += sizeof(u64); - offset++; - } - // The last instruction is included in the program size - return std::min(size + sizeof(u64), program.size() * sizeof(u64)); -} - /// Hashes one (or two) program streams u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, - const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { - if (size_a == 0) { - size_a = CalculateProgramSize(code); - } - u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); - if (program_type != ProgramType::VertexA) { - return unique_identifier; - } - // VertexA programs include two programs - - std::size_t seed = 0; - boost::hash_combine(seed, unique_identifier); - - if (size_b == 0) { - size_b = CalculateProgramSize(code_b); + const ProgramCode& code_b) { + u64 unique_identifier = boost::hash_value(code); + if (program_type == ProgramType::VertexA) { + // VertexA programs include two programs + boost::hash_combine(unique_identifier, boost::hash_value(code_b)); } - const u64 identifier_b = - Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b); - boost::hash_combine(seed, identifier_b); - return static_cast<u64>(seed); + return unique_identifier; } /// Creates an unspecialized program from code streams -GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, - ProgramCode program_code, ProgramCode program_code_b) { - GLShader::ShaderSetup setup(program_code); - setup.program.size_a = CalculateProgramSize(program_code); - setup.program.size_b = 0; - if (program_type == ProgramType::VertexA) { - // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. - // Conventional HW does not support this, so we combine VertexA and VertexB into one - // stage here. - setup.SetProgramB(program_code_b); - setup.program.size_b = CalculateProgramSize(program_code_b); - } - setup.program.unique_identifier = GetUniqueIdentifier( - program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); - +std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir, + const std::optional<ShaderIR>& ir_b) { switch (program_type) { case ProgramType::VertexA: case ProgramType::VertexB: - return GLShader::GenerateVertexShader(device, setup); + return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); case ProgramType::Geometry: - return GLShader::GenerateGeometryShader(device, setup); + return GLShader::GenerateGeometryShader(device, ir); case ProgramType::Fragment: - return GLShader::GenerateFragmentShader(device, setup); + return GLShader::GenerateFragmentShader(device, ir); case ProgramType::Compute: - return GLShader::GenerateComputeShader(device, setup); + return GLShader::GenerateComputeShader(device, ir); default: UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); return {}; } } -CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, - ProgramType program_type, const ProgramVariant& variant, - bool hint_retrievable = false) { +constexpr const char* GetProgramTypeName(ProgramType program_type) { + switch (program_type) { + case ProgramType::VertexA: + case ProgramType::VertexB: + return "VS"; + case ProgramType::TessellationControl: + return "TCS"; + case ProgramType::TessellationEval: + return "TES"; + case ProgramType::Geometry: + return "GS"; + case ProgramType::Fragment: + return "FS"; + case ProgramType::Compute: + return "CS"; + } + return "UNK"; +} + +Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) { + switch (program_type) { + case ProgramType::VertexA: + case ProgramType::VertexB: + return Tegra::Engines::ShaderType::Vertex; + case ProgramType::TessellationControl: + return Tegra::Engines::ShaderType::TesselationControl; + case ProgramType::TessellationEval: + return Tegra::Engines::ShaderType::TesselationEval; + case ProgramType::Geometry: + return Tegra::Engines::ShaderType::Geometry; + case ProgramType::Fragment: + return Tegra::Engines::ShaderType::Fragment; + case ProgramType::Compute: + return Tegra::Engines::ShaderType::Compute; + } + UNREACHABLE(); + return {}; +} + +std::string GetShaderId(u64 unique_identifier, ProgramType program_type) { + return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier); +} + +Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface( + Core::System& system, ProgramType program_type) { + if (program_type == ProgramType::Compute) { + return system.GPU().KeplerCompute(); + } else { + return system.GPU().Maxwell3D(); + } +} + +std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ProgramType program_type) { + return std::make_unique<ConstBufferLocker>(GetEnginesShaderType(program_type), + GetConstBufferEngineInterface(system, program_type)); +} + +void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { + for (const auto& key : usage.keys) { + const auto [buffer, offset] = key.first; + locker.InsertKey(buffer, offset, key.second); + } + for (const auto& [offset, sampler] : usage.bound_samplers) { + locker.InsertBoundSampler(offset, sampler); + } + for (const auto& [key, sampler] : usage.bindless_samplers) { + const auto [buffer, offset] = key; + locker.InsertBindlessSampler(buffer, offset, sampler); + } +} + +CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, + const ProgramCode& program_code, const ProgramCode& program_code_b, + const ProgramVariant& variant, ConstBufferLocker& locker, + bool hint_retrievable = false) { + LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type)); + + const bool is_compute = program_type == ProgramType::Compute; + const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker); + std::optional<ShaderIR> ir_b; + if (!program_code_b.empty()) { + ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker); + } + const auto entries = GLShader::GetEntries(ir); + auto base_bindings{variant.base_bindings}; const auto primitive_mode{variant.primitive_mode}; const auto texture_buffer_usage{variant.texture_buffer_usage}; - std::string source = R"(#version 430 core + std::string source = fmt::format(R"(// {} +#version 430 core #extension GL_ARB_separate_shader_objects : enable #extension GL_ARB_shader_viewport_layer_array : enable #extension GL_EXT_shader_image_load_formatted : enable #extension GL_NV_gpu_shader5 : enable #extension GL_NV_shader_thread_group : enable #extension GL_NV_shader_thread_shuffle : enable -)"; - if (program_type == ProgramType::Compute) { +)", + GetShaderId(unique_identifier, program_type)); + if (is_compute) { source += "#extension GL_ARB_compute_variable_group_size : require\n"; } source += '\n'; - if (program_type != ProgramType::Compute) { + if (!is_compute) { source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); } @@ -268,7 +331,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn } source += '\n'; - source += code; + source += GenerateGLSL(device, program_type, ir, ir_b); OGLShader shader; shader.Create(source.c_str(), GetShaderType(program_type)); @@ -278,85 +341,97 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn return program; } -std::set<GLenum> GetSupportedFormats() { - std::set<GLenum> supported_formats; - +std::unordered_set<GLenum> GetSupportedFormats() { GLint num_formats{}; glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); std::vector<GLint> formats(num_formats); glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - for (const GLint format : formats) + std::unordered_set<GLenum> supported_formats; + for (const GLint format : formats) { supported_formats.insert(static_cast<GLenum>(format)); + } return supported_formats; } } // Anonymous namespace CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, - GLShader::ProgramResult result) - : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, - unique_identifier{params.unique_identifier}, program_type{program_type}, - disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, - entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} + GLShader::ShaderEntries entries, ProgramCode program_code, + ProgramCode program_code_b) + : RasterizerCacheObject{params.host_ptr}, system{params.system}, + disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, + unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries}, + program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} { + if (!params.precompiled_variants) { + return; + } + for (const auto& pair : *params.precompiled_variants) { + auto locker = MakeLocker(system, program_type); + const auto& usage = pair->first; + FillLocker(*locker, usage); + + std::unique_ptr<LockerVariant>* locker_variant = nullptr; + const auto it = + std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) { + return variant->locker->HasEqualKeys(*locker); + }); + if (it == locker_variants.end()) { + locker_variant = &locker_variants.emplace_back(); + *locker_variant = std::make_unique<LockerVariant>(); + locker_variant->get()->locker = std::move(locker); + } else { + locker_variant = &*it; + } + locker_variant->get()->programs.emplace(usage.variant, pair->second); + } +} Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode&& program_code, - ProgramCode&& program_code_b) { - const auto code_size{CalculateProgramSize(program_code)}; - const auto code_size_b{CalculateProgramSize(program_code_b)}; - auto result{ - CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; - if (result.first.empty()) { - // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now - return {}; - } - + ProgramCode program_code, ProgramCode program_code_b) { params.disk_cache.SaveRaw(ShaderDiskCacheRaw( - params.unique_identifier, GetProgramType(program_type), - static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), - std::move(program_code), std::move(program_code_b))); - + params.unique_identifier, GetProgramType(program_type), program_code, program_code_b)); + + ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type))); + const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional<ShaderIR> ir_b; + // if (!program_code_b.empty()) { + // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET); + // } return std::shared_ptr<CachedShader>( - new CachedShader(params, GetProgramType(program_type), std::move(result))); + new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir), + std::move(program_code), std::move(program_code_b))); } -Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - GLShader::ProgramResult result) { - return std::shared_ptr<CachedShader>( - new CachedShader(params, GetProgramType(program_type), std::move(result))); -} - -Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { - auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; - - const auto code_size{CalculateProgramSize(code)}; - params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, - static_cast<u32>(code_size / sizeof(u64)), 0, - std::move(code), {})); +Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { + params.disk_cache.SaveRaw( + ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code)); - return std::shared_ptr<CachedShader>( - new CachedShader(params, ProgramType::Compute, std::move(result))); + ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute); + const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); + return std::shared_ptr<CachedShader>(new CachedShader( + params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {})); } -Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, - GLShader::ProgramResult result) { - return std::shared_ptr<CachedShader>( - new CachedShader(params, ProgramType::Compute, std::move(result))); +Shader CachedShader::CreateFromCache(const ShaderParameters& params, + const UnspecializedShader& unspecialized) { + return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type, + unspecialized.entries, unspecialized.code, + unspecialized.code_b)); } std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { - const auto [entry, is_cache_miss] = programs.try_emplace(variant); + UpdateVariant(); + + const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant); auto& program = entry->second; if (is_cache_miss) { - program = TryLoadProgram(variant); - if (!program) { - program = SpecializeShader(code, entries, program_type, variant); - disk_cache.SaveUsage(GetUsage(variant)); - } + program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, + variant, *curr_variant->locker); + disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker)); LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); } @@ -372,18 +447,33 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar return {program->handle, base_bindings}; } -CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { - const auto found = precompiled_programs.find(GetUsage(variant)); - if (found == precompiled_programs.end()) { - return {}; +void CachedShader::UpdateVariant() { + if (curr_variant && !curr_variant->locker->IsConsistent()) { + curr_variant = nullptr; + } + if (!curr_variant) { + for (auto& variant : locker_variants) { + if (variant->locker->IsConsistent()) { + curr_variant = variant.get(); + } + } + } + if (!curr_variant) { + auto& new_variant = locker_variants.emplace_back(); + new_variant = std::make_unique<LockerVariant>(); + new_variant->locker = MakeLocker(system, program_type); + curr_variant = new_variant.get(); } - return found->second; } -ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { +ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, + const ConstBufferLocker& locker) const { ShaderDiskCacheUsage usage; usage.unique_identifier = unique_identifier; usage.variant = variant; + usage.keys = locker.GetKeys(); + usage.bound_samplers = locker.GetBoundSamplers(); + usage.bindless_samplers = locker.GetBindlessSamplers(); return usage; } @@ -399,18 +489,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } const auto [raws, shader_usages] = *transferable; - - auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); - - const auto supported_formats{GetSupportedFormats()}; - const auto unspecialized_shaders{ - GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)}; - if (stop_loading) { + if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { return; } - // Track if precompiled cache was altered during loading to know if we have to serialize the - // virtual precompiled cache file back to the hard drive + const auto dumps = disk_cache.LoadPrecompiled(); + const auto supported_formats = GetSupportedFormats(); + + // Track if precompiled cache was altered during loading to know if we have to + // serialize the virtual precompiled cache file back to the hard drive bool precompiled_cache_altered = false; // Inform the frontend about shader build initialization @@ -433,9 +520,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } const auto& usage{shader_usages[i]}; - LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})", - usage.unique_identifier, i, shader_usages.size()); - const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; const auto dump{dumps.find(usage)}; @@ -449,21 +533,28 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } if (!shader) { - shader = SpecializeShader(unspecialized.code, unspecialized.entries, - unspecialized.program_type, usage.variant, true); + auto locker{MakeLocker(system, unspecialized.program_type)}; + FillLocker(*locker, usage); + shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, + unspecialized.code, unspecialized.code_b, usage.variant, + *locker, true); } - std::scoped_lock lock(mutex); + std::scoped_lock lock{mutex}; if (callback) { callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, shader_usages.size()); } precompiled_programs.emplace(usage, std::move(shader)); + + // TODO(Rodrigo): Is there a better way to do this? + precompiled_variants[usage.unique_identifier].push_back( + precompiled_programs.find(usage)); } }; - const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)}; + const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; const std::size_t bucket_size{shader_usages.size() / num_workers}; std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); std::vector<std::thread> threads(num_workers); @@ -483,7 +574,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, if (compilation_failed) { // Invalidate the precompiled cache if a shader dumped shader was rejected disk_cache.InvalidatePrecompiled(); - dumps.clear(); precompiled_cache_altered = true; return; } @@ -491,8 +581,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } - // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before - // precompiling them + // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw + // before precompiling them for (std::size_t i = 0; i < shader_usages.size(); ++i) { const auto& usage{shader_usages[i]}; @@ -508,9 +598,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } -CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) { +const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { + const auto it = precompiled_variants.find(unique_identifier); + return it == precompiled_variants.end() ? nullptr : &it->second; +} +CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( + const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) { if (supported_formats.find(dump.binary_format) == supported_formats.end()) { LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); return {}; @@ -532,56 +626,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( return shader; } -std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders( +bool ShaderCacheOpenGL::GenerateUnspecializedShaders( const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, - const std::vector<ShaderDiskCacheRaw>& raws, - const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) { - std::unordered_map<u64, UnspecializedShader> unspecialized; - + const std::vector<ShaderDiskCacheRaw>& raws) { if (callback) { callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); } for (std::size_t i = 0; i < raws.size(); ++i) { if (stop_loading) { - return {}; + return false; } const auto& raw{raws[i]}; const u64 unique_identifier{raw.GetUniqueIdentifier()}; const u64 calculated_hash{ GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; if (unique_identifier != calculated_hash) { - LOG_ERROR( - Render_OpenGL, - "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache", - raw.GetUniqueIdentifier(), calculated_hash); + LOG_ERROR(Render_OpenGL, + "Invalid hash in entry={:016x} (obtained hash={:016x}) - " + "removing shader cache", + raw.GetUniqueIdentifier(), calculated_hash); disk_cache.InvalidateTransferable(); - return {}; + return false; } - GLShader::ProgramResult result; - if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) { - // If it's stored in the precompiled file, avoid decompiling it here - const auto& stored_decompiled{it->second}; - result = {stored_decompiled.code, stored_decompiled.entries}; - } else { - // Otherwise decompile the shader at boot and save the result to the decompiled file - result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(), - raw.GetProgramCodeB()); - disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); - } - - precompiled_shaders.insert({unique_identifier, result}); - - unspecialized.insert( - {raw.GetUniqueIdentifier(), - {std::move(result.first), std::move(result.second), raw.GetProgramType()}}); + const u32 main_offset = + raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType())); + const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional<ShaderIR> ir_b; + // if (raw.HasProgramA()) { + // ir_b.emplace(raw.GetProgramCodeB(), main_offset); + // } + + UnspecializedShader unspecialized; + unspecialized.entries = GLShader::GetEntries(ir); + unspecialized.program_type = raw.GetProgramType(); + unspecialized.code = raw.GetProgramCode(); + unspecialized.code_b = raw.GetProgramCodeB(); + unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); if (callback) { callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); } } - return unspecialized; + return true; } Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { @@ -590,37 +680,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { } auto& memory_manager{system.GPU().MemoryManager()}; - const GPUVAddr program_addr{GetShaderAddress(system, program)}; + const GPUVAddr address{GetShaderAddress(system, program)}; // Look up shader in the cache based on address - const auto host_ptr{memory_manager.GetPointer(program_addr)}; + const auto host_ptr{memory_manager.GetPointer(address)}; Shader shader{TryGet(host_ptr)}; if (shader) { return last_shaders[static_cast<std::size_t>(program)] = shader; } // No shader found - create a new one - ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; - ProgramCode program_code_b; - const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; - if (is_program_a) { - const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; - program_code_b = GetShaderCode(memory_manager, program_addr_b, - memory_manager.GetPointer(program_addr_b)); - } - - const auto unique_identifier = - GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); - const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; - const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, - host_ptr, unique_identifier}; - - const auto found = precompiled_shaders.find(unique_identifier); - if (found == precompiled_shaders.end()) { - shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code), - std::move(program_code_b)); + ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; + ProgramCode code_b; + if (program == Maxwell::ShaderProgram::VertexA) { + const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; + code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b)); + } + + const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b); + const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); + const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; + const ShaderParameters params{system, disk_cache, precompiled_variants, device, + cpu_addr, host_ptr, unique_identifier}; + + const auto found = unspecialized_shaders.find(unique_identifier); + if (found == unspecialized_shaders.end()) { + shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), + std::move(code_b)); } else { - shader = CachedShader::CreateStageFromCache(params, program, found->second); + shader = CachedShader::CreateFromCache(params, found->second); } Register(shader); @@ -638,15 +726,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { // No kernel found - create a new one auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; + const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; - const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, - host_ptr, unique_identifier}; + const ShaderParameters params{system, disk_cache, precompiled_variants, device, + cpu_addr, host_ptr, unique_identifier}; - const auto found = precompiled_shaders.find(unique_identifier); - if (found == precompiled_shaders.end()) { + const auto found = unspecialized_shaders.find(unique_identifier); + if (found == unspecialized_shaders.end()) { kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); } else { - kernel = CachedShader::CreateKernelFromCache(params, found->second); + kernel = CachedShader::CreateFromCache(params, found->second); } Register(kernel); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index de195cc5d..6bd7c9cf1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -8,9 +8,10 @@ #include <atomic> #include <bitset> #include <memory> -#include <set> +#include <string> #include <tuple> #include <unordered_map> +#include <unordered_set> #include <vector> #include <glad/glad.h> @@ -20,6 +21,8 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" +#include "video_core/shader/const_buffer_locker.h" +#include "video_core/shader/shader_ir.h" namespace Core { class System; @@ -40,11 +43,19 @@ using Shader = std::shared_ptr<CachedShader>; using CachedProgram = std::shared_ptr<OGLProgram>; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; -using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; +using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; + +struct UnspecializedShader { + GLShader::ShaderEntries entries; + ProgramType program_type; + ProgramCode code; + ProgramCode code_b; +}; struct ShaderParameters { + Core::System& system; ShaderDiskCacheOpenGL& disk_cache; - const PrecompiledPrograms& precompiled_programs; + const PrecompiledVariants* precompiled_variants; const Device& device; VAddr cpu_addr; u8* host_ptr; @@ -55,23 +66,18 @@ class CachedShader final : public RasterizerCacheObject { public: static Shader CreateStageFromMemory(const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode&& program_code, ProgramCode&& program_code_b); - - static Shader CreateStageFromCache(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - GLShader::ProgramResult result); + ProgramCode program_code, ProgramCode program_code_b); + static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); - static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); - - static Shader CreateKernelFromCache(const ShaderParameters& params, - GLShader::ProgramResult result); + static Shader CreateFromCache(const ShaderParameters& params, + const UnspecializedShader& unspecialized); VAddr GetCpuAddr() const override { return cpu_addr; } std::size_t GetSizeInBytes() const override { - return shader_length; + return program_code.size() * sizeof(u64); } /// Gets the shader entries for the shader @@ -83,24 +89,36 @@ public: std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); private: + struct LockerVariant { + std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker; + std::unordered_map<ProgramVariant, CachedProgram> programs; + }; + explicit CachedShader(const ShaderParameters& params, ProgramType program_type, - GLShader::ProgramResult result); + GLShader::ShaderEntries entries, ProgramCode program_code, + ProgramCode program_code_b); - CachedProgram TryLoadProgram(const ProgramVariant& variant) const; + void UpdateVariant(); - ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; + ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, + const VideoCommon::Shader::ConstBufferLocker& locker) const; + + Core::System& system; + ShaderDiskCacheOpenGL& disk_cache; + const Device& device; VAddr cpu_addr{}; + u64 unique_identifier{}; ProgramType program_type{}; - ShaderDiskCacheOpenGL& disk_cache; - const PrecompiledPrograms& precompiled_programs; GLShader::ShaderEntries entries; - std::string code; - std::size_t shader_length{}; - std::unordered_map<ProgramVariant, CachedProgram> programs; + ProgramCode program_code; + ProgramCode program_code_b; + + LockerVariant* curr_variant = nullptr; + std::vector<std::unique_ptr<LockerVariant>> locker_variants; }; class ShaderCacheOpenGL final : public RasterizerCache<Shader> { @@ -123,21 +141,26 @@ protected: void FlushObjectInner(const Shader& object) override {} private: - std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders( - const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, - const std::vector<ShaderDiskCacheRaw>& raws, - const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled); + bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback, + const std::vector<ShaderDiskCacheRaw>& raws); CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, - const std::set<GLenum>& supported_formats); + const std::unordered_set<GLenum>& supported_formats); + + const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const; Core::System& system; Core::Frontend::EmuWindow& emu_window; const Device& device; + ShaderDiskCacheOpenGL disk_cache; - PrecompiledShaders precompiled_shaders; PrecompiledPrograms precompiled_programs; + std::unordered_map<u64, PrecompiledVariants> precompiled_variants; + + std::unordered_map<u64, UnspecializedShader> unspecialized_shaders; + std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6a610a3bc..030550c53 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -415,27 +415,6 @@ public: return code.GetResult(); } - ShaderEntries GetShaderEntries() const { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), - cbuf.first); - } - for (const auto& sampler : ir.GetSamplers()) { - entries.samplers.emplace_back(sampler); - } - for (const auto& [offset, image] : ir.GetImages()) { - entries.images.emplace_back(image); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, - usage.is_read, usage.is_written); - } - entries.clip_distances = ir.GetClipDistances(); - entries.shader_length = ir.GetLength(); - return entries; - } - private: friend class ASTDecompiler; friend class ExprDecompiler; @@ -1148,7 +1127,7 @@ private: for (const auto& variant : extras) { if (const auto argument = std::get_if<TextureArgument>(&variant)) { expr += GenerateTextureArgument(*argument); - } else if (std::get_if<TextureAoffi>(&variant)) { + } else if (std::holds_alternative<TextureAoffi>(variant)) { expr += GenerateTextureAoffi(meta->aoffi); } else { UNREACHABLE(); @@ -1158,8 +1137,8 @@ private: return expr + ')'; } - std::string GenerateTextureArgument(TextureArgument argument) { - const auto [type, operand] = argument; + std::string GenerateTextureArgument(const TextureArgument& argument) { + const auto& [type, operand] = argument; if (operand == nullptr) { return {}; } @@ -1235,7 +1214,7 @@ private: std::string BuildImageValues(Operation operation) { constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; const std::size_t values_count{meta.values.size()}; std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); @@ -1780,14 +1759,14 @@ private: return {"0", Type::Int}; } - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), Type::Uint}; } Expression ImageStore(Operation operation) { - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), BuildIntegerCoordinates(operation), BuildImageValues(operation)); return {}; @@ -1795,7 +1774,7 @@ private: template <const std::string_view& opname> Expression AtomicImage(Operation operation) { - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; ASSERT(meta.values.size() == 1); return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), @@ -2246,7 +2225,7 @@ private: code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex()); } - std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { + std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { return fmt::format("{}_{}_{}", name, index, suffix); } @@ -2271,17 +2250,15 @@ private: ShaderWriter code; }; -static constexpr std::string_view flow_var = "flow_var_"; - std::string GetFlowVariable(u32 i) { - return fmt::format("{}{}", flow_var, i); + return fmt::format("flow_var_{}", i); } class ExprDecompiler { public: explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} - void operator()(VideoCommon::Shader::ExprAnd& expr) { + void operator()(const ExprAnd& expr) { inner += "( "; std::visit(*this, *expr.operand1); inner += " && "; @@ -2289,7 +2266,7 @@ public: inner += ')'; } - void operator()(VideoCommon::Shader::ExprOr& expr) { + void operator()(const ExprOr& expr) { inner += "( "; std::visit(*this, *expr.operand1); inner += " || "; @@ -2297,17 +2274,17 @@ public: inner += ')'; } - void operator()(VideoCommon::Shader::ExprNot& expr) { + void operator()(const ExprNot& expr) { inner += '!'; std::visit(*this, *expr.operand1); } - void operator()(VideoCommon::Shader::ExprPredicate& expr) { + void operator()(const ExprPredicate& expr) { const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); inner += decomp.GetPredicate(pred); } - void operator()(VideoCommon::Shader::ExprCondCode& expr) { + void operator()(const ExprCondCode& expr) { const Node cc = decomp.ir.GetConditionCode(expr.cc); std::string target; @@ -2316,10 +2293,13 @@ public: switch (index) { case Tegra::Shader::Pred::NeverExecute: target = "false"; + break; case Tegra::Shader::Pred::UnusedIndex: target = "true"; + break; default: target = decomp.GetPredicate(index); + break; } } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { target = decomp.GetInternalFlag(flag->GetFlag()); @@ -2329,15 +2309,20 @@ public: inner += target; } - void operator()(VideoCommon::Shader::ExprVar& expr) { + void operator()(const ExprVar& expr) { inner += GetFlowVariable(expr.var_index); } - void operator()(VideoCommon::Shader::ExprBoolean& expr) { + void operator()(const ExprBoolean& expr) { inner += expr.value ? "true" : "false"; } - std::string& GetResult() { + void operator()(VideoCommon::Shader::ExprGprEqual& expr) { + inner += + "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')'; + } + + const std::string& GetResult() const { return inner; } @@ -2350,7 +2335,7 @@ class ASTDecompiler { public: explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} - void operator()(VideoCommon::Shader::ASTProgram& ast) { + void operator()(const ASTProgram& ast) { ASTNode current = ast.nodes.GetFirst(); while (current) { Visit(current); @@ -2358,7 +2343,7 @@ public: } } - void operator()(VideoCommon::Shader::ASTIfThen& ast) { + void operator()(const ASTIfThen& ast) { ExprDecompiler expr_parser{decomp}; std::visit(expr_parser, *ast.condition); decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); @@ -2372,7 +2357,7 @@ public: decomp.code.AddLine("}}"); } - void operator()(VideoCommon::Shader::ASTIfElse& ast) { + void operator()(const ASTIfElse& ast) { decomp.code.AddLine("else {{"); decomp.code.scope++; ASTNode current = ast.nodes.GetFirst(); @@ -2384,29 +2369,29 @@ public: decomp.code.AddLine("}}"); } - void operator()(VideoCommon::Shader::ASTBlockEncoded& ast) { + void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { UNREACHABLE(); } - void operator()(VideoCommon::Shader::ASTBlockDecoded& ast) { + void operator()(const ASTBlockDecoded& ast) { decomp.VisitBlock(ast.nodes); } - void operator()(VideoCommon::Shader::ASTVarSet& ast) { + void operator()(const ASTVarSet& ast) { ExprDecompiler expr_parser{decomp}; std::visit(expr_parser, *ast.condition); decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); } - void operator()(VideoCommon::Shader::ASTLabel& ast) { + void operator()(const ASTLabel& ast) { decomp.code.AddLine("// Label_{}:", ast.index); } - void operator()(VideoCommon::Shader::ASTGoto& ast) { + void operator()([[maybe_unused]] const ASTGoto& ast) { UNREACHABLE(); } - void operator()(VideoCommon::Shader::ASTDoWhile& ast) { + void operator()(const ASTDoWhile& ast) { ExprDecompiler expr_parser{decomp}; std::visit(expr_parser, *ast.condition); decomp.code.AddLine("do {{"); @@ -2420,7 +2405,7 @@ public: decomp.code.AddLine("}} while({});", expr_parser.GetResult()); } - void operator()(VideoCommon::Shader::ASTReturn& ast) { + void operator()(const ASTReturn& ast) { const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); if (!is_true) { ExprDecompiler expr_parser{decomp}; @@ -2440,7 +2425,7 @@ public: } } - void operator()(VideoCommon::Shader::ASTBreak& ast) { + void operator()(const ASTBreak& ast) { const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); if (!is_true) { ExprDecompiler expr_parser{decomp}; @@ -2455,7 +2440,7 @@ public: } } - void Visit(VideoCommon::Shader::ASTNode& node) { + void Visit(const ASTNode& node) { std::visit(*this, *node->GetInnerData()); } @@ -2468,32 +2453,53 @@ void GLSLDecompiler::DecompileAST() { for (u32 i = 0; i < num_flow_variables; i++) { code.AddLine("bool {} = false;", GetFlowVariable(i)); } + ASTDecompiler decompiler{*this}; - VideoCommon::Shader::ASTNode program = ir.GetASTProgram(); - decompiler.Visit(program); + decompiler.Visit(ir.GetASTProgram()); } } // Anonymous namespace +ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { + ShaderEntries entries; + for (const auto& cbuf : ir.GetConstantBuffers()) { + entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), + cbuf.first); + } + for (const auto& sampler : ir.GetSamplers()) { + entries.samplers.emplace_back(sampler); + } + for (const auto& [offset, image] : ir.GetImages()) { + entries.images.emplace_back(image); + } + for (const auto& [base, usage] : ir.GetGlobalMemory()) { + entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, + usage.is_written); + } + entries.clip_distances = ir.GetClipDistances(); + entries.shader_length = ir.GetLength(); + return entries; +} + std::string GetCommonDeclarations() { - return fmt::format( - "#define ftoi floatBitsToInt\n" - "#define ftou floatBitsToUint\n" - "#define itof intBitsToFloat\n" - "#define utof uintBitsToFloat\n\n" - "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" - " bvec2 is_nan1 = isnan(pair1);\n" - " bvec2 is_nan2 = isnan(pair2);\n" - " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " - "is_nan2.y);\n" - "}}\n\n"); + return R"(#define ftoi floatBitsToInt +#define ftou floatBitsToUint +#define itof intBitsToFloat +#define utof uintBitsToFloat + +bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { + bvec2 is_nan1 = isnan(pair1); + bvec2 is_nan2 = isnan(pair2); + return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); +} +)"; } -ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, - const std::string& suffix) { +std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, + const std::string& suffix) { GLSLDecompiler decompiler(device, ir, stage, suffix); decompiler.Decompile(); - return {decompiler.GetResult(), decompiler.GetShaderEntries()}; + return decompiler.GetResult(); } } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index e538dc001..fead2a51e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -34,10 +34,7 @@ enum class ProgramType : u32 { namespace OpenGL::GLShader { -struct ShaderEntries; - using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using ProgramResult = std::pair<std::string, ShaderEntries>; using SamplerEntry = VideoCommon::Shader::Sampler; using ImageEntry = VideoCommon::Shader::Image; @@ -93,9 +90,11 @@ struct ShaderEntries { std::size_t shader_length{}; }; +ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); + std::string GetCommonDeclarations(); -ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - ProgramType stage, const std::string& suffix); +std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, + ProgramType stage, const std::string& suffix); } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 74cc33476..184a565e6 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -22,6 +22,29 @@ namespace OpenGL { +using VideoCommon::Shader::BindlessSamplerMap; +using VideoCommon::Shader::BoundSamplerMap; +using VideoCommon::Shader::KeyMap; + +namespace { + +struct ConstBufferKey { + u32 cbuf; + u32 offset; + u32 value; +}; + +struct BoundSamplerKey { + u32 offset; + Tegra::Engines::SamplerDescriptor sampler; +}; + +struct BindlessSamplerKey { + u32 cbuf; + u32 offset; + Tegra::Engines::SamplerDescriptor sampler; +}; + using ShaderCacheVersionHash = std::array<u8, 64>; enum class TransferableEntryKind : u32 { @@ -29,18 +52,10 @@ enum class TransferableEntryKind : u32 { Usage, }; -enum class PrecompiledEntryKind : u32 { - Decompiled, - Dump, -}; - -constexpr u32 NativeVersion = 4; +constexpr u32 NativeVersion = 5; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ShaderDiskCacheUsage) == 40); - -namespace { ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; @@ -49,13 +64,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { return hash; } -} // namespace +} // Anonymous namespace ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, - u32 program_code_size, u32 program_code_size_b, ProgramCode program_code, ProgramCode program_code_b) : unique_identifier{unique_identifier}, program_type{program_type}, - program_code_size{program_code_size}, program_code_size_b{program_code_size_b}, program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; @@ -90,15 +103,16 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(program_type)) != 1 || - file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) { + file.WriteObject(static_cast<u32>(program_code.size())) != 1 || + file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) { return false; } - if (file.WriteArray(program_code.data(), program_code_size) != program_code_size) + if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size()) return false; if (HasProgramA() && - file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) { + file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) { return false; } return true; @@ -127,13 +141,13 @@ ShaderDiskCacheOpenGL::LoadTransferable() { u32 version{}; if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { LOG_ERROR(Render_OpenGL, - "Failed to get transferable cache version for title id={} - skipping", + "Failed to get transferable cache version for title id={}, skipping", GetTitleID()); return {}; } if (version < NativeVersion) { - LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing"); + LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); file.Close(); InvalidateTransferable(); is_usable = true; @@ -141,17 +155,18 @@ ShaderDiskCacheOpenGL::LoadTransferable() { } if (version > NativeVersion) { LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " - "of the emulator - skipping"); + "of the emulator, skipping"); return {}; } // Version is valid, load the shaders + constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; std::vector<ShaderDiskCacheRaw> raws; std::vector<ShaderDiskCacheUsage> usages; while (file.Tell() < file.GetSize()) { TransferableEntryKind kind{}; if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { - LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping"); + LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); return {}; } @@ -159,7 +174,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() { case TransferableEntryKind::Raw: { ShaderDiskCacheRaw entry; if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping"); + LOG_ERROR(Render_OpenGL, error_loading); return {}; } transferable.insert({entry.GetUniqueIdentifier(), {}}); @@ -167,16 +182,45 @@ ShaderDiskCacheOpenGL::LoadTransferable() { break; } case TransferableEntryKind::Usage: { - ShaderDiskCacheUsage usage{}; - if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping"); + ShaderDiskCacheUsage usage; + + u32 num_keys{}; + u32 num_bound_samplers{}; + u32 num_bindless_samplers{}; + if (file.ReadArray(&usage.unique_identifier, 1) != 1 || + file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || + file.ReadArray(&num_bound_samplers, 1) != 1 || + file.ReadArray(&num_bindless_samplers, 1) != 1) { + LOG_ERROR(Render_OpenGL, error_loading); return {}; } + + std::vector<ConstBufferKey> keys(num_keys); + std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); + std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); + if (file.ReadArray(keys.data(), keys.size()) != keys.size() || + file.ReadArray(bound_samplers.data(), bound_samplers.size()) != + bound_samplers.size() || + file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) != + bindless_samplers.size()) { + LOG_ERROR(Render_OpenGL, error_loading); + return {}; + } + for (const auto& key : keys) { + usage.keys.insert({{key.cbuf, key.offset}, key.value}); + } + for (const auto& key : bound_samplers) { + usage.bound_samplers.emplace(key.offset, key.sampler); + } + for (const auto& key : bindless_samplers) { + usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); + } + usages.push_back(std::move(usage)); break; } default: - LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping", + LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping", static_cast<u32>(kind)); return {}; } @@ -186,13 +230,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() { return {{std::move(raws), std::move(usages)}}; } -std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> +std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> ShaderDiskCacheOpenGL::LoadPrecompiled() { if (!is_usable) { return {}; } - FileUtil::IOFile file(GetPrecompiledPath(), "rb"); + std::string path = GetPrecompiledPath(); + FileUtil::IOFile file(path, "rb"); if (!file.IsOpen()) { LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", GetTitleID()); @@ -202,7 +247,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { const auto result = LoadPrecompiledFile(file); if (!result) { LOG_INFO(Render_OpenGL, - "Failed to load precompiled cache for game with title id={} - removing", + "Failed to load precompiled cache for game with title id={}, removing", GetTitleID()); file.Close(); InvalidatePrecompiled(); @@ -211,7 +256,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { return *result; } -std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>> +std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { // Read compressed file from disk and decompress to virtual precompiled cache file std::vector<u8> compressed(file.GetSize()); @@ -231,238 +276,56 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled; ShaderDumpsMap dumps; while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - PrecompiledEntryKind kind{}; - if (!LoadObjectFromPrecompiled(kind)) { + u32 num_keys{}; + u32 num_bound_samplers{}; + u32 num_bindless_samplers{}; + ShaderDiskCacheUsage usage; + if (!LoadObjectFromPrecompiled(usage.unique_identifier) || + !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || + !LoadObjectFromPrecompiled(num_bound_samplers) || + !LoadObjectFromPrecompiled(num_bindless_samplers)) { return {}; } - - switch (kind) { - case PrecompiledEntryKind::Decompiled: { - u64 unique_identifier{}; - if (!LoadObjectFromPrecompiled(unique_identifier)) { - return {}; - } - - auto entry = LoadDecompiledEntry(); - if (!entry) { - return {}; - } - decompiled.insert({unique_identifier, std::move(*entry)}); - break; - } - case PrecompiledEntryKind::Dump: { - ShaderDiskCacheUsage usage; - if (!LoadObjectFromPrecompiled(usage)) { - return {}; - } - - ShaderDiskCacheDump dump; - if (!LoadObjectFromPrecompiled(dump.binary_format)) { - return {}; - } - - u32 binary_length{}; - if (!LoadObjectFromPrecompiled(binary_length)) { - return {}; - } - - dump.binary.resize(binary_length); - if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { - return {}; - } - - dumps.insert({usage, dump}); - break; - } - default: + std::vector<ConstBufferKey> keys(num_keys); + std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); + std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); + if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) || + !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) != + bound_samplers.size() || + !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) != + bindless_samplers.size()) { return {}; } - } - return {{decompiled, dumps}}; -} - -std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() { - u32 code_size{}; - if (!LoadObjectFromPrecompiled(code_size)) { - return {}; - } - - std::string code(code_size, '\0'); - if (!LoadArrayFromPrecompiled(code.data(), code.size())) { - return {}; - } - - ShaderDiskCacheDecompiled entry; - entry.code = std::move(code); - - u32 const_buffers_count{}; - if (!LoadObjectFromPrecompiled(const_buffers_count)) { - return {}; - } - - for (u32 i = 0; i < const_buffers_count; ++i) { - u32 max_offset{}; - u32 index{}; - bool is_indirect{}; - if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(is_indirect)) { - return {}; + for (const auto& key : keys) { + usage.keys.insert({{key.cbuf, key.offset}, key.value}); } - entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); - } - - u32 samplers_count{}; - if (!LoadObjectFromPrecompiled(samplers_count)) { - return {}; - } - - for (u32 i = 0; i < samplers_count; ++i) { - u64 offset{}; - u64 index{}; - u32 type{}; - bool is_array{}; - bool is_shadow{}; - bool is_bindless{}; - if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) || - !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) { - return {}; + for (const auto& key : bound_samplers) { + usage.bound_samplers.emplace(key.offset, key.sampler); } - entry.entries.samplers.emplace_back( - static_cast<std::size_t>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); - } - - u32 images_count{}; - if (!LoadObjectFromPrecompiled(images_count)) { - return {}; - } - for (u32 i = 0; i < images_count; ++i) { - u64 offset{}; - u64 index{}; - u32 type{}; - u8 is_bindless{}; - u8 is_written{}; - u8 is_read{}; - u8 is_atomic{}; - if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || - !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || - !LoadObjectFromPrecompiled(is_atomic)) { - return {}; + for (const auto& key : bindless_samplers) { + usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); } - entry.entries.images.emplace_back( - static_cast<std::size_t>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, - is_read != 0, is_atomic != 0); - } - u32 global_memory_count{}; - if (!LoadObjectFromPrecompiled(global_memory_count)) { - return {}; - } - for (u32 i = 0; i < global_memory_count; ++i) { - u32 cbuf_index{}; - u32 cbuf_offset{}; - bool is_read{}; - bool is_written{}; - if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) || - !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { + ShaderDiskCacheDump dump; + if (!LoadObjectFromPrecompiled(dump.binary_format)) { return {}; } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read, - is_written); - } - for (auto& clip_distance : entry.entries.clip_distances) { - if (!LoadObjectFromPrecompiled(clip_distance)) { + u32 binary_length{}; + if (!LoadObjectFromPrecompiled(binary_length)) { return {}; } - } - - u64 shader_length{}; - if (!LoadObjectFromPrecompiled(shader_length)) { - return {}; - } - entry.entries.shader_length = static_cast<std::size_t>(shader_length); - - return entry; -} - -bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries) { - if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) || - !SaveObjectToPrecompiled(unique_identifier) || - !SaveObjectToPrecompiled(static_cast<u32>(code.size())) || - !SaveArrayToPrecompiled(code.data(), code.size())) { - return false; - } - - if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) { - return false; - } - for (const auto& cbuf : entries.const_buffers) { - if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) || - !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) || - !SaveObjectToPrecompiled(cbuf.IsIndirect())) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) { - return false; - } - for (const auto& sampler : entries.samplers) { - if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) || - !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) || - !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) || - !SaveObjectToPrecompiled(sampler.IsArray()) || - !SaveObjectToPrecompiled(sampler.IsShadow()) || - !SaveObjectToPrecompiled(sampler.IsBindless())) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) { - return false; - } - for (const auto& image : entries.images) { - if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || - !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || - !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 1 : 0))) { - return false; - } - } - if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { - return false; - } - for (const auto& gmem : entries.global_memory_entries) { - if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) || - !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) || - !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) { - return false; - } - } - - for (const bool clip_distance : entries.clip_distances) { - if (!SaveObjectToPrecompiled(clip_distance)) { - return false; + dump.binary.resize(binary_length); + if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { + return {}; } - } - if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { - return false; + dumps.emplace(std::move(usage), dump); } - - return true; + return dumps; } void ShaderDiskCacheOpenGL::InvalidateTransferable() { @@ -494,10 +357,11 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { } FileUtil::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) + if (!file.IsOpen()) { return; + } if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { - LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing"); + LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); file.Close(); InvalidateTransferable(); return; @@ -523,29 +387,39 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { FileUtil::IOFile file = AppendTransferableFile(); if (!file.IsOpen()) return; - - if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) { - LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing"); + const auto Close = [&] { + LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing"); file.Close(); InvalidateTransferable(); - return; - } -} + }; -void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries) { - if (!is_usable) { + if (file.WriteObject(TransferableEntryKind::Usage) != 1 || + file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || + file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || + file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || + file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { + Close(); return; } - - if (precompiled_cache_virtual_file.GetSize() == 0) { - SavePrecompiledHeaderToVirtualPrecompiledCache(); + for (const auto& [pair, value] : usage.keys) { + const auto [cbuf, offset] = pair; + if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) { + Close(); + return; + } } - - if (!SaveDecompiledFile(unique_identifier, code, entries)) { - LOG_ERROR(Render_OpenGL, - "Failed to save decompiled entry to the precompiled file - removing"); - InvalidatePrecompiled(); + for (const auto& [offset, sampler] : usage.bound_samplers) { + if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) { + Close(); + return; + } + } + for (const auto& [pair, sampler] : usage.bindless_samplers) { + const auto [cbuf, offset] = pair; + if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { + Close(); + return; + } } } @@ -554,6 +428,13 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p return; } + // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header + // when writing the dump. This should be done the moment I get access to write to the virtual + // file. + if (precompiled_cache_virtual_file.GetSize() == 0) { + SavePrecompiledHeaderToVirtualPrecompiledCache(); + } + GLint binary_length{}; glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); @@ -561,21 +442,51 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p std::vector<u8> binary(binary_length); glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) || - !SaveObjectToPrecompiled(usage) || - !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || - !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing", + const auto Close = [&] { + LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", usage.unique_identifier); InvalidatePrecompiled(); + }; + + if (!SaveObjectToPrecompiled(usage.unique_identifier) || + !SaveObjectToPrecompiled(usage.variant) || + !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || + !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || + !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { + Close(); return; } + for (const auto& [pair, value] : usage.keys) { + const auto [cbuf, offset] = pair; + if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) { + Close(); + return; + } + } + for (const auto& [offset, sampler] : usage.bound_samplers) { + if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) { + Close(); + return; + } + } + for (const auto& [pair, sampler] : usage.bindless_samplers) { + const auto [cbuf, offset] = pair; + if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { + Close(); + return; + } + } + if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || + !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || + !SaveArrayToPrecompiled(binary.data(), binary.size())) { + Close(); + } } FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { - if (!EnsureDirectories()) + if (!EnsureDirectories()) { return {}; + } const auto transferable_path{GetTransferablePath()}; const bool existed = FileUtil::Exists(transferable_path); @@ -607,8 +518,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { precompiled_cache_virtual_file_offset = 0; - const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); - const std::vector<u8>& compressed = + const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); + const std::vector<u8> compressed = Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); const auto precompiled_path{GetPrecompiledPath()}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 9595bd71b..db23ada93 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -8,6 +8,7 @@ #include <optional> #include <string> #include <tuple> +#include <type_traits> #include <unordered_map> #include <unordered_set> #include <utility> @@ -19,6 +20,7 @@ #include "common/common_types.h" #include "core/file_sys/vfs_vector.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/shader/const_buffer_locker.h" namespace Core { class System; @@ -53,6 +55,7 @@ struct BaseBindings { return !operator==(rhs); } }; +static_assert(std::is_trivially_copyable_v<BaseBindings>); /// Describes the different variants a single program can be compiled. struct ProgramVariant { @@ -70,13 +73,20 @@ struct ProgramVariant { } }; +static_assert(std::is_trivially_copyable_v<ProgramVariant>); + /// Describes how a shader is used. struct ShaderDiskCacheUsage { u64 unique_identifier{}; ProgramVariant variant; + VideoCommon::Shader::KeyMap keys; + VideoCommon::Shader::BoundSamplerMap bound_samplers; + VideoCommon::Shader::BindlessSamplerMap bindless_samplers; bool operator==(const ShaderDiskCacheUsage& rhs) const { - return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant); + return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) == + std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers, + rhs.bindless_samplers); } bool operator!=(const ShaderDiskCacheUsage& rhs) const { @@ -123,8 +133,7 @@ namespace OpenGL { class ShaderDiskCacheRaw { public: explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, - u32 program_code_size, u32 program_code_size_b, - ProgramCode program_code, ProgramCode program_code_b); + ProgramCode program_code, ProgramCode program_code_b = {}); ShaderDiskCacheRaw(); ~ShaderDiskCacheRaw(); @@ -155,22 +164,14 @@ public: private: u64 unique_identifier{}; ProgramType program_type{}; - u32 program_code_size{}; - u32 program_code_size_b{}; ProgramCode program_code; ProgramCode program_code_b; }; -/// Contains decompiled data from a shader -struct ShaderDiskCacheDecompiled { - std::string code; - GLShader::ShaderEntries entries; -}; - /// Contains an OpenGL dumped binary program struct ShaderDiskCacheDump { - GLenum binary_format; + GLenum binary_format{}; std::vector<u8> binary; }; @@ -184,9 +185,7 @@ public: LoadTransferable(); /// Loads current game's precompiled cache. Invalidates on failure. - std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> - LoadPrecompiled(); + std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); /// Removes the transferable (and precompiled) cache file. void InvalidateTransferable(); @@ -200,10 +199,6 @@ public: /// Saves shader usage to the transferable file. Does not check for collisions. void SaveUsage(const ShaderDiskCacheUsage& usage); - /// Saves a decompiled entry to the precompiled file. Does not check for collisions. - void SaveDecompiled(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries); - /// Saves a dump entry to the precompiled file. Does not check for collisions. void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); @@ -212,18 +207,9 @@ public: private: /// Loads the transferable cache. Returns empty on failure. - std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> + std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> LoadPrecompiledFile(FileUtil::IOFile& file); - /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on - /// failure. - std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(); - - /// Saves a decompiled entry to the passed file. Returns true on success. - bool SaveDecompiledFile(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries); - /// Opens current game's transferable file and write it's header if it doesn't exist FileUtil::IOFile AppendTransferableFile() const; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index b5a43e79e..0e22eede9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -16,17 +16,8 @@ using VideoCommon::Shader::CompilerSettings; using VideoCommon::Shader::ProgramCode; using VideoCommon::Shader::ShaderIR; -static constexpr u32 PROGRAM_OFFSET = 10; -static constexpr u32 COMPUTE_OFFSET = 0; - -static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true}; - -ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: VS" + id + "\n\n"; - out += GetCommonDeclarations(); - +std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { + std::string out = GetCommonDeclarations(); out += R"( layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { vec4 viewport_flip; @@ -34,17 +25,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { }; )"; - - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); - const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; - ProgramResult program = Decompile(device, program_ir, stage, "vertex"); - out += program.first; - - if (setup.IsDualProgram()) { - const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b, - settings); - ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); - out += program_b.first; + const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB; + out += Decompile(device, ir, stage, "vertex"); + if (ir_b) { + out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b"); } out += R"( @@ -52,7 +36,7 @@ void main() { execute_vertex(); )"; - if (setup.IsDualProgram()) { + if (ir_b) { out += " execute_vertex_b();"; } @@ -66,17 +50,13 @@ void main() { // Viewport can be flipped, which is unsupported by glViewport gl_Position.xy *= viewport_flip.xy; } -})"; - - return {std::move(out), std::move(program.second)}; +} +)"; + return out; } -ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: GS" + id + "\n\n"; - out += GetCommonDeclarations(); - +std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { + std::string out = GetCommonDeclarations(); out += R"( layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { vec4 viewport_flip; @@ -84,25 +64,18 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { }; )"; - - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); - ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); - out += program.first; + out += Decompile(device, ir, ProgramType::Geometry, "geometry"); out += R"( void main() { execute_geometry(); -};)"; - - return {std::move(out), std::move(program.second)}; +} +)"; + return out; } -ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: FS" + id + "\n\n"; - out += GetCommonDeclarations(); - +std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { + std::string out = GetCommonDeclarations(); out += R"( layout (location = 0) out vec4 FragColor0; layout (location = 1) out vec4 FragColor1; @@ -119,36 +92,25 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { }; )"; - - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); - ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); - out += program.first; + out += Decompile(device, ir, ProgramType::Fragment, "fragment"); out += R"( void main() { execute_fragment(); } - )"; - return {std::move(out), std::move(program.second)}; + return out; } -ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: CS" + id + "\n\n"; - out += GetCommonDeclarations(); - - const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings); - ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); - out += program.first; - +std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { + std::string out = GetCommonDeclarations(); + out += Decompile(device, ir, ProgramType::Compute, "compute"); out += R"( void main() { execute_compute(); } )"; - return {std::move(out), std::move(program.second)}; + return out; } } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 3833e88ab..cba2be9f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -17,44 +17,18 @@ class Device; namespace OpenGL::GLShader { using VideoCommon::Shader::ProgramCode; - -struct ShaderSetup { - explicit ShaderSetup(ProgramCode program_code) { - program.code = std::move(program_code); - } - - struct { - ProgramCode code; - ProgramCode code_b; // Used for dual vertex shaders - u64 unique_identifier; - std::size_t size_a; - std::size_t size_b; - } program; - - /// Used in scenarios where we have a dual vertex shaders - void SetProgramB(ProgramCode program_b) { - program.code_b = std::move(program_b); - has_program_b = true; - } - - bool IsDualProgram() const { - return has_program_b; - } - -private: - bool has_program_b{}; -}; +using VideoCommon::Shader::ShaderIR; /// Generates the GLSL vertex shader program source code for the given VS program -ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup); +std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b); /// Generates the GLSL geometry shader program source code for the given GS program -ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup); +std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir); /// Generates the GLSL fragment shader program source code for the given FS program -ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); +std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir); /// Generates the GLSL compute shader program source code for the given CS program -ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); +std::string GenerateComputeShader(const Device& device, const ShaderIR& ir); } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 173b76c4e..2f9bfd7e4 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -111,7 +111,8 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXT45_SRGB {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // BC7U_SRGB + true}, // BC7U_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, ComponentType::UNorm, false}, // R4G4B4A4U {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB @@ -120,6 +121,16 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X6 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X6_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X10 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X10_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_12X12 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_12X12_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5_SRGB // Depth formats {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1e6ef66ab..4bbd17b12 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst RendererOpenGL::~RendererOpenGL() = default; void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - system.GetPerfStats().EndSystemFrame(); - // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.AllDirty(); @@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.PollEvents(); - system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); - system.GetPerfStats().BeginSystemFrame(); - // Restore the rasterizer state prev_state.AllDirty(); prev_state.Apply(); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 8bcd04221..42cf068b6 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1648,32 +1648,32 @@ class ExprDecompiler { public: explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} - Id operator()(VideoCommon::Shader::ExprAnd& expr) { + Id operator()(const ExprAnd& expr) { const Id type_def = decomp.GetTypeDefinition(Type::Bool); const Id op1 = Visit(expr.operand1); const Id op2 = Visit(expr.operand2); return decomp.Emit(decomp.OpLogicalAnd(type_def, op1, op2)); } - Id operator()(VideoCommon::Shader::ExprOr& expr) { + Id operator()(const ExprOr& expr) { const Id type_def = decomp.GetTypeDefinition(Type::Bool); const Id op1 = Visit(expr.operand1); const Id op2 = Visit(expr.operand2); return decomp.Emit(decomp.OpLogicalOr(type_def, op1, op2)); } - Id operator()(VideoCommon::Shader::ExprNot& expr) { + Id operator()(const ExprNot& expr) { const Id type_def = decomp.GetTypeDefinition(Type::Bool); const Id op1 = Visit(expr.operand1); return decomp.Emit(decomp.OpLogicalNot(type_def, op1)); } - Id operator()(VideoCommon::Shader::ExprPredicate& expr) { + Id operator()(const ExprPredicate& expr) { const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred))); } - Id operator()(VideoCommon::Shader::ExprCondCode& expr) { + Id operator()(const ExprCondCode& expr) { const Node cc = decomp.ir.GetConditionCode(expr.cc); Id target; @@ -1682,10 +1682,13 @@ public: switch (index) { case Tegra::Shader::Pred::NeverExecute: target = decomp.v_false; + break; case Tegra::Shader::Pred::UnusedIndex: target = decomp.v_true; + break; default: target = decomp.predicates.at(index); + break; } } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag())); @@ -1693,15 +1696,22 @@ public: return decomp.Emit(decomp.OpLoad(decomp.t_bool, target)); } - Id operator()(VideoCommon::Shader::ExprVar& expr) { + Id operator()(const ExprVar& expr) { return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index))); } - Id operator()(VideoCommon::Shader::ExprBoolean& expr) { + Id operator()(const ExprBoolean& expr) { return expr.value ? decomp.v_true : decomp.v_false; } - Id Visit(VideoCommon::Shader::Expr& node) { + Id operator()(const ExprGprEqual& expr) { + const Id target = decomp.Constant(decomp.t_uint, expr.value); + const Id gpr = decomp.BitcastTo<Type::Uint>( + decomp.Emit(decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)))); + return decomp.Emit(decomp.OpLogicalEqual(decomp.t_uint, gpr, target)); + } + + Id Visit(const Expr& node) { return std::visit(*this, *node); } @@ -1713,7 +1723,7 @@ class ASTDecompiler { public: explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} - void operator()(VideoCommon::Shader::ASTProgram& ast) { + void operator()(const ASTProgram& ast) { ASTNode current = ast.nodes.GetFirst(); while (current) { Visit(current); @@ -1721,7 +1731,7 @@ public: } } - void operator()(VideoCommon::Shader::ASTIfThen& ast) { + void operator()(const ASTIfThen& ast) { ExprDecompiler expr_parser{decomp}; const Id condition = expr_parser.Visit(ast.condition); const Id then_label = decomp.OpLabel(); @@ -1738,33 +1748,33 @@ public: decomp.Emit(endif_label); } - void operator()(VideoCommon::Shader::ASTIfElse& ast) { + void operator()([[maybe_unused]] const ASTIfElse& ast) { UNREACHABLE(); } - void operator()(VideoCommon::Shader::ASTBlockEncoded& ast) { + void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { UNREACHABLE(); } - void operator()(VideoCommon::Shader::ASTBlockDecoded& ast) { + void operator()(const ASTBlockDecoded& ast) { decomp.VisitBasicBlock(ast.nodes); } - void operator()(VideoCommon::Shader::ASTVarSet& ast) { + void operator()(const ASTVarSet& ast) { ExprDecompiler expr_parser{decomp}; const Id condition = expr_parser.Visit(ast.condition); decomp.Emit(decomp.OpStore(decomp.flow_variables.at(ast.index), condition)); } - void operator()(VideoCommon::Shader::ASTLabel& ast) { + void operator()([[maybe_unused]] const ASTLabel& ast) { // Do nothing } - void operator()(VideoCommon::Shader::ASTGoto& ast) { + void operator()([[maybe_unused]] const ASTGoto& ast) { UNREACHABLE(); } - void operator()(VideoCommon::Shader::ASTDoWhile& ast) { + void operator()(const ASTDoWhile& ast) { const Id loop_label = decomp.OpLabel(); const Id endloop_label = decomp.OpLabel(); const Id loop_start_block = decomp.OpLabel(); @@ -1787,7 +1797,7 @@ public: decomp.Emit(endloop_label); } - void operator()(VideoCommon::Shader::ASTReturn& ast) { + void operator()(const ASTReturn& ast) { if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { ExprDecompiler expr_parser{decomp}; const Id condition = expr_parser.Visit(ast.condition); @@ -1817,7 +1827,7 @@ public: } } - void operator()(VideoCommon::Shader::ASTBreak& ast) { + void operator()(const ASTBreak& ast) { if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { ExprDecompiler expr_parser{decomp}; const Id condition = expr_parser.Visit(ast.condition); @@ -1837,7 +1847,7 @@ public: } } - void Visit(VideoCommon::Shader::ASTNode& node) { + void Visit(const ASTNode& node) { std::visit(*this, *node->GetInnerData()); } @@ -1853,9 +1863,11 @@ void SPIRVDecompiler::DecompileAST() { Name(id, fmt::format("flow_var_{}", i)); flow_variables.emplace(i, AddGlobalVariable(id)); } + + const ASTNode program = ir.GetASTProgram(); ASTDecompiler decompiler{*this}; - VideoCommon::Shader::ASTNode program = ir.GetASTProgram(); decompiler.Visit(program); + const Id next_block = OpLabel(); Emit(OpBranch(next_block)); Emit(next_block); diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp index 436d45f4b..3f96d9076 100644 --- a/src/video_core/shader/ast.cpp +++ b/src/video_core/shader/ast.cpp @@ -3,6 +3,9 @@ // Refer to the license.txt file included. #include <string> +#include <string_view> + +#include <fmt/format.h> #include "common/assert.h" #include "common/common_types.h" @@ -225,11 +228,16 @@ public: inner += expr.value ? "true" : "false"; } + void operator()(const ExprGprEqual& expr) { + inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')'; + } + const std::string& GetResult() const { return inner; } - std::string inner{}; +private: + std::string inner; }; class ASTPrinter { @@ -249,7 +257,7 @@ public: void operator()(const ASTIfThen& ast) { ExprPrinter expr_parser{}; std::visit(expr_parser, *ast.condition); - inner += Ident() + "if (" + expr_parser.GetResult() + ") {\n"; + inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); scope++; ASTNode current = ast.nodes.GetFirst(); while (current) { @@ -257,11 +265,13 @@ public: current = current->GetNext(); } scope--; - inner += Ident() + "}\n"; + inner += fmt::format("{}}}\n", Indent()); } void operator()(const ASTIfElse& ast) { - inner += Ident() + "else {\n"; + inner += Indent(); + inner += "else {\n"; + scope++; ASTNode current = ast.nodes.GetFirst(); while (current) { @@ -269,40 +279,41 @@ public: current = current->GetNext(); } scope--; - inner += Ident() + "}\n"; + + inner += Indent(); + inner += "}\n"; } void operator()(const ASTBlockEncoded& ast) { - inner += Ident() + "Block(" + std::to_string(ast.start) + ", " + std::to_string(ast.end) + - ");\n"; + inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); } - void operator()(const ASTBlockDecoded& ast) { - inner += Ident() + "Block;\n"; + void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { + inner += Indent(); + inner += "Block;\n"; } void operator()(const ASTVarSet& ast) { ExprPrinter expr_parser{}; std::visit(expr_parser, *ast.condition); - inner += - Ident() + "V" + std::to_string(ast.index) + " := " + expr_parser.GetResult() + ";\n"; + inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); } void operator()(const ASTLabel& ast) { - inner += "Label_" + std::to_string(ast.index) + ":\n"; + inner += fmt::format("Label_{}:\n", ast.index); } void operator()(const ASTGoto& ast) { ExprPrinter expr_parser{}; std::visit(expr_parser, *ast.condition); - inner += Ident() + "(" + expr_parser.GetResult() + ") -> goto Label_" + - std::to_string(ast.label) + ";\n"; + inner += + fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); } void operator()(const ASTDoWhile& ast) { ExprPrinter expr_parser{}; std::visit(expr_parser, *ast.condition); - inner += Ident() + "do {\n"; + inner += fmt::format("{}do {{\n", Indent()); scope++; ASTNode current = ast.nodes.GetFirst(); while (current) { @@ -310,32 +321,23 @@ public: current = current->GetNext(); } scope--; - inner += Ident() + "} while (" + expr_parser.GetResult() + ");\n"; + inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); } void operator()(const ASTReturn& ast) { ExprPrinter expr_parser{}; std::visit(expr_parser, *ast.condition); - inner += Ident() + "(" + expr_parser.GetResult() + ") -> " + - (ast.kills ? "discard" : "exit") + ";\n"; + inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), + ast.kills ? "discard" : "exit"); } void operator()(const ASTBreak& ast) { ExprPrinter expr_parser{}; std::visit(expr_parser, *ast.condition); - inner += Ident() + "(" + expr_parser.GetResult() + ") -> break;\n"; + inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); } - std::string& Ident() { - if (memo_scope == scope) { - return tabs_memo; - } - tabs_memo = tabs.substr(0, scope * 2); - memo_scope = scope; - return tabs_memo; - } - - void Visit(ASTNode& node) { + void Visit(const ASTNode& node) { std::visit(*this, *node->GetInnerData()); } @@ -344,16 +346,29 @@ public: } private: + std::string_view Indent() { + if (space_segment_scope == scope) { + return space_segment; + } + + // Ensure that we don't exceed our view. + ASSERT(scope * 2 < spaces.size()); + + space_segment = spaces.substr(0, scope * 2); + space_segment_scope = scope; + return space_segment; + } + std::string inner{}; - u32 scope{}; + std::string_view space_segment; - std::string tabs_memo{}; - u32 memo_scope{}; + u32 scope{}; + u32 space_segment_scope{}; - static constexpr std::string_view tabs{" "}; + static constexpr std::string_view spaces{" "}; }; -std::string ASTManager::Print() { +std::string ASTManager::Print() const { ASTPrinter printer{}; printer.Visit(main_node); return printer.GetResult(); @@ -549,13 +564,13 @@ bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) co return min->GetParent() == max->GetParent(); } -void ASTManager::ShowCurrentState(std::string_view state) { +void ASTManager::ShowCurrentState(std::string_view state) const { LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); SanityCheck(); } -void ASTManager::SanityCheck() { - for (auto& label : labels) { +void ASTManager::SanityCheck() const { + for (const auto& label : labels) { if (!label->GetParent()) { LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); } diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h index d7bf11821..a2f0044ba 100644 --- a/src/video_core/shader/ast.h +++ b/src/video_core/shader/ast.h @@ -328,13 +328,13 @@ public: void InsertReturn(Expr condition, bool kills); - std::string Print(); + std::string Print() const; void Decompile(); - void ShowCurrentState(std::string_view state); + void ShowCurrentState(std::string_view state) const; - void SanityCheck(); + void SanityCheck() const; void Clear(); diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp new file mode 100644 index 000000000..fe467608e --- /dev/null +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -0,0 +1,110 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <memory> +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/shader/const_buffer_locker.h" + +namespace VideoCommon::Shader { + +using Tegra::Engines::SamplerDescriptor; + +ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) + : stage{shader_stage} {} + +ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface& engine) + : stage{shader_stage}, engine{&engine} {} + +ConstBufferLocker::~ConstBufferLocker() = default; + +std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { + const std::pair<u32, u32> key = {buffer, offset}; + const auto iter = keys.find(key); + if (iter != keys.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); + keys.emplace(key, value); + return value; +} + +std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) { + const u32 key = offset; + const auto iter = bound_samplers.find(key); + if (iter != bound_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); + bound_samplers.emplace(key, value); + return value; +} + +std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler( + u32 buffer, u32 offset) { + const std::pair key = {buffer, offset}; + const auto iter = bindless_samplers.find(key); + if (iter != bindless_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); + bindless_samplers.emplace(key, value); + return value; +} + +void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { + keys.insert_or_assign({buffer, offset}, value); +} + +void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { + bound_samplers.insert_or_assign(offset, sampler); +} + +void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { + bindless_samplers.insert_or_assign({buffer, offset}, sampler); +} + +bool ConstBufferLocker::IsConsistent() const { + if (!engine) { + return false; + } + return std::all_of(keys.begin(), keys.end(), + [this](const auto& pair) { + const auto [cbuf, offset] = pair.first; + const auto value = pair.second; + return value == engine->AccessConstBuffer32(stage, cbuf, offset); + }) && + std::all_of(bound_samplers.begin(), bound_samplers.end(), + [this](const auto& sampler) { + const auto [key, value] = sampler; + return value == engine->AccessBoundSampler(stage, key); + }) && + std::all_of(bindless_samplers.begin(), bindless_samplers.end(), + [this](const auto& sampler) { + const auto [cbuf, offset] = sampler.first; + const auto value = sampler.second; + return value == engine->AccessBindlessSampler(stage, cbuf, offset); + }); +} + +bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { + return keys == rhs.keys && bound_samplers == rhs.bound_samplers && + bindless_samplers == rhs.bindless_samplers; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h new file mode 100644 index 000000000..600e2f3c3 --- /dev/null +++ b/src/video_core/shader/const_buffer_locker.h @@ -0,0 +1,80 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <unordered_map> +#include "common/common_types.h" +#include "common/hash.h" +#include "video_core/engines/const_buffer_engine_interface.h" + +namespace VideoCommon::Shader { + +using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; +using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; +using BindlessSamplerMap = + std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; + +/** + * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader + * compiler. with it, the shader can obtain required data from GPU state and store it for disk + * shader compilation. + **/ +class ConstBufferLocker { +public: + explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); + + explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface& engine); + + ~ConstBufferLocker(); + + /// Retrieves a key from the locker, if it's registered, it will give the registered value, if + /// not it will obtain it from maxwell3d and register it. + std::optional<u32> ObtainKey(u32 buffer, u32 offset); + + std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); + + std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); + + /// Inserts a key. + void InsertKey(u32 buffer, u32 offset, u32 value); + + /// Inserts a bound sampler key. + void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + /// Inserts a bindless sampler key. + void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + /// Checks keys and samplers against engine's current const buffers. Returns true if they are + /// the same value, false otherwise; + bool IsConsistent() const; + + /// Returns true if the keys are equal to the other ones in the locker. + bool HasEqualKeys(const ConstBufferLocker& rhs) const; + + /// Gives an getter to the const buffer keys in the database. + const KeyMap& GetKeys() const { + return keys; + } + + /// Gets samplers database. + const BoundSamplerMap& GetBoundSamplers() const { + return bound_samplers; + } + + /// Gets bindless samplers database. + const BindlessSamplerMap& GetBindlessSamplers() const { + return bindless_samplers; + } + +private: + const Tegra::Engines::ShaderType stage; + Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; + KeyMap keys; + BoundSamplerMap bound_samplers; + BindlessSamplerMap bindless_samplers; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 268d1aed0..d47c63d9f 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -35,14 +35,20 @@ struct BlockStack { std::stack<u32> pbk_stack{}; }; -struct BlockBranchInfo { - Condition condition{}; - s32 address{exit_branch}; - bool kill{}; - bool is_sync{}; - bool is_brk{}; - bool ignore{}; -}; +template <typename T, typename... Args> +BlockBranchInfo MakeBranchInfo(Args&&... args) { + static_assert(std::is_convertible_v<T, BranchData>); + return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); +} + +bool BlockBranchIsIgnored(BlockBranchInfo first) { + bool ignore = false; + if (std::holds_alternative<SingleBranch>(*first)) { + const auto branch = std::get_if<SingleBranch>(first.get()); + ignore = branch->ignore; + } + return ignore; +} struct BlockInfo { u32 start{}; @@ -56,10 +62,11 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, - const u32 start) - : start{start}, program_code{program_code}, program_size{program_size} {} + explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) + : program_code{program_code}, start{start}, locker{locker} {} + const ProgramCode& program_code; + ConstBufferLocker& locker; u32 start{}; std::vector<BlockInfo> block_info{}; std::list<u32> inspect_queries{}; @@ -69,8 +76,6 @@ struct CFGRebuildState { std::map<u32, u32> ssy_labels{}; std::map<u32, u32> pbk_labels{}; std::unordered_map<u32, BlockStack> stacks{}; - const ProgramCode& program_code; - const std::size_t program_size; ASTManager* manager; }; @@ -124,10 +129,116 @@ enum class ParseResult : u32 { AbnormalFlow, }; +struct BranchIndirectInfo { + u32 buffer{}; + u32 offset{}; + u32 entries{}; + s32 relative_position{}; +}; + +std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, + u32 start_address, u32 current_position) { + const u32 shader_start = state.start; + u32 pos = current_position; + BranchIndirectInfo result{}; + u64 track_register = 0; + + // Step 0 Get BRX Info + const Instruction instr = {state.program_code[pos]}; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() != OpCode::Id::BRX) { + return std::nullopt; + } + if (instr.brx.constant_buffer != 0) { + return std::nullopt; + } + track_register = instr.gpr8.Value(); + result.relative_position = instr.brx.GetBranchExtend(); + pos--; + bool found_track = false; + + // Step 1 Track LDC + while (pos >= shader_start) { + if (IsSchedInstruction(pos, shader_start)) { + pos--; + continue; + } + const Instruction instr = {state.program_code[pos]}; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() == OpCode::Id::LD_C) { + if (instr.gpr0.Value() == track_register && + instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) { + result.buffer = instr.cbuf36.index.Value(); + result.offset = static_cast<u32>(instr.cbuf36.GetOffset()); + track_register = instr.gpr8.Value(); + pos--; + found_track = true; + break; + } + } + pos--; + } + + if (!found_track) { + return std::nullopt; + } + found_track = false; + + // Step 2 Track SHL + while (pos >= shader_start) { + if (IsSchedInstruction(pos, shader_start)) { + pos--; + continue; + } + const Instruction instr = state.program_code[pos]; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() == OpCode::Id::SHL_IMM) { + if (instr.gpr0.Value() == track_register) { + track_register = instr.gpr8.Value(); + pos--; + found_track = true; + break; + } + } + pos--; + } + + if (!found_track) { + return std::nullopt; + } + found_track = false; + + // Step 3 Track IMNMX + while (pos >= shader_start) { + if (IsSchedInstruction(pos, shader_start)) { + pos--; + continue; + } + const Instruction instr = state.program_code[pos]; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) { + if (instr.gpr0.Value() == track_register) { + track_register = instr.gpr8.Value(); + result.entries = instr.alu.GetSignedImm20_20() + 1; + pos--; + found_track = true; + break; + } + } + pos--; + } + + if (!found_track) { + return std::nullopt; + } + return result; +} + std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { u32 offset = static_cast<u32>(address); - const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); + const u32 end_address = static_cast<u32>(state.program_code.size()); ParseInfo parse_info{}; + SingleBranch single_branch{}; const auto insert_label = [](CFGRebuildState& state, u32 address) { const auto pair = state.labels.emplace(address); @@ -140,13 +251,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) if (offset >= end_address) { // ASSERT_OR_EXECUTE can't be used, as it ignores the break ASSERT_MSG(false, "Shader passed the current limit!"); - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.ignore = false; + + single_branch.address = exit_branch; + single_branch.ignore = false; break; } if (state.registered.count(offset) != 0) { - parse_info.branch_info.address = offset; - parse_info.branch_info.ignore = true; + single_branch.address = offset; + single_branch.ignore = true; break; } if (IsSchedInstruction(offset, state.start)) { @@ -163,24 +275,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) switch (opcode->get().GetId()) { case OpCode::Id::EXIT: { const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = exit_branch; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } @@ -189,99 +303,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) return {ParseResult::AbnormalFlow, parse_info}; } const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } const u32 branch_offset = offset + instr.bra.GetBranchTarget(); if (branch_offset == 0) { - parse_info.branch_info.address = exit_branch; + single_branch.address = exit_branch; } else { - parse_info.branch_info.address = branch_offset; + single_branch.address = branch_offset; } insert_label(state, branch_offset); - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::SYNC: { const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = unassigned_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = true; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = unassigned_branch; + single_branch.kill = false; + single_branch.is_sync = true; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::BRK: { const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = unassigned_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = true; - parse_info.branch_info.ignore = false; + single_branch.address = unassigned_branch; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = true; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::KIL: { const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.kill = true; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = exit_branch; + single_branch.kill = true; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } @@ -298,6 +420,29 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) break; } case OpCode::Id::BRX: { + auto tmp = TrackBranchIndirectInfo(state, address, offset); + if (tmp) { + auto result = *tmp; + std::vector<CaseBranch> branches{}; + s32 pc_target = offset + result.relative_position; + for (u32 i = 0; i < result.entries; i++) { + auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4); + if (!k) { + return {ParseResult::AbnormalFlow, parse_info}; + } + u32 value = *k; + u32 target = static_cast<u32>((value >> 3) + pc_target); + insert_label(state, target); + branches.emplace_back(value, target); + } + parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<MultiBranch>( + static_cast<u32>(instr.gpr8.Value()), std::move(branches)); + + return {ParseResult::ControlCaught, parse_info}; + } else { + LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); + } return {ParseResult::AbnormalFlow, parse_info}; } default: @@ -306,10 +451,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) offset++; } - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; parse_info.end_address = offset - 1; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, + single_branch.is_brk, single_branch.ignore); return {ParseResult::BlockEnd, parse_info}; } @@ -333,9 +481,10 @@ bool TryInspectAddress(CFGRebuildState& state) { BlockInfo& current_block = state.block_info[block_index]; current_block.end = address - 1; new_block.branch = current_block.branch; - BlockBranchInfo forward_branch{}; - forward_branch.address = address; - forward_branch.ignore = true; + BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); + const auto branch = std::get_if<SingleBranch>(forward_branch.get()); + branch->address = address; + branch->ignore = true; current_block.branch = forward_branch; return true; } @@ -350,12 +499,15 @@ bool TryInspectAddress(CFGRebuildState& state) { BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); block_info.branch = parse_info.branch_info; - if (parse_info.branch_info.condition.IsUnconditional()) { + if (std::holds_alternative<SingleBranch>(*block_info.branch)) { + const auto branch = std::get_if<SingleBranch>(block_info.branch.get()); + if (branch->condition.IsUnconditional()) { + return true; + } + const u32 fallthrough_address = parse_info.end_address + 1; + state.inspect_queries.push_front(fallthrough_address); return true; } - - const u32 fallthrough_address = parse_info.end_address + 1; - state.inspect_queries.push_front(fallthrough_address); return true; } @@ -393,31 +545,42 @@ bool TryQuery(CFGRebuildState& state) { state.queries.pop_front(); gather_labels(q2.ssy_stack, state.ssy_labels, block); gather_labels(q2.pbk_stack, state.pbk_labels, block); - if (!block.branch.condition.IsUnconditional()) { - q2.address = block.end + 1; - state.queries.push_back(q2); - } + if (std::holds_alternative<SingleBranch>(*block.branch)) { + const auto branch = std::get_if<SingleBranch>(block.branch.get()); + if (!branch->condition.IsUnconditional()) { + q2.address = block.end + 1; + state.queries.push_back(q2); + } - Query conditional_query{q2}; - if (block.branch.is_sync) { - if (block.branch.address == unassigned_branch) { - block.branch.address = conditional_query.ssy_stack.top(); + Query conditional_query{q2}; + if (branch->is_sync) { + if (branch->address == unassigned_branch) { + branch->address = conditional_query.ssy_stack.top(); + } + conditional_query.ssy_stack.pop(); } - conditional_query.ssy_stack.pop(); - } - if (block.branch.is_brk) { - if (block.branch.address == unassigned_branch) { - block.branch.address = conditional_query.pbk_stack.top(); + if (branch->is_brk) { + if (branch->address == unassigned_branch) { + branch->address = conditional_query.pbk_stack.top(); + } + conditional_query.pbk_stack.pop(); } - conditional_query.pbk_stack.pop(); + conditional_query.address = branch->address; + state.queries.push_back(std::move(conditional_query)); + return true; + } + const auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); + for (const auto& branch_case : multi_branch->branches) { + Query conditional_query{q2}; + conditional_query.address = branch_case.address; + state.queries.push_back(std::move(conditional_query)); } - conditional_query.address = block.branch.address; - state.queries.push_back(std::move(conditional_query)); return true; } + } // Anonymous namespace -void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { +void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { const auto get_expr = ([&](const Condition& cond) -> Expr { Expr result{}; if (cond.cc != ConditionCode::T) { @@ -444,15 +607,24 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { } return MakeExpr<ExprBoolean>(true); }); - if (branch.address < 0) { - if (branch.kill) { - mm.InsertReturn(get_expr(branch.condition), true); + if (std::holds_alternative<SingleBranch>(*branch_info)) { + const auto branch = std::get_if<SingleBranch>(branch_info.get()); + if (branch->address < 0) { + if (branch->kill) { + mm.InsertReturn(get_expr(branch->condition), true); + return; + } + mm.InsertReturn(get_expr(branch->condition), false); return; } - mm.InsertReturn(get_expr(branch.condition), false); + mm.InsertGoto(get_expr(branch->condition), branch->address); return; } - mm.InsertGoto(get_expr(branch.condition), branch.address); + const auto multi_branch = std::get_if<MultiBranch>(branch_info.get()); + for (const auto& branch_case : multi_branch->branches) { + mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), + branch_case.address); + } } void DecompileShader(CFGRebuildState& state) { @@ -464,25 +636,26 @@ void DecompileShader(CFGRebuildState& state) { if (state.labels.count(block.start) != 0) { state.manager->InsertLabel(block.start); } - u32 end = block.branch.ignore ? block.end + 1 : block.end; + const bool ignore = BlockBranchIsIgnored(block.branch); + u32 end = ignore ? block.end + 1 : block.end; state.manager->InsertBlock(block.start, end); - if (!block.branch.ignore) { + if (!ignore) { InsertBranch(*state.manager, block.branch); } } state.manager->Decompile(); } -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address, - const CompilerSettings& settings) { +std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, + const CompilerSettings& settings, + ConstBufferLocker& locker) { auto result_out = std::make_unique<ShaderCharacteristics>(); if (settings.depth == CompileDepth::BruteForce) { result_out->settings.depth = CompileDepth::BruteForce; return result_out; } - CFGRebuildState state{program_code, program_size, start_address}; + CFGRebuildState state{program_code, start_address, locker}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); @@ -547,11 +720,9 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, ShaderBlock new_block{}; new_block.start = block.start; new_block.end = block.end; - new_block.ignore_branch = block.branch.ignore; + new_block.ignore_branch = BlockBranchIsIgnored(block.branch); if (!new_block.ignore_branch) { - new_block.branch.cond = block.branch.condition; - new_block.branch.kills = block.branch.kill; - new_block.branch.address = block.branch.address; + new_block.branch = block.branch; } result_out->end = std::max(result_out->end, block.end); result_out->blocks.push_back(new_block); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 74e54a5c7..5304998b9 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -7,6 +7,7 @@ #include <list> #include <optional> #include <set> +#include <variant> #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/ast.h" @@ -37,29 +38,61 @@ struct Condition { } }; -struct ShaderBlock { - struct Branch { - Condition cond{}; - bool kills{}; - s32 address{}; +class SingleBranch { +public: + SingleBranch() = default; + SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk, + bool ignore) + : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk}, + ignore{ignore} {} + + bool operator==(const SingleBranch& b) const { + return std::tie(condition, address, kill, is_sync, is_brk, ignore) == + std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); + } + + bool operator!=(const SingleBranch& b) const { + return !operator==(b); + } + + Condition condition{}; + s32 address{exit_branch}; + bool kill{}; + bool is_sync{}; + bool is_brk{}; + bool ignore{}; +}; - bool operator==(const Branch& b) const { - return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); - } +struct CaseBranch { + CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {} + u32 cmp_value; + u32 address; +}; + +class MultiBranch { +public: + MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches) + : gpr{gpr}, branches{std::move(branches)} {} + + u32 gpr{}; + std::vector<CaseBranch> branches{}; +}; + +using BranchData = std::variant<SingleBranch, MultiBranch>; +using BlockBranchInfo = std::shared_ptr<BranchData>; - bool operator!=(const Branch& b) const { - return !operator==(b); - } - }; +bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); +struct ShaderBlock { u32 start{}; u32 end{}; bool ignore_branch{}; - Branch branch{}; + BlockBranchInfo branch{}; bool operator==(const ShaderBlock& sb) const { - return std::tie(start, end, ignore_branch, branch) == - std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); + return std::tie(start, end, ignore_branch) == + std::tie(sb.start, sb.end, sb.ignore_branch) && + BlockBranchInfoAreEqual(branch, sb.branch); } bool operator!=(const ShaderBlock& sb) const { @@ -76,8 +109,8 @@ struct ShaderCharacteristics { CompilerSettings settings{}; }; -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address, - const CompilerSettings& settings); +std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, + const CompilerSettings& settings, + ConstBufferLocker& locker); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2626b1616..21fb9cb83 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } -} // namespace +} // Anonymous namespace class ASTDecoder { public: @@ -102,7 +102,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); decompiled = false; - auto info = ScanFlow(program_code, program_size, main_offset, settings); + auto info = ScanFlow(program_code, main_offset, settings, locker); auto& shader_info = *info; coverage_begin = shader_info.start; coverage_end = shader_info.end; @@ -155,7 +155,7 @@ void ShaderIR::Decode() { [[fallthrough]]; case CompileDepth::BruteForce: { coverage_begin = main_offset; - const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); + const std::size_t shader_end = program_code.size(); coverage_end = shader_end; for (u32 label = main_offset; label < shader_end; label++) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); @@ -198,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { } return result; }; - if (block.branch.address < 0) { - if (block.branch.kills) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(block.branch.cond, n); + if (std::holds_alternative<SingleBranch>(*block.branch)) { + auto branch = std::get_if<SingleBranch>(block.branch.get()); + if (branch->address < 0) { + if (branch->kill) { + Node n = Operation(OperationCode::Discard); + n = apply_conditions(branch->condition, n); + bb.push_back(n); + global_code.push_back(n); + return; + } + Node n = Operation(OperationCode::Exit); + n = apply_conditions(branch->condition, n); bb.push_back(n); global_code.push_back(n); return; } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(block.branch.cond, n); + Node n = Operation(OperationCode::Branch, Immediate(branch->address)); + n = apply_conditions(branch->condition, n); bb.push_back(n); global_code.push_back(n); return; } - Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); - n = apply_conditions(block.branch.cond, n); - bb.push_back(n); - global_code.push_back(n); + auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); + Node op_a = GetRegister(multi_branch->gpr); + for (auto& branch_case : multi_branch->branches) { + Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); + Node op_b = Immediate(branch_case.cmp_value); + Node condition = + GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); + auto result = Conditional(condition, {n}); + bb.push_back(result); + global_code.push_back(result); + } } u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index b73f6536e..a33d242e9 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -144,7 +144,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::ICMP_IMM: { const Node zero = Immediate(0); - const auto [op_b, test] = [&]() -> std::pair<Node, Node> { + const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::ICMP_CR: return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), @@ -161,10 +161,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { return {zero, zero}; } }(); - const Node op_a = GetRegister(instr.gpr8); + const Node op_lhs = GetRegister(instr.gpr8); const Node comparison = GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); - SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_a, op_b)); + SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); break; } case OpCode::Id::LOP_C: diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 95ec1cdd9..b02d2cb95 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -144,8 +144,8 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { const auto offset{static_cast<std::size_t>(image.index.Value())}; - if (const auto image = TryUseExistingImage(offset, type)) { - return *image; + if (const auto existing_image = TryUseExistingImage(offset, type)) { + return *existing_image; } const std::size_t next_index{used_images.size()}; diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 7923d4d69..335d78146 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -166,9 +166,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }(); const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, instr, false); + TrackGlobalMemory(bb, instr, false); const u32 count = GetUniformTypeElementsCount(type); + if (!real_address_base || !base_address) { + // Tracking failed, load zeroes. + for (u32 i = 0; i < count; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); + } + break; + } + for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = @@ -260,22 +268,19 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }(); const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, instr, true); - - // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} - SetTemporary(bb, 0, real_address_base); + TrackGlobalMemory(bb, instr, true); + if (!real_address_base || !base_address) { + // Tracking failed, skip the store. + break; + } const u32 count = GetUniformTypeElementsCount(type); for (u32 i = 0; i < count; ++i) { - SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); - } - for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); - const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - - bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1))); + const Node value = GetRegister(instr.gpr0.Value() + i); + bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } @@ -301,15 +306,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return pc; } -std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, - Instruction instr, - bool is_write) { +std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, + Instruction instr, + bool is_write) { const auto addr_register{GetRegister(instr.gmem.gpr)}; const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; const auto [base_address, index, offset] = TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - ASSERT(base_address != nullptr); + ASSERT_OR_EXECUTE_MSG(base_address != nullptr, + { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, + "Global memory tracking failed"); bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d46e0f823..116b95f76 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -67,7 +67,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::MOV_SYS: { - const Node value = [&]() { + const Node value = [this, instr] { switch (instr.sys20) { case SystemVariable::Ydirection: return Operation(OperationCode::YNegate); diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index f6ee68a54..d419e9c45 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -18,7 +18,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() { + Node op_b = [this, instr] { if (instr.is_b_imm) { return Immediate(instr.alu.GetSignedImm20_20()); } else if (instr.is_b_gpr) { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b934a069..d61e656b7 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); const auto& sampler = - GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); + GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -150,7 +150,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } - WriteTexsInstructionFloat(bb, instr, values); + WriteTexsInstructionFloat(bb, instr, values, true); break; } case OpCode::Id::TXQ_B: @@ -165,10 +165,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - is_bindless - ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, - false) - : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); u32 indexer = 0; switch (instr.txq.query_type) { @@ -207,9 +204,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = is_bindless - ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) - : GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = + is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) + : GetSampler(instr.sampler, {{texture_type, is_array, false}}); std::vector<Node> coords; @@ -285,9 +282,26 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { return pc; } -const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, - bool is_array, bool is_shadow) { - const auto offset = static_cast<std::size_t>(sampler.index.Value()); +const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, + std::optional<SamplerInfo> sampler_info) { + const auto offset = static_cast<u32>(sampler.index.Value()); + + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + if (sampler_info) { + type = sampler_info->type; + is_array = sampler_info->is_array; + is_shadow = sampler_info->is_shadow; + } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; + } else { + type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + is_shadow = false; + } // If this sampler has already been used, return the existing mapping. const auto itr = @@ -303,15 +317,31 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu const std::size_t next_index = used_samplers.size(); const Sampler entry{offset, next_index, type, is_array, is_shadow}; return *used_samplers.emplace(entry).first; -} +} // namespace VideoCommon::Shader -const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, - bool is_array, bool is_shadow) { +const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, + std::optional<SamplerInfo> sampler_info) { const Node sampler_register = GetRegister(reg); const auto [base_sampler, cbuf_index, cbuf_offset] = TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); ASSERT(base_sampler != nullptr); const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + if (sampler_info) { + type = sampler_info->type; + is_array = sampler_info->is_array; + is_shadow = sampler_info->is_shadow; + } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; + } else { + type = Tegra::Shader::TextureType::Texture2D; + is_array = false; + is_shadow = false; + } // If this sampler has already been used, return the existing mapping. const auto itr = @@ -344,14 +374,14 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const } } -void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, - const Node4& components) { +void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, + bool ignore_mask) { // TEXS has two destination registers and a swizzle. The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 u32 dest_elem = 0; for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component)) + if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) continue; SetTemporary(bb, dest_elem++, components[component]); } @@ -411,9 +441,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = is_bindless - ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) - : GetSampler(instr.sampler, texture_type, is_array, is_shadow); + const auto& sampler = + is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) + : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}}); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || @@ -577,7 +607,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de dc = GetRegister(parameter_register++); } - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -610,7 +640,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -646,7 +676,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is // When lod is used always is in gpr20 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 97fc6f9b1..b047cf870 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -23,7 +23,7 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { const Node op_a = GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, instr.video.type_a, instr.video.byte_height_a); - const Node op_b = [&]() { + const Node op_b = [this, instr] { if (instr.video.use_register_b) { return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, instr.video.signed_b, instr.video.type_b, diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index a8e481b3c..fa8a250cc 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -46,9 +46,10 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::SHFL: { - Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); - Node width = [&] { + Node width = [this, instr] { + Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) + : GetRegister(instr.gpr39); + // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has // been done reversing Nvidia's math. It won't work on all cases due to SHFL having // different parameters that don't properly map to GLSL's interface, but it should work diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h index d3dcd00ec..4e8264367 100644 --- a/src/video_core/shader/expr.h +++ b/src/video_core/shader/expr.h @@ -17,13 +17,14 @@ using Tegra::Shader::Pred; class ExprAnd; class ExprBoolean; class ExprCondCode; +class ExprGprEqual; class ExprNot; class ExprOr; class ExprPredicate; class ExprVar; -using ExprData = - std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, ExprBoolean>; +using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, + ExprBoolean, ExprGprEqual>; using Expr = std::shared_ptr<ExprData>; class ExprAnd final { @@ -118,6 +119,22 @@ public: bool value; }; +class ExprGprEqual final { +public: + ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {} + + bool operator==(const ExprGprEqual& b) const { + return gpr == b.gpr && value == b.value; + } + + bool operator!=(const ExprGprEqual& b) const { + return !operator==(b); + } + + u32 gpr; + u32 value; +}; + template <typename T, typename... Args> Expr MakeExpr(Args&&... args) { static_assert(std::is_convertible_v<T, ExprData>); diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 338bab17c..447fb5c1d 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -410,7 +410,7 @@ public: explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} explicit OperationNode(OperationCode code, Meta meta) - : OperationNode(code, meta, std::vector<Node>{}) {} + : OperationNode(code, std::move(meta), std::vector<Node>{}) {} explicit OperationNode(OperationCode code, std::vector<Node> operands) : OperationNode(code, Meta{}, std::move(operands)) {} diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index c1f2b88c8..1d9825c76 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -2,8 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> +#include <array> #include <cmath> -#include <unordered_map> #include "common/assert.h" #include "common/common_types.h" @@ -22,10 +23,9 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; -ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, - CompilerSettings settings) - : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, - program_manager{true, true}, settings{settings} { +ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + ConstBufferLocker& locker) + : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { Decode(); } @@ -271,21 +271,24 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { } Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { - const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { - {PredCondition::LessThan, OperationCode::LogicalFLessThan}, - {PredCondition::Equal, OperationCode::LogicalFEqual}, - {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, - {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, - {PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, - {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, - {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, - {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, - {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, - {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, - {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}}; - - const auto comparison{PredicateComparisonTable.find(condition)}; - UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), + static constexpr std::array comparison_table{ + std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan}, + std::pair{PredCondition::Equal, OperationCode::LogicalFEqual}, + std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, + std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, + std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, + std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, + std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, + std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, + std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, + std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, + std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}, + }; + + const auto comparison = + std::find_if(comparison_table.cbegin(), comparison_table.cend(), + [condition](const auto entry) { return condition == entry.first; }); + UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), "Unknown predicate comparison operation"); Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); @@ -306,21 +309,24 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, Node op_b) { - const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { - {PredCondition::LessThan, OperationCode::LogicalILessThan}, - {PredCondition::Equal, OperationCode::LogicalIEqual}, - {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, - {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan}, - {PredCondition::NotEqual, OperationCode::LogicalINotEqual}, - {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, - {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan}, - {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual}, - {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual}, - {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan}, - {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}}; - - const auto comparison{PredicateComparisonTable.find(condition)}; - UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), + static constexpr std::array comparison_table{ + std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan}, + std::pair{PredCondition::Equal, OperationCode::LogicalIEqual}, + std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual}, + std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan}, + std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual}, + std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, + std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan}, + std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual}, + std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual}, + std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan}, + std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}, + }; + + const auto comparison = + std::find_if(comparison_table.cbegin(), comparison_table.cend(), + [condition](const auto entry) { return condition == entry.first; }); + UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), "Unknown predicate comparison operation"); Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), @@ -337,36 +343,43 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b) { - const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { - {PredCondition::LessThan, OperationCode::Logical2HLessThan}, - {PredCondition::Equal, OperationCode::Logical2HEqual}, - {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, - {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, - {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, - {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, - {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, - {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, - {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, - {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, - {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}}; - - const auto comparison{PredicateComparisonTable.find(condition)}; - UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), + static constexpr std::array comparison_table{ + std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan}, + std::pair{PredCondition::Equal, OperationCode::Logical2HEqual}, + std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, + std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, + std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, + std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, + std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, + std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, + std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, + std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, + std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}, + }; + + const auto comparison = + std::find_if(comparison_table.cbegin(), comparison_table.cend(), + [condition](const auto entry) { return condition == entry.first; }); + UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), "Unknown predicate comparison operation"); return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); } OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { - const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { - {PredOperation::And, OperationCode::LogicalAnd}, - {PredOperation::Or, OperationCode::LogicalOr}, - {PredOperation::Xor, OperationCode::LogicalXor}, + static constexpr std::array operation_table{ + OperationCode::LogicalAnd, + OperationCode::LogicalOr, + OperationCode::LogicalXor, }; - const auto op = PredicateOperationTable.find(operation); - UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation"); - return op->second; + const auto index = static_cast<std::size_t>(operation); + if (index >= operation_table.size()) { + UNIMPLEMENTED_MSG("Unknown predicate operation."); + return {}; + } + + return operation_table[index]; } Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 105981d67..1fd44bde1 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -17,6 +17,7 @@ #include "video_core/engines/shader_header.h" #include "video_core/shader/ast.h" #include "video_core/shader/compiler_settings.h" +#include "video_core/shader/const_buffer_locker.h" #include "video_core/shader/node.h" namespace VideoCommon::Shader { @@ -66,8 +67,8 @@ struct GlobalMemoryUsage { class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, - CompilerSettings settings); + explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + ConstBufferLocker& locker); ~ShaderIR(); const std::map<u32, NodeBlock>& GetBasicBlocks() const { @@ -172,6 +173,13 @@ public: private: friend class ASTDecoder; + + struct SamplerInfo { + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + }; + void Decode(); NodeBlock DecodeRange(u32 begin, u32 end); @@ -296,12 +304,11 @@ private: /// Accesses a texture sampler const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, - Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + std::optional<SamplerInfo> sampler_info); // Accesses a texture sampler for a bindless texture. const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, - Tegra::Shader::TextureType type, bool is_array, - bool is_shadow); + std::optional<SamplerInfo> sampler_info); /// Accesses an image. Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); @@ -322,7 +329,7 @@ private: const Node4& components); void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components); + const Node4& components, bool ignore_mask = false); void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); @@ -371,12 +378,15 @@ private: std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor) const; - std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory( - NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write); + std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, + Tegra::Shader::Instruction instr, + bool is_write); const ProgramCode& program_code; const u32 main_offset; - const std::size_t program_size; + const CompilerSettings settings; + ConstBufferLocker& locker; + bool decompiled{}; bool disable_flow_stack{}; @@ -385,8 +395,7 @@ private: std::map<u32, NodeBlock> basic_blocks; NodeBlock global_code; - ASTManager program_manager; - CompilerSettings settings{}; + ASTManager program_manager{true, true}; std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 250afc6d6..9a3c05288 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -212,6 +212,14 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, break; } break; + case Tegra::Texture::TextureFormat::A4B4G4R4: + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::R4G4B4A4U; + default: + break; + } + break; case Tegra::Texture::TextureFormat::R8: switch (component_type) { case Tegra::Texture::ComponentType::UNORM: @@ -252,6 +260,7 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, default: break; } + break; case Tegra::Texture::TextureFormat::R32_G32_B32_A32: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: @@ -350,6 +359,16 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5; case Tegra::Texture::TextureFormat::ASTC_2D_10X8: return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8; + case Tegra::Texture::TextureFormat::ASTC_2D_6X6: + return is_srgb ? PixelFormat::ASTC_2D_6X6_SRGB : PixelFormat::ASTC_2D_6X6; + case Tegra::Texture::TextureFormat::ASTC_2D_10X10: + return is_srgb ? PixelFormat::ASTC_2D_10X10_SRGB : PixelFormat::ASTC_2D_10X10; + case Tegra::Texture::TextureFormat::ASTC_2D_12X12: + return is_srgb ? PixelFormat::ASTC_2D_12X12_SRGB : PixelFormat::ASTC_2D_12X12; + case Tegra::Texture::TextureFormat::ASTC_2D_8X6: + return is_srgb ? PixelFormat::ASTC_2D_8X6_SRGB : PixelFormat::ASTC_2D_8X6; + case Tegra::Texture::TextureFormat::ASTC_2D_6X5: + return is_srgb ? PixelFormat::ASTC_2D_6X5_SRGB : PixelFormat::ASTC_2D_6X5; case Tegra::Texture::TextureFormat::R16_G16: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: @@ -510,6 +529,16 @@ bool IsPixelFormatASTC(PixelFormat format) { case PixelFormat::ASTC_2D_8X5_SRGB: case PixelFormat::ASTC_2D_10X8: case PixelFormat::ASTC_2D_10X8_SRGB: + case PixelFormat::ASTC_2D_6X6: + case PixelFormat::ASTC_2D_6X6_SRGB: + case PixelFormat::ASTC_2D_10X10: + case PixelFormat::ASTC_2D_10X10_SRGB: + case PixelFormat::ASTC_2D_12X12: + case PixelFormat::ASTC_2D_12X12_SRGB: + case PixelFormat::ASTC_2D_8X6: + case PixelFormat::ASTC_2D_8X6_SRGB: + case PixelFormat::ASTC_2D_6X5: + case PixelFormat::ASTC_2D_6X5_SRGB: return true; default: return false; @@ -530,6 +559,11 @@ bool IsPixelFormatSRGB(PixelFormat format) { case PixelFormat::ASTC_2D_5X4_SRGB: case PixelFormat::ASTC_2D_5X5_SRGB: case PixelFormat::ASTC_2D_10X8_SRGB: + case PixelFormat::ASTC_2D_6X6_SRGB: + case PixelFormat::ASTC_2D_10X10_SRGB: + case PixelFormat::ASTC_2D_12X12_SRGB: + case PixelFormat::ASTC_2D_8X6_SRGB: + case PixelFormat::ASTC_2D_6X5_SRGB: return true; default: return false; diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 1e1c432a5..97668f802 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -67,27 +67,38 @@ enum class PixelFormat { DXT23_SRGB = 49, DXT45_SRGB = 50, BC7U_SRGB = 51, - ASTC_2D_4X4_SRGB = 52, - ASTC_2D_8X8_SRGB = 53, - ASTC_2D_8X5_SRGB = 54, - ASTC_2D_5X4_SRGB = 55, - ASTC_2D_5X5 = 56, - ASTC_2D_5X5_SRGB = 57, - ASTC_2D_10X8 = 58, - ASTC_2D_10X8_SRGB = 59, + R4G4B4A4U = 52, + ASTC_2D_4X4_SRGB = 53, + ASTC_2D_8X8_SRGB = 54, + ASTC_2D_8X5_SRGB = 55, + ASTC_2D_5X4_SRGB = 56, + ASTC_2D_5X5 = 57, + ASTC_2D_5X5_SRGB = 58, + ASTC_2D_10X8 = 59, + ASTC_2D_10X8_SRGB = 60, + ASTC_2D_6X6 = 61, + ASTC_2D_6X6_SRGB = 62, + ASTC_2D_10X10 = 63, + ASTC_2D_10X10_SRGB = 64, + ASTC_2D_12X12 = 65, + ASTC_2D_12X12_SRGB = 66, + ASTC_2D_8X6 = 67, + ASTC_2D_8X6_SRGB = 68, + ASTC_2D_6X5 = 69, + ASTC_2D_6X5_SRGB = 70, MaxColorFormat, // Depth formats - Z32F = 60, - Z16 = 61, + Z32F = 71, + Z16 = 72, MaxDepthFormat, // DepthStencil formats - Z24S8 = 62, - S8Z24 = 63, - Z32FS8 = 64, + Z24S8 = 73, + S8Z24 = 74, + Z32FS8 = 75, MaxDepthStencilFormat, @@ -177,6 +188,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ 2, // DXT23_SRGB 2, // DXT45_SRGB 2, // BC7U_SRGB + 0, // R4G4B4A4U 2, // ASTC_2D_4X4_SRGB 2, // ASTC_2D_8X8_SRGB 2, // ASTC_2D_8X5_SRGB @@ -185,6 +197,16 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ 2, // ASTC_2D_5X5_SRGB 2, // ASTC_2D_10X8 2, // ASTC_2D_10X8_SRGB + 2, // ASTC_2D_6X6 + 2, // ASTC_2D_6X6_SRGB + 2, // ASTC_2D_10X10 + 2, // ASTC_2D_10X10_SRGB + 2, // ASTC_2D_12X12 + 2, // ASTC_2D_12X12_SRGB + 2, // ASTC_2D_8X6 + 2, // ASTC_2D_8X6_SRGB + 2, // ASTC_2D_6X5 + 2, // ASTC_2D_6X5_SRGB 0, // Z32F 0, // Z16 0, // Z24S8 @@ -261,6 +283,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 4, // DXT23_SRGB 4, // DXT45_SRGB 4, // BC7U_SRGB + 1, // R4G4B4A4U 4, // ASTC_2D_4X4_SRGB 8, // ASTC_2D_8X8_SRGB 8, // ASTC_2D_8X5_SRGB @@ -269,6 +292,16 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 5, // ASTC_2D_5X5_SRGB 10, // ASTC_2D_10X8 10, // ASTC_2D_10X8_SRGB + 6, // ASTC_2D_6X6 + 6, // ASTC_2D_6X6_SRGB + 10, // ASTC_2D_10X10 + 10, // ASTC_2D_10X10_SRGB + 12, // ASTC_2D_12X12 + 12, // ASTC_2D_12X12_SRGB + 8, // ASTC_2D_8X6 + 8, // ASTC_2D_8X6_SRGB + 6, // ASTC_2D_6X5 + 6, // ASTC_2D_6X5_SRGB 1, // Z32F 1, // Z16 1, // Z24S8 @@ -285,71 +318,82 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { } constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG32UI - 1, // RGBX16F - 1, // R32UI - 8, // ASTC_2D_8X8 - 5, // ASTC_2D_8X5 - 4, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 4, // ASTC_2D_4X4_SRGB - 8, // ASTC_2D_8X8_SRGB - 5, // ASTC_2D_8X5_SRGB - 4, // ASTC_2D_5X4_SRGB - 5, // ASTC_2D_5X5 - 5, // ASTC_2D_5X5_SRGB - 8, // ASTC_2D_10X8 - 8, // ASTC_2D_10X8_SRGB - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 + 1, // ABGR8U + 1, // ABGR8S + 1, // ABGR8UI + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U + 1, // R8UI + 1, // RGBA16F + 1, // RGBA16U + 1, // RGBA16UI + 1, // R11FG11FB10F + 1, // RGBA32UI + 4, // DXT1 + 4, // DXT23 + 4, // DXT45 + 4, // DXN1 + 4, // DXN2UNORM + 4, // DXN2SNORM + 4, // BC7U + 4, // BC6H_UF16 + 4, // BC6H_SF16 + 4, // ASTC_2D_4X4 + 1, // BGRA8 + 1, // RGBA32F + 1, // RG32F + 1, // R32F + 1, // R16F + 1, // R16U + 1, // R16S + 1, // R16UI + 1, // R16I + 1, // RG16 + 1, // RG16F + 1, // RG16UI + 1, // RG16I + 1, // RG16S + 1, // RGB32F + 1, // RGBA8_SRGB + 1, // RG8U + 1, // RG8S + 1, // RG32UI + 1, // RGBX16F + 1, // R32UI + 8, // ASTC_2D_8X8 + 5, // ASTC_2D_8X5 + 4, // ASTC_2D_5X4 + 1, // BGRA8_SRGB + 4, // DXT1_SRGB + 4, // DXT23_SRGB + 4, // DXT45_SRGB + 4, // BC7U_SRGB + 1, // R4G4B4A4U + 4, // ASTC_2D_4X4_SRGB + 8, // ASTC_2D_8X8_SRGB + 5, // ASTC_2D_8X5_SRGB + 4, // ASTC_2D_5X4_SRGB + 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_SRGB + 8, // ASTC_2D_10X8 + 8, // ASTC_2D_10X8_SRGB + 6, // ASTC_2D_6X6 + 6, // ASTC_2D_6X6_SRGB + 10, // ASTC_2D_10X10 + 10, // ASTC_2D_10X10_SRGB + 12, // ASTC_2D_12X12 + 12, // ASTC_2D_12X12_SRGB + 6, // ASTC_2D_8X6 + 6, // ASTC_2D_8X6_SRGB + 5, // ASTC_2D_6X5 + 5, // ASTC_2D_6X5_SRGB + 1, // Z32F + 1, // Z16 + 1, // Z24S8 + 1, // S8Z24 + 1, // Z32FS8 }}; static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { @@ -413,6 +457,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 128, // DXT23_SRGB 128, // DXT45_SRGB 128, // BC7U + 16, // R4G4B4A4U 128, // ASTC_2D_4X4_SRGB 128, // ASTC_2D_8X8_SRGB 128, // ASTC_2D_8X5_SRGB @@ -421,6 +466,16 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 128, // ASTC_2D_5X5_SRGB 128, // ASTC_2D_10X8 128, // ASTC_2D_10X8_SRGB + 128, // ASTC_2D_6X6 + 128, // ASTC_2D_6X6_SRGB + 128, // ASTC_2D_10X10 + 128, // ASTC_2D_10X10_SRGB + 128, // ASTC_2D_12X12 + 128, // ASTC_2D_12X12_SRGB + 128, // ASTC_2D_8X6 + 128, // ASTC_2D_8X6_SRGB + 128, // ASTC_2D_6X5 + 128, // ASTC_2D_6X5_SRGB 32, // Z32F 16, // Z16 32, // Z24S8 @@ -504,6 +559,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table SurfaceCompression::Compressed, // DXT23_SRGB SurfaceCompression::Compressed, // DXT45_SRGB SurfaceCompression::Compressed, // BC7U_SRGB + SurfaceCompression::None, // R4G4B4A4U SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB @@ -512,6 +568,16 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB SurfaceCompression::Converted, // ASTC_2D_10X8 SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB + SurfaceCompression::Converted, // ASTC_2D_6X6 + SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB + SurfaceCompression::Converted, // ASTC_2D_10X10 + SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB + SurfaceCompression::Converted, // ASTC_2D_12X12 + SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X6 + SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB + SurfaceCompression::Converted, // ASTC_2D_6X5 + SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB SurfaceCompression::None, // Z32F SurfaceCompression::None, // Z16 SurfaceCompression::None, // Z24S8 diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 683c49207..829268b4c 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/algorithm.h" #include "common/assert.h" #include "common/common_types.h" #include "common/microprofile.h" diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 5e497e49f..1bed82898 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -4,12 +4,11 @@ #pragma once -#include <algorithm> +#include <optional> +#include <tuple> #include <unordered_map> #include <vector> -#include "common/assert.h" -#include "common/binary_find.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/morton.h" diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ca2da8f97..6a92b22d3 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -62,10 +62,10 @@ public: } } - /*** - * `Guard` guarantees that rendertargets don't unregister themselves if the + /** + * Guarantees that rendertargets don't unregister themselves if the * collide. Protection is currently only done on 3D slices. - ***/ + */ void GuardRenderTargets(bool new_guard) { guard_render_targets = new_guard; } @@ -287,7 +287,7 @@ protected: const Tegra::Engines::Fermi2D::Config& copy_config) = 0; // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture - // and reading it from a sepparate buffer. + // and reading it from a separate buffer. virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void ManageRenderTargetUnregister(TSurface& surface) { @@ -386,12 +386,13 @@ private: }; /** - * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. - * @param overlaps, the overlapping surfaces registered in the cache. - * @param params, the paremeters on the new surface. - * @param gpu_addr, the starting address of the new surface. - * @param untopological, tells the recycler that the texture has no way to match the overlaps - * due to topological reasons. + * Takes care of selecting a proper strategy to deal with a texture recycle. + * + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters on the new surface. + * @param gpu_addr The starting address of the new surface. + * @param untopological Indicates to the recycler that the texture has no way + * to match the overlaps due to topological reasons. **/ RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { @@ -402,7 +403,7 @@ private: if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; } - for (auto s : overlaps) { + for (const auto& s : overlaps) { const auto& s_params = s->GetSurfaceParams(); if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -419,16 +420,19 @@ private: } /** - * `RecycleSurface` es a method we use to decide what to do with textures we can't resolve in - *the cache It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the - *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the - *new surface from that data. - * @param overlaps, the overlapping surfaces registered in the cache. - * @param params, the paremeters on the new surface. - * @param gpu_addr, the starting address of the new surface. - * @param preserve_contents, tells if the new surface should be loaded from meory or left blank - * @param untopological, tells the recycler that the texture has no way to match the overlaps - * due to topological reasons. + * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented + * strategies: Ignore and Flush. + * + * - Ignore: Just unregisters all the overlaps and loads the new texture. + * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. + * + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters for the new surface. + * @param gpu_addr The starting address of the new surface. + * @param preserve_contents Indicates that the new surface should be loaded from memory or left + * blank. + * @param untopological Indicates to the recycler that the texture has no way to match the + * overlaps due to topological reasons. **/ std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, @@ -465,10 +469,12 @@ private: } /** - * `RebuildSurface` this method takes a single surface and recreates into another that - * may differ in format, target or width alingment. - * @param current_surface, the registered surface in the cache which we want to convert. - * @param params, the new surface params which we'll use to recreate the surface. + * Takes a single surface and recreates into another that may differ in + * format, target or width alignment. + * + * @param current_surface The registered surface in the cache which we want to convert. + * @param params The new surface params which we'll use to recreate the surface. + * @param is_render Whether or not the surface is a render target. **/ std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, bool is_render) { @@ -502,12 +508,14 @@ private: } /** - * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's - * params if it's an exact match, we return the main view of the registered surface. If it's - * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats + * Takes a single surface and checks with the new surface's params if it's an exact + * match, we return the main view of the registered surface. If its formats don't + * match, we rebuild the surface. We call this last method a `Mirage`. If formats * match but the targets don't, we create an overview View of the registered surface. - * @param current_surface, the registered surface in the cache which we want to convert. - * @param params, the new surface params which we want to check. + * + * @param current_surface The registered surface in the cache which we want to convert. + * @param params The new surface params which we want to check. + * @param is_render Whether or not the surface is a render target. **/ std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, const SurfaceParams& params, bool is_render) { @@ -529,13 +537,14 @@ private: } /** - * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface - * matches the candidate in some way, we got no guarantess here. We try to see if the overlaps - * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface - * for them, else we return nothing. - * @param overlaps, the overlapping surfaces registered in the cache. - * @param params, the paremeters on the new surface. - * @param gpu_addr, the starting address of the new surface. + * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate + * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps + * of the new surface, if they all match we end up recreating a surface for them, + * else we return nothing. + * + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters on the new surface. + * @param gpu_addr The starting address of the new surface. **/ std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, const SurfaceParams& params, @@ -575,7 +584,7 @@ private: } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { return {}; } - for (auto surface : overlaps) { + for (const auto& surface : overlaps) { Unregister(surface); } new_surface->MarkAsModified(modified, Tick()); @@ -584,19 +593,27 @@ private: } /** - * `GetSurface` gets the starting address and parameters of a candidate surface and tries - * to find a matching surface within the cache. This is done in 3 big steps. The first is to - * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. - * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from - * memory else we move to step 3. Step 3 consists on figuring the relationship between the - * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many - * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the - * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to - * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface - * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface. - * @param gpu_addr, the starting address of the candidate surface. - * @param params, the paremeters on the candidate surface. - * @param preserve_contents, tells if the new surface should be loaded from meory or left blank. + * Gets the starting address and parameters of a candidate surface and tries + * to find a matching surface within the cache. This is done in 3 big steps: + * + * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. + * + * 2. Check if there are any overlaps at all, if there are none, we just load the texture from + * memory else we move to step 3. + * + * 3. Consists of figuring out the relationship between the candidate texture and the + * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If + * there's many, we just try to reconstruct a new surface out of them based on the + * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we + * have to check if the candidate is a view (layer/mipmap) of the overlap or if the + * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct + * a new surface. + * + * @param gpu_addr The starting address of the candidate surface. + * @param params The parameters on the candidate surface. + * @param preserve_contents Indicates that the new surface should be loaded from memory or + * left blank. + * @param is_render Whether or not the surface is a render target. **/ std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { @@ -651,7 +668,7 @@ private: // Step 3 // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. If it fails - // inmediatly recycle the texture + // immediately recycle the texture for (const auto& surface : overlaps) { const auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { @@ -720,12 +737,13 @@ private: } /** - * `DeduceSurface` gets the starting address and parameters of a candidate surface and tries - * to find a matching surface within the cache that's similar to it. If there are many textures + * Gets the starting address and parameters of a candidate surface and tries to find a + * matching surface within the cache that's similar to it. If there are many textures * or the texture found if entirely incompatible, it will fail. If no texture is found, the * blit will be unsuccessful. - * @param gpu_addr, the starting address of the candidate surface. - * @param params, the paremeters on the candidate surface. + * + * @param gpu_addr The starting address of the candidate surface. + * @param params The parameters on the candidate surface. **/ Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; @@ -777,11 +795,14 @@ private: } /** - * `DeduceBestBlit` gets the a source and destination starting address and parameters, + * Gets the a source and destination starting address and parameters, * and tries to deduce if they are supposed to be depth textures. If so, their * parameters are modified and fixed into so. - * @param gpu_addr, the starting address of the candidate surface. - * @param params, the parameters on the candidate surface. + * + * @param src_params The parameters of the candidate surface. + * @param dst_params The parameters of the destination surface. + * @param src_gpu_addr The starting address of the candidate surface. + * @param dst_gpu_addr The starting address of the destination surface. **/ void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index a9b8f69af..58b608a36 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -422,7 +422,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { TexelWeightParams params; // Read the entire block mode all at once - uint16_t modeBits = strm.ReadBits(11); + uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11)); // Does this match the void extent block mode? if ((modeBits & 0x01FF) == 0x1FC) { @@ -625,10 +625,10 @@ static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint } // Decode the RGBA components and renormalize them to the range [0, 255] - uint16_t r = strm.ReadBits(16); - uint16_t g = strm.ReadBits(16); - uint16_t b = strm.ReadBits(16); - uint16_t a = strm.ReadBits(16); + uint16_t r = static_cast<uint16_t>(strm.ReadBits(16)); + uint16_t g = static_cast<uint16_t>(strm.ReadBits(16)); + uint16_t b = static_cast<uint16_t>(strm.ReadBits(16)); + uint16_t a = static_cast<uint16_t>(strm.ReadBits(16)); uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 | (static_cast<uint32_t>(a) & 0xFF00) << 16; @@ -681,9 +681,10 @@ protected: public: Pixel() = default; - Pixel(ChannelType a, ChannelType r, ChannelType g, ChannelType b, unsigned bitDepth = 8) + Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8) : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)}, - color{a, r, g, b} {} + color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), + static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} // Changes the depth of each pixel. This scales the values to // the appropriate bit depth by either truncating the least |