diff options
28 files changed, 877 insertions, 210 deletions
diff --git a/src/audio_core/renderer/command/effect/reverb.cpp b/src/audio_core/renderer/command/effect/reverb.cpp index 6fe844ff0..8b9b65214 100644 --- a/src/audio_core/renderer/command/effect/reverb.cpp +++ b/src/audio_core/renderer/command/effect/reverb.cpp @@ -308,7 +308,8 @@ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, Rever } Common::FixedPoint<50, 14> pre_delay_sample{ - state.pre_delay_line.Read() * Common::FixedPoint<50, 14>::from_base(params.late_gain)}; + state.pre_delay_line.TapOut(state.pre_delay_time) * + Common::FixedPoint<50, 14>::from_base(params.late_gain)}; std::array<Common::FixedPoint<50, 14>, ReverbInfo::MaxDelayLines> mix_matrix{ state.prev_feedback_output[2] + state.prev_feedback_output[1] + pre_delay_sample, diff --git a/src/audio_core/renderer/effect/i3dl2.h b/src/audio_core/renderer/effect/i3dl2.h index 1ebbc5c4c..6e3ffd1d4 100644 --- a/src/audio_core/renderer/effect/i3dl2.h +++ b/src/audio_core/renderer/effect/i3dl2.h @@ -104,7 +104,8 @@ public: } void Write(const Common::FixedPoint<50, 14> sample) { - *(input++) = sample; + *input = sample; + input++; if (input >= buffer_end) { input = buffer.data(); } diff --git a/src/audio_core/renderer/effect/reverb.h b/src/audio_core/renderer/effect/reverb.h index a72475c3c..6cc345ef6 100644 --- a/src/audio_core/renderer/effect/reverb.h +++ b/src/audio_core/renderer/effect/reverb.h @@ -79,12 +79,10 @@ public: return; } sample_count = delay_time; - input = &buffer[(output - buffer.data() + sample_count) % (sample_count_max + 1)]; + input = &buffer[0]; } Common::FixedPoint<50, 14> Tick(const Common::FixedPoint<50, 14> sample) { - Write(sample); - auto out_sample{Read()}; output++; @@ -92,6 +90,7 @@ public: output = buffer.data(); } + Write(sample); return out_sample; } @@ -100,7 +99,8 @@ public: } void Write(const Common::FixedPoint<50, 14> sample) { - *(input++) = sample; + *input = sample; + input++; if (input >= buffer_end) { input = buffer.data(); } diff --git 
a/src/common/settings.h b/src/common/settings.h index 512ecff69..1ae28ce93 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -128,7 +128,7 @@ public: /** * Sets a default value, label, and setting value. * - * @param default_val Intial value of the setting, and default value of the setting + * @param default_val Initial value of the setting, and default value of the setting * @param name Label for the setting */ explicit Setting(const Type& default_val, const std::string& name) @@ -139,7 +139,7 @@ public: /** * Sets a default value, minimum value, maximum value, and label. * - * @param default_val Intial value of the setting, and default value of the setting + * @param default_val Initial value of the setting, and default value of the setting * @param min_val Sets the minimum allowed value of the setting * @param max_val Sets the maximum allowed value of the setting * @param name Label for the setting @@ -231,7 +231,7 @@ public: /** * Sets a default value, label, and setting value. * - * @param default_val Intial value of the setting, and default value of the setting + * @param default_val Initial value of the setting, and default value of the setting * @param name Label for the setting */ explicit SwitchableSetting(const Type& default_val, const std::string& name) @@ -242,7 +242,7 @@ public: /** * Sets a default value, minimum value, maximum value, and label. 
* - * @param default_val Intial value of the setting, and default value of the setting + * @param default_val Initial value of the setting, and default value of the setting * @param min_val Sets the minimum allowed value of the setting * @param max_val Sets the maximum allowed value of the setting * @param name Label for the setting diff --git a/src/core/constants.cpp b/src/core/constants.cpp index 4430173ef..760dc5f23 100644 --- a/src/core/constants.cpp +++ b/src/core/constants.cpp @@ -4,13 +4,24 @@ #include "core/constants.h" namespace Core::Constants { -const std::array<u8, 107> ACCOUNT_BACKUP_JPEG{{ - 0xff, 0xd8, 0xff, 0xdb, 0x00, 0x43, 0x00, 0x03, 0x02, 0x02, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, - 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x06, 0x04, 0x04, 0x04, 0x04, 0x04, 0x08, 0x06, 0x06, 0x05, - 0x06, 0x09, 0x08, 0x0a, 0x0a, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x0c, 0x0f, 0x0c, 0x0a, 0x0b, 0x0e, - 0x0b, 0x09, 0x09, 0x0d, 0x11, 0x0d, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x10, 0x0a, 0x0c, 0x12, 0x13, - 0x12, 0x10, 0x13, 0x0f, 0x10, 0x10, 0x10, 0xff, 0xc9, 0x00, 0x0b, 0x08, 0x00, 0x01, 0x00, 0x01, - 0x01, 0x01, 0x11, 0x00, 0xff, 0xcc, 0x00, 0x06, 0x00, 0x10, 0x10, 0x05, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, 0xd2, 0xcf, 0x20, 0xff, 0xd9, +const std::array<u8, 287> ACCOUNT_BACKUP_JPEG{{ + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, 0x01, 0x01, 0x00, 0x48, + 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, 0x00, 0x06, 0x04, 0x04, 0x04, 0x05, 0x04, 0x06, + 0x05, 0x05, 0x06, 0x09, 0x06, 0x05, 0x06, 0x09, 0x0b, 0x08, 0x06, 0x06, 0x08, 0x0b, 0x0c, 0x0a, + 0x0a, 0x0b, 0x0a, 0x0a, 0x0c, 0x10, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x10, 0x0c, 0x0e, 0x0f, + 0x10, 0x0f, 0x0e, 0x0c, 0x13, 0x13, 0x14, 0x14, 0x13, 0x13, 0x1c, 0x1b, 0x1b, 0x1b, 0x1c, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x07, 0x07, + 0x07, 0x0d, 0x0c, 0x0d, 0x18, 0x10, 0x10, 0x18, 0x1a, 0x15, 0x11, 0x15, 0x1a, 0x20, 0x20, 
0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xff, 0xc0, + 0x00, 0x11, 0x08, 0x00, 0x20, 0x00, 0x20, 0x03, 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, + 0x01, 0xff, 0xc4, 0x00, 0x14, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xc4, 0x00, 0x14, 0x10, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xc4, 0x00, + 0x14, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, 0xc4, 0x00, 0x14, 0x11, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, + 0x02, 0x11, 0x03, 0x11, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xd9, }}; } diff --git a/src/core/constants.h b/src/core/constants.h index f916ce0b6..f1f67d3b8 100644 --- a/src/core/constants.h +++ b/src/core/constants.h @@ -12,6 +12,6 @@ namespace Core::Constants { // ACC Service - Blank JPEG used as user icon in absentia of real one. 
-extern const std::array<u8, 107> ACCOUNT_BACKUP_JPEG; +extern const std::array<u8, 287> ACCOUNT_BACKUP_JPEG; } // namespace Core::Constants diff --git a/src/core/hle/kernel/k_scoped_lock.h b/src/core/hle/kernel/k_scoped_lock.h index 59b3e32ae..a15640fd2 100644 --- a/src/core/hle/kernel/k_scoped_lock.h +++ b/src/core/hle/kernel/k_scoped_lock.h @@ -4,6 +4,7 @@ #pragma once #include <concepts> +#include <memory> #include <type_traits> namespace Kernel { diff --git a/src/core/hle/service/psc/psc.cpp b/src/core/hle/service/psc/psc.cpp index 25702703e..cd0cc9287 100644 --- a/src/core/hle/service/psc/psc.cpp +++ b/src/core/hle/service/psc/psc.cpp @@ -11,9 +11,9 @@ namespace Service::PSC { -class PSC_C final : public ServiceFramework<PSC_C> { +class IPmControl final : public ServiceFramework<IPmControl> { public: - explicit PSC_C(Core::System& system_) : ServiceFramework{system_, "psc:c"} { + explicit IPmControl(Core::System& system_) : ServiceFramework{system_, "psc:c"} { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "Initialize"}, @@ -23,8 +23,8 @@ public: {4, nullptr, "Cancel"}, {5, nullptr, "PrintModuleInformation"}, {6, nullptr, "GetModuleInformation"}, - {10, nullptr, "Unknown10"}, - {11, nullptr, "Unknown11"}, + {10, nullptr, "AcquireStateLock"}, + {11, nullptr, "HasStateLock"}, }; // clang-format on @@ -49,12 +49,12 @@ public: } }; -class PSC_M final : public ServiceFramework<PSC_M> { +class IPmService final : public ServiceFramework<IPmService> { public: - explicit PSC_M(Core::System& system_) : ServiceFramework{system_, "psc:m"} { + explicit IPmService(Core::System& system_) : ServiceFramework{system_, "psc:m"} { // clang-format off static const FunctionInfo functions[] = { - {0, &PSC_M::GetPmModule, "GetPmModule"}, + {0, &IPmService::GetPmModule, "GetPmModule"}, }; // clang-format on @@ -74,8 +74,8 @@ private: void LoopProcess(Core::System& system) { auto server_manager = std::make_unique<ServerManager>(system); - 
server_manager->RegisterNamedService("psc:c", std::make_shared<PSC_C>(system)); - server_manager->RegisterNamedService("psc:m", std::make_shared<PSC_M>(system)); + server_manager->RegisterNamedService("psc:c", std::make_shared<IPmControl>(system)); + server_manager->RegisterNamedService("psc:m", std::make_shared<IPmService>(system)); ServerManager::RunServer(std::move(server_manager)); } diff --git a/src/core/hle/service/ssl/ssl.cpp b/src/core/hle/service/ssl/ssl.cpp index b19bc1b3e..2b99dd7ac 100644 --- a/src/core/hle/service/ssl/ssl.cpp +++ b/src/core/hle/service/ssl/ssl.cpp @@ -8,14 +8,36 @@ namespace Service::SSL { +// This is nn::ssl::sf::CertificateFormat enum class CertificateFormat : u32 { Pem = 1, Der = 2, }; +// This is nn::ssl::sf::ContextOption +enum class ContextOption : u32 { + None = 0, + CrlImportDateCheckEnable = 1, +}; + +// This is nn::ssl::sf::SslVersion +struct SslVersion { + union { + u32 raw{}; + + BitField<0, 1, u32> tls_auto; + BitField<3, 1, u32> tls_v10; + BitField<4, 1, u32> tls_v11; + BitField<5, 1, u32> tls_v12; + BitField<6, 1, u32> tls_v13; + BitField<24, 7, u32> api_version; + }; +}; + class ISslConnection final : public ServiceFramework<ISslConnection> { public: - explicit ISslConnection(Core::System& system_) : ServiceFramework{system_, "ISslConnection"} { + explicit ISslConnection(Core::System& system_, SslVersion version) + : ServiceFramework{system_, "ISslConnection"}, ssl_version{version} { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "SetSocketDescriptor"}, @@ -59,11 +81,15 @@ public: RegisterHandlers(functions); } + +private: + SslVersion ssl_version; }; class ISslContext final : public ServiceFramework<ISslContext> { public: - explicit ISslContext(Core::System& system_) : ServiceFramework{system_, "ISslContext"} { + explicit ISslContext(Core::System& system_, SslVersion version) + : ServiceFramework{system_, "ISslContext"}, ssl_version{version} { static const FunctionInfo functions[] = { {0, 
&ISslContext::SetOption, "SetOption"}, {1, nullptr, "GetOption"}, @@ -84,17 +110,20 @@ public: } private: + SslVersion ssl_version; + void SetOption(HLERequestContext& ctx) { struct Parameters { - u8 enable; - u32 option; + ContextOption option; + s32 value; }; + static_assert(sizeof(Parameters) == 0x8, "Parameters is an invalid size"); IPC::RequestParser rp{ctx}; const auto parameters = rp.PopRaw<Parameters>(); - LOG_WARNING(Service_SSL, "(STUBBED) called. enable={}, option={}", parameters.enable, - parameters.option); + LOG_WARNING(Service_SSL, "(STUBBED) called. option={}, value={}", parameters.option, + parameters.value); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ResultSuccess); @@ -105,7 +134,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(ResultSuccess); - rb.PushIpcInterface<ISslConnection>(system); + rb.PushIpcInterface<ISslConnection>(system, ssl_version); } void ImportServerPki(HLERequestContext& ctx) { @@ -142,20 +171,21 @@ private: } }; -class SSL final : public ServiceFramework<SSL> { +class ISslService final : public ServiceFramework<ISslService> { public: - explicit SSL(Core::System& system_) : ServiceFramework{system_, "ssl"} { + explicit ISslService(Core::System& system_) : ServiceFramework{system_, "ssl"} { // clang-format off static const FunctionInfo functions[] = { - {0, &SSL::CreateContext, "CreateContext"}, + {0, &ISslService::CreateContext, "CreateContext"}, {1, nullptr, "GetContextCount"}, {2, nullptr, "GetCertificates"}, {3, nullptr, "GetCertificateBufSize"}, {4, nullptr, "DebugIoctl"}, - {5, &SSL::SetInterfaceVersion, "SetInterfaceVersion"}, + {5, &ISslService::SetInterfaceVersion, "SetInterfaceVersion"}, {6, nullptr, "FlushSessionCache"}, {7, nullptr, "SetDebugOption"}, {8, nullptr, "GetDebugOption"}, + {9, nullptr, "ClearTls12FallbackFlag"}, }; // clang-format on @@ -163,20 +193,30 @@ public: } private: - u32 ssl_version{}; void CreateContext(HLERequestContext& ctx) { - LOG_WARNING(Service_SSL, "(STUBBED) called"); + struct 
Parameters { + SslVersion ssl_version; + INSERT_PADDING_BYTES(0x4); + u64 pid_placeholder; + }; + static_assert(sizeof(Parameters) == 0x10, "Parameters is an invalid size"); + + IPC::RequestParser rp{ctx}; + const auto parameters = rp.PopRaw<Parameters>(); + + LOG_WARNING(Service_SSL, "(STUBBED) called, api_version={}, pid_placeholder={}", + parameters.ssl_version.api_version, parameters.pid_placeholder); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(ResultSuccess); - rb.PushIpcInterface<ISslContext>(system); + rb.PushIpcInterface<ISslContext>(system, parameters.ssl_version); } void SetInterfaceVersion(HLERequestContext& ctx) { - LOG_DEBUG(Service_SSL, "called"); - IPC::RequestParser rp{ctx}; - ssl_version = rp.Pop<u32>(); + u32 ssl_version = rp.Pop<u32>(); + + LOG_DEBUG(Service_SSL, "called, ssl_version={}", ssl_version); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ResultSuccess); @@ -186,7 +226,7 @@ private: void LoopProcess(Core::System& system) { auto server_manager = std::make_unique<ServerManager>(system); - server_manager->RegisterNamedService("ssl", std::make_shared<SSL>(system)); + server_manager->RegisterNamedService("ssl", std::make_shared<ISslService>(system)); ServerManager::RunServer(std::move(server_manager)); } diff --git a/src/core/hle/service/usb/usb.cpp b/src/core/hle/service/usb/usb.cpp index a2855e783..f29fff1dd 100644 --- a/src/core/hle/service/usb/usb.cpp +++ b/src/core/hle/service/usb/usb.cpp @@ -16,19 +16,19 @@ public: explicit IDsInterface(Core::System& system_) : ServiceFramework{system_, "IDsInterface"} { // clang-format off static const FunctionInfo functions[] = { - {0, nullptr, "BindDevice"}, - {1, nullptr, "BindClientProcess"}, - {2, nullptr, "AddInterface"}, - {3, nullptr, "GetStateChangeEvent"}, - {4, nullptr, "GetState"}, - {5, nullptr, "ClearDeviceData"}, - {6, nullptr, "AddUsbStringDescriptor"}, - {7, nullptr, "DeleteUsbStringDescriptor"}, - {8, nullptr, "SetUsbDeviceDescriptor"}, - {9, nullptr, "SetBinaryObjectStore"}, - {10, 
nullptr, "Enable"}, - {11, nullptr, "Disable"}, - {12, nullptr, "Unknown12"}, + {0, nullptr, "AddEndpoint"}, + {1, nullptr, "GetSetupEvent"}, + {2, nullptr, "GetSetupPacket"}, + {3, nullptr, "Enable"}, + {4, nullptr, "Disable"}, + {5, nullptr, "CtrlIn"}, + {6, nullptr, "CtrlOut"}, + {7, nullptr, "GetCtrlInCompletionEvent"}, + {8, nullptr, "GetCtrlInUrbReport"}, + {9, nullptr, "GetCtrlOutCompletionEvent"}, + {10, nullptr, "GetCtrlOutUrbReport"}, + {11, nullptr, "CtrlStall"}, + {12, nullptr, "AppendConfigurationData"}, }; // clang-format on @@ -36,9 +36,9 @@ public: } }; -class USB_DS final : public ServiceFramework<USB_DS> { +class IDsRootSession final : public ServiceFramework<IDsRootSession> { public: - explicit USB_DS(Core::System& system_) : ServiceFramework{system_, "usb:ds"} { + explicit IDsRootSession(Core::System& system_) : ServiceFramework{system_, "usb:ds"} { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "OpenDsService"}, @@ -94,9 +94,9 @@ public: } }; -class USB_HS final : public ServiceFramework<USB_HS> { +class IClientRootSession final : public ServiceFramework<IClientRootSession> { public: - explicit USB_HS(Core::System& system_) : ServiceFramework{system_, "usb:hs"} { + explicit IClientRootSession(Core::System& system_) : ServiceFramework{system_, "usb:hs"} { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "BindClientProcess"}, @@ -107,7 +107,7 @@ public: {5, nullptr, "DestroyInterfaceAvailableEvent"}, {6, nullptr, "GetInterfaceStateChangeEvent"}, {7, nullptr, "AcquireUsbIf"}, - {8, nullptr, "ResetDevice"}, + {8, nullptr, "SetTestMode"}, }; // clang-format on @@ -134,12 +134,12 @@ public: } }; -class USB_PD final : public ServiceFramework<USB_PD> { +class IPdManager final : public ServiceFramework<IPdManager> { public: - explicit USB_PD(Core::System& system_) : ServiceFramework{system_, "usb:pd"} { + explicit IPdManager(Core::System& system_) : ServiceFramework{system_, "usb:pd"} { // 
clang-format off static const FunctionInfo functions[] = { - {0, &USB_PD::GetPdSession, "GetPdSession"}, + {0, &IPdManager::OpenSession, "OpenSession"}, }; // clang-format on @@ -147,7 +147,7 @@ public: } private: - void GetPdSession(HLERequestContext& ctx) { + void OpenSession(HLERequestContext& ctx) { LOG_DEBUG(Service_USB, "called"); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; @@ -178,12 +178,12 @@ public: } }; -class USB_PD_C final : public ServiceFramework<USB_PD_C> { +class IPdCradleManager final : public ServiceFramework<IPdCradleManager> { public: - explicit USB_PD_C(Core::System& system_) : ServiceFramework{system_, "usb:pd:c"} { + explicit IPdCradleManager(Core::System& system_) : ServiceFramework{system_, "usb:pd:c"} { // clang-format off static const FunctionInfo functions[] = { - {0, &USB_PD_C::GetPdCradleSession, "GetPdCradleSession"}, + {0, &IPdCradleManager::OpenCradleSession, "OpenCradleSession"}, }; // clang-format on @@ -191,18 +191,18 @@ public: } private: - void GetPdCradleSession(HLERequestContext& ctx) { + void OpenCradleSession(HLERequestContext& ctx) { + LOG_DEBUG(Service_USB, "called"); + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(ResultSuccess); rb.PushIpcInterface<IPdCradleSession>(system); - - LOG_DEBUG(Service_USB, "called"); } }; -class USB_PM final : public ServiceFramework<USB_PM> { +class IPmMainService final : public ServiceFramework<IPmMainService> { public: - explicit USB_PM(Core::System& system_) : ServiceFramework{system_, "usb:pm"} { + explicit IPmMainService(Core::System& system_) : ServiceFramework{system_, "usb:pm"} { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "GetPowerEvent"}, @@ -221,11 +221,11 @@ public: void LoopProcess(Core::System& system) { auto server_manager = std::make_unique<ServerManager>(system); - server_manager->RegisterNamedService("usb:ds", std::make_shared<USB_DS>(system)); - server_manager->RegisterNamedService("usb:hs", std::make_shared<USB_HS>(system)); - 
server_manager->RegisterNamedService("usb:pd", std::make_shared<USB_PD>(system)); - server_manager->RegisterNamedService("usb:pd:c", std::make_shared<USB_PD_C>(system)); - server_manager->RegisterNamedService("usb:pm", std::make_shared<USB_PM>(system)); + server_manager->RegisterNamedService("usb:ds", std::make_shared<IDsRootSession>(system)); + server_manager->RegisterNamedService("usb:hs", std::make_shared<IClientRootSession>(system)); + server_manager->RegisterNamedService("usb:pd", std::make_shared<IPdManager>(system)); + server_manager->RegisterNamedService("usb:pd:c", std::make_shared<IPdCradleManager>(system)); + server_manager->RegisterNamedService("usb:pm", std::make_shared<IPmMainService>(system)); ServerManager::RunServer(std::move(server_manager)); } diff --git a/src/input_common/helpers/joycon_protocol/joycon_types.h b/src/input_common/helpers/joycon_protocol/joycon_types.h index b91934990..2e50a99a8 100644 --- a/src/input_common/helpers/joycon_protocol/joycon_types.h +++ b/src/input_common/helpers/joycon_protocol/joycon_types.h @@ -95,6 +95,18 @@ enum class PasivePadButton : u32 { ZL_ZR = 0x8000, }; +enum class PasivePadStick : u8 { + Right = 0x00, + RightDown = 0x01, + Down = 0x02, + DownLeft = 0x03, + Left = 0x04, + LeftUp = 0x05, + Up = 0x06, + UpRight = 0x07, + Neutral = 0x08, +}; + enum class OutputReport : u8 { RUMBLE_AND_SUBCMD = 0x01, FW_UPDATE_PKT = 0x03, diff --git a/src/input_common/helpers/joycon_protocol/poller.cpp b/src/input_common/helpers/joycon_protocol/poller.cpp index 9bb15e935..ab48352b8 100644 --- a/src/input_common/helpers/joycon_protocol/poller.cpp +++ b/src/input_common/helpers/joycon_protocol/poller.cpp @@ -12,7 +12,7 @@ JoyconPoller::JoyconPoller(ControllerType device_type_, JoyStickCalibration left : device_type{device_type_}, left_stick_calibration{left_stick_calibration_}, right_stick_calibration{right_stick_calibration_}, motion_calibration{motion_calibration_} {} -void JoyconPoller::SetCallbacks(const 
Joycon::JoyconCallbacks& callbacks_) { +void JoyconPoller::SetCallbacks(const JoyconCallbacks& callbacks_) { callbacks = std::move(callbacks_); } @@ -22,13 +22,13 @@ void JoyconPoller::ReadActiveMode(std::span<u8> buffer, const MotionStatus& moti memcpy(&data, buffer.data(), sizeof(InputReportActive)); switch (device_type) { - case Joycon::ControllerType::Left: + case ControllerType::Left: UpdateActiveLeftPadInput(data, motion_status); break; - case Joycon::ControllerType::Right: + case ControllerType::Right: UpdateActiveRightPadInput(data, motion_status); break; - case Joycon::ControllerType::Pro: + case ControllerType::Pro: UpdateActiveProPadInput(data, motion_status); break; default: @@ -47,13 +47,13 @@ void JoyconPoller::ReadPassiveMode(std::span<u8> buffer) { memcpy(&data, buffer.data(), sizeof(InputReportPassive)); switch (device_type) { - case Joycon::ControllerType::Left: + case ControllerType::Left: UpdatePasiveLeftPadInput(data); break; - case Joycon::ControllerType::Right: + case ControllerType::Right: UpdatePasiveRightPadInput(data); break; - case Joycon::ControllerType::Pro: + case ControllerType::Pro: UpdatePasiveProPadInput(data); break; default: @@ -211,13 +211,11 @@ void JoyconPoller::UpdateActiveProPadInput(const InputReportActive& input, } void JoyconPoller::UpdatePasiveLeftPadInput(const InputReportPassive& input) { - static constexpr std::array<Joycon::PasivePadButton, 11> left_buttons{ - Joycon::PasivePadButton::Down_A, Joycon::PasivePadButton::Right_X, - Joycon::PasivePadButton::Left_B, Joycon::PasivePadButton::Up_Y, - Joycon::PasivePadButton::SL, Joycon::PasivePadButton::SR, - Joycon::PasivePadButton::L_R, Joycon::PasivePadButton::ZL_ZR, - Joycon::PasivePadButton::Minus, Joycon::PasivePadButton::Capture, - Joycon::PasivePadButton::StickL, + static constexpr std::array<PasivePadButton, 11> left_buttons{ + PasivePadButton::Down_A, PasivePadButton::Right_X, PasivePadButton::Left_B, + PasivePadButton::Up_Y, PasivePadButton::SL, 
PasivePadButton::SR, + PasivePadButton::L_R, PasivePadButton::ZL_ZR, PasivePadButton::Minus, + PasivePadButton::Capture, PasivePadButton::StickL, }; for (auto left_button : left_buttons) { @@ -225,16 +223,19 @@ void JoyconPoller::UpdatePasiveLeftPadInput(const InputReportPassive& input) { const int button = static_cast<int>(left_button); callbacks.on_button_data(button, button_status); } + + const auto [left_axis_x, left_axis_y] = + GetPassiveAxisValue(static_cast<PasivePadStick>(input.stick_state)); + callbacks.on_stick_data(static_cast<int>(PadAxes::LeftStickX), left_axis_x); + callbacks.on_stick_data(static_cast<int>(PadAxes::LeftStickY), left_axis_y); } void JoyconPoller::UpdatePasiveRightPadInput(const InputReportPassive& input) { - static constexpr std::array<Joycon::PasivePadButton, 11> right_buttons{ - Joycon::PasivePadButton::Down_A, Joycon::PasivePadButton::Right_X, - Joycon::PasivePadButton::Left_B, Joycon::PasivePadButton::Up_Y, - Joycon::PasivePadButton::SL, Joycon::PasivePadButton::SR, - Joycon::PasivePadButton::L_R, Joycon::PasivePadButton::ZL_ZR, - Joycon::PasivePadButton::Plus, Joycon::PasivePadButton::Home, - Joycon::PasivePadButton::StickR, + static constexpr std::array<PasivePadButton, 11> right_buttons{ + PasivePadButton::Down_A, PasivePadButton::Right_X, PasivePadButton::Left_B, + PasivePadButton::Up_Y, PasivePadButton::SL, PasivePadButton::SR, + PasivePadButton::L_R, PasivePadButton::ZL_ZR, PasivePadButton::Plus, + PasivePadButton::Home, PasivePadButton::StickR, }; for (auto right_button : right_buttons) { @@ -242,17 +243,20 @@ void JoyconPoller::UpdatePasiveRightPadInput(const InputReportPassive& input) { const int button = static_cast<int>(right_button); callbacks.on_button_data(button, button_status); } + + const auto [right_axis_x, right_axis_y] = + GetPassiveAxisValue(static_cast<PasivePadStick>(input.stick_state)); + callbacks.on_stick_data(static_cast<int>(PadAxes::RightStickX), right_axis_x); + 
callbacks.on_stick_data(static_cast<int>(PadAxes::RightStickY), right_axis_y); } void JoyconPoller::UpdatePasiveProPadInput(const InputReportPassive& input) { - static constexpr std::array<Joycon::PasivePadButton, 14> pro_buttons{ - Joycon::PasivePadButton::Down_A, Joycon::PasivePadButton::Right_X, - Joycon::PasivePadButton::Left_B, Joycon::PasivePadButton::Up_Y, - Joycon::PasivePadButton::SL, Joycon::PasivePadButton::SR, - Joycon::PasivePadButton::L_R, Joycon::PasivePadButton::ZL_ZR, - Joycon::PasivePadButton::Minus, Joycon::PasivePadButton::Plus, - Joycon::PasivePadButton::Capture, Joycon::PasivePadButton::Home, - Joycon::PasivePadButton::StickL, Joycon::PasivePadButton::StickR, + static constexpr std::array<PasivePadButton, 14> pro_buttons{ + PasivePadButton::Down_A, PasivePadButton::Right_X, PasivePadButton::Left_B, + PasivePadButton::Up_Y, PasivePadButton::SL, PasivePadButton::SR, + PasivePadButton::L_R, PasivePadButton::ZL_ZR, PasivePadButton::Minus, + PasivePadButton::Plus, PasivePadButton::Capture, PasivePadButton::Home, + PasivePadButton::StickL, PasivePadButton::StickR, }; for (auto pro_button : pro_buttons) { @@ -260,6 +264,15 @@ void JoyconPoller::UpdatePasiveProPadInput(const InputReportPassive& input) { const int button = static_cast<int>(pro_button); callbacks.on_button_data(button, button_status); } + + const auto [left_axis_x, left_axis_y] = + GetPassiveAxisValue(static_cast<PasivePadStick>(input.stick_state & 0xf)); + const auto [right_axis_x, right_axis_y] = + GetPassiveAxisValue(static_cast<PasivePadStick>(input.stick_state >> 4)); + callbacks.on_stick_data(static_cast<int>(PadAxes::LeftStickX), left_axis_x); + callbacks.on_stick_data(static_cast<int>(PadAxes::LeftStickY), left_axis_y); + callbacks.on_stick_data(static_cast<int>(PadAxes::RightStickX), right_axis_x); + callbacks.on_stick_data(static_cast<int>(PadAxes::RightStickY), right_axis_y); } f32 JoyconPoller::GetAxisValue(u16 raw_value, Joycon::JoyStickAxisCalibration calibration) const { 
@@ -270,6 +283,30 @@ f32 JoyconPoller::GetAxisValue(u16 raw_value, Joycon::JoyStickAxisCalibration ca return value / calibration.min; } +std::pair<f32, f32> JoyconPoller::GetPassiveAxisValue(PasivePadStick raw_value) const { + switch (raw_value) { + case PasivePadStick::Right: + return {1.0f, 0.0f}; + case PasivePadStick::RightDown: + return {1.0f, -1.0f}; + case PasivePadStick::Down: + return {0.0f, -1.0f}; + case PasivePadStick::DownLeft: + return {-1.0f, -1.0f}; + case PasivePadStick::Left: + return {-1.0f, 0.0f}; + case PasivePadStick::LeftUp: + return {-1.0f, 1.0f}; + case PasivePadStick::Up: + return {0.0f, 1.0f}; + case PasivePadStick::UpRight: + return {1.0f, 1.0f}; + case PasivePadStick::Neutral: + default: + return {0.0f, 0.0f}; + } +} + f32 JoyconPoller::GetAccelerometerValue(s16 raw, const MotionSensorCalibration& cal, AccelerometerSensitivity sensitivity) const { const f32 value = raw * (1.0f / (cal.scale - cal.offset)) * 4; diff --git a/src/input_common/helpers/joycon_protocol/poller.h b/src/input_common/helpers/joycon_protocol/poller.h index 354d41dad..5c897f070 100644 --- a/src/input_common/helpers/joycon_protocol/poller.h +++ b/src/input_common/helpers/joycon_protocol/poller.h @@ -22,7 +22,7 @@ public: JoyStickCalibration right_stick_calibration_, MotionCalibration motion_calibration_); - void SetCallbacks(const Joycon::JoyconCallbacks& callbacks_); + void SetCallbacks(const JoyconCallbacks& callbacks_); /// Handles data from passive packages void ReadPassiveMode(std::span<u8> buffer); @@ -51,7 +51,10 @@ private: void UpdatePasiveProPadInput(const InputReportPassive& buffer); /// Returns a calibrated joystick axis from raw axis data - f32 GetAxisValue(u16 raw_value, Joycon::JoyStickAxisCalibration calibration) const; + f32 GetAxisValue(u16 raw_value, JoyStickAxisCalibration calibration) const; + + /// Returns a digital joystick axis from passive axis data + std::pair<f32, f32> GetPassiveAxisValue(PasivePadStick raw_value) const; /// Returns a 
calibrated accelerometer axis from raw motion data f32 GetAccelerometerValue(s16 raw, const MotionSensorCalibration& cal, @@ -75,7 +78,7 @@ private: JoyStickCalibration right_stick_calibration{}; MotionCalibration motion_calibration{}; - Joycon::JoyconCallbacks callbacks{}; + JoyconCallbacks callbacks{}; }; } // namespace InputCommon::Joycon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 06fd40851..2a150ccdc 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -55,6 +55,19 @@ constexpr u32 NUM_STORAGE_BUFFERS = 16; constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; +enum class ObtainBufferSynchronize : u32 { + NoSynchronize = 0, + FullSynchronize = 1, + SynchronizeNoDirty = 2, +}; + +enum class ObtainBufferOperation : u32 { + DoNothing = 0, + MarkAsWritten = 1, + DiscardWrite = 2, + MarkQuery = 3, +}; + using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; @@ -191,6 +204,10 @@ public: bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); + [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, + ObtainBufferSynchronize sync_info, + ObtainBufferOperation post_op); + /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); @@ -642,6 +659,42 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { } template <class P> +std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, + ObtainBufferSynchronize sync_info, + ObtainBufferOperation post_op) { + const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + return {&slot_buffers[NULL_BUFFER_ID], 0}; + } + const BufferId buffer_id = FindBuffer(*cpu_addr, size); + Buffer& 
buffer = slot_buffers[buffer_id]; + + // synchronize op + switch (sync_info) { + case ObtainBufferSynchronize::FullSynchronize: + SynchronizeBuffer(buffer, *cpu_addr, size); + break; + default: + break; + } + + switch (post_op) { + case ObtainBufferOperation::MarkAsWritten: + MarkWrittenBuffer(buffer_id, *cpu_addr, size); + break; + case ObtainBufferOperation::DiscardWrite: { + IntervalType interval{*cpu_addr, *cpu_addr + size}; + ClearDownload(interval); + break; + } + default: + break; + } + + return {&buffer, buffer.Offset(*cpu_addr)}; +} + +template <class P> void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) { const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 7762c7d96..e68850dc5 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -14,7 +14,13 @@ #include "video_core/textures/decoders.h" MICROPROFILE_DECLARE(GPU_DMAEngine); +MICROPROFILE_DECLARE(GPU_DMAEngineBL); +MICROPROFILE_DECLARE(GPU_DMAEngineLB); +MICROPROFILE_DECLARE(GPU_DMAEngineBB); MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128)); +MICROPROFILE_DEFINE(GPU_DMAEngineBL, "GPU", "DMA Engine Block - Linear", MP_RGB(224, 224, 128)); +MICROPROFILE_DEFINE(GPU_DMAEngineLB, "GPU", "DMA Engine Linear - Block", MP_RGB(224, 224, 128)); +MICROPROFILE_DEFINE(GPU_DMAEngineBB, "GPU", "DMA Engine Block - Block", MP_RGB(224, 224, 128)); namespace Tegra::Engines { @@ -72,6 +78,7 @@ void MaxwellDMA::Launch() { memory_manager.FlushCaching(); if (!is_src_pitch && !is_dst_pitch) { // If both the source and the destination are in block layout, assert. 
+ MICROPROFILE_SCOPE(GPU_DMAEngineBB); CopyBlockLinearToBlockLinear(); ReleaseSemaphore(); return; @@ -87,8 +94,10 @@ void MaxwellDMA::Launch() { } } else { if (!is_src_pitch && is_dst_pitch) { + MICROPROFILE_SCOPE(GPU_DMAEngineBL); CopyBlockLinearToPitch(); } else { + MICROPROFILE_SCOPE(GPU_DMAEngineLB); CopyPitchToBlockLinear(); } } @@ -153,21 +162,35 @@ void MaxwellDMA::Launch() { } void MaxwellDMA::CopyBlockLinearToPitch() { - UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); - UNIMPLEMENTED_IF(regs.src_params.layer != 0); - - const bool is_remapping = regs.launch_dma.remap_enable != 0; - - // Optimized path for micro copies. - const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; - if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && - regs.src_params.height > GOB_SIZE_Y) { - FastCopyBlockLinearToPitch(); + UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); + + u32 bytes_per_pixel = 1; + DMA::ImageOperand src_operand; + src_operand.bytes_per_pixel = bytes_per_pixel; + src_operand.params = regs.src_params; + src_operand.address = regs.offset_in; + + DMA::BufferOperand dst_operand; + dst_operand.pitch = regs.pitch_out; + dst_operand.width = regs.line_length_in; + dst_operand.height = regs.line_count; + dst_operand.address = regs.offset_out; + DMA::ImageCopy copy_info{}; + copy_info.length_x = regs.line_length_in; + copy_info.length_y = regs.line_count; + auto& accelerate = rasterizer->AccessAccelerateDMA(); + if (accelerate.ImageToBuffer(copy_info, src_operand, dst_operand)) { return; } + UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); + UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); + UNIMPLEMENTED_IF(regs.src_params.block_size.depth == 0 && regs.src_params.depth != 1); + // Deswizzle the input and copy it over. 
- const Parameters& src_params = regs.src_params; + const DMA::Parameters& src_params = regs.src_params; + + const bool is_remapping = regs.launch_dma.remap_enable != 0; const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; @@ -187,7 +210,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { x_offset >>= bpp_shift; } - const u32 bytes_per_pixel = base_bpp << bpp_shift; + bytes_per_pixel = base_bpp << bpp_shift; const u32 height = src_params.height; const u32 depth = src_params.depth; const u32 block_height = src_params.block_size.height; @@ -195,11 +218,12 @@ void MaxwellDMA::CopyBlockLinearToPitch() { const size_t src_size = CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); + const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; read_buffer.resize_destructive(src_size); write_buffer.resize_destructive(dst_size); - memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); - memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.ReadBlock(src_operand.address, read_buffer.data(), src_size); + memory_manager.ReadBlockUnsafe(dst_operand.address, write_buffer.data(), dst_size); UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, @@ -216,6 +240,24 @@ void MaxwellDMA::CopyPitchToBlockLinear() { const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; + u32 bytes_per_pixel = 1; + DMA::ImageOperand dst_operand; + dst_operand.bytes_per_pixel = bytes_per_pixel; + dst_operand.params = regs.dst_params; + dst_operand.address = regs.offset_out; + DMA::BufferOperand src_operand; + src_operand.pitch = regs.pitch_in; + src_operand.width = 
regs.line_length_in; + src_operand.height = regs.line_count; + src_operand.address = regs.offset_in; + DMA::ImageCopy copy_info{}; + copy_info.length_x = regs.line_length_in; + copy_info.length_y = regs.line_count; + auto& accelerate = rasterizer->AccessAccelerateDMA(); + if (accelerate.BufferToImage(copy_info, src_operand, dst_operand)) { + return; + } + const auto& dst_params = regs.dst_params; const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; @@ -233,7 +275,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() { x_offset >>= bpp_shift; } - const u32 bytes_per_pixel = base_bpp << bpp_shift; + bytes_per_pixel = base_bpp << bpp_shift; const u32 height = dst_params.height; const u32 depth = dst_params.depth; const u32 block_height = dst_params.block_size.height; @@ -260,45 +302,14 @@ void MaxwellDMA::CopyPitchToBlockLinear() { memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } -void MaxwellDMA::FastCopyBlockLinearToPitch() { - const u32 bytes_per_pixel = 1U; - const size_t src_size = GOB_SIZE; - const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; - u32 pos_x = regs.src_params.origin.x; - u32 pos_y = regs.src_params.origin.y; - const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y, - regs.src_params.block_size.height, bytes_per_pixel); - const u32 x_in_gob = 64 / bytes_per_pixel; - pos_x = pos_x % x_in_gob; - pos_y = pos_y % 8; - - read_buffer.resize_destructive(src_size); - write_buffer.resize_destructive(dst_size); - - if (Settings::IsGPULevelExtreme()) { - memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size); - memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); - } else { - memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size); - memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); - } - - UnswizzleSubrect(write_buffer, 
read_buffer, bytes_per_pixel, regs.src_params.width, - regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count, - regs.src_params.block_size.height, regs.src_params.block_size.depth, - regs.pitch_out); - - memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); -} - void MaxwellDMA::CopyBlockLinearToBlockLinear() { UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); const bool is_remapping = regs.launch_dma.remap_enable != 0; // Deswizzle the input and copy it over. - const Parameters& src = regs.src_params; - const Parameters& dst = regs.dst_params; + const DMA::Parameters& src = regs.src_params; + const DMA::Parameters& dst = regs.dst_params; const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 0e594fa74..69e26cb32 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -24,6 +24,54 @@ namespace VideoCore { class RasterizerInterface; } +namespace Tegra { +namespace DMA { + +union Origin { + BitField<0, 16, u32> x; + BitField<16, 16, u32> y; +}; +static_assert(sizeof(Origin) == 4); + +struct ImageCopy { + u32 length_x{}; + u32 length_y{}; +}; + +union BlockSize { + BitField<0, 4, u32> width; + BitField<4, 4, u32> height; + BitField<8, 4, u32> depth; + BitField<12, 4, u32> gob_height; +}; +static_assert(sizeof(BlockSize) == 4); + +struct Parameters { + BlockSize block_size; + u32 width; + u32 height; + u32 depth; + u32 layer; + Origin origin; +}; +static_assert(sizeof(Parameters) == 24); + +struct ImageOperand { + u32 bytes_per_pixel; + Parameters params; + GPUVAddr address; +}; + +struct BufferOperand { + u32 pitch; + u32 width; + u32 height; + GPUVAddr address; +}; + +} // namespace DMA +} // namespace Tegra + namespace Tegra::Engines { class AccelerateDMAInterface { @@ 
-32,6 +80,12 @@ public: virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0; virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0; + + virtual bool ImageToBuffer(const DMA::ImageCopy& copy_info, const DMA::ImageOperand& src, + const DMA::BufferOperand& dst) = 0; + + virtual bool BufferToImage(const DMA::ImageCopy& copy_info, const DMA::BufferOperand& src, + const DMA::ImageOperand& dst) = 0; }; /** @@ -51,30 +105,6 @@ public: } }; - union BlockSize { - BitField<0, 4, u32> width; - BitField<4, 4, u32> height; - BitField<8, 4, u32> depth; - BitField<12, 4, u32> gob_height; - }; - static_assert(sizeof(BlockSize) == 4); - - union Origin { - BitField<0, 16, u32> x; - BitField<16, 16, u32> y; - }; - static_assert(sizeof(Origin) == 4); - - struct Parameters { - BlockSize block_size; - u32 width; - u32 height; - u32 depth; - u32 layer; - Origin origin; - }; - static_assert(sizeof(Parameters) == 24); - struct Semaphore { PackedGPUVAddr address; u32 payload; @@ -227,8 +257,6 @@ private: void CopyBlockLinearToBlockLinear(); - void FastCopyBlockLinearToPitch(); - void ReleaseSemaphore(); void ConsumeSinkImpl() override; @@ -261,17 +289,17 @@ private: u32 reserved05[0x3f]; PackedGPUVAddr offset_in; PackedGPUVAddr offset_out; - u32 pitch_in; - u32 pitch_out; + s32 pitch_in; + s32 pitch_out; u32 line_length_in; u32 line_count; u32 reserved06[0xb6]; u32 remap_consta_value; u32 remap_constb_value; RemapConst remap_const; - Parameters dst_params; + DMA::Parameters dst_params; u32 reserved07[0x1]; - Parameters src_params; + DMA::Parameters src_params; u32 reserved08[0x275]; u32 pm_trigger_end; u32 reserved09[0x3ba]; diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 51f896e43..0c59e6a1f 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -22,6 +22,14 @@ public: explicit AccelerateDMA(); bool 
BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override; bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override; + bool ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::ImageOperand& src, + const Tegra::DMA::BufferOperand& dst) override { + return false; + } + bool BufferToImage(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& src, + const Tegra::DMA::ImageOperand& dst) override { + return false; + } }; class RasterizerNull final : public VideoCore::RasterizerAccelerated, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 0c45832ae..7e21fc43d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -56,6 +56,16 @@ public: bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override; + bool ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::ImageOperand& src, + const Tegra::DMA::BufferOperand& dst) override { + return false; + } + + bool BufferToImage(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& src, + const Tegra::DMA::ImageOperand& dst) override { + return false; + } + private: BufferCache& buffer_cache; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 719edbcfb..f085d53a1 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -172,7 +172,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, cpu_memory_, buffer_cache_runtime), pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), - query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache}, + query_cache{*this, device, scheduler}, accelerate_dma(buffer_cache, 
texture_cache, scheduler), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); @@ -756,7 +756,9 @@ void RasterizerVulkan::FlushWork() { draw_counter = 0; } -AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} +AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_, + Scheduler& scheduler_) + : buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, scheduler{scheduler_} {} bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) { std::scoped_lock lock{buffer_cache.mutex}; @@ -768,6 +770,234 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 return buffer_cache.DMACopy(src_address, dest_address, amount); } +bool AccelerateDMA::ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, + const Tegra::DMA::ImageOperand& src, + const Tegra::DMA::BufferOperand& dst) { + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + auto query_image = texture_cache.ObtainImage(src, false); + if (!query_image) { + return false; + } + auto* image = query_image->first; + auto [level, base] = query_image->second; + const u32 buffer_size = static_cast<u32>(dst.pitch * dst.height); + const auto [buffer, offset] = buffer_cache.ObtainBuffer( + dst.address, buffer_size, VideoCommon::ObtainBufferSynchronize::FullSynchronize, + VideoCommon::ObtainBufferOperation::MarkAsWritten); + + const bool is_rescaled = image->IsRescaled(); + if (is_rescaled) { + image->ScaleDown(); + } + VkImageSubresourceLayers subresources{ + .aspectMask = image->AspectMask(), + .mipLevel = level, + .baseArrayLayer = base, + .layerCount = 1, + }; + const u32 bpp = VideoCore::Surface::BytesPerBlock(image->info.format); + const auto convert = [old_bpp = src.bytes_per_pixel, bpp](u32 value) { + return (old_bpp * value) / bpp; + }; + const u32 base_x = 
convert(src.params.origin.x.Value()); + const u32 base_y = src.params.origin.y.Value(); + const u32 length_x = convert(copy_info.length_x); + const u32 length_y = copy_info.length_y; + VkOffset3D image_offset{ + .x = static_cast<s32>(base_x), + .y = static_cast<s32>(base_y), + .z = 0, + }; + VkExtent3D image_extent{ + .width = length_x, + .height = length_y, + .depth = 1, + }; + auto buff_info(dst); + buff_info.pitch = convert(dst.pitch); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([src_image = image->Handle(), dst_buffer = buffer->Handle(), + buffer_offset = offset, subresources, image_offset, image_extent, + buff_info](vk::CommandBuffer cmdbuf) { + const std::array buffer_copy_info{ + VkBufferImageCopy{ + .bufferOffset = buffer_offset, + .bufferRowLength = buff_info.pitch, + .bufferImageHeight = buff_info.height, + .imageSubresource = subresources, + .imageOffset = image_offset, + .imageExtent = image_extent, + }, + }; + const VkImageSubresourceRange range{ + .aspectMask = subresources.aspectMask, + .baseMipLevel = subresources.mipLevel, + .levelCount = 1, + .baseArrayLayer = subresources.baseArrayLayer, + .layerCount = 1, + }; + static constexpr VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; + const std::array pre_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + 
.subresourceRange = range, + }, + }; + const std::array post_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = range, + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, {}, {}, pre_barriers); + cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_buffer, + buffer_copy_info); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, WRITE_BARRIER, nullptr, post_barriers); + }); + if (is_rescaled) { + image->ScaleUp(true); + } + return true; +} + +bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info, + const Tegra::DMA::BufferOperand& src, + const Tegra::DMA::ImageOperand& dst) { + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + auto query_image = texture_cache.ObtainImage(dst, true); + if (!query_image) { + return false; + } + auto* image = query_image->first; + auto [level, base] = query_image->second; + const u32 buffer_size = static_cast<u32>(src.pitch * src.height); + const auto [buffer, offset] = buffer_cache.ObtainBuffer( + src.address, buffer_size, VideoCommon::ObtainBufferSynchronize::FullSynchronize, + VideoCommon::ObtainBufferOperation::DoNothing); + const bool is_rescaled = image->IsRescaled(); + if (is_rescaled) { + image->ScaleDown(true); + } + VkImageSubresourceLayers subresources{ + .aspectMask = image->AspectMask(), + .mipLevel = level, + .baseArrayLayer = base, + .layerCount = 1, + }; + const u32 bpp = VideoCore::Surface::BytesPerBlock(image->info.format); + const auto convert = [old_bpp = dst.bytes_per_pixel, bpp](u32 value) { + return (old_bpp * value) / bpp; + 
}; + const u32 base_x = convert(dst.params.origin.x.Value()); + const u32 base_y = dst.params.origin.y.Value(); + const u32 length_x = convert(copy_info.length_x); + const u32 length_y = copy_info.length_y; + VkOffset3D image_offset{ + .x = static_cast<s32>(base_x), + .y = static_cast<s32>(base_y), + .z = 0, + }; + VkExtent3D image_extent{ + .width = length_x, + .height = length_y, + .depth = 1, + }; + auto buff_info(src); + buff_info.pitch = convert(src.pitch); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([dst_image = image->Handle(), src_buffer = buffer->Handle(), + buffer_offset = offset, subresources, image_offset, image_extent, + buff_info](vk::CommandBuffer cmdbuf) { + const std::array buffer_copy_info{ + VkBufferImageCopy{ + .bufferOffset = buffer_offset, + .bufferRowLength = buff_info.pitch, + .bufferImageHeight = buff_info.height, + .imageSubresource = subresources, + .imageOffset = image_offset, + .imageExtent = image_extent, + }, + }; + const VkImageSubresourceRange range{ + .aspectMask = subresources.aspectMask, + .baseMipLevel = subresources.mipLevel, + .levelCount = 1, + .baseArrayLayer = subresources.baseArrayLayer, + .layerCount = 1, + }; + static constexpr VkMemoryBarrier READ_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + }; + const std::array pre_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = 
dst_image, + .subresourceRange = range, + }, + }; + const std::array post_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = range, + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, READ_BARRIER, {}, pre_barriers); + cmdbuf.CopyBufferToImage(src_buffer, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + buffer_copy_info); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, nullptr, nullptr, post_barriers); + }); + if (is_rescaled) { + image->ScaleUp(); + } + return true; +} + void RasterizerVulkan::UpdateDynamicStates() { auto& regs = maxwell3d->regs; UpdateViewportsState(regs); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index a0508b57c..7746c5434 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -45,14 +45,23 @@ class StateTracker; class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface { public: - explicit AccelerateDMA(BufferCache& buffer_cache); + explicit AccelerateDMA(BufferCache& buffer_cache, TextureCache& texture_cache, + Scheduler& scheduler); bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override; bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override; + bool ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::ImageOperand& src, + const Tegra::DMA::BufferOperand& dst) override; + + bool BufferToImage(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& src, + const Tegra::DMA::ImageOperand& dst) override; + 
private: BufferCache& buffer_cache; + TextureCache& texture_cache; + Scheduler& scheduler; }; class RasterizerVulkan final : public VideoCore::RasterizerAccelerated, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 80adb70eb..8a204f93f 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -864,13 +864,19 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, const VkImageAspectFlags src_aspect_mask = src.AspectMask(); const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); - std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) { - return MakeBufferImageCopy(copy, true, src_aspect_mask); - }); + const auto bpp_in = BytesPerBlock(src.info.format) / DefaultBlockWidth(src.info.format); + const auto bpp_out = BytesPerBlock(dst.info.format) / DefaultBlockWidth(dst.info.format); + std::ranges::transform(copies, vk_in_copies.begin(), + [src_aspect_mask, bpp_in, bpp_out](const auto& copy) { + auto copy2 = copy; + copy2.src_offset.x = (bpp_out * copy.src_offset.x) / bpp_in; + copy2.extent.width = (bpp_out * copy.extent.width) / bpp_in; + return MakeBufferImageCopy(copy2, true, src_aspect_mask); + }); std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) { return MakeBufferImageCopy(copy, false, dst_aspect_mask); }); - const u32 img_bpp = BytesPerBlock(src.info.format); + const u32 img_bpp = BytesPerBlock(dst.info.format); size_t total_size = 0; for (const auto& copy : copies) { total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index e9100091e..a1296b574 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -216,10 +216,51 @@ 
ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { .height = config.height, .depth = 1, }; - rescaleable = block.depth == 0; - rescaleable &= size.height > 256; + rescaleable = block.depth == 0 && size.height > 256; downscaleable = size.height > 512; } } +static PixelFormat ByteSizeToFormat(u32 bytes_per_pixel) { + switch (bytes_per_pixel) { + case 1: + return PixelFormat::R8_UINT; + case 2: + return PixelFormat::R8G8_UINT; + case 4: + return PixelFormat::A8B8G8R8_UINT; + case 8: + return PixelFormat::R16G16B16A16_UINT; + case 16: + return PixelFormat::R32G32B32A32_UINT; + default: + UNIMPLEMENTED(); + return PixelFormat::Invalid; + } +} + +ImageInfo::ImageInfo(const Tegra::DMA::ImageOperand& config) noexcept { + const u32 bytes_per_pixel = config.bytes_per_pixel; + format = ByteSizeToFormat(bytes_per_pixel); + type = config.params.block_size.depth > 0 ? ImageType::e3D : ImageType::e2D; + num_samples = 1; + block = Extent3D{ + .width = config.params.block_size.width, + .height = config.params.block_size.height, + .depth = config.params.block_size.depth, + }; + size = Extent3D{ + .width = config.params.width, + .height = config.params.height, + .depth = config.params.depth, + }; + tile_width_spacing = 0; + resources.levels = 1; + resources.layers = 1; + layer_stride = CalculateLayerStride(*this); + maybe_unaligned_layer_stride = CalculateLayerSize(*this); + rescaleable = block.depth == 0 && size.height > 256; + downscaleable = size.height > 512; +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 93755e15e..a12f5b44f 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -5,6 +5,7 @@ #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/maxwell_dma.h" #include "video_core/surface.h" #include "video_core/texture_cache/types.h" @@ -19,6 +20,7 @@ 
struct ImageInfo { explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept; + explicit ImageInfo(const Tegra::DMA::ImageOperand& config) noexcept; PixelFormat format = PixelFormat::Invalid; ImageType type = ImageType::e1D; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9dd152fbe..335338434 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1359,6 +1359,75 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag } template <class P> +ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) { + std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); + if (!cpu_addr) { + return ImageId{}; + } + } + ImageId image_id{}; + boost::container::small_vector<ImageId, 1> image_ids; + const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (True(existing_image.flags & ImageFlagBits::Remapped)) { + return false; + } + if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) + [[unlikely]] { + const bool strict_size = True(existing_image.flags & ImageFlagBits::Strong); + const ImageInfo& existing = existing_image.info; + if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && + existing.pitch == info.pitch && + IsPitchLinearSameSize(existing, info, strict_size) && + IsViewCompatible(existing.format, info.format, false, true)) { + image_id = existing_image_id; + image_ids.push_back(existing_image_id); + return true; + } + } else if (IsSubCopy(info, existing_image, gpu_addr)) { + image_id = existing_image_id; + 
image_ids.push_back(existing_image_id); + return true; + } + return false; + }; + ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); + if (image_ids.size() <= 1) [[likely]] { + return image_id; + } + auto image_ids_compare = [this](ImageId a, ImageId b) { + auto& image_a = slot_images[a]; + auto& image_b = slot_images[b]; + return image_a.modification_tick < image_b.modification_tick; + }; + return *std::ranges::max_element(image_ids, image_ids_compare); +} + +template <class P> +std::optional<std::pair<typename TextureCache<P>::Image*, std::pair<u32, u32>>> +TextureCache<P>::ObtainImage(const Tegra::DMA::ImageOperand& operand, bool mark_as_modified) { + ImageInfo dst_info(operand); + ImageId dst_id = FindDMAImage(dst_info, operand.address); + if (!dst_id) { + return std::nullopt; + } + auto& image = slot_images[dst_id]; + auto base = image.TryFindBase(operand.address); + if (!base) { + return std::nullopt; + } + if (False(image.flags & ImageFlagBits::GpuModified)) { + // No need to waste time on an image that's synced with guest + return std::nullopt; + } + PrepareImage(dst_id, mark_as_modified, false); + auto& new_image = slot_images[dst_id]; + lru_cache.Touch(new_image.lru_index, frame_tick); + return std::make_pair(&new_image, std::make_pair(base->level, base->layer)); +} + +template <class P> SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { return NULL_SAMPLER_ID; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 013836933..848a5d9ea 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -209,6 +209,9 @@ public: /// Pop asynchronous downloads void PopAsyncFlushes(); + [[nodiscard]] std::optional<std::pair<Image*, std::pair<u32, u32>>> ObtainImage( + const Tegra::DMA::ImageOperand& operand, bool 
mark_as_modified); + /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); @@ -300,6 +303,8 @@ private: /// Remove joined images from the cache [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr); + /// Return a blit image pair from the given guest blit parameters [[nodiscard]] std::optional<BlitImages> GetBlitImages( const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 0453456b4..a0e10643f 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -54,6 +54,7 @@ enum class RelaxedOptions : u32 { Format = 1 << 1, Samples = 1 << 2, ForceBrokenViews = 1 << 3, + FormatBpp = 1 << 4, }; DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 697f86641..de37db684 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -743,6 +743,44 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn return copies; } +std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, + u32 down_shift) { + std::vector<ImageCopy> copies; + copies.reserve(src.resources.levels); + const bool is_3d = src.type == ImageType::e3D; + for (s32 level = 0; level < src.resources.levels; ++level) { + ImageCopy& copy = copies.emplace_back(); + copy.src_subresource = SubresourceLayers{ + .base_level = level, + .base_layer = 0, + .num_layers = src.resources.layers, + }; + copy.dst_subresource = SubresourceLayers{ + .base_level = level, + .base_layer = 0, + .num_layers = src.resources.layers, + }; + copy.src_offset = Offset3D{ + .x = 0, + .y = 0, + .z = 0, + }; + copy.dst_offset 
= Offset3D{ + .x = 0, + .y = 0, + .z = 0, + }; + const Extent3D mip_size = AdjustMipSize(src.size, level); + copy.extent = AdjustSamplesSize(mip_size, src.num_samples); + if (is_3d) { + copy.extent.depth = src.size.depth; + } + copy.extent.width = std::max<u32>((copy.extent.width * up_scale) >> down_shift, 1); + copy.extent.height = std::max<u32>((copy.extent.height * up_scale) >> down_shift, 1); + } + return copies; +} + bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { const GPUVAddr address = config.Address(); if (address == 0) { @@ -999,6 +1037,20 @@ bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 } } +bool IsBlockLinearSizeCompatibleBPPRelaxed(const ImageInfo& lhs, const ImageInfo& rhs, + u32 lhs_level, u32 rhs_level) noexcept { + ASSERT(lhs.type != ImageType::Linear); + ASSERT(rhs.type != ImageType::Linear); + const auto lhs_bpp = BytesPerBlock(lhs.format); + const auto rhs_bpp = BytesPerBlock(rhs.format); + const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level); + const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level); + return Common::AlignUpLog2(lhs_size.width * lhs_bpp, GOB_SIZE_X_SHIFT) == + Common::AlignUpLog2(rhs_size.width * rhs_bpp, GOB_SIZE_X_SHIFT) && + Common::AlignUpLog2(lhs_size.height, GOB_SIZE_Y_SHIFT) == + Common::AlignUpLog2(rhs_size.height, GOB_SIZE_Y_SHIFT); +} + bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept { ASSERT(lhs.type == ImageType::Linear); ASSERT(rhs.type == ImageType::Linear); @@ -1073,7 +1125,8 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const // Format checking is relaxed, but we still have to check for matching bytes per block. // This avoids creating a view for blits on UE4 titles where formats with different bytes // per block are aliased. 
- if (BytesPerBlock(existing.format) != BytesPerBlock(candidate.format)) { + if (BytesPerBlock(existing.format) != BytesPerBlock(candidate.format) && + False(options & RelaxedOptions::FormatBpp)) { return std::nullopt; } } else { @@ -1088,10 +1141,8 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const if (existing.type != candidate.type) { return std::nullopt; } - if (False(options & RelaxedOptions::Samples)) { - if (existing.num_samples != candidate.num_samples) { - return std::nullopt; - } + if (False(options & RelaxedOptions::Samples) && existing.num_samples != candidate.num_samples) { + return std::nullopt; } if (existing.resources.levels < candidate.resources.levels + base->level) { return std::nullopt; @@ -1101,14 +1152,16 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const if (mip_depth < candidate.size.depth + base->layer) { return std::nullopt; } - } else { - if (existing.resources.layers < candidate.resources.layers + base->layer) { - return std::nullopt; - } + } else if (existing.resources.layers < candidate.resources.layers + base->layer) { + return std::nullopt; } const bool strict_size = False(options & RelaxedOptions::Size); if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) { - return std::nullopt; + if (False(options & RelaxedOptions::FormatBpp)) { + return std::nullopt; + } else if (!IsBlockLinearSizeCompatibleBPPRelaxed(existing, candidate, base->level, 0)) { + return std::nullopt; + } } // TODO: compare block sizes return base; @@ -1120,6 +1173,31 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr .has_value(); } +bool IsSubCopy(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr) { + const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr); + if (!base) { + return false; + } + const ImageInfo& existing = image.info; + if (existing.resources.levels < 
candidate.resources.levels + base->level) { + return false; + } + if (existing.type == ImageType::e3D) { + const u32 mip_depth = std::max(1U, existing.size.depth << base->level); + if (mip_depth < candidate.size.depth + base->layer) { + return false; + } + } else { + if (existing.resources.layers < candidate.resources.layers + base->layer) { + return false; + } + } + if (!IsBlockLinearSizeCompatibleBPPRelaxed(existing, candidate, base->level, 0)) { + return false; + } + return true; +} + void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, const ImageBase* src) { const auto original_dst_format = dst_info.format; diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index d103db8ae..84aa6880d 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -56,6 +56,10 @@ struct OverlapResult { SubresourceBase base, u32 up_scale = 1, u32 down_shift = 0); +[[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, + u32 up_scale = 1, + u32 down_shift = 0); + [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, @@ -88,6 +92,9 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima [[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept; +[[nodiscard]] bool IsBlockLinearSizeCompatibleBPPRelaxed(const ImageInfo& lhs, const ImageInfo& rhs, + u32 lhs_level, u32 rhs_level) noexcept; + [[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, VAddr cpu_addr, const ImageBase& overlap, @@ -106,6 +113,9 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima GPUVAddr candidate_addr, RelaxedOptions options, bool broken_views, bool native_bgr); +[[nodiscard]] bool 
IsSubCopy(const ImageInfo& candidate, const ImageBase& image, + GPUVAddr candidate_addr); + void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, const ImageBase* src); |