diff options
39 files changed, 2983 insertions, 1109 deletions
diff --git a/.gitmodules b/.gitmodules index 26b4e5272..3a49c4874 100644 --- a/.gitmodules +++ b/.gitmodules @@ -43,3 +43,6 @@ [submodule "externals/zstd"] path = externals/zstd url = https://github.com/facebook/zstd +[submodule "sirit"] + path = externals/sirit + url = https://github.com/ReinUsesLisp/sirit diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index aa3319eb1..3f8b6cda8 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -72,6 +72,11 @@ if (USE_DISCORD_PRESENCE) target_include_directories(discord-rpc INTERFACE ./discord-rpc/include) endif() +# Sirit +if (ENABLE_VULKAN) + add_subdirectory(sirit) +endif() + if (ENABLE_WEB_SERVICE) # LibreSSL set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "") diff --git a/externals/sirit b/externals/sirit new file mode 160000 +Subproject f7c4b07a7e14edb1dcd93bc9879c823423705c2 diff --git a/src/common/swap.h b/src/common/swap.h index b3eab1324..71932c2bb 100644 --- a/src/common/swap.h +++ b/src/common/swap.h @@ -21,11 +21,6 @@ #if defined(_MSC_VER) #include <cstdlib> -#elif defined(__linux__) -#include <byteswap.h> -#elif defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) || \ - defined(__NetBSD__) || defined(__OpenBSD__) -#include <sys/endian.h> #endif #include <cstring> #include "common/common_types.h" @@ -62,86 +57,49 @@ namespace Common { #ifdef _MSC_VER -inline u16 swap16(u16 _data) { - return _byteswap_ushort(_data); +[[nodiscard]] inline u16 swap16(u16 data) noexcept { + return _byteswap_ushort(data); } -inline u32 swap32(u32 _data) { - return _byteswap_ulong(_data); +[[nodiscard]] inline u32 swap32(u32 data) noexcept { + return _byteswap_ulong(data); } -inline u64 swap64(u64 _data) { - return _byteswap_uint64(_data); +[[nodiscard]] inline u64 swap64(u64 data) noexcept { + return _byteswap_uint64(data); } -#elif defined(ARCHITECTURE_ARM) && (__ARM_ARCH >= 6) -inline u16 swap16(u16 _data) { - u32 data = _data; - __asm__("rev16 %0, %1\n" : "=l"(data) : "l"(data)); - return (u16)data; -} -inline u32 swap32(u32 _data) { - __asm__("rev %0, %1\n" : "=l"(_data) : "l"(_data)); - return _data; -} -inline u64 swap64(u64 _data) { - return ((u64)swap32(_data) << 32) | swap32(_data >> 32); -} -#elif __linux__ -inline u16 swap16(u16 _data) { - return bswap_16(_data); -} -inline u32 swap32(u32 _data) { - return bswap_32(_data); -} -inline u64 swap64(u64 _data) { - return bswap_64(_data); -} -#elif __APPLE__ -inline __attribute__((always_inline)) u16 swap16(u16 _data) { - return (_data >> 8) | (_data << 8); -} -inline __attribute__((always_inline)) u32 swap32(u32 _data) { - return __builtin_bswap32(_data); -} -inline __attribute__((always_inline)) u64 swap64(u64 _data) { - return __builtin_bswap64(_data); -} -#elif defined(__Bitrig__) || defined(__OpenBSD__) +#elif defined(__clang__) || defined(__GNUC__) +#if defined(__Bitrig__) || defined(__OpenBSD__) // redefine swap16, swap32, swap64 as inline functions #undef swap16 #undef swap32 #undef swap64 -inline u16 swap16(u16 _data) { - return __swap16(_data); -} -inline u32 swap32(u32 _data) { - return __swap32(_data); -} -inline u64 swap64(u64 _data) { - return __swap64(_data); -} -#elif defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) -inline u16 swap16(u16 _data) { - return bswap16(_data); +#endif +[[nodiscard]] inline u16 swap16(u16 data) noexcept { + return __builtin_bswap16(data); } -inline u32 swap32(u32 _data) { - return bswap32(_data); +[[nodiscard]] inline u32 swap32(u32 data) noexcept { + return __builtin_bswap32(data); } -inline u64 swap64(u64 _data) { - return bswap64(_data); +[[nodiscard]] inline u64 swap64(u64 data) noexcept { + return __builtin_bswap64(data); } #else -// Slow generic implementation. -inline u16 swap16(u16 data) { +// Generic implementation. +[[nodiscard]] inline u16 swap16(u16 data) noexcept { return (data >> 8) | (data << 8); } -inline u32 swap32(u32 data) { - return (swap16(data) << 16) | swap16(data >> 16); +[[nodiscard]] inline u32 swap32(u32 data) noexcept { + return ((data & 0xFF000000U) >> 24) | ((data & 0x00FF0000U) >> 8) | + ((data & 0x0000FF00U) << 8) | ((data & 0x000000FFU) << 24); } -inline u64 swap64(u64 data) { - return ((u64)swap32(data) << 32) | swap32(data >> 32); +[[nodiscard]] inline u64 swap64(u64 data) noexcept { + return ((data & 0xFF00000000000000ULL) >> 56) | ((data & 0x00FF000000000000ULL) >> 40) | + ((data & 0x0000FF0000000000ULL) >> 24) | ((data & 0x000000FF00000000ULL) >> 8) | + ((data & 0x00000000FF000000ULL) << 8) | ((data & 0x0000000000FF0000ULL) << 24) | + ((data & 0x000000000000FF00ULL) << 40) | ((data & 0x00000000000000FFULL) << 56); } #endif -inline float swapf(float f) { +[[nodiscard]] inline float swapf(float f) noexcept { static_assert(sizeof(u32) == sizeof(float), "float must be the same size as uint32_t."); u32 value; @@ -153,7 +111,7 @@ inline float swapf(float f) { return f; } -inline double swapd(double f) { +[[nodiscard]] inline double swapd(double f) noexcept { static_assert(sizeof(u64) == sizeof(double), "double must be the same size as uint64_t."); u64 value; diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 49145911b..dc96e35d5 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -14,6 +14,7 @@ #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/gdbstub/gdbstub.h" +#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/svc.h" #include "core/hle/kernel/vm_manager.h" @@ -99,7 +100,7 @@ public: } void CallSVC(u32 swi) override { - Kernel::CallSVC(swi); + Kernel::CallSVC(parent.system, swi); } void AddTicks(u64 ticks) override { @@ -112,14 +113,14 @@ public: // Always execute at least one tick. amortized_ticks = std::max<u64>(amortized_ticks, 1); - parent.core_timing.AddTicks(amortized_ticks); + parent.system.CoreTiming().AddTicks(amortized_ticks); num_interpreted_instructions = 0; } u64 GetTicksRemaining() override { - return std::max(parent.core_timing.GetDowncount(), 0); + return std::max(parent.system.CoreTiming().GetDowncount(), 0); } u64 GetCNTPCT() override { - return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks()); + return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); } ARM_Dynarmic& parent; @@ -129,7 +130,7 @@ public: }; std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const { - auto* current_process = Core::CurrentProcess(); + auto* current_process = system.Kernel().CurrentProcess(); auto** const page_table = current_process->VMManager().page_table.pointers.data(); Dynarmic::A64::UserConfig config; @@ -171,10 +172,10 @@ void ARM_Dynarmic::Step() { cb->InterpreterFallback(jit->GetPC(), 1); } -ARM_Dynarmic::ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor, +ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index) - : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{core_timing}, - core_index{core_index}, core_timing{core_timing}, + : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system}, + core_index{core_index}, system{system}, exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} { ThreadContext ctx{}; inner_unicorn.SaveContext(ctx); diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index d867c2a50..c1db254e8 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h @@ -12,19 +12,15 @@ #include "core/arm/exclusive_monitor.h" #include "core/arm/unicorn/arm_unicorn.h" -namespace Core::Timing { -class CoreTiming; -} - namespace Core { class ARM_Dynarmic_Callbacks; class DynarmicExclusiveMonitor; +class System; class ARM_Dynarmic final : public ARM_Interface { public: - ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor, - std::size_t core_index); + ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); ~ARM_Dynarmic() override; void MapBackingMemory(VAddr address, std::size_t size, u8* memory, @@ -63,7 +59,7 @@ private: ARM_Unicorn inner_unicorn; std::size_t core_index; - Timing::CoreTiming& core_timing; + System& system; DynarmicExclusiveMonitor& exclusive_monitor; }; diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 27309280c..4e07fe8b5 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -10,7 +10,6 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/hle/kernel/svc.h" -#include "core/memory.h" namespace Core { @@ -49,20 +48,6 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_ } } -static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) { - u32 esr{}; - CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr)); - - auto ec = esr >> 26; - auto iss = esr & 0xFFFFFF; - - switch (ec) { - case 0x15: // SVC - Kernel::CallSVC(iss); - break; - } -} - static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value, void* user_data) { ARM_Interface::ThreadContext ctx{}; @@ -72,7 +57,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si return {}; } -ARM_Unicorn::ARM_Unicorn(Timing::CoreTiming& core_timing) : core_timing{core_timing} { +ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc)); auto fpv = 3 << 20; @@ -177,7 +162,7 @@ void ARM_Unicorn::Run() { if (GDBStub::IsServerEnabled()) { ExecuteInstructions(std::max(4000000, 0)); } else { - ExecuteInstructions(std::max(core_timing.GetDowncount(), 0)); + ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), 0)); } } @@ -190,7 +175,7 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64)); void ARM_Unicorn::ExecuteInstructions(int num_instructions) { MICROPROFILE_SCOPE(ARM_Jit_Unicorn); CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions)); - core_timing.AddTicks(num_instructions); + system.CoreTiming().AddTicks(num_instructions); if (GDBStub::IsServerEnabled()) { if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) { uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address); @@ -273,4 +258,20 @@ void ARM_Unicorn::RecordBreak(GDBStub::BreakpointAddress bkpt) { last_bkpt_hit = true; } +void ARM_Unicorn::InterruptHook(uc_engine* uc, u32 int_no, void* user_data) { + u32 esr{}; + CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr)); + + const auto ec = esr >> 26; + const auto iss = esr & 0xFFFFFF; + + auto* const arm_instance = static_cast<ARM_Unicorn*>(user_data); + + switch (ec) { + case 0x15: // SVC + Kernel::CallSVC(arm_instance->system, iss); + break; + } +} + } // namespace Core diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 1e44f0736..209fc16ad 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -9,15 +9,13 @@ #include "core/arm/arm_interface.h" #include "core/gdbstub/gdbstub.h" -namespace Core::Timing { -class CoreTiming; -} - namespace Core { +class System; + class ARM_Unicorn final : public ARM_Interface { public: - explicit ARM_Unicorn(Timing::CoreTiming& core_timing); + explicit ARM_Unicorn(System& system); ~ARM_Unicorn() override; void MapBackingMemory(VAddr address, std::size_t size, u8* memory, @@ -47,8 +45,10 @@ public: void RecordBreak(GDBStub::BreakpointAddress bkpt); private: + static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data); + uc_engine* uc{}; - Timing::CoreTiming& core_timing; + System& system; GDBStub::BreakpointAddress last_bkpt{}; bool last_bkpt_hit = false; }; diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index e75741db0..ba63c3e61 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp @@ -55,13 +55,13 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} { if (Settings::values.use_cpu_jit) { #ifdef ARCHITECTURE_x86_64 - arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index); + arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index); #else - arm_interface = std::make_unique<ARM_Unicorn>(); + arm_interface = std::make_unique<ARM_Unicorn>(system); LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); #endif } else { - arm_interface = std::make_unique<ARM_Unicorn>(core_timing); + arm_interface = std::make_unique<ARM_Unicorn>(system); } scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface); diff --git a/src/core/hle/kernel/client_port.h b/src/core/hle/kernel/client_port.h index 6cd607206..4921ad4f0 100644 --- a/src/core/hle/kernel/client_port.h +++ b/src/core/hle/kernel/client_port.h @@ -25,7 +25,7 @@ public: return name; } - static const HandleType HANDLE_TYPE = HandleType::ClientPort; + static constexpr HandleType HANDLE_TYPE = HandleType::ClientPort; HandleType GetHandleType() const override { return HANDLE_TYPE; } diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h index b1f39aad7..09cdff588 100644 --- a/src/core/hle/kernel/client_session.h +++ b/src/core/hle/kernel/client_session.h @@ -29,7 +29,7 @@ public: return name; } - static const HandleType HANDLE_TYPE = HandleType::ClientSession; + static constexpr HandleType HANDLE_TYPE = HandleType::ClientSession; HandleType GetHandleType() const override { return HANDLE_TYPE; } diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index f060f2a3b..dda52f4c0 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -85,7 +85,7 @@ public: return name; } - static const HandleType HANDLE_TYPE = HandleType::Process; + static constexpr HandleType HANDLE_TYPE = HandleType::Process; HandleType GetHandleType() const override { return HANDLE_TYPE; } diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h index 2eb9dcbb7..84215f572 100644 --- a/src/core/hle/kernel/readable_event.h +++ b/src/core/hle/kernel/readable_event.h @@ -31,7 +31,7 @@ public: return reset_type; } - static const HandleType HANDLE_TYPE = HandleType::ReadableEvent; + static constexpr HandleType HANDLE_TYPE = HandleType::ReadableEvent; HandleType GetHandleType() const override { return HANDLE_TYPE; } diff --git a/src/core/hle/kernel/resource_limit.h b/src/core/hle/kernel/resource_limit.h index 70e09858a..2613a6bb5 100644 --- a/src/core/hle/kernel/resource_limit.h +++ b/src/core/hle/kernel/resource_limit.h @@ -41,7 +41,7 @@ public: return GetTypeName(); } - static const HandleType HANDLE_TYPE = HandleType::ResourceLimit; + static constexpr HandleType HANDLE_TYPE = HandleType::ResourceLimit; HandleType GetHandleType() const override { return HANDLE_TYPE; } diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h index fef573b71..dc88a1ebd 100644 --- a/src/core/hle/kernel/server_port.h +++ b/src/core/hle/kernel/server_port.h @@ -43,7 +43,7 @@ public: return name; } - static const HandleType HANDLE_TYPE = HandleType::ServerPort; + static constexpr HandleType HANDLE_TYPE = HandleType::ServerPort; HandleType GetHandleType() const override { return HANDLE_TYPE; } diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index a6b2cf06a..696a82cd9 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -28,11 +28,9 @@ ServerSession::~ServerSession() { // the emulated application. // Decrease the port's connection count. - if (parent->port) + if (parent->port) { parent->port->ConnectionClosed(); - - // TODO(Subv): Wake up all the ClientSession's waiting threads and set - // the SendSyncRequest result to 0xC920181A. + } parent->server = nullptr; } @@ -74,9 +72,6 @@ void ServerSession::ClientDisconnected() { handler->ClientDisconnected(this); } - // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set - // their WaitSynchronization result to 0xC920181A. - // Clean up the list of client threads with pending requests, they are unneeded now that the // client endpoint is closed. pending_requesting_threads.clear(); diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index 09b835ff8..738df30f8 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h @@ -46,7 +46,7 @@ public: return name; } - static const HandleType HANDLE_TYPE = HandleType::ServerSession; + static constexpr HandleType HANDLE_TYPE = HandleType::ServerSession; HandleType GetHandleType() const override { return HANDLE_TYPE; } diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h index 37e18c443..c2b6155e1 100644 --- a/src/core/hle/kernel/shared_memory.h +++ b/src/core/hle/kernel/shared_memory.h @@ -76,7 +76,7 @@ public: return name; } - static const HandleType HANDLE_TYPE = HandleType::SharedMemory; + static constexpr HandleType HANDLE_TYPE = HandleType::SharedMemory; HandleType GetHandleType() const override { return HANDLE_TYPE; } diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 2fd07ab34..e5d4d6b55 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -131,16 +131,15 @@ enum class ResourceLimitValueType { LimitValue, }; -ResultVal<s64> RetrieveResourceLimitValue(Handle resource_limit, u32 resource_type, - ResourceLimitValueType value_type) { +ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit, + u32 resource_type, ResourceLimitValueType value_type) { const auto type = static_cast<ResourceType>(resource_type); if (!IsValidResourceType(type)) { LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); return ERR_INVALID_ENUM_VALUE; } - const auto& kernel = Core::System::GetInstance().Kernel(); - const auto* const current_process = kernel.CurrentProcess(); + const auto* const current_process = system.Kernel().CurrentProcess(); ASSERT(current_process != nullptr); const auto resource_limit_object = @@ -160,7 +159,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Handle resource_limit, u32 resource_ty } // Anonymous namespace /// Set the process heap to a given Size. It can both extend and shrink the heap. -static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) { +static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) { LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size); // Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB. @@ -175,7 +174,7 @@ static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) { return ERR_INVALID_SIZE; } - auto& vm_manager = Core::System::GetInstance().Kernel().CurrentProcess()->VMManager(); + auto& vm_manager = system.Kernel().CurrentProcess()->VMManager(); const auto alloc_result = vm_manager.SetHeapSize(heap_size); if (alloc_result.Failed()) { return alloc_result.Code(); @@ -185,7 +184,7 @@ static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) { return RESULT_SUCCESS; } -static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) { +static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) { LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot); if (!Common::Is4KBAligned(addr)) { @@ -217,7 +216,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) { return ERR_INVALID_MEMORY_PERMISSIONS; } - auto* const current_process = Core::CurrentProcess(); + auto* const current_process = system.Kernel().CurrentProcess(); auto& vm_manager = current_process->VMManager(); if (!vm_manager.IsWithinAddressSpace(addr, size)) { @@ -242,7 +241,8 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) { return vm_manager.ReprotectRange(addr, size, converted_permissions); } -static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attribute) { +static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask, + u32 attribute) { LOG_DEBUG(Kernel_SVC, "called, address=0x{:016X}, size=0x{:X}, mask=0x{:08X}, attribute=0x{:08X}", address, size, mask, attribute); @@ -280,7 +280,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr return ERR_INVALID_COMBINATION; } - auto& vm_manager = Core::CurrentProcess()->VMManager(); + auto& vm_manager = system.Kernel().CurrentProcess()->VMManager(); if (!vm_manager.IsWithinAddressSpace(address, size)) { LOG_ERROR(Kernel_SVC, "Given address (0x{:016X}) is outside the bounds of the address space.", address); @@ -291,11 +291,11 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr } /// Maps a memory range into a different range. -static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) { +static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) { LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, src_addr, size); - auto& vm_manager = Core::CurrentProcess()->VMManager(); + auto& vm_manager = system.Kernel().CurrentProcess()->VMManager(); const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size); if (result.IsError()) { @@ -306,11 +306,11 @@ static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) { } /// Unmaps a region that was previously mapped with svcMapMemory -static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) { +static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) { LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, src_addr, size); - auto& vm_manager = Core::CurrentProcess()->VMManager(); + auto& vm_manager = system.Kernel().CurrentProcess()->VMManager(); const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size); if (result.IsError()) { @@ -321,7 +321,8 @@ static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) { } /// Connect to an OS service given the port name, returns the handle to the port to out -static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address) { +static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, + VAddr port_name_address) { if (!Memory::IsValidVirtualAddress(port_name_address)) { LOG_ERROR(Kernel_SVC, "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}", @@ -340,8 +341,8 @@ static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address LOG_TRACE(Kernel_SVC, "called port_name={}", port_name); - auto& kernel = Core::System::GetInstance().Kernel(); - auto it = kernel.FindNamedPort(port_name); + auto& kernel = system.Kernel(); + const auto it = kernel.FindNamedPort(port_name); if (!kernel.IsValidNamedPort(it)) { LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name); return ERR_NOT_FOUND; @@ -353,14 +354,14 @@ static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address CASCADE_RESULT(client_session, client_port->Connect()); // Return the client session - auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + auto& handle_table = kernel.CurrentProcess()->GetHandleTable(); CASCADE_RESULT(*out_handle, handle_table.Create(client_session)); return RESULT_SUCCESS; } /// Makes a blocking IPC call to an OS service. -static ResultCode SendSyncRequest(Handle handle) { - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); +static ResultCode SendSyncRequest(Core::System& system, Handle handle) { + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); SharedPtr<ClientSession> session = handle_table.Get<ClientSession>(handle); if (!session) { LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle); @@ -369,18 +370,18 @@ static ResultCode SendSyncRequest(Handle handle) { LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName()); - Core::System::GetInstance().PrepareReschedule(); + system.PrepareReschedule(); // TODO(Subv): svcSendSyncRequest should put the caller thread to sleep while the server // responds and cause a reschedule. - return session->SendSyncRequest(GetCurrentThread()); + return session->SendSyncRequest(system.CurrentScheduler().GetCurrentThread()); } /// Get the ID for the specified thread. -static ResultCode GetThreadId(u64* thread_id, Handle thread_handle) { +static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle thread_handle) { LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle); - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); if (!thread) { LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", thread_handle); @@ -392,10 +393,10 @@ static ResultCode GetThreadId(u64* thread_id, Handle thread_handle) { } /// Gets the ID of the specified process or a specified thread's owning process. -static ResultCode GetProcessId(u64* process_id, Handle handle) { +static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle handle) { LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle); - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); const SharedPtr<Process> process = handle_table.Get<Process>(handle); if (process) { *process_id = process->GetProcessID(); @@ -437,8 +438,8 @@ static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, SharedPtr<Thr }; /// Wait for the given handles to synchronize, timeout after the specified nanoseconds -static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64 handle_count, - s64 nano_seconds) { +static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address, + u64 handle_count, s64 nano_seconds) { LOG_TRACE(Kernel_SVC, "called handles_address=0x{:X}, handle_count={}, nano_seconds={}", handles_address, handle_count, nano_seconds); @@ -457,11 +458,11 @@ static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64 return ERR_OUT_OF_RANGE; } - auto* const thread = GetCurrentThread(); + auto* const thread = system.CurrentScheduler().GetCurrentThread(); using ObjectPtr = Thread::ThreadWaitObjects::value_type; Thread::ThreadWaitObjects objects(handle_count); - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); for (u64 i = 0; i < handle_count; ++i) { const Handle handle = Memory::Read32(handles_address + i * sizeof(Handle)); @@ -507,16 +508,16 @@ static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64 thread->WakeAfterDelay(nano_seconds); thread->SetWakeupCallback(DefaultThreadWakeupCallback); - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); return RESULT_TIMEOUT; } /// Resumes a thread waiting on WaitSynchronization -static ResultCode CancelSynchronization(Handle thread_handle) { +static ResultCode CancelSynchronization(Core::System& system, Handle thread_handle) { LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle); - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); if (!thread) { LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}", @@ -531,8 +532,8 @@ static ResultCode CancelSynchronization(Handle thread_handle) { } /// Attempts to locks a mutex, creating it if it does not already exist -static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr, - Handle requesting_thread_handle) { +static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_handle, + VAddr mutex_addr, Handle requesting_thread_handle) { LOG_TRACE(Kernel_SVC, "called holding_thread_handle=0x{:08X}, mutex_addr=0x{:X}, " "requesting_current_thread_handle=0x{:08X}", @@ -549,13 +550,13 @@ static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr, return ERR_INVALID_ADDRESS; } - auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess(); + auto* const current_process = system.Kernel().CurrentProcess(); return current_process->GetMutex().TryAcquire(mutex_addr, holding_thread_handle, requesting_thread_handle); } /// Unlock a mutex -static ResultCode ArbitrateUnlock(VAddr mutex_addr) { +static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) { LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr); if (Memory::IsKernelVirtualAddress(mutex_addr)) { @@ -569,7 +570,7 @@ static ResultCode ArbitrateUnlock(VAddr mutex_addr) { return ERR_INVALID_ADDRESS; } - auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess(); + auto* const current_process = system.Kernel().CurrentProcess(); return current_process->GetMutex().Release(mutex_addr); } @@ -592,7 +593,7 @@ struct BreakReason { }; /// Break program execution -static void Break(u32 reason, u64 info1, u64 info2) { +static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) { BreakReason break_reason{reason}; bool has_dumped_buffer{}; @@ -670,22 +671,24 @@ static void Break(u32 reason, u64 info1, u64 info2) { Debug_Emulated, "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}", reason, info1, info2); + handle_debug_buffer(info1, info2); - Core::System::GetInstance() - .ArmInterface(static_cast<std::size_t>(GetCurrentThread()->GetProcessorID())) - .LogBacktrace(); + + auto* const current_thread = system.CurrentScheduler().GetCurrentThread(); + const auto thread_processor_id = current_thread->GetProcessorID(); + system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace(); ASSERT(false); - Core::CurrentProcess()->PrepareForTermination(); + system.Kernel().CurrentProcess()->PrepareForTermination(); // Kill the current thread - GetCurrentThread()->Stop(); - Core::System::GetInstance().PrepareReschedule(); + current_thread->Stop(); + system.PrepareReschedule(); } } /// Used to output a message on a debug hardware unit - does nothing on a retail unit -static void OutputDebugString(VAddr address, u64 len) { +static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr address, u64 len) { if (len == 0) { return; } @@ -696,7 +699,8 @@ static void OutputDebugString(VAddr address, u64 len) { } /// Gets system/memory information for the current process -static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) { +static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 handle, + u64 info_sub_id) { LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id, info_sub_id, handle); @@ -754,7 +758,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) return ERR_INVALID_ENUM_VALUE; } - const auto& current_process_handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& current_process_handle_table = + system.Kernel().CurrentProcess()->GetHandleTable(); const auto process = current_process_handle_table.Get<Process>(static_cast<Handle>(handle)); if (!process) { return ERR_INVALID_HANDLE; @@ -844,7 +849,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) return ERR_INVALID_COMBINATION; } - Process* const current_process = Core::CurrentProcess(); + Process* const current_process = system.Kernel().CurrentProcess(); HandleTable& handle_table = current_process->GetHandleTable(); const auto resource_limit = current_process->GetResourceLimit(); if (!resource_limit) { @@ -875,7 +880,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) return ERR_INVALID_COMBINATION; } - *result = Core::CurrentProcess()->GetRandomEntropy(info_sub_id); + *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id); return RESULT_SUCCESS; case GetInfoType::PrivilegedProcessId: @@ -892,15 +897,14 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) return ERR_INVALID_COMBINATION; } - const auto thread = - Core::CurrentProcess()->GetHandleTable().Get<Thread>(static_cast<Handle>(handle)); + const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<Thread>( + static_cast<Handle>(handle)); if (!thread) { LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", static_cast<Handle>(handle)); return ERR_INVALID_HANDLE; } - const auto& system = Core::System::GetInstance(); const auto& core_timing = system.CoreTiming(); const auto& scheduler = system.CurrentScheduler(); const auto* const current_thread = scheduler.GetCurrentThread(); @@ -927,13 +931,13 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) } /// Sets the thread activity -static ResultCode SetThreadActivity(Handle handle, u32 activity) { +static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) { LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); if (activity > static_cast<u32>(ThreadActivity::Paused)) { return ERR_INVALID_ENUM_VALUE; } - const auto* current_process = Core::CurrentProcess(); + const auto* current_process = system.Kernel().CurrentProcess(); const SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle); if (!thread) { LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle); @@ -950,7 +954,7 @@ static ResultCode SetThreadActivity(Handle handle, u32 activity) { return ERR_INVALID_HANDLE; } - if (thread == GetCurrentThread()) { + if (thread == system.CurrentScheduler().GetCurrentThread()) { LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread"); return ERR_BUSY; } @@ -960,10 +964,10 @@ static ResultCode SetThreadActivity(Handle handle, u32 activity) { } /// Gets the thread context -static ResultCode GetThreadContext(VAddr thread_context, Handle handle) { +static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, Handle handle) { LOG_DEBUG(Kernel_SVC, "called, context=0x{:08X}, thread=0x{:X}", thread_context, handle); - const auto* current_process = Core::CurrentProcess(); + const auto* current_process = system.Kernel().CurrentProcess(); const SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle); if (!thread) { LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle); @@ -980,7 +984,7 @@ static ResultCode GetThreadContext(VAddr thread_context, Handle handle) { return ERR_INVALID_HANDLE; } - if (thread == GetCurrentThread()) { + if (thread == system.CurrentScheduler().GetCurrentThread()) { LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread"); return ERR_BUSY; } @@ -1001,10 +1005,10 @@ static ResultCode GetThreadContext(VAddr thread_context, Handle handle) { } /// Gets the priority for the specified thread -static ResultCode GetThreadPriority(u32* priority, Handle handle) { +static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle handle) { LOG_TRACE(Kernel_SVC, "called"); - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); const SharedPtr<Thread> thread = handle_table.Get<Thread>(handle); if (!thread) { LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle); @@ -1016,7 +1020,7 @@ static ResultCode GetThreadPriority(u32* priority, Handle handle) { } /// Sets the priority for the specified thread -static ResultCode SetThreadPriority(Handle handle, u32 priority) { +static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 priority) { LOG_TRACE(Kernel_SVC, "called"); if (priority > THREADPRIO_LOWEST) { @@ -1027,7 +1031,7 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) { return ERR_INVALID_THREAD_PRIORITY; } - const auto* const current_process = Core::CurrentProcess(); + const auto* const current_process = system.Kernel().CurrentProcess(); SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle); if (!thread) { @@ -1037,18 +1041,18 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) { thread->SetPriority(priority); - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); return RESULT_SUCCESS; } /// Get which CPU core is executing the current thread -static u32 GetCurrentProcessorNumber() { +static u32 GetCurrentProcessorNumber(Core::System& system) { LOG_TRACE(Kernel_SVC, "called"); - return GetCurrentThread()->GetProcessorID(); + return system.CurrentScheduler().GetCurrentThread()->GetProcessorID(); } -static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size, - u32 permissions) { +static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr, + u64 size, u32 permissions) { LOG_TRACE(Kernel_SVC, "called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}", shared_memory_handle, addr, size, permissions); @@ -1082,7 +1086,7 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s return ERR_INVALID_MEMORY_PERMISSIONS; } - auto* const current_process = Core::CurrentProcess(); + auto* const current_process = system.Kernel().CurrentProcess(); auto shared_memory = current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle); if (!shared_memory) { LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}", @@ -1100,7 +1104,8 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s return shared_memory->Map(*current_process, addr, permissions_type, MemoryPermission::DontCare); } -static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size) { +static ResultCode UnmapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr, + u64 size) { LOG_WARNING(Kernel_SVC, "called, shared_memory_handle=0x{:08X}, addr=0x{:X}, size=0x{:X}", shared_memory_handle, addr, size); @@ -1125,7 +1130,7 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 return ERR_INVALID_ADDRESS_STATE; } - auto* const current_process = Core::CurrentProcess(); + auto* const current_process = system.Kernel().CurrentProcess(); auto shared_memory = current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle); if (!shared_memory) { LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}", @@ -1143,10 +1148,11 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 return shared_memory->Unmap(*current_process, addr, size); } -static ResultCode QueryProcessMemory(VAddr memory_info_address, VAddr page_info_address, - Handle process_handle, VAddr address) { +static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address, + VAddr page_info_address, Handle process_handle, + VAddr address) { LOG_TRACE(Kernel_SVC, "called process=0x{:08X} address={:X}", process_handle, address); - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); SharedPtr<Process> process = handle_table.Get<Process>(process_handle); if (!process) { LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", @@ -1172,20 +1178,20 @@ static ResultCode QueryProcessMemory(VAddr memory_info_address, VAddr page_info_ return RESULT_SUCCESS; } -static ResultCode QueryMemory(VAddr memory_info_address, VAddr page_info_address, - VAddr query_address) { +static ResultCode QueryMemory(Core::System& system, VAddr memory_info_address, + VAddr page_info_address, VAddr query_address) { LOG_TRACE(Kernel_SVC, "called, memory_info_address=0x{:016X}, page_info_address=0x{:016X}, " "query_address=0x{:016X}", memory_info_address, page_info_address, query_address); - return QueryProcessMemory(memory_info_address, page_info_address, CurrentProcess, + return QueryProcessMemory(system, memory_info_address, page_info_address, CurrentProcess, query_address); } /// Exits the current process -static void ExitProcess() { - auto* current_process = Core::CurrentProcess(); +static void ExitProcess(Core::System& system) { + auto* current_process = system.Kernel().CurrentProcess(); LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID()); ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running, @@ -1194,20 +1200,20 @@ static void ExitProcess() { current_process->PrepareForTermination(); // Kill the current thread - GetCurrentThread()->Stop(); + system.CurrentScheduler().GetCurrentThread()->Stop(); - Core::System::GetInstance().PrepareReschedule(); + system.PrepareReschedule(); } /// Creates a new thread -static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, VAddr stack_top, - u32 priority, s32 processor_id) { +static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point, u64 arg, + VAddr stack_top, u32 priority, s32 processor_id) { LOG_TRACE(Kernel_SVC, "called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, " "threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}", entry_point, arg, stack_top, priority, processor_id, *out_handle); - auto* const current_process = Core::CurrentProcess(); + auto* const current_process = system.Kernel().CurrentProcess(); if (processor_id == THREADPROCESSORID_IDEAL) { // Set the target CPU to the one specified by the process. @@ -1239,7 +1245,7 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V } const std::string name = fmt::format("thread-{:X}", entry_point); - auto& kernel = Core::System::GetInstance().Kernel(); + auto& kernel = system.Kernel(); CASCADE_RESULT(SharedPtr<Thread> thread, Thread::Create(kernel, name, entry_point, priority, arg, processor_id, stack_top, *current_process)); @@ -1253,16 +1259,16 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V thread->SetGuestHandle(*new_guest_handle); *out_handle = *new_guest_handle; - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); return RESULT_SUCCESS; } /// Starts the thread for the provided handle -static ResultCode StartThread(Handle thread_handle) { +static ResultCode StartThread(Core::System& system, Handle thread_handle) { LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle); - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); if (!thread) { LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}", @@ -1275,16 +1281,14 @@ static ResultCode StartThread(Handle thread_handle) { thread->ResumeFromWait(); if (thread->GetStatus() == ThreadStatus::Ready) { - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); } return RESULT_SUCCESS; } /// Called when a thread exits -static void ExitThread() { - auto& system = Core::System::GetInstance(); - +static void ExitThread(Core::System& system) { LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC()); auto* const current_thread = system.CurrentScheduler().GetCurrentThread(); @@ -1294,7 +1298,7 @@ static void ExitThread() { } /// Sleep the current thread -static void SleepThread(s64 nanoseconds) { +static void SleepThread(Core::System& system, s64 nanoseconds) { LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds); enum class SleepType : s64 { @@ -1303,7 +1307,6 @@ static void SleepThread(s64 nanoseconds) { YieldAndWaitForLoadBalancing = -2, }; - auto& system = Core::System::GetInstance(); auto& scheduler = system.CurrentScheduler(); auto* const current_thread = scheduler.GetCurrentThread(); @@ -1332,8 +1335,9 @@ static void SleepThread(s64 nanoseconds) { } /// Wait process wide key atomic -static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_variable_addr, - Handle thread_handle, s64 nano_seconds) { +static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_addr, + VAddr condition_variable_addr, Handle thread_handle, + s64 nano_seconds) { LOG_TRACE( Kernel_SVC, "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}", @@ -1353,7 +1357,7 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var return ERR_INVALID_ADDRESS; } - auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess(); + auto* const current_process = system.Kernel().CurrentProcess(); const auto& handle_table = current_process->GetHandleTable(); SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); ASSERT(thread); @@ -1363,7 +1367,7 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var return release_result; } - SharedPtr<Thread> current_thread = GetCurrentThread(); + SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread(); current_thread->SetCondVarWaitAddress(condition_variable_addr); current_thread->SetMutexWaitAddress(mutex_addr); current_thread->SetWaitHandle(thread_handle); @@ -1374,19 +1378,20 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var // Note: Deliberately don't attempt to inherit the lock owner's priority. - Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule(); + system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule(); return RESULT_SUCCESS; } /// Signal process wide key -static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target) { +static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, + s32 target) { LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}", condition_variable_addr, target); - const auto RetrieveWaitingThreads = [](std::size_t core_index, - std::vector<SharedPtr<Thread>>& waiting_threads, - VAddr condvar_addr) { - const auto& scheduler = Core::System::GetInstance().Scheduler(core_index); + const auto RetrieveWaitingThreads = [&system](std::size_t core_index, + std::vector<SharedPtr<Thread>>& waiting_threads, + VAddr condvar_addr) { + const auto& scheduler = system.Scheduler(core_index); const auto& thread_list = scheduler.GetThreadList(); for (const auto& thread : thread_list) { @@ -1425,9 +1430,8 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target // liberate Cond Var Thread. thread->SetCondVarWaitAddress(0); - std::size_t current_core = Core::System::GetInstance().CurrentCoreIndex(); - - auto& monitor = Core::System::GetInstance().Monitor(); + const std::size_t current_core = system.CurrentCoreIndex(); + auto& monitor = system.Monitor(); // Atomically read the value of the mutex. u32 mutex_val = 0; @@ -1456,7 +1460,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target thread->SetLockOwner(nullptr); thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); } else { // Atomically signal that the mutex now has a waiting thread. do { @@ -1472,7 +1476,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target // The mutex is already owned by some other thread, make this thread wait on it. const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask); - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); auto owner = handle_table.Get<Thread>(owner_handle); ASSERT(owner); ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar); @@ -1487,14 +1491,17 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target } // Wait for an address (via Address Arbiter) -static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout) { +static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, + s64 timeout) { LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address, type, value, timeout); + // If the passed address is a kernel virtual address, return invalid memory state. if (Memory::IsKernelVirtualAddress(address)) { LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address); return ERR_INVALID_ADDRESS_STATE; } + // If the address is not properly aligned to 4 bytes, return invalid address. if (!Common::IsWordAligned(address)) { LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address); @@ -1502,20 +1509,22 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout } const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type); - auto& address_arbiter = - Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter(); + auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter(); return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); } // Signals to an address (via Address Arbiter) -static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to_wake) { +static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value, + s32 num_to_wake) { LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", address, type, value, num_to_wake); + // If the passed address is a kernel virtual address, return invalid memory state. if (Memory::IsKernelVirtualAddress(address)) { LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address); return ERR_INVALID_ADDRESS_STATE; } + // If the address is not properly aligned to 4 bytes, return invalid address. if (!Common::IsWordAligned(address)) { LOG_ERROR(Kernel_SVC, "Address is not word aligned, address={:016X}", address); @@ -1523,16 +1532,15 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to } const auto signal_type = static_cast<AddressArbiter::SignalType>(type); - auto& address_arbiter = - Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter(); + auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter(); return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake); } /// This returns the total CPU ticks elapsed since the CPU was powered-on -static u64 GetSystemTick() { +static u64 GetSystemTick(Core::System& system) { LOG_TRACE(Kernel_SVC, "called"); - auto& core_timing = Core::System::GetInstance().CoreTiming(); + auto& core_timing = system.CoreTiming(); const u64 result{core_timing.GetTicks()}; // Advance time to defeat dumb games that busy-wait for the frame to end. @@ -1542,18 +1550,18 @@ static u64 GetSystemTick() { } /// Close a handle -static ResultCode CloseHandle(Handle handle) { +static ResultCode CloseHandle(Core::System& system, Handle handle) { LOG_TRACE(Kernel_SVC, "Closing handle 0x{:08X}", handle); - auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); return handle_table.Close(handle); } /// Clears the signaled state of an event or process. -static ResultCode ResetSignal(Handle handle) { +static ResultCode ResetSignal(Core::System& system, Handle handle) { LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle); - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); auto event = handle_table.Get<ReadableEvent>(handle); if (event) { @@ -1570,7 +1578,8 @@ static ResultCode ResetSignal(Handle handle) { } /// Creates a TransferMemory object -static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32 permissions) { +static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAddr addr, u64 size, + u32 permissions) { LOG_DEBUG(Kernel_SVC, "called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size, permissions); @@ -1598,7 +1607,7 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32 return ERR_INVALID_MEMORY_PERMISSIONS; } - auto& kernel = Core::System::GetInstance().Kernel(); + auto& kernel = system.Kernel(); auto transfer_mem_handle = TransferMemory::Create(kernel, addr, size, perms); auto& handle_table = kernel.CurrentProcess()->GetHandleTable(); @@ -1611,7 +1620,8 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32 return RESULT_SUCCESS; } -static ResultCode MapTransferMemory(Handle handle, VAddr address, u64 size, u32 permission_raw) { +static ResultCode MapTransferMemory(Core::System& system, Handle handle, VAddr address, u64 size, + u32 permission_raw) { LOG_DEBUG(Kernel_SVC, "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}, permissions=0x{:08X}", handle, address, size, permission_raw); @@ -1645,7 +1655,7 @@ static ResultCode MapTransferMemory(Handle handle, VAddr address, u64 size, u32 return ERR_INVALID_STATE; } - const auto& kernel = Core::System::GetInstance().Kernel(); + const auto& kernel = system.Kernel(); const auto* const current_process = kernel.CurrentProcess(); const auto& handle_table = current_process->GetHandleTable(); @@ -1667,7 +1677,8 @@ static ResultCode MapTransferMemory(Handle handle, VAddr address, u64 size, u32 return transfer_memory->MapMemory(address, size, permissions); } -static ResultCode UnmapTransferMemory(Handle handle, VAddr address, u64 size) { +static ResultCode UnmapTransferMemory(Core::System& system, Handle handle, VAddr address, + u64 size) { LOG_DEBUG(Kernel_SVC, "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}", handle, address, size); @@ -1692,7 +1703,7 @@ static ResultCode UnmapTransferMemory(Handle handle, VAddr address, u64 size) { return ERR_INVALID_ADDRESS_STATE; } - const auto& kernel = Core::System::GetInstance().Kernel(); + const auto& kernel = system.Kernel(); const auto* const current_process = kernel.CurrentProcess(); const auto& handle_table = current_process->GetHandleTable(); @@ -1714,10 +1725,11 @@ static ResultCode UnmapTransferMemory(Handle handle, VAddr address, u64 size) { return transfer_memory->UnmapMemory(address, size); } -static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) { +static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, u32* core, + u64* mask) { LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle); - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); if (!thread) { LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}", @@ -1731,11 +1743,12 @@ static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) return RESULT_SUCCESS; } -static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) { +static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core, + u64 mask) { LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle, mask, core); - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); if (!thread) { LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}", @@ -1780,8 +1793,8 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) { return RESULT_SUCCESS; } -static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permissions, - u32 remote_permissions) { +static ResultCode CreateSharedMemory(Core::System& system, Handle* handle, u64 size, + u32 local_permissions, u32 remote_permissions) { LOG_TRACE(Kernel_SVC, "called, size=0x{:X}, localPerms=0x{:08X}, remotePerms=0x{:08X}", size, local_permissions, remote_permissions); if (size == 0) { @@ -1817,7 +1830,7 @@ static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permiss return ERR_INVALID_MEMORY_PERMISSIONS; } - auto& kernel = Core::System::GetInstance().Kernel(); + auto& kernel = system.Kernel(); auto process = kernel.CurrentProcess(); auto& handle_table = process->GetHandleTable(); auto shared_mem_handle = SharedMemory::Create(kernel, process, size, local_perms, remote_perms); @@ -1826,10 +1839,10 @@ static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permiss return RESULT_SUCCESS; } -static ResultCode CreateEvent(Handle* write_handle, Handle* read_handle) { +static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle* read_handle) { LOG_DEBUG(Kernel_SVC, "called"); - auto& kernel = Core::System::GetInstance().Kernel(); + auto& kernel = system.Kernel(); const auto [readable_event, writable_event] = WritableEvent::CreateEventPair(kernel, ResetType::Sticky, "CreateEvent"); @@ -1854,10 +1867,10 @@ static ResultCode CreateEvent(Handle* write_handle, Handle* read_handle) { return RESULT_SUCCESS; } -static ResultCode ClearEvent(Handle handle) { +static ResultCode ClearEvent(Core::System& system, Handle handle) { LOG_TRACE(Kernel_SVC, "called, event=0x{:08X}", handle); - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); auto writable_event = handle_table.Get<WritableEvent>(handle); if (writable_event) { @@ -1875,10 +1888,10 @@ static ResultCode ClearEvent(Handle handle) { return ERR_INVALID_HANDLE; } -static ResultCode SignalEvent(Handle handle) { +static ResultCode SignalEvent(Core::System& system, Handle handle) { LOG_DEBUG(Kernel_SVC, "called. Handle=0x{:08X}", handle); - HandleTable& handle_table = Core::CurrentProcess()->GetHandleTable(); + HandleTable& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); auto writable_event = handle_table.Get<WritableEvent>(handle); if (!writable_event) { @@ -1890,7 +1903,7 @@ static ResultCode SignalEvent(Handle handle) { return RESULT_SUCCESS; } -static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) { +static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_handle, u32 type) { LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, type=0x{:X}", process_handle, type); // This function currently only allows retrieving a process' status. @@ -1898,7 +1911,7 @@ static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) { Status, }; - const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); const auto process = handle_table.Get<Process>(process_handle); if (!process) { LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", @@ -1916,10 +1929,10 @@ static ResultCode GetProcessInfo(u64* out, Handle process_handle, u32 type) { return RESULT_SUCCESS; } -static ResultCode CreateResourceLimit(Handle* out_handle) { +static ResultCode CreateResourceLimit(Core::System& system, Handle* out_handle) { LOG_DEBUG(Kernel_SVC, "called"); - auto& kernel = Core::System::GetInstance().Kernel(); + auto& kernel = system.Kernel(); auto resource_limit = ResourceLimit::Create(kernel); auto* const current_process = kernel.CurrentProcess(); @@ -1934,11 +1947,11 @@ static ResultCode CreateResourceLimit(Handle* out_handle) { return RESULT_SUCCESS; } -static ResultCode GetResourceLimitLimitValue(u64* out_value, Handle resource_limit, - u32 resource_type) { +static ResultCode GetResourceLimitLimitValue(Core::System& system, u64* out_value, + Handle resource_limit, u32 resource_type) { LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type); - const auto limit_value = RetrieveResourceLimitValue(resource_limit, resource_type, + const auto limit_value = RetrieveResourceLimitValue(system, resource_limit, resource_type, ResourceLimitValueType::LimitValue); if (limit_value.Failed()) { return limit_value.Code(); @@ -1948,11 +1961,11 @@ static ResultCode GetResourceLimitLimitValue(u64* out_value, Handle resource_lim return RESULT_SUCCESS; } -static ResultCode GetResourceLimitCurrentValue(u64* out_value, Handle resource_limit, - u32 resource_type) { +static ResultCode GetResourceLimitCurrentValue(Core::System& system, u64* out_value, + Handle resource_limit, u32 resource_type) { LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}", resource_limit, resource_type); - const auto current_value = RetrieveResourceLimitValue(resource_limit, resource_type, + const auto current_value = RetrieveResourceLimitValue(system, resource_limit, resource_type, ResourceLimitValueType::CurrentValue); if (current_value.Failed()) { return current_value.Code(); @@ -1962,7 +1975,8 @@ static ResultCode GetResourceLimitCurrentValue(u64* out_value, Handle resource_l return RESULT_SUCCESS; } -static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource_type, u64 value) { +static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resource_limit, + u32 resource_type, u64 value) { LOG_DEBUG(Kernel_SVC, "called. Handle={:08X}, Resource type={}, Value={}", resource_limit, resource_type, value); @@ -1972,8 +1986,7 @@ static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource return ERR_INVALID_ENUM_VALUE; } - auto& kernel = Core::System::GetInstance().Kernel(); - auto* const current_process = kernel.CurrentProcess(); + auto* const current_process = system.Kernel().CurrentProcess(); ASSERT(current_process != nullptr); auto resource_limit_object = @@ -1997,8 +2010,8 @@ static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource return RESULT_SUCCESS; } -static ResultCode GetProcessList(u32* out_num_processes, VAddr out_process_ids, - u32 out_process_ids_size) { +static ResultCode GetProcessList(Core::System& system, u32* out_num_processes, + VAddr out_process_ids, u32 out_process_ids_size) { LOG_DEBUG(Kernel_SVC, "called. out_process_ids=0x{:016X}, out_process_ids_size={}", out_process_ids, out_process_ids_size); @@ -2010,7 +2023,7 @@ static ResultCode GetProcessList(u32* out_num_processes, VAddr out_process_ids, return ERR_OUT_OF_RANGE; } - const auto& kernel = Core::System::GetInstance().Kernel(); + const auto& kernel = system.Kernel(); const auto& vm_manager = kernel.CurrentProcess()->VMManager(); const auto total_copy_size = out_process_ids_size * sizeof(u64); @@ -2034,8 +2047,8 @@ static ResultCode GetProcessList(u32* out_num_processes, VAddr out_process_ids, return RESULT_SUCCESS; } -ResultCode GetThreadList(u32* out_num_threads, VAddr out_thread_ids, u32 out_thread_ids_size, - Handle debug_handle) { +ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids, + u32 out_thread_ids_size, Handle debug_handle) { // TODO: Handle this case when debug events are supported. UNIMPLEMENTED_IF(debug_handle != InvalidHandle); @@ -2049,7 +2062,7 @@ ResultCode GetThreadList(u32* out_num_threads, VAddr out_thread_ids, u32 out_thr return ERR_OUT_OF_RANGE; } - const auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess(); + const auto* const current_process = system.Kernel().CurrentProcess(); const auto& vm_manager = current_process->VMManager(); const auto total_copy_size = out_thread_ids_size * sizeof(u64); @@ -2076,7 +2089,7 @@ ResultCode GetThreadList(u32* out_num_threads, VAddr out_thread_ids, u32 out_thr namespace { struct FunctionDef { - using Func = void(); + using Func = void(Core::System&); u32 id; Func* func; @@ -2225,7 +2238,7 @@ static const FunctionDef* GetSVCInfo(u32 func_num) { MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); -void CallSVC(u32 immediate) { +void CallSVC(Core::System& system, u32 immediate) { MICROPROFILE_SCOPE(Kernel_SVC); // Lock the global kernel mutex when we enter the kernel HLE. @@ -2234,7 +2247,7 @@ void CallSVC(u32 immediate) { const FunctionDef* info = GetSVCInfo(immediate); if (info) { if (info->func) { - info->func(); + info->func(system); } else { LOG_CRITICAL(Kernel_SVC, "Unimplemented SVC function {}(..)", info->name); } diff --git a/src/core/hle/kernel/svc.h b/src/core/hle/kernel/svc.h index c37ae0f98..c5539ac1c 100644 --- a/src/core/hle/kernel/svc.h +++ b/src/core/hle/kernel/svc.h @@ -6,8 +6,12 @@ #include "common/common_types.h" +namespace Core { +class System; +} + namespace Kernel { -void CallSVC(u32 immediate); +void CallSVC(Core::System& system, u32 immediate); } // namespace Kernel diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index b3733680f..b3690b5f3 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -11,278 +11,312 @@ namespace Kernel { -static inline u64 Param(int n) { - return Core::CurrentArmInterface().GetReg(n); +static inline u64 Param(const Core::System& system, int n) { + return system.CurrentArmInterface().GetReg(n); } /** * HLE a function return from the current ARM userland process - * @param res Result to return + * @param system System context + * @param result Result to return */ -static inline void FuncReturn(u64 res) { - Core::CurrentArmInterface().SetReg(0, res); +static inline void FuncReturn(Core::System& system, u64 result) { + system.CurrentArmInterface().SetReg(0, result); } //////////////////////////////////////////////////////////////////////////////////////////////////// // Function wrappers that return type ResultCode -template <ResultCode func(u64)> -void SvcWrap() { - FuncReturn(func(Param(0)).raw); +template <ResultCode func(Core::System&, u64)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, Param(system, 0)).raw); } -template <ResultCode func(u32)> -void SvcWrap() { - FuncReturn(func(static_cast<u32>(Param(0))).raw); +template <ResultCode func(Core::System&, u32)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); } -template <ResultCode func(u32, u32)> -void SvcWrap() { - FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw); +template <ResultCode func(Core::System&, u32, u32)> +void SvcWrap(Core::System& system) { + FuncReturn( + system, + func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw); } -template <ResultCode func(u32*)> -void SvcWrap() { +template <ResultCode func(Core::System&, u32*)> +void SvcWrap(Core::System& system) { u32 param = 0; - const u32 retval = func(¶m).raw; - Core::CurrentArmInterface().SetReg(1, param); - FuncReturn(retval); + const u32 retval = func(system, ¶m).raw; + system.CurrentArmInterface().SetReg(1, param); + FuncReturn(system, retval); } -template <ResultCode func(u32*, u32)> -void SvcWrap() { +template <ResultCode func(Core::System&, u32*, u32)> +void SvcWrap(Core::System& system) { u32 param_1 = 0; - u32 retval = func(¶m_1, static_cast<u32>(Param(1))).raw; - Core::CurrentArmInterface().SetReg(1, param_1); - FuncReturn(retval); + const u32 retval = func(system, ¶m_1, static_cast<u32>(Param(system, 1))).raw; + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); } -template <ResultCode func(u32*, u32*)> -void SvcWrap() { +template <ResultCode func(Core::System&, u32*, u32*)> +void SvcWrap(Core::System& system) { u32 param_1 = 0; u32 param_2 = 0; - const u32 retval = func(¶m_1, ¶m_2).raw; + const u32 retval = func(system, ¶m_1, ¶m_2).raw; - auto& arm_interface = Core::CurrentArmInterface(); + auto& arm_interface = system.CurrentArmInterface(); arm_interface.SetReg(1, param_1); arm_interface.SetReg(2, param_2); - FuncReturn(retval); + FuncReturn(system, retval); } -template <ResultCode func(u32*, u64)> -void SvcWrap() { +template <ResultCode func(Core::System&, u32*, u64)> +void SvcWrap(Core::System& system) { u32 param_1 = 0; - const u32 retval = func(¶m_1, Param(1)).raw; - Core::CurrentArmInterface().SetReg(1, param_1); - FuncReturn(retval); + const u32 retval = func(system, ¶m_1, Param(system, 1)).raw; + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); } -template <ResultCode func(u32*, u64, u32)> -void SvcWrap() { +template <ResultCode func(Core::System&, u32*, u64, u32)> +void SvcWrap(Core::System& system) { u32 param_1 = 0; - const u32 retval = func(¶m_1, Param(1), static_cast<u32>(Param(2))).raw; - Core::CurrentArmInterface().SetReg(1, param_1); - FuncReturn(retval); + const u32 retval = + func(system, ¶m_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw; + + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); } -template <ResultCode func(u64*, u32)> -void SvcWrap() { +template <ResultCode func(Core::System&, u64*, u32)> +void SvcWrap(Core::System& system) { u64 param_1 = 0; - const u32 retval = func(¶m_1, static_cast<u32>(Param(1))).raw; - Core::CurrentArmInterface().SetReg(1, param_1); - FuncReturn(retval); + const u32 retval = func(system, ¶m_1, static_cast<u32>(Param(system, 1))).raw; + + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); } -template <ResultCode func(u64, s32)> -void SvcWrap() { - FuncReturn(func(Param(0), static_cast<s32>(Param(1))).raw); +template <ResultCode func(Core::System&, u64, s32)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw); } -template <ResultCode func(u64, u32)> -void SvcWrap() { - FuncReturn(func(Param(0), static_cast<u32>(Param(1))).raw); +template <ResultCode func(Core::System&, u64, u32)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw); } -template <ResultCode func(u64*, u64)> -void SvcWrap() { +template <ResultCode func(Core::System&, u64*, u64)> +void SvcWrap(Core::System& system) { u64 param_1 = 0; - u32 retval = func(¶m_1, Param(1)).raw; - Core::CurrentArmInterface().SetReg(1, param_1); - FuncReturn(retval); + const u32 retval = func(system, ¶m_1, Param(system, 1)).raw; + + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); } -template <ResultCode func(u64*, u32, u32)> -void SvcWrap() { +template <ResultCode func(Core::System&, u64*, u32, u32)> +void SvcWrap(Core::System& system) { u64 param_1 = 0; - u32 retval = func(¶m_1, static_cast<u32>(Param(1)), static_cast<u32>(Param(2))).raw; - Core::CurrentArmInterface().SetReg(1, param_1); - FuncReturn(retval); + const u32 retval = func(system, ¶m_1, static_cast<u32>(Param(system, 1)), + static_cast<u32>(Param(system, 2))) + .raw; + + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); } -template <ResultCode func(u32, u64)> -void SvcWrap() { - FuncReturn(func(static_cast<u32>(Param(0)), Param(1)).raw); +template <ResultCode func(Core::System&, u32, u64)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw); } -template <ResultCode func(u32, u32, u64)> -void SvcWrap() { - FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1)), Param(2)).raw); +template <ResultCode func(Core::System&, u32, u32, u64)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), + static_cast<u32>(Param(system, 1)), Param(system, 2)) + .raw); } -template <ResultCode func(u32, u32*, u64*)> -void SvcWrap() { +template <ResultCode func(Core::System&, u32, u32*, u64*)> +void SvcWrap(Core::System& system) { u32 param_1 = 0; u64 param_2 = 0; - ResultCode retval = func(static_cast<u32>(Param(2)), ¶m_1, ¶m_2); - Core::CurrentArmInterface().SetReg(1, param_1); - Core::CurrentArmInterface().SetReg(2, param_2); - FuncReturn(retval.raw); -} + const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), ¶m_1, ¶m_2); -template <ResultCode func(u64, u64, u32, u32)> -void SvcWrap() { - FuncReturn( - func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw); + system.CurrentArmInterface().SetReg(1, param_1); + system.CurrentArmInterface().SetReg(2, param_2); + FuncReturn(system, retval.raw); } -template <ResultCode func(u64, u64, u32, u64)> -void SvcWrap() { - FuncReturn(func(Param(0), Param(1), static_cast<u32>(Param(2)), Param(3)).raw); +template <ResultCode func(Core::System&, u64, u64, u32, u32)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, Param(system, 0), Param(system, 1), + static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3))) + .raw); } -template <ResultCode func(u32, u64, u32)> -void SvcWrap() { - FuncReturn(func(static_cast<u32>(Param(0)), Param(1), static_cast<u32>(Param(2))).raw); +template <ResultCode func(Core::System&, u64, u64, u32, u64)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, Param(system, 0), Param(system, 1), + static_cast<u32>(Param(system, 2)), Param(system, 3)) + .raw); } -template <ResultCode func(u64, u64, u64)> -void SvcWrap() { - FuncReturn(func(Param(0), Param(1), Param(2)).raw); +template <ResultCode func(Core::System&, u32, u64, u32)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), + static_cast<u32>(Param(system, 2))) + .raw); } -template <ResultCode func(u64, u64, u32)> -void SvcWrap() { - FuncReturn(func(Param(0), Param(1), static_cast<u32>(Param(2))).raw); +template <ResultCode func(Core::System&, u64, u64, u64)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw); } -template <ResultCode func(u32, u64, u64, u32)> -void SvcWrap() { +template <ResultCode func(Core::System&, u64, u64, u32)> +void SvcWrap(Core::System& system) { FuncReturn( - func(static_cast<u32>(Param(0)), Param(1), Param(2), static_cast<u32>(Param(3))).raw); + system, + func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw); +} + +template <ResultCode func(Core::System&, u32, u64, u64, u32)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), + Param(system, 2), static_cast<u32>(Param(system, 3))) + .raw); } -template <ResultCode func(u32, u64, u64)> -void SvcWrap() { - FuncReturn(func(static_cast<u32>(Param(0)), Param(1), Param(2)).raw); +template <ResultCode func(Core::System&, u32, u64, u64)> +void SvcWrap(Core::System& system) { + FuncReturn( + system, + func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw); } -template <ResultCode func(u32*, u64, u64, s64)> -void SvcWrap() { +template <ResultCode func(Core::System&, u32*, u64, u64, s64)> +void SvcWrap(Core::System& system) { u32 param_1 = 0; - ResultCode retval = - func(¶m_1, Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3))); - Core::CurrentArmInterface().SetReg(1, param_1); - FuncReturn(retval.raw); + const u32 retval = func(system, ¶m_1, Param(system, 1), static_cast<u32>(Param(system, 2)), + static_cast<s64>(Param(system, 3))) + .raw; + + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); } -template <ResultCode func(u64, u64, u32, s64)> -void SvcWrap() { - FuncReturn( - func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3))).raw); +template <ResultCode func(Core::System&, u64, u64, u32, s64)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, Param(system, 0), Param(system, 1), + static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) + .raw); } -template <ResultCode func(u64*, u64, u64, u64)> -void SvcWrap() { +template <ResultCode func(Core::System&, u64*, u64, u64, u64)> +void SvcWrap(Core::System& system) { u64 param_1 = 0; - u32 retval = func(¶m_1, Param(1), Param(2), Param(3)).raw; - Core::CurrentArmInterface().SetReg(1, param_1); - FuncReturn(retval); + const u32 retval = + func(system, ¶m_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw; + + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); } -template <ResultCode func(u32*, u64, u64, u64, u32, s32)> -void SvcWrap() { +template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)> +void SvcWrap(Core::System& system) { u32 param_1 = 0; - u32 retval = func(¶m_1, Param(1), Param(2), Param(3), static_cast<u32>(Param(4)), - static_cast<s32>(Param(5))) - .raw; - Core::CurrentArmInterface().SetReg(1, param_1); - FuncReturn(retval); + const u32 retval = func(system, ¶m_1, Param(system, 1), Param(system, 2), Param(system, 3), + static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5))) + .raw; + + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); } -template <ResultCode func(u32*, u64, u64, u32)> -void SvcWrap() { +template <ResultCode func(Core::System&, u32*, u64, u64, u32)> +void SvcWrap(Core::System& system) { u32 param_1 = 0; - u32 retval = func(¶m_1, Param(1), Param(2), static_cast<u32>(Param(3))).raw; - Core::CurrentArmInterface().SetReg(1, param_1); - FuncReturn(retval); + const u32 retval = func(system, ¶m_1, Param(system, 1), Param(system, 2), + static_cast<u32>(Param(system, 3))) + .raw; + + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); } -template <ResultCode func(Handle*, u64, u32, u32)> -void SvcWrap() { +template <ResultCode func(Core::System&, Handle*, u64, u32, u32)> +void SvcWrap(Core::System& system) { u32 param_1 = 0; - u32 retval = - func(¶m_1, Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw; - Core::CurrentArmInterface().SetReg(1, param_1); - FuncReturn(retval); + const u32 retval = func(system, ¶m_1, Param(system, 1), static_cast<u32>(Param(system, 2)), + static_cast<u32>(Param(system, 3))) + .raw; + + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); } -template <ResultCode func(u64, u32, s32, s64)> -void SvcWrap() { - FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)), - static_cast<s64>(Param(3))) - .raw); +template <ResultCode func(Core::System&, u64, u32, s32, s64)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), + static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) + .raw); } -template <ResultCode func(u64, u32, s32, s32)> -void SvcWrap() { - FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)), - static_cast<s32>(Param(3))) - .raw); +template <ResultCode func(Core::System&, u64, u32, s32, s32)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), + static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) + .raw); } //////////////////////////////////////////////////////////////////////////////////////////////////// // Function wrappers that return type u32 -template <u32 func()> -void SvcWrap() { - FuncReturn(func()); +template <u32 func(Core::System&)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system)); } //////////////////////////////////////////////////////////////////////////////////////////////////// // Function wrappers that return type u64 -template <u64 func()> -void SvcWrap() { - FuncReturn(func()); +template <u64 func(Core::System&)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system)); } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Function wrappers that return type void -template <void func()> -void SvcWrap() { - func(); +template <void func(Core::System&)> +void SvcWrap(Core::System& system) { + func(system); } -template <void func(s64)> -void SvcWrap() { - func(static_cast<s64>(Param(0))); +template <void func(Core::System&, s64)> +void SvcWrap(Core::System& system) { + func(system, static_cast<s64>(Param(system, 0))); } -template <void func(u64, u64 len)> -void SvcWrap() { - func(Param(0), Param(1)); +template <void func(Core::System&, u64, u64)> +void SvcWrap(Core::System& system) { + func(system, Param(system, 0), Param(system, 1)); } -template <void func(u64, u64, u64)> -void SvcWrap() { - func(Param(0), Param(1), Param(2)); +template <void func(Core::System&, u64, u64, u64)> +void SvcWrap(Core::System& system) { + func(system, Param(system, 0), Param(system, 1), Param(system, 2)); } -template <void func(u32, u64, u64)> -void SvcWrap() { - func(static_cast<u32>(Param(0)), Param(1), Param(2)); +template <void func(Core::System&, u32, u64, u64)> +void SvcWrap(Core::System& system) { + func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)); } } // namespace Kernel diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 73e5d1bb4..83c83e45a 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -106,7 +106,7 @@ public: return "Thread"; } - static const HandleType HANDLE_TYPE = HandleType::Thread; + static constexpr HandleType HANDLE_TYPE = HandleType::Thread; HandleType GetHandleType() const override { return HANDLE_TYPE; } diff --git a/src/core/hle/kernel/writable_event.h b/src/core/hle/kernel/writable_event.h index c9068dd3d..d00c92a6b 100644 --- a/src/core/hle/kernel/writable_event.h +++ b/src/core/hle/kernel/writable_event.h @@ -37,7 +37,7 @@ public: return name; } - static const HandleType HANDLE_TYPE = HandleType::WritableEvent; + static constexpr HandleType HANDLE_TYPE = HandleType::WritableEvent; HandleType GetHandleType() const override { return HANDLE_TYPE; } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 242a0d1cd..114bed20d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -106,6 +106,8 @@ add_library(video_core STATIC textures/decoders.cpp textures/decoders.h textures/texture.h + texture_cache.cpp + texture_cache.h video_core.cpp video_core.h ) @@ -127,12 +129,14 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_sampler_cache.h renderer_vulkan/vk_scheduler.cpp renderer_vulkan/vk_scheduler.h + renderer_vulkan/vk_shader_decompiler.cpp + renderer_vulkan/vk_shader_decompiler.h renderer_vulkan/vk_stream_buffer.cpp renderer_vulkan/vk_stream_buffer.h renderer_vulkan/vk_swapchain.cpp renderer_vulkan/vk_swapchain.h) - target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include) + target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) target_compile_definitions(video_core PRIVATE HAS_VULKAN) endif() @@ -140,3 +144,6 @@ create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) target_link_libraries(video_core PRIVATE glad) +if (ENABLE_VULKAN) + target_link_libraries(video_core PRIVATE sirit) +endif() diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7ff1e6737..d250d5cbb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -299,6 +299,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { BaseBindings base_bindings; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; + // Prepare packed bindings + bind_ubo_pushbuffer.Setup(base_bindings.cbuf); + bind_ssbo_pushbuffer.Setup(base_bindings.gmem); + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = gpu.regs.shader_config[index]; const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; @@ -321,8 +325,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); // Bind the emulation info buffer - glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset, - static_cast<GLsizeiptr>(sizeof(ubo))); + bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, + static_cast<GLsizeiptr>(sizeof(ubo))); Shader shader{shader_cache.GetStageProgram(program)}; const auto [program_handle, next_bindings] = @@ -366,6 +370,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { base_bindings = next_bindings; } + bind_ubo_pushbuffer.Bind(); + bind_ssbo_pushbuffer.Bind(); + SyncClipEnabled(clip_distances); gpu.dirty_flags.shaders = false; @@ -900,23 +907,14 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; const auto& entries = shader->GetShaderEntries().const_buffers; - constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; - std::array<GLuint, max_binds> bind_buffers; - std::array<GLintptr, max_binds> bind_offsets; - std::array<GLsizeiptr, max_binds> bind_sizes; - - ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points."); - // Upload only the enabled buffers from the 16 constbuffers of each shader stage for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& used_buffer = entries[bindpoint]; const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; if (!buffer.enabled) { - // With disabled buffers set values as zero to unbind them - bind_buffers[bindpoint] = 0; - bind_offsets[bindpoint] = 0; - bind_sizes[bindpoint] = 0; + // Set values to zero to unbind buffers + bind_ubo_pushbuffer.Push(0, 0, 0); continue; } @@ -944,30 +942,19 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader const GLintptr const_buffer_offset = buffer_cache.UploadMemory( buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); - // Prepare values for multibind - bind_buffers[bindpoint] = buffer_cache.GetHandle(); - bind_offsets[bindpoint] = const_buffer_offset; - bind_sizes[bindpoint] = size; + bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size); } - - // The first binding is reserved for emulation values - const GLuint ubo_base_binding = base_bindings.cbuf + 1; - glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()), - bind_buffers.data(), bind_offsets.data(), bind_sizes.data()); } void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, GLenum primitive_mode, BaseBindings base_bindings) { - // TODO(Rodrigo): Use ARB_multi_bind here const auto& entries = shader->GetShaderEntries().global_memory_entries; - - for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) { - const auto& entry = entries[bindpoint]; - const u32 current_bindpoint = base_bindings.gmem + bindpoint; - const auto& region = global_cache.GetGlobalRegion(entry, stage); - - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle()); + for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto& entry{entries[bindpoint]}; + const auto& region{global_cache.GetGlobalRegion(entry, stage)}; + bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, + static_cast<GLsizeiptr>(region->GetSizeInBytes())); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 54fbf48aa..e4c64ae71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -28,6 +28,7 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/utils.h" namespace Core { class System; @@ -229,6 +230,9 @@ private: PrimitiveAssembler primitive_assembler{buffer_cache}; GLint uniform_buffer_alignment; + BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; + BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; + std::size_t CalculateVertexArraysSize() const; std::size_t CalculateIndexBufferSize() const; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index aa6da1944..55b6d8591 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -266,6 +266,10 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); params.width = config.width; + if (!params.is_tiled) { + const u32 bpp = params.GetFormatBpp() / 8; + params.pitch = config.width * bpp; + } params.height = config.height; params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; @@ -1175,10 +1179,16 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, return new_surface; } + const bool old_compressed = + GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed; + const bool new_compressed = + GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed; + const bool compatible_formats = + GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) && + !(old_compressed || new_compressed); // For compatible surfaces, we can just do fast glCopyImageSubData based copy - if (old_params.target == new_params.target && old_params.type == new_params.type && - old_params.depth == new_params.depth && old_params.depth == 1 && - GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) { + if (old_params.target == new_params.target && old_params.depth == new_params.depth && + old_params.depth == 1 && compatible_formats) { FastCopySurface(old_surface, new_surface); return new_surface; } @@ -1193,7 +1203,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, case SurfaceTarget::TextureCubemap: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - if (old_params.pixel_format == new_params.pixel_format) + if (compatible_formats) FastLayeredCopySurface(old_surface, new_surface); else { AccurateCopySurface(old_surface, new_surface); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 3ea08ef7b..28e490b3c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -552,8 +552,7 @@ private: } else if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " + - std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';'); + code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);"); return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), final_offset, final_offset); diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index d84634cb3..84a987371 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -5,11 +5,39 @@ #include <string> #include <fmt/format.h> #include <glad/glad.h> +#include "common/assert.h" #include "common/common_types.h" #include "video_core/renderer_opengl/utils.h" namespace OpenGL { +BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} + +BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; + +void BindBuffersRangePushBuffer::Setup(GLuint first_) { + first = first_; + buffers.clear(); + offsets.clear(); + sizes.clear(); +} + +void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { + buffers.push_back(buffer); + offsets.push_back(offset); + sizes.push_back(size); +} + +void BindBuffersRangePushBuffer::Bind() const { + const std::size_t count{buffers.size()}; + DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); + if (count == 0) { + return; + } + glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), + sizes.data()); +} + void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) { if (!GLAD_GL_KHR_debug) { return; // We don't need to throw an error as this is just for debugging diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 1fcb6fc11..aef45c9dc 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -5,11 +5,31 @@ #pragma once #include <string> +#include <vector> #include <glad/glad.h> #include "common/common_types.h" namespace OpenGL { +class BindBuffersRangePushBuffer { +public: + BindBuffersRangePushBuffer(GLenum target); + ~BindBuffersRangePushBuffer(); + + void Setup(GLuint first_); + + void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); + + void Bind() const; + +private: + GLenum target; + GLuint first; + std::vector<GLuint> buffers; + std::vector<GLintptr> offsets; + std::vector<GLsizeiptr> sizes; +}; + void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = ""); } // namespace OpenGL
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp new file mode 100644 index 000000000..e0a6f5e87 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -0,0 +1,1379 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <functional> +#include <map> +#include <set> + +#include <fmt/format.h> + +#include <sirit/sirit.h> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/engines/shader_header.h" +#include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/shader/shader_ir.h" + +namespace Vulkan::VKShader { + +using Sirit::Id; +using Tegra::Shader::Attribute; +using Tegra::Shader::AttributeUse; +using Tegra::Shader::Register; +using namespace VideoCommon::Shader; + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; +using Operation = const OperationNode&; + +// TODO(Rodrigo): Use rasterizer's value +constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; +constexpr u32 STAGE_BINDING_STRIDE = 0x100; + +enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; + +struct SamplerImage { + Id image_type; + Id sampled_image_type; + Id sampler; +}; + +namespace { + +spv::Dim GetSamplerDim(const Sampler& sampler) { + switch (sampler.GetType()) { + case Tegra::Shader::TextureType::Texture1D: + return spv::Dim::Dim1D; + case Tegra::Shader::TextureType::Texture2D: + return spv::Dim::Dim2D; + case Tegra::Shader::TextureType::Texture3D: + return spv::Dim::Dim3D; + case Tegra::Shader::TextureType::TextureCube: + return spv::Dim::Cube; + default: + UNIMPLEMENTED_MSG("Unimplemented sampler type={}", static_cast<u32>(sampler.GetType())); + return spv::Dim::Dim2D; + } +} + +/// Returns true if an attribute index is one of the 32 generic attributes +constexpr bool IsGenericAttribute(Attribute::Index attribute) { + return attribute >= Attribute::Index::Attribute_0 && + attribute <= Attribute::Index::Attribute_31; +} + +/// Returns the location of a generic attribute +constexpr u32 GetGenericAttributeLocation(Attribute::Index attribute) { + ASSERT(IsGenericAttribute(attribute)); + return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0); +} + +/// Returns true if an object has to be treated as precise +bool IsPrecise(Operation operand) { + const auto& meta = operand.GetMeta(); + + if (std::holds_alternative<MetaArithmetic>(meta)) { + return std::get<MetaArithmetic>(meta).precise; + } + if (std::holds_alternative<MetaHalfArithmetic>(meta)) { + return std::get<MetaHalfArithmetic>(meta).precise; + } + return false; +} + +} // namespace + +class SPIRVDecompiler : public Sirit::Module { +public: + explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage) + : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { + AddCapability(spv::Capability::Shader); + AddExtension("SPV_KHR_storage_buffer_storage_class"); + AddExtension("SPV_KHR_variable_pointers"); + } + + void Decompile() { + AllocateBindings(); + AllocateLabels(); + + DeclareVertex(); + DeclareGeometry(); + DeclareFragment(); + DeclareRegisters(); + DeclarePredicates(); + DeclareLocalMemory(); + DeclareInternalFlags(); + DeclareInputAttributes(); + DeclareOutputAttributes(); + DeclareConstantBuffers(); + DeclareGlobalBuffers(); + DeclareSamplers(); + + execute_function = + Emit(OpFunction(t_void, spv::FunctionControlMask::Inline, TypeFunction(t_void))); + Emit(OpLabel()); + + const u32 first_address = ir.GetBasicBlocks().begin()->first; + const Id loop_label = OpLabel("loop"); + const Id merge_label = OpLabel("merge"); + const Id dummy_label = OpLabel(); + const Id jump_label = OpLabel(); + continue_label = OpLabel("continue"); + + std::vector<Sirit::Literal> literals; + std::vector<Id> branch_labels; + for (const auto& pair : labels) { + const auto [literal, label] = pair; + literals.push_back(literal); + branch_labels.push_back(label); + } + + // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely + // that shaders will use 20 nested SSYs and PBKs. + constexpr u32 FLOW_STACK_SIZE = 20; + const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); + jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint), + spv::StorageClass::Function, Constant(t_uint, first_address))); + flow_stack = Emit(OpVariable(TypePointer(spv::StorageClass::Function, flow_stack_type), + spv::StorageClass::Function, ConstantNull(flow_stack_type))); + flow_stack_top = + Emit(OpVariable(t_func_uint, spv::StorageClass::Function, Constant(t_uint, 0))); + + Name(jmp_to, "jmp_to"); + Name(flow_stack, "flow_stack"); + Name(flow_stack_top, "flow_stack_top"); + + Emit(OpBranch(loop_label)); + Emit(loop_label); + Emit(OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::Unroll)); + Emit(OpBranch(dummy_label)); + + Emit(dummy_label); + const Id default_branch = OpLabel(); + const Id jmp_to_load = Emit(OpLoad(t_uint, jmp_to)); + Emit(OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone)); + Emit(OpSwitch(jmp_to_load, default_branch, literals, branch_labels)); + + Emit(default_branch); + Emit(OpReturn()); + + for (const auto& pair : ir.GetBasicBlocks()) { + const auto& [address, bb] = pair; + Emit(labels.at(address)); + + VisitBasicBlock(bb); + + const auto next_it = labels.lower_bound(address + 1); + const Id next_label = next_it != labels.end() ? next_it->second : default_branch; + Emit(OpBranch(next_label)); + } + + Emit(jump_label); + Emit(OpBranch(continue_label)); + Emit(continue_label); + Emit(OpBranch(loop_label)); + Emit(merge_label); + Emit(OpReturn()); + Emit(OpFunctionEnd()); + } + + ShaderEntries GetShaderEntries() const { + ShaderEntries entries; + entries.const_buffers_base_binding = const_buffers_base_binding; + entries.global_buffers_base_binding = global_buffers_base_binding; + entries.samplers_base_binding = samplers_base_binding; + for (const auto& cbuf : ir.GetConstantBuffers()) { + entries.const_buffers.emplace_back(cbuf.second, cbuf.first); + } + for (const auto& gmem : ir.GetGlobalMemoryBases()) { + entries.global_buffers.emplace_back(gmem.cbuf_index, gmem.cbuf_offset); + } + for (const auto& sampler : ir.GetSamplers()) { + entries.samplers.emplace_back(sampler); + } + for (const auto& attr : ir.GetInputAttributes()) { + entries.attributes.insert(GetGenericAttributeLocation(attr.first)); + } + entries.clip_distances = ir.GetClipDistances(); + entries.shader_length = ir.GetLength(); + entries.entry_function = execute_function; + entries.interfaces = interfaces; + return entries; + } + +private: + using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation); + using OperationDecompilersArray = + std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; + + static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); + static constexpr u32 CBUF_STRIDE = 16; + + void AllocateBindings() { + const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; + u32 binding_iterator = binding_base; + + const auto Allocate = [&binding_iterator](std::size_t count) { + const u32 current_binding = binding_iterator; + binding_iterator += static_cast<u32>(count); + return current_binding; + }; + const_buffers_base_binding = Allocate(ir.GetConstantBuffers().size()); + global_buffers_base_binding = Allocate(ir.GetGlobalMemoryBases().size()); + samplers_base_binding = Allocate(ir.GetSamplers().size()); + + ASSERT_MSG(binding_iterator - binding_base < STAGE_BINDING_STRIDE, + "Stage binding stride is too small"); + } + + void AllocateLabels() { + for (const auto& pair : ir.GetBasicBlocks()) { + const u32 address = pair.first; + labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); + } + } + + void DeclareVertex() { + if (stage != ShaderStage::Vertex) + return; + + DeclareVertexRedeclarations(); + } + + void DeclareGeometry() { + if (stage != ShaderStage::Geometry) + return; + + UNIMPLEMENTED(); + } + + void DeclareFragment() { + if (stage != ShaderStage::Fragment) + return; + + for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) { + if (!IsRenderTargetUsed(rt)) { + continue; + } + + const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output)); + Name(id, fmt::format("frag_color{}", rt)); + Decorate(id, spv::Decoration::Location, rt); + + frag_colors[rt] = id; + interfaces.push_back(id); + } + + if (header.ps.omap.depth) { + frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output)); + Name(frag_depth, "frag_depth"); + Decorate(frag_depth, spv::Decoration::BuiltIn, + static_cast<u32>(spv::BuiltIn::FragDepth)); + + interfaces.push_back(frag_depth); + } + + frag_coord = DeclareBuiltIn(spv::BuiltIn::FragCoord, spv::StorageClass::Input, t_in_float4, + "frag_coord"); + front_facing = DeclareBuiltIn(spv::BuiltIn::FrontFacing, spv::StorageClass::Input, + t_in_bool, "front_facing"); + } + + void DeclareRegisters() { + for (const u32 gpr : ir.GetRegisters()) { + const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); + Name(id, fmt::format("gpr_{}", gpr)); + registers.emplace(gpr, AddGlobalVariable(id)); + } + } + + void DeclarePredicates() { + for (const auto pred : ir.GetPredicates()) { + const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); + Name(id, fmt::format("pred_{}", static_cast<u32>(pred))); + predicates.emplace(pred, AddGlobalVariable(id)); + } + } + + void DeclareLocalMemory() { + if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { + const auto element_count = static_cast<u32>(Common::AlignUp(local_memory_size, 4) / 4); + const Id type_array = TypeArray(t_float, Constant(t_uint, element_count)); + const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array); + Name(type_pointer, "LocalMemory"); + + local_memory = + OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array)); + AddGlobalVariable(Name(local_memory, "local_memory")); + } + } + + void DeclareInternalFlags() { + constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", + "overflow"}; + for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { + const auto flag_code = static_cast<InternalFlag>(flag); + const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); + internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); + } + } + + void DeclareInputAttributes() { + for (const auto element : ir.GetInputAttributes()) { + const Attribute::Index index = element.first; + if (!IsGenericAttribute(index)) { + continue; + } + + UNIMPLEMENTED_IF(stage == ShaderStage::Geometry); + + const u32 location = GetGenericAttributeLocation(index); + const Id id = OpVariable(t_in_float4, spv::StorageClass::Input); + Name(AddGlobalVariable(id), fmt::format("in_attr{}", location)); + input_attributes.emplace(index, id); + interfaces.push_back(id); + + Decorate(id, spv::Decoration::Location, location); + + if (stage != ShaderStage::Fragment) { + continue; + } + switch (header.ps.GetAttributeUse(location)) { + case AttributeUse::Constant: + Decorate(id, spv::Decoration::Flat); + break; + case AttributeUse::ScreenLinear: + Decorate(id, spv::Decoration::NoPerspective); + break; + case AttributeUse::Perspective: + // Default + break; + default: + UNREACHABLE_MSG("Unused attribute being fetched"); + } + } + } + + void DeclareOutputAttributes() { + for (const auto index : ir.GetOutputAttributes()) { + if (!IsGenericAttribute(index)) { + continue; + } + const auto location = GetGenericAttributeLocation(index); + const Id id = OpVariable(t_out_float4, spv::StorageClass::Output); + Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); + output_attributes.emplace(index, id); + interfaces.push_back(id); + + Decorate(id, spv::Decoration::Location, location); + } + } + + void DeclareConstantBuffers() { + u32 binding = const_buffers_base_binding; + for (const auto& entry : ir.GetConstantBuffers()) { + const auto [index, size] = entry; + const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform); + AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); + + Decorate(id, spv::Decoration::Binding, binding++); + Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); + constant_buffers.emplace(index, id); + } + } + + void DeclareGlobalBuffers() { + u32 binding = global_buffers_base_binding; + for (const auto& entry : ir.GetGlobalMemoryBases()) { + const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); + AddGlobalVariable( + Name(id, fmt::format("gmem_{}_{}", entry.cbuf_index, entry.cbuf_offset))); + + Decorate(id, spv::Decoration::Binding, binding++); + Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); + global_buffers.emplace(entry, id); + } + } + + void DeclareSamplers() { + u32 binding = samplers_base_binding; + for (const auto& sampler : ir.GetSamplers()) { + const auto dim = GetSamplerDim(sampler); + const int depth = sampler.IsShadow() ? 1 : 0; + const int arrayed = sampler.IsArray() ? 1 : 0; + // TODO(Rodrigo): Sampled 1 indicates that the image will be used with a sampler. When + // SULD and SUST instructions are implemented, replace this value. + const int sampled = 1; + const Id image_type = + TypeImage(t_float, dim, depth, arrayed, false, sampled, spv::ImageFormat::Unknown); + const Id sampled_image_type = TypeSampledImage(image_type); + const Id pointer_type = + TypePointer(spv::StorageClass::UniformConstant, sampled_image_type); + const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); + AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex()))); + + sampler_images.insert( + {static_cast<u32>(sampler.GetIndex()), {image_type, sampled_image_type, id}}); + + Decorate(id, spv::Decoration::Binding, binding++); + Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); + } + } + + void DeclareVertexRedeclarations() { + vertex_index = DeclareBuiltIn(spv::BuiltIn::VertexIndex, spv::StorageClass::Input, + t_in_uint, "vertex_index"); + instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, + t_in_uint, "instance_index"); + + bool is_point_size_declared = false; + bool is_clip_distances_declared = false; + for (const auto index : ir.GetOutputAttributes()) { + if (index == Attribute::Index::PointSize) { + is_point_size_declared = true; + } else if (index == Attribute::Index::ClipDistances0123 || + index == Attribute::Index::ClipDistances4567) { + is_clip_distances_declared = true; + } + } + + std::vector<Id> members; + members.push_back(t_float4); + if (is_point_size_declared) { + members.push_back(t_float); + } + if (is_clip_distances_declared) { + members.push_back(TypeArray(t_float, Constant(t_uint, 8))); + } + + const Id gl_per_vertex_struct = Name(TypeStruct(members), "PerVertex"); + Decorate(gl_per_vertex_struct, spv::Decoration::Block); + + u32 declaration_index = 0; + const auto MemberDecorateBuiltIn = [&](spv::BuiltIn builtin, std::string name, + bool condition) { + if (!condition) + return u32{}; + MemberName(gl_per_vertex_struct, declaration_index, name); + MemberDecorate(gl_per_vertex_struct, declaration_index, spv::Decoration::BuiltIn, + static_cast<u32>(builtin)); + return declaration_index++; + }; + + position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); + point_size_index = + MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared); + clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", + is_clip_distances_declared); + + const Id type_pointer = TypePointer(spv::StorageClass::Output, gl_per_vertex_struct); + per_vertex = OpVariable(type_pointer, spv::StorageClass::Output); + AddGlobalVariable(Name(per_vertex, "per_vertex")); + interfaces.push_back(per_vertex); + } + + void VisitBasicBlock(const NodeBlock& bb) { + for (const Node node : bb) { + static_cast<void>(Visit(node)); + } + } + + Id Visit(Node node) { + if (const auto operation = std::get_if<OperationNode>(node)) { + const auto operation_index = static_cast<std::size_t>(operation->GetCode()); + const auto decompiler = operation_decompilers[operation_index]; + if (decompiler == nullptr) { + UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); + } + return (this->*decompiler)(*operation); + + } else if (const auto gpr = std::get_if<GprNode>(node)) { + const u32 index = gpr->GetIndex(); + if (index == Register::ZeroIndex) { + return Constant(t_float, 0.0f); + } + return Emit(OpLoad(t_float, registers.at(index))); + + } else if (const auto immediate = std::get_if<ImmediateNode>(node)) { + return BitcastTo<Type::Float>(Constant(t_uint, immediate->GetValue())); + + } else if (const auto predicate = std::get_if<PredicateNode>(node)) { + const auto value = [&]() -> Id { + switch (const auto index = predicate->GetIndex(); index) { + case Tegra::Shader::Pred::UnusedIndex: + return v_true; + case Tegra::Shader::Pred::NeverExecute: + return v_false; + default: + return Emit(OpLoad(t_bool, predicates.at(index))); + } + }(); + if (predicate->IsNegated()) { + return Emit(OpLogicalNot(t_bool, value)); + } + return value; + + } else if (const auto abuf = std::get_if<AbufNode>(node)) { + const auto attribute = abuf->GetIndex(); + const auto element = abuf->GetElement(); + + switch (attribute) { + case Attribute::Index::Position: + if (stage != ShaderStage::Fragment) { + UNIMPLEMENTED(); + break; + } else { + if (element == 3) { + return Constant(t_float, 1.0f); + } + return Emit(OpLoad(t_float, AccessElement(t_in_float, frag_coord, element))); + } + case Attribute::Index::TessCoordInstanceIDVertexID: + // TODO(Subv): Find out what the values are for the first two elements when inside a + // vertex shader, and what's the value of the fourth element when inside a Tess Eval + // shader. + ASSERT(stage == ShaderStage::Vertex); + switch (element) { + case 2: + return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, instance_index))); + case 3: + return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, vertex_index))); + } + UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); + return Constant(t_float, 0); + case Attribute::Index::FrontFacing: + // TODO(Subv): Find out what the values are for the other elements. + ASSERT(stage == ShaderStage::Fragment); + if (element == 3) { + const Id is_front_facing = Emit(OpLoad(t_bool, front_facing)); + const Id true_value = + BitcastTo<Type::Float>(Constant(t_int, static_cast<s32>(-1))); + const Id false_value = BitcastTo<Type::Float>(Constant(t_int, 0)); + return Emit(OpSelect(t_float, is_front_facing, true_value, false_value)); + } + UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); + return Constant(t_float, 0.0f); + default: + if (IsGenericAttribute(attribute)) { + const Id pointer = + AccessElement(t_in_float, input_attributes.at(attribute), element); + return Emit(OpLoad(t_float, pointer)); + } + break; + } + UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); + + } else if (const auto cbuf = std::get_if<CbufNode>(node)) { + const Node offset = cbuf->GetOffset(); + const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); + + Id buffer_index{}; + Id buffer_element{}; + + if (const auto immediate = std::get_if<ImmediateNode>(offset)) { + // Direct access + const u32 offset_imm = immediate->GetValue(); + ASSERT(offset_imm % 4 == 0); + buffer_index = Constant(t_uint, offset_imm / 16); + buffer_element = Constant(t_uint, (offset_imm / 4) % 4); + + } else if (std::holds_alternative<OperationNode>(*offset)) { + // Indirect access + // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which + // emits sub-optimal code on GLSL from my testing). + const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); + const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); + const Id final_offset = Emit( + OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); + buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); + buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); + + } else { + UNREACHABLE_MSG("Unmanaged offset node type"); + } + + const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), + buffer_index, buffer_element)); + return Emit(OpLoad(t_float, pointer)); + + } else if (const auto gmem = std::get_if<GmemNode>(node)) { + const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); + const Id real = BitcastTo<Type::Uint>(Visit(gmem->GetRealAddress())); + const Id base = BitcastTo<Type::Uint>(Visit(gmem->GetBaseAddress())); + + Id offset = Emit(OpISub(t_uint, real, base)); + offset = Emit(OpUDiv(t_uint, offset, Constant(t_uint, 4u))); + return Emit(OpLoad(t_float, Emit(OpAccessChain(t_gmem_float, gmem_buffer, + Constant(t_uint, 0u), offset)))); + + } else if (const auto conditional = std::get_if<ConditionalNode>(node)) { + // It's invalid to call conditional on nested nodes, use an operation instead + const Id true_label = OpLabel(); + const Id skip_label = OpLabel(); + Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label)); + Emit(true_label); + + VisitBasicBlock(conditional->GetCode()); + + Emit(OpBranch(skip_label)); + Emit(skip_label); + return {}; + + } else if (const auto comment = std::get_if<CommentNode>(node)) { + Name(Emit(OpUndef(t_void)), comment->GetText()); + return {}; + } + + UNREACHABLE(); + return {}; + } + + template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type> + Id Unary(Operation operation) { + const Id type_def = GetTypeDefinition(result_type); + const Id op_a = VisitOperand<type_a>(operation, 0); + + const Id value = BitcastFrom<result_type>(Emit((this->*func)(type_def, op_a))); + if (IsPrecise(operation)) { + Decorate(value, spv::Decoration::NoContraction); + } + return value; + } + + template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type, + Type type_b = type_a> + Id Binary(Operation operation) { + const Id type_def = GetTypeDefinition(result_type); + const Id op_a = VisitOperand<type_a>(operation, 0); + const Id op_b = VisitOperand<type_b>(operation, 1); + + const Id value = BitcastFrom<result_type>(Emit((this->*func)(type_def, op_a, op_b))); + if (IsPrecise(operation)) { + Decorate(value, spv::Decoration::NoContraction); + } + return value; + } + + template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type, + Type type_b = type_a, Type type_c = type_b> + Id Ternary(Operation operation) { + const Id type_def = GetTypeDefinition(result_type); + const Id op_a = VisitOperand<type_a>(operation, 0); + const Id op_b = VisitOperand<type_b>(operation, 1); + const Id op_c = VisitOperand<type_c>(operation, 2); + + const Id value = BitcastFrom<result_type>(Emit((this->*func)(type_def, op_a, op_b, op_c))); + if (IsPrecise(operation)) { + Decorate(value, spv::Decoration::NoContraction); + } + return value; + } + + template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type, + Type type_b = type_a, Type type_c = type_b, Type type_d = type_c> + Id Quaternary(Operation operation) { + const Id type_def = GetTypeDefinition(result_type); + const Id op_a = VisitOperand<type_a>(operation, 0); + const Id op_b = VisitOperand<type_b>(operation, 1); + const Id op_c = VisitOperand<type_c>(operation, 2); + const Id op_d = VisitOperand<type_d>(operation, 3); + + const Id value = + BitcastFrom<result_type>(Emit((this->*func)(type_def, op_a, op_b, op_c, op_d))); + if (IsPrecise(operation)) { + Decorate(value, spv::Decoration::NoContraction); + } + return value; + } + + Id Assign(Operation operation) { + const Node dest = operation[0]; + const Node src = operation[1]; + + Id target{}; + if (const auto gpr = std::get_if<GprNode>(dest)) { + if (gpr->GetIndex() == Register::ZeroIndex) { + // Writing to Register::ZeroIndex is a no op + return {}; + } + target = registers.at(gpr->GetIndex()); + + } else if (const auto abuf = std::get_if<AbufNode>(dest)) { + target = [&]() -> Id { + switch (const auto attribute = abuf->GetIndex(); attribute) { + case Attribute::Index::Position: + return AccessElement(t_out_float, per_vertex, position_index, + abuf->GetElement()); + case Attribute::Index::PointSize: + return AccessElement(t_out_float, per_vertex, point_size_index); + case Attribute::Index::ClipDistances0123: + return AccessElement(t_out_float, per_vertex, clip_distances_index, + abuf->GetElement()); + case Attribute::Index::ClipDistances4567: + return AccessElement(t_out_float, per_vertex, clip_distances_index, + abuf->GetElement() + 4); + default: + if (IsGenericAttribute(attribute)) { + return AccessElement(t_out_float, output_attributes.at(attribute), + abuf->GetElement()); + } + UNIMPLEMENTED_MSG("Unhandled output attribute: {}", + static_cast<u32>(attribute)); + return {}; + } + }(); + + } else if (const auto lmem = std::get_if<LmemNode>(dest)) { + Id address = BitcastTo<Type::Uint>(Visit(lmem->GetAddress())); + address = Emit(OpUDiv(t_uint, address, Constant(t_uint, 4))); + target = Emit(OpAccessChain(t_prv_float, local_memory, {address})); + } + + Emit(OpStore(target, Visit(src))); + return {}; + } + + Id HNegate(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id HMergeF32(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id HMergeH0(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id HMergeH1(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id HPack2(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id LogicalAssign(Operation operation) { + const Node dest = operation[0]; + const Node src = operation[1]; + + Id target{}; + if (const auto pred = std::get_if<PredicateNode>(dest)) { + ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); + + const auto index = pred->GetIndex(); + switch (index) { + case Tegra::Shader::Pred::NeverExecute: + case Tegra::Shader::Pred::UnusedIndex: + // Writing to these predicates is a no-op + return {}; + } + target = predicates.at(index); + + } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) { + target = internal_flags.at(static_cast<u32>(flag->GetFlag())); + } + + Emit(OpStore(target, Visit(src))); + return {}; + } + + Id LogicalPick2(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id LogicalAll2(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id LogicalAny2(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id GetTextureSampler(Operation operation) { + const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); + const auto entry = sampler_images.at(static_cast<u32>(meta->sampler.GetIndex())); + return Emit(OpLoad(entry.sampled_image_type, entry.sampler)); + } + + Id GetTextureImage(Operation operation) { + const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); + const auto entry = sampler_images.at(static_cast<u32>(meta->sampler.GetIndex())); + return Emit(OpImage(entry.image_type, GetTextureSampler(operation))); + } + + Id GetTextureCoordinates(Operation operation) { + const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); + std::vector<Id> coords; + for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) { + coords.push_back(Visit(operation[i])); + } + if (meta->sampler.IsArray()) { + const Id array_integer = BitcastTo<Type::Int>(Visit(meta->array)); + coords.push_back(Emit(OpConvertSToF(t_float, array_integer))); + } + if (meta->sampler.IsShadow()) { + coords.push_back(Visit(meta->depth_compare)); + } + + const std::array<Id, 4> t_float_lut = {nullptr, t_float2, t_float3, t_float4}; + return coords.size() == 1 + ? coords[0] + : Emit(OpCompositeConstruct(t_float_lut.at(coords.size() - 1), coords)); + } + + Id GetTextureElement(Operation operation, Id sample_value) { + const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); + ASSERT(meta); + return Emit(OpCompositeExtract(t_float, sample_value, meta->element)); + } + + Id Texture(Operation operation) { + const Id texture = Emit(OpImageSampleImplicitLod(t_float4, GetTextureSampler(operation), + GetTextureCoordinates(operation))); + return GetTextureElement(operation, texture); + } + + Id TextureLod(Operation operation) { + const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); + const Id texture = Emit(OpImageSampleExplicitLod( + t_float4, GetTextureSampler(operation), GetTextureCoordinates(operation), + spv::ImageOperandsMask::Lod, Visit(meta->lod))); + return GetTextureElement(operation, texture); + } + + Id TextureGather(Operation operation) { + const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); + const auto coords = GetTextureCoordinates(operation); + + Id texture; + if (meta->sampler.IsShadow()) { + texture = Emit(OpImageDrefGather(t_float4, GetTextureSampler(operation), coords, + Visit(meta->component))); + } else { + u32 component_value = 0; + if (meta->component) { + const auto component = std::get_if<ImmediateNode>(meta->component); + ASSERT_MSG(component, "Component is not an immediate value"); + component_value = component->GetValue(); + } + texture = Emit(OpImageGather(t_float4, GetTextureSampler(operation), coords, + Constant(t_uint, component_value))); + } + + return GetTextureElement(operation, texture); + } + + Id TextureQueryDimensions(Operation operation) { + const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); + const auto image_id = GetTextureImage(operation); + AddCapability(spv::Capability::ImageQuery); + + if (meta->element == 3) { + return BitcastTo<Type::Float>(Emit(OpImageQueryLevels(t_int, image_id))); + } + + const Id lod = VisitOperand<Type::Uint>(operation, 0); + const std::size_t coords_count = [&]() { + switch (const auto type = meta->sampler.GetType(); type) { + case Tegra::Shader::TextureType::Texture1D: + return 1; + case Tegra::Shader::TextureType::Texture2D: + case Tegra::Shader::TextureType::TextureCube: + return 2; + case Tegra::Shader::TextureType::Texture3D: + return 3; + default: + UNREACHABLE_MSG("Invalid texture type={}", static_cast<u32>(type)); + return 2; + } + }(); + + if (meta->element >= coords_count) { + return Constant(t_float, 0.0f); + } + + const std::array<Id, 3> types = {t_int, t_int2, t_int3}; + const Id sizes = Emit(OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod)); + const Id size = Emit(OpCompositeExtract(t_int, sizes, meta->element)); + return BitcastTo<Type::Float>(size); + } + + Id TextureQueryLod(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id TexelFetch(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id Branch(Operation operation) { + const auto target = std::get_if<ImmediateNode>(operation[0]); + UNIMPLEMENTED_IF(!target); + + Emit(OpStore(jmp_to, Constant(t_uint, target->GetValue()))); + BranchingOp([&]() { Emit(OpBranch(continue_label)); }); + return {}; + } + + Id PushFlowStack(Operation operation) { + const auto target = std::get_if<ImmediateNode>(operation[0]); + ASSERT(target); + + const Id current = Emit(OpLoad(t_uint, flow_stack_top)); + const Id next = Emit(OpIAdd(t_uint, current, Constant(t_uint, 1))); + const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, current)); + + Emit(OpStore(access, Constant(t_uint, target->GetValue()))); + Emit(OpStore(flow_stack_top, next)); + return {}; + } + + Id PopFlowStack(Operation operation) { + const Id current = Emit(OpLoad(t_uint, flow_stack_top)); + const Id previous = Emit(OpISub(t_uint, current, Constant(t_uint, 1))); + const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, previous)); + const Id target = Emit(OpLoad(t_uint, access)); + + Emit(OpStore(flow_stack_top, previous)); + Emit(OpStore(jmp_to, target)); + BranchingOp([&]() { Emit(OpBranch(continue_label)); }); + return {}; + } + + Id Exit(Operation operation) { + switch (stage) { + case ShaderStage::Vertex: { + // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't + // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. + const Id position = AccessElement(t_float4, per_vertex, position_index); + Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2))); + depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); + depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); + Emit(OpStore(AccessElement(t_out_float, position, 2), depth)); + break; + } + case ShaderStage::Fragment: { + const auto SafeGetRegister = [&](u32 reg) { + // TODO(Rodrigo): Replace with contains once C++20 releases + if (const auto it = registers.find(reg); it != registers.end()) { + return Emit(OpLoad(t_float, it->second)); + } + return Constant(t_float, 0.0f); + }; + + UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, + "Sample mask write is unimplemented"); + + // TODO(Rodrigo): Alpha testing + + // Write the color outputs using the data in the shader registers, disabled + // rendertargets/components are skipped in the register assignment. + u32 current_reg = 0; + for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { + // TODO(Subv): Figure out how dual-source blending is configured in the Switch. + for (u32 component = 0; component < 4; ++component) { + if (header.ps.IsColorComponentOutputEnabled(rt, component)) { + Emit(OpStore(AccessElement(t_out_float, frag_colors.at(rt), component), + SafeGetRegister(current_reg))); + ++current_reg; + } + } + } + if (header.ps.omap.depth) { + // The depth output is always 2 registers after the last color output, and + // current_reg already contains one past the last color register. + Emit(OpStore(frag_depth, SafeGetRegister(current_reg + 1))); + } + break; + } + } + + BranchingOp([&]() { Emit(OpReturn()); }); + return {}; + } + + Id Discard(Operation operation) { + BranchingOp([&]() { Emit(OpKill()); }); + return {}; + } + + Id EmitVertex(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id EndPrimitive(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id YNegate(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, + const std::string& name) { + const Id id = OpVariable(type, storage); + Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin)); + AddGlobalVariable(Name(id, name)); + interfaces.push_back(id); + return id; + } + + bool IsRenderTargetUsed(u32 rt) const { + for (u32 component = 0; component < 4; ++component) { + if (header.ps.IsColorComponentOutputEnabled(rt, component)) { + return true; + } + } + return false; + } + + template <typename... Args> + Id AccessElement(Id pointer_type, Id composite, Args... elements_) { + std::vector<Id> members; + auto elements = {elements_...}; + for (const auto element : elements) { + members.push_back(Constant(t_uint, element)); + } + + return Emit(OpAccessChain(pointer_type, composite, members)); + } + + template <Type type> + Id VisitOperand(Operation operation, std::size_t operand_index) { + const Id value = Visit(operation[operand_index]); + + switch (type) { + case Type::Bool: + case Type::Bool2: + case Type::Float: + return value; + case Type::Int: + return Emit(OpBitcast(t_int, value)); + case Type::Uint: + return Emit(OpBitcast(t_uint, value)); + case Type::HalfFloat: + UNIMPLEMENTED(); + } + UNREACHABLE(); + return value; + } + + template <Type type> + Id BitcastFrom(Id value) { + switch (type) { + case Type::Bool: + case Type::Bool2: + case Type::Float: + return value; + case Type::Int: + case Type::Uint: + return Emit(OpBitcast(t_float, value)); + case Type::HalfFloat: + UNIMPLEMENTED(); + } + UNREACHABLE(); + return value; + } + + template <Type type> + Id BitcastTo(Id value) { + switch (type) { + case Type::Bool: + case Type::Bool2: + UNREACHABLE(); + case Type::Float: + return Emit(OpBitcast(t_float, value)); + case Type::Int: + return Emit(OpBitcast(t_int, value)); + case Type::Uint: + return Emit(OpBitcast(t_uint, value)); + case Type::HalfFloat: + UNIMPLEMENTED(); + } + UNREACHABLE(); + return value; + } + + Id GetTypeDefinition(Type type) { + switch (type) { + case Type::Bool: + return t_bool; + case Type::Bool2: + return t_bool2; + case Type::Float: + return t_float; + case Type::Int: + return t_int; + case Type::Uint: + return t_uint; + case Type::HalfFloat: + UNIMPLEMENTED(); + } + UNREACHABLE(); + return {}; + } + + void BranchingOp(std::function<void()> call) { + const Id true_label = OpLabel(); + const Id skip_label = OpLabel(); + Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::Flatten)); + Emit(OpBranchConditional(v_true, true_label, skip_label, 1, 0)); + Emit(true_label); + call(); + + Emit(skip_label); + } + + static constexpr OperationDecompilersArray operation_decompilers = { + &SPIRVDecompiler::Assign, + + &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, + Type::Float>, + + &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>, + &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, + &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, + &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, + + &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, + &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, + &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>, + &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>, + &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>, + &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>, + &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>, + + &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>, + &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>, + &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>, + &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>, + &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>, + &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>, + &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>, + + &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>, + &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>, + &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>, + &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>, + &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>, + &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>, + &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>, + + &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>, + &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, + &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, + &SPIRVDecompiler::HNegate, + &SPIRVDecompiler::HMergeF32, + &SPIRVDecompiler::HMergeH0, + &SPIRVDecompiler::HMergeH1, + &SPIRVDecompiler::HPack2, + + &SPIRVDecompiler::LogicalAssign, + &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>, + &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>, + &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, + &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, + &SPIRVDecompiler::LogicalPick2, + &SPIRVDecompiler::LogicalAll2, + &SPIRVDecompiler::LogicalAny2, + + &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>, + &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>, + &SPIRVDecompiler::Unary<&Module::OpIsNan, Type::Bool>, + + &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>, + &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>, + &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>, + &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>, + &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>, + &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>, + + &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>, + &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>, + + &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>, + + &SPIRVDecompiler::Texture, + &SPIRVDecompiler::TextureLod, + &SPIRVDecompiler::TextureGather, + &SPIRVDecompiler::TextureQueryDimensions, + &SPIRVDecompiler::TextureQueryLod, + &SPIRVDecompiler::TexelFetch, + + &SPIRVDecompiler::Branch, + &SPIRVDecompiler::PushFlowStack, + &SPIRVDecompiler::PopFlowStack, + &SPIRVDecompiler::Exit, + &SPIRVDecompiler::Discard, + + &SPIRVDecompiler::EmitVertex, + &SPIRVDecompiler::EndPrimitive, + + &SPIRVDecompiler::YNegate, + }; + + const ShaderIR& ir; + const ShaderStage stage; + const Tegra::Shader::Header header; + + const Id t_void = Name(TypeVoid(), "void"); + + const Id t_bool = Name(TypeBool(), "bool"); + const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2"); + + const Id t_int = Name(TypeInt(32, true), "int"); + const Id t_int2 = Name(TypeVector(t_int, 2), "int2"); + const Id t_int3 = Name(TypeVector(t_int, 3), "int3"); + const Id t_int4 = Name(TypeVector(t_int, 4), "int4"); + + const Id t_uint = Name(TypeInt(32, false), "uint"); + const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2"); + const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3"); + const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4"); + + const Id t_float = Name(TypeFloat(32), "float"); + const Id t_float2 = Name(TypeVector(t_float, 2), "float2"); + const Id t_float3 = Name(TypeVector(t_float, 3), "float3"); + const Id t_float4 = Name(TypeVector(t_float, 4), "float4"); + + const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool"); + const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float"); + + const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint"); + + const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool"); + const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint"); + const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float"); + const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4"); + + const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float"); + const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); + + const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); + const Id t_cbuf_array = + Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"), + spv::Decoration::ArrayStride, CBUF_STRIDE); + const Id t_cbuf_struct = MemberDecorate( + Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); + const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct); + + const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); + const Id t_gmem_array = + Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4u), "GmemArray"); + const Id t_gmem_struct = MemberDecorate( + Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); + const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); + + const Id v_float_zero = Constant(t_float, 0.0f); + const Id v_true = ConstantTrue(t_bool); + const Id v_false = ConstantFalse(t_bool); + + Id per_vertex{}; + std::map<u32, Id> registers; + std::map<Tegra::Shader::Pred, Id> predicates; + Id local_memory{}; + std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; + std::map<Attribute::Index, Id> input_attributes; + std::map<Attribute::Index, Id> output_attributes; + std::map<u32, Id> constant_buffers; + std::map<GlobalMemoryBase, Id> global_buffers; + std::map<u32, SamplerImage> sampler_images; + + Id instance_index{}; + Id vertex_index{}; + std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; + Id frag_depth{}; + Id frag_coord{}; + Id front_facing{}; + + u32 position_index{}; + u32 point_size_index{}; + u32 clip_distances_index{}; + + std::vector<Id> interfaces; + + u32 const_buffers_base_binding{}; + u32 global_buffers_base_binding{}; + u32 samplers_base_binding{}; + + Id execute_function{}; + Id jmp_to{}; + Id flow_stack_top{}; + Id flow_stack{}; + Id continue_label{}; + std::map<u32, Id> labels; +}; + +DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { + auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage); + decompiler->Decompile(); + return {std::move(decompiler), decompiler->GetShaderEntries()}; +} + +} // namespace Vulkan::VKShader diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h new file mode 100644 index 000000000..329d8fa38 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -0,0 +1,80 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <memory> +#include <set> +#include <utility> +#include <vector> + +#include <sirit/sirit.h> + +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { +class ShaderIR; +} + +namespace Vulkan::VKShader { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +using SamplerEntry = VideoCommon::Shader::Sampler; + +constexpr u32 DESCRIPTOR_SET = 0; + +class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { +public: + explicit constexpr ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, u32 index) + : VideoCommon::Shader::ConstBuffer{entry}, index{index} {} + + constexpr u32 GetIndex() const { + return index; + } + +private: + u32 index{}; +}; + +class GlobalBufferEntry { +public: + explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset) + : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {} + + u32 GetCbufIndex() const { + return cbuf_index; + } + + u32 GetCbufOffset() const { + return cbuf_offset; + } + +private: + u32 cbuf_index{}; + u32 cbuf_offset{}; +}; + +struct ShaderEntries { + u32 const_buffers_base_binding{}; + u32 global_buffers_base_binding{}; + u32 samplers_base_binding{}; + std::vector<ConstBufferEntry> const_buffers; + std::vector<GlobalBufferEntry> global_buffers; + std::vector<SamplerEntry> samplers; + std::set<u32> attributes; + std::array<bool, Maxwell::NumClipDistances> clip_distances{}; + std::size_t shader_length{}; + Sirit::Id entry_function{}; + std::vector<Sirit::Id> interfaces; +}; + +using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; + +DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage); + +} // namespace Vulkan::VKShader diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp new file mode 100644 index 000000000..e96eba7cc --- /dev/null +++ b/src/video_core/texture_cache.cpp @@ -0,0 +1,386 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/cityhash.h" +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/surface.h" +#include "video_core/texture_cache.h" +#include "video_core/textures/decoders.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using VideoCore::Surface::SurfaceTarget; + +using VideoCore::Surface::ComponentTypeFromDepthFormat; +using VideoCore::Surface::ComponentTypeFromRenderTarget; +using VideoCore::Surface::ComponentTypeFromTexture; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::PixelFormatFromTextureFormat; +using VideoCore::Surface::SurfaceTargetFromTextureType; + +constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { + return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); +} + +SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, + const Tegra::Texture::FullTextureInfo& config) { + SurfaceParams params; + params.is_tiled = config.tic.IsTiled(); + params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, + params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, + params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, + params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; + params.pixel_format = + PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), false); + params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); + params.type = GetFormatType(params.pixel_format); + params.target = SurfaceTargetFromTextureType(config.tic.texture_type); + params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); + params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); + params.depth = config.tic.Depth(); + if (params.target == SurfaceTarget::TextureCubemap || + params.target == SurfaceTarget::TextureCubeArray) { + params.depth *= 6; + } + params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); + params.unaligned_height = config.tic.Height(); + params.num_levels = config.tic.max_mip_level + 1; + + params.CalculateCachedValues(); + return params; +} + +SurfaceParams SurfaceParams::CreateForDepthBuffer( + Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, + u32 block_width, u32 block_height, u32 block_depth, + Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { + SurfaceParams params; + params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; + params.block_width = 1 << std::min(block_width, 5U); + params.block_height = 1 << std::min(block_height, 5U); + params.block_depth = 1 << std::min(block_depth, 5U); + params.tile_width_spacing = 1; + params.pixel_format = PixelFormatFromDepthFormat(format); + params.component_type = ComponentTypeFromDepthFormat(format); + params.type = GetFormatType(params.pixel_format); + params.width = zeta_width; + params.height = zeta_height; + params.unaligned_height = zeta_height; + params.target = SurfaceTarget::Texture2D; + params.depth = 1; + params.num_levels = 1; + + params.CalculateCachedValues(); + return params; +} + +SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) { + const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; + SurfaceParams params; + params.is_tiled = + config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; + params.block_width = 1 << config.memory_layout.block_width; + params.block_height = 1 << config.memory_layout.block_height; + params.block_depth = 1 << config.memory_layout.block_depth; + params.tile_width_spacing = 1; + params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); + params.component_type = ComponentTypeFromRenderTarget(config.format); + params.type = GetFormatType(params.pixel_format); + if (params.is_tiled) { + params.width = config.width; + } else { + const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; + params.pitch = config.width; + params.width = params.pitch / bpp; + } + params.height = config.height; + params.depth = 1; + params.unaligned_height = config.height; + params.target = SurfaceTarget::Texture2D; + params.num_levels = 1; + + params.CalculateCachedValues(); + return params; +} + +SurfaceParams SurfaceParams::CreateForFermiCopySurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params{}; + params.is_tiled = !config.linear; + params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, + params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, + params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, + params.tile_width_spacing = 1; + params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); + params.component_type = ComponentTypeFromRenderTarget(config.format); + params.type = GetFormatType(params.pixel_format); + params.width = config.width; + params.height = config.height; + params.unaligned_height = config.height; + // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters + params.target = SurfaceTarget::Texture2D; + params.depth = 1; + params.num_levels = 1; + + params.CalculateCachedValues(); + return params; +} + +u32 SurfaceParams::GetMipWidth(u32 level) const { + return std::max(1U, width >> level); +} + +u32 SurfaceParams::GetMipHeight(u32 level) const { + return std::max(1U, height >> level); +} + +u32 SurfaceParams::GetMipDepth(u32 level) const { + return IsLayered() ? depth : std::max(1U, depth >> level); +} + +bool SurfaceParams::IsLayered() const { + switch (target) { + case SurfaceTarget::Texture1DArray: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubeArray: + case SurfaceTarget::TextureCubemap: + return true; + default: + return false; + } +} + +u32 SurfaceParams::GetMipBlockHeight(u32 level) const { + // Auto block resizing algorithm from: + // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c + if (level == 0) { + return block_height; + } + const u32 height{GetMipHeight(level)}; + const u32 default_block_height{GetDefaultBlockHeight(pixel_format)}; + const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; + u32 block_height = 16; + while (block_height > 1 && blocks_in_y <= block_height * 4) { + block_height >>= 1; + } + return block_height; +} + +u32 SurfaceParams::GetMipBlockDepth(u32 level) const { + if (level == 0) + return block_depth; + if (target != SurfaceTarget::Texture3D) + return 1; + + const u32 depth{GetMipDepth(level)}; + u32 block_depth = 32; + while (block_depth > 1 && depth * 2 <= block_depth) { + block_depth >>= 1; + } + if (block_depth == 32 && GetMipBlockHeight(level) >= 4) { + return 16; + } + return block_depth; +} + +std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { + std::size_t offset = 0; + for (u32 i = 0; i < level; i++) { + offset += GetInnerMipmapMemorySize(i, false, IsLayered(), false); + } + return offset; +} + +std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { + std::size_t offset = 0; + for (u32 i = 0; i < level; i++) { + offset += GetInnerMipmapMemorySize(i, true, false, false); + } + return offset; +} + +std::size_t SurfaceParams::GetGuestLayerSize() const { + return GetInnerMemorySize(false, true, false); +} + +std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { + return GetInnerMipmapMemorySize(level, true, IsLayered(), false); +} + +bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const { + if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) != + std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format, + view_params.component_type, view_params.type)) { + return false; + } + + const SurfaceTarget view_target{view_params.target}; + if (view_target == target) { + return true; + } + + switch (target) { + case SurfaceTarget::Texture1D: + case SurfaceTarget::Texture2D: + case SurfaceTarget::Texture3D: + return false; + case SurfaceTarget::Texture1DArray: + return view_target == SurfaceTarget::Texture1D; + case SurfaceTarget::Texture2DArray: + return view_target == SurfaceTarget::Texture2D; + case SurfaceTarget::TextureCubemap: + return view_target == SurfaceTarget::Texture2D || + view_target == SurfaceTarget::Texture2DArray; + case SurfaceTarget::TextureCubeArray: + return view_target == SurfaceTarget::Texture2D || + view_target == SurfaceTarget::Texture2DArray || + view_target == SurfaceTarget::TextureCubemap; + default: + UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast<u32>(target)); + return false; + } +} + +bool SurfaceParams::IsPixelFormatZeta() const { + return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && + pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; +} + +void SurfaceParams::CalculateCachedValues() { + guest_size_in_bytes = GetInnerMemorySize(false, false, false); + + // ASTC is uncompressed in software, in emulated as RGBA8 + if (IsPixelFormatASTC(pixel_format)) { + host_size_in_bytes = width * height * depth * 4; + } else { + host_size_in_bytes = GetInnerMemorySize(true, false, false); + } + + switch (target) { + case SurfaceTarget::Texture1D: + case SurfaceTarget::Texture2D: + case SurfaceTarget::Texture3D: + num_layers = 1; + break; + case SurfaceTarget::Texture1DArray: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubeArray: + num_layers = depth; + break; + default: + UNREACHABLE(); + } +} + +std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only, + bool uncompressed) const { + const bool tiled{as_host_size ? false : is_tiled}; + const u32 tile_x{GetDefaultBlockWidth(pixel_format)}; + const u32 tile_y{GetDefaultBlockHeight(pixel_format)}; + const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), tile_x)}; + const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), tile_y)}; + const u32 depth{layer_only ? 1U : GetMipDepth(level)}; + return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(pixel_format), width, height, + depth, GetMipBlockHeight(level), GetMipBlockDepth(level)); +} + +std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, + bool uncompressed) const { + std::size_t size = 0; + for (u32 level = 0; level < num_levels; ++level) { + size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed); + } + if (!as_host_size && is_tiled) { + size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + } + return size; +} + +std::map<u64, std::pair<u32, u32>> SurfaceParams::CreateViewOffsetMap() const { + std::map<u64, std::pair<u32, u32>> view_offset_map; + switch (target) { + case SurfaceTarget::Texture1D: + case SurfaceTarget::Texture2D: + case SurfaceTarget::Texture3D: { + constexpr u32 layer = 0; + for (u32 level = 0; level < num_levels; ++level) { + const std::size_t offset{GetGuestMipmapLevelOffset(level)}; + view_offset_map.insert({offset, {layer, level}}); + } + break; + } + case SurfaceTarget::Texture1DArray: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubeArray: { + const std::size_t layer_size{GetGuestLayerSize()}; + for (u32 level = 0; level < num_levels; ++level) { + const std::size_t level_offset{GetGuestMipmapLevelOffset(level)}; + for (u32 layer = 0; layer < num_layers; ++layer) { + const auto layer_offset{static_cast<std::size_t>(layer_size * layer)}; + const std::size_t offset{level_offset + layer_offset}; + view_offset_map.insert({offset, {layer, level}}); + } + } + break; + } + default: + UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast<u32>(target)); + } + return view_offset_map; +} + +bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const { + return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) && + IsInBounds(view_params, layer, level); +} + +bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const { + return view_params.width == GetMipWidth(level) && view_params.height == GetMipHeight(level); +} + +bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const { + if (view_params.target != SurfaceTarget::Texture3D) { + return true; + } + return view_params.depth == GetMipDepth(level); +} + +bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const { + return layer + view_params.num_layers <= num_layers && + level + view_params.num_levels <= num_levels; +} + +std::size_t HasheableSurfaceParams::Hash() const { + return static_cast<std::size_t>( + Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); +} + +bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const { + return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, + height, depth, pitch, unaligned_height, num_levels, pixel_format, + component_type, type, target) == + std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, + rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, + rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type, + rhs.type, rhs.target); +} + +std::size_t ViewKey::Hash() const { + return static_cast<std::size_t>( + Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); +} + +bool ViewKey::operator==(const ViewKey& rhs) const { + return std::tie(base_layer, num_layers, base_level, num_levels) == + std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels); +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h new file mode 100644 index 000000000..041551691 --- /dev/null +++ b/src/video_core/texture_cache.h @@ -0,0 +1,586 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <list> +#include <memory> +#include <set> +#include <tuple> +#include <type_traits> +#include <unordered_map> + +#include <boost/icl/interval_map.hpp> +#include <boost/range/iterator_range.hpp> + +#include "common/assert.h" +#include "common/common_types.h" +#include "core/memory.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" + +namespace Core { +class System; +} + +namespace Tegra::Texture { +struct FullTextureInfo; +} + +namespace VideoCore { +class RasterizerInterface; +} + +namespace VideoCommon { + +class HasheableSurfaceParams { +public: + std::size_t Hash() const; + + bool operator==(const HasheableSurfaceParams& rhs) const; + +protected: + // Avoid creation outside of a managed environment. + HasheableSurfaceParams() = default; + + bool is_tiled; + u32 block_width; + u32 block_height; + u32 block_depth; + u32 tile_width_spacing; + u32 width; + u32 height; + u32 depth; + u32 pitch; + u32 unaligned_height; + u32 num_levels; + VideoCore::Surface::PixelFormat pixel_format; + VideoCore::Surface::ComponentType component_type; + VideoCore::Surface::SurfaceType type; + VideoCore::Surface::SurfaceTarget target; +}; + +class SurfaceParams final : public HasheableSurfaceParams { +public: + /// Creates SurfaceCachedParams from a texture configuration. + static SurfaceParams CreateForTexture(Core::System& system, + const Tegra::Texture::FullTextureInfo& config); + + /// Creates SurfaceCachedParams for a depth buffer configuration. + static SurfaceParams CreateForDepthBuffer( + Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, + u32 block_width, u32 block_height, u32 block_depth, + Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); + + /// Creates SurfaceCachedParams from a framebuffer configuration. + static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); + + /// Creates SurfaceCachedParams from a Fermi2D surface configuration. + static SurfaceParams CreateForFermiCopySurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config); + + bool IsTiled() const { + return is_tiled; + } + + u32 GetBlockWidth() const { + return block_width; + } + + u32 GetTileWidthSpacing() const { + return tile_width_spacing; + } + + u32 GetWidth() const { + return width; + } + + u32 GetHeight() const { + return height; + } + + u32 GetDepth() const { + return depth; + } + + u32 GetPitch() const { + return pitch; + } + + u32 GetNumLevels() const { + return num_levels; + } + + VideoCore::Surface::PixelFormat GetPixelFormat() const { + return pixel_format; + } + + VideoCore::Surface::ComponentType GetComponentType() const { + return component_type; + } + + VideoCore::Surface::SurfaceTarget GetTarget() const { + return target; + } + + VideoCore::Surface::SurfaceType GetType() const { + return type; + } + + std::size_t GetGuestSizeInBytes() const { + return guest_size_in_bytes; + } + + std::size_t GetHostSizeInBytes() const { + return host_size_in_bytes; + } + + u32 GetNumLayers() const { + return num_layers; + } + + /// Returns the width of a given mipmap level. + u32 GetMipWidth(u32 level) const; + + /// Returns the height of a given mipmap level. + u32 GetMipHeight(u32 level) const; + + /// Returns the depth of a given mipmap level. + u32 GetMipDepth(u32 level) const; + + /// Returns true if these parameters are from a layered surface. + bool IsLayered() const; + + /// Returns the block height of a given mipmap level. + u32 GetMipBlockHeight(u32 level) const; + + /// Returns the block depth of a given mipmap level. + u32 GetMipBlockDepth(u32 level) const; + + /// Returns the offset in bytes in guest memory of a given mipmap level. + std::size_t GetGuestMipmapLevelOffset(u32 level) const; + + /// Returns the offset in bytes in host memory (linear) of a given mipmap level. + std::size_t GetHostMipmapLevelOffset(u32 level) const; + + /// Returns the size of a layer in bytes in guest memory. + std::size_t GetGuestLayerSize() const; + + /// Returns the size of a layer in bytes in host memory for a given mipmap level. + std::size_t GetHostLayerSize(u32 level) const; + + /// Returns true if another surface can be familiar with this. This is a loosely defined term + /// that reflects the possibility of these two surface parameters potentially being part of a + /// bigger superset. + bool IsFamiliar(const SurfaceParams& view_params) const; + + /// Returns true if the pixel format is a depth and/or stencil format. + bool IsPixelFormatZeta() const; + + /// Creates a map that redirects an address difference to a layer and mipmap level. + std::map<u64, std::pair<u32, u32>> CreateViewOffsetMap() const; + + /// Returns true if the passed surface view parameters is equal or a valid subset of this. + bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const; + +private: + /// Calculates values that can be deduced from HasheableSurfaceParams. + void CalculateCachedValues(); + + /// Returns the size of a given mipmap level. + std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only, + bool uncompressed) const; + + /// Returns the size of all mipmap levels and aligns as needed. + std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const; + + /// Returns true if the passed view width and height match the size of this params in a given + /// mipmap level. + bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const; + + /// Returns true if the passed view depth match the size of this params in a given mipmap level. + bool IsDepthValid(const SurfaceParams& view_params, u32 level) const; + + /// Returns true if the passed view layers and mipmap levels are in bounds. + bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const; + + std::size_t guest_size_in_bytes; + std::size_t host_size_in_bytes; + u32 num_layers; +}; + +struct ViewKey { + std::size_t Hash() const; + + bool operator==(const ViewKey& rhs) const; + + u32 base_layer{}; + u32 num_layers{}; + u32 base_level{}; + u32 num_levels{}; +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash<VideoCommon::SurfaceParams> { + std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { + return k.Hash(); + } +}; + +template <> +struct hash<VideoCommon::ViewKey> { + std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std + +namespace VideoCommon { + +template <typename TView, typename TExecutionContext> +class SurfaceBase { + static_assert(std::is_trivially_copyable_v<TExecutionContext>); + +public: + virtual void LoadBuffer() = 0; + + virtual TExecutionContext FlushBuffer(TExecutionContext exctx) = 0; + + virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0; + + TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) { + if (view_addr < cpu_addr || !params.IsFamiliar(view_params)) { + // It can't be a view if it's in a prior address. + return {}; + } + + const auto relative_offset{static_cast<u64>(view_addr - cpu_addr)}; + const auto it{view_offset_map.find(relative_offset)}; + if (it == view_offset_map.end()) { + // Couldn't find an aligned view. + return {}; + } + const auto [layer, level] = it->second; + + if (!params.IsViewValid(view_params, layer, level)) { + return {}; + } + + return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels()); + } + + VAddr GetCpuAddr() const { + ASSERT(is_registered); + return cpu_addr; + } + + u8* GetHostPtr() const { + ASSERT(is_registered); + return host_ptr; + } + + CacheAddr GetCacheAddr() const { + ASSERT(is_registered); + return cache_addr; + } + + std::size_t GetSizeInBytes() const { + return params.GetGuestSizeInBytes(); + } + + void MarkAsModified(bool is_modified_) { + is_modified = is_modified_; + } + + const SurfaceParams& GetSurfaceParams() const { + return params; + } + + TView* GetView(VAddr view_addr, const SurfaceParams& view_params) { + TView* view{TryGetView(view_addr, view_params)}; + ASSERT(view != nullptr); + return view; + } + + void Register(VAddr cpu_addr_, u8* host_ptr_) { + ASSERT(!is_registered); + is_registered = true; + cpu_addr = cpu_addr_; + host_ptr = host_ptr_; + cache_addr = ToCacheAddr(host_ptr_); + } + + void Register(VAddr cpu_addr_) { + Register(cpu_addr_, Memory::GetPointer(cpu_addr_)); + } + + void Unregister() { + ASSERT(is_registered); + is_registered = false; + } + + bool IsRegistered() const { + return is_registered; + } + +protected: + explicit SurfaceBase(const SurfaceParams& params) + : params{params}, view_offset_map{params.CreateViewOffsetMap()} {} + + ~SurfaceBase() = default; + + virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0; + + bool IsModified() const { + return is_modified; + } + + const SurfaceParams params; + +private: + TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) { + const ViewKey key{base_layer, num_layers, base_level, num_levels}; + const auto [entry, is_cache_miss] = views.try_emplace(key); + auto& view{entry->second}; + if (is_cache_miss) { + view = CreateView(key); + } + return view.get(); + } + + const std::map<u64, std::pair<u32, u32>> view_offset_map; + + VAddr cpu_addr{}; + u8* host_ptr{}; + CacheAddr cache_addr{}; + bool is_modified{}; + bool is_registered{}; + std::unordered_map<ViewKey, std::unique_ptr<TView>> views; +}; + +template <typename TSurface, typename TView, typename TExecutionContext> +class TextureCache { + static_assert(std::is_trivially_copyable_v<TExecutionContext>); + using ResultType = std::tuple<TView*, TExecutionContext>; + using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface*>>; + using IntervalType = typename IntervalMap::interval_type; + +public: + void InvalidateRegion(CacheAddr addr, std::size_t size) { + for (TSurface* surface : GetSurfacesInRegion(addr, size)) { + if (!surface->IsRegistered()) { + // Skip duplicates + continue; + } + Unregister(surface); + } + } + + ResultType GetTextureSurface(TExecutionContext exctx, + const Tegra::Texture::FullTextureInfo& config) { + auto& memory_manager{system.GPU().MemoryManager()}; + const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())}; + if (!cpu_addr) { + return {{}, exctx}; + } + const auto params{SurfaceParams::CreateForTexture(system, config)}; + return GetSurfaceView(exctx, *cpu_addr, params, true); + } + + ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { + const auto& regs{system.GPU().Maxwell3D().regs}; + if (!regs.zeta.Address() || !regs.zeta_enable) { + return {{}, exctx}; + } + + auto& memory_manager{system.GPU().MemoryManager()}; + const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())}; + if (!cpu_addr) { + return {{}, exctx}; + } + + const auto depth_params{SurfaceParams::CreateForDepthBuffer( + system, regs.zeta_width, regs.zeta_height, regs.zeta.format, + regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, + regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; + return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents); + } + + ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, + bool preserve_contents) { + ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); + + const auto& regs{system.GPU().Maxwell3D().regs}; + if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || + regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { + return {{}, exctx}; + } + + auto& memory_manager{system.GPU().MemoryManager()}; + const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; + const auto cpu_addr{memory_manager.GpuToCpuAddress( + config.Address() + config.base_layer * config.layer_stride * sizeof(u32))}; + if (!cpu_addr) { + return {{}, exctx}; + } + + return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + preserve_contents); + } + + ResultType GetFermiSurface(TExecutionContext exctx, + const Tegra::Engines::Fermi2D::Regs::Surface& config) { + const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())}; + ASSERT(cpu_addr); + return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config), + true); + } + + TSurface* TryFindFramebufferSurface(const u8* host_ptr) const { + const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; + return it != registered_surfaces.end() ? *it->second.begin() : nullptr; + } + +protected: + TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : system{system}, rasterizer{rasterizer} {} + + ~TextureCache() = default; + + virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector<TSurface*>& overlaps) = 0; + + virtual std::unique_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0; + + void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) { + surface->Register(cpu_addr, host_ptr); + registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); + rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); + } + + void Unregister(TSurface* surface) { + registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); + rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); + surface->Unregister(); + } + + TSurface* GetUncachedSurface(const SurfaceParams& params) { + if (TSurface* surface = TryGetReservedSurface(params); surface) + return surface; + // No reserved surface available, create a new one and reserve it + auto new_surface{CreateSurface(params)}; + TSurface* surface{new_surface.get()}; + ReserveSurface(params, std::move(new_surface)); + return surface; + } + + Core::System& system; + +private: + ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params, + bool preserve_contents) { + const auto host_ptr{Memory::GetPointer(cpu_addr)}; + const auto cache_addr{ToCacheAddr(host_ptr)}; + const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; + if (overlaps.empty()) { + return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents); + } + + if (overlaps.size() == 1) { + if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view) + return {view, exctx}; + } + + TView* fast_view; + std::tie(fast_view, exctx) = + TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps); + + for (TSurface* surface : overlaps) { + if (!fast_view) { + // Flush even when we don't care about the contents, to preserve memory not written + // by the new surface. + exctx = surface->FlushBuffer(exctx); + } + Unregister(surface); + } + + if (fast_view) { + return {fast_view, exctx}; + } + + return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents); + } + + ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents) { + TSurface* new_surface{GetUncachedSurface(params)}; + Register(new_surface, cpu_addr, host_ptr); + if (preserve_contents) { + exctx = LoadSurface(exctx, new_surface); + } + return {new_surface->GetView(cpu_addr, params), exctx}; + } + + TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) { + surface->LoadBuffer(); + exctx = surface->UploadTexture(exctx); + surface->MarkAsModified(false); + return exctx; + } + + std::vector<TSurface*> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { + if (size == 0) { + return {}; + } + const IntervalType interval{cache_addr, cache_addr + size}; + + std::vector<TSurface*> surfaces; + for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { + surfaces.push_back(*pair.second.begin()); + } + return surfaces; + } + + void ReserveSurface(const SurfaceParams& params, std::unique_ptr<TSurface> surface) { + surface_reserve[params].push_back(std::move(surface)); + } + + TSurface* TryGetReservedSurface(const SurfaceParams& params) { + auto search{surface_reserve.find(params)}; + if (search == surface_reserve.end()) { + return {}; + } + for (auto& surface : search->second) { + if (!surface->IsRegistered()) { + return surface.get(); + } + } + return {}; + } + + IntervalType GetSurfaceInterval(TSurface* surface) const { + return IntervalType::right_open(surface->GetCacheAddr(), + surface->GetCacheAddr() + surface->GetSizeInBytes()); + } + + VideoCore::RasterizerInterface& rasterizer; + + IntervalMap registered_surfaces; + + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have + /// previously been used. This is to prevent surfaces from being constantly created and + /// destroyed when used with different surface parameters. + std::unordered_map<SurfaceParams, std::list<std::unique_ptr<TSurface>>> surface_reserve; +}; + +} // namespace VideoCommon diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 732a1bf89..2eb86d6e5 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -56,8 +56,6 @@ add_executable(yuzu debugger/graphics/graphics_breakpoints.cpp debugger/graphics/graphics_breakpoints.h debugger/graphics/graphics_breakpoints_p.h - debugger/graphics/graphics_surface.cpp - debugger/graphics/graphics_surface.h debugger/console.cpp debugger/console.h debugger/profiler.cpp diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp deleted file mode 100644 index f2d14becf..000000000 --- a/src/yuzu/debugger/graphics/graphics_surface.cpp +++ /dev/null @@ -1,516 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <QBoxLayout> -#include <QComboBox> -#include <QDebug> -#include <QFileDialog> -#include <QLabel> -#include <QMessageBox> -#include <QMouseEvent> -#include <QPushButton> -#include <QScrollArea> -#include <QSpinBox> -#include "common/vector_math.h" -#include "core/core.h" -#include "core/memory.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/gpu.h" -#include "video_core/textures/decoders.h" -#include "video_core/textures/texture.h" -#include "yuzu/debugger/graphics/graphics_surface.h" -#include "yuzu/util/spinbox.h" - -static Tegra::Texture::TextureFormat ConvertToTextureFormat( - Tegra::RenderTargetFormat render_target_format) { - switch (render_target_format) { - case Tegra::RenderTargetFormat::RGBA8_UNORM: - return Tegra::Texture::TextureFormat::A8R8G8B8; - case Tegra::RenderTargetFormat::RGB10_A2_UNORM: - return Tegra::Texture::TextureFormat::A2B10G10R10; - default: - UNIMPLEMENTED_MSG("Unimplemented RT format"); - return Tegra::Texture::TextureFormat::A8R8G8B8; - } -} - -SurfacePicture::SurfacePicture(QWidget* parent, GraphicsSurfaceWidget* surface_widget_) - : QLabel(parent), surface_widget(surface_widget_) {} - -SurfacePicture::~SurfacePicture() = default; - -void SurfacePicture::mousePressEvent(QMouseEvent* event) { - // Only do something while the left mouse button is held down - if (!(event->buttons() & Qt::LeftButton)) - return; - - if (pixmap() == nullptr) - return; - - if (surface_widget) - surface_widget->Pick(event->x() * pixmap()->width() / width(), - event->y() * pixmap()->height() / height()); -} - -void SurfacePicture::mouseMoveEvent(QMouseEvent* event) { - // We also want to handle the event if the user moves the mouse while holding down the LMB - mousePressEvent(event); -} - -GraphicsSurfaceWidget::GraphicsSurfaceWidget(std::shared_ptr<Tegra::DebugContext> debug_context, - QWidget* parent) - : BreakPointObserverDock(debug_context, tr("Maxwell Surface Viewer"), parent), - surface_source(Source::RenderTarget0) { - setObjectName("MaxwellSurface"); - - surface_source_list = new QComboBox; - surface_source_list->addItem(tr("Render Target 0")); - surface_source_list->addItem(tr("Render Target 1")); - surface_source_list->addItem(tr("Render Target 2")); - surface_source_list->addItem(tr("Render Target 3")); - surface_source_list->addItem(tr("Render Target 4")); - surface_source_list->addItem(tr("Render Target 5")); - surface_source_list->addItem(tr("Render Target 6")); - surface_source_list->addItem(tr("Render Target 7")); - surface_source_list->addItem(tr("Z Buffer")); - surface_source_list->addItem(tr("Custom")); - surface_source_list->setCurrentIndex(static_cast<int>(surface_source)); - - surface_address_control = new CSpinBox; - surface_address_control->SetBase(16); - surface_address_control->SetRange(0, 0x7FFFFFFFFFFFFFFF); - surface_address_control->SetPrefix("0x"); - - unsigned max_dimension = 16384; // TODO: Find actual maximum - - surface_width_control = new QSpinBox; - surface_width_control->setRange(0, max_dimension); - - surface_height_control = new QSpinBox; - surface_height_control->setRange(0, max_dimension); - - surface_picker_x_control = new QSpinBox; - surface_picker_x_control->setRange(0, max_dimension - 1); - - surface_picker_y_control = new QSpinBox; - surface_picker_y_control->setRange(0, max_dimension - 1); - - // clang-format off - // Color formats sorted by Maxwell texture format index - const QStringList surface_formats{ - tr("None"), - QStringLiteral("R32_G32_B32_A32"), - QStringLiteral("R32_G32_B32"), - QStringLiteral("R16_G16_B16_A16"), - QStringLiteral("R32_G32"), - QStringLiteral("R32_B24G8"), - QStringLiteral("ETC2_RGB"), - QStringLiteral("X8B8G8R8"), - QStringLiteral("A8R8G8B8"), - QStringLiteral("A2B10G10R10"), - QStringLiteral("ETC2_RGB_PTA"), - QStringLiteral("ETC2_RGBA"), - QStringLiteral("R16_G16"), - QStringLiteral("G8R24"), - QStringLiteral("G24R8"), - QStringLiteral("R32"), - QStringLiteral("BC6H_SF16"), - QStringLiteral("BC6H_UF16"), - QStringLiteral("A4B4G4R4"), - QStringLiteral("A5B5G5R1"), - QStringLiteral("A1B5G5R5"), - QStringLiteral("B5G6R5"), - QStringLiteral("B6G5R5"), - QStringLiteral("BC7U"), - QStringLiteral("G8R8"), - QStringLiteral("EAC"), - QStringLiteral("EACX2"), - QStringLiteral("R16"), - QStringLiteral("Y8_VIDEO"), - QStringLiteral("R8"), - QStringLiteral("G4R4"), - QStringLiteral("R1"), - QStringLiteral("E5B9G9R9_SHAREDEXP"), - QStringLiteral("BF10GF11RF11"), - QStringLiteral("G8B8G8R8"), - QStringLiteral("B8G8R8G8"), - QStringLiteral("DXT1"), - QStringLiteral("DXT23"), - QStringLiteral("DXT45"), - QStringLiteral("DXN1"), - QStringLiteral("DXN2"), - QStringLiteral("Z24S8"), - QStringLiteral("X8Z24"), - QStringLiteral("S8Z24"), - QStringLiteral("X4V4Z24__COV4R4V"), - QStringLiteral("X4V4Z24__COV8R8V"), - QStringLiteral("V8Z24__COV4R12V"), - QStringLiteral("ZF32"), - QStringLiteral("ZF32_X24S8"), - QStringLiteral("X8Z24_X20V4S8__COV4R4V"), - QStringLiteral("X8Z24_X20V4S8__COV8R8V"), - QStringLiteral("ZF32_X20V4X8__COV4R4V"), - QStringLiteral("ZF32_X20V4X8__COV8R8V"), - QStringLiteral("ZF32_X20V4S8__COV4R4V"), - QStringLiteral("ZF32_X20V4S8__COV8R8V"), - QStringLiteral("X8Z24_X16V8S8__COV4R12V"), - QStringLiteral("ZF32_X16V8X8__COV4R12V"), - QStringLiteral("ZF32_X16V8S8__COV4R12V"), - QStringLiteral("Z16"), - QStringLiteral("V8Z24__COV8R24V"), - QStringLiteral("X8Z24_X16V8S8__COV8R24V"), - QStringLiteral("ZF32_X16V8X8__COV8R24V"), - QStringLiteral("ZF32_X16V8S8__COV8R24V"), - QStringLiteral("ASTC_2D_4X4"), - QStringLiteral("ASTC_2D_5X5"), - QStringLiteral("ASTC_2D_6X6"), - QStringLiteral("ASTC_2D_8X8"), - QStringLiteral("ASTC_2D_10X10"), - QStringLiteral("ASTC_2D_12X12"), - QStringLiteral("ASTC_2D_5X4"), - QStringLiteral("ASTC_2D_6X5"), - QStringLiteral("ASTC_2D_8X6"), - QStringLiteral("ASTC_2D_10X8"), - QStringLiteral("ASTC_2D_12X10"), - QStringLiteral("ASTC_2D_8X5"), - QStringLiteral("ASTC_2D_10X5"), - QStringLiteral("ASTC_2D_10X6"), - }; - // clang-format on - - surface_format_control = new QComboBox; - surface_format_control->addItems(surface_formats); - - surface_info_label = new QLabel(); - surface_info_label->setWordWrap(true); - - surface_picture_label = new SurfacePicture(0, this); - surface_picture_label->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed); - surface_picture_label->setAlignment(Qt::AlignLeft | Qt::AlignTop); - surface_picture_label->setScaledContents(false); - - auto scroll_area = new QScrollArea(); - scroll_area->setBackgroundRole(QPalette::Dark); - scroll_area->setWidgetResizable(false); - scroll_area->setWidget(surface_picture_label); - - save_surface = new QPushButton(QIcon::fromTheme("document-save"), tr("Save")); - - // Connections - connect(this, &GraphicsSurfaceWidget::Update, this, &GraphicsSurfaceWidget::OnUpdate); - connect(surface_source_list, qOverload<int>(&QComboBox::currentIndexChanged), this, - &GraphicsSurfaceWidget::OnSurfaceSourceChanged); - connect(surface_address_control, &CSpinBox::ValueChanged, this, - &GraphicsSurfaceWidget::OnSurfaceAddressChanged); - connect(surface_width_control, qOverload<int>(&QSpinBox::valueChanged), this, - &GraphicsSurfaceWidget::OnSurfaceWidthChanged); - connect(surface_height_control, qOverload<int>(&QSpinBox::valueChanged), this, - &GraphicsSurfaceWidget::OnSurfaceHeightChanged); - connect(surface_format_control, qOverload<int>(&QComboBox::currentIndexChanged), this, - &GraphicsSurfaceWidget::OnSurfaceFormatChanged); - connect(surface_picker_x_control, qOverload<int>(&QSpinBox::valueChanged), this, - &GraphicsSurfaceWidget::OnSurfacePickerXChanged); - connect(surface_picker_y_control, qOverload<int>(&QSpinBox::valueChanged), this, - &GraphicsSurfaceWidget::OnSurfacePickerYChanged); - connect(save_surface, &QPushButton::clicked, this, &GraphicsSurfaceWidget::SaveSurface); - - auto main_widget = new QWidget; - auto main_layout = new QVBoxLayout; - { - auto sub_layout = new QHBoxLayout; - sub_layout->addWidget(new QLabel(tr("Source:"))); - sub_layout->addWidget(surface_source_list); - main_layout->addLayout(sub_layout); - } - { - auto sub_layout = new QHBoxLayout; - sub_layout->addWidget(new QLabel(tr("GPU Address:"))); - sub_layout->addWidget(surface_address_control); - main_layout->addLayout(sub_layout); - } - { - auto sub_layout = new QHBoxLayout; - sub_layout->addWidget(new QLabel(tr("Width:"))); - sub_layout->addWidget(surface_width_control); - main_layout->addLayout(sub_layout); - } - { - auto sub_layout = new QHBoxLayout; - sub_layout->addWidget(new QLabel(tr("Height:"))); - sub_layout->addWidget(surface_height_control); - main_layout->addLayout(sub_layout); - } - { - auto sub_layout = new QHBoxLayout; - sub_layout->addWidget(new QLabel(tr("Format:"))); - sub_layout->addWidget(surface_format_control); - main_layout->addLayout(sub_layout); - } - main_layout->addWidget(scroll_area); - - auto info_layout = new QHBoxLayout; - { - auto xy_layout = new QVBoxLayout; - { - { - auto sub_layout = new QHBoxLayout; - sub_layout->addWidget(new QLabel(tr("X:"))); - sub_layout->addWidget(surface_picker_x_control); - xy_layout->addLayout(sub_layout); - } - { - auto sub_layout = new QHBoxLayout; - sub_layout->addWidget(new QLabel(tr("Y:"))); - sub_layout->addWidget(surface_picker_y_control); - xy_layout->addLayout(sub_layout); - } - } - info_layout->addLayout(xy_layout); - surface_info_label->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Minimum); - info_layout->addWidget(surface_info_label); - } - main_layout->addLayout(info_layout); - - main_layout->addWidget(save_surface); - main_widget->setLayout(main_layout); - setWidget(main_widget); - - // Load current data - TODO: Make sure this works when emulation is not running - if (debug_context && debug_context->at_breakpoint) { - emit Update(); - widget()->setEnabled(debug_context->at_breakpoint); - } else { - widget()->setEnabled(false); - } -} - -void GraphicsSurfaceWidget::OnBreakPointHit(Tegra::DebugContext::Event event, void* data) { - emit Update(); - widget()->setEnabled(true); -} - -void GraphicsSurfaceWidget::OnResumed() { - widget()->setEnabled(false); -} - -void GraphicsSurfaceWidget::OnSurfaceSourceChanged(int new_value) { - surface_source = static_cast<Source>(new_value); - emit Update(); -} - -void GraphicsSurfaceWidget::OnSurfaceAddressChanged(qint64 new_value) { - if (surface_address != new_value) { - surface_address = static_cast<GPUVAddr>(new_value); - - surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom)); - emit Update(); - } -} - -void GraphicsSurfaceWidget::OnSurfaceWidthChanged(int new_value) { - if (surface_width != static_cast<unsigned>(new_value)) { - surface_width = static_cast<unsigned>(new_value); - - surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom)); - emit Update(); - } -} - -void GraphicsSurfaceWidget::OnSurfaceHeightChanged(int new_value) { - if (surface_height != static_cast<unsigned>(new_value)) { - surface_height = static_cast<unsigned>(new_value); - - surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom)); - emit Update(); - } -} - -void GraphicsSurfaceWidget::OnSurfaceFormatChanged(int new_value) { - if (surface_format != static_cast<Tegra::Texture::TextureFormat>(new_value)) { - surface_format = static_cast<Tegra::Texture::TextureFormat>(new_value); - - surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom)); - emit Update(); - } -} - -void GraphicsSurfaceWidget::OnSurfacePickerXChanged(int new_value) { - if (surface_picker_x != new_value) { - surface_picker_x = new_value; - Pick(surface_picker_x, surface_picker_y); - } -} - -void GraphicsSurfaceWidget::OnSurfacePickerYChanged(int new_value) { - if (surface_picker_y != new_value) { - surface_picker_y = new_value; - Pick(surface_picker_x, surface_picker_y); - } -} - -void GraphicsSurfaceWidget::Pick(int x, int y) { - surface_picker_x_control->setValue(x); - surface_picker_y_control->setValue(y); - - if (x < 0 || x >= static_cast<int>(surface_width) || y < 0 || - y >= static_cast<int>(surface_height)) { - surface_info_label->setText(tr("Pixel out of bounds")); - surface_info_label->setAlignment(Qt::AlignLeft | Qt::AlignVCenter); - return; - } - - surface_info_label->setText(QString("Raw: <Unimplemented>\n(%1)").arg("<Unimplemented>")); - surface_info_label->setAlignment(Qt::AlignLeft | Qt::AlignVCenter); -} - -void GraphicsSurfaceWidget::OnUpdate() { - auto& gpu = Core::System::GetInstance().GPU(); - - QPixmap pixmap; - - switch (surface_source) { - case Source::RenderTarget0: - case Source::RenderTarget1: - case Source::RenderTarget2: - case Source::RenderTarget3: - case Source::RenderTarget4: - case Source::RenderTarget5: - case Source::RenderTarget6: - case Source::RenderTarget7: { - // TODO: Store a reference to the registers in the debug context instead of accessing them - // directly... - - const auto& registers = gpu.Maxwell3D().regs; - const auto& rt = registers.rt[static_cast<std::size_t>(surface_source) - - static_cast<std::size_t>(Source::RenderTarget0)]; - - surface_address = rt.Address(); - surface_width = rt.width; - surface_height = rt.height; - if (rt.format != Tegra::RenderTargetFormat::NONE) { - surface_format = ConvertToTextureFormat(rt.format); - } - - break; - } - - case Source::Custom: { - // Keep user-specified values - break; - } - - default: - qDebug() << "Unknown surface source " << static_cast<int>(surface_source); - break; - } - - surface_address_control->SetValue(surface_address); - surface_width_control->setValue(surface_width); - surface_height_control->setValue(surface_height); - surface_format_control->setCurrentIndex(static_cast<int>(surface_format)); - - if (surface_address == 0) { - surface_picture_label->hide(); - surface_info_label->setText(tr("(invalid surface address)")); - surface_info_label->setAlignment(Qt::AlignCenter); - surface_picker_x_control->setEnabled(false); - surface_picker_y_control->setEnabled(false); - save_surface->setEnabled(false); - return; - } - - // TODO: Implement a good way to visualize alpha components! - - QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32); - - // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles. - // Needs to be fixed if we plan to use this feature more, otherwise we may remove it. - auto unswizzled_data = Tegra::Texture::UnswizzleTexture( - gpu.MemoryManager().GetPointer(surface_address), 1, 1, - Tegra::Texture::BytesPerPixel(surface_format), surface_width, surface_height, 1U); - - auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format, - surface_width, surface_height); - - surface_picture_label->show(); - - for (unsigned int y = 0; y < surface_height; ++y) { - for (unsigned int x = 0; x < surface_width; ++x) { - Common::Vec4<u8> color; - color[0] = texture_data[x + y * surface_width + 0]; - color[1] = texture_data[x + y * surface_width + 1]; - color[2] = texture_data[x + y * surface_width + 2]; - color[3] = texture_data[x + y * surface_width + 3]; - decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a())); - } - } - - pixmap = QPixmap::fromImage(decoded_image); - surface_picture_label->setPixmap(pixmap); - surface_picture_label->resize(pixmap.size()); - - // Update the info with pixel data - surface_picker_x_control->setEnabled(true); - surface_picker_y_control->setEnabled(true); - Pick(surface_picker_x, surface_picker_y); - - // Enable saving the converted pixmap to file - save_surface->setEnabled(true); -} - -void GraphicsSurfaceWidget::SaveSurface() { - const QString png_filter = tr("Portable Network Graphic (*.png)"); - const QString bin_filter = tr("Binary data (*.bin)"); - - QString selected_filter; - const QString filename = QFileDialog::getSaveFileName( - this, tr("Save Surface"), - QStringLiteral("texture-0x%1.png").arg(QString::number(surface_address, 16)), - QStringLiteral("%1;;%2").arg(png_filter, bin_filter), &selected_filter); - - if (filename.isEmpty()) { - // If the user canceled the dialog, don't save anything. - return; - } - - if (selected_filter == png_filter) { - const QPixmap* const pixmap = surface_picture_label->pixmap(); - ASSERT_MSG(pixmap != nullptr, "No pixmap set"); - - QFile file{filename}; - if (!file.open(QIODevice::WriteOnly)) { - QMessageBox::warning(this, tr("Error"), tr("Failed to open file '%1'").arg(filename)); - return; - } - - if (!pixmap->save(&file, "PNG")) { - QMessageBox::warning(this, tr("Error"), - tr("Failed to save surface data to file '%1'").arg(filename)); - } - } else if (selected_filter == bin_filter) { - auto& gpu = Core::System::GetInstance().GPU(); - const std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address); - - const u8* const buffer = Memory::GetPointer(*address); - ASSERT_MSG(buffer != nullptr, "Memory not accessible"); - - QFile file{filename}; - if (!file.open(QIODevice::WriteOnly)) { - QMessageBox::warning(this, tr("Error"), tr("Failed to open file '%1'").arg(filename)); - return; - } - - const int size = - surface_width * surface_height * Tegra::Texture::BytesPerPixel(surface_format); - const QByteArray data(reinterpret_cast<const char*>(buffer), size); - if (file.write(data) != data.size()) { - QMessageBox::warning( - this, tr("Error"), - tr("Failed to completely write surface data to file. The saved data will " - "likely be corrupt.")); - } - } else { - UNREACHABLE_MSG("Unhandled filter selected"); - } -} diff --git a/src/yuzu/debugger/graphics/graphics_surface.h b/src/yuzu/debugger/graphics/graphics_surface.h deleted file mode 100644 index 89445b18f..000000000 --- a/src/yuzu/debugger/graphics/graphics_surface.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <QLabel> -#include <QPushButton> -#include "video_core/memory_manager.h" -#include "video_core/textures/texture.h" -#include "yuzu/debugger/graphics/graphics_breakpoint_observer.h" - -class QComboBox; -class QSpinBox; -class CSpinBox; - -class GraphicsSurfaceWidget; - -class SurfacePicture : public QLabel { - Q_OBJECT - -public: - explicit SurfacePicture(QWidget* parent = nullptr, - GraphicsSurfaceWidget* surface_widget = nullptr); - ~SurfacePicture() override; - -protected slots: - void mouseMoveEvent(QMouseEvent* event) override; - void mousePressEvent(QMouseEvent* event) override; - -private: - GraphicsSurfaceWidget* surface_widget; -}; - -class GraphicsSurfaceWidget : public BreakPointObserverDock { - Q_OBJECT - - using Event = Tegra::DebugContext::Event; - - enum class Source { - RenderTarget0 = 0, - RenderTarget1 = 1, - RenderTarget2 = 2, - RenderTarget3 = 3, - RenderTarget4 = 4, - RenderTarget5 = 5, - RenderTarget6 = 6, - RenderTarget7 = 7, - ZBuffer = 8, - Custom = 9, - }; - -public: - explicit GraphicsSurfaceWidget(std::shared_ptr<Tegra::DebugContext> debug_context, - QWidget* parent = nullptr); - void Pick(int x, int y); - -public slots: - void OnSurfaceSourceChanged(int new_value); - void OnSurfaceAddressChanged(qint64 new_value); - void OnSurfaceWidthChanged(int new_value); - void OnSurfaceHeightChanged(int new_value); - void OnSurfaceFormatChanged(int new_value); - void OnSurfacePickerXChanged(int new_value); - void OnSurfacePickerYChanged(int new_value); - void OnUpdate(); - -signals: - void Update(); - -private: - void OnBreakPointHit(Tegra::DebugContext::Event event, void* data) override; - void OnResumed() override; - - void SaveSurface(); - - QComboBox* surface_source_list; - CSpinBox* surface_address_control; - QSpinBox* surface_width_control; - QSpinBox* surface_height_control; - QComboBox* surface_format_control; - - SurfacePicture* surface_picture_label; - QSpinBox* surface_picker_x_control; - QSpinBox* surface_picker_y_control; - QLabel* surface_info_label; - QPushButton* save_surface; - - Source surface_source; - GPUVAddr surface_address; - unsigned surface_width; - unsigned surface_height; - Tegra::Texture::TextureFormat surface_format; - int surface_picker_x = 0; - int surface_picker_y = 0; -}; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index d5a328d92..ca231d710 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -90,7 +90,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "yuzu/configuration/configure_dialog.h" #include "yuzu/debugger/console.h" #include "yuzu/debugger/graphics/graphics_breakpoints.h" -#include "yuzu/debugger/graphics/graphics_surface.h" #include "yuzu/debugger/profiler.h" #include "yuzu/debugger/wait_tree.h" #include "yuzu/discord.h" @@ -483,11 +482,6 @@ void GMainWindow::InitializeDebugWidgets() { graphicsBreakpointsWidget->hide(); debug_menu->addAction(graphicsBreakpointsWidget->toggleViewAction()); - graphicsSurfaceWidget = new GraphicsSurfaceWidget(debug_context, this); - addDockWidget(Qt::RightDockWidgetArea, graphicsSurfaceWidget); - graphicsSurfaceWidget->hide(); - debug_menu->addAction(graphicsSurfaceWidget->toggleViewAction()); - waitTreeWidget = new WaitTreeWidget(this); addDockWidget(Qt::LeftDockWidgetArea, waitTreeWidget); waitTreeWidget->hide(); diff --git a/src/yuzu/main.h b/src/yuzu/main.h index c727e942c..85e3810f2 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -23,7 +23,6 @@ class EmuThread; class GameList; class GImageInfo; class GraphicsBreakPointsWidget; -class GraphicsSurfaceWidget; class GRenderWindow; class LoadingScreen; class MicroProfileDialog; @@ -240,7 +239,6 @@ private: ProfilerWidget* profilerWidget; MicroProfileDialog* microProfileDialog; GraphicsBreakPointsWidget* graphicsBreakpointsWidget; - GraphicsSurfaceWidget* graphicsSurfaceWidget; WaitTreeWidget* waitTreeWidget; QAction* actions_recent_files[max_recent_files_item]; |