diff options
Diffstat (limited to 'src')
119 files changed, 1879 insertions, 803 deletions
diff --git a/src/common/lz4_compression.h b/src/common/lz4_compression.h index fe2231a6c..4c16f6e03 100644 --- a/src/common/lz4_compression.h +++ b/src/common/lz4_compression.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <vector> #include "common/common_types.h" diff --git a/src/common/zstd_compression.h b/src/common/zstd_compression.h index e0a64b035..e9de941c8 100644 --- a/src/common/zstd_compression.h +++ b/src/common/zstd_compression.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <vector> #include "common/common_types.h" diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 4dfd41b43..978b1518f 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -7,6 +7,10 @@ #include <array> #include "common/common_types.h" +namespace Common { +struct PageTable; +} + namespace Kernel { enum class VMAPermission : u8; } @@ -49,8 +53,14 @@ public: /// Clear all instruction cache virtual void ClearInstructionCache() = 0; - /// Notify CPU emulation that page tables have changed - virtual void PageTableChanged() = 0; + /// Notifies CPU emulation that the current page table has changed. + /// + /// @param new_page_table The new page table. + /// @param new_address_space_size_in_bits The new usable size of the address space in bits. + /// This can be either 32, 36, or 39 on official software. 
+ /// + virtual void PageTableChanged(Common::PageTable& new_page_table, + std::size_t new_address_space_size_in_bits) = 0; /** * Set the Program Counter to an address diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index dc96e35d5..44307fa19 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -14,7 +14,6 @@ #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/gdbstub/gdbstub.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/svc.h" #include "core/hle/kernel/vm_manager.h" @@ -129,18 +128,16 @@ public: u64 tpidr_el0 = 0; }; -std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const { - auto* current_process = system.Kernel().CurrentProcess(); - auto** const page_table = current_process->VMManager().page_table.pointers.data(); - +std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& page_table, + std::size_t address_space_bits) const { Dynarmic::A64::UserConfig config; // Callbacks config.callbacks = cb.get(); // Memory - config.page_table = reinterpret_cast<void**>(page_table); - config.page_table_address_space_bits = current_process->VMManager().GetAddressSpaceWidth(); + config.page_table = reinterpret_cast<void**>(page_table.pointers.data()); + config.page_table_address_space_bits = address_space_bits; config.silently_mirror_page_table = false; // Multi-process state @@ -176,12 +173,7 @@ ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index) : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system}, core_index{core_index}, system{system}, - exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} { - ThreadContext ctx{}; - inner_unicorn.SaveContext(ctx); - PageTableChanged(); - LoadContext(ctx); -} + exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} 
{} ARM_Dynarmic::~ARM_Dynarmic() = default; @@ -276,8 +268,9 @@ void ARM_Dynarmic::ClearExclusiveState() { jit->ClearExclusiveState(); } -void ARM_Dynarmic::PageTableChanged() { - jit = MakeJit(); +void ARM_Dynarmic::PageTableChanged(Common::PageTable& page_table, + std::size_t new_address_space_size_in_bits) { + jit = MakeJit(page_table, new_address_space_size_in_bits); } DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {} diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index c1db254e8..b701e97a3 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h @@ -48,10 +48,12 @@ public: void ClearExclusiveState() override; void ClearInstructionCache() override; - void PageTableChanged() override; + void PageTableChanged(Common::PageTable& new_page_table, + std::size_t new_address_space_size_in_bits) override; private: - std::unique_ptr<Dynarmic::A64::Jit> MakeJit() const; + std::unique_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table, + std::size_t address_space_bits) const; friend class ARM_Dynarmic_Callbacks; std::unique_ptr<ARM_Dynarmic_Callbacks> cb; diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 209fc16ad..34e974b4d 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -41,7 +41,7 @@ public: void Run() override; void Step() override; void ClearInstructionCache() override; - void PageTableChanged() override{}; + void PageTableChanged(Common::PageTable&, std::size_t) override {} void RecordBreak(GDBStub::BreakpointAddress bkpt); private: diff --git a/src/core/core.cpp b/src/core/core.cpp index bc9e887b6..175a5f2ea 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -3,9 +3,7 @@ // Refer to the license.txt file included. 
#include <array> -#include <map> #include <memory> -#include <thread> #include <utility> #include "common/file_util.h" @@ -38,8 +36,6 @@ #include "frontend/applets/software_keyboard.h" #include "frontend/applets/web_browser.h" #include "video_core/debug_utils/debug_utils.h" -#include "video_core/gpu_asynch.h" -#include "video_core/gpu_synch.h" #include "video_core/renderer_base.h" #include "video_core/video_core.h" @@ -81,7 +77,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, return vfs->OpenFile(path, FileSys::Mode::Read); } struct System::Impl { - explicit Impl(System& system) : kernel{system} {} + explicit Impl(System& system) : kernel{system}, cpu_core_manager{system} {} Cpu& CurrentCpuCore() { return cpu_core_manager.GetCurrentCore(); @@ -99,6 +95,7 @@ struct System::Impl { LOG_DEBUG(HW_Memory, "initialized OK"); core_timing.Initialize(); + cpu_core_manager.Initialize(); kernel.Initialize(); const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( @@ -120,9 +117,6 @@ struct System::Impl { if (web_browser == nullptr) web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>(); - auto main_process = Kernel::Process::Create(system, "main"); - kernel.MakeCurrentProcess(main_process.get()); - telemetry_session = std::make_unique<Core::TelemetrySession>(); service_manager = std::make_shared<Service::SM::ServiceManager>(); @@ -134,15 +128,9 @@ struct System::Impl { return ResultStatus::ErrorVideoCore; } - is_powered_on = true; - - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer); - } else { - gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer); - } + gpu_core = VideoCore::CreateGPU(system); - cpu_core_manager.Initialize(system); + is_powered_on = true; LOG_DEBUG(Core, "Initialized OK"); @@ -179,7 +167,8 @@ struct System::Impl { return init_result; } - const Loader::ResultStatus 
load_result{app_loader->Load(*kernel.CurrentProcess())}; + auto main_process = Kernel::Process::Create(system, "main"); + const auto [load_result, load_parameters] = app_loader->Load(*main_process); if (load_result != Loader::ResultStatus::Success) { LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", static_cast<int>(load_result)); Shutdown(); @@ -187,6 +176,16 @@ struct System::Impl { return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) + static_cast<u32>(load_result)); } + kernel.MakeCurrentProcess(main_process.get()); + + // Main process has been loaded and been made current. + // Begin GPU and CPU execution. + gpu_core->Start(); + cpu_core_manager.StartThreads(); + + // All threads are started, begin main process execution, now that we're in the clear. + main_process->Run(load_parameters->main_thread_priority, + load_parameters->main_thread_stack_size); status = ResultStatus::Success; return status; diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp index 93bc5619c..8fcb4eeb1 100644 --- a/src/core/cpu_core_manager.cpp +++ b/src/core/cpu_core_manager.cpp @@ -19,17 +19,19 @@ void RunCpuCore(const System& system, Cpu& cpu_state) { } } // Anonymous namespace -CpuCoreManager::CpuCoreManager() = default; +CpuCoreManager::CpuCoreManager(System& system) : system{system} {} CpuCoreManager::~CpuCoreManager() = default; -void CpuCoreManager::Initialize(System& system) { +void CpuCoreManager::Initialize() { barrier = std::make_unique<CpuBarrier>(); exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size()); for (std::size_t index = 0; index < cores.size(); ++index) { cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index); } +} +void CpuCoreManager::StartThreads() { // Create threads for CPU cores 1-3, and build thread_to_cpu map // CPU core 0 is run on the main thread thread_to_cpu[std::this_thread::get_id()] = cores[0].get(); diff --git a/src/core/cpu_core_manager.h b/src/core/cpu_core_manager.h 
index a4d70ec56..2cbbf8216 100644 --- a/src/core/cpu_core_manager.h +++ b/src/core/cpu_core_manager.h @@ -18,7 +18,7 @@ class System; class CpuCoreManager { public: - CpuCoreManager(); + explicit CpuCoreManager(System& system); CpuCoreManager(const CpuCoreManager&) = delete; CpuCoreManager(CpuCoreManager&&) = delete; @@ -27,7 +27,8 @@ public: CpuCoreManager& operator=(const CpuCoreManager&) = delete; CpuCoreManager& operator=(CpuCoreManager&&) = delete; - void Initialize(System& system); + void Initialize(); + void StartThreads(); void Shutdown(); Cpu& GetCore(std::size_t index); @@ -54,6 +55,8 @@ private: /// Map of guest threads to CPU cores std::map<std::thread::id, Cpu*> thread_to_cpu; + + System& system; }; } // namespace Core diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 4d58e7c69..8539fabe4 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -182,7 +182,12 @@ void KernelCore::AppendNewProcess(SharedPtr<Process> process) { void KernelCore::MakeCurrentProcess(Process* process) { impl->current_process = process; - Memory::SetCurrentPageTable(&process->VMManager().page_table); + + if (process == nullptr) { + return; + } + + Memory::SetCurrentPageTable(*process); } Process* KernelCore::CurrentProcess() { diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 4e94048da..6d7a7e754 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -28,21 +28,20 @@ namespace { * * @param owner_process The parent process for the main thread * @param kernel The kernel instance to create the main thread under. 
- * @param entry_point The address at which the thread should start execution * @param priority The priority to give the main thread */ -void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) { - // Initialize new "main" thread - const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress(); +void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) { + const auto& vm_manager = owner_process.VMManager(); + const VAddr entry_point = vm_manager.GetCodeRegionBaseAddress(); + const VAddr stack_top = vm_manager.GetTLSIORegionEndAddress(); auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, owner_process.GetIdealCore(), stack_top, owner_process); SharedPtr<Thread> thread = std::move(thread_res).Unwrap(); // Register 1 must be a handle to the main thread - const Handle guest_handle = owner_process.GetHandleTable().Create(thread).Unwrap(); - thread->SetGuestHandle(guest_handle); - thread->GetContext().cpu_registers[1] = guest_handle; + const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap(); + thread->GetContext().cpu_registers[1] = thread_handle; // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires thread->ResumeFromWait(); @@ -106,8 +105,6 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { is_64bit_process = metadata.Is64BitProgram(); vm_manager.Reset(metadata.GetAddressSpaceType()); - // Ensure that the potentially resized page table is seen by CPU backends. 
- Memory::SetCurrentPageTable(&vm_manager.page_table); const auto& caps = metadata.GetKernelCapabilities(); const auto capability_init_result = @@ -119,7 +116,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { return handle_table.SetSize(capabilities.GetHandleTableSize()); } -void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) { +void Process::Run(s32 main_thread_priority, u64 stack_size) { // The kernel always ensures that the given stack size is page aligned. main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); @@ -135,7 +132,7 @@ void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) { vm_manager.LogLayout(); ChangeStatus(ProcessStatus::Running); - SetupMainThread(*this, kernel, entry_point, main_thread_priority); + SetupMainThread(*this, kernel, main_thread_priority); } void Process::PrepareForTermination() { @@ -242,9 +239,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) { MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData); code_memory_size += module_.memory.size(); - - // Clear instruction cache in CPU JIT - system.InvalidateCpuInstructionCaches(); } Process::Process(Core::System& system) diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index dda52f4c0..bf3b7eef3 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -225,9 +225,12 @@ public: ResultCode LoadFromMetadata(const FileSys::ProgramMetadata& metadata); /** - * Applies address space changes and launches the process main thread. + * Starts the main application thread for this process. + * + * @param main_thread_priority The priority for the main thread. + * @param stack_size The stack size for the main thread in bytes. 
*/ - void Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size); + void Run(s32 main_thread_priority, u64 stack_size); /** * Prepares a process for termination by stopping all of its threads diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index e5d4d6b55..4c763b288 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1189,6 +1189,142 @@ static ResultCode QueryMemory(Core::System& system, VAddr memory_info_address, query_address); } +static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address, + u64 src_address, u64 size) { + LOG_DEBUG(Kernel_SVC, + "called. process_handle=0x{:08X}, dst_address=0x{:016X}, " + "src_address=0x{:016X}, size=0x{:016X}", + process_handle, dst_address, src_address, size); + + if (!Common::Is4KBAligned(src_address)) { + LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", + src_address); + return ERR_INVALID_ADDRESS; + } + + if (!Common::Is4KBAligned(dst_address)) { + LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", + dst_address); + return ERR_INVALID_ADDRESS; + } + + if (size == 0 || !Common::Is4KBAligned(size)) { + LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size); + return ERR_INVALID_SIZE; + } + + if (!IsValidAddressRange(dst_address, size)) { + LOG_ERROR(Kernel_SVC, + "Destination address range overflows the address space (dst_address=0x{:016X}, " + "size=0x{:016X}).", + dst_address, size); + return ERR_INVALID_ADDRESS_STATE; + } + + if (!IsValidAddressRange(src_address, size)) { + LOG_ERROR(Kernel_SVC, + "Source address range overflows the address space (src_address=0x{:016X}, " + "size=0x{:016X}).", + src_address, size); + return ERR_INVALID_ADDRESS_STATE; + } + + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); + auto process = handle_table.Get<Process>(process_handle); + if (!process) { + LOG_ERROR(Kernel_SVC, 
"Invalid process handle specified (handle=0x{:08X}).", + process_handle); + return ERR_INVALID_HANDLE; + } + + auto& vm_manager = process->VMManager(); + if (!vm_manager.IsWithinAddressSpace(src_address, size)) { + LOG_ERROR(Kernel_SVC, + "Source address range is not within the address space (src_address=0x{:016X}, " + "size=0x{:016X}).", + src_address, size); + return ERR_INVALID_ADDRESS_STATE; + } + + if (!vm_manager.IsWithinASLRRegion(dst_address, size)) { + LOG_ERROR(Kernel_SVC, + "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " + "size=0x{:016X}).", + dst_address, size); + return ERR_INVALID_MEMORY_RANGE; + } + + return vm_manager.MapCodeMemory(dst_address, src_address, size); +} + +static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address, + u64 src_address, u64 size) { + LOG_DEBUG(Kernel_SVC, + "called. process_handle=0x{:08X}, dst_address=0x{:016X}, src_address=0x{:016X}, " + "size=0x{:016X}", + process_handle, dst_address, src_address, size); + + if (!Common::Is4KBAligned(dst_address)) { + LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", + dst_address); + return ERR_INVALID_ADDRESS; + } + + if (!Common::Is4KBAligned(src_address)) { + LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", + src_address); + return ERR_INVALID_ADDRESS; + } + + if (size == 0 || !Common::Is4KBAligned(size)) { + LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size); + return ERR_INVALID_SIZE; + } + + if (!IsValidAddressRange(dst_address, size)) { + LOG_ERROR(Kernel_SVC, + "Destination address range overflows the address space (dst_address=0x{:016X}, " + "size=0x{:016X}).", + dst_address, size); + return ERR_INVALID_ADDRESS_STATE; + } + + if (!IsValidAddressRange(src_address, size)) { + LOG_ERROR(Kernel_SVC, + "Source address range overflows the address space (src_address=0x{:016X}, " + "size=0x{:016X}).", + src_address, 
size); + return ERR_INVALID_ADDRESS_STATE; + } + + const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); + auto process = handle_table.Get<Process>(process_handle); + if (!process) { + LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", + process_handle); + return ERR_INVALID_HANDLE; + } + + auto& vm_manager = process->VMManager(); + if (!vm_manager.IsWithinAddressSpace(src_address, size)) { + LOG_ERROR(Kernel_SVC, + "Source address range is not within the address space (src_address=0x{:016X}, " + "size=0x{:016X}).", + src_address, size); + return ERR_INVALID_ADDRESS_STATE; + } + + if (!vm_manager.IsWithinASLRRegion(dst_address, size)) { + LOG_ERROR(Kernel_SVC, + "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " + "size=0x{:016X}).", + dst_address, size); + return ERR_INVALID_MEMORY_RANGE; + } + + return vm_manager.UnmapCodeMemory(dst_address, src_address, size); +} + /// Exits the current process static void ExitProcess(Core::System& system) { auto* current_process = system.Kernel().CurrentProcess(); @@ -1244,20 +1380,22 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e return ERR_INVALID_THREAD_PRIORITY; } - const std::string name = fmt::format("thread-{:X}", entry_point); auto& kernel = system.Kernel(); CASCADE_RESULT(SharedPtr<Thread> thread, - Thread::Create(kernel, name, entry_point, priority, arg, processor_id, stack_top, + Thread::Create(kernel, "", entry_point, priority, arg, processor_id, stack_top, *current_process)); - const auto new_guest_handle = current_process->GetHandleTable().Create(thread); - if (new_guest_handle.Failed()) { + const auto new_thread_handle = current_process->GetHandleTable().Create(thread); + if (new_thread_handle.Failed()) { LOG_ERROR(Kernel_SVC, "Failed to create handle with error=0x{:X}", - new_guest_handle.Code().raw); - return new_guest_handle.Code(); + new_thread_handle.Code().raw); + return 
new_thread_handle.Code(); } - thread->SetGuestHandle(*new_guest_handle); - *out_handle = *new_guest_handle; + *out_handle = *new_thread_handle; + + // Set the thread name for debugging purposes. + thread->SetName( + fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); @@ -2152,7 +2290,7 @@ static const FunctionDef SVC_Table[] = { {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"}, - {0x36, nullptr, "Unknown"}, + {0x36, nullptr, "SynchronizePreemptionState"}, {0x37, nullptr, "Unknown"}, {0x38, nullptr, "Unknown"}, {0x39, nullptr, "Unknown"}, @@ -2217,8 +2355,8 @@ static const FunctionDef SVC_Table[] = { {0x74, nullptr, "MapProcessMemory"}, {0x75, nullptr, "UnmapProcessMemory"}, {0x76, SvcWrap<QueryProcessMemory>, "QueryProcessMemory"}, - {0x77, nullptr, "MapProcessCodeMemory"}, - {0x78, nullptr, "UnmapProcessCodeMemory"}, + {0x77, SvcWrap<MapProcessCodeMemory>, "MapProcessCodeMemory"}, + {0x78, SvcWrap<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"}, {0x79, nullptr, "CreateProcess"}, {0x7A, nullptr, "StartProcess"}, {0x7B, nullptr, "TerminateProcess"}, diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index b3690b5f3..865473c6f 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -44,6 +44,13 @@ void SvcWrap(Core::System& system) { func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw); } +template <ResultCode func(Core::System&, u32, u64, u64, u64)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), + Param(system, 2), Param(system, 3)) + .raw); +} + template <ResultCode func(Core::System&, u32*)> void SvcWrap(Core::System& system) { u32 param = 0; diff --git a/src/core/hle/kernel/thread.cpp 
b/src/core/hle/kernel/thread.cpp index 1b891f632..ca52267b2 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -220,11 +220,6 @@ void Thread::SetPriority(u32 priority) { UpdatePriority(); } -void Thread::BoostPriority(u32 priority) { - scheduler->SetThreadPriority(this, priority); - current_priority = priority; -} - void Thread::SetWaitSynchronizationResult(ResultCode result) { context.cpu_registers[0] = result.raw; } diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 83c83e45a..411a73b49 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -102,6 +102,11 @@ public: std::string GetName() const override { return name; } + + void SetName(std::string new_name) { + name = std::move(new_name); + } + std::string GetTypeName() const override { return "Thread"; } @@ -136,12 +141,6 @@ public: */ void SetPriority(u32 priority); - /** - * Temporarily boosts the thread's priority until the next time it is scheduled - * @param priority The new priority - */ - void BoostPriority(u32 priority); - /// Adds a thread to the list of threads that are waiting for a lock held by this thread. void AddMutexWaiter(SharedPtr<Thread> thread); @@ -345,10 +344,6 @@ public: arb_wait_address = address; } - void SetGuestHandle(Handle handle) { - guest_handle = handle; - } - bool HasWakeupCallback() const { return wakeup_callback != nullptr; } @@ -442,9 +437,6 @@ private: /// If waiting for an AddressArbiter, this is the address being waited on. VAddr arb_wait_address{0}; - /// Handle used by guest emulated application to access this thread - Handle guest_handle = 0; - /// Handle used as userdata to reference this object when inserting into the CoreTiming queue. 
Handle callback_handle = 0; diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index ec0a480ce..f0c0c12fc 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -302,6 +302,86 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { return MakeResult<VAddr>(heap_region_base); } +ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { + constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; + const auto src_check_result = CheckRangeState( + src_address, size, MemoryState::All, MemoryState::Heap, VMAPermission::All, + VMAPermission::ReadWrite, MemoryAttribute::Mask, MemoryAttribute::None, ignore_attribute); + + if (src_check_result.Failed()) { + return src_check_result.Code(); + } + + const auto mirror_result = + MirrorMemory(dst_address, src_address, size, MemoryState::ModuleCode); + if (mirror_result.IsError()) { + return mirror_result; + } + + // Ensure we lock the source memory region. + const auto src_vma_result = CarveVMARange(src_address, size); + if (src_vma_result.Failed()) { + return src_vma_result.Code(); + } + auto src_vma_iter = *src_vma_result; + src_vma_iter->second.attribute = MemoryAttribute::Locked; + Reprotect(src_vma_iter, VMAPermission::Read); + + // The destination memory region is fine as is, however we need to make it read-only. 
+ return ReprotectRange(dst_address, size, VMAPermission::Read); +} + +ResultCode VMManager::UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { + constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; + const auto src_check_result = CheckRangeState( + src_address, size, MemoryState::All, MemoryState::Heap, VMAPermission::None, + VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::Locked, ignore_attribute); + + if (src_check_result.Failed()) { + return src_check_result.Code(); + } + + // Yes, the kernel only checks the first page of the region. + const auto dst_check_result = + CheckRangeState(dst_address, Memory::PAGE_SIZE, MemoryState::FlagModule, + MemoryState::FlagModule, VMAPermission::None, VMAPermission::None, + MemoryAttribute::Mask, MemoryAttribute::None, ignore_attribute); + + if (dst_check_result.Failed()) { + return dst_check_result.Code(); + } + + const auto dst_memory_state = std::get<MemoryState>(*dst_check_result); + const auto dst_contiguous_check_result = CheckRangeState( + dst_address, size, MemoryState::All, dst_memory_state, VMAPermission::None, + VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::None, ignore_attribute); + + if (dst_contiguous_check_result.Failed()) { + return dst_contiguous_check_result.Code(); + } + + const auto unmap_result = UnmapRange(dst_address, size); + if (unmap_result.IsError()) { + return unmap_result; + } + + // With the mirrored portion unmapped, restore the original region's traits. 
+ const auto src_vma_result = CarveVMARange(src_address, size); + if (src_vma_result.Failed()) { + return src_vma_result.Code(); + } + auto src_vma_iter = *src_vma_result; + src_vma_iter->second.state = MemoryState::Heap; + src_vma_iter->second.attribute = MemoryAttribute::None; + Reprotect(src_vma_iter, VMAPermission::ReadWrite); + + if (dst_memory_state == MemoryState::ModuleCode) { + Core::System::GetInstance().InvalidateCpuInstructionCaches(); + } + + return unmap_result; +} + MemoryInfo VMManager::QueryMemory(VAddr address) const { const auto vma = FindVMA(address); MemoryInfo memory_info{}; diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index 6f484b7bf..288eb9450 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h @@ -43,6 +43,9 @@ enum class VMAPermission : u8 { ReadExecute = Read | Execute, WriteExecute = Write | Execute, ReadWriteExecute = Read | Write | Execute, + + // Used as a wildcard when checking permissions across memory ranges + All = 0xFF, }; constexpr VMAPermission operator|(VMAPermission lhs, VMAPermission rhs) { @@ -152,6 +155,9 @@ enum class MemoryState : u32 { FlagUncached = 1U << 24, FlagCodeMemory = 1U << 25, + // Wildcard used in range checking to indicate all states. + All = 0xFFFFFFFF, + // Convenience flag sets to reduce repetition IPCFlags = FlagIPC0 | FlagIPC3 | FlagIPC1, @@ -415,6 +421,49 @@ public: /// ResultVal<VAddr> SetHeapSize(u64 size); + /// Maps a region of memory as code memory. + /// + /// @param dst_address The base address of the region to create the aliasing memory region. + /// @param src_address The base address of the region to be aliased. + /// @param size The total amount of memory to map in bytes. + /// + /// @pre Both memory regions lie within the actual addressable address space. 
+ /// + /// @post After this function finishes execution, assuming success, then the address range + /// [dst_address, dst_address+size) will alias the memory region, + /// [src_address, src_address+size). + /// <p> + /// What this also entails is as follows: + /// 1. The aliased region gains the Locked memory attribute. + /// 2. The aliased region becomes read-only. + /// 3. The aliasing region becomes read-only. + /// 4. The aliasing region is created with a memory state of MemoryState::ModuleCode. + /// + ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size); + + /// Unmaps a region of memory designated as code module memory. + /// + /// @param dst_address The base address of the memory region aliasing the source memory region. + /// @param src_address The base address of the memory region being aliased. + /// @param size The size of the memory region to unmap in bytes. + /// + /// @pre Both memory ranges lie within the actual addressable address space. + /// + /// @pre The memory region being unmapped has previously been mapped + /// by a call to MapCodeMemory. + /// + /// @post After execution of the function, if successful, the aliasing memory region + /// will be unmapped and the aliased region will have various traits about it + /// restored to what they were prior to the original mapping call preceding + /// this function call. + /// <p> + /// What this also entails is as follows: + /// 1. The state of the memory region will now indicate a general heap region. + /// 2. All memory attributes for the memory region are cleared. + /// 3. Memory permissions for the region are restored to user read/write. + /// + ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64 size); + /// Queries the memory manager for information about the given address. /// /// @param address The address to query the memory manager about for information. 
diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index 90580ed93..c8eaf9488 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -30,7 +30,7 @@ void WaitObject::RemoveWaitingThread(Thread* thread) { waiting_threads.erase(itr); } -SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() { +SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() const { Thread* candidate = nullptr; u32 candidate_priority = THREADPRIO_LOWEST + 1; diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/wait_object.h index 04464a51a..3271a30a7 100644 --- a/src/core/hle/kernel/wait_object.h +++ b/src/core/hle/kernel/wait_object.h @@ -54,7 +54,7 @@ public: void WakeupWaitingThread(SharedPtr<Thread> thread); /// Obtains the highest priority thread that is ready to run from this object's waiting list. - SharedPtr<Thread> GetHighestPriorityReadyThread(); + SharedPtr<Thread> GetHighestPriorityReadyThread() const; /// Get a const reference to the waiting threads list for debug use const std::vector<SharedPtr<Thread>>& GetWaitingThreads() const; diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index 1f8ed265e..ba7d7acbd 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -137,6 +137,7 @@ private: class IManagerForApplication final : public ServiceFramework<IManagerForApplication> { public: IManagerForApplication() : ServiceFramework("IManagerForApplication") { + // clang-format off static const FunctionInfo functions[] = { {0, &IManagerForApplication::CheckAvailability, "CheckAvailability"}, {1, &IManagerForApplication::GetAccountId, "GetAccountId"}, @@ -145,7 +146,10 @@ public: {130, nullptr, "GetNintendoAccountUserResourceCacheForApplication"}, {150, nullptr, "CreateAuthorizationRequest"}, {160, nullptr, "StoreOpenContext"}, + {170, nullptr, "LoadNetworkServiceLicenseKindAsync"}, }; + // clang-format on + 
RegisterHandlers(functions); } diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp index 5e2030355..d66233cad 100644 --- a/src/core/hle/service/acc/acc_su.cpp +++ b/src/core/hle/service/acc/acc_su.cpp @@ -8,6 +8,7 @@ namespace Service::Account { ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> profile_manager) : Module::Interface(std::move(module), std::move(profile_manager), "acc:su") { + // clang-format off static const FunctionInfo functions[] = { {0, &ACC_SU::GetUserCount, "GetUserCount"}, {1, &ACC_SU::GetUserExistence, "GetUserExistence"}, @@ -19,6 +20,7 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, {51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, {60, nullptr, "ListOpenContextStoredUsers"}, + {99, nullptr, "DebugActivateOpenContextRetention"}, {100, nullptr, "GetUserRegistrationNotifier"}, {101, nullptr, "GetUserStateChangeNotifier"}, {102, nullptr, "GetBaasAccountManagerForSystemService"}, @@ -29,6 +31,8 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {111, nullptr, "ClearSaveDataThumbnail"}, {112, nullptr, "LoadSaveDataThumbnail"}, {113, nullptr, "GetSaveDataThumbnailExistence"}, + {130, nullptr, "ActivateOpenContextRetention"}, + {140, nullptr, "ListQualifiedUsers"}, {190, nullptr, "GetUserLastOpenedApplication"}, {191, nullptr, "ActivateOpenContextHolder"}, {200, nullptr, "BeginUserRegistration"}, @@ -48,6 +52,8 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {998, nullptr, "DebugSetUserStateClose"}, {999, nullptr, "DebugSetUserStateOpen"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/acc/acc_u0.cpp b/src/core/hle/service/acc/acc_u0.cpp index a4d705b45..182f7c7e5 100644 --- a/src/core/hle/service/acc/acc_u0.cpp +++ 
b/src/core/hle/service/acc/acc_u0.cpp @@ -8,6 +8,7 @@ namespace Service::Account { ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> profile_manager) : Module::Interface(std::move(module), std::move(profile_manager), "acc:u0") { + // clang-format off static const FunctionInfo functions[] = { {0, &ACC_U0::GetUserCount, "GetUserCount"}, {1, &ACC_U0::GetUserExistence, "GetUserExistence"}, @@ -19,6 +20,7 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, {60, nullptr, "ListOpenContextStoredUsers"}, + {99, nullptr, "DebugActivateOpenContextRetention"}, {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"}, {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"}, {102, nullptr, "AuthenticateApplicationAsync"}, @@ -27,7 +29,13 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {111, nullptr, "ClearSaveDataThumbnail"}, {120, nullptr, "CreateGuestLoginRequest"}, {130, nullptr, "LoadOpenContext"}, + {131, nullptr, "ListOpenContextStoredUsers"}, + {140, nullptr, "InitializeApplicationInfo"}, + {141, nullptr, "ListQualifiedUsers"}, + {150, nullptr, "IsUserAccountSwitchLocked"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/acc/acc_u1.cpp b/src/core/hle/service/acc/acc_u1.cpp index 8fffc93b5..2dd17d935 100644 --- a/src/core/hle/service/acc/acc_u1.cpp +++ b/src/core/hle/service/acc/acc_u1.cpp @@ -8,6 +8,7 @@ namespace Service::Account { ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> profile_manager) : Module::Interface(std::move(module), std::move(profile_manager), "acc:u1") { + // clang-format off static const FunctionInfo functions[] = { {0, &ACC_U1::GetUserCount, "GetUserCount"}, {1, 
&ACC_U1::GetUserExistence, "GetUserExistence"}, @@ -19,6 +20,7 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, {51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, {60, nullptr, "ListOpenContextStoredUsers"}, + {99, nullptr, "DebugActivateOpenContextRetention"}, {100, nullptr, "GetUserRegistrationNotifier"}, {101, nullptr, "GetUserStateChangeNotifier"}, {102, nullptr, "GetBaasAccountManagerForSystemService"}, @@ -29,12 +31,16 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {111, nullptr, "ClearSaveDataThumbnail"}, {112, nullptr, "LoadSaveDataThumbnail"}, {113, nullptr, "GetSaveDataThumbnailExistence"}, + {130, nullptr, "ActivateOpenContextRetention"}, + {140, nullptr, "ListQualifiedUsers"}, {190, nullptr, "GetUserLastOpenedApplication"}, {191, nullptr, "ActivateOpenContextHolder"}, {997, nullptr, "DebugInvalidateTokenCacheForUser"}, {998, nullptr, "DebugSetUserStateClose"}, {999, nullptr, "DebugSetUserStateOpen"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 85271d418..1aa4ce1ac 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -224,6 +224,7 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} { {20, nullptr, "InvalidateTransitionLayer"}, {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"}, {40, nullptr, "GetAppletResourceUsageInfo"}, + {41, nullptr, "SetCpuBoostModeForApplet"}, }; // clang-format on @@ -256,6 +257,7 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger {40, &ISelfController::CreateManagedDisplayLayer, "CreateManagedDisplayLayer"}, {41, nullptr, "IsSystemBufferSharingEnabled"}, {42, nullptr, "GetSystemSharedLayerHandle"}, + {43, nullptr, 
"GetSystemSharedBufferHandle"}, {50, &ISelfController::SetHandlesRequestToDisplay, "SetHandlesRequestToDisplay"}, {51, nullptr, "ApproveToDisplay"}, {60, nullptr, "OverrideAutoSleepTimeAndDimmingTime"}, @@ -269,9 +271,11 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger {68, nullptr, "SetAutoSleepDisabled"}, {69, nullptr, "IsAutoSleepDisabled"}, {70, nullptr, "ReportMultimediaError"}, + {71, nullptr, "GetCurrentIlluminanceEx"}, {80, nullptr, "SetWirelessPriorityMode"}, {90, nullptr, "GetAccumulatedSuspendedTickValue"}, {91, nullptr, "GetAccumulatedSuspendedTickChangedEvent"}, + {100, nullptr, "SetAlbumImageTakenNotificationEnabled"}, {1000, nullptr, "GetDebugStorageChannel"}, }; // clang-format on @@ -516,11 +520,20 @@ ICommonStateGetter::ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_q {50, nullptr, "IsVrModeEnabled"}, {51, nullptr, "SetVrModeEnabled"}, {52, nullptr, "SwitchLcdBacklight"}, + {53, nullptr, "BeginVrModeEx"}, + {54, nullptr, "EndVrModeEx"}, {55, nullptr, "IsInControllerFirmwareUpdateSection"}, {60, &ICommonStateGetter::GetDefaultDisplayResolution, "GetDefaultDisplayResolution"}, {61, &ICommonStateGetter::GetDefaultDisplayResolutionChangeEvent, "GetDefaultDisplayResolutionChangeEvent"}, {62, nullptr, "GetHdcpAuthenticationState"}, {63, nullptr, "GetHdcpAuthenticationStateChangeEvent"}, + {64, nullptr, "SetTvPowerStateMatchingMode"}, + {65, nullptr, "GetApplicationIdByContentActionName"}, + {66, nullptr, "SetCpuBoostMode"}, + {80, nullptr, "PerformSystemButtonPressingIfInFocus"}, + {90, nullptr, "SetPerformanceConfigurationChangedNotification"}, + {91, nullptr, "GetCurrentPerformanceConfiguration"}, + {200, nullptr, "GetOperationModeSystemInfo"}, }; // clang-format on @@ -960,6 +973,8 @@ IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationF {11, nullptr, "CreateApplicationAndPushAndRequestToStartForQuest"}, {12, nullptr, "CreateApplicationAndRequestToStart"}, {13, 
&IApplicationFunctions::CreateApplicationAndRequestToStartForQuest, "CreateApplicationAndRequestToStartForQuest"}, + {14, nullptr, "CreateApplicationWithAttributeAndPushAndRequestToStartForQuest"}, + {15, nullptr, "CreateApplicationWithAttributeAndRequestToStartForQuest"}, {20, &IApplicationFunctions::EnsureSaveData, "EnsureSaveData"}, {21, &IApplicationFunctions::GetDesiredLanguage, "GetDesiredLanguage"}, {22, &IApplicationFunctions::SetTerminateResult, "SetTerminateResult"}, @@ -1233,6 +1248,7 @@ IGlobalStateController::IGlobalStateController() : ServiceFramework("IGlobalStat {2, nullptr, "StartSleepSequence"}, {3, nullptr, "StartShutdownSequence"}, {4, nullptr, "StartRebootSequence"}, + {9, nullptr, "IsAutoPowerDownRequested"}, {10, nullptr, "LoadAndApplyIdlePolicySettings"}, {11, nullptr, "NotifyCecSettingsChanged"}, {12, nullptr, "SetDefaultHomeButtonLongPressTime"}, diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp index b888f861d..488add8e7 100644 --- a/src/core/hle/service/am/applet_ae.cpp +++ b/src/core/hle/service/am/applet_ae.cpp @@ -16,6 +16,7 @@ public: std::shared_ptr<AppletMessageQueue> msg_queue) : ServiceFramework("ILibraryAppletProxy"), nvflinger(std::move(nvflinger)), msg_queue(std::move(msg_queue)) { + // clang-format off static const FunctionInfo functions[] = { {0, &ILibraryAppletProxy::GetCommonStateGetter, "GetCommonStateGetter"}, {1, &ILibraryAppletProxy::GetSelfController, "GetSelfController"}, @@ -25,8 +26,11 @@ public: {10, &ILibraryAppletProxy::GetProcessWindingController, "GetProcessWindingController"}, {11, &ILibraryAppletProxy::GetLibraryAppletCreator, "GetLibraryAppletCreator"}, {20, &ILibraryAppletProxy::GetApplicationFunctions, "GetApplicationFunctions"}, + {21, nullptr, "GetAppletCommonFunctions"}, {1000, &ILibraryAppletProxy::GetDebugFunctions, "GetDebugFunctions"}, }; + // clang-format on + RegisterHandlers(functions); } @@ -113,6 +117,7 @@ public: std::shared_ptr<AppletMessageQueue> 
msg_queue) : ServiceFramework("ISystemAppletProxy"), nvflinger(std::move(nvflinger)), msg_queue(std::move(msg_queue)) { + // clang-format off static const FunctionInfo functions[] = { {0, &ISystemAppletProxy::GetCommonStateGetter, "GetCommonStateGetter"}, {1, &ISystemAppletProxy::GetSelfController, "GetSelfController"}, @@ -124,8 +129,11 @@ public: {20, &ISystemAppletProxy::GetHomeMenuFunctions, "GetHomeMenuFunctions"}, {21, &ISystemAppletProxy::GetGlobalStateController, "GetGlobalStateController"}, {22, &ISystemAppletProxy::GetApplicationCreator, "GetApplicationCreator"}, + {23, nullptr, "GetAppletCommonFunctions"}, {1000, &ISystemAppletProxy::GetDebugFunctions, "GetDebugFunctions"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp index 2d768d9fc..51d8c26b4 100644 --- a/src/core/hle/service/aoc/aoc_u.cpp +++ b/src/core/hle/service/aoc/aoc_u.cpp @@ -50,6 +50,7 @@ static std::vector<u64> AccumulateAOCTitleIDs() { } AOC_U::AOC_U() : ServiceFramework("aoc:u"), add_on_content(AccumulateAOCTitleIDs()) { + // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "CountAddOnContentByApplicationId"}, {1, nullptr, "ListAddOnContentByApplicationId"}, @@ -60,7 +61,10 @@ AOC_U::AOC_U() : ServiceFramework("aoc:u"), add_on_content(AccumulateAOCTitleIDs {6, nullptr, "PrepareAddOnContentByApplicationId"}, {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"}, {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"}, + {100, nullptr, "CreateEcPurchasedEventManager"}, }; + // clang-format on + RegisterHandlers(functions); auto& kernel = Core::System::GetInstance().Kernel(); diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp index fcacbab72..d058c0245 100644 --- a/src/core/hle/service/apm/interface.cpp +++ b/src/core/hle/service/apm/interface.cpp @@ -87,6 +87,8 @@ APM_Sys::APM_Sys() : 
ServiceFramework{"apm:sys"} { {3, nullptr, "GetLastThrottlingState"}, {4, nullptr, "ClearLastThrottlingState"}, {5, nullptr, "LoadAndApplySettings"}, + {6, nullptr, "SetCpuBoostMode"}, + {7, nullptr, "GetCurrentPerformanceConfiguration"}, }; // clang-format on diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp index e5daefdde..d7f1d348d 100644 --- a/src/core/hle/service/audio/audin_u.cpp +++ b/src/core/hle/service/audio/audin_u.cpp @@ -25,6 +25,7 @@ public: {11, nullptr, "GetAudioInBufferCount"}, {12, nullptr, "SetAudioInDeviceGain"}, {13, nullptr, "GetAudioInDeviceGain"}, + {14, nullptr, "FlushAudioInBuffers"}, }; // clang-format on diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 39acb7b23..12875fb42 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -44,7 +44,7 @@ public: std::string&& unique_name) : ServiceFramework("IAudioOut"), audio_core(audio_core), device_name(std::move(device_name)), audio_params(audio_params) { - + // clang-format off static const FunctionInfo functions[] = { {0, &IAudioOut::GetAudioOutState, "GetAudioOutState"}, {1, &IAudioOut::StartAudioOut, "StartAudioOut"}, @@ -58,7 +58,10 @@ public: {9, &IAudioOut::GetAudioOutBufferCount, "GetAudioOutBufferCount"}, {10, nullptr, "GetAudioOutPlayedSampleCount"}, {11, nullptr, "FlushAudioOutBuffers"}, + {12, nullptr, "SetAudioOutVolume"}, + {13, nullptr, "GetAudioOutVolume"}, }; + // clang-format on RegisterHandlers(functions); // This is the event handle used to check if the audio buffer was released diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp index 59ef603e1..974ff8e1a 100644 --- a/src/core/hle/service/btdrv/btdrv.cpp +++ b/src/core/hle/service/btdrv/btdrv.cpp @@ -154,7 +154,8 @@ public: {96, nullptr, "GetLeHidEventInfo"}, {97, nullptr, "RegisterBleHidEvent"}, {98, nullptr, "SetLeScanParameter"}, - {256, nullptr, 
"GetIsManufacturingMode"} + {256, nullptr, "GetIsManufacturingMode"}, + {257, nullptr, "EmulateBluetoothCrash"}, }; // clang-format on diff --git a/src/core/hle/service/caps/caps.cpp b/src/core/hle/service/caps/caps.cpp index ae7b0720b..907f464ab 100644 --- a/src/core/hle/service/caps/caps.cpp +++ b/src/core/hle/service/caps/caps.cpp @@ -15,32 +15,41 @@ public: explicit CAPS_A() : ServiceFramework{"caps:a"} { // clang-format off static const FunctionInfo functions[] = { - {0, nullptr, "Unknown1"}, - {1, nullptr, "Unknown2"}, - {2, nullptr, "Unknown3"}, - {3, nullptr, "Unknown4"}, - {4, nullptr, "Unknown5"}, - {5, nullptr, "Unknown6"}, - {6, nullptr, "Unknown7"}, - {7, nullptr, "Unknown8"}, - {8, nullptr, "Unknown9"}, - {9, nullptr, "Unknown10"}, - {10, nullptr, "Unknown11"}, - {11, nullptr, "Unknown12"}, - {12, nullptr, "Unknown13"}, - {13, nullptr, "Unknown14"}, - {14, nullptr, "Unknown15"}, - {301, nullptr, "Unknown16"}, - {401, nullptr, "Unknown17"}, - {501, nullptr, "Unknown18"}, - {1001, nullptr, "Unknown19"}, - {1002, nullptr, "Unknown20"}, - {8001, nullptr, "Unknown21"}, - {8002, nullptr, "Unknown22"}, - {8011, nullptr, "Unknown23"}, - {8012, nullptr, "Unknown24"}, - {8021, nullptr, "Unknown25"}, - {10011, nullptr, "Unknown26"}, + {0, nullptr, "GetAlbumFileCount"}, + {1, nullptr, "GetAlbumFileList"}, + {2, nullptr, "LoadAlbumFile"}, + {3, nullptr, "DeleteAlbumFile"}, + {4, nullptr, "StorageCopyAlbumFile"}, + {5, nullptr, "IsAlbumMounted"}, + {6, nullptr, "GetAlbumUsage"}, + {7, nullptr, "GetAlbumFileSize"}, + {8, nullptr, "LoadAlbumFileThumbnail"}, + {9, nullptr, "LoadAlbumScreenShotImage"}, + {10, nullptr, "LoadAlbumScreenShotThumbnailImage"}, + {11, nullptr, "GetAlbumEntryFromApplicationAlbumEntry"}, + {12, nullptr, "Unknown12"}, + {13, nullptr, "Unknown13"}, + {14, nullptr, "Unknown14"}, + {15, nullptr, "Unknown15"}, + {16, nullptr, "Unknown16"}, + {17, nullptr, "Unknown17"}, + {18, nullptr, "Unknown18"}, + {202, nullptr, "SaveEditedScreenShot"}, + {301, 
nullptr, "GetLastThumbnail"}, + {401, nullptr, "GetAutoSavingStorage"}, + {501, nullptr, "GetRequiredStorageSpaceSizeToCopyAll"}, + {1001, nullptr, "Unknown1001"}, + {1002, nullptr, "Unknown1002"}, + {1003, nullptr, "Unknown1003"}, + {8001, nullptr, "ForceAlbumUnmounted"}, + {8002, nullptr, "ResetAlbumMountStatus"}, + {8011, nullptr, "RefreshAlbumCache"}, + {8012, nullptr, "GetAlbumCache"}, + {8013, nullptr, "Unknown8013"}, + {8021, nullptr, "GetAlbumEntryFromApplicationAlbumEntryAruid"}, + {10011, nullptr, "SetInternalErrorConversionEnabled"}, + {50000, nullptr, "Unknown50000"}, + {60002, nullptr, "Unknown60002"}, }; // clang-format on @@ -53,16 +62,17 @@ public: explicit CAPS_C() : ServiceFramework{"caps:c"} { // clang-format off static const FunctionInfo functions[] = { - {2001, nullptr, "Unknown1"}, - {2002, nullptr, "Unknown2"}, - {2011, nullptr, "Unknown3"}, - {2012, nullptr, "Unknown4"}, - {2013, nullptr, "Unknown5"}, - {2014, nullptr, "Unknown6"}, - {2101, nullptr, "Unknown7"}, - {2102, nullptr, "Unknown8"}, - {2201, nullptr, "Unknown9"}, - {2301, nullptr, "Unknown10"}, + {33, nullptr, "Unknown33"}, + {2001, nullptr, "Unknown2001"}, + {2002, nullptr, "Unknown2002"}, + {2011, nullptr, "Unknown2011"}, + {2012, nullptr, "Unknown2012"}, + {2013, nullptr, "Unknown2013"}, + {2014, nullptr, "Unknown2014"}, + {2101, nullptr, "Unknown2101"}, + {2102, nullptr, "Unknown2102"}, + {2201, nullptr, "Unknown2201"}, + {2301, nullptr, "Unknown2301"}, }; // clang-format on @@ -127,11 +137,18 @@ public: explicit CAPS_U() : ServiceFramework{"caps:u"} { // clang-format off static const FunctionInfo functions[] = { + {32, nullptr, "SetShimLibraryVersion"}, {102, nullptr, "GetAlbumFileListByAruid"}, {103, nullptr, "DeleteAlbumFileByAruid"}, {104, nullptr, "GetAlbumFileSizeByAruid"}, + {105, nullptr, "DeleteAlbumFileByAruidForDebug"}, {110, nullptr, "LoadAlbumScreenShotImageByAruid"}, {120, nullptr, "LoadAlbumScreenShotThumbnailImageByAruid"}, + {130, nullptr, 
"PrecheckToCreateContentsByAruid"}, + {140, nullptr, "GetAlbumFileList1AafeAruidDeprecated"}, + {141, nullptr, "GetAlbumFileList2AafeUidAruidDeprecated"}, + {142, nullptr, "GetAlbumFileList3AaeAruid"}, + {143, nullptr, "GetAlbumFileList4AaeUidAruid"}, {60002, nullptr, "OpenAccessorSessionForApplication"}, }; // clang-format on diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index 0249b6992..e7df8fd98 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp @@ -664,10 +664,13 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { {100, nullptr, "OpenImageDirectoryFileSystem"}, {110, nullptr, "OpenContentStorageFileSystem"}, {120, nullptr, "OpenCloudBackupWorkStorageFileSystem"}, + {130, nullptr, "OpenCustomStorageFileSystem"}, {200, &FSP_SRV::OpenDataStorageByCurrentProcess, "OpenDataStorageByCurrentProcess"}, {201, nullptr, "OpenDataStorageByProgramId"}, {202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"}, {203, &FSP_SRV::OpenPatchDataStorageByCurrentProcess, "OpenPatchDataStorageByCurrentProcess"}, + {204, nullptr, "OpenDataFileSystemByProgramIndex"}, + {205, nullptr, "OpenDataStorageByProgramIndex"}, {400, nullptr, "OpenDeviceOperator"}, {500, nullptr, "OpenSdCardDetectionEventNotifier"}, {501, nullptr, "OpenGameCardDetectionEventNotifier"}, @@ -691,6 +694,7 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { {614, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId"}, {615, nullptr, "QuerySaveDataInternalStorageTotalSize"}, {616, nullptr, "GetSaveDataCommitId"}, + {617, nullptr, "UnregisterExternalKey"}, {620, nullptr, "SetSdCardEncryptionSeed"}, {630, nullptr, "SetSdCardAccessibility"}, {631, nullptr, "IsSdCardAccessible"}, @@ -701,6 +705,7 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { {710, nullptr, "ResolveAccessFailure"}, {720, nullptr, "AbandonAccessFailure"}, {800, nullptr, "GetAndClearFileSystemProxyErrorInfo"}, + {810, 
nullptr, "RegisterProgramIndexMapInfo"}, {1000, nullptr, "SetBisRootForHost"}, {1001, nullptr, "SetSaveDataSize"}, {1002, nullptr, "SetSaveDataRootPath"}, @@ -711,6 +716,8 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { {1007, nullptr, "RegisterUpdatePartition"}, {1008, nullptr, "OpenRegisteredUpdatePartition"}, {1009, nullptr, "GetAndClearMemoryReportInfo"}, + {1010, nullptr, "SetDataStorageRedirectTarget"}, + {1011, nullptr, "OutputAccessLogToSdCard2"}, {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"}, {1200, nullptr, "OpenMultiCommitManager"}, diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp index d9225d624..5100e376c 100644 --- a/src/core/hle/service/friend/friend.cpp +++ b/src/core/hle/service/friend/friend.cpp @@ -12,6 +12,7 @@ namespace Service::Friend { class IFriendService final : public ServiceFramework<IFriendService> { public: IFriendService() : ServiceFramework("IFriendService") { + // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "GetCompletionEvent"}, {1, nullptr, "Cancel"}, @@ -24,8 +25,7 @@ public: {10400, nullptr, "GetBlockedUserListIds"}, {10500, nullptr, "GetProfileList"}, {10600, nullptr, "DeclareOpenOnlinePlaySession"}, - {10601, &IFriendService::DeclareCloseOnlinePlaySession, - "DeclareCloseOnlinePlaySession"}, + {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"}, {10610, &IFriendService::UpdateUserPresence, "UpdateUserPresence"}, {10700, nullptr, "GetPlayHistoryRegistrationKey"}, {10701, nullptr, "GetPlayHistoryRegistrationKeyWithNetworkServiceAccountId"}, @@ -88,6 +88,7 @@ public: {30830, nullptr, "ClearPlayLog"}, {49900, nullptr, "DeleteNetworkServiceAccountCache"}, }; + // clang-format on RegisterHandlers(functions); } diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 63b55758b..a4ad95d96 100644 --- 
a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -210,6 +210,7 @@ Hid::Hid() : ServiceFramework("hid") { {131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"}, {132, nullptr, "EnableUnintendedHomeButtonInputProtection"}, {133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"}, + {134, nullptr, "SetNpadAnalogStickUseCenterClamp"}, {200, &Hid::GetVibrationDeviceInfo, "GetVibrationDeviceInfo"}, {201, &Hid::SendVibrationValue, "SendVibrationValue"}, {202, &Hid::GetActualVibrationValue, "GetActualVibrationValue"}, @@ -221,6 +222,7 @@ Hid::Hid() : ServiceFramework("hid") { {208, nullptr, "GetActualVibrationGcErmCommand"}, {209, &Hid::BeginPermitVibrationSession, "BeginPermitVibrationSession"}, {210, &Hid::EndPermitVibrationSession, "EndPermitVibrationSession"}, + {211, nullptr, "IsVibrationDeviceMounted"}, {300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"}, {301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"}, {302, nullptr, "StopConsoleSixAxisSensor"}, @@ -265,6 +267,7 @@ Hid::Hid() : ServiceFramework("hid") { {523, nullptr, "SetIsPalmaPairedConnectable"}, {524, nullptr, "PairPalma"}, {525, &Hid::SetPalmaBoostMode, "SetPalmaBoostMode"}, + {526, nullptr, "CancelWritePalmaWaveEntry"}, {1000, nullptr, "SetNpadCommunicationMode"}, {1001, nullptr, "GetNpadCommunicationMode"}, }; @@ -797,12 +800,22 @@ public: {232, nullptr, "EnableShipmentMode"}, {233, nullptr, "ClearPairingInfo"}, {234, nullptr, "GetUniquePadDeviceTypeSetInternal"}, + {235, nullptr, "EnableAnalogStickPower"}, {301, nullptr, "GetAbstractedPadHandles"}, {302, nullptr, "GetAbstractedPadState"}, {303, nullptr, "GetAbstractedPadsState"}, {321, nullptr, "SetAutoPilotVirtualPadState"}, {322, nullptr, "UnsetAutoPilotVirtualPadState"}, {323, nullptr, "UnsetAllAutoPilotVirtualPadState"}, + {324, nullptr, "AttachHdlsWorkBuffer"}, + {325, nullptr, "ReleaseHdlsWorkBuffer"}, + {326, nullptr, "DumpHdlsNpadAssignmentState"}, + {327, nullptr, 
"DumpHdlsStates"}, + {328, nullptr, "ApplyHdlsNpadAssignmentState"}, + {329, nullptr, "ApplyHdlsStateList"}, + {330, nullptr, "AttachHdlsVirtualDevice"}, + {331, nullptr, "DetachHdlsVirtualDevice"}, + {332, nullptr, "SetHdlsState"}, {350, nullptr, "AddRegisteredDevice"}, {400, nullptr, "DisableExternalMcuOnNxDevice"}, {401, nullptr, "DisableRailDeviceFiltering"}, @@ -825,6 +838,7 @@ public: {131, nullptr, "ActivateSleepButton"}, {141, nullptr, "AcquireCaptureButtonEventHandle"}, {151, nullptr, "ActivateCaptureButton"}, + {161, nullptr, "GetPlatformConfig"}, {210, nullptr, "AcquireNfcDeviceUpdateEventHandle"}, {211, nullptr, "GetNpadsWithNfc"}, {212, nullptr, "AcquireNfcActivateEventHandle"}, @@ -894,6 +908,7 @@ public: {827, nullptr, "IsAnalogStickButtonPressed"}, {828, nullptr, "IsAnalogStickInReleasePosition"}, {829, nullptr, "IsAnalogStickInCircumference"}, + {830, nullptr, "SetNotificationLedPattern"}, {850, nullptr, "IsUsbFullKeyControllerEnabled"}, {851, nullptr, "EnableUsbFullKeyController"}, {852, nullptr, "IsUsbConnected"}, diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp index e250595e3..ed5059047 100644 --- a/src/core/hle/service/ldn/ldn.cpp +++ b/src/core/hle/service/ldn/ldn.cpp @@ -52,9 +52,11 @@ public: } }; -class ILocalCommunicationService final : public ServiceFramework<ILocalCommunicationService> { +class ISystemLocalCommunicationService final + : public ServiceFramework<ISystemLocalCommunicationService> { public: - explicit ILocalCommunicationService(const char* name) : ServiceFramework{name} { + explicit ISystemLocalCommunicationService() + : ServiceFramework{"ISystemLocalCommunicationService"} { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "GetState"}, @@ -84,6 +86,50 @@ public: {304, nullptr, "Disconnect"}, {400, nullptr, "InitializeSystem"}, {401, nullptr, "FinalizeSystem"}, + {402, nullptr, "SetOperationMode"}, + {403, nullptr, "InitializeSystem2"}, + }; + // clang-format on + + 
RegisterHandlers(functions); + } +}; + +class IUserLocalCommunicationService final + : public ServiceFramework<IUserLocalCommunicationService> { +public: + explicit IUserLocalCommunicationService() : ServiceFramework{"IUserLocalCommunicationService"} { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "GetState"}, + {1, nullptr, "GetNetworkInfo"}, + {2, nullptr, "GetIpv4Address"}, + {3, nullptr, "GetDisconnectReason"}, + {4, nullptr, "GetSecurityParameter"}, + {5, nullptr, "GetNetworkConfig"}, + {100, nullptr, "AttachStateChangeEvent"}, + {101, nullptr, "GetNetworkInfoLatestUpdate"}, + {102, nullptr, "Scan"}, + {103, nullptr, "ScanPrivate"}, + {104, nullptr, "SetWirelessControllerRestriction"}, + {200, nullptr, "OpenAccessPoint"}, + {201, nullptr, "CloseAccessPoint"}, + {202, nullptr, "CreateNetwork"}, + {203, nullptr, "CreateNetworkPrivate"}, + {204, nullptr, "DestroyNetwork"}, + {205, nullptr, "Reject"}, + {206, nullptr, "SetAdvertiseData"}, + {207, nullptr, "SetStationAcceptPolicy"}, + {208, nullptr, "AddAcceptFilterEntry"}, + {209, nullptr, "ClearAcceptFilter"}, + {300, nullptr, "OpenStation"}, + {301, nullptr, "CloseStation"}, + {302, nullptr, "Connect"}, + {303, nullptr, "ConnectPrivate"}, + {304, nullptr, "Disconnect"}, + {400, nullptr, "Initialize"}, + {401, nullptr, "Finalize"}, + {402, nullptr, "SetOperationMode"}, }; // clang-format on @@ -108,7 +154,7 @@ public: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ILocalCommunicationService>("ISystemLocalCommunicationService"); + rb.PushIpcInterface<ISystemLocalCommunicationService>(); } }; @@ -129,7 +175,7 @@ public: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ILocalCommunicationService>("IUserLocalCommunicationService"); + rb.PushIpcInterface<IUserLocalCommunicationService>(); } }; diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index 609102f2c..5af925515 100644 
--- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp @@ -86,6 +86,7 @@ public: {2, &RelocatableObject::LoadNrr, "LoadNrr"}, {3, &RelocatableObject::UnloadNrr, "UnloadNrr"}, {4, &RelocatableObject::Initialize, "Initialize"}, + {10, nullptr, "LoadNrrEx"}, }; // clang-format on diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 60479bb45..f92571008 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -15,12 +15,16 @@ namespace Service::NIFM { class IScanRequest final : public ServiceFramework<IScanRequest> { public: explicit IScanRequest() : ServiceFramework("IScanRequest") { + // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "Submit"}, {1, nullptr, "IsProcessing"}, {2, nullptr, "GetResult"}, {3, nullptr, "GetSystemEventReadableHandle"}, + {4, nullptr, "SetChannels"}, }; + // clang-format on + RegisterHandlers(functions); } }; diff --git a/src/core/hle/service/npns/npns.cpp b/src/core/hle/service/npns/npns.cpp index ccb6f9da9..8751522ca 100644 --- a/src/core/hle/service/npns/npns.cpp +++ b/src/core/hle/service/npns/npns.cpp @@ -45,7 +45,7 @@ public: {114, nullptr, "AttachJid"}, {115, nullptr, "DetachJid"}, {201, nullptr, "RequestChangeStateForceTimed"}, - {102, nullptr, "RequestChangeStateForceAsync"}, + {202, nullptr, "RequestChangeStateForceAsync"}, }; // clang-format on @@ -73,6 +73,7 @@ public: {103, nullptr, "GetState"}, {104, nullptr, "GetStatistics"}, {111, nullptr, "GetJid"}, + {120, nullptr, "CreateNotificationReceiver"}, }; // clang-format on diff --git a/src/core/hle/service/pctl/module.cpp b/src/core/hle/service/pctl/module.cpp index 6081f41e1..c75b4ee34 100644 --- a/src/core/hle/service/pctl/module.cpp +++ b/src/core/hle/service/pctl/module.cpp @@ -12,10 +12,10 @@ namespace Service::PCTL { class IParentalControlService final : public ServiceFramework<IParentalControlService> { public: IParentalControlService() : 
ServiceFramework("IParentalControlService") { + // clang-format off static const FunctionInfo functions[] = { {1, &IParentalControlService::Initialize, "Initialize"}, - {1001, &IParentalControlService::CheckFreeCommunicationPermission, - "CheckFreeCommunicationPermission"}, + {1001, &IParentalControlService::CheckFreeCommunicationPermission, "CheckFreeCommunicationPermission"}, {1002, nullptr, "ConfirmLaunchApplicationPermission"}, {1003, nullptr, "ConfirmResumeApplicationPermission"}, {1004, nullptr, "ConfirmSnsPostPermission"}, @@ -30,6 +30,7 @@ public: {1013, nullptr, "ConfirmStereoVisionPermission"}, {1014, nullptr, "ConfirmPlayableApplicationVideoOld"}, {1015, nullptr, "ConfirmPlayableApplicationVideo"}, + {1016, nullptr, "ConfirmShowNewsPermission"}, {1031, nullptr, "IsRestrictionEnabled"}, {1032, nullptr, "GetSafetyLevel"}, {1033, nullptr, "SetSafetyLevel"}, @@ -45,6 +46,7 @@ public: {1045, nullptr, "UpdateFreeCommunicationApplicationList"}, {1046, nullptr, "DisableFeaturesForReset"}, {1047, nullptr, "NotifyApplicationDownloadStarted"}, + {1048, nullptr, "NotifyNetworkProfileCreated"}, {1061, nullptr, "ConfirmStereoVisionRestrictionConfigurable"}, {1062, nullptr, "GetStereoVisionRestriction"}, {1063, nullptr, "SetStereoVisionRestriction"}, @@ -63,6 +65,7 @@ public: {1411, nullptr, "GetPairingAccountInfo"}, {1421, nullptr, "GetAccountNickname"}, {1424, nullptr, "GetAccountState"}, + {1425, nullptr, "RequestPostEvents"}, {1432, nullptr, "GetSynchronizationEvent"}, {1451, nullptr, "StartPlayTimer"}, {1452, nullptr, "StopPlayTimer"}, diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp index 6b27dc4a3..ebcc41a43 100644 --- a/src/core/hle/service/pm/pm.cpp +++ b/src/core/hle/service/pm/pm.cpp @@ -42,15 +42,18 @@ private: class DebugMonitor final : public ServiceFramework<DebugMonitor> { public: explicit DebugMonitor() : ServiceFramework{"pm:dmnt"} { + // clang-format off static const FunctionInfo functions[] = { - {0, nullptr, 
"IsDebugMode"}, - {1, nullptr, "GetDebugProcesses"}, - {2, nullptr, "StartDebugProcess"}, - {3, nullptr, "GetTitlePid"}, - {4, nullptr, "EnableDebugForTitleId"}, - {5, nullptr, "GetApplicationPid"}, - {6, nullptr, "EnableDebugForApplication"}, + {0, nullptr, "GetDebugProcesses"}, + {1, nullptr, "StartDebugProcess"}, + {2, nullptr, "GetTitlePid"}, + {3, nullptr, "EnableDebugForTitleId"}, + {4, nullptr, "GetApplicationPid"}, + {5, nullptr, "EnableDebugForApplication"}, + {6, nullptr, "DisableDebug"}, }; + // clang-format on + RegisterHandlers(functions); } }; @@ -68,6 +71,7 @@ public: class Shell final : public ServiceFramework<Shell> { public: explicit Shell() : ServiceFramework{"pm:shell"} { + // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "LaunchProcess"}, {1, nullptr, "TerminateProcessByPid"}, @@ -77,7 +81,10 @@ public: {5, nullptr, "NotifyBootFinished"}, {6, nullptr, "GetApplicationPid"}, {7, nullptr, "BoostSystemMemoryResourceLimit"}, + {8, nullptr, "EnableAdditionalSystemThreads"}, }; + // clang-format on + RegisterHandlers(functions); } }; diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp index 1afc43f75..4ecb6bcef 100644 --- a/src/core/hle/service/set/set.cpp +++ b/src/core/hle/service/set/set.cpp @@ -116,6 +116,7 @@ void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) { } SET::SET() : ServiceFramework("set") { + // clang-format off static const FunctionInfo functions[] = { {0, &SET::GetLanguageCode, "GetLanguageCode"}, {1, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes"}, @@ -126,7 +127,10 @@ SET::SET() : ServiceFramework("set") { {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"}, {7, nullptr, "GetKeyCodeMap"}, {8, nullptr, "GetQuestFlag"}, + {9, nullptr, "GetKeyCodeMap2"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/set/set_cal.cpp b/src/core/hle/service/set/set_cal.cpp index 34654bb07..5981c575c 100644 --- 
a/src/core/hle/service/set/set_cal.cpp +++ b/src/core/hle/service/set/set_cal.cpp @@ -40,7 +40,7 @@ SET_CAL::SET_CAL() : ServiceFramework("set:cal") { {30, nullptr, "GetAmiiboEcqvBlsCertificate"}, {31, nullptr, "GetAmiiboEcqvBlsRootCertificate"}, {32, nullptr, "GetUsbTypeCPowerSourceCircuitVersion"}, - {33, nullptr, "GetBatteryVersion"}, + {41, nullptr, "GetBatteryVersion"}, }; RegisterHandlers(functions); } diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp index ecee554bf..98d0cfdfd 100644 --- a/src/core/hle/service/set/set_sys.cpp +++ b/src/core/hle/service/set/set_sys.cpp @@ -104,6 +104,7 @@ void SET_SYS::SetColorSetId(Kernel::HLERequestContext& ctx) { } SET_SYS::SET_SYS() : ServiceFramework("set:sys") { + // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "SetLanguageCode"}, {1, nullptr, "SetNetworkSettings"}, @@ -252,7 +253,33 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") { {147, nullptr, "GetConsoleSixAxisSensorAngularAcceleration"}, {148, nullptr, "SetConsoleSixAxisSensorAngularAcceleration"}, {149, nullptr, "GetRebootlessSystemUpdateVersion"}, + {150, nullptr, "GetDeviceTimeZoneLocationUpdatedTime"}, + {151, nullptr, "SetDeviceTimeZoneLocationUpdatedTime"}, + {152, nullptr, "GetUserSystemClockAutomaticCorrectionUpdatedTime"}, + {153, nullptr, "SetUserSystemClockAutomaticCorrectionUpdatedTime"}, + {154, nullptr, "GetAccountOnlineStorageSettings"}, + {155, nullptr, "SetAccountOnlineStorageSettings"}, + {156, nullptr, "GetPctlReadyFlag"}, + {157, nullptr, "SetPctlReadyFlag"}, + {162, nullptr, "GetPtmBatteryVersion"}, + {163, nullptr, "SetPtmBatteryVersion"}, + {164, nullptr, "GetUsb30HostEnableFlag"}, + {165, nullptr, "SetUsb30HostEnableFlag"}, + {166, nullptr, "GetUsb30DeviceEnableFlag"}, + {167, nullptr, "SetUsb30DeviceEnableFlag"}, + {168, nullptr, "GetThemeId"}, + {169, nullptr, "SetThemeId"}, + {170, nullptr, "GetChineseTraditionalInputMethod"}, + {171, nullptr, 
"SetChineseTraditionalInputMethod"}, + {172, nullptr, "GetPtmCycleCountReliability"}, + {173, nullptr, "SetPtmCycleCountReliability"}, + {175, nullptr, "GetThemeSettings"}, + {176, nullptr, "SetThemeSettings"}, + {177, nullptr, "GetThemeKey"}, + {178, nullptr, "SetThemeKey"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp index 4342f3b2d..884ad173b 100644 --- a/src/core/hle/service/sockets/bsd.cpp +++ b/src/core/hle/service/sockets/bsd.cpp @@ -73,6 +73,7 @@ void BSD::Close(Kernel::HLERequestContext& ctx) { } BSD::BSD(const char* name) : ServiceFramework(name) { + // clang-format off static const FunctionInfo functions[] = { {0, &BSD::RegisterClient, "RegisterClient"}, {1, &BSD::StartMonitoring, "StartMonitoring"}, @@ -105,7 +106,11 @@ BSD::BSD(const char* name) : ServiceFramework(name) { {28, nullptr, "GetResourceStatistics"}, {29, nullptr, "RecvMMsg"}, {30, nullptr, "SendMMsg"}, + {31, nullptr, "EventFd"}, + {32, nullptr, "RegisterResourceStatisticsName"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/hle/service/ssl/ssl.cpp b/src/core/hle/service/ssl/ssl.cpp index f7f87a958..65040c077 100644 --- a/src/core/hle/service/ssl/ssl.cpp +++ b/src/core/hle/service/ssl/ssl.cpp @@ -103,6 +103,8 @@ public: {4, nullptr, "DebugIoctl"}, {5, &SSL::SetInterfaceVersion, "SetInterfaceVersion"}, {6, nullptr, "FlushSessionCache"}, + {7, nullptr, "SetDebugOption"}, + {8, nullptr, "GetDebugOption"}, }; // clang-format on diff --git a/src/core/hle/service/time/interface.cpp b/src/core/hle/service/time/interface.cpp index b3a196f65..8d122ae33 100644 --- a/src/core/hle/service/time/interface.cpp +++ b/src/core/hle/service/time/interface.cpp @@ -8,6 +8,7 @@ namespace Service::Time { Time::Time(std::shared_ptr<Module> time, const char* name) : Module::Interface(std::move(time), name) { + // clang-format off static const FunctionInfo functions[] = { {0, 
&Time::GetStandardUserSystemClock, "GetStandardUserSystemClock"}, {1, &Time::GetStandardNetworkSystemClock, "GetStandardNetworkSystemClock"}, @@ -15,18 +16,23 @@ Time::Time(std::shared_ptr<Module> time, const char* name) {3, &Time::GetTimeZoneService, "GetTimeZoneService"}, {4, &Time::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"}, {5, nullptr, "GetEphemeralNetworkSystemClock"}, + {20, nullptr, "GetSharedMemoryNativeHandle"}, + {30, nullptr, "GetStandardNetworkClockOperationEventReadableHandle"}, + {31, nullptr, "GetEphemeralNetworkClockOperationEventReadableHandle"}, {50, nullptr, "SetStandardSteadyClockInternalOffset"}, {100, nullptr, "IsStandardUserSystemClockAutomaticCorrectionEnabled"}, {101, nullptr, "SetStandardUserSystemClockAutomaticCorrectionEnabled"}, {102, nullptr, "GetStandardUserSystemClockInitialYear"}, {200, nullptr, "IsStandardNetworkSystemClockAccuracySufficient"}, + {201, nullptr, "GetStandardUserSystemClockAutomaticCorrectionUpdatedTime"}, {300, nullptr, "CalculateMonotonicSystemClockBaseTimePoint"}, {400, &Time::GetClockSnapshot, "GetClockSnapshot"}, {401, nullptr, "GetClockSnapshotFromSystemClockContext"}, - {500, &Time::CalculateStandardUserSystemClockDifferenceByUser, - "CalculateStandardUserSystemClockDifferenceByUser"}, + {500, &Time::CalculateStandardUserSystemClockDifferenceByUser, "CalculateStandardUserSystemClockDifferenceByUser"}, {501, nullptr, "CalculateSpanBetween"}, }; + // clang-format on + RegisterHandlers(functions); } diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index 07aa7a1cd..10b13fb1d 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -86,25 +86,29 @@ FileType AppLoader_DeconstructedRomDirectory::IdentifyType(const FileSys::Virtua return FileType::Error; } -ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) { 
+AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirectory::Load( + Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } if (dir == nullptr) { - if (file == nullptr) - return ResultStatus::ErrorNullFile; + if (file == nullptr) { + return {ResultStatus::ErrorNullFile, {}}; + } + dir = file->GetContainingDirectory(); } // Read meta to determine title ID FileSys::VirtualFile npdm = dir->GetFile("main.npdm"); - if (npdm == nullptr) - return ResultStatus::ErrorMissingNPDM; + if (npdm == nullptr) { + return {ResultStatus::ErrorMissingNPDM, {}}; + } - ResultStatus result = metadata.Load(npdm); + const ResultStatus result = metadata.Load(npdm); if (result != ResultStatus::Success) { - return result; + return {result, {}}; } if (override_update) { @@ -114,23 +118,24 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) // Reread in case PatchExeFS affected the main.npdm npdm = dir->GetFile("main.npdm"); - if (npdm == nullptr) - return ResultStatus::ErrorMissingNPDM; + if (npdm == nullptr) { + return {ResultStatus::ErrorMissingNPDM, {}}; + } - ResultStatus result2 = metadata.Load(npdm); + const ResultStatus result2 = metadata.Load(npdm); if (result2 != ResultStatus::Success) { - return result2; + return {result2, {}}; } metadata.Print(); const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()}; if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit || arch_bits == FileSys::ProgramAddressSpaceType::Is32BitNoMap) { - return ResultStatus::Error32BitISA; + return {ResultStatus::Error32BitISA, {}}; } if (process.LoadFromMetadata(metadata).IsError()) { - return ResultStatus::ErrorUnableToParseKernelMetadata; + return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; } const FileSys::PatchManager pm(metadata.GetTitleID()); @@ -150,7 +155,7 @@ ResultStatus 
AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) const auto tentative_next_load_addr = AppLoader_NSO::LoadModule(process, *module_file, load_addr, should_pass_arguments, pm); if (!tentative_next_load_addr) { - return ResultStatus::ErrorLoadingNSO; + return {ResultStatus::ErrorLoadingNSO, {}}; } next_load_addr = *tentative_next_load_addr; @@ -159,8 +164,6 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false); } - process.Run(base_address, metadata.GetMainThreadPriority(), metadata.GetMainThreadStackSize()); - // Find the RomFS by searching for a ".romfs" file in this directory const auto& files = dir->GetFiles(); const auto romfs_iter = @@ -175,7 +178,8 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) } is_loaded = true; - return ResultStatus::Success; + return {ResultStatus::Success, + LoadParameters{metadata.GetMainThreadPriority(), metadata.GetMainThreadStackSize()}}; } ResultStatus AppLoader_DeconstructedRomDirectory::ReadRomFS(FileSys::VirtualFile& dir) { diff --git a/src/core/loader/deconstructed_rom_directory.h b/src/core/loader/deconstructed_rom_directory.h index 1615cb5a8..1a65c16a4 100644 --- a/src/core/loader/deconstructed_rom_directory.h +++ b/src/core/loader/deconstructed_rom_directory.h @@ -37,7 +37,7 @@ public: return IdentifyType(file); } - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; ResultStatus ReadIcon(std::vector<u8>& buffer) override; diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 46ac372f6..6d4b02375 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -382,13 +382,15 @@ FileType AppLoader_ELF::IdentifyType(const FileSys::VirtualFile& file) { return FileType::Error; } -ResultStatus AppLoader_ELF::Load(Kernel::Process& process) { - if 
(is_loaded) - return ResultStatus::ErrorAlreadyLoaded; +AppLoader_ELF::LoadResult AppLoader_ELF::Load(Kernel::Process& process) { + if (is_loaded) { + return {ResultStatus::ErrorAlreadyLoaded, {}}; + } std::vector<u8> buffer = file->ReadAllBytes(); - if (buffer.size() != file->GetSize()) - return ResultStatus::ErrorIncorrectELFFileSize; + if (buffer.size() != file->GetSize()) { + return {ResultStatus::ErrorIncorrectELFFileSize, {}}; + } const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); ElfReader elf_reader(&buffer[0]); @@ -396,10 +398,9 @@ ResultStatus AppLoader_ELF::Load(Kernel::Process& process) { const VAddr entry_point = codeset.entrypoint; process.LoadModule(std::move(codeset), entry_point); - process.Run(entry_point, 48, Memory::DEFAULT_STACK_SIZE); is_loaded = true; - return ResultStatus::Success; + return {ResultStatus::Success, LoadParameters{48, Memory::DEFAULT_STACK_SIZE}}; } } // namespace Loader diff --git a/src/core/loader/elf.h b/src/core/loader/elf.h index a2d33021c..7ef7770a6 100644 --- a/src/core/loader/elf.h +++ b/src/core/loader/elf.h @@ -26,7 +26,7 @@ public: return IdentifyType(file); } - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; }; } // namespace Loader diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index bb925f4a6..f7846db52 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h @@ -131,6 +131,12 @@ std::ostream& operator<<(std::ostream& os, ResultStatus status); /// Interface for loading an application class AppLoader : NonCopyable { public: + struct LoadParameters { + s32 main_thread_priority; + u64 main_thread_stack_size; + }; + using LoadResult = std::pair<ResultStatus, std::optional<LoadParameters>>; + explicit AppLoader(FileSys::VirtualFile file); virtual ~AppLoader(); @@ -145,7 +151,7 @@ public: * @param process The newly created process. * @return The status result of the operation. 
*/ - virtual ResultStatus Load(Kernel::Process& process) = 0; + virtual LoadResult Load(Kernel::Process& process) = 0; /** * Loads the system mode that this application needs. diff --git a/src/core/loader/nax.cpp b/src/core/loader/nax.cpp index 93a970d10..34efef09a 100644 --- a/src/core/loader/nax.cpp +++ b/src/core/loader/nax.cpp @@ -41,31 +41,37 @@ FileType AppLoader_NAX::GetFileType() const { return IdentifyTypeImpl(*nax); } -ResultStatus AppLoader_NAX::Load(Kernel::Process& process) { +AppLoader_NAX::LoadResult AppLoader_NAX::Load(Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } - if (nax->GetStatus() != ResultStatus::Success) - return nax->GetStatus(); + const auto nax_status = nax->GetStatus(); + if (nax_status != ResultStatus::Success) { + return {nax_status, {}}; + } const auto nca = nax->AsNCA(); if (nca == nullptr) { - if (!Core::Crypto::KeyManager::KeyFileExists(false)) - return ResultStatus::ErrorMissingProductionKeyFile; - return ResultStatus::ErrorNAXInconvertibleToNCA; + if (!Core::Crypto::KeyManager::KeyFileExists(false)) { + return {ResultStatus::ErrorMissingProductionKeyFile, {}}; + } + + return {ResultStatus::ErrorNAXInconvertibleToNCA, {}}; } - if (nca->GetStatus() != ResultStatus::Success) - return nca->GetStatus(); + const auto nca_status = nca->GetStatus(); + if (nca_status != ResultStatus::Success) { + return {nca_status, {}}; + } const auto result = nca_loader->Load(process); - if (result != ResultStatus::Success) + if (result.first != ResultStatus::Success) { return result; + } is_loaded = true; - - return ResultStatus::Success; + return result; } ResultStatus AppLoader_NAX::ReadRomFS(FileSys::VirtualFile& dir) { diff --git a/src/core/loader/nax.h b/src/core/loader/nax.h index f40079574..00f1659c1 100644 --- a/src/core/loader/nax.h +++ b/src/core/loader/nax.h @@ -33,7 +33,7 @@ public: FileType GetFileType() const override; - ResultStatus 
Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; u64 ReadRomFSIVFCOffset() const override; diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp index ce8196fcf..b3f8f1083 100644 --- a/src/core/loader/nca.cpp +++ b/src/core/loader/nca.cpp @@ -30,36 +30,38 @@ FileType AppLoader_NCA::IdentifyType(const FileSys::VirtualFile& file) { return FileType::Error; } -ResultStatus AppLoader_NCA::Load(Kernel::Process& process) { +AppLoader_NCA::LoadResult AppLoader_NCA::Load(Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } const auto result = nca->GetStatus(); if (result != ResultStatus::Success) { - return result; + return {result, {}}; } - if (nca->GetType() != FileSys::NCAContentType::Program) - return ResultStatus::ErrorNCANotProgram; + if (nca->GetType() != FileSys::NCAContentType::Program) { + return {ResultStatus::ErrorNCANotProgram, {}}; + } const auto exefs = nca->GetExeFS(); - - if (exefs == nullptr) - return ResultStatus::ErrorNoExeFS; + if (exefs == nullptr) { + return {ResultStatus::ErrorNoExeFS, {}}; + } directory_loader = std::make_unique<AppLoader_DeconstructedRomDirectory>(exefs, true); const auto load_result = directory_loader->Load(process); - if (load_result != ResultStatus::Success) + if (load_result.first != ResultStatus::Success) { return load_result; + } - if (nca->GetRomFS() != nullptr && nca->GetRomFS()->GetSize() > 0) + if (nca->GetRomFS() != nullptr && nca->GetRomFS()->GetSize() > 0) { Service::FileSystem::RegisterRomFS(std::make_unique<FileSys::RomFSFactory>(*this)); + } is_loaded = true; - - return ResultStatus::Success; + return load_result; } ResultStatus AppLoader_NCA::ReadRomFS(FileSys::VirtualFile& dir) { diff --git a/src/core/loader/nca.h b/src/core/loader/nca.h index b9f077468..94f0ed677 100644 --- a/src/core/loader/nca.h +++ 
b/src/core/loader/nca.h @@ -33,7 +33,7 @@ public: return IdentifyType(file); } - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; u64 ReadRomFSIVFCOffset() const override; diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 31e4a0c84..6a0ca389b 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -201,25 +201,25 @@ bool AppLoader_NRO::LoadNro(Kernel::Process& process, const FileSys::VfsFile& fi return LoadNroImpl(process, file.ReadAllBytes(), file.GetName(), load_base); } -ResultStatus AppLoader_NRO::Load(Kernel::Process& process) { +AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } // Load NRO const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); if (!LoadNro(process, *file, base_address)) { - return ResultStatus::ErrorLoadingNRO; + return {ResultStatus::ErrorLoadingNRO, {}}; } - if (romfs != nullptr) + if (romfs != nullptr) { Service::FileSystem::RegisterRomFS(std::make_unique<FileSys::RomFSFactory>(*this)); - - process.Run(base_address, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE); + } is_loaded = true; - return ResultStatus::Success; + return {ResultStatus::Success, + LoadParameters{Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE}}; } ResultStatus AppLoader_NRO::ReadIcon(std::vector<u8>& buffer) { diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h index 85b0ed644..1ffdae805 100644 --- a/src/core/loader/nro.h +++ b/src/core/loader/nro.h @@ -37,7 +37,7 @@ public: return IdentifyType(file); } - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; ResultStatus ReadIcon(std::vector<u8>& buffer) override; ResultStatus ReadProgramId(u64& out_program_id) override; diff --git 
a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index d7c47c197..a86653204 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -169,22 +169,21 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, return load_base + image_size; } -ResultStatus AppLoader_NSO::Load(Kernel::Process& process) { +AppLoader_NSO::LoadResult AppLoader_NSO::Load(Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } // Load module const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); if (!LoadModule(process, *file, base_address, true)) { - return ResultStatus::ErrorLoadingNSO; + return {ResultStatus::ErrorLoadingNSO, {}}; } LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", file->GetName(), base_address); - process.Run(base_address, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE); - is_loaded = true; - return ResultStatus::Success; + return {ResultStatus::Success, + LoadParameters{Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE}}; } } // namespace Loader diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h index 4674c3724..fdce9191c 100644 --- a/src/core/loader/nso.h +++ b/src/core/loader/nso.h @@ -84,7 +84,7 @@ public: VAddr load_base, bool should_pass_arguments, std::optional<FileSys::PatchManager> pm = {}); - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; }; } // namespace Loader diff --git a/src/core/loader/nsp.cpp b/src/core/loader/nsp.cpp index 7da1f8960..ad56bbb38 100644 --- a/src/core/loader/nsp.cpp +++ b/src/core/loader/nsp.cpp @@ -72,37 +72,45 @@ FileType AppLoader_NSP::IdentifyType(const FileSys::VirtualFile& file) { return FileType::Error; } -ResultStatus AppLoader_NSP::Load(Kernel::Process& process) { +AppLoader_NSP::LoadResult AppLoader_NSP::Load(Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return 
{ResultStatus::ErrorAlreadyLoaded, {}}; } - if (title_id == 0) - return ResultStatus::ErrorNSPMissingProgramNCA; + if (title_id == 0) { + return {ResultStatus::ErrorNSPMissingProgramNCA, {}}; + } - if (nsp->GetStatus() != ResultStatus::Success) - return nsp->GetStatus(); + const auto nsp_status = nsp->GetStatus(); + if (nsp_status != ResultStatus::Success) { + return {nsp_status, {}}; + } - if (nsp->GetProgramStatus(title_id) != ResultStatus::Success) - return nsp->GetProgramStatus(title_id); + const auto nsp_program_status = nsp->GetProgramStatus(title_id); + if (nsp_program_status != ResultStatus::Success) { + return {nsp_program_status, {}}; + } if (nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) { - if (!Core::Crypto::KeyManager::KeyFileExists(false)) - return ResultStatus::ErrorMissingProductionKeyFile; - return ResultStatus::ErrorNSPMissingProgramNCA; + if (!Core::Crypto::KeyManager::KeyFileExists(false)) { + return {ResultStatus::ErrorMissingProductionKeyFile, {}}; + } + + return {ResultStatus::ErrorNSPMissingProgramNCA, {}}; } const auto result = secondary_loader->Load(process); - if (result != ResultStatus::Success) + if (result.first != ResultStatus::Success) { return result; + } FileSys::VirtualFile update_raw; - if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) + if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) { Service::FileSystem::SetPackedUpdate(std::move(update_raw)); + } is_loaded = true; - - return ResultStatus::Success; + return result; } ResultStatus AppLoader_NSP::ReadRomFS(FileSys::VirtualFile& file) { diff --git a/src/core/loader/nsp.h b/src/core/loader/nsp.h index 953a1b508..85e870bdf 100644 --- a/src/core/loader/nsp.h +++ b/src/core/loader/nsp.h @@ -35,7 +35,7 @@ public: return IdentifyType(file); } - ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; ResultStatus ReadRomFS(FileSys::VirtualFile& 
file) override; u64 ReadRomFSIVFCOffset() const override; diff --git a/src/core/loader/xci.cpp b/src/core/loader/xci.cpp index 89f7bbf77..1e285a053 100644 --- a/src/core/loader/xci.cpp +++ b/src/core/loader/xci.cpp @@ -48,31 +48,35 @@ FileType AppLoader_XCI::IdentifyType(const FileSys::VirtualFile& file) { return FileType::Error; } -ResultStatus AppLoader_XCI::Load(Kernel::Process& process) { +AppLoader_XCI::LoadResult AppLoader_XCI::Load(Kernel::Process& process) { if (is_loaded) { - return ResultStatus::ErrorAlreadyLoaded; + return {ResultStatus::ErrorAlreadyLoaded, {}}; } - if (xci->GetStatus() != ResultStatus::Success) - return xci->GetStatus(); + if (xci->GetStatus() != ResultStatus::Success) { + return {xci->GetStatus(), {}}; + } - if (xci->GetProgramNCAStatus() != ResultStatus::Success) - return xci->GetProgramNCAStatus(); + if (xci->GetProgramNCAStatus() != ResultStatus::Success) { + return {xci->GetProgramNCAStatus(), {}}; + } - if (!xci->HasProgramNCA() && !Core::Crypto::KeyManager::KeyFileExists(false)) - return ResultStatus::ErrorMissingProductionKeyFile; + if (!xci->HasProgramNCA() && !Core::Crypto::KeyManager::KeyFileExists(false)) { + return {ResultStatus::ErrorMissingProductionKeyFile, {}}; + } const auto result = nca_loader->Load(process); - if (result != ResultStatus::Success) + if (result.first != ResultStatus::Success) { return result; + } FileSys::VirtualFile update_raw; - if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) + if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) { Service::FileSystem::SetPackedUpdate(std::move(update_raw)); + } is_loaded = true; - - return ResultStatus::Success; + return result; } ResultStatus AppLoader_XCI::ReadRomFS(FileSys::VirtualFile& file) { diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h index 436f7387c..ae7145b14 100644 --- a/src/core/loader/xci.h +++ b/src/core/loader/xci.h @@ -35,7 +35,7 @@ public: return IdentifyType(file); } - 
ResultStatus Load(Kernel::Process& process) override; + LoadResult Load(Kernel::Process& process) override; ResultStatus ReadRomFS(FileSys::VirtualFile& file) override; u64 ReadRomFSIVFCOffset() const override; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 4e0538bc2..f18f6226b 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -26,16 +26,16 @@ namespace Memory { static Common::PageTable* current_page_table = nullptr; -void SetCurrentPageTable(Common::PageTable* page_table) { - current_page_table = page_table; +void SetCurrentPageTable(Kernel::Process& process) { + current_page_table = &process.VMManager().page_table; + + const std::size_t address_space_width = process.VMManager().GetAddressSpaceWidth(); auto& system = Core::System::GetInstance(); - if (system.IsPoweredOn()) { - system.ArmInterface(0).PageTableChanged(); - system.ArmInterface(1).PageTableChanged(); - system.ArmInterface(2).PageTableChanged(); - system.ArmInterface(3).PageTableChanged(); - } + system.ArmInterface(0).PageTableChanged(*current_page_table, address_space_width); + system.ArmInterface(1).PageTableChanged(*current_page_table, address_space_width); + system.ArmInterface(2).PageTableChanged(*current_page_table, address_space_width); + system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width); } static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory, diff --git a/src/core/memory.h b/src/core/memory.h index 6845f5fe1..b9fa18b1d 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -40,8 +40,9 @@ enum : VAddr { KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE, }; -/// Changes the currently active page table. -void SetCurrentPageTable(Common::PageTable* page_table); +/// Changes the currently active page table to that of +/// the given process instance. +void SetCurrentPageTable(Kernel::Process& process); /// Determines if the given VAddr is valid for the specified process. 
bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 114bed20d..1e31a2900 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -46,6 +46,8 @@ add_library(video_core STATIC renderer_opengl/gl_rasterizer_cache.h renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_resource_manager.h + renderer_opengl/gl_sampler_cache.cpp + renderer_opengl/gl_sampler_cache.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h renderer_opengl/gl_shader_decompiler.cpp @@ -67,6 +69,8 @@ add_library(video_core STATIC renderer_opengl/renderer_opengl.h renderer_opengl/utils.cpp renderer_opengl/utils.h + sampler_cache.cpp + sampler_cache.h shader/decode/arithmetic.cpp shader/decode/arithmetic_immediate.cpp shader/decode/bfe.cpp diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 046d047cb..6674d9405 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -57,8 +57,8 @@ bool DmaPusher::Step() { // Push buffer non-empty, read a word command_headers.resize(command_list_header.size); - gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(), - command_list_header.size * sizeof(u32)); + gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), + command_list_header.size * sizeof(u32)); for (const CommandHeader& command_header : command_headers) { diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index cd51a31d7..7387886a3 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -10,6 +10,7 @@ #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" +#include "video_core/textures/decoders.h" namespace Tegra::Engines { @@ -27,30 +28,46 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { switch 
(method_call.method) { case KEPLERMEMORY_REG_INDEX(exec): { - state.write_offset = 0; + ProcessExec(); break; } case KEPLERMEMORY_REG_INDEX(data): { - ProcessData(method_call.argument); + ProcessData(method_call.argument, method_call.IsLastCall()); break; } } } -void KeplerMemory::ProcessData(u32 data) { - ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); - ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); - - // We have to invalidate the destination region to evict any outdated surfaces from the cache. - // We do this before actually writing the new data because the destination address might - // contain a dirty surface that will have to be written back to memory. - const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; - rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); - memory_manager.Write<u32>(address, data); - - system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); +void KeplerMemory::ProcessExec() { + state.write_offset = 0; + state.copy_size = regs.line_length_in * regs.line_count; + state.inner_buffer.resize(state.copy_size); +} - state.write_offset++; +void KeplerMemory::ProcessData(u32 data, bool is_last_call) { + const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); + std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size); + state.write_offset += sub_copy_size; + if (is_last_call) { + const GPUVAddr address{regs.dest.Address()}; + if (regs.exec.linear != 0) { + memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size); + } else { + UNIMPLEMENTED_IF(regs.dest.z != 0); + UNIMPLEMENTED_IF(regs.dest.depth != 1); + UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); + UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); + const std::size_t dst_size = Tegra::Texture::CalculateSize( + true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); + std::vector<u8> tmp_buffer(dst_size); + 
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); + Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, + regs.dest.y, regs.dest.BlockHeight(), state.copy_size, + state.inner_buffer.data(), tmp_buffer.data()); + memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); + } + system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); + } } } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 78b6c3e45..5f892ddad 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -6,6 +6,7 @@ #include <array> #include <cstddef> +#include <vector> #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -51,7 +52,11 @@ public: u32 address_high; u32 address_low; u32 pitch; - u32 block_dimensions; + union { + BitField<0, 4, u32> block_width; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + }; u32 width; u32 height; u32 depth; @@ -63,6 +68,18 @@ public: return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); } + + u32 BlockWidth() const { + return 1U << block_width.Value(); + } + + u32 BlockHeight() const { + return 1U << block_height.Value(); + } + + u32 BlockDepth() const { + return 1U << block_depth.Value(); + } } dest; struct { @@ -81,6 +98,8 @@ public: struct { u32 write_offset = 0; + u32 copy_size = 0; + std::vector<u8> inner_buffer; } state{}; private: @@ -88,7 +107,8 @@ private: VideoCore::RasterizerInterface& rasterizer; MemoryManager& memory_manager; - void ProcessData(u32 data); + void ProcessExec(); + void ProcessData(u32 data, bool is_last_call); }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 74403eed4..9780417f2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ 
-418,7 +418,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; Texture::TICEntry tic_entry; - memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); + memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || tic_entry.header_version == Texture::TICHeaderVersion::Pitch, @@ -439,7 +439,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; Texture::TSCEntry tsc_entry; - memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); + memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); return tsc_entry; } @@ -482,19 +482,8 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt return textures; } -Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, - std::size_t offset) const { - auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; - auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; - ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); - - const GPUVAddr tex_info_address = - tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); - - ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); - - const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; - +Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle, + std::size_t offset) const { Texture::FullTextureInfo tex_info{}; tex_info.index = static_cast<u32>(offset); @@ -511,6 +500,22 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, return tex_info; } +Texture::FullTextureInfo 
Maxwell3D::GetStageTexture(Regs::ShaderStage stage, + std::size_t offset) const { + const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; + const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; + ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); + + const GPUVAddr tex_info_address = + tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); + + ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; + + return GetTextureInfo(tex_handle, offset); +} + u32 Maxwell3D::GetRegisterValue(u32 method) const { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); return regs.reg_array[method]; @@ -524,4 +529,12 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer.Clear(); } +u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { + const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; + const auto& buffer = shader_stage.const_buffers[const_buffer]; + u32 result; + std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); + return result; +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 321af3297..cc2424d38 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1131,12 +1131,18 @@ public: /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); + /// Given a Texture Handle, returns the TSC and TIC entries. + Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, + std::size_t offset) const; + /// Returns a list of enabled textures for the specified shader stage. 
std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; + u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. using MacroMemory = std::array<u32, 0x40000>; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 2e1e96c81..e5b4eadea 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -387,6 +387,20 @@ enum class IpaSampleMode : u64 { Offset = 2, }; +enum class LmemLoadCacheManagement : u64 { + Default = 0, + LU = 1, + CI = 2, + CV = 3, +}; + +enum class LmemStoreCacheManagement : u64 { + Default = 0, + CG = 1, + CS = 2, + WT = 3, +}; + struct IpaMode { IpaInterpMode interpolation_mode; IpaSampleMode sampling_mode; @@ -782,7 +796,7 @@ union Instruction { } ld_l; union { - BitField<44, 2, u64> unknown; + BitField<44, 2, LmemStoreCacheManagement> cache_management; } st_l; union { @@ -792,6 +806,12 @@ union Instruction { } ldg; union { + BitField<48, 3, UniformType> type; + BitField<46, 2, u64> cache_mode; + BitField<20, 24, s64> immediate_offset; + } stg; + + union { BitField<0, 3, u64> pred0; BitField<3, 3, u64> pred3; BitField<7, 1, u64> abs_a; @@ -917,21 +937,34 @@ union Instruction { } iset; union { - BitField<8, 2, Register::Size> dest_size; - BitField<10, 2, Register::Size> src_size; - BitField<12, 1, u64> is_output_signed; - BitField<13, 1, u64> is_input_signed; - BitField<41, 2, u64> selector; + BitField<41, 2, u64> selector; // i2i and i2f only BitField<45, 1, u64> negate_a; BitField<49, 1, u64> abs_a; + BitField<10, 2, Register::Size> src_size; + BitField<13, 1, u64> is_input_signed; + BitField<8, 2, 
Register::Size> dst_size; + BitField<12, 1, u64> is_output_signed; + + union { + BitField<39, 2, u64> tab5cb8_2; + } i2f; union { BitField<39, 2, F2iRoundingOp> rounding; } f2i; union { - BitField<39, 4, F2fRoundingOp> rounding; + BitField<8, 2, Register::Size> src_size; + BitField<10, 2, Register::Size> dst_size; + BitField<39, 4, u64> rounding; + // H0, H1 extract for F16 missing + BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value + F2fRoundingOp GetRoundingMode() const { + constexpr u64 rounding_mask = 0x0B; + return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask); + } } f2f; + } conversion; union { @@ -967,6 +1000,38 @@ union Instruction { } tex; union { + BitField<28, 1, u64> array; + BitField<29, 2, TextureType> texture_type; + BitField<31, 4, u64> component_mask; + BitField<49, 1, u64> nodep_flag; + BitField<50, 1, u64> dc_flag; + BitField<36, 1, u64> aoffi_flag; + BitField<37, 3, TextureProcessMode> process_mode; + + bool IsComponentEnabled(std::size_t component) const { + return ((1ULL << component) & component_mask) != 0; + } + + TextureProcessMode GetTextureProcessMode() const { + return process_mode; + } + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::DC: + return dc_flag != 0; + case TextureMiscMode::NODEP: + return nodep_flag != 0; + case TextureMiscMode::AOFFI: + return aoffi_flag != 0; + default: + break; + } + return false; + } + } tex_b; + + union { BitField<22, 6, TextureQueryType> query_type; BitField<31, 4, u64> component_mask; BitField<49, 1, u64> nodep_flag; @@ -1312,7 +1377,9 @@ public: LDG, // Load from global memory STG, // Store in global memory TEX, + TEX_B, // Texture Load Bindless TXQ, // Texture Query + TXQ_B, // Texture Query Bindless TEXS, // Texture Fetch with scalar/non-vec4 source/destinations TLDS, // Texture Load with scalar/non-vec4 source/destinations TLD4, // Texture Load 4 @@ -1580,7 +1647,9 @@ private: INST("1110111011010---", 
Id::LDG, Type::Memory, "LDG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), + INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), + INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), @@ -1678,7 +1747,7 @@ private: INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), - INST("01110001-1000---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), + INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index de30ea354..fe6628923 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -207,6 +207,11 @@ public: }; } regs{}; + /// Performs any additional setup necessary in order to begin GPU emulation. + /// This can be used to launch any necessary threads and register any necessary + /// core timing events. 
+ virtual void Start() = 0; + /// Push GPU command entries to be processed virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index db507cf04..d4e2553a9 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -9,10 +9,14 @@ namespace VideoCommon { GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) - : Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {} + : GPU(system, renderer), gpu_thread{system} {} GPUAsynch::~GPUAsynch() = default; +void GPUAsynch::Start() { + gpu_thread.StartThread(renderer, *dma_pusher); +} + void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { gpu_thread.SubmitList(std::move(entries)); } diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 1dcc61a6c..30be74cba 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -13,16 +13,13 @@ class RendererBase; namespace VideoCommon { -namespace GPUThread { -class ThreadManager; -} // namespace GPUThread - /// Implementation of GPU interface that runs the GPU asynchronously class GPUAsynch : public Tegra::GPU { public: explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); ~GPUAsynch() override; + void Start() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers( std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 2cfc900ed..45e43b1dc 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -8,10 +8,12 @@ namespace VideoCommon { GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) - : Tegra::GPU(system, renderer) {} + : GPU(system, renderer) {} GPUSynch::~GPUSynch() = default; +void GPUSynch::Start() {} + void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { 
dma_pusher->Push(std::move(entries)); dma_pusher->DispatchCalls(); diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 766b5631c..3031fcf72 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -18,6 +18,7 @@ public: explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); ~GPUSynch() override; + void Start() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers( std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index cc56cf467..c9a2077de 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -55,19 +55,24 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p } } -ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer, - Tegra::DmaPusher& dma_pusher) - : system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} { - synchronization_event = system.CoreTiming().RegisterEvent( - "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); -} +ThreadManager::ThreadManager(Core::System& system) : system{system} {} ThreadManager::~ThreadManager() { + if (!thread.joinable()) { + return; + } + // Notify GPU thread that a shutdown is pending PushCommand(EndProcessingCommand()); thread.join(); } +void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { + thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; + synchronization_event = system.CoreTiming().RegisterEvent( + "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); +} + void ThreadManager::SubmitList(Tegra::CommandList&& entries) { const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; const s64 
synchronization_ticks{Core::Timing::usToCycles(9000)}; diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 62bcea5bb..cc14527c7 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -138,10 +138,12 @@ struct SynchState final { /// Class used to manage the GPU thread class ThreadManager final { public: - explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer, - Tegra::DmaPusher& dma_pusher); + explicit ThreadManager(Core::System& system); ~ThreadManager(); + /// Creates and starts the GPU thread. + void StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); + /// Push GPU command entries to be processed void SubmitList(Tegra::CommandList&& entries); diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 0f4e820aa..6c98c6701 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -199,7 +199,15 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { return {}; } -void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const { +bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) { + const GPUVAddr end = start + size; + const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start)); + const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end)); + const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start); + return range == size; +} + +void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { std::size_t remaining_size{size}; std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; @@ -226,7 +234,30 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t } } -void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { +void 
MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, + const std::size_t size) const { + std::size_t remaining_size{size}; + std::size_t page_index{src_addr >> page_bits}; + std::size_t page_offset{src_addr & page_mask}; + + while (remaining_size > 0) { + const std::size_t copy_amount{ + std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; + const u8* page_pointer = page_table.pointers[page_index]; + if (page_pointer) { + const u8* src_ptr{page_pointer + page_offset}; + std::memcpy(dest_buffer, src_ptr, copy_amount); + } else { + std::memset(dest_buffer, 0, copy_amount); + } + page_index++; + page_offset = 0; + dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; + remaining_size -= copy_amount; + } +} + +void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) { std::size_t remaining_size{size}; std::size_t page_index{dest_addr >> page_bits}; std::size_t page_offset{dest_addr & page_mask}; @@ -253,7 +284,28 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std:: } } -void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { +void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, + const std::size_t size) { + std::size_t remaining_size{size}; + std::size_t page_index{dest_addr >> page_bits}; + std::size_t page_offset{dest_addr & page_mask}; + + while (remaining_size > 0) { + const std::size_t copy_amount{ + std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; + u8* page_pointer = page_table.pointers[page_index]; + if (page_pointer) { + u8* dest_ptr{page_pointer + page_offset}; + std::memcpy(dest_ptr, src_buffer, copy_amount); + } + page_index++; + page_offset = 0; + src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; + remaining_size -= copy_amount; + } +} + +void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { 
std::size_t remaining_size{size}; std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; @@ -281,6 +333,12 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t } } +void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { + std::vector<u8> tmp_buffer(size); + ReadBlockUnsafe(src_addr, tmp_buffer.data(), size); + WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); +} + void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, VAddr backing_addr) { LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 647cbf93a..e4f0c4bd6 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -65,9 +65,32 @@ public: u8* GetPointer(GPUVAddr addr); const u8* GetPointer(GPUVAddr addr) const; - void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; - void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); - void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); + // Returns true if the block is continous in host memory, false otherwise + bool IsBlockContinous(const GPUVAddr start, const std::size_t size); + + /** + * ReadBlock and WriteBlock are full read and write operations over virtual + * GPU Memory. It's important to use these when GPU memory may not be continous + * in the Host Memory counterpart. Note: This functions cause Host GPU Memory + * Flushes and Invalidations, respectively to each operation. 
+ */ + void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const; + void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size); + void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size); + + /** + * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and + * WriteBlock respectively. In this versions, no flushing or invalidation is actually + * done and their performance is similar to a memcpy. This functions can be used + * on either of this 2 scenarios instead of their safe counterpart: + * - Memory which is sure to never be represented in the Host GPU. + * - Memory Managed by a Cache Manager. Example: Texture Flushing should use + * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture + * being flushed. + */ + void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const; + void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size); + void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size); private: using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index 8d9ee81f1..ea4a593af 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -14,28 +14,28 @@ namespace OpenGL { -CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} { +CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, + max_size{max_size} { buffer.Create(); - // Bind and unbind the buffer so it gets allocated by the driver - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - 
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); } -void CachedGlobalRegion::Reload(u32 size_) { - constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize); +CachedGlobalRegion::~CachedGlobalRegion() = default; +void CachedGlobalRegion::Reload(u32 size_) { size = size_; if (size > max_size) { size = max_size; - LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_, + LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, max_size); } + glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); +} - // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); +void CachedGlobalRegion::Flush() { + LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); + glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr); } GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { @@ -46,14 +46,16 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, return search->second; } -GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size, - u8* host_ptr) { +GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, + u32 size) { GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; if (!region) { // No reserved surface available, create a new one and reserve it auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr); - region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr); + const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; + ASSERT(cpu_addr); + + region = 
std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size); ReserveGlobalRegion(region); } region->Reload(size); @@ -65,7 +67,11 @@ void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { } GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) - : RasterizerCache{rasterizer} {} + : RasterizerCache{rasterizer} { + GLint max_ssbo_size_; + glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); + max_ssbo_size = static_cast<u32>(max_ssbo_size_); +} GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( const GLShader::GlobalMemoryEntry& global_region, @@ -73,7 +79,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( auto& gpu{Core::System::GetInstance().GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()}; const auto actual_addr{memory_manager.Read<u64>(addr)}; @@ -85,7 +91,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( if (!region) { // No global region found - create a new one - region = GetUncachedGlobalRegion(actual_addr, size, host_ptr); + region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); Register(region); } diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index 5a21ab66f..196e6e278 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h @@ -19,7 +19,7 @@ namespace OpenGL { namespace GLShader { class GlobalMemoryEntry; -} // namespace GLShader +} class RasterizerOpenGL; class CachedGlobalRegion; @@ -27,7 +27,8 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; class CachedGlobalRegion final : public RasterizerCacheObject { public: - explicit 
CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr); + explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); + ~CachedGlobalRegion(); VAddr GetCpuAddr() const override { return cpu_addr; @@ -45,14 +46,14 @@ public: /// Reloads the global region from guest memory void Reload(u32 size_); - // TODO(Rodrigo): When global memory is written (STG), implement flushing - void Flush() override { - UNIMPLEMENTED(); - } + void Flush() override; private: VAddr cpu_addr{}; + u8* host_ptr{}; u32 size{}; + u32 max_size{}; + OGLBuffer buffer; }; @@ -66,10 +67,11 @@ public: private: GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; - GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr); + GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); void ReserveGlobalRegion(GlobalRegion region); std::unordered_map<CacheAddr, GlobalRegion> reserve; + u32 max_ssbo_size{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d250d5cbb..6034dc489 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -101,12 +101,6 @@ struct FramebufferCacheKey { RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system}, screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { - // Create sampler objects - for (std::size_t i = 0; i < texture_samplers.size(); ++i) { - texture_samplers[i].Create(); - state.texture_units[i].sampler = texture_samplers[i].sampler.handle; - } - OpenGLState::ApplyDefaultState(); shader_program_manager = std::make_unique<GLShader::ProgramManager>(); @@ -582,9 +576,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( } void RasterizerOpenGL::Clear() { - const auto prev_state{state}; - SCOPE_EXIT({ 
prev_state.Apply(); }); - const auto& regs = system.GPU().Maxwell3D().regs; bool use_color{}; bool use_depth{}; @@ -656,7 +647,10 @@ void RasterizerOpenGL::Clear() { clear_state.EmulateViewportWithScissor(); } - clear_state.Apply(); + clear_state.ApplyColorMask(); + clear_state.ApplyDepth(); + clear_state.ApplyStencilTest(); + clear_state.ApplyViewport(); if (use_color) { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); @@ -756,6 +750,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { return; } res_cache.FlushRegion(addr, size); + global_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -812,92 +807,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::SamplerInfo::Create() { - sampler.Create(); - mag_filter = Tegra::Texture::TextureFilter::Linear; - min_filter = Tegra::Texture::TextureFilter::Linear; - wrap_u = Tegra::Texture::WrapMode::Wrap; - wrap_v = Tegra::Texture::WrapMode::Wrap; - wrap_p = Tegra::Texture::WrapMode::Wrap; - use_depth_compare = false; - depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; - - // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR - glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); - - // Other attributes have correct defaults -} - -void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { - const GLuint sampler_id = sampler.handle; - if (mag_filter != config.mag_filter) { - mag_filter = config.mag_filter; - glSamplerParameteri( - sampler_id, GL_TEXTURE_MAG_FILTER, - MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); - } - if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) { - min_filter = config.min_filter; - mipmap_filter = config.mipmap_filter; - 
glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, - MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter)); - } - - if (wrap_u != config.wrap_u) { - wrap_u = config.wrap_u; - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); - } - if (wrap_v != config.wrap_v) { - wrap_v = config.wrap_v; - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); - } - if (wrap_p != config.wrap_p) { - wrap_p = config.wrap_p; - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); - } - - if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) { - use_depth_compare = enabled; - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, - use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); - } - - if (depth_compare_func != config.depth_compare_func) { - depth_compare_func = config.depth_compare_func; - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, - MaxwellToGL::DepthCompareFunc(depth_compare_func)); - } - - if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) { - border_color = new_border_color; - glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data()); - } - - if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) { - max_anisotropic = anisotropic; - if (GLAD_GL_ARB_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); - } else if (GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); - } - } - - if (const float min = config.GetMinLod(); min_lod != min) { - min_lod = min; - glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod); - } - if (const float max = config.GetMaxLod(); max_lod != max) { - max_lod = max; - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod); - } - - if (const float bias = 
config.GetLodBias(); lod_bias != bias) { - lod_bias = bias; - glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias); - } -} - void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, GLuint program_handle, BaseBindings base_bindings) { @@ -953,6 +862,9 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry{entries[bindpoint]}; const auto& region{global_cache.GetGlobalRegion(entry, stage)}; + if (entry.IsWritten()) { + region->MarkAsModified(true, global_cache); + } bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, static_cast<GLsizeiptr>(region->GetSizeInBytes())); } @@ -970,10 +882,18 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + Tegra::Texture::FullTextureInfo texture; + if (entry.IsBindless()) { + const auto cbuf = entry.GetBindlessCBuf(); + Tegra::Texture::TextureHandle tex_handle; + tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); + texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); + } else { + texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + } const u32 current_bindpoint = base_bindings.sampler + bindpoint; - texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); + state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) { state.texture_units[current_bindpoint].texture = diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index e4c64ae71..a0e056142 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ 
b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -25,6 +25,7 @@ #include "video_core/renderer_opengl/gl_primitive_assembler.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" @@ -71,39 +72,7 @@ public: static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - static constexpr std::size_t MaxGlobalMemorySize = 0x10000; - static_assert(MaxGlobalMemorySize % sizeof(float) == 0, - "The maximum size of a global memory must be a multiple of the size of float"); - private: - class SamplerInfo { - public: - OGLSampler sampler; - - /// Creates the sampler object, initializing its state so that it's in sync with the - /// SamplerInfo struct. - void Create(); - /// Syncs the sampler object with the config, updating any necessary state. 
- void SyncWithConfig(const Tegra::Texture::TSCEntry& info); - - private: - Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; - Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; - Tegra::Texture::TextureMipmapFilter mipmap_filter = - Tegra::Texture::TextureMipmapFilter::None; - Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; - Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; - Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; - bool use_depth_compare = false; - Tegra::Texture::DepthCompareFunc depth_compare_func = - Tegra::Texture::DepthCompareFunc::Always; - GLvec4 border_color = {}; - float min_lod = 0.0f; - float max_lod = 16.0f; - float lod_bias = 0.0f; - float max_anisotropic = 1.0f; - }; - struct FramebufferConfigState { bool using_color_fb{}; bool using_depth_fb{}; @@ -208,6 +177,7 @@ private: RasterizerCacheOpenGL res_cache; ShaderCacheOpenGL shader_cache; GlobalRegionCacheOpenGL global_cache; + SamplerCacheOpenGL sampler_cache; Core::System& system; @@ -223,8 +193,6 @@ private: FramebufferConfigState current_framebuffer_config_state; std::pair<bool, bool> current_depth_stencil_usage{}; - std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers; - static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; PrimitiveAssembler primitive_assembler{buffer_cache}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 4ebc343c3..5a25f5b37 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -281,10 +281,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); 
params.width = config.width; - if (!params.is_tiled) { - const u32 bpp = params.GetFormatBpp() / 8; - params.pitch = config.width * bpp; - } + params.pitch = config.pitch; params.height = config.height; params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp new file mode 100644 index 000000000..3ded5ecea --- /dev/null +++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp @@ -0,0 +1,52 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/logging/log.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_sampler_cache.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" + +namespace OpenGL { + +SamplerCacheOpenGL::SamplerCacheOpenGL() = default; + +SamplerCacheOpenGL::~SamplerCacheOpenGL() = default; + +OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { + OGLSampler sampler; + sampler.Create(); + + const GLuint sampler_id{sampler.handle}; + glSamplerParameteri( + sampler_id, GL_TEXTURE_MAG_FILTER, + MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None)); + glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, + MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p)); + glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, + tsc.depth_compare_enabled == 1 ? 
GL_COMPARE_REF_TO_TEXTURE : GL_NONE); + glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, + MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func)); + glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data()); + glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod()); + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod()); + glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias()); + if (GLAD_GL_ARB_texture_filter_anisotropic) { + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); + } else if (GLAD_GL_EXT_texture_filter_anisotropic) { + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); + } else if (tsc.GetMaxAnisotropy() != 1) { + LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); + } + + return sampler; +} + +GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const { + return sampler.handle; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h new file mode 100644 index 000000000..defbc2d81 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_sampler_cache.h @@ -0,0 +1,25 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <glad/glad.h> + +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/sampler_cache.h" + +namespace OpenGL { + +class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> { +public: + explicit SamplerCacheOpenGL(); + ~SamplerCacheOpenGL(); + +protected: + OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; + + GLuint ToSamplerType(const OGLSampler& sampler) const; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 99f67494c..43f2906a8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -38,13 +38,15 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { } /// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(const u8* host_ptr) { +ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, + const u8* host_ptr) { ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); ASSERT_OR_EXECUTE(host_ptr != nullptr, { std::fill(program_code.begin(), program_code.end(), 0); return program_code; }); - std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64)); + memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), + program_code.size() * sizeof(u64)); return program_code; } @@ -497,11 +499,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { if (!shader) { // No shader found - create a new one - ProgramCode program_code{GetShaderCode(host_ptr)}; + ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; ProgramCode program_code_b; if (program == Maxwell::ShaderProgram::VertexA) { - program_code_b = GetShaderCode( - memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); + const GPUVAddr 
program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)}; + program_code_b = GetShaderCode(memory_manager, program_addr_b, + memory_manager.GetPointer(program_addr_b)); } const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 28e490b3c..cd462621d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -45,8 +45,6 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>; enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); -constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = - static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); class ShaderWriter { public: @@ -121,14 +119,10 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { /// Returns true if an object has to be treated as precise bool IsPrecise(Operation operand) { - const auto& meta = operand.GetMeta(); - + const auto& meta{operand.GetMeta()}; if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { return arithmetic->precise; } - if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) { - return half_arithmetic->precise; - } return false; } @@ -208,8 +202,10 @@ public: for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); } - for (const auto& gmem : ir.GetGlobalMemoryBases()) { - entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset); + for (const auto& gmem_pair : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem_pair; + entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, + usage.is_read, 
usage.is_written); } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -380,12 +376,22 @@ private: } void DeclareGlobalMemory() { - for (const auto& entry : ir.GetGlobalMemoryBases()) { + for (const auto& gmem : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem; + + // Since we don't know how the shader will use the shader, hint the driver to disable as + // much optimizations as possible + std::string qualifier = "coherent volatile"; + if (usage.is_read && !usage.is_written) + qualifier += " readonly"; + else if (usage.is_written && !usage.is_read) + qualifier += " writeonly"; + const std::string binding = - fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset); - code.AddLine("layout (std430, binding = " + binding + ") buffer " + - GetGlobalMemoryBlock(entry) + " {"); - code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];"); + fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset); + code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " + + GetGlobalMemoryBlock(base) + " {"); + code.AddLine(" float " + GetGlobalMemory(base) + "[];"); code.AddLine("};"); code.AddNewLine(); } @@ -617,28 +623,7 @@ private: } std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { - std::string value = VisitOperand(operation, operand_index); - switch (type) { - case Type::HalfFloat: { - const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); - if (!half_meta) { - value = "toHalf2(" + value + ')'; - } - - switch (half_meta->types.at(operand_index)) { - case Tegra::Shader::HalfType::H0_H1: - return "toHalf2(" + value + ')'; - case Tegra::Shader::HalfType::F32: - return "vec2(" + value + ')'; - case Tegra::Shader::HalfType::H0_H0: - return "vec2(toHalf2(" + value + ")[0])"; - case Tegra::Shader::HalfType::H1_H1: - return "vec2(toHalf2(" + value + ")[1])"; - } - } - default: - return 
CastOperand(value, type); - } + return CastOperand(VisitOperand(operation, operand_index), type); } std::string CastOperand(const std::string& value, Type type) const { @@ -652,9 +637,7 @@ private: case Type::Uint: return "ftou(" + value + ')'; case Type::HalfFloat: - // Can't be handled as a stand-alone value - UNREACHABLE(); - return value; + return "toHalf2(" + value + ')'; } UNREACHABLE(); return value; @@ -868,6 +851,12 @@ private: } else if (const auto lmem = std::get_if<LmemNode>(dest)) { target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]"; + } else if (const auto gmem = std::get_if<GmemNode>(dest)) { + const std::string real = Visit(gmem->GetRealAddress()); + const std::string base = Visit(gmem->GetBaseAddress()); + const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; + target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); + } else { UNREACHABLE_MSG("Assign called without a proper target"); } @@ -1067,13 +1056,40 @@ private: return BitwiseCastResult(value, Type::HalfFloat); } + std::string HClamp(Operation operation) { + const std::string value = VisitOperand(operation, 0, Type::HalfFloat); + const std::string min = VisitOperand(operation, 1, Type::Float); + const std::string max = VisitOperand(operation, 2, Type::Float); + const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))"; + return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); + } + + std::string HUnpack(Operation operation) { + const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; + const auto value = [&]() -> std::string { + switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { + case Tegra::Shader::HalfType::H0_H1: + return operand; + case Tegra::Shader::HalfType::F32: + return "vec2(fromHalf2(" + operand + "))"; + case Tegra::Shader::HalfType::H0_H0: + return "vec2(" + operand + "[0])"; + case Tegra::Shader::HalfType::H1_H1: + 
return "vec2(" + operand + "[1])"; + } + UNREACHABLE(); + return "0"; + }(); + return "fromHalf2(" + value + ')'; + } + std::string HMergeF32(Operation operation) { return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; } std::string HMergeH0(Operation operation) { - return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" + - Visit(operation[1]) + ")[0]))"; + return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" + + Visit(operation[0]) + ")[1]))"; } std::string HMergeH1(Operation operation) { @@ -1173,34 +1189,46 @@ private: return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); } + template <bool with_nan> + std::string GenerateHalfComparison(Operation operation, std::string compare_op) { + std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, + Type::HalfFloat, Type::HalfFloat)}; + if constexpr (!with_nan) { + return comparison; + } + return "halfFloatNanComparison(" + comparison + ", " + + VisitOperand(operation, 0, Type::HalfFloat) + ", " + + VisitOperand(operation, 1, Type::HalfFloat) + ')'; + } + + template <bool with_nan> std::string Logical2HLessThan(Operation operation) { - return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "lessThan"); } + template <bool with_nan> std::string Logical2HEqual(Operation operation) { - return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "equal"); } + template <bool with_nan> std::string Logical2HLessEqual(Operation operation) { - return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); } + template <bool with_nan> std::string Logical2HGreaterThan(Operation operation) { - return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, 
Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "greaterThan"); } + template <bool with_nan> std::string Logical2HNotEqual(Operation operation) { - return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "notEqual"); } + template <bool with_nan> std::string Logical2HGreaterEqual(Operation operation) { - return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat, - Type::HalfFloat); + return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); } std::string Texture(Operation operation) { @@ -1489,6 +1517,8 @@ private: &GLSLDecompiler::Fma<Type::HalfFloat>, &GLSLDecompiler::Absolute<Type::HalfFloat>, &GLSLDecompiler::HNegate, + &GLSLDecompiler::HClamp, + &GLSLDecompiler::HUnpack, &GLSLDecompiler::HMergeF32, &GLSLDecompiler::HMergeH0, &GLSLDecompiler::HMergeH1, @@ -1525,12 +1555,18 @@ private: &GLSLDecompiler::LogicalNotEqual<Type::Uint>, &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>, - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, + &GLSLDecompiler::Logical2HLessThan<false>, + &GLSLDecompiler::Logical2HEqual<false>, + &GLSLDecompiler::Logical2HLessEqual<false>, + &GLSLDecompiler::Logical2HGreaterThan<false>, + &GLSLDecompiler::Logical2HNotEqual<false>, + &GLSLDecompiler::Logical2HGreaterEqual<false>, + &GLSLDecompiler::Logical2HLessThan<true>, + &GLSLDecompiler::Logical2HEqual<true>, + &GLSLDecompiler::Logical2HLessEqual<true>, + &GLSLDecompiler::Logical2HGreaterThan<true>, + &GLSLDecompiler::Logical2HNotEqual<true>, + &GLSLDecompiler::Logical2HGreaterEqual<true>, &GLSLDecompiler::Texture, &GLSLDecompiler::TextureLod, @@ -1621,9 +1657,7 @@ private: std::string GetCommonDeclarations() { const auto cbuf = 
std::to_string(MAX_CONSTBUFFER_ELEMENTS); - const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + - "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" + "#define ftoi floatBitsToInt\n" "#define ftou floatBitsToUint\n" "#define itof intBitsToFloat\n" @@ -1633,6 +1667,12 @@ std::string GetCommonDeclarations() { "}\n\n" "vec2 toHalf2(float value) {\n" " return unpackHalf2x16(ftou(value));\n" + "}\n\n" + "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n" + " bvec2 is_nan1 = isnan(pair1);\n" + " bvec2 is_nan2 = isnan(pair2);\n" + " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " + "is_nan2.y);\n" "}\n"; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 4e04ab2f8..74032d237 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -39,8 +39,9 @@ private: class GlobalMemoryEntry { public: - explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset) - : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {} + explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, bool is_written) + : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{ + is_written} {} u32 GetCbufIndex() const { return cbuf_index; @@ -50,14 +51,25 @@ public: return cbuf_offset; } + bool IsRead() const { + return is_read; + } + + bool IsWritten() const { + return is_written; + } + private: u32 cbuf_index{}; u32 cbuf_offset{}; + bool is_read{}; + bool is_written{}; }; struct ShaderEntries { std::vector<ConstBufferEntry> const_buffers; std::vector<SamplerEntry> samplers; + std::vector<SamplerEntry> bindless_samplers; std::vector<GlobalMemoryEntry> global_memory_entries; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; std::size_t shader_length{}; @@ -68,4 +80,4 @@ std::string 
GetCommonDeclarations(); ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage, const std::string& suffix); -} // namespace OpenGL::GLShader
\ No newline at end of file +} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 8a43eb157..53752b38d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -319,16 +319,19 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn u32 type{}; u8 is_array{}; u8 is_shadow{}; + u8 is_bindless{}; if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) || file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) || file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) || file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) || - file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) { + file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) || + file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) { return {}; } - entry.entries.samplers.emplace_back( - static_cast<std::size_t>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0); + entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset), + static_cast<std::size_t>(index), + static_cast<Tegra::Shader::TextureType>(type), + is_array != 0, is_shadow != 0, is_bindless != 0); } u32 global_memory_count{}; @@ -337,11 +340,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn for (u32 i = 0; i < global_memory_count; ++i) { u32 cbuf_index{}; u32 cbuf_offset{}; + u8 is_read{}; + u8 is_written{}; if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) || - file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) { + file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) || + file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) || + file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) { return {}; } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset); + 
entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0, + is_written != 0); } for (auto& clip_distance : entry.entries.clip_distances) { @@ -388,7 +396,8 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 || file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 || file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 || - file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) { + file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 || + file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) { return false; } } @@ -397,7 +406,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu return false; for (const auto& gmem : entries.global_memory_entries) { if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 || - file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) { + file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 || + file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 || + file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 
1 : 0)) != 1) { return false; } } diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index ed3178f09..801826d3d 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -7,7 +7,6 @@ #include <unordered_map> #include "common/assert.h" -#include "common/cityhash.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" @@ -28,39 +27,20 @@ static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> } } -std::size_t SamplerCacheKey::Hash() const { - static_assert(sizeof(raw) % sizeof(u64) == 0); - return static_cast<std::size_t>( - Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64))); -} - -bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { - return raw == rhs.raw; -} - VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {} VKSamplerCache::~VKSamplerCache() = default; -vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) { - const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); - auto& sampler = entry->second; - if (is_cache_miss) { - sampler = CreateSampler(tsc); - } - return *sampler; -} - -UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) { - const float max_anisotropy = tsc.GetMaxAnisotropy(); - const bool has_anisotropy = max_anisotropy > 1.0f; +UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { + const float max_anisotropy{tsc.GetMaxAnisotropy()}; + const bool has_anisotropy{max_anisotropy > 1.0f}; - const auto border_color = tsc.GetBorderColor(); - const auto vk_border_color = TryConvertBorderColor(border_color); + const auto border_color{tsc.GetBorderColor()}; + const auto 
vk_border_color{TryConvertBorderColor(border_color)}; UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}", border_color[0], border_color[1], border_color[2], border_color[3]); - constexpr bool unnormalized_coords = false; + constexpr bool unnormalized_coords{false}; const vk::SamplerCreateInfo sampler_ci( {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), @@ -73,9 +53,13 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), unnormalized_coords); - const auto& dld = device.GetDispatchLoader(); - const auto dev = device.GetLogical(); + const auto& dld{device.GetDispatchLoader()}; + const auto dev{device.GetLogical()}; return dev.createSamplerUnique(sampler_ci, nullptr, dld); } +vk::Sampler VKSamplerCache::ToSamplerType(const UniqueSampler& sampler) const { + return *sampler; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h index c6394dc87..771b05c73 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h @@ -8,49 +8,25 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/declarations.h" +#include "video_core/sampler_cache.h" #include "video_core/textures/texture.h" namespace Vulkan { class VKDevice; -struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { - std::size_t Hash() const; - - bool operator==(const SamplerCacheKey& rhs) const; - - bool operator!=(const SamplerCacheKey& rhs) const { - return !operator==(rhs); - } -}; - -} // namespace Vulkan - -namespace std { - -template <> -struct hash<Vulkan::SamplerCacheKey> { - std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace Vulkan { - -class VKSamplerCache { +class VKSamplerCache final : public 
VideoCommon::SamplerCache<vk::Sampler, UniqueSampler> { public: explicit VKSamplerCache(const VKDevice& device); ~VKSamplerCache(); - vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc); +protected: + UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; -private: - UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc); + vk::Sampler ToSamplerType(const UniqueSampler& sampler) const; +private: const VKDevice& device; - std::unordered_map<SamplerCacheKey, UniqueSampler> cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index e0a6f5e87..23d9b10db 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -76,14 +76,10 @@ constexpr u32 GetGenericAttributeLocation(Attribute::Index attribute) { /// Returns true if an object has to be treated as precise bool IsPrecise(Operation operand) { - const auto& meta = operand.GetMeta(); - + const auto& meta{operand.GetMeta()}; if (std::holds_alternative<MetaArithmetic>(meta)) { return std::get<MetaArithmetic>(meta).precise; } - if (std::holds_alternative<MetaHalfArithmetic>(meta)) { - return std::get<MetaHalfArithmetic>(meta).precise; - } return false; } @@ -191,8 +187,9 @@ public: for (const auto& cbuf : ir.GetConstantBuffers()) { entries.const_buffers.emplace_back(cbuf.second, cbuf.first); } - for (const auto& gmem : ir.GetGlobalMemoryBases()) { - entries.global_buffers.emplace_back(gmem.cbuf_index, gmem.cbuf_offset); + for (const auto& gmem_pair : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem_pair; + entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset); } for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); @@ -225,7 +222,7 @@ private: return current_binding; }; const_buffers_base_binding = Allocate(ir.GetConstantBuffers().size()); - 
global_buffers_base_binding = Allocate(ir.GetGlobalMemoryBases().size()); + global_buffers_base_binding = Allocate(ir.GetGlobalMemory().size()); samplers_base_binding = Allocate(ir.GetSamplers().size()); ASSERT_MSG(binding_iterator - binding_base < STAGE_BINDING_STRIDE, @@ -390,14 +387,15 @@ private: void DeclareGlobalBuffers() { u32 binding = global_buffers_base_binding; - for (const auto& entry : ir.GetGlobalMemoryBases()) { + for (const auto& entry : ir.GetGlobalMemory()) { + const auto [base, usage] = entry; const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); AddGlobalVariable( - Name(id, fmt::format("gmem_{}_{}", entry.cbuf_index, entry.cbuf_offset))); + Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset))); Decorate(id, spv::Decoration::Binding, binding++); Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - global_buffers.emplace(entry, id); + global_buffers.emplace(base, id); } } @@ -744,6 +742,16 @@ private: return {}; } + Id HClamp(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id HUnpack(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id HMergeF32(Operation operation) { UNIMPLEMENTED(); return {}; @@ -1216,6 +1224,8 @@ private: &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, &SPIRVDecompiler::HNegate, + &SPIRVDecompiler::HClamp, + &SPIRVDecompiler::HUnpack, &SPIRVDecompiler::HMergeF32, &SPIRVDecompiler::HMergeH0, &SPIRVDecompiler::HMergeH1, @@ -1258,6 +1268,13 @@ private: &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>, &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>, &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>, + // TODO(Rodrigo): Should these use the OpFUnord* variants? 
+ &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>, + &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>, &SPIRVDecompiler::Texture, &SPIRVDecompiler::TextureLod, diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp new file mode 100644 index 000000000..53c7ef12d --- /dev/null +++ b/src/video_core/sampler_cache.cpp @@ -0,0 +1,21 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/cityhash.h" +#include "common/common_types.h" +#include "video_core/sampler_cache.h" + +namespace VideoCommon { + +std::size_t SamplerCacheKey::Hash() const { + static_assert(sizeof(raw) % sizeof(u64) == 0); + return static_cast<std::size_t>( + Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64))); +} + +bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { + return raw == rhs.raw; +} + +} // namespace VideoCommon diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h new file mode 100644 index 000000000..cbe3ad071 --- /dev/null +++ b/src/video_core/sampler_cache.h @@ -0,0 +1,60 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <cstddef> +#include <unordered_map> + +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { + std::size_t Hash() const; + + bool operator==(const SamplerCacheKey& rhs) const; + + bool operator!=(const SamplerCacheKey& rhs) const { + return !operator==(rhs); + } +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash<VideoCommon::SamplerCacheKey> { + std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std + +namespace VideoCommon { + +template <typename SamplerType, typename SamplerStorageType> +class SamplerCache { +public: + SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) { + const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); + auto& sampler = entry->second; + if (is_cache_miss) { + sampler = CreateSampler(tsc); + } + return ToSamplerType(sampler); + } + +protected: + virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0; + + virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0; + +private: + std::unordered_map<SamplerCacheKey, SamplerStorageType> cache; +}; + +} // namespace VideoCommon
\ No newline at end of file diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index baee89107..9467f9417 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -18,7 +18,9 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { if (opcode->get().GetId() == OpCode::Id::HADD2_C || opcode->get().GetId() == OpCode::Id::HADD2_R) { - UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); + if (instr.alu_half.ftz != 0) { + LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + } } UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); @@ -27,9 +29,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { const bool negate_b = opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; - const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a); - - // instr.alu_half.type_a + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); + op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); Node op_b = [&]() { switch (opcode->get().GetId()) { @@ -44,17 +45,17 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { return Immediate(0); } }(); + op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b); op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); Node value = [&]() { - MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}}; switch (opcode->get().GetId()) { case OpCode::Id::HADD2_C: case OpCode::Id::HADD2_R: - return Operation(OperationCode::HAdd, meta, op_a, op_b); + return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); case OpCode::Id::HMUL2_C: case OpCode::Id::HMUL2_R: - return Operation(OperationCode::HMul, meta, op_a, op_b); + return Operation(OperationCode::HMul, PRECISE, op_a, op_b); default: UNIMPLEMENTED_MSG("Unhandled half float instruction: 
{}", opcode->get().GetName()); return Immediate(0); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index c2164ba50..fbcd35b18 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -17,34 +17,33 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0); + if (instr.alu_half_imm.ftz != 0) { + LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + } } else { UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); } - UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0, - "Half float immediate saturation not implemented"); - Node op_a = GetRegister(instr.gpr8); + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); const Node op_b = UnpackHalfImmediate(instr, true); Node value = [&]() { - MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}}; switch (opcode->get().GetId()) { case OpCode::Id::HADD2_IMM: - return Operation(OperationCode::HAdd, meta, op_a, op_b); + return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); case OpCode::Id::HMUL2_IMM: - return Operation(OperationCode::HMul, meta, op_a, op_b); + return Operation(OperationCode::HMul, PRECISE, op_a, op_b); default: UNREACHABLE(); return Immediate(0); } }(); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); + value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); + value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); SetRegister(bb, instr.gpr0, value); - return pc; } diff --git a/src/video_core/shader/decode/conversion.cpp 
b/src/video_core/shader/decode/conversion.cpp index 55a6fbbf2..ba15b1115 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -18,13 +18,29 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: { + case OpCode::Id::I2I_R: + case OpCode::Id::I2I_C: + case OpCode::Id::I2I_IMM: { UNIMPLEMENTED_IF(instr.conversion.selector); + UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.alu.saturate_d); const bool input_signed = instr.conversion.is_input_signed; const bool output_signed = instr.conversion.is_output_signed; - Node value = GetRegister(instr.gpr20); + Node value = [&]() { + switch (opcode->get().GetId()) { + case OpCode::Id::I2I_R: + return GetRegister(instr.gpr20); + case OpCode::Id::I2I_C: + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); + case OpCode::Id::I2I_IMM: + return Immediate(instr.alu.GetSignedImm20_20()); + default: + UNREACHABLE(); + return Immediate(0); + } + }(); value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a, @@ -38,17 +54,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::I2F_R: - case OpCode::Id::I2F_C: { - UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); + case OpCode::Id::I2F_C: + case OpCode::Id::I2F_IMM: { + UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); UNIMPLEMENTED_IF(instr.conversion.selector); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in I2F is not implemented"); Node value = [&]() { - if (instr.is_b_gpr) { + switch (opcode->get().GetId()) { + case OpCode::Id::I2F_R: return GetRegister(instr.gpr20); - } else { + case OpCode::Id::I2F_C: return GetConstBuffer(instr.cbuf34.index, 
instr.cbuf34.GetOffset()); + case OpCode::Id::I2F_IMM: + return Immediate(instr.alu.GetSignedImm20_20()); + default: + UNREACHABLE(); + return Immediate(0); } }(); const bool input_signed = instr.conversion.is_input_signed; @@ -62,24 +85,31 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::F2F_R: - case OpCode::Id::F2F_C: { - UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); - UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); + case OpCode::Id::F2F_C: + case OpCode::Id::F2F_IMM: { + UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); + UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2F is not implemented"); Node value = [&]() { - if (instr.is_b_gpr) { + switch (opcode->get().GetId()) { + case OpCode::Id::F2F_R: return GetRegister(instr.gpr20); - } else { + case OpCode::Id::F2F_C: return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); + case OpCode::Id::F2F_IMM: + return GetImmediate19(instr); + default: + UNREACHABLE(); + return Immediate(0); } }(); value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); value = [&]() { - switch (instr.conversion.f2f.rounding) { + switch (instr.conversion.f2f.GetRoundingMode()) { case Tegra::Shader::F2fRoundingOp::None: return value; case Tegra::Shader::F2fRoundingOp::Round: @@ -102,15 +132,22 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::F2I_R: - case OpCode::Id::F2I_C: { + case OpCode::Id::F2I_C: + case OpCode::Id::F2I_IMM: { UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in F2I is not implemented"); Node value = [&]() { - if (instr.is_b_gpr) { + switch (opcode->get().GetId()) { + case OpCode::Id::F2I_R: return GetRegister(instr.gpr20); - } else { + case 
OpCode::Id::F2I_C: return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); + case OpCode::Id::F2I_IMM: + return GetImmediate19(instr); + default: + UNREACHABLE(); + return Immediate(0); } }(); @@ -134,7 +171,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { }(); const bool is_signed = instr.conversion.is_output_signed; value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); - value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed); + value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); SetRegister(bb, instr.gpr0, value); break; diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 748368555..1dd94bf9d 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -18,11 +18,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.hset2.ftz != 0); + if (instr.hset2.ftz != 0) { + LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + } + + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); + op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); - // instr.hset2.type_a - // instr.hset2.type_b - Node op_a = GetRegister(instr.gpr8); Node op_b = [&]() { switch (opcode->get().GetId()) { case OpCode::Id::HSET2_R: @@ -32,14 +34,12 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { return Immediate(0); } }(); - - op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); + op_b = UnpackHalfFloat(op_b, instr.hset2.type_b); op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); - MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}}; - const Node comparison_pair = 
GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b); + const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index e68512692..6e59eb650 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -19,10 +19,10 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); - Node op_a = GetRegister(instr.gpr8); + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - const Node op_b = [&]() { + Node op_b = [&]() { switch (opcode->get().GetId()) { case OpCode::Id::HSETP2_R: return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, @@ -32,6 +32,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { return Immediate(0); } }(); + op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); // We can't use the constant predicate as destination. ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); @@ -42,8 +43,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const OperationCode pair_combiner = instr.hsetp2.h_and ? 
OperationCode::LogicalAll2 : OperationCode::LogicalAny2; - MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}}; - const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b); + const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); const Node first_pred = Operation(pair_combiner, comparison); // Set the primary predicate to the result of Predicate OP SecondPredicate diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 7a07c5ec6..5c1becce5 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -27,10 +27,6 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { } constexpr auto identity = HalfType::H0_H1; - - const HalfType type_a = instr.hfma2.type_a; - const Node op_a = GetRegister(instr.gpr8); - bool neg_b{}, neg_c{}; auto [saturate, type_b, op_b, type_c, op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { @@ -62,11 +58,11 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { }(); UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); - op_b = GetOperandAbsNegHalf(op_b, false, neg_b); - op_c = GetOperandAbsNegHalf(op_c, false, neg_c); + const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); + op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); + op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); - MetaHalfArithmetic meta{true, {type_a, type_b, type_c}}; - Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c); + Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); SetRegister(bb, instr.gpr0, value); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ea3c71eed..ea1092db1 100644 --- a/src/video_core/shader/decode/memory.cpp +++ 
b/src/video_core/shader/decode/memory.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -18,6 +19,23 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +namespace { +u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { + switch (uniform_type) { + case Tegra::Shader::UniformType::Single: + return 1; + case Tegra::Shader::UniformType::Double: + return 2; + case Tegra::Shader::UniformType::Quad: + case Tegra::Shader::UniformType::UnsignedQuad: + return 4; + default: + UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); + return 1; + } +} +} // namespace + u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -85,8 +103,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::LD_L: { - UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", - static_cast<u32>(instr.ld_l.unknown.Value())); + LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", + static_cast<u64>(instr.ld_l.unknown.Value())); const auto GetLmem = [&](s32 offset) { ASSERT(offset % 4 == 0); @@ -126,45 +144,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::LDG: { - const u32 count = [&]() { - switch (instr.ldg.type) { - case Tegra::Shader::UniformType::Single: - return 1; - case Tegra::Shader::UniformType::Double: - return 2; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 4; - default: - UNIMPLEMENTED_MSG("Unimplemented LDG size!"); - return 1; - } - }(); - - const Node addr_register = GetRegister(instr.gpr8); - const Node base_address = - TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - const auto cbuf = 
std::get_if<CbufNode>(base_address); - ASSERT(cbuf != nullptr); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); - ASSERT(cbuf_offset_imm != nullptr); - const auto cbuf_offset = cbuf_offset_imm->GetValue(); - - bb.push_back(Comment( - fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); - - const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; - used_global_memory_bases.insert(descriptor); - - const Node immediate_offset = - Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value())); - const Node base_real_address = - Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register); + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), + static_cast<u32>(instr.ldg.immediate_offset.Value()), false); + const u32 count = GetUniformTypeElementsCount(instr.ldg.type); for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); SetTemporal(bb, i, gmem); @@ -174,6 +162,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::STG: { + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), + static_cast<u32>(instr.stg.immediate_offset.Value()), true); + + // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} + SetTemporal(bb, 0, real_address_base); + + const u32 count = GetUniformTypeElementsCount(instr.stg.type); + for (u32 i = 0; i < count; ++i) { + SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); + } + for (u32 i = 0; i < count; ++i) { + const Node it_offset = Immediate(i * 4); + const Node real_address = + 
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + + bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); + } + break; + } case OpCode::Id::ST_A: { UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, "Indirect attribute loads are not supported"); @@ -205,8 +215,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::ST_L: { - UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", - static_cast<u32>(instr.st_l.unknown.Value())); + LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", + static_cast<u64>(instr.st_l.cache_management.Value())); const auto GetLmemAddr = [&](s32 offset) { ASSERT(offset % 4 == 0); @@ -236,4 +246,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return pc; } +std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, + Node addr_register, + u32 immediate_offset, + bool is_write) { + const Node base_address{ + TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; + const auto cbuf = std::get_if<CbufNode>(base_address); + ASSERT(cbuf != nullptr); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + ASSERT(cbuf_offset_imm != nullptr); + const auto cbuf_offset = cbuf_offset_imm->GetValue(); + + bb.push_back( + Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); + + const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; + const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); + auto& usage = entry->second; + if (is_write) { + usage.is_written = true; + } else { + usage.is_read = true; + } + + const auto real_address = + Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); + + return {real_address, base_address, descriptor}; +} + } // namespace VideoCommon::Shader diff --git 
a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index a775b402b..fa65ac9a9 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -40,7 +40,7 @@ static std::size_t GetCoordCount(TextureType texture_type) { u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - + bool is_bindless = false; switch (opcode->get().GetId()) { case OpCode::Id::TEX: { if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { @@ -54,7 +54,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto process_mode = instr.tex.GetTextureProcessMode(); WriteTexInstructionFloat( bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi)); + GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); + break; + } + case OpCode::Id::TEX_B: { + UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + + if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); + } + + const TextureType texture_type{instr.tex_b.texture_type}; + const bool is_array = instr.tex_b.array != 0; + const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); + const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.tex_b.GetTextureProcessMode(); + WriteTexInstructionFloat(bb, instr, + GetTexCode(instr, texture_type, process_mode, depth_compare, + is_array, is_aoffi, {instr.gpr20})); break; } case OpCode::Id::TEXS: { @@ -134,6 +152,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { WriteTexsInstructionFloat(bb, instr, values); break; } + case OpCode::Id::TXQ_B: + is_bindless = true; + [[fallthrough]]; case OpCode::Id::TXQ: { if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { LOG_WARNING(HW_GPU, "TXQ.NODEP 
implementation is incomplete"); @@ -143,7 +164,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + is_bindless + ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, + false) + : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); u32 indexer = 0; switch (instr.txq.query_type) { @@ -154,7 +178,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); + Operation(OperationCode::TextureQueryDimensions, meta, + GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); SetTemporal(bb, indexer++, value); } for (u32 i = 0; i < indexer; ++i) { @@ -168,6 +193,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::TMML_B: + is_bindless = true; + [[fallthrough]]; case OpCode::Id::TMML: { UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), "NDV is not implemented"); @@ -178,7 +206,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = is_bindless + ? 
GetBindlessSampler(instr.gpr20, texture_type, is_array, false) + : GetSampler(instr.sampler, texture_type, is_array, false); std::vector<Node> coords; @@ -199,17 +229,19 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { coords.push_back(GetRegister(instr.gpr8.Value() + 1)); texture_type = TextureType::Texture2D; } - + u32 indexer = 0; for (u32 element = 0; element < 2; ++element) { + if (!instr.tmml.IsComponentEnabled(element)) { + continue; + } auto params = coords; MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); - SetTemporal(bb, element, value); + SetTemporal(bb, indexer++, value); } - for (u32 element = 0; element < 2; ++element) { - SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); } - break; } case OpCode::Id::TLDS: { @@ -254,6 +286,34 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu return *used_samplers.emplace(entry).first; } +const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, + bool is_array, bool is_shadow) { + const Node sampler_register = GetRegister(reg); + const Node base_sampler = + TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); + const auto cbuf = std::get_if<CbufNode>(base_sampler); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + ASSERT(cbuf_offset_imm != nullptr); + const auto cbuf_offset = cbuf_offset_imm->GetValue(); + const auto cbuf_index = cbuf->GetIndex(); + const u64 cbuf_key = (cbuf_index << 32) | cbuf_offset; + + // If this sampler has already been used, return the existing mapping. 
+ const auto itr = + std::find_if(used_samplers.begin(), used_samplers.end(), + [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; }); + if (itr != used_samplers.end()) { + ASSERT(itr->GetType() == type && itr->IsArray() == is_array && + itr->IsShadow() == is_shadow); + return *itr; + } + + // Otherwise create a new mapping for this sampler + const std::size_t next_index = used_samplers.size(); + const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow}; + return *used_samplers.emplace(entry).first; +} + void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { u32 dest_elem = 0; for (u32 elem = 0; elem < 4; ++elem) { @@ -326,22 +386,27 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, std::vector<Node> coords, Node array, Node depth_compare, u32 bias_offset, - std::vector<Node> aoffi) { + std::vector<Node> aoffi, + std::optional<Tegra::Shader::Register> bindless_reg) { const bool is_array = array; const bool is_shadow = depth_compare; + const bool is_bindless = bindless_reg.has_value(); UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); + const auto& sampler = is_bindless + ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) + : GetSampler(instr.sampler, texture_type, is_array, is_shadow); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || process_mode == TextureProcessMode::LLA; - // LOD selection (either via bias or explicit textureLod) not supported in GL for - // sampler2DArrayShadow and samplerCubeArrayShadow. 
+ // LOD selection (either via bias or explicit textureLod) not + // supported in GL for sampler2DArrayShadow and + // samplerCubeArrayShadow. const bool gl_lod_supported = !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); @@ -359,8 +424,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, lod = Immediate(0.0f); break; case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register indexed by the gpr20 - // field with an offset depending on the usage of the other registers + // If present, lod or bias are always stored in the register + // indexed by the gpr20 field with an offset depending on the + // usage of the other registers bias = GetRegister(instr.gpr20.Value() + bias_offset); break; case TextureProcessMode::LL: @@ -384,11 +450,18 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi) { + bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { const bool lod_bias_enabled{ (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; + const bool is_bindless = bindless_reg.has_value(); + u64 parameter_register = instr.gpr20.Value(); + if (is_bindless) { + ++parameter_register; + } + + const u32 bias_lod_offset = (is_bindless ? 
1 : 0); if (lod_bias_enabled) { ++parameter_register; } @@ -423,7 +496,8 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, dc = GetRegister(parameter_register++); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, + aoffi, bindless_reg); } Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, @@ -459,7 +533,8 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, dc = GetRegister(depth_register); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, + {}); } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index ac5112d78..17f2f711c 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -189,7 +189,11 @@ Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); - return Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, first_negate, second_negate); + return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); +} + +Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { + return Operation(OperationCode::HUnpack, type, value); } Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { @@ -209,17 +213,26 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { if (absolute) { - value = Operation(OperationCode::HAbsolute, 
HALF_NO_PRECISE, value); + value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); } if (negate) { - value = Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, GetPredicate(true), + value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), GetPredicate(true)); } return value; } +Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { + if (!saturate) { + return value; + } + const Node positive_zero = Immediate(std::copysignf(0, 1)); + const Node positive_one = Immediate(1.0f); + return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); +} + Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { - static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { + const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { {PredCondition::LessThan, OperationCode::LogicalFLessThan}, {PredCondition::Equal, OperationCode::LogicalFEqual}, {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, @@ -255,7 +268,7 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, Node op_b) { - static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { + const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { {PredCondition::LessThan, OperationCode::LogicalILessThan}, {PredCondition::Equal, OperationCode::LogicalIEqual}, {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, @@ -283,40 +296,32 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si return predicate; } -Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, - const MetaHalfArithmetic& meta, Node op_a, Node op_b) { - - UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || - condition == 
PredCondition::NotEqualWithNan || - condition == PredCondition::LessEqualWithNan || - condition == PredCondition::GreaterThanWithNan || - condition == PredCondition::GreaterEqualWithNan, - "Unimplemented NaN comparison for half floats"); - - static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { +Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, + Node op_b) { + const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { {PredCondition::LessThan, OperationCode::Logical2HLessThan}, {PredCondition::Equal, OperationCode::Logical2HEqual}, {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, - {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan}, - {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual}, - {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual}, - {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan}, - {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}}; + {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, + {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, + {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, + {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, + {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}}; const auto comparison{PredicateComparisonTable.find(condition)}; UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), "Unknown predicate comparison operation"); - const Node predicate = Operation(comparison->second, meta, op_a, op_b); + const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, 
op_b); return predicate; } OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { - static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { + const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { {PredOperation::And, OperationCode::LogicalAnd}, {PredOperation::Or, OperationCode::LogicalOr}, {PredOperation::Xor, OperationCode::LogicalXor}, diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 4888998d3..81278fb33 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -109,11 +109,13 @@ enum class OperationCode { UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint UBitCount, /// (MetaArithmetic, uint) -> uint - HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 + HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 HAbsolute, /// (f16vec2 a) -> f16vec2 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 + HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 + HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 HMergeF32, /// (f16vec2 src) -> float HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 @@ -150,12 +152,18 @@ enum class OperationCode { LogicalUNotEqual, /// (uint a, uint b) -> bool LogicalUGreaterEqual, /// (uint a, uint b) -> bool - Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, 
f16vec2) -> bool2 - Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Texture, /// (MetaTexture, float[N] coords) -> float4 TextureLod, /// (MetaTexture, float[N] coords) -> float4 @@ -196,9 +204,23 @@ enum class ExitMethod { class Sampler { public: + // Use this constructor for bounded Samplers explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, bool is_array, bool is_shadow) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{false} {} + + // Use this constructor for bindless Samplers + explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, + Tegra::Shader::TextureType type, bool is_array, bool is_shadow) + : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, + is_array{is_array}, is_shadow{is_shadow}, 
is_bindless{true} {} + + // Use this only for serialization/deserialization + explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow, bool is_bindless) + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{is_bindless} {} std::size_t GetOffset() const { return offset; @@ -220,6 +242,14 @@ public: return is_shadow; } + bool IsBindless() const { + return is_bindless; + } + + std::pair<u32, u32> GetBindlessCBuf() const { + return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; + } + bool operator<(const Sampler& rhs) const { return std::tie(offset, index, type, is_array, is_shadow) < std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow); @@ -231,8 +261,9 @@ private: std::size_t offset{}; std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. + bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. 
}; class ConstBuffer { @@ -276,15 +307,13 @@ struct GlobalMemoryBase { } }; -struct MetaArithmetic { - bool precise{}; +struct GlobalMemoryUsage { + bool is_read{}; + bool is_written{}; }; -struct MetaHalfArithmetic { +struct MetaArithmetic { bool precise{}; - std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1, - Tegra::Shader::HalfType::H0_H1, - Tegra::Shader::HalfType::H0_H1}; }; struct MetaTexture { @@ -298,11 +327,10 @@ struct MetaTexture { u32 element{}; }; -constexpr MetaArithmetic PRECISE = {true}; -constexpr MetaArithmetic NO_PRECISE = {false}; -constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false}; +inline constexpr MetaArithmetic PRECISE = {true}; +inline constexpr MetaArithmetic NO_PRECISE = {false}; -using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>; +using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; /// Holds any kind of operation that can be done in the IR class OperationNode final { @@ -578,8 +606,8 @@ public: return used_clip_distances; } - const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const { - return used_global_memory_bases; + const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const { + return used_global_memory; } std::size_t GetLength() const { @@ -706,10 +734,14 @@ private: /// Unpacks a half immediate from an instruction Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); + /// Unpacks a binary value into a half float pair with a type format + Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); /// Merges a half pair into another value Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); /// Conditionally absolute/negated half float pair. 
Absolute is applied first Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); + /// Conditionally saturates a half float pair + Node GetSaturatedHalfFloat(Node value, bool saturate = true); /// Returns a predicate comparing two floats Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); @@ -717,8 +749,7 @@ private: Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, Node op_a, Node op_b); /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared - Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, - const MetaHalfArithmetic& meta, Node op_a, Node op_b); + Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); /// Returns a predicate combiner operation OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); @@ -730,6 +761,11 @@ private: const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + // Accesses a texture sampler for a bindless texture. 
+ const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, + Tegra::Shader::TextureType type, bool is_array, + bool is_shadow); + /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -743,7 +779,8 @@ private: Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, bool is_aoffi); + bool is_array, bool is_aoffi, + std::optional<Tegra::Shader::Register> bindless_reg); Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, @@ -763,7 +800,8 @@ private: Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi); + Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, + std::optional<Tegra::Shader::Register> bindless_reg); Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height); @@ -781,6 +819,11 @@ private: std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); + std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb, + Node addr_register, + u32 immediate_offset, + bool is_write); + template <typename... T> Node Operation(OperationCode code, const T*... 
operands) { return StoreNode(OperationNode(code, operands...)); @@ -834,7 +877,7 @@ private: std::map<u32, ConstBuffer> used_cbufs; std::set<Sampler> used_samplers; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; - std::set<GlobalMemoryBase> used_global_memory_bases; + std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; Tegra::Shader::Header header; }; diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 995d0e068..217805386 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -288,6 +288,29 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 } } +void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, + const u32 block_height, const std::size_t copy_size, const u8* source_data, + u8* swizzle_data) { + const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; + std::size_t count = 0; + for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { + const std::size_t gob_address_y = + (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + + ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; + const auto& table = legacy_swizzle_table[y % gob_size_y]; + for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { + const std::size_t gob_address = + gob_address_y + (x / gob_size_x) * gob_size * block_height; + const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; + const u8* source_line = source_data + count; + u8* dest_addr = swizzle_data + swizzled_offset; + count++; + + std::memcpy(dest_addr, source_line, 1); + } + } +} + std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, u32 height) { std::vector<u8> rgba_data; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e078fa274..e072d8401 100644 --- 
a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -51,4 +51,8 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, u32 offset_x, u32 offset_y); +void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, + const u32 block_height, const std::size_t copy_size, const u8* source_data, + u8* swizzle_data); + } // namespace Tegra::Texture diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index cb82ecf3f..60cda0ca3 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -5,6 +5,8 @@ #include <memory> #include "core/core.h" #include "core/settings.h" +#include "video_core/gpu_asynch.h" +#include "video_core/gpu_synch.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/video_core.h" @@ -16,6 +18,14 @@ std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_wind return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system); } +std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) { + if (Settings::values.use_asynchronous_gpu_emulation) { + return std::make_unique<VideoCommon::GPUAsynch>(system, system.Renderer()); + } + + return std::make_unique<VideoCommon::GPUSynch>(system, system.Renderer()); +} + u16 GetResolutionScaleFactor(const RendererBase& renderer) { return static_cast<u16>( Settings::values.resolution_factor diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 3c583f195..b8e0ac372 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -14,6 +14,10 @@ namespace Core::Frontend { class EmuWindow; } +namespace Tegra { +class GPU; +} + namespace VideoCore { class RendererBase; @@ -27,6 +31,9 @@ class RendererBase; std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, Core::System& 
system); +/// Creates an emulated GPU instance using the given system context. +std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system); + u16 GetResolutionScaleFactor(const RendererBase& renderer); } // namespace VideoCore diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 2eb86d6e5..31b65c04c 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -151,6 +151,12 @@ target_link_libraries(yuzu PRIVATE common core input_common video_core) target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets) target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) +target_compile_definitions(yuzu PRIVATE + # Use QStringBuilder for string concatenation to reduce + # the overall number of temporary strings created. + -DQT_USE_QSTRINGBUILDER +) + if (YUZU_ENABLE_COMPATIBILITY_REPORTING) target_compile_definitions(yuzu PRIVATE -DYUZU_ENABLE_COMPATIBILITY_REPORTING) endif() diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index c29f2d2dc..7eed9fcf3 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -91,8 +91,8 @@ void EmuThread::run() { class GGLContext : public Core::Frontend::GraphicsContext { public: - explicit GGLContext(QOpenGLContext* shared_context) : surface() { - context = std::make_unique<QOpenGLContext>(shared_context); + explicit GGLContext(QOpenGLContext* shared_context) + : context{std::make_unique<QOpenGLContext>(shared_context)} { surface.setFormat(shared_context->format()); surface.create(); } @@ -186,8 +186,7 @@ private: }; GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread) - : QWidget(parent), child(nullptr), context(nullptr), emu_thread(emu_thread) { - + : QWidget(parent), emu_thread(emu_thread) { setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") .arg(Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc)); setAttribute(Qt::WA_AcceptTouchEvents); diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index 
9608b959f..3df33aca1 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -10,7 +10,6 @@ #include <QImage> #include <QThread> #include <QWidget> -#include "common/thread.h" #include "core/core.h" #include "core/frontend/emu_window.h" diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp index 51bd1f121..a5218b051 100644 --- a/src/yuzu/configuration/configure_dialog.cpp +++ b/src/yuzu/configuration/configure_dialog.cpp @@ -12,7 +12,7 @@ #include "yuzu/hotkeys.h" ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry) - : QDialog(parent), registry(registry), ui(new Ui::ConfigureDialog) { + : QDialog(parent), ui(new Ui::ConfigureDialog), registry(registry) { ui->setupUi(this); ui->hotkeysTab->Populate(registry); this->setConfiguration(); diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp index bfb562535..a7a8752e5 100644 --- a/src/yuzu/configuration/configure_hotkeys.cpp +++ b/src/yuzu/configuration/configure_hotkeys.cpp @@ -66,20 +66,21 @@ void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) { } void ConfigureHotkeys::Configure(QModelIndex index) { - if (index.parent() == QModelIndex()) + if (!index.parent().isValid()) { return; + } index = index.sibling(index.row(), 1); - auto* model = ui->hotkey_list->model(); - auto previous_key = model->data(index); - - auto* hotkey_dialog = new SequenceDialog; - int return_code = hotkey_dialog->exec(); + auto* const model = ui->hotkey_list->model(); + const auto previous_key = model->data(index); - auto key_sequence = hotkey_dialog->GetSequence(); + SequenceDialog hotkey_dialog{this}; - if (return_code == QDialog::Rejected || key_sequence.isEmpty()) + const int return_code = hotkey_dialog.exec(); + const auto key_sequence = hotkey_dialog.GetSequence(); + if (return_code == QDialog::Rejected || key_sequence.isEmpty()) { return; + } if (IsUsedKey(key_sequence) && key_sequence != 
QKeySequence(previous_key.toString())) { QMessageBox::critical(this, tr("Error in inputted key"), @@ -90,7 +91,7 @@ void ConfigureHotkeys::Configure(QModelIndex index) { } } -bool ConfigureHotkeys::IsUsedKey(QKeySequence key_sequence) { +bool ConfigureHotkeys::IsUsedKey(QKeySequence key_sequence) const { return GetUsedKeyList().contains(key_sequence); } diff --git a/src/yuzu/configuration/configure_hotkeys.h b/src/yuzu/configuration/configure_hotkeys.h index cd203aad6..73fb8a175 100644 --- a/src/yuzu/configuration/configure_hotkeys.h +++ b/src/yuzu/configuration/configure_hotkeys.h @@ -6,7 +6,6 @@ #include <memory> #include <QWidget> -#include "core/settings.h" namespace Ui { class ConfigureHotkeys; @@ -39,7 +38,7 @@ signals: private: void Configure(QModelIndex index); - bool IsUsedKey(QKeySequence key_sequence); + bool IsUsedKey(QKeySequence key_sequence) const; QList<QKeySequence> GetUsedKeyList() const; std::unique_ptr<Ui::ConfigureHotkeys> ui; diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h index 3db0e90da..2cf5c58a0 100644 --- a/src/yuzu/game_list_p.h +++ b/src/yuzu/game_list_p.h @@ -95,7 +95,7 @@ public: if (row2.isEmpty()) return row1; - return row1 + "\n " + row2; + return QString(row1 + "\n " + row2); } return GameListItem::data(role); |