diff options
62 files changed, 1688 insertions, 602 deletions
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers -Subproject d05c8df88da98ec1ab3bc600d7f5783b4060895 +Subproject fd568d51ed3d9bc6132e1639d7492453a08fe1b diff --git a/src/core/core.cpp b/src/core/core.cpp index 20d64f3b0..3d0978cbf 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -104,7 +104,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, return vfs->OpenFile(path, FileSys::Mode::Read); } struct System::Impl { - explicit Impl(System& system) : kernel{system}, cpu_core_manager{system}, reporter{system} {} + explicit Impl(System& system) + : kernel{system}, cpu_core_manager{system}, applet_manager{system}, reporter{system} {} Cpu& CurrentCpuCore() { return cpu_core_manager.GetCurrentCore(); diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 40cea1e7c..c7af87073 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -296,12 +296,6 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { } ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { - const auto end_addr = target + size; - const auto last_addr = end_addr - 1; - VAddr cur_addr = target; - - ResultCode result = RESULT_SUCCESS; - // Check how much memory we've already mapped. const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size); if (mapped_size_result.Failed()) { @@ -324,13 +318,16 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { // Keep track of the memory regions we unmap. std::vector<std::pair<u64, u64>> mapped_regions; + ResultCode result = RESULT_SUCCESS; // Iterate, trying to map memory. 
{ - cur_addr = target; + const auto end_addr = target + size; + const auto last_addr = end_addr - 1; + VAddr cur_addr = target; auto iter = FindVMA(target); - ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end"); + ASSERT(iter != vma_map.end()); while (true) { const auto& vma = iter->second; @@ -342,7 +339,7 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr); if (vma.state == MemoryState::Unmapped) { const auto map_res = - MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size, 0), 0, + MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size), 0, map_size, MemoryState::Heap, VMAPermission::ReadWrite); result = map_res.Code(); if (result.IsError()) { @@ -360,7 +357,7 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { // Advance to the next block. cur_addr = vma_end; iter = FindVMA(cur_addr); - ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end"); + ASSERT(iter != vma_map.end()); } } @@ -368,7 +365,7 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { if (result.IsError()) { for (const auto [unmap_address, unmap_size] : mapped_regions) { ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(), - "MapPhysicalMemory un-map on error"); + "Failed to unmap memory range."); } return result; @@ -381,12 +378,6 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { } ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) { - const auto end_addr = target + size; - const auto last_addr = end_addr - 1; - VAddr cur_addr = target; - - ResultCode result = RESULT_SUCCESS; - // Check how much memory is currently mapped. const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size); if (mapped_size_result.Failed()) { @@ -401,13 +392,16 @@ ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) { // Keep track of the memory regions we unmap. 
std::vector<std::pair<u64, u64>> unmapped_regions; + ResultCode result = RESULT_SUCCESS; // Try to unmap regions. { - cur_addr = target; + const auto end_addr = target + size; + const auto last_addr = end_addr - 1; + VAddr cur_addr = target; auto iter = FindVMA(target); - ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end"); + ASSERT(iter != vma_map.end()); while (true) { const auto& vma = iter->second; @@ -434,7 +428,7 @@ ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) { // Advance to the next block. cur_addr = vma_end; iter = FindVMA(cur_addr); - ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end"); + ASSERT(iter != vma_map.end()); } } @@ -443,10 +437,12 @@ ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) { if (result.IsError()) { for (const auto [map_address, map_size] : unmapped_regions) { const auto remap_res = - MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size, 0), 0, - map_size, MemoryState::Heap, VMAPermission::None); - ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error"); + MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size), 0, map_size, + MemoryState::Heap, VMAPermission::None); + ASSERT_MSG(remap_res.Succeeded(), "Failed to remap a memory block."); } + + return result; } // Update mapped amount @@ -757,20 +753,26 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre // Always merge allocated memory blocks, even when they don't share the same backing block. if (left.type == VMAType::AllocatedMemoryBlock && (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) { + const auto right_begin = right.backing_block->begin() + right.offset; + const auto right_end = right_begin + right.size; + // Check if we can save work. if (left.offset == 0 && left.size == left.backing_block->size()) { // Fast case: left is an entire backing block. 
- left.backing_block->insert(left.backing_block->end(), - right.backing_block->begin() + right.offset, - right.backing_block->begin() + right.offset + right.size); + left.backing_block->insert(left.backing_block->end(), right_begin, right_end); } else { // Slow case: make a new memory block for left and right. + const auto left_begin = left.backing_block->begin() + left.offset; + const auto left_end = left_begin + left.size; + const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end)); + const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end)); + auto new_memory = std::make_shared<PhysicalMemory>(); - new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset, - left.backing_block->begin() + left.offset + left.size); - new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset, - right.backing_block->begin() + right.offset + right.size); - left.backing_block = new_memory; + new_memory->reserve(left_size + right_size); + new_memory->insert(new_memory->end(), left_begin, left_end); + new_memory->insert(new_memory->end(), right_begin, right_end); + + left.backing_block = std::move(new_memory); left.offset = 0; } @@ -965,7 +967,7 @@ ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address, VAddr cur_addr = address; auto iter = FindVMA(cur_addr); - ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end"); + ASSERT(iter != vma_map.end()); while (true) { const auto& vma = iter->second; @@ -986,7 +988,7 @@ ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address, // Advance to the next block. 
cur_addr = vma_end; iter = std::next(iter); - ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end"); + ASSERT(iter != vma_map.end()); } return MakeResult(mapped_size); @@ -1000,7 +1002,7 @@ ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr ad VAddr cur_addr = address; auto iter = FindVMA(cur_addr); - ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end"); + ASSERT(iter != vma_map.end()); while (true) { const auto& vma = iter->second; @@ -1029,7 +1031,7 @@ ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr ad // Advance to the next block. cur_addr = vma_end; iter = std::next(iter); - ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end"); + ASSERT(iter != vma_map.end()); } return MakeResult(mapped_size); diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index b18cde619..850a7ebc3 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h @@ -454,8 +454,8 @@ public: /// Maps memory at a given address. /// - /// @param addr The virtual address to map memory at. - /// @param size The amount of memory to map. + /// @param target The virtual address to map memory at. + /// @param size The amount of memory to map. /// /// @note The destination address must lie within the Map region. /// @@ -468,8 +468,8 @@ public: /// Unmaps memory at a given address. /// - /// @param addr The virtual address to unmap memory at. - /// @param size The amount of memory to unmap. + /// @param target The virtual address to unmap memory at. + /// @param size The amount of memory to unmap. /// /// @note The destination address must lie within the Map region. 
/// diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index c01ee3eda..a7c55e116 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -31,6 +31,9 @@ namespace Service::Account { +constexpr ResultCode ERR_INVALID_BUFFER_SIZE{ErrorModule::Account, 30}; +constexpr ResultCode ERR_FAILED_SAVE_DATA{ErrorModule::Account, 100}; + static std::string GetImagePath(Common::UUID uuid) { return FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + "/system/save/8000000000000010/su/avators/" + uuid.FormatSwitch() + ".jpg"; @@ -41,20 +44,31 @@ static constexpr u32 SanitizeJPEGSize(std::size_t size) { return static_cast<u32>(std::min(size, max_jpeg_image_size)); } -class IProfile final : public ServiceFramework<IProfile> { +class IProfileCommon : public ServiceFramework<IProfileCommon> { public: - explicit IProfile(Common::UUID user_id, ProfileManager& profile_manager) - : ServiceFramework("IProfile"), profile_manager(profile_manager), user_id(user_id) { + explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id, + ProfileManager& profile_manager) + : ServiceFramework(name), profile_manager(profile_manager), user_id(user_id) { static const FunctionInfo functions[] = { - {0, &IProfile::Get, "Get"}, - {1, &IProfile::GetBase, "GetBase"}, - {10, &IProfile::GetImageSize, "GetImageSize"}, - {11, &IProfile::LoadImage, "LoadImage"}, + {0, &IProfileCommon::Get, "Get"}, + {1, &IProfileCommon::GetBase, "GetBase"}, + {10, &IProfileCommon::GetImageSize, "GetImageSize"}, + {11, &IProfileCommon::LoadImage, "LoadImage"}, }; + RegisterHandlers(functions); + + if (editor_commands) { + static const FunctionInfo editor_functions[] = { + {100, &IProfileCommon::Store, "Store"}, + {101, &IProfileCommon::StoreWithImage, "StoreWithImage"}, + }; + + RegisterHandlers(editor_functions); + } } -private: +protected: void Get(Kernel::HLERequestContext& ctx) { LOG_INFO(Service_ACC, "called user_id={}", user_id.Format()); 
ProfileBase profile_base{}; @@ -127,10 +141,91 @@ private: } } - const ProfileManager& profile_manager; + void Store(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto base = rp.PopRaw<ProfileBase>(); + + const auto user_data = ctx.ReadBuffer(); + + LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid={}", + Common::StringFromFixedZeroTerminatedBuffer( + reinterpret_cast<const char*>(base.username.data()), base.username.size()), + base.timestamp, base.user_uuid.Format()); + + if (user_data.size() < sizeof(ProfileData)) { + LOG_ERROR(Service_ACC, "ProfileData buffer too small!"); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ERR_INVALID_BUFFER_SIZE); + return; + } + + ProfileData data; + std::memcpy(&data, user_data.data(), sizeof(ProfileData)); + + if (!profile_manager.SetProfileBaseAndData(user_id, base, data)) { + LOG_ERROR(Service_ACC, "Failed to update profile data and base!"); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ERR_FAILED_SAVE_DATA); + return; + } + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + } + + void StoreWithImage(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto base = rp.PopRaw<ProfileBase>(); + + const auto user_data = ctx.ReadBuffer(); + const auto image_data = ctx.ReadBuffer(1); + + LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid={}", + Common::StringFromFixedZeroTerminatedBuffer( + reinterpret_cast<const char*>(base.username.data()), base.username.size()), + base.timestamp, base.user_uuid.Format()); + + if (user_data.size() < sizeof(ProfileData)) { + LOG_ERROR(Service_ACC, "ProfileData buffer too small!"); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ERR_INVALID_BUFFER_SIZE); + return; + } + + ProfileData data; + std::memcpy(&data, user_data.data(), sizeof(ProfileData)); + + FileUtil::IOFile image(GetImagePath(user_id), "wb"); + + if (!image.IsOpen() || !image.Resize(image_data.size()) || + image.WriteBytes(image_data.data(), 
image_data.size()) != image_data.size() || + !profile_manager.SetProfileBaseAndData(user_id, base, data)) { + LOG_ERROR(Service_ACC, "Failed to update profile data, base, and image!"); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ERR_FAILED_SAVE_DATA); + return; + } + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + } + + ProfileManager& profile_manager; Common::UUID user_id; ///< The user id this profile refers to. }; +class IProfile final : public IProfileCommon { +public: + IProfile(Common::UUID user_id, ProfileManager& profile_manager) + : IProfileCommon("IProfile", false, user_id, profile_manager) {} +}; + +class IProfileEditor final : public IProfileCommon { +public: + IProfileEditor(Common::UUID user_id, ProfileManager& profile_manager) + : IProfileCommon("IProfileEditor", true, user_id, profile_manager) {} +}; + class IManagerForApplication final : public ServiceFramework<IManagerForApplication> { public: IManagerForApplication() : ServiceFramework("IManagerForApplication") { @@ -322,6 +417,17 @@ void Module::Interface::IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx rb.Push(is_locked); } +void Module::Interface::GetProfileEditor(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + Common::UUID user_id = rp.PopRaw<Common::UUID>(); + + LOG_DEBUG(Service_ACC, "called, user_id={}", user_id.Format()); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<IProfileEditor>(user_id, *profile_manager); +} + void Module::Interface::TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_ACC, "called"); // A u8 is passed into this function which we can safely ignore. 
It's to determine if we have diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h index f651773b7..7a7dc9ec6 100644 --- a/src/core/hle/service/acc/acc.h +++ b/src/core/hle/service/acc/acc.h @@ -32,6 +32,7 @@ public: void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx); void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx); void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx); + void GetProfileEditor(Kernel::HLERequestContext& ctx); private: ResultCode InitializeApplicationInfoBase(u64 process_id); diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp index 1b7ec3ed0..0d1663657 100644 --- a/src/core/hle/service/acc/acc_su.cpp +++ b/src/core/hle/service/acc/acc_su.cpp @@ -41,7 +41,7 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {202, nullptr, "CancelUserRegistration"}, {203, nullptr, "DeleteUser"}, {204, nullptr, "SetUserPosition"}, - {205, nullptr, "GetProfileEditor"}, + {205, &ACC_SU::GetProfileEditor, "GetProfileEditor"}, {206, nullptr, "CompleteUserRegistrationForcibly"}, {210, nullptr, "CreateFloatingRegistrationRequest"}, {230, nullptr, "AuthenticateServiceAsync"}, diff --git a/src/core/hle/service/acc/profile_manager.cpp b/src/core/hle/service/acc/profile_manager.cpp index 49aa5908b..8f9986326 100644 --- a/src/core/hle/service/acc/profile_manager.cpp +++ b/src/core/hle/service/acc/profile_manager.cpp @@ -305,6 +305,17 @@ bool ProfileManager::SetProfileBase(UUID uuid, const ProfileBase& profile_new) { return true; } +bool ProfileManager::SetProfileBaseAndData(Common::UUID uuid, const ProfileBase& profile_new, + const ProfileData& data_new) { + const auto index = GetUserIndex(uuid); + if (index.has_value() && SetProfileBase(uuid, profile_new)) { + profiles[*index].data = data_new; + return true; + } + + return false; +} + void ProfileManager::ParseUserSaveFile() { FileUtil::IOFile 
save(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + ACC_SAVE_AVATORS_BASE_PATH + "profiles.dat", diff --git a/src/core/hle/service/acc/profile_manager.h b/src/core/hle/service/acc/profile_manager.h index fd7abb541..5a6d28925 100644 --- a/src/core/hle/service/acc/profile_manager.h +++ b/src/core/hle/service/acc/profile_manager.h @@ -91,6 +91,8 @@ public: bool RemoveUser(Common::UUID uuid); bool SetProfileBase(Common::UUID uuid, const ProfileBase& profile_new); + bool SetProfileBaseAndData(Common::UUID uuid, const ProfileBase& profile_new, + const ProfileData& data_new); private: void ParseUserSaveFile(); diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 111633ba3..aa2c83937 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -56,7 +56,8 @@ struct LaunchParameters { }; static_assert(sizeof(LaunchParameters) == 0x88); -IWindowController::IWindowController() : ServiceFramework("IWindowController") { +IWindowController::IWindowController(Core::System& system_) + : ServiceFramework("IWindowController"), system{system_} { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "CreateWindow"}, @@ -75,7 +76,7 @@ IWindowController::IWindowController() : ServiceFramework("IWindowController") { IWindowController::~IWindowController() = default; void IWindowController::GetAppletResourceUserId(Kernel::HLERequestContext& ctx) { - const u64 process_id = Core::System::GetInstance().Kernel().CurrentProcess()->GetProcessID(); + const u64 process_id = system.CurrentProcess()->GetProcessID(); LOG_DEBUG(Service_AM, "called. 
Process ID=0x{:016X}", process_id); @@ -231,8 +232,9 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} { IDebugFunctions::~IDebugFunctions() = default; -ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger) - : ServiceFramework("ISelfController"), nvflinger(std::move(nvflinger)) { +ISelfController::ISelfController(Core::System& system_, + std::shared_ptr<NVFlinger::NVFlinger> nvflinger_) + : ServiceFramework("ISelfController"), nvflinger(std::move(nvflinger_)) { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "Exit"}, @@ -280,7 +282,7 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger RegisterHandlers(functions); - auto& kernel = Core::System::GetInstance().Kernel(); + auto& kernel = system_.Kernel(); launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, "ISelfController:LaunchableEvent"); @@ -501,8 +503,7 @@ void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequest rb.PushCopyObjects(accumulated_suspended_tick_changed_event.readable); } -AppletMessageQueue::AppletMessageQueue() { - auto& kernel = Core::System::GetInstance().Kernel(); +AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) { on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, "AMMessageQueue:OnMessageRecieved"); on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair( @@ -937,9 +938,8 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } -ILibraryAppletCreator::ILibraryAppletCreator(u64 current_process_title_id) - : ServiceFramework("ILibraryAppletCreator"), - current_process_title_id(current_process_title_id) { +ILibraryAppletCreator::ILibraryAppletCreator(Core::System& system_) + : ServiceFramework("ILibraryAppletCreator"), system{system_} { static const FunctionInfo functions[] = { {0, 
&ILibraryAppletCreator::CreateLibraryApplet, "CreateLibraryApplet"}, {1, nullptr, "TerminateAllLibraryApplets"}, @@ -961,8 +961,8 @@ void ILibraryAppletCreator::CreateLibraryApplet(Kernel::HLERequestContext& ctx) LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}", static_cast<u32>(applet_id), applet_mode); - const auto& applet_manager{Core::System::GetInstance().GetAppletManager()}; - const auto applet = applet_manager.GetApplet(applet_id, current_process_title_id); + const auto& applet_manager{system.GetAppletManager()}; + const auto applet = applet_manager.GetApplet(applet_id); if (applet == nullptr) { LOG_ERROR(Service_AM, "Applet doesn't exist! applet_id={}", static_cast<u32>(applet_id)); @@ -999,8 +999,7 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex const auto handle{rp.Pop<Kernel::Handle>()}; const auto transfer_mem = - Core::System::GetInstance().CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>( - handle); + system.CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>(handle); if (transfer_mem == nullptr) { LOG_ERROR(Service_AM, "shared_mem is a nullpr for handle={:08X}", handle); @@ -1018,7 +1017,8 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex rb.PushIpcInterface(std::make_shared<IStorage>(std::move(memory))); } -IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationFunctions") { +IApplicationFunctions::IApplicationFunctions(Core::System& system_) + : ServiceFramework("IApplicationFunctions"), system{system_} { // clang-format off static const FunctionInfo functions[] = { {1, &IApplicationFunctions::PopLaunchParameter, "PopLaunchParameter"}, @@ -1180,7 +1180,7 @@ void IApplicationFunctions::GetDesiredLanguage(Kernel::HLERequestContext& ctx) { // Get supported languages from NACP, if possible // Default to 0 (all languages supported) u32 supported_languages = 0; - FileSys::PatchManager 
pm{Core::System::GetInstance().CurrentProcess()->GetTitleID()}; + FileSys::PatchManager pm{system.CurrentProcess()->GetTitleID()}; const auto res = pm.GetControlMetadata(); if (res.first != nullptr) { @@ -1188,8 +1188,8 @@ void IApplicationFunctions::GetDesiredLanguage(Kernel::HLERequestContext& ctx) { } // Call IApplicationManagerInterface implementation. - auto& service_manager = Core::System::GetInstance().ServiceManager(); - auto ns_am2 = service_manager.GetService<Service::NS::NS>("ns:am2"); + auto& service_manager = system.ServiceManager(); + auto ns_am2 = service_manager.GetService<NS::NS>("ns:am2"); auto app_man = ns_am2->GetApplicationManagerInterface(); // Get desired application language @@ -1261,8 +1261,8 @@ void IApplicationFunctions::ExtendSaveData(Kernel::HLERequestContext& ctx) { "new_journal={:016X}", static_cast<u8>(type), user_id[1], user_id[0], new_normal_size, new_journal_size); - FileSystem::WriteSaveDataSize(type, Core::CurrentProcess()->GetTitleID(), user_id, - {new_normal_size, new_journal_size}); + const auto title_id = system.CurrentProcess()->GetTitleID(); + FileSystem::WriteSaveDataSize(type, title_id, user_id, {new_normal_size, new_journal_size}); IPC::ResponseBuilder rb{ctx, 4}; rb.Push(RESULT_SUCCESS); @@ -1281,8 +1281,8 @@ void IApplicationFunctions::GetSaveDataSize(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called with type={:02X}, user_id={:016X}{:016X}", static_cast<u8>(type), user_id[1], user_id[0]); - const auto size = - FileSystem::ReadSaveDataSize(type, Core::CurrentProcess()->GetTitleID(), user_id); + const auto title_id = system.CurrentProcess()->GetTitleID(); + const auto size = FileSystem::ReadSaveDataSize(type, title_id, user_id); IPC::ResponseBuilder rb{ctx, 6}; rb.Push(RESULT_SUCCESS); @@ -1300,9 +1300,9 @@ void IApplicationFunctions::GetGpuErrorDetectedSystemEvent(Kernel::HLERequestCon void InstallInterfaces(SM::ServiceManager& service_manager, std::shared_ptr<NVFlinger::NVFlinger> nvflinger, 
Core::System& system) { - auto message_queue = std::make_shared<AppletMessageQueue>(); - message_queue->PushMessage(AppletMessageQueue::AppletMessage::FocusStateChanged); // Needed on - // game boot + auto message_queue = std::make_shared<AppletMessageQueue>(system.Kernel()); + // Needed on game boot + message_queue->PushMessage(AppletMessageQueue::AppletMessage::FocusStateChanged); std::make_shared<AppletAE>(nvflinger, message_queue, system)->InstallAsService(service_manager); std::make_shared<AppletOE>(nvflinger, message_queue, system)->InstallAsService(service_manager); diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index cbc9da7b6..28f870302 100644 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h @@ -10,12 +10,15 @@ #include "core/hle/kernel/writable_event.h" #include "core/hle/service/service.h" -namespace Service { -namespace NVFlinger { +namespace Kernel { +class KernelCore; +} + +namespace Service::NVFlinger { class NVFlinger; } -namespace AM { +namespace Service::AM { enum SystemLanguage { Japanese = 0, @@ -47,7 +50,7 @@ public: PerformanceModeChanged = 31, }; - AppletMessageQueue(); + explicit AppletMessageQueue(Kernel::KernelCore& kernel); ~AppletMessageQueue(); const Kernel::SharedPtr<Kernel::ReadableEvent>& GetMesssageRecieveEvent() const; @@ -65,12 +68,14 @@ private: class IWindowController final : public ServiceFramework<IWindowController> { public: - IWindowController(); + explicit IWindowController(Core::System& system_); ~IWindowController() override; private: void GetAppletResourceUserId(Kernel::HLERequestContext& ctx); void AcquireForegroundRights(Kernel::HLERequestContext& ctx); + + Core::System& system; }; class IAudioController final : public ServiceFramework<IAudioController> { @@ -113,7 +118,8 @@ public: class ISelfController final : public ServiceFramework<ISelfController> { public: - explicit ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger); + explicit 
ISelfController(Core::System& system_, + std::shared_ptr<NVFlinger::NVFlinger> nvflinger_); ~ISelfController() override; private: @@ -208,7 +214,7 @@ private: class ILibraryAppletCreator final : public ServiceFramework<ILibraryAppletCreator> { public: - ILibraryAppletCreator(u64 current_process_title_id); + explicit ILibraryAppletCreator(Core::System& system_); ~ILibraryAppletCreator() override; private: @@ -216,12 +222,12 @@ private: void CreateStorage(Kernel::HLERequestContext& ctx); void CreateTransferMemoryStorage(Kernel::HLERequestContext& ctx); - u64 current_process_title_id; + Core::System& system; }; class IApplicationFunctions final : public ServiceFramework<IApplicationFunctions> { public: - IApplicationFunctions(); + explicit IApplicationFunctions(Core::System& system_); ~IApplicationFunctions() override; private: @@ -245,6 +251,7 @@ private: void GetGpuErrorDetectedSystemEvent(Kernel::HLERequestContext& ctx); Kernel::EventPair gpu_error_detected_event; + Core::System& system; }; class IHomeMenuFunctions final : public ServiceFramework<IHomeMenuFunctions> { @@ -278,5 +285,4 @@ public: void InstallInterfaces(SM::ServiceManager& service_manager, std::shared_ptr<NVFlinger::NVFlinger> nvflinger, Core::System& system); -} // namespace AM -} // namespace Service +} // namespace Service::AM diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp index a34368c8b..e454b77d8 100644 --- a/src/core/hle/service/am/applet_ae.cpp +++ b/src/core/hle/service/am/applet_ae.cpp @@ -50,7 +50,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ISelfController>(nvflinger); + rb.PushIpcInterface<ISelfController>(system, nvflinger); } void GetWindowController(Kernel::HLERequestContext& ctx) { @@ -58,7 +58,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<IWindowController>(); + rb.PushIpcInterface<IWindowController>(system); } void 
GetAudioController(Kernel::HLERequestContext& ctx) { @@ -98,7 +98,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ILibraryAppletCreator>(system.CurrentProcess()->GetTitleID()); + rb.PushIpcInterface<ILibraryAppletCreator>(system); } void GetApplicationFunctions(Kernel::HLERequestContext& ctx) { @@ -106,7 +106,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<IApplicationFunctions>(); + rb.PushIpcInterface<IApplicationFunctions>(system); } std::shared_ptr<NVFlinger::NVFlinger> nvflinger; @@ -154,7 +154,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ISelfController>(nvflinger); + rb.PushIpcInterface<ISelfController>(system, nvflinger); } void GetWindowController(Kernel::HLERequestContext& ctx) { @@ -162,7 +162,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<IWindowController>(); + rb.PushIpcInterface<IWindowController>(system); } void GetAudioController(Kernel::HLERequestContext& ctx) { @@ -194,7 +194,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ILibraryAppletCreator>(system.CurrentProcess()->GetTitleID()); + rb.PushIpcInterface<ILibraryAppletCreator>(system); } void GetHomeMenuFunctions(Kernel::HLERequestContext& ctx) { diff --git a/src/core/hle/service/am/applet_oe.cpp b/src/core/hle/service/am/applet_oe.cpp index 5d53ef113..a2ffaa440 100644 --- a/src/core/hle/service/am/applet_oe.cpp +++ b/src/core/hle/service/am/applet_oe.cpp @@ -4,7 +4,6 @@ #include "common/logging/log.h" #include "core/hle/ipc_helpers.h" -#include "core/hle/kernel/process.h" #include "core/hle/service/am/am.h" #include "core/hle/service/am/applet_oe.h" #include "core/hle/service/nvflinger/nvflinger.h" @@ -64,7 +63,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<IWindowController>(); + 
rb.PushIpcInterface<IWindowController>(system); } void GetSelfController(Kernel::HLERequestContext& ctx) { @@ -72,7 +71,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ISelfController>(nvflinger); + rb.PushIpcInterface<ISelfController>(system, nvflinger); } void GetCommonStateGetter(Kernel::HLERequestContext& ctx) { @@ -88,7 +87,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ILibraryAppletCreator>(system.CurrentProcess()->GetTitleID()); + rb.PushIpcInterface<ILibraryAppletCreator>(system); } void GetApplicationFunctions(Kernel::HLERequestContext& ctx) { @@ -96,7 +95,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<IApplicationFunctions>(); + rb.PushIpcInterface<IApplicationFunctions>(system); } std::shared_ptr<NVFlinger::NVFlinger> nvflinger; diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp index 6bdba2468..d2e35362f 100644 --- a/src/core/hle/service/am/applets/applets.cpp +++ b/src/core/hle/service/am/applets/applets.cpp @@ -23,8 +23,7 @@ namespace Service::AM::Applets { -AppletDataBroker::AppletDataBroker() { - auto& kernel = Core::System::GetInstance().Kernel(); +AppletDataBroker::AppletDataBroker(Kernel::KernelCore& kernel) { state_changed_event = Kernel::WritableEvent::CreateEventPair( kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent"); pop_out_data_event = Kernel::WritableEvent::CreateEventPair( @@ -121,7 +120,7 @@ Kernel::SharedPtr<Kernel::ReadableEvent> AppletDataBroker::GetStateChangedEvent( return state_changed_event.readable; } -Applet::Applet() = default; +Applet::Applet(Kernel::KernelCore& kernel_) : broker{kernel_} {} Applet::~Applet() = default; @@ -154,7 +153,7 @@ AppletFrontendSet::AppletFrontendSet(AppletFrontendSet&&) noexcept = default; AppletFrontendSet& AppletFrontendSet::operator=(AppletFrontendSet&&) 
noexcept = default; -AppletManager::AppletManager() = default; +AppletManager::AppletManager(Core::System& system_) : system{system_} {} AppletManager::~AppletManager() = default; @@ -216,28 +215,28 @@ void AppletManager::ClearAll() { frontend = {}; } -std::shared_ptr<Applet> AppletManager::GetApplet(AppletId id, u64 current_process_title_id) const { +std::shared_ptr<Applet> AppletManager::GetApplet(AppletId id) const { switch (id) { case AppletId::Auth: - return std::make_shared<Auth>(*frontend.parental_controls); + return std::make_shared<Auth>(system, *frontend.parental_controls); case AppletId::Error: - return std::make_shared<Error>(*frontend.error); + return std::make_shared<Error>(system, *frontend.error); case AppletId::ProfileSelect: - return std::make_shared<ProfileSelect>(*frontend.profile_select); + return std::make_shared<ProfileSelect>(system, *frontend.profile_select); case AppletId::SoftwareKeyboard: - return std::make_shared<SoftwareKeyboard>(*frontend.software_keyboard); + return std::make_shared<SoftwareKeyboard>(system, *frontend.software_keyboard); case AppletId::PhotoViewer: - return std::make_shared<PhotoViewer>(*frontend.photo_viewer); + return std::make_shared<PhotoViewer>(system, *frontend.photo_viewer); case AppletId::LibAppletShop: - return std::make_shared<WebBrowser>(*frontend.web_browser, current_process_title_id, + return std::make_shared<WebBrowser>(system, *frontend.web_browser, frontend.e_commerce.get()); case AppletId::LibAppletOff: - return std::make_shared<WebBrowser>(*frontend.web_browser, current_process_title_id); + return std::make_shared<WebBrowser>(system, *frontend.web_browser); default: UNIMPLEMENTED_MSG( "No backend implementation exists for applet_id={:02X}! 
Falling back to stub applet.", static_cast<u8>(id)); - return std::make_shared<StubApplet>(id); + return std::make_shared<StubApplet>(system, id); } } diff --git a/src/core/hle/service/am/applets/applets.h b/src/core/hle/service/am/applets/applets.h index adc973dad..764c3418c 100644 --- a/src/core/hle/service/am/applets/applets.h +++ b/src/core/hle/service/am/applets/applets.h @@ -12,6 +12,10 @@ union ResultCode; +namespace Core { +class System; +} + namespace Core::Frontend { class ECommerceApplet; class ErrorApplet; @@ -22,6 +26,10 @@ class SoftwareKeyboardApplet; class WebBrowserApplet; } // namespace Core::Frontend +namespace Kernel { +class KernelCore; +} + namespace Service::AM { class IStorage; @@ -53,7 +61,7 @@ enum class AppletId : u32 { class AppletDataBroker final { public: - AppletDataBroker(); + explicit AppletDataBroker(Kernel::KernelCore& kernel_); ~AppletDataBroker(); struct RawChannelData { @@ -108,7 +116,7 @@ private: class Applet { public: - Applet(); + explicit Applet(Kernel::KernelCore& kernel_); virtual ~Applet(); virtual void Initialize(); @@ -179,7 +187,7 @@ struct AppletFrontendSet { class AppletManager { public: - AppletManager(); + explicit AppletManager(Core::System& system_); ~AppletManager(); void SetAppletFrontendSet(AppletFrontendSet set); @@ -187,10 +195,11 @@ public: void SetDefaultAppletsIfMissing(); void ClearAll(); - std::shared_ptr<Applet> GetApplet(AppletId id, u64 current_process_title_id) const; + std::shared_ptr<Applet> GetApplet(AppletId id) const; private: AppletFrontendSet frontend; + Core::System& system; }; } // namespace Applets diff --git a/src/core/hle/service/am/applets/error.cpp b/src/core/hle/service/am/applets/error.cpp index af3a900f8..a7db26725 100644 --- a/src/core/hle/service/am/applets/error.cpp +++ b/src/core/hle/service/am/applets/error.cpp @@ -85,7 +85,8 @@ ResultCode Decode64BitError(u64 error) { } // Anonymous namespace -Error::Error(const Core::Frontend::ErrorApplet& frontend) : frontend(frontend) {} 
+Error::Error(Core::System& system_, const Core::Frontend::ErrorApplet& frontend_) + : Applet{system_.Kernel()}, frontend(frontend_), system{system_} {} Error::~Error() = default; @@ -145,8 +146,8 @@ void Error::Execute() { } const auto callback = [this] { DisplayCompleted(); }; - const auto title_id = Core::CurrentProcess()->GetTitleID(); - const auto& reporter{Core::System::GetInstance().GetReporter()}; + const auto title_id = system.CurrentProcess()->GetTitleID(); + const auto& reporter{system.GetReporter()}; switch (mode) { case ErrorAppletMode::ShowError: diff --git a/src/core/hle/service/am/applets/error.h b/src/core/hle/service/am/applets/error.h index a3590d181..a105cdb0c 100644 --- a/src/core/hle/service/am/applets/error.h +++ b/src/core/hle/service/am/applets/error.h @@ -7,6 +7,10 @@ #include "core/hle/result.h" #include "core/hle/service/am/applets/applets.h" +namespace Core { +class System; +} + namespace Service::AM::Applets { enum class ErrorAppletMode : u8 { @@ -21,7 +25,7 @@ enum class ErrorAppletMode : u8 { class Error final : public Applet { public: - explicit Error(const Core::Frontend::ErrorApplet& frontend); + explicit Error(Core::System& system_, const Core::Frontend::ErrorApplet& frontend_); ~Error() override; void Initialize() override; @@ -42,6 +46,7 @@ private: std::unique_ptr<ErrorArguments> args; bool complete = false; + Core::System& system; }; } // namespace Service::AM::Applets diff --git a/src/core/hle/service/am/applets/general_backend.cpp b/src/core/hle/service/am/applets/general_backend.cpp index e0def8dff..328438a1d 100644 --- a/src/core/hle/service/am/applets/general_backend.cpp +++ b/src/core/hle/service/am/applets/general_backend.cpp @@ -37,7 +37,8 @@ static void LogCurrentStorage(AppletDataBroker& broker, std::string_view prefix) } } -Auth::Auth(Core::Frontend::ParentalControlsApplet& frontend) : frontend(frontend) {} +Auth::Auth(Core::System& system_, Core::Frontend::ParentalControlsApplet& frontend_) + : 
Applet{system_.Kernel()}, frontend(frontend_) {} Auth::~Auth() = default; @@ -151,7 +152,8 @@ void Auth::AuthFinished(bool successful) { broker.SignalStateChanged(); } -PhotoViewer::PhotoViewer(const Core::Frontend::PhotoViewerApplet& frontend) : frontend(frontend) {} +PhotoViewer::PhotoViewer(Core::System& system_, const Core::Frontend::PhotoViewerApplet& frontend_) + : Applet{system_.Kernel()}, frontend(frontend_), system{system_} {} PhotoViewer::~PhotoViewer() = default; @@ -185,7 +187,7 @@ void PhotoViewer::Execute() { const auto callback = [this] { ViewFinished(); }; switch (mode) { case PhotoViewerAppletMode::CurrentApp: - frontend.ShowPhotosForApplication(Core::CurrentProcess()->GetTitleID(), callback); + frontend.ShowPhotosForApplication(system.CurrentProcess()->GetTitleID(), callback); break; case PhotoViewerAppletMode::AllApps: frontend.ShowAllPhotos(callback); @@ -200,7 +202,8 @@ void PhotoViewer::ViewFinished() { broker.SignalStateChanged(); } -StubApplet::StubApplet(AppletId id) : id(id) {} +StubApplet::StubApplet(Core::System& system_, AppletId id_) + : Applet{system_.Kernel()}, id(id_), system{system_} {} StubApplet::~StubApplet() = default; @@ -209,7 +212,7 @@ void StubApplet::Initialize() { Applet::Initialize(); const auto data = broker.PeekDataToAppletForDebug(); - Core::System::GetInstance().GetReporter().SaveUnimplementedAppletReport( + system.GetReporter().SaveUnimplementedAppletReport( static_cast<u32>(id), common_args.arguments_version, common_args.library_version, common_args.theme_color, common_args.play_startup_sound, common_args.system_tick, data.normal, data.interactive); diff --git a/src/core/hle/service/am/applets/general_backend.h b/src/core/hle/service/am/applets/general_backend.h index 0da252044..cfa2df369 100644 --- a/src/core/hle/service/am/applets/general_backend.h +++ b/src/core/hle/service/am/applets/general_backend.h @@ -6,6 +6,10 @@ #include "core/hle/service/am/applets/applets.h" +namespace Core { +class System; +} + 
namespace Service::AM::Applets { enum class AuthAppletType : u32 { @@ -16,7 +20,7 @@ enum class AuthAppletType : u32 { class Auth final : public Applet { public: - explicit Auth(Core::Frontend::ParentalControlsApplet& frontend); + explicit Auth(Core::System& system_, Core::Frontend::ParentalControlsApplet& frontend_); ~Auth() override; void Initialize() override; @@ -45,7 +49,7 @@ enum class PhotoViewerAppletMode : u8 { class PhotoViewer final : public Applet { public: - explicit PhotoViewer(const Core::Frontend::PhotoViewerApplet& frontend); + explicit PhotoViewer(Core::System& system_, const Core::Frontend::PhotoViewerApplet& frontend_); ~PhotoViewer() override; void Initialize() override; @@ -60,11 +64,12 @@ private: const Core::Frontend::PhotoViewerApplet& frontend; bool complete = false; PhotoViewerAppletMode mode = PhotoViewerAppletMode::CurrentApp; + Core::System& system; }; class StubApplet final : public Applet { public: - explicit StubApplet(AppletId id); + explicit StubApplet(Core::System& system_, AppletId id_); ~StubApplet() override; void Initialize() override; @@ -76,6 +81,7 @@ public: private: AppletId id; + Core::System& system; }; } // namespace Service::AM::Applets diff --git a/src/core/hle/service/am/applets/profile_select.cpp b/src/core/hle/service/am/applets/profile_select.cpp index 57b5419e8..3eba696ca 100644 --- a/src/core/hle/service/am/applets/profile_select.cpp +++ b/src/core/hle/service/am/applets/profile_select.cpp @@ -15,8 +15,9 @@ namespace Service::AM::Applets { constexpr ResultCode ERR_USER_CANCELLED_SELECTION{ErrorModule::Account, 1}; -ProfileSelect::ProfileSelect(const Core::Frontend::ProfileSelectApplet& frontend) - : frontend(frontend) {} +ProfileSelect::ProfileSelect(Core::System& system_, + const Core::Frontend::ProfileSelectApplet& frontend_) + : Applet{system_.Kernel()}, frontend(frontend_) {} ProfileSelect::~ProfileSelect() = default; diff --git a/src/core/hle/service/am/applets/profile_select.h 
b/src/core/hle/service/am/applets/profile_select.h index 563cd744a..16364ead7 100644 --- a/src/core/hle/service/am/applets/profile_select.h +++ b/src/core/hle/service/am/applets/profile_select.h @@ -11,6 +11,10 @@ #include "core/hle/result.h" #include "core/hle/service/am/applets/applets.h" +namespace Core { +class System; +} + namespace Service::AM::Applets { struct UserSelectionConfig { @@ -29,7 +33,8 @@ static_assert(sizeof(UserSelectionOutput) == 0x18, "UserSelectionOutput has inco class ProfileSelect final : public Applet { public: - explicit ProfileSelect(const Core::Frontend::ProfileSelectApplet& frontend); + explicit ProfileSelect(Core::System& system_, + const Core::Frontend::ProfileSelectApplet& frontend_); ~ProfileSelect() override; void Initialize() override; diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp index e197990f7..748559cd0 100644 --- a/src/core/hle/service/am/applets/software_keyboard.cpp +++ b/src/core/hle/service/am/applets/software_keyboard.cpp @@ -39,8 +39,9 @@ static Core::Frontend::SoftwareKeyboardParameters ConvertToFrontendParameters( return params; } -SoftwareKeyboard::SoftwareKeyboard(const Core::Frontend::SoftwareKeyboardApplet& frontend) - : frontend(frontend) {} +SoftwareKeyboard::SoftwareKeyboard(Core::System& system_, + const Core::Frontend::SoftwareKeyboardApplet& frontend_) + : Applet{system_.Kernel()}, frontend(frontend_) {} SoftwareKeyboard::~SoftwareKeyboard() = default; diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h index 0fbc43e51..ef4801fc6 100644 --- a/src/core/hle/service/am/applets/software_keyboard.h +++ b/src/core/hle/service/am/applets/software_keyboard.h @@ -16,6 +16,10 @@ union ResultCode; +namespace Core { +class System; +} + namespace Service::AM::Applets { enum class KeysetDisable : u32 { @@ -55,7 +59,8 @@ static_assert(sizeof(KeyboardConfig) == 0x3E0, 
"KeyboardConfig has incorrect siz class SoftwareKeyboard final : public Applet { public: - explicit SoftwareKeyboard(const Core::Frontend::SoftwareKeyboardApplet& frontend); + explicit SoftwareKeyboard(Core::System& system_, + const Core::Frontend::SoftwareKeyboardApplet& frontend_); ~SoftwareKeyboard() override; void Initialize() override; diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp index f3c9fef0e..32283e819 100644 --- a/src/core/hle/service/am/applets/web_browser.cpp +++ b/src/core/hle/service/am/applets/web_browser.cpp @@ -190,8 +190,9 @@ std::map<WebArgTLVType, std::vector<u8>> GetWebArguments(const std::vector<u8>& return out; } -FileSys::VirtualFile GetApplicationRomFS(u64 title_id, FileSys::ContentRecordType type) { - const auto& installed{Core::System::GetInstance().GetContentProvider()}; +FileSys::VirtualFile GetApplicationRomFS(const Core::System& system, u64 title_id, + FileSys::ContentRecordType type) { + const auto& installed{system.GetContentProvider()}; const auto res = installed.GetEntry(title_id, type); if (res != nullptr) { @@ -207,10 +208,10 @@ FileSys::VirtualFile GetApplicationRomFS(u64 title_id, FileSys::ContentRecordTyp } // Anonymous namespace -WebBrowser::WebBrowser(Core::Frontend::WebBrowserApplet& frontend, u64 current_process_title_id, - Core::Frontend::ECommerceApplet* frontend_e_commerce) - : frontend(frontend), frontend_e_commerce(frontend_e_commerce), - current_process_title_id(current_process_title_id) {} +WebBrowser::WebBrowser(Core::System& system_, Core::Frontend::WebBrowserApplet& frontend_, + Core::Frontend::ECommerceApplet* frontend_e_commerce_) + : Applet{system_.Kernel()}, frontend(frontend_), + frontend_e_commerce(frontend_e_commerce_), system{system_} {} WebBrowser::~WebBrowser() = default; @@ -266,7 +267,7 @@ void WebBrowser::UnpackRomFS() { ASSERT(offline_romfs != nullptr); const auto dir = FileSys::ExtractRomFS(offline_romfs, 
FileSys::RomFSExtractionType::SingleDiscard); - const auto& vfs{Core::System::GetInstance().GetFilesystem()}; + const auto& vfs{system.GetFilesystem()}; const auto temp_dir = vfs->CreateDirectory(temporary_dir, FileSys::Mode::ReadWrite); FileSys::VfsRawCopyD(dir, temp_dir); @@ -470,10 +471,10 @@ void WebBrowser::InitializeOffline() { } if (title_id == 0) { - title_id = current_process_title_id; + title_id = system.CurrentProcess()->GetTitleID(); } - offline_romfs = GetApplicationRomFS(title_id, type); + offline_romfs = GetApplicationRomFS(system, title_id, type); if (offline_romfs == nullptr) { status = ResultCode(-1); LOG_ERROR(Service_AM, "Failed to find offline data for request!"); diff --git a/src/core/hle/service/am/applets/web_browser.h b/src/core/hle/service/am/applets/web_browser.h index 870f57b64..8d4027411 100644 --- a/src/core/hle/service/am/applets/web_browser.h +++ b/src/core/hle/service/am/applets/web_browser.h @@ -9,6 +9,10 @@ #include "core/hle/service/am/am.h" #include "core/hle/service/am/applets/applets.h" +namespace Core { +class System; +} + namespace Service::AM::Applets { enum class ShimKind : u32; @@ -17,8 +21,8 @@ enum class WebArgTLVType : u16; class WebBrowser final : public Applet { public: - WebBrowser(Core::Frontend::WebBrowserApplet& frontend, u64 current_process_title_id, - Core::Frontend::ECommerceApplet* frontend_e_commerce = nullptr); + WebBrowser(Core::System& system_, Core::Frontend::WebBrowserApplet& frontend_, + Core::Frontend::ECommerceApplet* frontend_e_commerce_ = nullptr); ~WebBrowser() override; @@ -59,8 +63,6 @@ private: bool unpacked = false; ResultCode status = RESULT_SUCCESS; - u64 current_process_title_id; - ShimKind kind; std::map<WebArgTLVType, std::vector<u8>> args; @@ -74,6 +76,8 @@ private: std::optional<u128> user_id; std::optional<bool> shop_full_display; std::string shop_extra_parameter; + + Core::System& system; }; } // namespace Service::AM::Applets diff --git a/src/core/loader/nro.cpp 
b/src/core/loader/nro.cpp index e92e2e06e..3a5361fdd 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -258,6 +258,15 @@ ResultStatus AppLoader_NRO::ReadTitle(std::string& title) { return ResultStatus::Success; } +ResultStatus AppLoader_NRO::ReadControlData(FileSys::NACP& control) { + if (nacp == nullptr) { + return ResultStatus::ErrorNoControl; + } + + control = *nacp; + return ResultStatus::Success; +} + bool AppLoader_NRO::IsRomFSUpdatable() const { return false; } diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h index 1ffdae805..71811bc29 100644 --- a/src/core/loader/nro.h +++ b/src/core/loader/nro.h @@ -43,6 +43,7 @@ public: ResultStatus ReadProgramId(u64& out_program_id) override; ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; ResultStatus ReadTitle(std::string& title) override; + ResultStatus ReadControlData(FileSys::NACP& control) override; bool IsRomFSUpdatable() const override; private: diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 08586d33c..63d449135 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include <bitset> #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" @@ -49,6 +50,33 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { } } +Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { + const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); + ASSERT(cbuf_mask[regs.tex_cb_index]); + + const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index]; + ASSERT(texinfo.Address() != 0); + + const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle); + ASSERT(address < texinfo.Address() + texinfo.size); + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)}; + return GetTextureInfo(tex_handle, offset); +} + +Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHandle tex_handle, + std::size_t offset) const { + return Texture::FullTextureInfo{static_cast<u32>(offset), GetTICEntry(tex_handle.tic_id), + GetTSCEntry(tex_handle.tsc_id)}; +} + +u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const { + const auto& buffer = launch_description.const_buffer_config[const_buffer]; + u32 result; + std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); + return result; +} + void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, @@ -60,4 +88,29 @@ void KeplerCompute::ProcessLaunch() { rasterizer.DispatchCompute(code_addr); } +Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { + const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)}; + + Texture::TICEntry tic_entry; + memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); + + const auto r_type{tic_entry.r_type.Value()}; + const auto g_type{tic_entry.g_type.Value()}; + 
const auto b_type{tic_entry.b_type.Value()}; + const auto a_type{tic_entry.a_type.Value()}; + + // TODO(Subv): Different data types for separate components are not supported + DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); + + return tic_entry; +} + +Texture::TSCEntry KeplerCompute::GetTSCEntry(u32 tsc_index) const { + const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)}; + + Texture::TSCEntry tsc_entry; + memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); + return tsc_entry; +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 6a3309a2c..90cf650d2 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -12,6 +12,7 @@ #include "common/common_types.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" +#include "video_core/textures/texture.h" namespace Core { class System; @@ -111,7 +112,7 @@ public: INSERT_PADDING_WORDS(0x3FE); - u32 texture_const_buffer_index; + u32 tex_cb_index; INSERT_PADDING_WORDS(0x374); }; @@ -149,7 +150,7 @@ public: union { BitField<0, 8, u32> const_buffer_enable_mask; BitField<29, 2, u32> cache_layout; - } memory_config; + }; INSERT_PADDING_WORDS(0x8); @@ -194,6 +195,14 @@ public: /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); + Tegra::Texture::FullTextureInfo GetTexture(std::size_t offset) const; + + /// Given a Texture Handle, returns the TSC and TIC entries. 
+ Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, + std::size_t offset) const; + + u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const; + private: Core::System& system; VideoCore::RasterizerInterface& rasterizer; @@ -201,6 +210,12 @@ private: Upload::State upload_state; void ProcessLaunch(); + + /// Retrieves information about a specific TIC entry from the TIC buffer. + Texture::TICEntry GetTICEntry(u32 tic_index) const; + + /// Retrieves information about a specific TSC entry from the TSC buffer. + Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -218,12 +233,12 @@ ASSERT_REG_POSITION(launch, 0xAF); ASSERT_REG_POSITION(tsc, 0x557); ASSERT_REG_POSITION(tic, 0x55D); ASSERT_REG_POSITION(code_loc, 0x582); -ASSERT_REG_POSITION(texture_const_buffer_index, 0x982); +ASSERT_REG_POSITION(tex_cb_index, 0x982); ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8); ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC); ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11); ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12); -ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14); +ASSERT_LAUNCH_PARAM_POSITION(const_buffer_enable_mask, 0x14); ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D); #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index f5158d219..c7a3c85a0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -89,6 +89,9 @@ void Maxwell3D::InitializeRegisterDefaults() { // Commercial games seem to assume this value is enabled and nouveau sets this value manually. regs.rt_separate_frag_data = 1; + + // Some games (like Super Mario Odyssey) assume that SRGB is enabled. 
+ regs.framebuffer_srgb = 1; } #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) @@ -244,7 +247,7 @@ void Maxwell3D::InitDirtySettings() { dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; } -void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { +void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) { // Reset the current macro. executing_macro = 0; @@ -252,7 +255,7 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size(); // Execute the current macro. - macro_interpreter.Execute(macro_positions[entry], std::move(parameters)); + macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); } void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { @@ -289,7 +292,8 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { // Call the macro when there are no more parameters in the command buffer if (method_call.IsLastCall()) { - CallMacroMethod(executing_macro, std::move(macro_params)); + CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); + macro_params.clear(); } return; } @@ -328,6 +332,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ProcessMacroBind(method_call.argument); break; } + case MAXWELL3D_REG_INDEX(firmware[4]): { + ProcessFirmwareCall4(); + break; + } case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): @@ -418,6 +426,14 @@ void Maxwell3D::ProcessMacroBind(u32 data) { macro_positions[regs.macros.entry++] = data; } +void Maxwell3D::ProcessFirmwareCall4() { + LOG_WARNING(HW_GPU, "(STUBBED) called"); + + // Firmware call 4 is a blob that changes some registers depending on its parameters. 
+ // These registers don't affect emulation and so are stubbed by setting 0xd00 to 1. + regs.reg_array[0xd00] = 1; +} + void Maxwell3D::ProcessQueryGet() { const GPUVAddr sequence_address{regs.query.QueryAddress()}; // Since the sequence address is given as a GPU VAddr, we have to convert it to an application diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0184342a0..e5ec90717 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -62,6 +62,7 @@ public: static constexpr std::size_t NumVertexAttributes = 32; static constexpr std::size_t NumVaryings = 31; static constexpr std::size_t NumTextureSamplers = 32; + static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number static constexpr std::size_t NumClipDistances = 8; static constexpr std::size_t MaxShaderProgram = 6; static constexpr std::size_t MaxShaderStage = 5; @@ -1088,7 +1089,9 @@ public: INSERT_PADDING_WORDS(14); } shader_config[MaxShaderProgram]; - INSERT_PADDING_WORDS(0x80); + INSERT_PADDING_WORDS(0x60); + + u32 firmware[0x20]; struct { u32 cb_size; @@ -1307,9 +1310,10 @@ private: /** * Call a macro on this engine. * @param method Method to call + * @param num_parameters Number of arguments * @param parameters Arguments to the method call */ - void CallMacroMethod(u32 method, std::vector<u32> parameters); + void CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters); /// Handles writes to the macro uploading register. void ProcessMacroUpload(u32 data); @@ -1317,6 +1321,9 @@ private: /// Handles writes to the macro bind register. void ProcessMacroBind(u32 data); + /// Handles firmware blob 4 + void ProcessFirmwareCall4(); + /// Handles a write to the CLEAR_BUFFERS register. 
void ProcessClearBuffers(); @@ -1429,6 +1436,7 @@ ASSERT_REG_POSITION(vertex_array[0], 0x700); ASSERT_REG_POSITION(independent_blend, 0x780); ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0); ASSERT_REG_POSITION(shader_config[0], 0x800); +ASSERT_REG_POSITION(firmware, 0x8C0); ASSERT_REG_POSITION(const_buffer, 0x8E0); ASSERT_REG_POSITION(cb_bind[0], 0x904); ASSERT_REG_POSITION(tex_cb_index, 0x982); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c3678b9ea..a6110bd86 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -544,6 +544,35 @@ enum class VoteOperation : u64 { Eq = 2, // allThreadsEqualNV }; +enum class ImageAtomicSize : u64 { + U32 = 0, + S32 = 1, + U64 = 2, + F32 = 3, + S64 = 5, + SD32 = 6, + SD64 = 7, +}; + +enum class ImageAtomicOperation : u64 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, +}; + +enum class ShuffleOperation : u64 { + Idx = 0, // shuffleNV + Up = 1, // shuffleUpNV + Down = 2, // shuffleDownNV + Bfly = 3, // shuffleXorNV +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -578,6 +607,15 @@ union Instruction { } vote; union { + BitField<30, 2, ShuffleOperation> operation; + BitField<48, 3, u64> pred48; + BitField<28, 1, u64> is_index_imm; + BitField<29, 1, u64> is_mask_imm; + BitField<20, 5, u64> index_imm; + BitField<34, 13, u64> mask_imm; + } shfl; + + union { BitField<8, 8, Register> gpr; BitField<20, 24, s64> offset; } gmem; @@ -675,6 +713,10 @@ union Instruction { } shift; union { + BitField<39, 1, u64> wrap; + } shr; + + union { BitField<39, 5, u64> shift_amount; BitField<48, 1, u64> negate_b; BitField<49, 1, u64> negate_a; @@ -1388,6 +1430,14 @@ union Instruction { } sust; union { + BitField<28, 1, u64> is_ba; + BitField<51, 3, ImageAtomicSize> size; + BitField<33, 3, ImageType> image_type; + BitField<29, 4, 
ImageAtomicOperation> operation; + BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; + } suatom_d; + + union { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; @@ -1508,6 +1558,7 @@ public: BRK, DEPBAR, VOTE, + SHFL, BFE_C, BFE_R, BFE_IMM, @@ -1539,6 +1590,7 @@ public: TMML_B, // Texture Mip Map Level TMML, // Texture Mip Map Level SUST, // Surface Store + SUATOM, // Surface Atomic Operation EXIT, NOP, IPA, @@ -1798,6 +1850,7 @@ private: INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), + INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), @@ -1822,6 +1875,7 @@ private: INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), INST("11101011001-----", Id::SUST, Type::Image, "SUST"), + INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index 9f59a2dc1..62afc0d11 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -14,11 +14,18 @@ namespace Tegra { MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} -void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { +void MacroInterpreter::Execute(u32 offset, std::size_t num_parameters, const u32* parameters) { MICROPROFILE_SCOPE(MacroInterp); Reset(); + registers[1] = parameters[0]; - this->parameters = std::move(parameters); + + if (num_parameters > parameters_capacity) { + 
parameters_capacity = num_parameters; + this->parameters = std::make_unique<u32[]>(num_parameters); + } + std::memcpy(this->parameters.get(), parameters, num_parameters * sizeof(u32)); + this->num_parameters = num_parameters; // Execute the code until we hit an exit condition. bool keep_executing = true; @@ -27,7 +34,7 @@ void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { } // Assert the the macro used all the input parameters - ASSERT(next_parameter_index == this->parameters.size()); + ASSERT(next_parameter_index == num_parameters); } void MacroInterpreter::Reset() { @@ -35,7 +42,7 @@ void MacroInterpreter::Reset() { pc = 0; delayed_pc = {}; method_address.raw = 0; - parameters.clear(); + num_parameters = 0; // The next parameter index starts at 1, because $r1 already has the value of the first // parameter. next_parameter_index = 1; @@ -124,9 +131,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { // An instruction with the Exit flag will not actually // cause an exit if it's executed inside a delay slot. - // TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further - // testing on the MME code. - if (opcode.is_exit) { + if (opcode.is_exit && !is_delay_slot) { // Exit has a delay slot, execute the next instruction Step(offset, true); return false; @@ -229,7 +234,8 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res } u32 MacroInterpreter::FetchParameter() { - return parameters.at(next_parameter_index++); + ASSERT(next_parameter_index < num_parameters); + return parameters[next_parameter_index++]; } u32 MacroInterpreter::GetRegister(u32 register_id) const { diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h index cde360288..76b6a895b 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro_interpreter.h @@ -25,7 +25,7 @@ public: * @param offset Offset to start execution at. 
* @param parameters The parameters of the macro. */ - void Execute(u32 offset, std::vector<u32> parameters); + void Execute(u32 offset, std::size_t num_parameters, const u32* parameters); private: enum class Operation : u32 { @@ -162,10 +162,12 @@ private: MethodAddress method_address = {}; /// Input parameters of the current macro. - std::vector<u32> parameters; + std::unique_ptr<u32[]> parameters; + std::size_t num_parameters = 0; + std::size_t parameters_capacity = 0; /// Index of the next parameter that will be fetched by the 'parm' instruction. u32 next_parameter_index = 0; - bool carry_flag{}; + bool carry_flag = false; }; } // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 01d89f47d..4dd08bccb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -331,7 +331,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); SetupDrawConstBuffers(stage_enum, shader); SetupDrawGlobalMemory(stage_enum, shader); - const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; + const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)}; const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); @@ -489,9 +489,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( // Assume that a surface will be written to if it is used as a framebuffer, even if // the shader doesn't actually write to it. 
texture_cache.MarkColorBufferInUse(*single_color_target); - // Workaround for and issue in nvidia drivers - // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; } fbkey.is_single_buffer = true; @@ -512,11 +509,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( // Assume that a surface will be written to if it is used as a framebuffer, even // if the shader doesn't actually write to it. texture_cache.MarkColorBufferInUse(index); - // Enable sRGB only for supported formats - // Workaround for and issue in nvidia drivers - // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - state.framebuffer_srgb.enabled |= - color_surface->GetSurfaceParams().srgb_conversion; } fbkey.color_attachments[index] = @@ -801,7 +793,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { } auto kernel = shader_cache.GetComputeKernel(code_addr); - const auto [program, next_bindings] = kernel->GetProgramHandle({}); + ProgramVariant variant; + variant.texture_buffer_usage = SetupComputeTextures(kernel); + SetupComputeImages(kernel); + + const auto [program, next_bindings] = kernel->GetProgramHandle(variant); state.draw.shader_program = program; state.draw.program_pipeline = 0; @@ -816,13 +812,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { SetupComputeConstBuffers(kernel); SetupComputeGlobalMemory(kernel); - // TODO(Rodrigo): Bind images and samplers - buffer_cache.Unmap(); bind_ubo_pushbuffer.Bind(); bind_ssbo_pushbuffer.Bind(); + state.ApplyTextures(); + state.ApplyImages(); state.ApplyShaderProgram(); state.ApplyProgramPipeline(); @@ -902,6 +898,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, } screen_info.display_texture = surface->GetTexture(); + screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion; return 
true; } @@ -922,7 +919,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { const auto& launch_desc = system.GPU().KeplerCompute().launch_description; for (const auto& entry : kernel->GetShaderEntries().const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; - const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value(); + const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); Tegra::Engines::ConstBufferInfo buffer; buffer.address = config.Address(); buffer.size = config.size; @@ -981,53 +978,125 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); } -TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, - BaseBindings base_bindings) { +TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, + const Shader& shader, + BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); const auto& entries = shader->GetShaderEntries().samplers; - ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units), + ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), "Exceeded the number of active textures."); TextureBufferUsage texture_buffer_usage{0}; for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - Tegra::Texture::FullTextureInfo texture; - if (entry.IsBindless()) { + const auto texture = [&]() { + if (!entry.IsBindless()) { + return maxwell3d.GetStageTexture(stage, entry.GetOffset()); + } const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); - texture = maxwell3d.GetTextureInfo(tex_handle, 
entry.GetOffset()); - } else { - texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); + }(); + + if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) { + texture_buffer_usage.set(bindpoint); } - const u32 current_bindpoint = base_bindings.sampler + bindpoint; + } - auto& unit{state.texture_units[current_bindpoint]}; - unit.sampler = sampler_cache.GetSampler(texture.tsc); + return texture_buffer_usage; +} - if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) { - if (view->GetSurfaceParams().IsBuffer()) { - // Record that this texture is a texture buffer. - texture_buffer_usage.set(bindpoint); - } else { - // Apply swizzle to textures that are not buffers. - view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, - texture.tic.w_source); +TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { + MICROPROFILE_SCOPE(OpenGL_Texture); + const auto& compute = system.GPU().KeplerCompute(); + const auto& entries = kernel->GetShaderEntries().samplers; + + ASSERT_MSG(entries.size() <= std::size(state.textures), + "Exceeded the number of active textures."); + + TextureBufferUsage texture_buffer_usage{0}; + + for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto& entry = entries[bindpoint]; + const auto texture = [&]() { + if (!entry.IsBindless()) { + return compute.GetTexture(entry.GetOffset()); } - state.texture_units[current_bindpoint].texture = view->GetTexture(); - } else { - // Can occur when texture addr is null or its memory is unmapped/invalid - unit.texture = 0; + const auto cbuf = entry.GetBindlessCBuf(); + Tegra::Texture::TextureHandle tex_handle; + tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); + return compute.GetTextureInfo(tex_handle, entry.GetOffset()); + }(); + + if (SetupTexture(bindpoint, texture, entry)) { + 
texture_buffer_usage.set(bindpoint); } } return texture_buffer_usage; } +bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, + const GLShader::SamplerEntry& entry) { + state.samplers[binding] = sampler_cache.GetSampler(texture.tsc); + + const auto view = texture_cache.GetTextureSurface(texture.tic, entry); + if (!view) { + // Can occur when texture addr is null or its memory is unmapped/invalid + state.textures[binding] = 0; + return false; + } + state.textures[binding] = view->GetTexture(); + + if (view->GetSurfaceParams().IsBuffer()) { + return true; + } + + // Apply swizzle to textures that are not buffers. + view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, + texture.tic.w_source); + return false; +} + +void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { + const auto& compute = system.GPU().KeplerCompute(); + const auto& entries = shader->GetShaderEntries().images; + for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto& entry = entries[bindpoint]; + const auto tic = [&]() { + if (!entry.IsBindless()) { + return compute.GetTexture(entry.GetOffset()).tic; + } + const auto cbuf = entry.GetBindlessCBuf(); + Tegra::Texture::TextureHandle tex_handle; + tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); + return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; + }(); + SetupImage(bindpoint, tic, entry); + } +} + +void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, + const GLShader::ImageEntry& entry) { + const auto view = texture_cache.GetImageSurface(tic, entry); + if (!view) { + state.images[binding] = 0; + return; + } + if (!tic.IsBuffer()) { + view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); + } + if (entry.IsWritten()) { + view->MarkAsModified(texture_cache.Tick()); + } + state.images[binding] = view->GetTexture(); +} + void 
RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { const auto& regs = system.GPU().Maxwell3D().regs; const bool geometry_shaders_enabled = diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9d20a4fbf..eada752e0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -32,6 +32,7 @@ #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" +#include "video_core/textures/texture.h" namespace Core { class System; @@ -137,8 +138,22 @@ private: /// Configures the current textures to use for the draw command. Returns shaders texture buffer /// usage. - TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, BaseBindings base_bindings); + TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + const Shader& shader, BaseBindings base_bindings); + + /// Configures the textures used in a compute shader. Returns texture buffer usage. + TextureBufferUsage SetupComputeTextures(const Shader& kernel); + + /// Configures a texture. Returns true when the texture is a texture buffer. + bool SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, + const GLShader::SamplerEntry& entry); + + /// Configures images in a compute shader. + void SetupComputeImages(const Shader& shader); + + /// Configures an image. 
+ void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, + const GLShader::ImageEntry& entry); /// Syncs the viewport and depth range to match the guest state void SyncViewport(OpenGLState& current_state); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 909ccb82c..0dbc4c02f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,7 +214,8 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn std::string source = "#version 430 core\n" "#extension GL_ARB_separate_shader_objects : enable\n" "#extension GL_NV_gpu_shader5 : enable\n" - "#extension GL_NV_shader_thread_group : enable\n"; + "#extension GL_NV_shader_thread_group : enable\n" + "#extension GL_NV_shader_thread_shuffle : enable\n"; if (entries.shader_viewport_layer_array) { source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 4a8c7edc9..76439e7ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -325,6 +325,7 @@ public: DeclareRegisters(); DeclarePredicates(); DeclareLocalMemory(); + DeclareSharedMemory(); DeclareInternalFlags(); DeclareInputAttributes(); DeclareOutputAttributes(); @@ -389,11 +390,10 @@ public: for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); } - for (const auto& image : ir.GetImages()) { + for (const auto& [offset, image] : ir.GetImages()) { entries.images.emplace_back(image); } - for (const auto& gmem_pair : ir.GetGlobalMemory()) { - const auto& [base, usage] = gmem_pair; + for (const auto& [base, usage] : ir.GetGlobalMemory()) { entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, usage.is_written); 
} @@ -500,6 +500,13 @@ private: code.AddNewLine(); } + void DeclareSharedMemory() { + if (stage != ProgramType::Compute) { + return; + } + code.AddLine("shared uint {}[];", GetSharedMemory()); + } + void DeclareInternalFlags() { for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { const auto flag_code = static_cast<InternalFlag>(flag); @@ -706,8 +713,8 @@ private: void DeclareImages() { const auto& images{ir.GetImages()}; - for (const auto& image : images) { - const std::string image_type = [&]() { + for (const auto& [offset, image] : images) { + const char* image_type = [&] { switch (image.GetType()) { case Tegra::Shader::ImageType::Texture1D: return "image1D"; @@ -726,9 +733,33 @@ private: return "image1D"; } }(); - code.AddLine("layout (binding = IMAGE_BINDING_{}) coherent volatile writeonly uniform " + + const auto [type_prefix, format] = [&]() -> std::pair<const char*, const char*> { + if (!image.IsSizeKnown()) { + return {"", ""}; + } + switch (image.GetSize()) { + case Tegra::Shader::ImageAtomicSize::U32: + return {"u", "r32ui, "}; + case Tegra::Shader::ImageAtomicSize::S32: + return {"i", "r32i, "}; + default: + UNIMPLEMENTED_MSG("Unimplemented atomic size={}", + static_cast<u32>(image.GetSize())); + return {"", ""}; + } + }(); + + std::string qualifier = "coherent volatile"; + if (image.IsRead() && !image.IsWritten()) { + qualifier += " readonly"; + } else if (image.IsWritten() && !image.IsRead()) { + qualifier += " writeonly"; + } + + code.AddLine("layout (binding = IMAGE_BINDING_{}) {} uniform " "{} {};", - image.GetIndex(), image_type, GetImage(image)); + image.GetIndex(), qualifier, image_type, GetImage(image)); } if (!images.empty()) { code.AddNewLine(); @@ -858,6 +889,12 @@ private: Type::Uint}; } + if (const auto smem = std::get_if<SmemNode>(&*node)) { + return { + fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), + Type::Uint}; + } + if (const auto internal_flag = 
std::get_if<InternalFlagNode>(&*node)) { return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; } @@ -1174,6 +1211,74 @@ private: return expr; } + std::string BuildIntegerCoordinates(Operation operation) { + constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; + const std::size_t coords_count{operation.GetOperandsCount()}; + std::string expr = constructors.at(coords_count - 1); + for (std::size_t i = 0; i < coords_count; ++i) { + expr += VisitOperand(operation, i).AsInt(); + if (i + 1 < coords_count) { + expr += ", "; + } + } + expr += ')'; + return expr; + } + + std::string BuildImageValues(Operation operation) { + const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto [constructors, type] = [&]() -> std::pair<std::array<const char*, 4>, Type> { + constexpr std::array float_constructors{"float", "vec2", "vec3", "vec4"}; + if (!meta.image.IsSizeKnown()) { + return {float_constructors, Type::Float}; + } + switch (meta.image.GetSize()) { + case Tegra::Shader::ImageAtomicSize::U32: + return {{"uint", "uvec2", "uvec3", "uvec4"}, Type::Uint}; + case Tegra::Shader::ImageAtomicSize::S32: + return {{"int", "ivec2", "ivec3", "ivec4"}, Type::Uint}; + default: + UNIMPLEMENTED_MSG("Unimplemented image size={}", + static_cast<u32>(meta.image.GetSize())); + return {float_constructors, Type::Float}; + } + }(); + + const std::size_t values_count{meta.values.size()}; + std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); + for (std::size_t i = 0; i < values_count; ++i) { + expr += Visit(meta.values.at(i)).As(type); + if (i + 1 < values_count) { + expr += ", "; + } + } + expr += ')'; + return expr; + } + + Expression AtomicImage(Operation operation, const char* opname) { + constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; + const auto meta{std::get<MetaImage>(operation.GetMeta())}; + ASSERT(meta.values.size() == 1); + ASSERT(meta.image.IsSizeKnown()); + + const auto type = [&]() { + switch 
(const auto size = meta.image.GetSize()) { + case Tegra::Shader::ImageAtomicSize::U32: + return Type::Uint; + case Tegra::Shader::ImageAtomicSize::S32: + return Type::Int; + default: + UNIMPLEMENTED_MSG("Unimplemented image size={}", static_cast<u32>(size)); + return Type::Uint; + } + }(); + + return {fmt::format("{}({}, {}, {})", opname, GetImage(meta.image), + BuildIntegerCoordinates(operation), Visit(meta.values[0]).As(type)), + type}; + } + Expression Assign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; @@ -1199,6 +1304,11 @@ private: target = { fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), Type::Uint}; + } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { + ASSERT(stage == ProgramType::Compute); + target = { + fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), + Type::Uint}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { const std::string real = Visit(gmem->GetRealAddress()).AsUint(); const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); @@ -1692,36 +1802,37 @@ private: } Expression ImageStore(Operation operation) { - constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; const auto meta{std::get<MetaImage>(operation.GetMeta())}; + code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), + BuildIntegerCoordinates(operation), BuildImageValues(operation)); + return {}; + } - std::string expr = "imageStore("; - expr += GetImage(meta.image); - expr += ", "; + Expression AtomicImageAdd(Operation operation) { + return AtomicImage(operation, "imageAtomicAdd"); + } - const std::size_t coords_count{operation.GetOperandsCount()}; - expr += constructors.at(coords_count - 1); - for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - if (i + 1 < coords_count) { - expr += ", "; - } - } - expr += "), "; + Expression AtomicImageMin(Operation operation) { + return 
AtomicImage(operation, "imageAtomicMin"); + } - const std::size_t values_count{meta.values.size()}; - UNIMPLEMENTED_IF(values_count != 4); - expr += "vec4("; - for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)).AsFloat(); - if (i + 1 < values_count) { - expr += ", "; - } - } - expr += "));"; + Expression AtomicImageMax(Operation operation) { + return AtomicImage(operation, "imageAtomicMax"); + } + Expression AtomicImageAnd(Operation operation) { + return AtomicImage(operation, "imageAtomicAnd"); + } - code.AddLine(expr); - return {}; + Expression AtomicImageOr(Operation operation) { + return AtomicImage(operation, "imageAtomicOr"); + } + + Expression AtomicImageXor(Operation operation) { + return AtomicImage(operation, "imageAtomicXor"); + } + + Expression AtomicImageExchange(Operation operation) { + return AtomicImage(operation, "imageAtomicExchange"); } Expression Branch(Operation operation) { @@ -1846,8 +1957,7 @@ private: Expression BallotThread(Operation operation) { const std::string value = VisitOperand(operation, 0).AsBool(); if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "Nvidia warp intrinsics are not available and its required by a shader"); + LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); // Stub on non-Nvidia devices by simulating all threads voting the same as the active // one. return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; @@ -1858,8 +1968,7 @@ private: Expression Vote(Operation operation, const char* func) { const std::string value = VisitOperand(operation, 0).AsBool(); if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "Nvidia vote intrinsics are not available and its required by a shader"); + LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); // Stub with a warp size of one. 
return {value, Type::Bool}; } @@ -1876,15 +1985,54 @@ private: Expression VoteEqual(Operation operation) { if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "Nvidia vote intrinsics are not available and its required by a shader"); - // We must return true here since a stub for a theoretical warp size of 1 will always - // return an equal result for all its votes. + LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); + // We must return true here since a stub for a theoretical warp size of 1. + // This will always return an equal result across all votes. return {"true", Type::Bool}; } return Vote(operation, "allThreadsEqualNV"); } + template <const std::string_view& func> + Expression Shuffle(Operation operation) { + const std::string value = VisitOperand(operation, 0).AsFloat(); + if (!device.HasWarpIntrinsics()) { + LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader"); + // On a "single-thread" device we are either on the same thread or out of bounds. Both + // cases return the passed value. + return {value, Type::Float}; + } + + const std::string index = VisitOperand(operation, 1).AsUint(); + const std::string width = VisitOperand(operation, 2).AsUint(); + return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float}; + } + + template <const std::string_view& func> + Expression InRangeShuffle(Operation operation) { + const std::string index = VisitOperand(operation, 0).AsUint(); + const std::string width = VisitOperand(operation, 1).AsUint(); + if (!device.HasWarpIntrinsics()) { + // On a "single-thread" device we are only in bounds when the requested index is 0. 
+ return {fmt::format("({} == 0U)", index), Type::Bool}; + } + + const std::string in_range = code.GenerateTemporary(); + code.AddLine("bool {};", in_range); + code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); + return {in_range, Type::Bool}; + } + + struct Func final { + Func() = delete; + ~Func() = delete; + + static constexpr std::string_view ShuffleIndexed = "shuffleNV"; + static constexpr std::string_view ShuffleUp = "shuffleUpNV"; + static constexpr std::string_view ShuffleDown = "shuffleDownNV"; + static constexpr std::string_view ShuffleButterfly = "shuffleXorNV"; + }; + static constexpr std::array operation_decompilers = { &GLSLDecompiler::Assign, @@ -2017,6 +2165,13 @@ private: &GLSLDecompiler::TexelFetch, &GLSLDecompiler::ImageStore, + &GLSLDecompiler::AtomicImageAdd, + &GLSLDecompiler::AtomicImageMin, + &GLSLDecompiler::AtomicImageMax, + &GLSLDecompiler::AtomicImageAnd, + &GLSLDecompiler::AtomicImageOr, + &GLSLDecompiler::AtomicImageXor, + &GLSLDecompiler::AtomicImageExchange, &GLSLDecompiler::Branch, &GLSLDecompiler::BranchIndirect, @@ -2040,6 +2195,16 @@ private: &GLSLDecompiler::VoteAll, &GLSLDecompiler::VoteAny, &GLSLDecompiler::VoteEqual, + + &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, + &GLSLDecompiler::Shuffle<Func::ShuffleUp>, + &GLSLDecompiler::Shuffle<Func::ShuffleDown>, + &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>, + + &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>, + &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>, + &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>, + &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); @@ -2080,6 +2245,10 @@ private: return "lmem_" + suffix; } + std::string GetSharedMemory() const { + return fmt::format("smem_{}", suffix); + } + std::string GetInternalFlag(InternalFlag flag) const { constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", 
"overflow_flag"}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 969fe9ced..f141c4e3b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -341,13 +341,22 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn u64 index{}; u32 type{}; u8 is_bindless{}; + u8 is_written{}; + u8 is_read{}; + u8 is_size_known{}; + u32 size{}; if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) { + !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || + !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || + !LoadObjectFromPrecompiled(is_size_known) || !LoadObjectFromPrecompiled(size)) { return {}; } entry.entries.images.emplace_back( static_cast<std::size_t>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0); + static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, + is_read != 0, + is_size_known ? std::make_optional(static_cast<Tegra::Shader::ImageAtomicSize>(size)) + : std::nullopt); } u32 global_memory_count{}; @@ -426,10 +435,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: return false; } for (const auto& image : entries.images) { + const u32 size = image.IsSizeKnown() ? static_cast<u32>(image.GetSize()) : 0U; if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) { + !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || + !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 
1 : 0)) || + !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || + !SaveObjectToPrecompiled(image.IsSizeKnown()) || !SaveObjectToPrecompiled(size)) { return false; } } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index f4777d0b0..bf86b5a0b 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -16,7 +16,6 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; OpenGLState OpenGLState::cur_state; -bool OpenGLState::s_rgb_used; namespace { @@ -34,6 +33,25 @@ bool UpdateTie(T1 current_value, const T2 new_value) { return changed; } +template <typename T> +std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) { + std::optional<std::size_t> first; + std::size_t last; + for (std::size_t i = 0; i < std::size(current_values); ++i) { + if (!UpdateValue(current_values[i], new_values[i])) { + continue; + } + if (!first) { + first = i; + } + last = i; + } + if (!first) { + return std::nullopt; + } + return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1)); +} + void Enable(GLenum cap, bool enable) { if (enable) { glEnable(cap); @@ -134,10 +152,6 @@ OpenGLState::OpenGLState() { logic_op.enabled = false; logic_op.operation = GL_COPY; - for (auto& texture_unit : texture_units) { - texture_unit.Reset(); - } - draw.read_framebuffer = 0; draw.draw_framebuffer = 0; draw.vertex_array = 0; @@ -267,8 +281,6 @@ void OpenGLState::ApplySRgb() const { return; cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled; if (framebuffer_srgb.enabled) { - // Track if sRGB is used - s_rgb_used = true; glEnable(GL_FRAMEBUFFER_SRGB); } else { glDisable(GL_FRAMEBUFFER_SRGB); @@ -496,52 +508,20 @@ void OpenGLState::ApplyAlphaTest() const { } void OpenGLState::ApplyTextures() const { - bool has_delta{}; - std::size_t first{}; - std::size_t last{}; - std::array<GLuint, 
Maxwell::NumTextureSamplers> textures; - - for (std::size_t i = 0; i < std::size(texture_units); ++i) { - const auto& texture_unit = texture_units[i]; - auto& cur_state_texture_unit = cur_state.texture_units[i]; - textures[i] = texture_unit.texture; - if (cur_state_texture_unit.texture == textures[i]) { - continue; - } - cur_state_texture_unit.texture = textures[i]; - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; - } - if (has_delta) { - glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), - textures.data() + first); + if (const auto update = UpdateArray(cur_state.textures, textures)) { + glBindTextures(update->first, update->second, textures.data() + update->first); } } void OpenGLState::ApplySamplers() const { - bool has_delta{}; - std::size_t first{}; - std::size_t last{}; - std::array<GLuint, Maxwell::NumTextureSamplers> samplers; - - for (std::size_t i = 0; i < std::size(samplers); ++i) { - samplers[i] = texture_units[i].sampler; - if (cur_state.texture_units[i].sampler == texture_units[i].sampler) { - continue; - } - cur_state.texture_units[i].sampler = texture_units[i].sampler; - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; + if (const auto update = UpdateArray(cur_state.samplers, samplers)) { + glBindSamplers(update->first, update->second, samplers.data() + update->first); } - if (has_delta) { - glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), - samplers.data() + first); +} + +void OpenGLState::ApplyImages() const { + if (const auto update = UpdateArray(cur_state.images, images)) { + glBindImageTextures(update->first, update->second, images.data() + update->first); } } @@ -576,6 +556,7 @@ void OpenGLState::Apply() { ApplyLogicOp(); ApplyTextures(); ApplySamplers(); + ApplyImages(); if (dirty.polygon_offset) { ApplyPolygonOffset(); dirty.polygon_offset = false; @@ -606,18 +587,18 @@ void OpenGLState::EmulateViewportWithScissor() { } OpenGLState& 
OpenGLState::UnbindTexture(GLuint handle) { - for (auto& unit : texture_units) { - if (unit.texture == handle) { - unit.Unbind(); + for (auto& texture : textures) { + if (texture == handle) { + texture = 0; } } return *this; } OpenGLState& OpenGLState::ResetSampler(GLuint handle) { - for (auto& unit : texture_units) { - if (unit.sampler == handle) { - unit.sampler = 0; + for (auto& sampler : samplers) { + if (sampler == handle) { + sampler = 0; } } return *this; diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index fdf9a8a12..c358d3b38 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -118,21 +118,9 @@ public: GLenum operation; } logic_op; - // 3 texture units - one for each that is used in PICA fragment shader emulation - struct TextureUnit { - GLuint texture; // GL_TEXTURE_BINDING_2D - GLuint sampler; // GL_SAMPLER_BINDING - - void Unbind() { - texture = 0; - } - - void Reset() { - Unbind(); - sampler = 0; - } - }; - std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures{}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers{}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images{}; struct { GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING @@ -187,14 +175,6 @@ public: return cur_state; } - static bool GetsRGBUsed() { - return s_rgb_used; - } - - static void ClearsRGBUsed() { - s_rgb_used = false; - } - void SetDefaultViewports(); /// Apply this state as the current OpenGL state void Apply(); @@ -220,6 +200,7 @@ public: void ApplyLogicOp() const; void ApplyTextures() const; void ApplySamplers() const; + void ApplyImages() const; void ApplyDepthClamp() const; void ApplyPolygonOffset() const; void ApplyAlphaTest() const; @@ -264,8 +245,6 @@ public: private: static OpenGLState cur_state; 
- // Workaround for sRGB problems caused by QT not supporting srgb output - static bool s_rgb_used; struct { bool blend_state; bool stencil_state; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 21324488a..8e13ab38b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -78,6 +78,17 @@ public: /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER void Attach(GLenum attachment, GLenum target) const; + void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source); + + void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); + + void MarkAsModified(u64 tick) { + surface.MarkAsModified(true, tick); + } + GLuint GetTexture() const { if (is_proxy) { return surface.GetTexture(); @@ -89,13 +100,6 @@ public: return surface.GetSurfaceParams(); } - void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); - - void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); - private: u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, @@ -111,8 +115,8 @@ private: GLenum target{}; OGLTextureView texture_view; - u32 swizzle; - bool is_proxy; + u32 swizzle{}; + bool is_proxy{}; }; class TextureCacheOpenGL final : public TextureCacheBase { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index af9684839..1e6ef66ab 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -264,7 +264,6 @@ void RendererOpenGL::CreateRasterizer() { if (rasterizer) { return; } - OpenGLState::ClearsRGBUsed(); rasterizer = 
std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); } @@ -342,21 +341,17 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v), }}; - state.texture_units[0].texture = screen_info.display_texture; - // Workaround brigthness problems in SMO by enabling sRGB in the final output - // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 - state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); + state.textures[0] = screen_info.display_texture; + state.framebuffer_srgb.enabled = screen_info.display_srgb; state.AllDirty(); state.Apply(); glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); // Restore default state state.framebuffer_srgb.enabled = false; - state.texture_units[0].texture = 0; + state.textures[0] = 0; state.AllDirty(); state.Apply(); - // Clear sRGB state for the next frame - OpenGLState::ClearsRGBUsed(); } /** @@ -406,8 +401,8 @@ void RendererOpenGL::CaptureScreenshot() { GLuint renderbuffer; glGenRenderbuffers(1, &renderbuffer); glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); - glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width, - layout.height); + glRenderbufferStorage(GL_RENDERBUFFER, screen_info.display_srgb ? 
GL_SRGB8 : GL_RGB8, + layout.width, layout.height); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); DrawScreen(layout); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 9bd086368..cf26628ca 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -38,7 +38,8 @@ struct TextureInfo { /// Structure used for storing information about the display target for the Switch screen struct ScreenInfo { - GLuint display_texture; + GLuint display_texture{}; + bool display_srgb{}; const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; TextureInfo texture; }; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 3b966ddc3..897cbb4e8 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -2,9 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include <map> +#include <bitset> #include <optional> #include <set> +#include <string_view> #include <vector> #include "common/assert.h" #include "video_core/renderer_vulkan/declarations.h" @@ -12,13 +13,32 @@ namespace Vulkan { +namespace { + +template <typename T> +void SetNext(void**& next, T& data) { + *next = &data; + next = &data.pNext; +} + +template <typename T> +T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) { + vk::PhysicalDeviceFeatures2 features; + T extension_features; + features.pNext = &extension_features; + physical.getFeatures2(&features, dldi); + return extension_features; +} + +} // Anonymous namespace + namespace Alternatives { -constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = { - vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; -constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { - vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; -constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}}; +constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint, + vk::Format::eD16UnormS8Uint, vk::Format{}}; +constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint, + vk::Format::eD32SfloatS8Uint, vk::Format{}}; +constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, vk::Format{}}; } // namespace Alternatives @@ -58,16 +78,53 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy VKDevice::~VKDevice() = default; bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { - vk::PhysicalDeviceFeatures device_features; - device_features.vertexPipelineStoresAndAtomics = true; - device_features.independentBlend = true; - device_features.textureCompressionASTC_LDR = is_optimal_astc_supported; - const auto queue_cis = GetDeviceQueueCreateInfos(); - const std::vector<const char*> extensions = LoadExtensions(dldi); - const vk::DeviceCreateInfo device_ci({}, 
static_cast<u32>(queue_cis.size()), queue_cis.data(), - 0, nullptr, static_cast<u32>(extensions.size()), - extensions.data(), &device_features); + const std::vector extensions = LoadExtensions(dldi); + + vk::PhysicalDeviceFeatures2 features2; + void** next = &features2.pNext; + auto& features = features2.features; + features.vertexPipelineStoresAndAtomics = true; + features.independentBlend = true; + features.depthClamp = true; + features.samplerAnisotropy = true; + features.largePoints = true; + features.textureCompressionASTC_LDR = is_optimal_astc_supported; + + vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor; + vertex_divisor.vertexAttributeInstanceRateDivisor = true; + vertex_divisor.vertexAttributeInstanceRateZeroDivisor = true; + SetNext(next, vertex_divisor); + + vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; + if (is_float16_supported) { + float16_int8.shaderFloat16 = true; + SetNext(next, float16_int8); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); + } + + vk::PhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; + if (khr_uniform_buffer_standard_layout) { + std430_layout.uniformBufferStandardLayout = true; + SetNext(next, std430_layout); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); + } + + vk::PhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; + if (ext_index_type_uint8) { + index_type_uint8.indexTypeUint8 = true; + SetNext(next, index_type_uint8); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); + } + + vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0, + nullptr, static_cast<u32>(extensions.size()), extensions.data(), + nullptr); + device_ci.pNext = &features2; + vk::Device dummy_logical; if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); @@ -78,6 +135,17 @@ 
bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan logical = UniqueDevice( dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld)); + if (khr_driver_properties) { + vk::PhysicalDeviceDriverPropertiesKHR driver; + vk::PhysicalDeviceProperties2 properties; + properties.pNext = &driver; + physical.getProperties2(&properties, dld); + driver_id = driver.driverID; + LOG_INFO(Render_Vulkan, "Driver: {} {}", driver.driverName, driver.driverInfo); + } else { + LOG_INFO(Render_Vulkan, "Driver: Unknown"); + } + graphics_queue = logical->getQueue(graphics_family, 0, dld); present_queue = logical->getQueue(present_family, 0, dld); return true; @@ -92,20 +160,19 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, // The wanted format is not supported by hardware, search for alternatives const vk::Format* alternatives = GetFormatAlternatives(wanted_format); if (alternatives == nullptr) { - LOG_CRITICAL(Render_Vulkan, - "Format={} with usage={} and type={} has no defined alternatives and host " - "hardware does not support it", - vk::to_string(wanted_format), vk::to_string(wanted_usage), - static_cast<u32>(format_type)); - UNREACHABLE(); + UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host " + "hardware does not support it", + vk::to_string(wanted_format), vk::to_string(wanted_usage), + static_cast<u32>(format_type)); return wanted_format; } std::size_t i = 0; for (vk::Format alternative = alternatives[0]; alternative != vk::Format{}; alternative = alternatives[++i]) { - if (!IsFormatSupported(alternative, wanted_usage, format_type)) + if (!IsFormatSupported(alternative, wanted_usage, format_type)) { continue; + } LOG_WARNING(Render_Vulkan, "Emulating format={} with alternative format={} with usage={} and type={}", static_cast<u32>(wanted_format), static_cast<u32>(alternative), @@ -114,12 +181,10 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, 
} // No alternatives found, panic - LOG_CRITICAL(Render_Vulkan, - "Format={} with usage={} and type={} is not supported by the host hardware and " - "doesn't support any of the alternatives", - static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), - static_cast<u32>(format_type)); - UNREACHABLE(); + UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and " + "doesn't support any of the alternatives", + static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), + static_cast<u32>(format_type)); return wanted_format; } @@ -132,7 +197,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | vk::FormatFeatureFlagBits::eTransferDst}; - constexpr std::array<vk::Format, 9> astc_formats = { + constexpr std::array astc_formats = { vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock, vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, @@ -151,76 +216,120 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag FormatType format_type) const { const auto it = format_properties.find(wanted_format); if (it == format_properties.end()) { - LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format)); - UNREACHABLE(); + UNIMPLEMENTED_MSG("Unimplemented format query={}", vk::to_string(wanted_format)); return true; } - const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type); + const auto supported_usage = GetFormatFeatures(it->second, format_type); return (supported_usage & wanted_usage) == wanted_usage; } bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, vk::SurfaceKHR surface) { - bool has_swapchain{}; + 
LOG_INFO(Render_Vulkan, "{}", physical.getProperties(dldi).deviceName); + bool is_suitable = true; + + constexpr std::array required_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME, + VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME}; + std::bitset<required_extensions.size()> available_extensions{}; + for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { - has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + for (std::size_t i = 0; i < required_extensions.size(); ++i) { + if (available_extensions[i]) { + continue; + } + available_extensions[i] = + required_extensions[i] == std::string_view{prop.extensionName}; + } } - if (!has_swapchain) { - // The device doesn't support creating swapchains. - return false; + if (!available_extensions.all()) { + for (std::size_t i = 0; i < required_extensions.size(); ++i) { + if (available_extensions[i]) { + continue; + } + LOG_INFO(Render_Vulkan, "Missing required extension: {}", required_extensions[i]); + is_suitable = false; + } } bool has_graphics{}, has_present{}; const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { const auto& family = queue_family_properties[i]; - if (family.queueCount == 0) + if (family.queueCount == 0) { continue; - + } has_graphics |= (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0; } if (!has_graphics || !has_present) { - // The device doesn't have a graphics and present queue. - return false; + LOG_INFO(Render_Vulkan, "Device lacks a graphics and present queue"); + is_suitable = false; } // TODO(Rodrigo): Check if the device matches all requeriments. 
const auto properties{physical.getProperties(dldi)}; - const auto limits{properties.limits}; - if (limits.maxUniformBufferRange < 65536) { - return false; + const auto& limits{properties.limits}; + + constexpr u32 required_ubo_size = 65536; + if (limits.maxUniformBufferRange < required_ubo_size) { + LOG_INFO(Render_Vulkan, "Device UBO size {} is too small, {} is required)", + limits.maxUniformBufferRange, required_ubo_size); + is_suitable = false; } - const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)}; - if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) { - return false; + const auto features{physical.getFeatures(dldi)}; + const std::array feature_report = { + std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), + std::make_pair(features.independentBlend, "independentBlend"), + std::make_pair(features.depthClamp, "depthClamp"), + std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), + std::make_pair(features.largePoints, "largePoints"), + }; + for (const auto& [supported, name] : feature_report) { + if (supported) { + continue; + } + LOG_INFO(Render_Vulkan, "Missing required feature: {}", name); + is_suitable = false; } - // Device is suitable. 
- return true; + return is_suitable; } std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { std::vector<const char*> extensions; - extensions.reserve(2); + extensions.reserve(7); extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); const auto Test = [&](const vk::ExtensionProperties& extension, std::optional<std::reference_wrapper<bool>> status, const char* name, - u32 revision) { - if (extension.extensionName != std::string(name)) { + bool push) { + if (extension.extensionName != std::string_view(name)) { return; } - extensions.push_back(name); + if (push) { + extensions.push_back(name); + } if (status) { status->get() = true; } }; + bool khr_shader_float16_int8{}; for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { - Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1); + Test(extension, khr_uniform_buffer_standard_layout, + VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); + Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); + Test(extension, khr_driver_properties, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, true); + Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); + } + + if (khr_shader_float16_int8) { + is_float16_supported = + GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; + extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); } return extensions; @@ -250,9 +359,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK } void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { - const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); + const auto props = physical.getProperties(dldi); device_type = props.deviceType; uniform_buffer_alignment = 
static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); + storage_buffer_alignment = static_cast<u64>(props.limits.minStorageBufferOffsetAlignment); max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange); } @@ -273,42 +383,53 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con return queue_cis; } -std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( +std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { - static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, - vk::Format::eB5G6R5UnormPack16, - vk::Format::eA2B10G10R10UnormPack32, - vk::Format::eR32G32B32A32Sfloat, - vk::Format::eR16G16Unorm, - vk::Format::eR16G16Snorm, - vk::Format::eR8G8B8A8Srgb, - vk::Format::eR8Unorm, - vk::Format::eB10G11R11UfloatPack32, - vk::Format::eR32Sfloat, - vk::Format::eR16Sfloat, - vk::Format::eR16G16B16A16Sfloat, - vk::Format::eD32Sfloat, - vk::Format::eD16Unorm, - vk::Format::eD16UnormS8Uint, - vk::Format::eD24UnormS8Uint, - vk::Format::eD32SfloatS8Uint, - vk::Format::eBc1RgbaUnormBlock, - vk::Format::eBc2UnormBlock, - vk::Format::eBc3UnormBlock, - vk::Format::eBc4UnormBlock, - vk::Format::eBc5UnormBlock, - vk::Format::eBc5SnormBlock, - vk::Format::eBc7UnormBlock, - vk::Format::eAstc4x4UnormBlock, - vk::Format::eAstc4x4SrgbBlock, - vk::Format::eAstc8x8SrgbBlock, - vk::Format::eAstc8x6SrgbBlock, - vk::Format::eAstc5x4SrgbBlock, - vk::Format::eAstc5x5UnormBlock, - vk::Format::eAstc5x5SrgbBlock, - vk::Format::eAstc10x8UnormBlock, - vk::Format::eAstc10x8SrgbBlock}; - std::map<vk::Format, vk::FormatProperties> format_properties; + constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, + vk::Format::eA8B8G8R8SnormPack32, + vk::Format::eA8B8G8R8SrgbPack32, + vk::Format::eB5G6R5UnormPack16, + vk::Format::eA2B10G10R10UnormPack32, + vk::Format::eR32G32B32A32Sfloat, + 
vk::Format::eR16G16B16A16Uint, + vk::Format::eR16G16Unorm, + vk::Format::eR16G16Snorm, + vk::Format::eR16G16Sfloat, + vk::Format::eR16Unorm, + vk::Format::eR8G8B8A8Srgb, + vk::Format::eR8G8Unorm, + vk::Format::eR8G8Snorm, + vk::Format::eR8Unorm, + vk::Format::eB10G11R11UfloatPack32, + vk::Format::eR32Sfloat, + vk::Format::eR16Sfloat, + vk::Format::eR16G16B16A16Sfloat, + vk::Format::eB8G8R8A8Unorm, + vk::Format::eD32Sfloat, + vk::Format::eD16Unorm, + vk::Format::eD16UnormS8Uint, + vk::Format::eD24UnormS8Uint, + vk::Format::eD32SfloatS8Uint, + vk::Format::eBc1RgbaUnormBlock, + vk::Format::eBc2UnormBlock, + vk::Format::eBc3UnormBlock, + vk::Format::eBc4UnormBlock, + vk::Format::eBc5UnormBlock, + vk::Format::eBc5SnormBlock, + vk::Format::eBc7UnormBlock, + vk::Format::eBc1RgbaSrgbBlock, + vk::Format::eBc3SrgbBlock, + vk::Format::eBc7SrgbBlock, + vk::Format::eAstc4x4UnormBlock, + vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc8x8SrgbBlock, + vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc5x4SrgbBlock, + vk::Format::eAstc5x5UnormBlock, + vk::Format::eAstc5x5SrgbBlock, + vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock}; + std::unordered_map<vk::Format, vk::FormatProperties> format_properties; for (const auto format : formats) { format_properties.emplace(format, physical.getFormatProperties(format, dldi)); } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 537825d8b..010d4c3d6 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -4,7 +4,7 @@ #pragma once -#include <map> +#include <unordered_map> #include <vector> #include "common/common_types.h" #include "video_core/renderer_vulkan/declarations.h" @@ -69,16 +69,26 @@ public: return present_family; } - /// Returns if the device is integrated with the host CPU. + /// Returns true if the device is integrated with the host CPU. 
bool IsIntegrated() const { return device_type == vk::PhysicalDeviceType::eIntegratedGpu; } + /// Returns the driver ID. + vk::DriverIdKHR GetDriverID() const { + return driver_id; + } + /// Returns uniform buffer alignment requeriment. u64 GetUniformBufferAlignment() const { return uniform_buffer_alignment; } + /// Returns storage alignment requeriment. + u64 GetStorageBufferAlignment() const { + return storage_buffer_alignment; + } + /// Returns the maximum range for storage buffers. u64 GetMaxStorageBufferRange() const { return max_storage_buffer_range; @@ -89,9 +99,19 @@ public: return is_optimal_astc_supported; } + /// Returns true if the device supports float16 natively + bool IsFloat16Supported() const { + return is_float16_supported; + } + /// Returns true if the device supports VK_EXT_scalar_block_layout. - bool IsExtScalarBlockLayoutSupported() const { - return ext_scalar_block_layout; + bool IsKhrUniformBufferStandardLayoutSupported() const { + return khr_uniform_buffer_standard_layout; + } + + /// Returns true if the device supports VK_EXT_index_type_uint8. + bool IsExtIndexTypeUint8Supported() const { + return ext_index_type_uint8; } /// Checks if the physical device is suitable. @@ -123,22 +143,28 @@ private: FormatType format_type) const; /// Returns the device properties for Vulkan formats. - static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( + static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); - const vk::PhysicalDevice physical; ///< Physical device. - vk::DispatchLoaderDynamic dld; ///< Device function pointers. - UniqueDevice logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 graphics_family{}; ///< Main graphics queue family index. - u32 present_family{}; ///< Main present queue family index. 
- vk::PhysicalDeviceType device_type; ///< Physical device type. - u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment. - u64 max_storage_buffer_range{}; ///< Max storage buffer size. - bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout. - std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary. + const vk::PhysicalDevice physical; ///< Physical device. + vk::DispatchLoaderDynamic dld; ///< Device function pointers. + UniqueDevice logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + vk::PhysicalDeviceType device_type; ///< Physical device type. + vk::DriverIdKHR driver_id{}; ///< Driver ID. + u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment. + u64 storage_buffer_alignment{}; ///< Storage buffer alignment requeriment. + u64 max_storage_buffer_range{}; ///< Max storage buffer size. + bool is_optimal_astc_supported{}; ///< Support for native ASTC. + bool is_float16_supported{}; ///< Support for float16 arithmetics. + bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. + bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. + bool khr_driver_properties{}; ///< Support for VK_KHR_driver_properties. + std::unordered_map<vk::Format, vk::FormatProperties> + format_properties; ///< Format properties dictionary. 
}; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a35b45c9c..f7fbbb6e4 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -370,8 +370,8 @@ private: u32 binding = const_buffers_base_binding; for (const auto& entry : ir.GetConstantBuffers()) { const auto [index, size] = entry; - const Id type = - device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo; + const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo + : t_cbuf_std140_ubo; const Id id = OpVariable(type, spv::StorageClass::Uniform); AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); @@ -565,7 +565,7 @@ private: const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); Id pointer{}; - if (device.IsExtScalarBlockLayoutSupported()) { + if (device.IsKhrUniformBufferStandardLayoutSupported()) { const Id buffer_offset = Emit(OpShiftRightLogical( t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u))); pointer = Emit( @@ -944,6 +944,41 @@ private: return {}; } + Id AtomicImageAdd(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageMin(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageMax(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageAnd(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageOr(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageXor(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageExchange(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id Branch(Operation operation) { const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ -1092,6 +1127,46 @@ private: return {}; } + Id ShuffleIndexed(Operation) { + UNIMPLEMENTED(); + return {}; 
+ } + + Id ShuffleUp(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id ShuffleDown(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id ShuffleButterfly(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id InRangeShuffleIndexed(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id InRangeShuffleUp(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id InRangeShuffleDown(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id InRangeShuffleButterfly(Operation) { + UNIMPLEMENTED(); + return {}; + } + Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, const std::string& name) { const Id id = OpVariable(type, storage); @@ -1366,6 +1441,13 @@ private: &SPIRVDecompiler::TexelFetch, &SPIRVDecompiler::ImageStore, + &SPIRVDecompiler::AtomicImageAdd, + &SPIRVDecompiler::AtomicImageMin, + &SPIRVDecompiler::AtomicImageMax, + &SPIRVDecompiler::AtomicImageAnd, + &SPIRVDecompiler::AtomicImageOr, + &SPIRVDecompiler::AtomicImageXor, + &SPIRVDecompiler::AtomicImageExchange, &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, @@ -1389,6 +1471,16 @@ private: &SPIRVDecompiler::VoteAll, &SPIRVDecompiler::VoteAny, &SPIRVDecompiler::VoteEqual, + + &SPIRVDecompiler::ShuffleIndexed, + &SPIRVDecompiler::ShuffleUp, + &SPIRVDecompiler::ShuffleDown, + &SPIRVDecompiler::ShuffleButterfly, + + &SPIRVDecompiler::InRangeShuffleIndexed, + &SPIRVDecompiler::InRangeShuffleUp, + &SPIRVDecompiler::InRangeShuffleDown, + &SPIRVDecompiler::InRangeShuffleButterfly, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 77151a24b..d54fb88c9 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -44,7 +44,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::SUST: { UNIMPLEMENTED_IF(instr.sust.mode != 
Tegra::Shader::SurfaceDataMode::P); - UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer); UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store @@ -61,56 +60,105 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { } const auto type{instr.sust.image_type}; - const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; + auto& image{instr.sust.is_immediate ? GetImage(instr.image, type) + : GetBindlessImage(instr.gpr39, type)}; + image.MarkWrite(); + MetaImage meta{image, values}; - const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))}; - bb.push_back(store); + bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords))); + break; + } + case OpCode::Id::SUATOM: { + UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); + + Node value = GetRegister(instr.gpr0); + + std::vector<Node> coords; + const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; + for (std::size_t i = 0; i < num_coords; ++i) { + coords.push_back(GetRegister(instr.gpr8.Value() + i)); + } + + const OperationCode operation_code = [instr] { + switch (instr.suatom_d.operation) { + case Tegra::Shader::ImageAtomicOperation::Add: + return OperationCode::AtomicImageAdd; + case Tegra::Shader::ImageAtomicOperation::Min: + return OperationCode::AtomicImageMin; + case Tegra::Shader::ImageAtomicOperation::Max: + return OperationCode::AtomicImageMax; + case Tegra::Shader::ImageAtomicOperation::And: + return OperationCode::AtomicImageAnd; + case Tegra::Shader::ImageAtomicOperation::Or: + return OperationCode::AtomicImageOr; + case Tegra::Shader::ImageAtomicOperation::Xor: + return OperationCode::AtomicImageXor; + case Tegra::Shader::ImageAtomicOperation::Exch: + return OperationCode::AtomicImageExchange; + default: + 
UNIMPLEMENTED_MSG("Unimplemented operation={}", + static_cast<u32>(instr.suatom_d.operation.Value())); + return OperationCode::AtomicImageAdd; + } + }(); + + const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)}; + MetaImage meta{image, {std::move(value)}}; + SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords))); break; } default: - UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); + UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); } return pc; } -const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { +Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) { const auto offset{static_cast<std::size_t>(image.index.Value())}; - - // If this image has already been used, return the existing mapping. - const auto itr{std::find_if(used_images.begin(), used_images.end(), - [=](const Image& entry) { return entry.GetOffset() == offset; })}; - if (itr != used_images.end()) { - ASSERT(itr->GetType() == type); - return *itr; + if (const auto image = TryUseExistingImage(offset, type, size)) { + return *image; } - // Otherwise create a new mapping for this image. 
const std::size_t next_index{used_images.size()}; - const Image entry{offset, next_index, type}; - return *used_images.emplace(entry).first; + return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second; } -const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, - Tegra::Shader::ImageType type) { +Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) { const Node image_register{GetRegister(reg)}; const auto [base_image, cbuf_index, cbuf_offset]{ TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; - // If this image has already been used, return the existing mapping. - const auto itr{std::find_if(used_images.begin(), used_images.end(), - [=](const Image& entry) { return entry.GetOffset() == cbuf_key; })}; - if (itr != used_images.end()) { - ASSERT(itr->GetType() == type); - return *itr; + if (const auto image = TryUseExistingImage(cbuf_key, type, size)) { + return *image; } - // Otherwise create a new mapping for this image. const std::size_t next_index{used_images.size()}; - const Image entry{cbuf_index, cbuf_offset, next_index, type}; - return *used_images.emplace(entry).first; + return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size}) + .first->second; +} + +Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) { + auto it = used_images.find(offset); + if (it == used_images.end()) { + return nullptr; + } + auto& image = it->second; + ASSERT(image.GetType() == type); + + if (size) { + // We know the size, if it's known it has to be the same as before, otherwise we can set it. 
+ if (image.IsSizeKnown()) { + ASSERT(image.GetSize() == size); + } else { + image.SetSize(*size); + } + } + return &image; } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ed108bea8..7923d4d69 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -35,7 +35,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { return 1; } } -} // namespace +} // Anonymous namespace u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -106,16 +106,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } - case OpCode::Id::LD_L: { - LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", - static_cast<u64>(instr.ld_l.unknown.Value())); - - const auto GetLmem = [&](s32 offset) { + case OpCode::Id::LD_L: + LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); + [[fallthrough]]; + case OpCode::Id::LD_S: { + const auto GetMemory = [&](s32 offset) { ASSERT(offset % 4 == 0); const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate_offset); - return GetLocalMemory(address); + return opcode->get().GetId() == OpCode::Id::LD_S ? 
GetSharedMemory(address) + : GetLocalMemory(address); }; switch (instr.ldst_sl.type.Value()) { @@ -135,14 +136,16 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return 0; } }(); - for (u32 i = 0; i < count; ++i) - SetTemporary(bb, i, GetLmem(i * 4)); - for (u32 i = 0; i < count; ++i) + for (u32 i = 0; i < count; ++i) { + SetTemporary(bb, i, GetMemory(i * 4)); + } + for (u32 i = 0; i < count; ++i) { SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } break; } default: - UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", + UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), static_cast<u32>(instr.ldst_sl.type.Value())); } break; @@ -209,27 +212,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } - case OpCode::Id::ST_L: { + case OpCode::Id::ST_L: LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", static_cast<u64>(instr.st_l.cache_management.Value())); - - const auto GetLmemAddr = [&](s32 offset) { + [[fallthrough]]; + case OpCode::Id::ST_S: { + const auto GetAddress = [&](s32 offset) { ASSERT(offset % 4 == 0); const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); }; + const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L + ? 
&ShaderIR::SetLocalMemory + : &ShaderIR::SetSharedMemory; + switch (instr.ldst_sl.type.Value()) { case Tegra::Shader::StoreType::Bits128: - SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3)); - SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2)); + (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); + (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); + [[fallthrough]]; case Tegra::Shader::StoreType::Bits64: - SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1)); + (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); + [[fallthrough]]; case Tegra::Shader::StoreType::Bits32: - SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0)); + (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); break; default: - UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", + UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), static_cast<u32>(instr.ldst_sl.type.Value())); } break; diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 2ac16eeb0..f6ee68a54 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -17,8 +17,8 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = [&]() { + Node op_a = GetRegister(instr.gpr8); + Node op_b = [&]() { if (instr.is_b_imm) { return Immediate(instr.alu.GetSignedImm20_20()); } else if (instr.is_b_gpr) { @@ -32,16 +32,23 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { case OpCode::Id::SHR_C: case OpCode::Id::SHR_R: case OpCode::Id::SHR_IMM: { - const Node value = SignedOperation(OperationCode::IArithmeticShiftRight, - instr.shift.is_signed, PRECISE, op_a, op_b); + if (instr.shr.wrap) { + op_b = Operation(OperationCode::UBitwiseAnd, 
std::move(op_b), Immediate(0x1f)); + } else { + op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0)); + op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31)); + } + + Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, + std::move(op_a), std::move(op_b)); SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); + SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::SHL_C: case OpCode::Id::SHL_R: case OpCode::Id::SHL_IMM: { - const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b); + const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index 04ca74f46..a8e481b3c 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -13,6 +13,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; +using Tegra::Shader::ShuffleOperation; using Tegra::Shader::VoteOperation; namespace { @@ -44,6 +45,52 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { SetPredicate(bb, instr.vote.dest_pred, vote); break; } + case OpCode::Id::SHFL: { + Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) + : GetRegister(instr.gpr39); + Node width = [&] { + // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has + // been done reversing Nvidia's math. It won't work on all cases due to SHFL having + // different parameters that don't properly map to GLSL's interface, but it should work + // for cases emitted by Nvidia's compiler. 
+ if (instr.shfl.operation == ShuffleOperation::Up) { + return Operation( + OperationCode::ILogicalShiftRight, + Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), + Immediate(8)); + } else { + return Operation(OperationCode::ILogicalShiftRight, + Operation(OperationCode::IAdd, Immediate(0x201F), + Operation(OperationCode::INegate, std::move(mask))), + Immediate(8)); + } + }(); + + const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { + switch (instr.shfl.operation) { + case ShuffleOperation::Idx: + return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; + case ShuffleOperation::Up: + return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; + case ShuffleOperation::Down: + return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; + case ShuffleOperation::Bfly: + return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; + } + UNREACHABLE_MSG("Invalid SHFL operation: {}", + static_cast<u64>(instr.shfl.operation.Value())); + return {}; + }(); + + // Setting the predicate before the register is intentional to avoid overwriting. + Node index = instr.shfl.is_index_imm ? 
Immediate(static_cast<u32>(instr.shfl.index_imm)) + : GetRegister(instr.gpr20); + SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); + SetRegister( + bb, instr.gpr0, + Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); + break; + } default: UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); break; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 5db9313c4..abf2cb1ab 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -7,6 +7,7 @@ #include <array> #include <cstddef> #include <memory> +#include <optional> #include <string> #include <tuple> #include <utility> @@ -148,7 +149,14 @@ enum class OperationCode { TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 TexelFetch, /// (MetaTexture, int[N], int) -> float4 - ImageStore, /// (MetaImage, float[N] coords) -> void + ImageStore, /// (MetaImage, int[N] values) -> void + AtomicImageAdd, /// (MetaImage, int[N] coords) -> void + AtomicImageMin, /// (MetaImage, int[N] coords) -> void + AtomicImageMax, /// (MetaImage, int[N] coords) -> void + AtomicImageAnd, /// (MetaImage, int[N] coords) -> void + AtomicImageOr, /// (MetaImage, int[N] coords) -> void + AtomicImageXor, /// (MetaImage, int[N] coords) -> void + AtomicImageExchange, /// (MetaImage, int[N] coords) -> void Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void @@ -173,6 +181,16 @@ enum class OperationCode { VoteAny, /// (bool) -> bool VoteEqual, /// (bool) -> bool + ShuffleIndexed, /// (uint value, uint index, uint width) -> uint + ShuffleUp, /// (uint value, uint index, uint width) -> uint + ShuffleDown, /// (uint value, uint index, uint width) -> uint + ShuffleButterfly, /// (uint value, uint index, uint width) -> uint + + InRangeShuffleIndexed, /// (uint index, uint width) -> bool + InRangeShuffleUp, /// (uint index, uint width) -> bool + InRangeShuffleDown, /// (uint index, uint 
width) -> bool + InRangeShuffleButterfly, /// (uint index, uint width) -> bool + Amount, }; @@ -198,12 +216,13 @@ class PredicateNode; class AbufNode; class CbufNode; class LmemNode; +class SmemNode; class GmemNode; class CommentNode; using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, - PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; + PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>; using Node = std::shared_ptr<NodeData>; using Node4 = std::array<Node, 4>; using NodeBlock = std::vector<Node>; @@ -273,46 +292,85 @@ private: bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. }; -class Image { +class Image final { public: - explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type) - : offset{offset}, index{index}, type{type}, is_bindless{false} {} + constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) + : offset{offset}, index{index}, type{type}, is_bindless{false}, size{size} {} - explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, - Tegra::Shader::ImageType type) + constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, + Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, - is_bindless{true} {} + is_bindless{true}, size{size} {} - explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, - bool is_bindless) - : offset{offset}, index{index}, type{type}, is_bindless{is_bindless} {} + constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, + bool is_bindless, bool is_written, bool is_read, + std::optional<Tegra::Shader::ImageAtomicSize> size) + : offset{offset}, index{index}, type{type}, 
is_bindless{is_bindless}, + is_written{is_written}, is_read{is_read}, size{size} {} - std::size_t GetOffset() const { + void MarkWrite() { + is_written = true; + } + + void MarkRead() { + is_read = true; + } + + void SetSize(Tegra::Shader::ImageAtomicSize size_) { + size = size_; + } + + constexpr std::size_t GetOffset() const { return offset; } - std::size_t GetIndex() const { + constexpr std::size_t GetIndex() const { return index; } - Tegra::Shader::ImageType GetType() const { + constexpr Tegra::Shader::ImageType GetType() const { return type; } - bool IsBindless() const { + constexpr bool IsBindless() const { return is_bindless; } - bool operator<(const Image& rhs) const { - return std::tie(offset, index, type, is_bindless) < - std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless); + constexpr bool IsWritten() const { + return is_written; + } + + constexpr bool IsRead() const { + return is_read; + } + + constexpr std::pair<u32, u32> GetBindlessCBuf() const { + return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; + } + + constexpr bool IsSizeKnown() const { + return size.has_value(); + } + + constexpr Tegra::Shader::ImageAtomicSize GetSize() const { + return size.value(); + } + + constexpr bool operator<(const Image& rhs) const { + return std::tie(offset, index, type, size, is_bindless) < + std::tie(rhs.offset, rhs.index, rhs.type, rhs.size, rhs.is_bindless); } private: - std::size_t offset{}; + u64 offset{}; std::size_t index{}; Tegra::Shader::ImageType type{}; bool is_bindless{}; + bool is_written{}; + bool is_read{}; + std::optional<Tegra::Shader::ImageAtomicSize> size{}; }; struct GlobalMemoryBase { @@ -536,6 +594,19 @@ private: Node address; }; +/// Shared memory node +class SmemNode final { +public: + explicit SmemNode(Node address) : address{std::move(address)} {} + + const Node& GetAddress() const { + return address; + } + +private: + Node address; +}; + /// Global memory node class GmemNode final { public: diff --git 
a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 1e5c7f660..bbbab0bca 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -137,6 +137,10 @@ Node ShaderIR::GetLocalMemory(Node address) { return MakeNode<LmemNode>(std::move(address)); } +Node ShaderIR::GetSharedMemory(Node address) { + return MakeNode<SmemNode>(std::move(address)); +} + Node ShaderIR::GetTemporary(u32 id) { return GetRegister(Register::ZeroIndex + 1 + id); } @@ -378,6 +382,11 @@ void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); } +void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { + bb.push_back( + Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); +} + void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index bcc9b79b6..6aed9bb84 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -95,7 +95,7 @@ public: return used_samplers; } - const std::set<Image>& GetImages() const { + const std::map<u64, Image>& GetImages() const { return used_images; } @@ -208,6 +208,8 @@ private: Node GetInternalFlag(InternalFlag flag, bool negated = false); /// Generates a node representing a local memory address Node GetLocalMemory(Node address); + /// Generates a node representing a shared memory address + Node GetSharedMemory(Node address); /// Generates a temporary, internally it uses a post-RZ register Node GetTemporary(u32 id); @@ -217,8 +219,10 @@ private: void SetPredicate(NodeBlock& bb, u64 dest, Node src); /// Sets an internal flag. src value must be a bool-evaluated node void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); - /// Sets a local memory address. 
address and value must be a number-evaluated node + /// Sets a local memory address with a value. void SetLocalMemory(NodeBlock& bb, Node address, Node value); + /// Sets a shared memory address with a value. + void SetSharedMemory(NodeBlock& bb, Node address, Node value); /// Sets a temporary. Internally it uses a post-RZ register void SetTemporary(NodeBlock& bb, u32 id, Node value); @@ -272,10 +276,16 @@ private: bool is_shadow); /// Accesses an image. - const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); + Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size = {}); /// Access a bindless image sampler. - const Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); + Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size = {}); + + /// Tries to access an existing image, updating its state as needed + Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size); /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -356,7 +366,7 @@ private: std::set<Tegra::Shader::Attribute::Index> used_output_attributes; std::map<u32, ConstBuffer> used_cbufs; std::set<Sampler> used_samplers; - std::set<Image> used_images; + std::map<u64, Image> used_images; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; bool uses_layer{}; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 4ceb219be..53d0142cb 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -513,6 +513,26 @@ bool IsPixelFormatASTC(PixelFormat format) { } } +bool IsPixelFormatSRGB(PixelFormat format) { + switch (format) { + case 
PixelFormat::RGBA8_SRGB: + case PixelFormat::BGRA8_SRGB: + case PixelFormat::DXT1_SRGB: + case PixelFormat::DXT23_SRGB: + case PixelFormat::DXT45_SRGB: + case PixelFormat::BC7U_SRGB: + case PixelFormat::ASTC_2D_4X4_SRGB: + case PixelFormat::ASTC_2D_8X8_SRGB: + case PixelFormat::ASTC_2D_8X5_SRGB: + case PixelFormat::ASTC_2D_5X4_SRGB: + case PixelFormat::ASTC_2D_5X5_SRGB: + case PixelFormat::ASTC_2D_10X8_SRGB: + return true; + default: + return false; + } +} + std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; } diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 83f31c12c..19268b7cd 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -547,6 +547,8 @@ SurfaceType GetFormatType(PixelFormat pixel_format); bool IsPixelFormatASTC(PixelFormat format); +bool IsPixelFormatSRGB(PixelFormat format); + std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); /// Returns true if the specified PixelFormat is a BCn format, e.g. 
DXT or DXN diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index bcce8d863..5e497e49f 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -195,18 +195,18 @@ public: virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0; - void MarkAsModified(const bool is_modified_, const u64 tick) { + void MarkAsModified(bool is_modified_, u64 tick) { is_modified = is_modified_ || is_target; modification_tick = tick; } - void MarkAsRenderTarget(const bool is_target, const u32 index) { - this->is_target = is_target; - this->index = index; + void MarkAsRenderTarget(bool is_target_, u32 index_) { + is_target = is_target_; + index = index_; } - void MarkAsPicked(const bool is_picked) { - this->is_picked = is_picked; + void MarkAsPicked(bool is_picked_) { + is_picked = is_picked_; } bool IsModified() const { diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index fd5472451..1e4d3fb79 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -24,55 +24,62 @@ using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceTargetFromTextureType; using VideoCore::Surface::SurfaceType; -SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { +namespace { + +SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { switch (type) { - case Tegra::Shader::TextureType::Texture1D: { - if (is_array) - return SurfaceTarget::Texture1DArray; - else - return SurfaceTarget::Texture1D; - } - case Tegra::Shader::TextureType::Texture2D: { - if (is_array) - return SurfaceTarget::Texture2DArray; - else - return SurfaceTarget::Texture2D; - } - case Tegra::Shader::TextureType::Texture3D: { + case Tegra::Shader::TextureType::Texture1D: + return is_array ? 
SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D; + case Tegra::Shader::TextureType::Texture2D: + return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; + case Tegra::Shader::TextureType::Texture3D: ASSERT(!is_array); return SurfaceTarget::Texture3D; - } - case Tegra::Shader::TextureType::TextureCube: { - if (is_array) - return SurfaceTarget::TextureCubeArray; - else - return SurfaceTarget::TextureCubemap; - } - default: { + case Tegra::Shader::TextureType::TextureCube: + return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap; + default: UNREACHABLE(); return SurfaceTarget::Texture2D; } +} + +SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) { + switch (type) { + case Tegra::Shader::ImageType::Texture1D: + return SurfaceTarget::Texture1D; + case Tegra::Shader::ImageType::TextureBuffer: + return SurfaceTarget::TextureBuffer; + case Tegra::Shader::ImageType::Texture1DArray: + return SurfaceTarget::Texture1DArray; + case Tegra::Shader::ImageType::Texture2D: + return SurfaceTarget::Texture2D; + case Tegra::Shader::ImageType::Texture2DArray: + return SurfaceTarget::Texture2DArray; + case Tegra::Shader::ImageType::Texture3D: + return SurfaceTarget::Texture3D; + default: + UNREACHABLE(); + return SurfaceTarget::Texture2D; } } -namespace { constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); } + } // Anonymous namespace -SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, - const Tegra::Texture::FullTextureInfo& config, +SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& tic, const VideoCommon::Shader::Sampler& entry) { SurfaceParams params; - params.is_tiled = config.tic.IsTiled(); - params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); - params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, - params.block_height = params.is_tiled ? 
config.tic.BlockHeight() : 0, - params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, - params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; - params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), - params.srgb_conversion); + params.is_tiled = tic.IsTiled(); + params.srgb_conversion = tic.IsSrgbConversionEnabled(); + params.block_width = params.is_tiled ? tic.BlockWidth() : 0, + params.block_height = params.is_tiled ? tic.BlockHeight() : 0, + params.block_depth = params.is_tiled ? tic.BlockDepth() : 0, + params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; + params.pixel_format = + PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion); params.type = GetFormatType(params.pixel_format); if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { switch (params.pixel_format) { @@ -92,31 +99,72 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, } params.type = GetFormatType(params.pixel_format); } - params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); + params.component_type = ComponentTypeFromTexture(tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. - if (!config.tic.IsBuffer()) { - params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); - params.width = config.tic.Width(); - params.height = config.tic.Height(); - params.depth = config.tic.Depth(); - params.pitch = params.is_tiled ? 
0 : config.tic.Pitch(); + if (tic.IsBuffer()) { + params.target = SurfaceTarget::TextureBuffer; + params.width = tic.Width(); + params.pitch = params.width * params.GetBytesPerPixel(); + params.height = 1; + params.depth = 1; + params.num_levels = 1; + params.emulated_levels = 1; + params.is_layered = false; + } else { + params.target = TextureTypeToSurfaceTarget(entry.GetType(), entry.IsArray()); + params.width = tic.Width(); + params.height = tic.Height(); + params.depth = tic.Depth(); + params.pitch = params.is_tiled ? 0 : tic.Pitch(); if (params.target == SurfaceTarget::TextureCubemap || params.target == SurfaceTarget::TextureCubeArray) { params.depth *= 6; } - params.num_levels = config.tic.max_mip_level + 1; + params.num_levels = tic.max_mip_level + 1; params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); params.is_layered = params.IsLayered(); - } else { + } + return params; +} + +SurfaceParams SurfaceParams::CreateForImage(const Tegra::Texture::TICEntry& tic, + const VideoCommon::Shader::Image& entry) { + SurfaceParams params; + params.is_tiled = tic.IsTiled(); + params.srgb_conversion = tic.IsSrgbConversionEnabled(); + params.block_width = params.is_tiled ? tic.BlockWidth() : 0, + params.block_height = params.is_tiled ? tic.BlockHeight() : 0, + params.block_depth = params.is_tiled ? tic.BlockDepth() : 0, + params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; + params.pixel_format = + PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion); + params.type = GetFormatType(params.pixel_format); + params.component_type = ComponentTypeFromTexture(tic.r_type.Value()); + params.type = GetFormatType(params.pixel_format); + params.target = ImageTypeToSurfaceTarget(entry.GetType()); + // TODO: on 1DBuffer we should use the tic info. 
+ if (tic.IsBuffer()) { params.target = SurfaceTarget::TextureBuffer; - params.width = config.tic.Width(); + params.width = tic.Width(); params.pitch = params.width * params.GetBytesPerPixel(); params.height = 1; params.depth = 1; params.num_levels = 1; params.emulated_levels = 1; params.is_layered = false; + } else { + params.width = tic.Width(); + params.height = tic.Height(); + params.depth = tic.Depth(); + params.pitch = params.is_tiled ? 0 : tic.Pitch(); + if (params.target == SurfaceTarget::TextureCubemap || + params.target == SurfaceTarget::TextureCubeArray) { + params.depth *= 6; + } + params.num_levels = tic.max_mip_level + 1; + params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); + params.is_layered = params.IsLayered(); } return params; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index e7ef66ee2..c58e7f8a4 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -4,8 +4,6 @@ #pragma once -#include <map> - #include "common/alignment.h" #include "common/bit_util.h" #include "common/cityhash.h" @@ -23,10 +21,13 @@ using VideoCore::Surface::SurfaceCompression; class SurfaceParams { public: /// Creates SurfaceCachedParams from a texture configuration. - static SurfaceParams CreateForTexture(Core::System& system, - const Tegra::Texture::FullTextureInfo& config, + static SurfaceParams CreateForTexture(const Tegra::Texture::TICEntry& tic, const VideoCommon::Shader::Sampler& entry); + /// Creates SurfaceCachedParams from an image configuration. + static SurfaceParams CreateForImage(const Tegra::Texture::TICEntry& tic, + const VideoCommon::Shader::Image& entry); + /// Creates SurfaceCachedParams for a depth buffer configuration. 
static SurfaceParams CreateForDepthBuffer( Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp index 467696a4c..57a1f5803 100644 --- a/src/video_core/texture_cache/surface_view.cpp +++ b/src/video_core/texture_cache/surface_view.cpp @@ -10,7 +10,7 @@ namespace VideoCommon { std::size_t ViewParams::Hash() const { - return static_cast<std::size_t>(base_layer) ^ static_cast<std::size_t>(num_layers << 16) ^ + return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^ (static_cast<std::size_t>(base_level) << 24) ^ (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36); } diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index 04ca5639b..b17fd11a9 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -13,8 +13,8 @@ namespace VideoCommon { struct ViewParams { - ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, u32 num_layers, - u32 base_level, u32 num_levels) + constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, + u32 num_layers, u32 base_level, u32 num_levels) : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level}, num_levels{num_levels} {} @@ -22,12 +22,6 @@ struct ViewParams { bool operator==(const ViewParams& rhs) const; - VideoCore::Surface::SurfaceTarget target{}; - u32 base_layer{}; - u32 num_layers{}; - u32 base_level{}; - u32 num_levels{}; - bool IsLayered() const { switch (target) { case VideoCore::Surface::SurfaceTarget::Texture1DArray: @@ -39,13 +33,19 @@ struct ViewParams { return false; } } + + VideoCore::Surface::SurfaceTarget target{}; + u32 base_layer{}; + u32 num_layers{}; + u32 base_level{}; + u32 num_levels{}; }; class ViewBase { public: - ViewBase(const 
ViewParams& params) : params{params} {} + constexpr explicit ViewBase(const ViewParams& params) : params{params} {} - const ViewParams& GetViewParams() const { + constexpr const ViewParams& GetViewParams() const { return params; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2ec0203d1..877c6635d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -89,14 +89,29 @@ public: } } - TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, + TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, const VideoCommon::Shader::Sampler& entry) { std::lock_guard lock{mutex}; - const auto gpu_addr{config.tic.Address()}; + const auto gpu_addr{tic.Address()}; if (!gpu_addr) { return {}; } - const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; + const auto params{SurfaceParams::CreateForTexture(tic, entry)}; + const auto [surface, view] = GetSurface(gpu_addr, params, true, false); + if (guard_samplers) { + sampled_textures.push_back(surface); + } + return view; + } + + TView GetImageSurface(const Tegra::Texture::TICEntry& tic, + const VideoCommon::Shader::Image& entry) { + std::lock_guard lock{mutex}; + const auto gpu_addr{tic.Address()}; + if (!gpu_addr) { + return {}; + } + const auto params{SurfaceParams::CreateForImage(tic, entry)}; const auto [surface, view] = GetSurface(gpu_addr, params, true, false); if (guard_samplers) { sampled_textures.push_back(surface); |