diff options
21 files changed, 565 insertions, 122 deletions
diff --git a/appveyor.yml b/appveyor.yml index 72cda26a7..4f928adb5 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -53,7 +53,7 @@ build_script: # https://www.appveyor.com/docs/build-phase msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" } else { - C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1' + C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -j4 -C mingw_build/ 2>&1' } after_build: diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 7ab60c5bc..aff1d2180 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -175,6 +175,8 @@ add_library(core STATIC hle/service/nvdrv/devices/nvhost_ctrl_gpu.h hle/service/nvdrv/devices/nvhost_gpu.cpp hle/service/nvdrv/devices/nvhost_gpu.h + hle/service/nvdrv/devices/nvhost_nvdec.cpp + hle/service/nvdrv/devices/nvhost_nvdec.h hle/service/nvdrv/devices/nvmap.cpp hle/service/nvdrv/devices/nvmap.h hle/service/nvdrv/interface.cpp diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h index c9257de77..eaa5395ac 100644 --- a/src/core/hle/ipc.h +++ b/src/core/hle/ipc.h @@ -29,7 +29,10 @@ enum class ControlCommand : u32 { }; enum class CommandType : u32 { + Invalid = 0, + LegacyRequest = 1, Close = 2, + LegacyControl = 3, Request = 4, Control = 5, RequestWithContext = 6, diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h index 5be20c878..e1b5430bf 100644 --- a/src/core/hle/kernel/errors.h +++ b/src/core/hle/kernel/errors.h @@ -21,7 +21,9 @@ enum { // Confirmed Switch OS error codes MisalignedAddress = 102, + InvalidProcessorId = 113, InvalidHandle = 114, + InvalidCombination = 116, Timeout = 117, SynchronizationCanceled = 118, TooLarge = 119, diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 0811a16b8..ec3601e8b 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -732,7 +732,7 @@ static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* 
mask) } static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) { - NGLOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:08X}, core=0x{:X}", thread_handle, + NGLOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:16X}, core=0x{:X}", thread_handle, mask, core); const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle); @@ -740,6 +740,31 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) { return ERR_INVALID_HANDLE; } + if (core == THREADPROCESSORID_DEFAULT) { + ASSERT(thread->owner_process->ideal_processor != THREADPROCESSORID_DEFAULT); + // Set the target CPU to the one specified in the process' exheader. + core = thread->owner_process->ideal_processor; + mask = 1 << core; + } + + if (mask == 0) { + return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidCombination); + } + + /// This value is used to only change the affinity mask without changing the current ideal core. + static constexpr u32 OnlyChangeMask = static_cast<u32>(-3); + + if (core == OnlyChangeMask) { + core = thread->ideal_core; + } else if (core >= Core::NUM_CPU_CORES && core != -1) { + return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidProcessorId); + } + + // Error out if the input core isn't enabled in the input mask. + if (core < Core::NUM_CPU_CORES && (mask & (1 << core)) == 0) { + return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidCombination); + } + thread->ChangeCore(core, mask); return RESULT_SUCCESS; diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 46fcdefb8..cffa7ca83 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -133,8 +133,11 @@ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) { auto lock_owner = thread->lock_owner; // Threads waking up by timeout from WaitProcessWideKey do not perform priority inheritance - // and don't have a lock owner. 
- ASSERT(lock_owner == nullptr); + // and don't have a lock owner unless SignalProcessWideKey was called first and the thread + // wasn't awakened due to the mutex already being acquired. + if (lock_owner) { + lock_owner->RemoveMutexWaiter(thread); + } } if (resume) @@ -460,13 +463,13 @@ void Thread::UpdatePriority() { void Thread::ChangeCore(u32 core, u64 mask) { ideal_core = core; - mask = mask; + affinity_mask = mask; if (status != THREADSTATUS_READY) { return; } - boost::optional<s32> new_processor_id{GetNextProcessorId(mask)}; + boost::optional<s32> new_processor_id{GetNextProcessorId(affinity_mask)}; if (!new_processor_id) { new_processor_id = processor_id; @@ -476,7 +479,7 @@ void Thread::ChangeCore(u32 core, u64 mask) { new_processor_id = ideal_core; } - ASSERT(new_processor_id < 4); + ASSERT(*new_processor_id < 4); // Add thread to new core's scheduler auto& next_scheduler = Core::System().GetInstance().Scheduler(*new_processor_id); diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 6b1d6bf97..12954556d 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include <cinttypes> +#include <stack> #include "core/file_sys/filesystem.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/event.h" @@ -154,7 +155,7 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger RegisterHandlers(functions); launchable_event = - Kernel::Event::Create(Kernel::ResetType::OneShot, "ISelfController:LaunchableEvent"); + Kernel::Event::Create(Kernel::ResetType::Sticky, "ISelfController:LaunchableEvent"); } void ISelfController::SetFocusHandlingMode(Kernel::HLERequestContext& ctx) { @@ -348,19 +349,100 @@ void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) { NGLOG_WARNING(Service_AM, "(STUBBED) called"); } +class IStorageAccessor final : public ServiceFramework<IStorageAccessor> { +public: + explicit IStorageAccessor(std::vector<u8> buffer) + : ServiceFramework("IStorageAccessor"), buffer(std::move(buffer)) { + static const FunctionInfo functions[] = { + {0, &IStorageAccessor::GetSize, "GetSize"}, + {10, &IStorageAccessor::Write, "Write"}, + {11, &IStorageAccessor::Read, "Read"}, + }; + RegisterHandlers(functions); + } + +private: + std::vector<u8> buffer; + + void GetSize(Kernel::HLERequestContext& ctx) { + IPC::ResponseBuilder rb{ctx, 4}; + + rb.Push(RESULT_SUCCESS); + rb.Push(static_cast<u64>(buffer.size())); + + NGLOG_DEBUG(Service_AM, "called"); + } + + void Write(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + + const u64 offset{rp.Pop<u64>()}; + const std::vector<u8> data{ctx.ReadBuffer()}; + + ASSERT(offset + data.size() <= buffer.size()); + + std::memcpy(&buffer[offset], data.data(), data.size()); + + IPC::ResponseBuilder rb{rp.MakeBuilder(2, 0, 0)}; + rb.Push(RESULT_SUCCESS); + + NGLOG_DEBUG(Service_AM, "called, offset={}", offset); + } + + void Read(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + + const u64 offset{rp.Pop<u64>()}; + const size_t size{ctx.GetWriteBufferSize()}; + + ASSERT(offset + size <= buffer.size()); + + 
ctx.WriteBuffer(buffer.data() + offset, size); + + IPC::ResponseBuilder rb{rp.MakeBuilder(2, 0, 0)}; + rb.Push(RESULT_SUCCESS); + + NGLOG_DEBUG(Service_AM, "called, offset={}", offset); + } +}; + +class IStorage final : public ServiceFramework<IStorage> { +public: + explicit IStorage(std::vector<u8> buffer) + : ServiceFramework("IStorage"), buffer(std::move(buffer)) { + static const FunctionInfo functions[] = { + {0, &IStorage::Open, "Open"}, + {1, nullptr, "OpenTransferStorage"}, + }; + RegisterHandlers(functions); + } + +private: + std::vector<u8> buffer; + + void Open(Kernel::HLERequestContext& ctx) { + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<AM::IStorageAccessor>(buffer); + + NGLOG_DEBUG(Service_AM, "called"); + } +}; + class ILibraryAppletAccessor final : public ServiceFramework<ILibraryAppletAccessor> { public: explicit ILibraryAppletAccessor() : ServiceFramework("ILibraryAppletAccessor") { static const FunctionInfo functions[] = { {0, &ILibraryAppletAccessor::GetAppletStateChangedEvent, "GetAppletStateChangedEvent"}, {1, nullptr, "IsCompleted"}, - {10, nullptr, "Start"}, + {10, &ILibraryAppletAccessor::Start, "Start"}, {20, nullptr, "RequestExit"}, {25, nullptr, "Terminate"}, - {30, nullptr, "GetResult"}, + {30, &ILibraryAppletAccessor::GetResult, "GetResult"}, {50, nullptr, "SetOutOfFocusApplicationSuspendingEnabled"}, - {100, nullptr, "PushInData"}, - {101, nullptr, "PopOutData"}, + {100, &ILibraryAppletAccessor::PushInData, "PushInData"}, + {101, &ILibraryAppletAccessor::PopOutData, "PopOutData"}, {102, nullptr, "PushExtraStorage"}, {103, nullptr, "PushInteractiveInData"}, {104, nullptr, "PopInteractiveOutData"}, @@ -388,6 +470,41 @@ private: NGLOG_WARNING(Service_AM, "(STUBBED) called"); } + void GetResult(Kernel::HLERequestContext& ctx) { + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + + NGLOG_WARNING(Service_AM, "(STUBBED) called"); + } + + void Start(Kernel::HLERequestContext& ctx) { + 
IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + + NGLOG_WARNING(Service_AM, "(STUBBED) called"); + } + + void PushInData(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + storage_stack.push(rp.PopIpcInterface<AM::IStorage>()); + + IPC::ResponseBuilder rb{rp.MakeBuilder(2, 0, 0)}; + rb.Push(RESULT_SUCCESS); + + NGLOG_DEBUG(Service_AM, "called"); + } + + void PopOutData(Kernel::HLERequestContext& ctx) { + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<AM::IStorage>(std::move(storage_stack.top())); + + storage_stack.pop(); + + NGLOG_DEBUG(Service_AM, "called"); + } + + std::stack<std::shared_ptr<AM::IStorage>> storage_stack; Kernel::SharedPtr<Kernel::Event> state_changed_event; }; @@ -396,7 +513,7 @@ ILibraryAppletCreator::ILibraryAppletCreator() : ServiceFramework("ILibraryApple {0, &ILibraryAppletCreator::CreateLibraryApplet, "CreateLibraryApplet"}, {1, nullptr, "TerminateAllLibraryApplets"}, {2, nullptr, "AreAnyLibraryAppletsLeft"}, - {10, nullptr, "CreateStorage"}, + {10, &ILibraryAppletCreator::CreateStorage, "CreateStorage"}, {11, nullptr, "CreateTransferMemoryStorage"}, {12, nullptr, "CreateHandleStorage"}, }; @@ -412,72 +529,17 @@ void ILibraryAppletCreator::CreateLibraryApplet(Kernel::HLERequestContext& ctx) NGLOG_DEBUG(Service_AM, "called"); } -class IStorageAccessor final : public ServiceFramework<IStorageAccessor> { -public: - explicit IStorageAccessor(std::vector<u8> buffer) - : ServiceFramework("IStorageAccessor"), buffer(std::move(buffer)) { - static const FunctionInfo functions[] = { - {0, &IStorageAccessor::GetSize, "GetSize"}, - {10, nullptr, "Write"}, - {11, &IStorageAccessor::Read, "Read"}, - }; - RegisterHandlers(functions); - } - -private: - std::vector<u8> buffer; - - void GetSize(Kernel::HLERequestContext& ctx) { - IPC::ResponseBuilder rb{ctx, 4}; - - rb.Push(RESULT_SUCCESS); - rb.Push(static_cast<u64>(buffer.size())); - - NGLOG_DEBUG(Service_AM, "called"); - } - - void 
Read(Kernel::HLERequestContext& ctx) { - IPC::RequestParser rp{ctx}; - - u64 offset = rp.Pop<u64>(); - - const size_t size{ctx.GetWriteBufferSize()}; - - ASSERT(offset + size <= buffer.size()); - - ctx.WriteBuffer(buffer.data() + offset, size); - - IPC::ResponseBuilder rb{ctx, 2}; - - rb.Push(RESULT_SUCCESS); - - NGLOG_DEBUG(Service_AM, "called"); - } -}; - -class IStorage final : public ServiceFramework<IStorage> { -public: - explicit IStorage(std::vector<u8> buffer) - : ServiceFramework("IStorage"), buffer(std::move(buffer)) { - static const FunctionInfo functions[] = { - {0, &IStorage::Open, "Open"}, - {1, nullptr, "OpenTransferStorage"}, - }; - RegisterHandlers(functions); - } - -private: - std::vector<u8> buffer; - - void Open(Kernel::HLERequestContext& ctx) { - IPC::ResponseBuilder rb{ctx, 2, 0, 1}; +void ILibraryAppletCreator::CreateStorage(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const u64 size{rp.Pop<u64>()}; + std::vector<u8> buffer(size); - rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<AM::IStorageAccessor>(buffer); + IPC::ResponseBuilder rb{rp.MakeBuilder(2, 0, 1)}; + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<AM::IStorage>(std::move(buffer)); - NGLOG_DEBUG(Service_AM, "called"); - } -}; + NGLOG_DEBUG(Service_AM, "called, size={}", size); +} IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationFunctions") { static const FunctionInfo functions[] = { diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index ff8eb14d7..301a6c798 100644 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h @@ -121,6 +121,7 @@ public: private: void CreateLibraryApplet(Kernel::HLERequestContext& ctx); + void CreateStorage(Kernel::HLERequestContext& ctx); }; class IApplicationFunctions final : public ServiceFramework<IApplicationFunctions> { diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 1891255cb..00c5308ba 100644 --- 
a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -329,6 +329,7 @@ public: {130, nullptr, "SwapNpadAssignment"}, {131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"}, {132, nullptr, "EnableUnintendedHomeButtonInputProtection"}, + {133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"}, {200, &Hid::GetVibrationDeviceInfo, "GetVibrationDeviceInfo"}, {201, &Hid::SendVibrationValue, "SendVibrationValue"}, {202, &Hid::GetActualVibrationValue, "GetActualVibrationValue"}, @@ -336,12 +337,41 @@ public: {204, nullptr, "PermitVibration"}, {205, nullptr, "IsVibrationPermitted"}, {206, &Hid::SendVibrationValues, "SendVibrationValues"}, + {207, nullptr, "SendVibrationGcErmCommand"}, + {208, nullptr, "GetActualVibrationGcErmCommand"}, + {209, nullptr, "BeginPermitVibrationSession"}, + {210, nullptr, "EndPermitVibrationSession"}, {300, nullptr, "ActivateConsoleSixAxisSensor"}, {301, nullptr, "StartConsoleSixAxisSensor"}, {302, nullptr, "StopConsoleSixAxisSensor"}, + {303, nullptr, "ActivateSevenSixAxisSensor"}, + {304, nullptr, "StartSevenSixAxisSensor"}, + {305, nullptr, "StopSevenSixAxisSensor"}, + {306, nullptr, "InitializeSevenSixAxisSensor"}, + {307, nullptr, "FinalizeSevenSixAxisSensor"}, + {308, nullptr, "SetSevenSixAxisSensorFusionStrength"}, + {309, nullptr, "GetSevenSixAxisSensorFusionStrength"}, {400, nullptr, "IsUsbFullKeyControllerEnabled"}, {401, nullptr, "EnableUsbFullKeyController"}, {402, nullptr, "IsUsbFullKeyControllerConnected"}, + {403, nullptr, "HasBattery"}, + {404, nullptr, "HasLeftRightBattery"}, + {405, nullptr, "GetNpadInterfaceType"}, + {406, nullptr, "GetNpadLeftRightInterfaceType"}, + {500, nullptr, "GetPalmaConnectionHandle"}, + {501, nullptr, "InitializePalma"}, + {502, nullptr, "AcquirePalmaOperationCompleteEvent"}, + {503, nullptr, "GetPalmaOperationInfo"}, + {504, nullptr, "PlayPalmaActivity"}, + {505, nullptr, "SetPalmaFrModeType"}, + {506, nullptr, "ReadPalmaStep"}, + {507, nullptr, 
"EnablePalmaStep"}, + {508, nullptr, "SuspendPalmaStep"}, + {509, nullptr, "ResetPalmaStep"}, + {510, nullptr, "ReadPalmaApplicationSection"}, + {511, nullptr, "WritePalmaApplicationSection"}, + {512, nullptr, "ReadPalmaUniqueCode"}, + {513, nullptr, "SetPalmaUniqueCodeInvalid"}, {1000, nullptr, "SetNpadCommunicationMode"}, {1001, nullptr, "GetNpadCommunicationMode"}, }; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 2ecf818f3..56b5ed60d 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -26,11 +26,19 @@ public: private: enum class IoctlCommand : u32_le { IocSetNVMAPfdCommand = 0x40044801, + IocAllocGPFIFOCommand = 0x40084805, IocSetClientDataCommand = 0x40084714, IocGetClientDataCommand = 0x80084715, IocZCullBind = 0xc010480b, IocSetErrorNotifierCommand = 0xC018480C, IocChannelSetPriorityCommand = 0x4004480D, + IocEnableCommand = 0x0000480E, + IocDisableCommand = 0x0000480F, + IocPreemptCommand = 0x00004810, + IocForceResetCommand = 0x00004811, + IocEventIdControlCommand = 0x40084812, + IocGetErrorNotificationCommand = 0xC0104817, + IocAllocGPFIFOExCommand = 0x40204818, IocAllocGPFIFOEx2Command = 0xC020481A, IocAllocObjCtxCommand = 0xC0104809, IocChannelGetWaitbaseCommand = 0xC0080003, @@ -56,6 +64,12 @@ private: }; static_assert(sizeof(IoctlChannelSetTimeout) == 4, "IoctlChannelSetTimeout is incorrect size"); + struct IoctlAllocGPFIFO { + u32_le num_entries; + u32_le flags; + }; + static_assert(sizeof(IoctlAllocGPFIFO) == 8, "IoctlAllocGPFIFO is incorrect size"); + struct IoctlClientData { u64_le data; }; @@ -76,12 +90,45 @@ private: }; static_assert(sizeof(IoctlSetErrorNotifier) == 24, "IoctlSetErrorNotifier is incorrect size"); + struct IoctlChannelSetPriority { + u32_le priority; + }; + static_assert(sizeof(IoctlChannelSetPriority) == 4, + "IoctlChannelSetPriority is incorrect size"); + + struct IoctlEventIdControl { + u32_le 
cmd; // 0=disable, 1=enable, 2=clear + u32_le id; + }; + static_assert(sizeof(IoctlEventIdControl) == 8, "IoctlEventIdControl is incorrect size"); + + struct IoctlGetErrorNotification { + u64_le timestamp; + u32_le info32; + u16_le info16; + u16_le status; // always 0xFFFF + }; + static_assert(sizeof(IoctlGetErrorNotification) == 16, + "IoctlGetErrorNotification is incorrect size"); + struct IoctlFence { u32_le id; u32_le value; }; static_assert(sizeof(IoctlFence) == 8, "IoctlFence is incorrect size"); + struct IoctlAllocGpfifoEx { + u32_le num_entries; + u32_le flags; + u32_le unk0; + u32_le unk1; + u32_le unk2; + u32_le unk3; + u32_le unk4; + u32_le unk5; + }; + static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size"); + struct IoctlAllocGpfifoEx2 { u32_le num_entries; // in u32_le flags; // in diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp new file mode 100644 index 000000000..0b6c22898 --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -0,0 +1,32 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h" + +namespace Service::Nvidia::Devices { + +u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { + NGLOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", + command.raw, input.size(), output.size()); + + switch (static_cast<IoctlCommand>(command.raw)) { + case IoctlCommand::IocSetNVMAPfdCommand: + return SetNVMAPfd(input, output); + } + + UNIMPLEMENTED_MSG("Unimplemented ioctl"); + return 0; +} + +u32 nvhost_nvdec::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlSetNvmapFD params{}; + std::memcpy(&params, input.data(), input.size()); + NGLOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); + nvmap_fd = params.nvmap_fd; + return 0; +} + +} // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h new file mode 100644 index 000000000..0192aecdd --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -0,0 +1,38 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <cstdlib> +#include <cstring> +#include <vector> +#include "common/common_types.h" +#include "core/hle/service/nvdrv/devices/nvdevice.h" + +namespace Service::Nvidia::Devices { + +class nvhost_nvdec final : public nvdevice { +public: + nvhost_nvdec() = default; + ~nvhost_nvdec() override = default; + + u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; + +private: + enum class IoctlCommand : u32_le { + IocSetNVMAPfdCommand = 0x40044801, + }; + + struct IoctlSetNvmapFD { + u32_le nvmap_fd; + }; + static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size"); + + u32_le nvmap_fd{}; + + u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); +}; + +} // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index a6a4ab7d3..cc5cfe34e 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -9,6 +9,7 @@ #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" #include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h" #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" +#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h" #include "core/hle/service/nvdrv/devices/nvmap.h" #include "core/hle/service/nvdrv/interface.h" #include "core/hle/service/nvdrv/nvdrv.h" @@ -36,6 +37,7 @@ Module::Module() { devices["/dev/nvmap"] = nvmap_dev; devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev); devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(); + devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(); } u32 Module::Open(std::string device_name) { diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index 2eb37fb42..654012189 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt 
file included. #include <chrono> +#include <ctime> #include "common/logging/log.h" #include "core/core_timing.h" #include "core/hle/ipc_helpers.h" @@ -77,7 +78,7 @@ public: {3, nullptr, "LoadLocationNameList"}, {4, &ITimeZoneService::LoadTimeZoneRule, "LoadTimeZoneRule"}, {5, nullptr, "GetTimeZoneRuleVersion"}, - {100, nullptr, "ToCalendarTime"}, + {100, &ITimeZoneService::ToCalendarTime, "ToCalendarTime"}, {101, &ITimeZoneService::ToCalendarTimeWithMyRule, "ToCalendarTimeWithMyRule"}, {200, nullptr, "ToPosixTime"}, {201, nullptr, "ToPosixTimeWithMyRule"}, @@ -86,9 +87,11 @@ public: } private: + LocationName location_name{"UTC"}; + TimeZoneRule my_time_zone_rule{}; + void GetDeviceLocationName(Kernel::HLERequestContext& ctx) { - NGLOG_WARNING(Service_Time, "(STUBBED) called"); - LocationName location_name{}; + NGLOG_DEBUG(Service_Time, "called"); IPC::ResponseBuilder rb{ctx, (sizeof(LocationName) / 4) + 2}; rb.Push(RESULT_SUCCESS); rb.PushRaw(location_name); @@ -103,23 +106,70 @@ private: void LoadTimeZoneRule(Kernel::HLERequestContext& ctx) { NGLOG_WARNING(Service_Time, "(STUBBED) called"); + + ctx.WriteBuffer(&my_time_zone_rule, sizeof(TimeZoneRule)); + IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); } + void ToCalendarTime(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const u64 posix_time = rp.Pop<u64>(); + + NGLOG_WARNING(Service_Time, "(STUBBED) called, posix_time=0x{:016X}", posix_time); + + TimeZoneRule time_zone_rule{}; + auto buffer = ctx.ReadBuffer(); + std::memcpy(&time_zone_rule, buffer.data(), buffer.size()); + + CalendarTime calendar_time{2018, 1, 1, 0, 0, 0}; + CalendarAdditionalInfo additional_info{}; + + PosixToCalendar(posix_time, calendar_time, additional_info, time_zone_rule); + + IPC::ResponseBuilder rb{ctx, 10}; + rb.Push(RESULT_SUCCESS); + rb.PushRaw(calendar_time); + rb.PushRaw(additional_info); + } + void ToCalendarTimeWithMyRule(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; - u64 posix_time = 
rp.Pop<u64>(); + const u64 posix_time = rp.Pop<u64>(); NGLOG_WARNING(Service_Time, "(STUBBED) called, posix_time=0x{:016X}", posix_time); CalendarTime calendar_time{2018, 1, 1, 0, 0, 0}; CalendarAdditionalInfo additional_info{}; + + PosixToCalendar(posix_time, calendar_time, additional_info, my_time_zone_rule); + IPC::ResponseBuilder rb{ctx, 10}; rb.Push(RESULT_SUCCESS); rb.PushRaw(calendar_time); rb.PushRaw(additional_info); } + + void PosixToCalendar(u64 posix_time, CalendarTime& calendar_time, + CalendarAdditionalInfo& additional_info, const TimeZoneRule& /*rule*/) { + std::time_t t(posix_time); + std::tm* tm = std::localtime(&t); + if (!tm) { + return; + } + calendar_time.year = tm->tm_year + 1900; + calendar_time.month = tm->tm_mon + 1; + calendar_time.day = tm->tm_mday; + calendar_time.hour = tm->tm_hour; + calendar_time.minute = tm->tm_min; + calendar_time.second = tm->tm_sec; + + additional_info.day_of_week = tm->tm_wday; + additional_info.day_of_year = tm->tm_yday; + std::memcpy(additional_info.name.data(), "UTC", sizeof("UTC")); + additional_info.utc_offset = 0; + } }; void Module::Interface::GetStandardUserSystemClock(Kernel::HLERequestContext& ctx) { diff --git a/src/core/hle/service/time/time.h b/src/core/hle/service/time/time.h index 12fe1995a..49af38589 100644 --- a/src/core/hle/service/time/time.h +++ b/src/core/hle/service/time/time.h @@ -4,13 +4,13 @@ #pragma once +#include <array> #include "core/hle/service/service.h" namespace Service::Time { -// TODO(Rozelette) RE this structure struct LocationName { - INSERT_PADDING_BYTES(0x24); + std::array<u8, 0x24> name; }; static_assert(sizeof(LocationName) == 0x24, "LocationName is incorrect size"); @@ -25,26 +25,34 @@ struct CalendarTime { }; static_assert(sizeof(CalendarTime) == 0x8, "CalendarTime structure has incorrect size"); -// TODO(Rozelette) RE this structure struct CalendarAdditionalInfo { - INSERT_PADDING_BYTES(0x18); + u32_le day_of_week; + u32_le day_of_year; + std::array<u8, 8> name; + 
INSERT_PADDING_BYTES(1); + s32_le utc_offset; }; static_assert(sizeof(CalendarAdditionalInfo) == 0x18, "CalendarAdditionalInfo structure has incorrect size"); -// TODO(bunnei) RE this structure -struct SystemClockContext { - INSERT_PADDING_BYTES(0x20); +// TODO(mailwl) RE this structure +struct TimeZoneRule { + INSERT_PADDING_BYTES(0x4000); }; -static_assert(sizeof(SystemClockContext) == 0x20, - "SystemClockContext structure has incorrect size"); struct SteadyClockTimePoint { - u64 value; + u64_le value; INSERT_PADDING_WORDS(4); }; static_assert(sizeof(SteadyClockTimePoint) == 0x18, "SteadyClockTimePoint is incorrect size"); +struct SystemClockContext { + u64_le offset; + SteadyClockTimePoint time_point; +}; +static_assert(sizeof(SystemClockContext) == 0x20, + "SystemClockContext structure has incorrect size"); + class Module final { public: class Interface : public ServiceFramework<Interface> { diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 23e70cd8a..ef12d9300 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -156,16 +156,15 @@ void Maxwell3D::ProcessQueryGet() { // TODO(Subv): Support the other query units. ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, "Units other than CROP are unimplemented"); - ASSERT_MSG(regs.query.query_get.short_query, - "Writing the entire query result structure is unimplemented"); u32 value = Memory::Read32(*address); - u32 result = 0; + u64 result = 0; // TODO(Subv): Support the other query variables switch (regs.query.query_get.select) { case Regs::QuerySelect::Zero: - result = 0; + // This seems to actually write the query sequence to the query address. + result = regs.query.query_sequence; break; default: UNIMPLEMENTED_MSG("Unimplemented query select type {}", @@ -174,15 +173,31 @@ void Maxwell3D::ProcessQueryGet() { // TODO(Subv): Research and implement how query sync conditions work. 
+ struct LongQueryResult { + u64_le value; + u64_le timestamp; + }; + static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); + switch (regs.query.query_get.mode) { case Regs::QueryMode::Write: case Regs::QueryMode::Write2: { - // Write the current query sequence to the sequence address. u32 sequence = regs.query.query_sequence; - Memory::Write32(*address, sequence); - - // TODO(Subv): Write the proper query response structure to the address when not using short - // mode. + if (regs.query.query_get.short_query) { + // Write the current query sequence to the sequence address. + // TODO(Subv): Find out what happens if you use a long query type but mark it as a short + // query. + Memory::Write32(*address, sequence); + } else { + // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast + // GPU, this command may actually take a while to complete in real hardware due to GPU + // wait queues. + LongQueryResult query_result{}; + query_result.value = result; + // TODO(Subv): Generate a real GPU timestamp and write it here instead of 0 + query_result.timestamp = 0; + Memory::WriteBlock(*address, &query_result, sizeof(query_result)); + } break; } default: diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 198a470c0..da64430e9 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -75,6 +75,10 @@ union Attribute { enum class Index : u64 { Position = 7, Attribute_0 = 8, + // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex + // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval + // shader. 
+ TessCoordInstanceIDVertexID = 47, }; union { @@ -257,6 +261,33 @@ union Instruction { BitField<50, 1, u64> saturate_a; } conversion; + union { + BitField<31, 4, u64> component_mask; + + bool IsComponentEnabled(size_t component) const { + return ((1 << component) & component_mask) != 0; + } + } tex; + + union { + BitField<50, 3, u64> component_mask_selector; + BitField<28, 8, Register> gpr28; + + bool HasTwoDestinations() const { + return gpr28.Value() != Register::ZeroIndex; + } + + bool IsComponentEnabled(size_t component) const { + static constexpr std::array<size_t, 5> one_dest_mask{0x1, 0x2, 0x4, 0x8, 0x3}; + static constexpr std::array<size_t, 5> two_dest_mask{0x7, 0xb, 0xd, 0xe, 0xf}; + const auto& mask{HasTwoDestinations() ? two_dest_mask : one_dest_mask}; + + ASSERT(component_mask_selector < mask.size()); + + return ((1 << component) & mask[component_mask_selector]) != 0; + } + } texs; + BitField<61, 1, u64> is_b_imm; BitField<60, 1, u64> is_b_gpr; BitField<59, 1, u64> is_c_gpr; @@ -277,6 +308,7 @@ public: KIL, LD_A, ST_A, + TEX, TEXQ, // Texture Query TEXS, // Texture Fetch with scalar/non-vec4 source/destinations TLDS, // Texture Load with scalar/non-vec4 source/destinations @@ -293,8 +325,10 @@ public: FMUL_R, FMUL_IMM, FMUL32_IMM, - MUFU, // Multi-Function Operator - RRO, // Range Reduction Operator + MUFU, // Multi-Function Operator + RRO_C, // Range Reduction Operator + RRO_R, + RRO_IMM, F2F_C, F2F_R, F2F_IMM, @@ -438,6 +472,7 @@ private: INST("111000110011----", Id::KIL, Type::Flow, "KIL"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), + INST("1100000000111---", Id::TEX, Type::Memory, "TEX"), INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"), INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), @@ -455,7 +490,9 @@ private: INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), 
INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), - INST("0101110010010---", Id::RRO, Type::Arithmetic, "RRO"), + INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), + INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), + INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"), INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index ac3e4bf27..d6048f639 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -49,17 +49,19 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8 + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 + {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 }}; static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); if (type == SurfaceType::ColorTexture) { ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); - // For now only UNORM components are supported - ASSERT(component_type == ComponentType::UNorm); + // For now only UNORM components are supported, or 
RGBA16F which is type FLOAT + ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F); return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { // TODO(Subv): Implement depth formats @@ -110,8 +112,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra: morton_to_gl_fns = { MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>, - MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::DXT1>, - MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, + MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, + MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, + MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, }; static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, @@ -123,7 +126,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra: MortonCopy<false, PixelFormat::A2B10G10R10>, MortonCopy<false, PixelFormat::A1B5G5R5>, MortonCopy<false, PixelFormat::R8>, - // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported + MortonCopy<false, PixelFormat::RGBA16F>, + // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported + nullptr, nullptr, nullptr, nullptr, @@ -928,7 +933,8 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc // Use GetSurfaceSubRect instead ASSERT(params.width == params.stride); - ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); + ASSERT(!params.is_tiled || + (params.GetActualWidth() % 8 == 0 && params.GetActualHeight() % 8 == 0)); // Check for an exact match in existing surfaces Surface surface = diff --git 
a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index fc09f108c..6f08678ab 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -59,9 +59,11 @@ struct SurfaceParams { A2B10G10R10 = 2, A1B5G5R5 = 3, R8 = 4, - DXT1 = 5, - DXT23 = 6, - DXT45 = 7, + RGBA16F = 5, + DXT1 = 6, + DXT23 = 7, + DXT45 = 8, + DXN1 = 9, // This is also known as BC4 Max, Invalid = 255, @@ -102,9 +104,11 @@ struct SurfaceParams { 1, // A2B10G10R10 1, // A1B5G5R5 1, // R8 + 2, // RGBA16F 4, // DXT1 4, // DXT23 4, // DXT45 + 4, // DXN1 }}; ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); @@ -124,9 +128,11 @@ struct SurfaceParams { 32, // A2B10G10R10 16, // A1B5G5R5 8, // R8 + 64, // RGBA16F 64, // DXT1 128, // DXT23 128, // DXT45 + 64, // DXN1 }}; ASSERT(static_cast<size_t>(format) < bpp_table.size()); @@ -143,6 +149,8 @@ struct SurfaceParams { return PixelFormat::ABGR8; case Tegra::RenderTargetFormat::RGB10_A2_UNORM: return PixelFormat::A2B10G10R10; + case Tegra::RenderTargetFormat::RGBA16_FLOAT: + return PixelFormat::RGBA16F; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -172,12 +180,16 @@ struct SurfaceParams { return PixelFormat::A1B5G5R5; case Tegra::Texture::TextureFormat::R8: return PixelFormat::R8; + case Tegra::Texture::TextureFormat::R16_G16_B16_A16: + return PixelFormat::RGBA16F; case Tegra::Texture::TextureFormat::DXT1: return PixelFormat::DXT1; case Tegra::Texture::TextureFormat::DXT23: return PixelFormat::DXT23; case Tegra::Texture::TextureFormat::DXT45: return PixelFormat::DXT45; + case Tegra::Texture::TextureFormat::DXN1: + return PixelFormat::DXN1; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -197,12 +209,16 @@ struct SurfaceParams { return Tegra::Texture::TextureFormat::A1B5G5R5; case PixelFormat::R8: return 
Tegra::Texture::TextureFormat::R8; + case PixelFormat::RGBA16F: + return Tegra::Texture::TextureFormat::R16_G16_B16_A16; case PixelFormat::DXT1: return Tegra::Texture::TextureFormat::DXT1; case PixelFormat::DXT23: return Tegra::Texture::TextureFormat::DXT23; case PixelFormat::DXT45: return Tegra::Texture::TextureFormat::DXT45; + case PixelFormat::DXN1: + return Tegra::Texture::TextureFormat::DXN1; default: UNREACHABLE(); } @@ -226,6 +242,8 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RGBA8_SRGB: case Tegra::RenderTargetFormat::RGB10_A2_UNORM: return ComponentType::UNorm; + case Tegra::RenderTargetFormat::RGBA16_FLOAT: + return ComponentType::Float; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 70ddea643..bb5209a7e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -299,7 +299,7 @@ public: * are stored as floats, so this may require conversion. * @param reg The destination register to use. * @param elem The element to use for the operation. - * @param attribute The input attibute to use as the source value. + * @param attribute The input attribute to use as the source value. */ void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute) { std::string dest = GetRegisterAsFloat(reg); @@ -451,6 +451,12 @@ private: switch (attribute) { case Attribute::Index::Position: return "position"; + case Attribute::Index::TessCoordInstanceIDVertexID: + // TODO(Subv): Find out what the values are for the first two elements when inside a + // vertex shader, and what's the value of the fourth element when inside a Tess Eval + // shader. 
+ ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); + return "vec4(0, 0, gl_InstanceID, gl_VertexID)"; default: const u32 index{static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0)}; @@ -786,8 +792,13 @@ private: 1, 1); break; } - case OpCode::Id::RRO: { - NGLOG_DEBUG(HW_GPU, "Skipping RRO instruction"); + case OpCode::Id::RRO_C: + case OpCode::Id::RRO_R: + case OpCode::Id::RRO_IMM: { + // Currently RRO is only implemented as a register move. + // Usage of `abs_b` and `negate_b` here should also be correct. + regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); + NGLOG_WARNING(HW_GPU, "RRO instruction is incomplete"); break; } default: { @@ -885,10 +896,10 @@ private: instr.gpr0); break; } - case OpCode::Id::TEXS: { + case OpCode::Id::TEX: { ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested"); const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); - const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); + const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); const std::string sampler = GetSampler(instr.sampler); const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; // Add an extra scope and declare the texture coords inside to prevent overwriting @@ -897,8 +908,52 @@ private: ++shader.scope; shader.AddLine(coord); const std::string texture = "texture(" + sampler + ", coords)"; - for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) { - regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, elem); + + size_t dest_elem{}; + for (size_t elem = 0; elem < instr.attribute.fmt20.size; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); + ++dest_elem; + } + --shader.scope; + shader.AddLine("}"); + break; + } + case OpCode::Id::TEXS: { + ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested"); + const std::string op_a = 
regs.GetRegisterAsFloat(instr.gpr8); + const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); + const std::string sampler = GetSampler(instr.sampler); + const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; + // Add an extra scope and declare the texture coords inside to prevent + // overwriting them in case they are used as outputs of the texs instruction. + shader.AddLine("{"); + ++shader.scope; + shader.AddLine(coord); + const std::string texture = "texture(" + sampler + ", coords)"; + + // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA goes + // into gpr28+0 and gpr28+1 + size_t offset{}; + + for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) { + for (unsigned elem = 0; elem < 2; ++elem) { + if (!instr.texs.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + regs.SetRegisterToFloat(dest, elem + offset, texture, 1, 4, false, elem); + } + + if (!instr.texs.HasTwoDestinations()) { + // Skip the second destination + break; + } + + offset += 2; } --shader.scope; shader.AddLine("}"); @@ -955,8 +1010,8 @@ private: '(' + predicate + ") " + combiner + " (" + second_pred + ')'); if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if - // enabled + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, + // if enabled SetPredicate(instr.fsetp.pred0, "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); } diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 9d7b73b73..2d2af5554 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -46,6 +46,7 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_ u32 BytesPerPixel(TextureFormat format) { switch (format) { case TextureFormat::DXT1: + case TextureFormat::DXN1: // In this case a 'pixel' 
actually refers to a 4x4 tile. return 8; case TextureFormat::DXT23: @@ -60,6 +61,8 @@ u32 BytesPerPixel(TextureFormat format) { return 2; case TextureFormat::R8: return 1; + case TextureFormat::R16_G16_B16_A16: + return 8; default: UNIMPLEMENTED_MSG("Format not implemented"); break; @@ -77,7 +80,9 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, case TextureFormat::DXT1: case TextureFormat::DXT23: case TextureFormat::DXT45: - // In the DXT formats, each 4x4 tile is swizzled instead of just individual pixel values. + case TextureFormat::DXN1: + // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel + // values. CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data, unswizzled_data.data(), true, block_height); break; @@ -86,6 +91,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, case TextureFormat::A1B5G5R5: case TextureFormat::B5G6R5: case TextureFormat::R8: + case TextureFormat::R16_G16_B16_A16: CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, unswizzled_data.data(), true, block_height); break; @@ -106,6 +112,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat case TextureFormat::DXT1: case TextureFormat::DXT23: case TextureFormat::DXT45: + case TextureFormat::DXN1: case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: case TextureFormat::A1B5G5R5: |