diff options
m--------- | externals/sirit | 0 | ||||
-rw-r--r-- | src/core/hle/kernel/address_arbiter.cpp | 66 | ||||
-rw-r--r-- | src/core/hle/kernel/address_arbiter.h | 19 | ||||
-rw-r--r-- | src/core/hle/kernel/kernel.cpp | 6 | ||||
-rw-r--r-- | src/core/hle/kernel/svc.cpp | 20 | ||||
-rw-r--r-- | src/core/hle/kernel/svc_wrap.h | 21 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 13 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 18 | ||||
-rw-r--r-- | src/video_core/rasterizer_accelerated.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 12 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 7 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 5 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 1 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 14 | ||||
-rw-r--r-- | src/video_core/shader/decode/other.cpp | 6 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 2 |
16 files changed, 167 insertions, 44 deletions
diff --git a/externals/sirit b/externals/sirit -Subproject e1a6729df7f11e33f6dc0939b18995a57c8bf3d +Subproject 12f40a80324d7c154f19f25c448a5ce27d38cd1 diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 8422d05e0..db189c8e3 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -17,10 +17,10 @@ #include "core/memory.h" namespace Kernel { -namespace { + // Wake up num_to_wake (or all) threads in a vector. -void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake) { - auto& system = Core::System::GetInstance(); +void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, + s32 num_to_wake) { // Only process up to 'target' threads, unless 'target' is <= 0, in which case process // them all. std::size_t last = waiting_threads.size(); @@ -32,12 +32,12 @@ void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s3 for (std::size_t i = 0; i < last; i++) { ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb); waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS); + RemoveThread(waiting_threads[i]); waiting_threads[i]->SetArbiterWaitAddress(0); waiting_threads[i]->ResumeFromWait(); system.PrepareReschedule(waiting_threads[i]->GetProcessorID()); } } -} // Anonymous namespace AddressArbiter::AddressArbiter(Core::System& system) : system{system} {} AddressArbiter::~AddressArbiter() = default; @@ -184,6 +184,7 @@ ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 t ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) { Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); current_thread->SetArbiterWaitAddress(address); + InsertThread(SharedFrom(current_thread)); current_thread->SetStatus(ThreadStatus::WaitArb); current_thread->InvalidateWakeupCallback(); current_thread->WakeAfterDelay(timeout); @@ -192,26 +193,51 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) { return RESULT_TIMEOUT; } -std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress( - VAddr address) const { - - // Retrieve all threads that are waiting for this address. - std::vector<std::shared_ptr<Thread>> threads; - const auto& scheduler = system.GlobalScheduler(); - const auto& thread_list = scheduler.GetThreadList(); +void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) { + ASSERT(thread->GetStatus() == ThreadStatus::WaitArb); + RemoveThread(thread); + thread->SetArbiterWaitAddress(0); +} - for (const auto& thread : thread_list) { - if (thread->GetArbiterWaitAddress() == address) { - threads.push_back(thread); +void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) { + const VAddr arb_addr = thread->GetArbiterWaitAddress(); + std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; + auto it = thread_list.begin(); + while (it != thread_list.end()) { + const std::shared_ptr<Thread>& current_thread = *it; + if (current_thread->GetPriority() >= thread->GetPriority()) { + thread_list.insert(it, thread); + return; } + ++it; } + thread_list.push_back(std::move(thread)); +} - // Sort them by priority, such that the highest priority ones come first. - std::sort(threads.begin(), threads.end(), - [](const std::shared_ptr<Thread>& lhs, const std::shared_ptr<Thread>& rhs) { - return lhs->GetPriority() < rhs->GetPriority(); - }); +void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) { + const VAddr arb_addr = thread->GetArbiterWaitAddress(); + std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; + auto it = thread_list.begin(); + while (it != thread_list.end()) { + const std::shared_ptr<Thread>& current_thread = *it; + if (current_thread.get() == thread.get()) { + thread_list.erase(it); + return; + } + ++it; + } + UNREACHABLE(); +} - return threads; +std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) { + std::vector<std::shared_ptr<Thread>> result; + std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address]; + auto it = thread_list.begin(); + while (it != thread_list.end()) { + std::shared_ptr<Thread> current_thread = *it; + result.push_back(std::move(current_thread)); + ++it; + } + return result; } } // namespace Kernel diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h index 1e1f00e60..386983e54 100644 --- a/src/core/hle/kernel/address_arbiter.h +++ b/src/core/hle/kernel/address_arbiter.h @@ -4,7 +4,9 @@ #pragma once +#include <list> #include <memory> +#include <unordered_map> #include <vector> #include "common/common_types.h" @@ -48,6 +50,9 @@ public: /// Waits on an address with a particular arbitration type. ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns); + /// Removes a thread from the container and resets its address arbiter adress to 0 + void HandleWakeupThread(std::shared_ptr<Thread> thread); + private: /// Signals an address being waited on. ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake); @@ -71,8 +76,20 @@ private: // Waits on the given address with a timeout in nanoseconds ResultCode WaitForAddressImpl(VAddr address, s64 timeout); + /// Wake up num_to_wake (or all) threads in a vector. + void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake); + + /// Insert a thread into the address arbiter container + void InsertThread(std::shared_ptr<Thread> thread); + + /// Removes a thread from the address arbiter container + void RemoveThread(std::shared_ptr<Thread> thread); + // Gets the threads waiting on an address. - std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const; + std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address); + + /// List of threads waiting for a address arbiter + std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads; Core::System& system; }; diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 0b149067a..1d0783bd3 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -78,9 +78,9 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ } } - if (thread->GetArbiterWaitAddress() != 0) { - ASSERT(thread->GetStatus() == ThreadStatus::WaitArb); - thread->SetArbiterWaitAddress(0); + if (thread->GetStatus() == ThreadStatus::WaitArb) { + auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter(); + address_arbiter.HandleWakeupThread(thread); } if (resume) { diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index bd25de478..dbcdb0b88 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1650,8 +1650,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add } /// Signal process wide key -static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, - s32 target) { +static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, s32 target) { LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}", condition_variable_addr, target); @@ -1726,8 +1725,6 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var system.PrepareReschedule(thread->GetProcessorID()); } } - - return RESULT_SUCCESS; } // Wait for an address (via Address Arbiter) @@ -1781,6 +1778,17 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake); } +static void KernelDebug([[maybe_unused]] Core::System& system, + [[maybe_unused]] u32 kernel_debug_type, [[maybe_unused]] u64 param1, + [[maybe_unused]] u64 param2, [[maybe_unused]] u64 param3) { + // Intentionally do nothing, as this does nothing in released kernel binaries. +} + +static void ChangeKernelTraceState([[maybe_unused]] Core::System& system, + [[maybe_unused]] u32 trace_state) { + // Intentionally do nothing, as this does nothing in released kernel binaries. +} + /// This returns the total CPU ticks elapsed since the CPU was powered-on static u64 GetSystemTick(Core::System& system) { LOG_TRACE(Kernel_SVC, "called"); @@ -2418,8 +2426,8 @@ static const FunctionDef SVC_Table[] = { {0x39, nullptr, "Unknown"}, {0x3A, nullptr, "Unknown"}, {0x3B, nullptr, "Unknown"}, - {0x3C, nullptr, "DumpInfo"}, - {0x3D, nullptr, "DumpInfoNew"}, + {0x3C, SvcWrap<KernelDebug>, "KernelDebug"}, + {0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"}, {0x3E, nullptr, "Unknown"}, {0x3F, nullptr, "Unknown"}, {0x40, nullptr, "CreateSession"}, diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index c2d8d0dc3..29a2cfa9d 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -112,11 +112,6 @@ void SvcWrap(Core::System& system) { FuncReturn(system, retval); } -template <ResultCode func(Core::System&, u64, s32)> -void SvcWrap(Core::System& system) { - FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw); -} - template <ResultCode func(Core::System&, u64, u32)> void SvcWrap(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw); @@ -311,11 +306,27 @@ void SvcWrap(Core::System& system) { func(system); } +template <void func(Core::System&, u32)> +void SvcWrap(Core::System& system) { + func(system, static_cast<u32>(Param(system, 0))); +} + +template <void func(Core::System&, u32, u64, u64, u64)> +void SvcWrap(Core::System& system) { + func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2), + Param(system, 3)); +} + template <void func(Core::System&, s64)> void SvcWrap(Core::System& system) { func(system, static_cast<s64>(Param(system, 0))); } +template <void func(Core::System&, u64, s32)> +void SvcWrap(Core::System& system) { + func(system, Param(system, 0), static_cast<s32>(Param(system, 1))); +} + template <void func(Core::System&, u64, u64)> void SvcWrap(Core::System& system) { func(system, Param(system, 0), Param(system, 1)); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index dcc7cd1fe..dbb4e597f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -310,6 +310,11 @@ public: } }; + enum class DepthMode : u32 { + MinusOneToOne = 0, + ZeroToOne = 1, + }; + enum class PrimitiveTopology : u32 { Points = 0x0, Lines = 0x1, @@ -491,11 +496,6 @@ public: INSERT_UNION_PADDING_WORDS(1); }; - enum class DepthMode : u32 { - MinusOneToOne = 0, - ZeroToOne = 1, - }; - enum class TessellationPrimitive : u32 { Isolines = 0, Triangles = 1, @@ -676,7 +676,7 @@ public: u32 count; } vertex_buffer; - INSERT_UNION_PADDING_WORDS(1); + DepthMode depth_mode; float clear_color[4]; float clear_depth; @@ -1425,6 +1425,7 @@ ASSERT_REG_POSITION(rt, 0x200); ASSERT_REG_POSITION(viewport_transform, 0x280); ASSERT_REG_POSITION(viewports, 0x300); ASSERT_REG_POSITION(vertex_buffer, 0x35D); +ASSERT_REG_POSITION(depth_mode, 0x35F); ASSERT_REG_POSITION(clear_color[0], 0x360); ASSERT_REG_POSITION(clear_depth, 0x364); ASSERT_REG_POSITION(clear_stencil, 0x368); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7703a76a3..290d929df 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -384,6 +384,15 @@ enum class IsberdMode : u64 { enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; +enum class MembarType : u64 { + CTA = 0, + GL = 1, + SYS = 2, + VC = 3, +}; + +enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 }; + enum class HalfType : u64 { H0_H1 = 0, F32 = 1, @@ -1546,6 +1555,11 @@ union Instruction { } isberd; union { + BitField<8, 2, MembarType> type; + BitField<0, 2, MembarUnknown> unknown; + } membar; + + union { BitField<48, 1, u64> signed_a; BitField<38, 1, u64> is_byte_chunk_a; BitField<36, 2, VideoType> type_a; @@ -1669,6 +1683,7 @@ public: IPA, OUT_R, // Emit vertex/primitive ISBERD, + MEMBAR, VMAD, VSETP, FFMA_IMM, // Fused Multiply and Add @@ -1930,7 +1945,7 @@ private: INST("111000100100----", Id::BRA, Type::Flow, "BRA"), INST("111000100101----", Id::BRX, Type::Flow, "BRX"), INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), - INST("111000110100---", Id::BRK, Type::Flow, "BRK"), + INST("111000110100----", Id::BRK, Type::Flow, "BRK"), INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), @@ -1969,6 +1984,7 @@ private: INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), + INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index fc6ecb899..d01db97da 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -5,6 +5,7 @@ #include <mutex> #include <boost/icl/interval_map.hpp> +#include <boost/range/iterator_range.hpp> #include "common/assert.h" #include "common/common_types.h" diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 9eef7fcd2..f20967d85 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -277,6 +277,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { continue; } + // Currently this stages are not supported in the OpenGL backend. + // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL + if (program == Maxwell::ShaderProgram::TesselationControl) { + continue; + } else if (program == Maxwell::ShaderProgram::TesselationEval) { + continue; + } + Shader shader{shader_cache.GetStageProgram(program)}; // Stage indices are 0 - 5 @@ -1028,6 +1036,10 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { flip_y = !flip_y; } state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT; + state.clip_control.depth_mode = + regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne + ? GL_ZERO_TO_ONE + : GL_NEGATIVE_ONE_TO_ONE; } void RasterizerOpenGL::SyncClipEnabled( diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 9700c2ebe..fa7049bbe 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1992,6 +1992,11 @@ private: return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; } + Expression MemoryBarrierGL(Operation) { + code.AddLine("memoryBarrier();"); + return {}; + } + struct Func final { Func() = delete; ~Func() = delete; @@ -2173,6 +2178,8 @@ private: &GLSLDecompiler::ThreadId, &GLSLDecompiler::ShuffleIndexed, + + &GLSLDecompiler::MemoryBarrierGL, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 39b3986d3..ccc1e050a 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -411,8 +411,9 @@ void OpenGLState::ApplyAlphaTest() { } void OpenGLState::ApplyClipControl() { - if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) { - glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE); + if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode), + std::tie(clip_control.origin, clip_control.depth_mode))) { + glClipControl(clip_control.origin, clip_control.depth_mode); } } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index e53c2c5f2..0b5895084 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -150,6 +150,7 @@ public: struct { GLenum origin = GL_LOWER_LEFT; + GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE; } clip_control; OpenGLState(); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 8ad89b58a..6227bc70b 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1971,6 +1971,18 @@ private: return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; } + Expression MemoryBarrierGL(Operation) { + const auto scope = spv::Scope::Device; + const auto semantics = + spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | + spv::MemorySemanticsMask::WorkgroupMemory | + spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory; + + OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)), + Constant(t_uint, static_cast<u32>(semantics))); + return {}; + } + Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) { const Id id = OpVariable(type, storage); Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin)); @@ -2374,6 +2386,8 @@ private: &SPIRVDecompiler::ThreadId, &SPIRVDecompiler::ShuffleIndexed, + + &SPIRVDecompiler::MemoryBarrierGL, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 5c802886b..7321698b2 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -257,6 +257,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); break; } + case OpCode::Id::MEMBAR: { + UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL); + UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); + bb.push_back(Operation(OperationCode::MemoryBarrierGL)); + break; + } case OpCode::Id::DEPBAR: { LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); break; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 1a4d28ae9..abd40f582 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -189,6 +189,8 @@ enum class OperationCode { ThreadId, /// () -> uint ShuffleIndexed, /// (uint value, uint index) -> uint + MemoryBarrierGL, /// () -> void + Amount, }; |