diff options
Diffstat (limited to '')
-rw-r--r-- | src/core/hle/kernel/address_arbiter.cpp | 53 | ||||
-rw-r--r-- | src/core/hle/kernel/address_arbiter.h | 2 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.cpp | 4 | ||||
-rw-r--r-- | src/core/hle/service/audio/hwopus.cpp | 6 | ||||
-rw-r--r-- | src/core/hle/service/bcat/backend/backend.cpp | 4 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 91 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 23 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 4 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 16 | ||||
-rw-r--r-- | src/video_core/gpu.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 1 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_integer.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/decode/bfi.cpp | 2 |
15 files changed, 121 insertions, 93 deletions
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 2ea3dcb61..8475b698c 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -201,42 +201,39 @@ void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) { void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) { const VAddr arb_addr = thread->GetArbiterWaitAddress(); std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - const std::shared_ptr<Thread>& current_thread = *it; - if (current_thread->GetPriority() >= thread->GetPriority()) { - thread_list.insert(it, thread); - return; - } - ++it; + + const auto iter = + std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) { + return entry->GetPriority() >= thread->GetPriority(); + }); + + if (iter == thread_list.cend()) { + thread_list.push_back(std::move(thread)); + } else { + thread_list.insert(iter, std::move(thread)); } - thread_list.push_back(std::move(thread)); } void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) { const VAddr arb_addr = thread->GetArbiterWaitAddress(); std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - const std::shared_ptr<Thread>& current_thread = *it; - if (current_thread.get() == thread.get()) { - thread_list.erase(it); - return; - } - ++it; - } - UNREACHABLE(); + + const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(), + [&thread](const auto& entry) { return thread == entry; }); + + ASSERT(iter != thread_list.cend()); + + thread_list.erase(iter); } -std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) { - std::vector<std::shared_ptr<Thread>> result; - std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - std::shared_ptr<Thread> current_thread = *it; - result.push_back(std::move(current_thread)); - ++it; +std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress( + VAddr address) const { + const auto iter = arb_threads.find(address); + if (iter == arb_threads.cend()) { + return {}; } - return result; + + const std::list<std::shared_ptr<Thread>>& thread_list = iter->second; + return {thread_list.cbegin(), thread_list.cend()}; } } // namespace Kernel diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h index 386983e54..f958eee5a 100644 --- a/src/core/hle/kernel/address_arbiter.h +++ b/src/core/hle/kernel/address_arbiter.h @@ -86,7 +86,7 @@ private: void RemoveThread(std::shared_ptr<Thread> thread); // Gets the threads waiting on an address. - std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address); + std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const; /// List of threads waiting for a address arbiter std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads; diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 43b30dd3d..ae5f2c8bd 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -481,7 +481,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { return; } - auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + auto& scheduler = kernel.GlobalScheduler(); if (processor_id >= 0) { scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this); } @@ -513,7 +513,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { } void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { - auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + auto& scheduler = kernel.GlobalScheduler(); if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || current_priority >= THREADPRIO_COUNT) { return; diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index cb839e4a2..d19513cbb 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -170,8 +170,10 @@ public: {3, nullptr, "SetContextForMultiStream"}, {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"}, {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"}, - {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, - {7, nullptr, "DecodeInterleavedForMultiStream"}, + {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleavedWithPerfAndResetOld"}, + {7, nullptr, "DecodeInterleavedForMultiStreamWithPerfAndResetOld"}, + {8, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, + {9, nullptr, "DecodeInterleavedForMultiStream"}, }; // clang-format on diff --git a/src/core/hle/service/bcat/backend/backend.cpp b/src/core/hle/service/bcat/backend/backend.cpp index 6f5ea095a..def3410cc 100644 --- a/src/core/hle/service/bcat/backend/backend.cpp +++ b/src/core/hle/service/bcat/backend/backend.cpp @@ -117,13 +117,13 @@ bool NullBackend::SynchronizeDirectory(TitleIDVersion title, std::string name, } bool NullBackend::Clear(u64 title_id) { - LOG_DEBUG(Service_BCAT, "called, title_id={:016X}"); + LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id); return true; } void NullBackend::SetPassphrase(u64 title_id, const Passphrase& passphrase) { - LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase = {}", title_id, + LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase={}", title_id, Common::HexToString(passphrase)); } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7cea146f0..0b3e8749b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -9,6 +9,7 @@ #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/textures/texture.h" @@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() { regs.reg_array[0xd00] = 1; } -void Maxwell3D::ProcessQueryGet() { +void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { + struct LongQueryResult { + u64_le value; + u64_le timestamp; + }; + static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); const GPUVAddr sequence_address{regs.query.QueryAddress()}; - // Since the sequence address is given as a GPU VAddr, we have to convert it to an application - // VAddr before writing. + if (long_query) { + // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast + // GPU, this command may actually take a while to complete in real hardware due to GPU + // wait queues. + LongQueryResult query_result{payload, system.GPU().GetTicks()}; + memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + } else { + memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload)); + } +} +void Maxwell3D::ProcessQueryGet() { // TODO(Subv): Support the other query units. ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, "Units other than CROP are unimplemented"); - u64 result = 0; - - // TODO(Subv): Support the other query variables - switch (regs.query.query_get.select) { - case Regs::QuerySelect::Zero: - // This seems to actually write the query sequence to the query address. - result = regs.query.query_sequence; + switch (regs.query.query_get.operation) { + case Regs::QueryOperation::Release: { + const u64 result = regs.query.query_sequence; + StampQueryResult(result, regs.query.query_get.short_query == 0); break; - default: - result = 1; - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast<u32>(regs.query.query_get.select.Value())); } - - // TODO(Subv): Research and implement how query sync conditions work. - - struct LongQueryResult { - u64_le value; - u64_le timestamp; - }; - static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); - - switch (regs.query.query_get.mode) { - case Regs::QueryMode::Write: - case Regs::QueryMode::Write2: { - u32 sequence = regs.query.query_sequence; - if (regs.query.query_get.short_query) { - // Write the current query sequence to the sequence address. - // TODO(Subv): Find out what happens if you use a long query type but mark it as a short - // query. - memory_manager.Write<u32>(sequence_address, sequence); - } else { - // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast - // GPU, this command may actually take a while to complete in real hardware due to GPU - // wait queues. - LongQueryResult query_result{}; - query_result.value = result; - // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming - query_result.timestamp = system.CoreTiming().GetTicks(); - memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + case Regs::QueryOperation::Acquire: { + // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU + // to write a value that matches the current payload. + UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); + break; + } + case Regs::QueryOperation::Counter: { + u64 result{}; + switch (regs.query.query_get.select) { + case Regs::QuerySelect::Zero: + result = 0; + break; + default: + result = 1; + UNIMPLEMENTED_MSG("Unimplemented query select type {}", + static_cast<u32>(regs.query.query_get.select.Value())); } + StampQueryResult(result, regs.query.query_get.short_query == 0); + break; + } + case Regs::QueryOperation::Trap: { + UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); + break; + } + default: { + UNIMPLEMENTED_MSG("Unknown query operation"); break; } - default: - UNIMPLEMENTED_MSG("Query mode {} not implemented", - static_cast<u32>(regs.query.query_get.mode.Value())); } } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index e437bacb7..0a2af54e5 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -71,12 +71,11 @@ public: static constexpr std::size_t MaxConstBuffers = 18; static constexpr std::size_t MaxConstBufferSize = 0x10000; - enum class QueryMode : u32 { - Write = 0, - Sync = 1, - // TODO(Subv): It is currently unknown what the difference between method 2 and method 0 - // is. - Write2 = 2, + enum class QueryOperation : u32 { + Release = 0, + Acquire = 1, + Counter = 2, + Trap = 3, }; enum class QueryUnit : u32 { @@ -862,7 +861,11 @@ public: float point_size; - INSERT_UNION_PADDING_WORDS(0x7); + INSERT_UNION_PADDING_WORDS(0x1); + + u32 point_sprite_enable; + + INSERT_UNION_PADDING_WORDS(0x5); u32 zeta_enable; @@ -1077,7 +1080,7 @@ public: u32 query_sequence; union { u32 raw; - BitField<0, 2, QueryMode> mode; + BitField<0, 2, QueryOperation> operation; BitField<4, 1, u32> fence; BitField<12, 4, QueryUnit> unit; BitField<16, 1, QuerySyncCondition> sync_cond; @@ -1409,6 +1412,9 @@ private: /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); + // Writes the query result accordingly + void StampQueryResult(u64 payload, bool long_query); + // Handles Conditional Rendering void ProcessQueryCondition(); @@ -1494,6 +1500,7 @@ ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); ASSERT_REG_POSITION(point_size, 0x546); +ASSERT_REG_POSITION(point_sprite_enable, 0x548); ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(multisample_control, 0x54F); ASSERT_REG_POSITION(condition, 0x554); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 402869fde..c9bc83cd7 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1677,11 +1677,11 @@ union Instruction { } xmad; union { - BitField<20, 14, u64> offset; + BitField<20, 14, u64> shifted_offset; BitField<34, 5, u64> index; u64 GetOffset() const { - return offset * 4; + return shifted_offset * 4; } } cbuf34; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 062ca83b8..4419ab735 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/core_timing_util.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" @@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { return true; } +u64 GPU::GetTicks() const { + // This values were reversed engineered by fincs from NVN + // The gpu clock is reported in units of 385/625 nanoseconds + constexpr u64 gpu_ticks_num = 384; + constexpr u64 gpu_ticks_den = 625; + + const u64 cpu_ticks = system.CoreTiming().GetTicks(); + const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); + const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; + const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; + return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; +} + void GPU::FlushCommands() { renderer.Rasterizer().FlushCommands(); } @@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = system.CoreTiming().GetTicks(); + block.timestamp = GetTicks(); memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b648317bb..07727210c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -192,6 +192,8 @@ public: bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); + u64 GetTicks() const; + std::unique_lock<std::mutex> LockSync() { return std::unique_lock{sync_mutex}; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 46a7433ea..b0eb14c8b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1220,6 +1220,7 @@ void RasterizerOpenGL::SyncPointState() { // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid // in OpenGL). state.point.program_control = regs.vp_point_size.enable != 0; + state.point.sprite = regs.point_sprite_enable != 0; state.point.size = std::max(1.0f, regs.point_size); } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index cc185e9e1..ab1f7983c 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -128,6 +128,7 @@ void OpenGLState::ApplyClipDistances() { void OpenGLState::ApplyPointSize() { Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control); + Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite); if (UpdateValue(cur_state.point.size, point.size)) { glPointSize(point.size); } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 678e5cd89..4953eeda2 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -132,6 +132,7 @@ public: struct { bool program_control = false; // GL_PROGRAM_POINT_SIZE + bool sprite = false; // GL_POINT_SPRITE GLfloat size = 1.0f; // GL_POINT_SIZE } point; diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index e60875cc4..21366869d 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -166,13 +166,13 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::ICMP_CR: - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), + return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetRegister(instr.gpr39)}; case OpCode::Id::ICMP_R: return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; case OpCode::Id::ICMP_RC: return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::ICMP_IMM: return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; default: diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index f992bbe2a..70d1c055b 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -21,7 +21,7 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::BFI_RC: return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::BFI_IMM_R: return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; default: |