summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/common/CMakeLists.txt2
-rw-r--r--src/common/uint128.cpp41
-rw-r--r--src/common/uint128.h14
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp5
-rw-r--r--src/core/core_timing_util.cpp6
-rw-r--r--src/core/core_timing_util.h3
-rw-r--r--src/core/hle/kernel/scheduler.cpp6
-rw-r--r--src/core/hle/kernel/svc.cpp32
-rw-r--r--src/core/hle/kernel/thread.cpp13
-rw-r--r--src/core/hle/kernel/thread.h13
-rw-r--r--src/core/memory.cpp81
-rw-r--r--src/core/memory.h6
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp3
13 files changed, 122 insertions, 103 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 3d30f0e3e..c538c6415 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -114,6 +114,8 @@ add_library(common STATIC
threadsafe_queue.h
timer.cpp
timer.h
+ uint128.cpp
+ uint128.h
vector_math.h
web_result.h
)
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
new file mode 100644
index 000000000..2238a52c5
--- /dev/null
+++ b/src/common/uint128.cpp
@@ -0,0 +1,41 @@
+#ifdef _MSC_VER
+#include <intrin.h>
+
+#pragma intrinsic(_umul128)
+#endif
+#include <cstring>
+#include "common/uint128.h"
+
+namespace Common {
+
+u128 Multiply64Into128(u64 a, u64 b) { // Returns the full 128-bit product a * b.
+ u128 result;
+#ifdef _MSC_VER
+ result[0] = _umul128(a, b, &result[1]); // low 64 bits are returned, high 64 bits go to result[1]
+#else
+ unsigned __int128 tmp = a; // widen before multiplying so the product is computed in 128 bits
+ tmp *= b;
+ std::memcpy(&result, &tmp, sizeof(u128)); // raw byte copy; NOTE(review): presumably little-endian only - confirm
+#endif
+ return result;
+}
+
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) { // Returns {quotient, remainder}.
+ u64 remainder = dividend[0] % divisor; // NOTE(review): divisor == 0 is not checked - confirm callers
+ u64 accum = dividend[0] / divisor;
+ if (dividend[1] == 0)
+ return {accum, remainder}; // upper word is zero: the 64-bit division above is already exact
+ // dividend[1] / divisor is dropped on purpose: that quotient part lies above bit 63 of the result
+ const u64 first_segment = (dividend[1] % divisor) << 32; // value is below 2^32, so the shift fits in u64
+ accum += (first_segment / divisor) << 32;
+ const u64 second_segment = (first_segment % divisor) << 32;
+ accum += (second_segment / divisor);
+ remainder += second_segment % divisor; // sum of two values below divisor: one correction suffices
+ if (remainder >= divisor) {
+ accum++;
+ remainder -= divisor;
+ }
+ return {accum, remainder};
+}
+
+} // namespace Common
diff --git a/src/common/uint128.h b/src/common/uint128.h
new file mode 100644
index 000000000..52e6b46eb
--- /dev/null
+++ b/src/common/uint128.h
@@ -0,0 +1,14 @@
+#pragma once
+#include <utility>
+#include "common/common_types.h"
+
+namespace Common {
+
+// Multiplies two u64 values and returns the full 128-bit product.
+u128 Multiply64Into128(u64 a, u64 b);
+
+// Divides a u128 by a u32 and returns two u64 values: the quotient (first)
+// and the remainder (second).
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
+
+} // namespace Common
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 9b7ca4030..4fdc12f11 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,6 +12,7 @@
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
+#include "core/core_timing_util.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/svc.h"
@@ -119,7 +120,7 @@ public:
return std::max(parent.core_timing.GetDowncount(), 0);
}
u64 GetCNTPCT() override {
- return parent.core_timing.GetTicks();
+ return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
}
ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
config.tpidr_el0 = &cb->tpidr_el0;
config.dczid_el0 = 4;
config.ctr_el0 = 0x8444c004;
- config.cntfrq_el0 = 19200000; // Value from fusee.
+ config.cntfrq_el0 = Timing::CNTFREQ;
// Unpredictable instructions
config.define_unpredictable_behaviour = true;
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index 88ff70233..7942f30d6 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -7,6 +7,7 @@
#include <cinttypes>
#include <limits>
#include "common/logging/log.h"
+#include "common/uint128.h"
namespace Core::Timing {
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
}
+u64 CpuCyclesToClockCycles(u64 ticks) { // Computes ticks * CNTFREQ / BASE_CLOCK_RATE, truncated.
+ const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ); // 128-bit product cannot overflow
+ return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first; // .first = quotient
+}
+
} // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 513cfac1b..679aa3123 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -11,6 +11,7 @@ namespace Core::Timing {
// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
// The exact value used is of course unverified.
constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
+constexpr u64 CNTFREQ = 19200000; // Value from fusee.
inline s64 msToCycles(int ms) {
// since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
return cycles * 1000 / BASE_CLOCK_RATE;
}
+u64 CpuCyclesToClockCycles(u64 ticks);
+
} // namespace Core::Timing
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 5fccfd9f4..e524509df 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -199,8 +199,7 @@ void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
// Yield this thread -- sleep for zero time and force reschedule to different thread
- WaitCurrentThread_Sleep();
- GetCurrentThread()->WakeAfterDelay(0);
+ GetCurrentThread()->Sleep(0);
}
void Scheduler::YieldWithLoadBalancing(Thread* thread) {
@@ -215,8 +214,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
ASSERT(priority < THREADPRIO_COUNT);
// Sleep for zero time to be able to force reschedule to different thread
- WaitCurrentThread_Sleep();
- GetCurrentThread()->WakeAfterDelay(0);
+ GetCurrentThread()->Sleep(0);
Thread* suggested_thread = nullptr;
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 77d0e3d96..047fa0c19 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1284,10 +1284,14 @@ static ResultCode StartThread(Handle thread_handle) {
/// Called when a thread exits
static void ExitThread() {
- LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC());
+ auto& system = Core::System::GetInstance();
- ExitCurrentThread();
- Core::System::GetInstance().PrepareReschedule();
+ LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
+
+ auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
+ current_thread->Stop();
+ system.CurrentScheduler().RemoveThread(current_thread);
+ system.PrepareReschedule();
}
/// Sleep the current thread
@@ -1300,32 +1304,32 @@ static void SleepThread(s64 nanoseconds) {
YieldAndWaitForLoadBalancing = -2,
};
+ auto& system = Core::System::GetInstance();
+ auto& scheduler = system.CurrentScheduler();
+ auto* const current_thread = scheduler.GetCurrentThread();
+
if (nanoseconds <= 0) {
- auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
switch (static_cast<SleepType>(nanoseconds)) {
case SleepType::YieldWithoutLoadBalancing:
- scheduler.YieldWithoutLoadBalancing(GetCurrentThread());
+ scheduler.YieldWithoutLoadBalancing(current_thread);
break;
case SleepType::YieldWithLoadBalancing:
- scheduler.YieldWithLoadBalancing(GetCurrentThread());
+ scheduler.YieldWithLoadBalancing(current_thread);
break;
case SleepType::YieldAndWaitForLoadBalancing:
- scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread());
+ scheduler.YieldAndWaitForLoadBalancing(current_thread);
break;
default:
UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
}
} else {
- // Sleep current thread and check for next thread to schedule
- WaitCurrentThread_Sleep();
-
- // Create an event to wake the thread up after the specified nanosecond delay has passed
- GetCurrentThread()->WakeAfterDelay(nanoseconds);
+ current_thread->Sleep(nanoseconds);
}
// Reschedule all CPU cores
- for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i)
- Core::System::GetInstance().CpuCore(i).PrepareReschedule();
+ for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
+ system.CpuCore(i).PrepareReschedule();
+ }
}
/// Wait process wide key atomic
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index eb54d6651..2e712c9cb 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -68,11 +68,6 @@ void Thread::Stop() {
owner_process->FreeTLSSlot(tls_address);
}
-void WaitCurrentThread_Sleep() {
- Thread* thread = GetCurrentThread();
- thread->SetStatus(ThreadStatus::WaitSleep);
-}
-
void ExitCurrentThread() {
Thread* thread = GetCurrentThread();
thread->Stop();
@@ -391,6 +386,14 @@ void Thread::SetActivity(ThreadActivity value) {
}
}
+void Thread::Sleep(s64 nanoseconds) { // Callers in this change pass 0 to yield or a positive delay.
+ // Put this thread into the WaitSleep state
+ SetStatus(ThreadStatus::WaitSleep);
+
+ // Create an event to wake the thread up after the specified nanosecond delay has passed
+ WakeAfterDelay(nanoseconds);
+}
+
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index c48b21aba..ccdefeecc 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -383,6 +383,9 @@ public:
void SetActivity(ThreadActivity value);
+ /// Sleeps this thread for the given amount of nanoseconds.
+ void Sleep(s64 nanoseconds);
+
private:
explicit Thread(KernelCore& kernel);
~Thread() override;
@@ -460,14 +463,4 @@ private:
*/
Thread* GetCurrentThread();
-/**
- * Waits the current thread on a sleep
- */
-void WaitCurrentThread_Sleep();
-
-/**
- * Stops the current thread and removes it from the thread_list
- */
-void ExitCurrentThread();
-
} // namespace Kernel
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 4fde53033..e0cc5175f 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -18,6 +18,7 @@
#include "core/hle/lock.h"
#include "core/memory.h"
#include "core/memory_setup.h"
+#include "video_core/gpu.h"
#include "video_core/renderer_base.h"
namespace Memory {
@@ -69,8 +70,8 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
// During boot, current_page_table might not be set yet, in which case we need not flush
if (current_page_table) {
- RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
- FlushMode::FlushAndInvalidate);
+ Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
+ size * PAGE_SIZE);
}
VAddr end = base + size;
@@ -183,10 +184,10 @@ T Read(const VAddr vaddr) {
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
break;
case PageType::RasterizerCachedMemory: {
- RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush);
-
+ auto host_ptr{GetPointerFromVMA(vaddr)};
+ Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
T value;
- std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
+ std::memcpy(&value, host_ptr, sizeof(T));
return value;
}
default:
@@ -214,8 +215,9 @@ void Write(const VAddr vaddr, const T data) {
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
break;
case PageType::RasterizerCachedMemory: {
- RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
- std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
+ auto host_ptr{GetPointerFromVMA(vaddr)};
+ Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+ std::memcpy(host_ptr, &data, sizeof(T));
break;
}
default:
@@ -338,47 +340,6 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
}
}
-void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
- auto& system_instance = Core::System::GetInstance();
-
- // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
- // null here
- if (!system_instance.IsPoweredOn()) {
- return;
- }
-
- const VAddr end = start + size;
-
- const auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
- if (start >= region_end || end <= region_start) {
- // No overlap with region
- return;
- }
-
- const VAddr overlap_start = std::max(start, region_start);
- const VAddr overlap_end = std::min(end, region_end);
- const VAddr overlap_size = overlap_end - overlap_start;
-
- auto& gpu = system_instance.GPU();
- switch (mode) {
- case FlushMode::Flush:
- gpu.FlushRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
- break;
- case FlushMode::Invalidate:
- gpu.InvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
- break;
- case FlushMode::FlushAndInvalidate:
- gpu.FlushAndInvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
- break;
- }
- };
-
- const auto& vm_manager = Core::CurrentProcess()->VMManager();
-
- CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
- CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
-}
-
u8 Read8(const VAddr addr) {
return Read<u8>(addr);
}
@@ -424,9 +385,9 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
break;
}
case PageType::RasterizerCachedMemory: {
- RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
- FlushMode::Flush);
- std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount);
+ const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+ Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+ std::memcpy(dest_buffer, host_ptr, copy_amount);
break;
}
default:
@@ -487,9 +448,9 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
break;
}
case PageType::RasterizerCachedMemory: {
- RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
- FlushMode::Invalidate);
- std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
+ const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+ Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+ std::memcpy(host_ptr, src_buffer, copy_amount);
break;
}
default:
@@ -533,9 +494,9 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std:
break;
}
case PageType::RasterizerCachedMemory: {
- RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
- FlushMode::Invalidate);
- std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount);
+ const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+ Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+ std::memset(host_ptr, 0, copy_amount);
break;
}
default:
@@ -575,9 +536,9 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
break;
}
case PageType::RasterizerCachedMemory: {
- RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
- FlushMode::Flush);
- WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount);
+ const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+ Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+ WriteBlock(process, dest_addr, host_ptr, copy_amount);
break;
}
default:
diff --git a/src/core/memory.h b/src/core/memory.h
index 1acf5ce8c..c2c6643ee 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -161,10 +161,4 @@ enum class FlushMode {
*/
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
-/**
- * Flushes and invalidates any externally cached rasterizer resources touching the given virtual
- * address region.
- */
-void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode);
-
} // namespace Memory
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b97576309..5e3d862c6 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -164,8 +164,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
- Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
- Memory::FlushMode::Flush);
+ rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
constexpr u32 linear_bpp = 4;
VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,