diff options
62 files changed, 776 insertions, 397 deletions
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt index ae665ed2e..7461fb093 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt @@ -34,11 +34,14 @@ import androidx.core.view.WindowCompat import androidx.core.view.WindowInsetsCompat import androidx.core.view.WindowInsetsControllerCompat import androidx.navigation.fragment.NavHostFragment +import androidx.preference.PreferenceManager import org.yuzu.yuzu_emu.NativeLibrary import org.yuzu.yuzu_emu.R +import org.yuzu.yuzu_emu.YuzuApplication import org.yuzu.yuzu_emu.databinding.ActivityEmulationBinding import org.yuzu.yuzu_emu.features.settings.model.BooleanSetting import org.yuzu.yuzu_emu.features.settings.model.IntSetting +import org.yuzu.yuzu_emu.features.settings.model.Settings import org.yuzu.yuzu_emu.features.settings.model.SettingsViewModel import org.yuzu.yuzu_emu.model.Game import org.yuzu.yuzu_emu.utils.ControllerMappingHelper @@ -47,6 +50,7 @@ import org.yuzu.yuzu_emu.utils.InputHandler import org.yuzu.yuzu_emu.utils.MemoryUtil import org.yuzu.yuzu_emu.utils.NfcReader import org.yuzu.yuzu_emu.utils.ThemeHelper +import java.text.NumberFormat import kotlin.math.roundToInt class EmulationActivity : AppCompatActivity(), SensorEventListener { @@ -106,17 +110,26 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener { inputHandler = InputHandler() inputHandler.initialize() - val memoryUtil = MemoryUtil(this) - if (memoryUtil.isLessThan(8, MemoryUtil.Gb)) { - Toast.makeText( - this, - getString( - R.string.device_memory_inadequate, - memoryUtil.getDeviceRAM(), - "8 ${getString(R.string.memory_gigabyte)}" - ), - Toast.LENGTH_LONG - ).show() + val preferences = PreferenceManager.getDefaultSharedPreferences(YuzuApplication.appContext) + if (!preferences.getBoolean(Settings.PREF_MEMORY_WARNING_SHOWN, false)) { + if (MemoryUtil.isLessThan(MemoryUtil.REQUIRED_MEMORY, MemoryUtil.Gb)) { + Toast.makeText( + this, + getString( + R.string.device_memory_inadequate, + MemoryUtil.getDeviceRAM(), + getString( + R.string.memory_formatted, + NumberFormat.getInstance().format(MemoryUtil.REQUIRED_MEMORY), + getString(R.string.memory_gigabyte) + ) + ), + Toast.LENGTH_LONG + ).show() + preferences.edit() + .putBoolean(Settings.PREF_MEMORY_WARNING_SHOWN, true) + .apply() + } } // Start a foreground service to prevent the app from getting killed in the background diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt index 88afb2223..be6e17e65 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt @@ -110,6 +110,8 @@ class Settings { const val SECTION_THEME = "Theme" const val SECTION_DEBUG = "Debug" + const val PREF_MEMORY_WARNING_SHOWN = "MemoryWarningShown" + const val PREF_OVERLAY_INIT = "OverlayInit" const val PREF_CONTROL_SCALE = "controlScale" const val PREF_CONTROL_OPACITY = "controlOpacity" diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt index 18e5fa0b0..aa4a5539a 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt @@ -5,35 +5,101 @@ package org.yuzu.yuzu_emu.utils import android.app.ActivityManager import android.content.Context +import android.os.Build import org.yuzu.yuzu_emu.R +import org.yuzu.yuzu_emu.YuzuApplication import java.util.Locale +import kotlin.math.ceil -class MemoryUtil(val context: Context) { +object MemoryUtil { + private val context get() = YuzuApplication.appContext - private val Long.floatForm: String - get() = String.format(Locale.ROOT, "%.2f", this.toDouble()) + private val Float.hundredths: String + get() = String.format(Locale.ROOT, "%.2f", this) - private fun bytesToSizeUnit(size: Long): String { - return when { - size < Kb -> "${size.floatForm} ${context.getString(R.string.memory_byte)}" - size < Mb -> "${(size / Kb).floatForm} ${context.getString(R.string.memory_kilobyte)}" - size < Gb -> "${(size / Mb).floatForm} ${context.getString(R.string.memory_megabyte)}" - size < Tb -> "${(size / Gb).floatForm} ${context.getString(R.string.memory_gigabyte)}" - size < Pb -> "${(size / Tb).floatForm} ${context.getString(R.string.memory_terabyte)}" - size < Eb -> "${(size / Pb).floatForm} ${context.getString(R.string.memory_petabyte)}" - else -> "${(size / Eb).floatForm} ${context.getString(R.string.memory_exabyte)}" + // Required total system memory + const val REQUIRED_MEMORY = 8 + + const val Kb: Float = 1024F + const val Mb = Kb * 1024 + const val Gb = Mb * 1024 + const val Tb = Gb * 1024 + const val Pb = Tb * 1024 + const val Eb = Pb * 1024 + + private fun bytesToSizeUnit(size: Float): String = + when { + size < Kb -> { + context.getString( + R.string.memory_formatted, + size.hundredths, + context.getString(R.string.memory_byte) + ) + } + size < Mb -> { + context.getString( + R.string.memory_formatted, + (size / Kb).hundredths, + context.getString(R.string.memory_kilobyte) + ) + } + size < Gb -> { + context.getString( + R.string.memory_formatted, + (size / Mb).hundredths, + context.getString(R.string.memory_megabyte) + ) + } + size < Tb -> { + context.getString( + R.string.memory_formatted, + (size / Gb).hundredths, + context.getString(R.string.memory_gigabyte) + ) + } + size < Pb -> { + context.getString( + R.string.memory_formatted, + (size / Tb).hundredths, + context.getString(R.string.memory_terabyte) + ) + } + size < Eb -> { + context.getString( + R.string.memory_formatted, + (size / Pb).hundredths, + context.getString(R.string.memory_petabyte) + ) + } + else -> { + context.getString( + R.string.memory_formatted, + (size / Eb).hundredths, + context.getString(R.string.memory_exabyte) + ) + } } - } - private val totalMemory = - with(context.getSystemService(Context.ACTIVITY_SERVICE) as ActivityManager) { + // Devices are unlikely to have 0.5GB increments of memory so we'll just round up to account for + // the potential error created by memInfo.totalMem + private val totalMemory: Float + get() { val memInfo = ActivityManager.MemoryInfo() - getMemoryInfo(memInfo) - memInfo.totalMem + with(context.getSystemService(Context.ACTIVITY_SERVICE) as ActivityManager) { + getMemoryInfo(memInfo) + } + + return ceil( + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.UPSIDE_DOWN_CAKE) { + memInfo.advertisedMem.toFloat() + } else { + memInfo.totalMem.toFloat() + } + ) } - fun isLessThan(minimum: Int, size: Long): Boolean { - return when (size) { + fun isLessThan(minimum: Int, size: Float): Boolean = + when (size) { Kb -> totalMemory < Mb && totalMemory < minimum Mb -> totalMemory < Gb && (totalMemory / Mb) < minimum Gb -> totalMemory < Tb && (totalMemory / Gb) < minimum @@ -42,18 +108,6 @@ class MemoryUtil(val context: Context) { Eb -> totalMemory / Eb < minimum else -> totalMemory < Kb && totalMemory < minimum } - } - - fun getDeviceRAM(): String { - return bytesToSizeUnit(totalMemory) - } - - companion object { - const val Kb: Long = 1024 - const val Mb = Kb * 1024 - const val Gb = Mb * 1024 - const val Tb = Gb * 1024 - const val Pb = Tb * 1024 - const val Eb = Pb * 1024 - } + + fun getDeviceRAM(): String = bytesToSizeUnit(totalMemory) } diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index af7450619..b3c737979 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -273,6 +273,7 @@ <string name="fatal_error_message">A fatal error occurred. Check the log for details.\nContinuing emulation may result in crashes and bugs.</string> <string name="performance_warning">Turning off this setting will significantly reduce emulation performance! For the best experience, it is recommended that you leave this setting enabled.</string> <string name="device_memory_inadequate">Device RAM: %1$s\nRecommended: %2$s</string> + <string name="memory_formatted">%1$s %2$s</string> <!-- Region Names --> <string name="region_japan">Japan</string> diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp index 404dcd0e9..6081352a2 100644 --- a/src/audio_core/sink/sink_stream.cpp +++ b/src/audio_core/sink/sink_stream.cpp @@ -12,6 +12,7 @@ #include "audio_core/sink/sink_stream.h" #include "common/common_types.h" #include "common/fixed_point.h" +#include "common/scope_exit.h" #include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" @@ -19,9 +20,12 @@ namespace AudioCore::Sink { void SinkStream::AppendBuffer(SinkBuffer& buffer, std::span<s16> samples) { - if (type == StreamType::In) { + SCOPE_EXIT({ queue.enqueue(buffer); - queued_buffers++; + ++queued_buffers; + }); + + if (type == StreamType::In) { return; } @@ -66,16 +70,17 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::span<s16> samples) { static_cast<s16>(std::clamp(right_sample, min, max)); } - samples = samples.subspan(0, samples.size() / system_channels * device_channels); + samples_buffer.Push(samples.subspan(0, samples.size() / system_channels * device_channels)); + return; + } - } else if (system_channels == 2 && device_channels == 6) { + if (system_channels == 2 && device_channels == 6) { // We need moar samples! Not all games will provide 6 channel audio. // TODO: Implement some upmixing here. Currently just passthrough, with other // channels left as silence. - auto new_size = samples.size() / system_channels * device_channels; - tmp_samples.resize_destructive(new_size); + std::vector<s16> new_samples(samples.size() / system_channels * device_channels); - for (u32 read_index = 0, write_index = 0; read_index < new_size; + for (u32 read_index = 0, write_index = 0; read_index < samples.size(); read_index += system_channels, write_index += device_channels) { const auto left_sample{static_cast<s16>(std::clamp( static_cast<s32>( @@ -83,7 +88,7 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::span<s16> samples) { volume), min, max))}; - tmp_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample; + new_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample; const auto right_sample{static_cast<s16>(std::clamp( static_cast<s32>( @@ -91,20 +96,21 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::span<s16> samples) { volume), min, max))}; - tmp_samples[write_index + static_cast<u32>(Channels::FrontRight)] = right_sample; + new_samples[write_index + static_cast<u32>(Channels::FrontRight)] = right_sample; } - samples = std::span<s16>(tmp_samples); - } else if (volume != 1.0f) { - for (u32 i = 0; i < samples.size(); i++) { + samples_buffer.Push(new_samples); + return; + } + + if (volume != 1.0f) { + for (u32 i = 0; i < samples.size(); ++i) { samples[i] = static_cast<s16>( std::clamp(static_cast<s32>(static_cast<f32>(samples[i]) * volume), min, max)); } } samples_buffer.Push(samples); - queue.enqueue(buffer); - queued_buffers++; } std::vector<s16> SinkStream::ReleaseBuffer(u64 num_samples) { diff --git a/src/audio_core/sink/sink_stream.h b/src/audio_core/sink/sink_stream.h index 98d72ace1..6a4996ca3 100644 --- a/src/audio_core/sink/sink_stream.h +++ b/src/audio_core/sink/sink_stream.h @@ -16,7 +16,6 @@ #include "common/polyfill_thread.h" #include "common/reader_writer_queue.h" #include "common/ring_buffer.h" -#include "common/scratch_buffer.h" #include "common/thread.h" namespace Core { @@ -256,8 +255,6 @@ private: /// Signalled when ring buffer entries are consumed std::condition_variable_any release_cv; std::mutex release_mutex; - /// Temporary buffer for appending samples when upmixing - Common::ScratchBuffer<s16> tmp_samples{}; }; using SinkStreamPtr = std::unique_ptr<SinkStream>; diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h index 416680d44..5c961b202 100644 --- a/src/common/ring_buffer.h +++ b/src/common/ring_buffer.h @@ -54,7 +54,7 @@ public: return push_count; } - std::size_t Push(const std::span<T> input) { + std::size_t Push(std::span<const T> input) { return Push(input.data(), input.size()); } diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h index 6fe907953..d5961b020 100644 --- a/src/common/scratch_buffer.h +++ b/src/common/scratch_buffer.h @@ -5,7 +5,6 @@ #include <iterator> -#include "common/concepts.h" #include "common/make_unique_for_overwrite.h" namespace Common { @@ -19,15 +18,22 @@ namespace Common { template <typename T> class ScratchBuffer { public: - using iterator = T*; - using const_iterator = const T*; - using value_type = T; using element_type = T; - using iterator_category = std::contiguous_iterator_tag; + using value_type = T; + using size_type = size_t; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using const_pointer = const T*; + using reference = T&; + using const_reference = const T&; + using iterator = pointer; + using const_iterator = const_pointer; + using iterator_category = std::random_access_iterator_tag; + using iterator_concept = std::contiguous_iterator_tag; ScratchBuffer() = default; - explicit ScratchBuffer(size_t initial_capacity) + explicit ScratchBuffer(size_type initial_capacity) : last_requested_size{initial_capacity}, buffer_capacity{initial_capacity}, buffer{Common::make_unique_for_overwrite<T[]>(initial_capacity)} {} @@ -39,7 +45,7 @@ public: /// This will only grow the buffer's capacity if size is greater than the current capacity. /// The previously held data will remain intact. - void resize(size_t size) { + void resize(size_type size) { if (size > buffer_capacity) { auto new_buffer = Common::make_unique_for_overwrite<T[]>(size); std::move(buffer.get(), buffer.get() + buffer_capacity, new_buffer.get()); @@ -51,7 +57,7 @@ public: /// This will only grow the buffer's capacity if size is greater than the current capacity. /// The previously held data will be destroyed if a reallocation occurs. - void resize_destructive(size_t size) { + void resize_destructive(size_type size) { if (size > buffer_capacity) { buffer_capacity = size; buffer = Common::make_unique_for_overwrite<T[]>(buffer_capacity); @@ -59,43 +65,43 @@ public: last_requested_size = size; } - [[nodiscard]] T* data() noexcept { + [[nodiscard]] pointer data() noexcept { return buffer.get(); } - [[nodiscard]] const T* data() const noexcept { + [[nodiscard]] const_pointer data() const noexcept { return buffer.get(); } - [[nodiscard]] T* begin() noexcept { + [[nodiscard]] iterator begin() noexcept { return data(); } - [[nodiscard]] const T* begin() const noexcept { + [[nodiscard]] const_iterator begin() const noexcept { return data(); } - [[nodiscard]] T* end() noexcept { + [[nodiscard]] iterator end() noexcept { return data() + last_requested_size; } - [[nodiscard]] const T* end() const noexcept { + [[nodiscard]] const_iterator end() const noexcept { return data() + last_requested_size; } - [[nodiscard]] T& operator[](size_t i) { + [[nodiscard]] reference operator[](size_type i) { return buffer[i]; } - [[nodiscard]] const T& operator[](size_t i) const { + [[nodiscard]] const_reference operator[](size_type i) const { return buffer[i]; } - [[nodiscard]] size_t size() const noexcept { + [[nodiscard]] size_type size() const noexcept { return last_requested_size; } - [[nodiscard]] size_t capacity() const noexcept { + [[nodiscard]] size_type capacity() const noexcept { return buffer_capacity; } @@ -106,8 +112,8 @@ public: } private: - size_t last_requested_size{}; - size_t buffer_capacity{}; + size_type last_requested_size{}; + size_type buffer_capacity{}; std::unique_ptr<T[]> buffer{}; }; diff --git a/src/common/settings.h b/src/common/settings.h index ae5ed93d8..59e96e74f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -527,12 +527,10 @@ struct Values { Setting<bool> mouse_panning{false, "mouse_panning"}; Setting<u8, true> mouse_panning_x_sensitivity{50, 1, 100, "mouse_panning_x_sensitivity"}; Setting<u8, true> mouse_panning_y_sensitivity{50, 1, 100, "mouse_panning_y_sensitivity"}; - Setting<u8, true> mouse_panning_deadzone_x_counterweight{ - 0, 0, 100, "mouse_panning_deadzone_x_counterweight"}; - Setting<u8, true> mouse_panning_deadzone_y_counterweight{ - 0, 0, 100, "mouse_panning_deadzone_y_counterweight"}; - Setting<u8, true> mouse_panning_decay_strength{22, 0, 100, "mouse_panning_decay_strength"}; - Setting<u8, true> mouse_panning_min_decay{5, 0, 100, "mouse_panning_min_decay"}; + Setting<u8, true> mouse_panning_deadzone_counterweight{20, 0, 100, + "mouse_panning_deadzone_counterweight"}; + Setting<u8, true> mouse_panning_decay_strength{18, 0, 100, "mouse_panning_decay_strength"}; + Setting<u8, true> mouse_panning_min_decay{6, 0, 100, "mouse_panning_min_decay"}; Setting<bool> mouse_enabled{false, "mouse_enabled"}; Setting<bool> emulate_analog_keyboard{false, "emulate_analog_keyboard"}; diff --git a/src/core/core.cpp b/src/core/core.cpp index b74fd0a58..9e3eb3795 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -27,6 +27,7 @@ #include "core/file_sys/savedata_factory.h" #include "core/file_sys/vfs_concat.h" #include "core/file_sys/vfs_real.h" +#include "core/gpu_dirty_memory_manager.h" #include "core/hid/hid_core.h" #include "core/hle/kernel/k_memory_manager.h" #include "core/hle/kernel/k_process.h" @@ -130,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, struct System::Impl { explicit Impl(System& system) : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, - cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {} + cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system}, + gpu_dirty_memory_write_manager{} { + memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); + } void Initialize(System& system) { device_memory = std::make_unique<Core::DeviceMemory>(); @@ -234,6 +238,8 @@ struct System::Impl { // Setting changes may require a full system reinitialization (e.g., disabling multicore). ReinitializeIfNecessary(system); + memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); + kernel.Initialize(); cpu_manager.Initialize(); @@ -540,6 +546,9 @@ struct System::Impl { std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{}; std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{}; + + std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> + gpu_dirty_memory_write_manager{}; }; System::System() : impl{std::make_unique<Impl>(*this)} {} @@ -629,10 +638,31 @@ void System::PrepareReschedule(const u32 core_index) { impl->kernel.PrepareReschedule(core_index); } +Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() { + const std::size_t core = impl->kernel.GetCurrentHostThreadID(); + return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES + ? core + : Core::Hardware::NUM_CPU_CORES - 1]; +} + +/// Provides a constant reference to the current gou dirty memory manager. +const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const { + const std::size_t core = impl->kernel.GetCurrentHostThreadID(); + return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES + ? core + : Core::Hardware::NUM_CPU_CORES - 1]; +} + size_t System::GetCurrentHostThreadID() const { return impl->kernel.GetCurrentHostThreadID(); } +void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { + for (auto& manager : impl->gpu_dirty_memory_write_manager) { + manager.Gather(callback); + } +} + PerfStatsResults System::GetAndResetPerfStats() { return impl->GetAndResetPerfStats(); } diff --git a/src/core/core.h b/src/core/core.h index 93afc9303..14b2f7785 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -108,9 +108,10 @@ class CpuManager; class Debugger; class DeviceMemory; class ExclusiveMonitor; -class SpeedLimiter; +class GPUDirtyMemoryManager; class PerfStats; class Reporter; +class SpeedLimiter; class TelemetrySession; struct PerfStatsResults; @@ -225,6 +226,14 @@ public: /// Prepare the core emulation for a reschedule void PrepareReschedule(u32 core_index); + /// Provides a reference to the gou dirty memory manager. + [[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager(); + + /// Provides a constant reference to the current gou dirty memory manager. + [[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const; + + void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); + [[nodiscard]] size_t GetCurrentHostThreadID() const; /// Gets and resets core performance statistics diff --git a/src/core/file_sys/fsmitm_romfsbuild.cpp b/src/core/file_sys/fsmitm_romfsbuild.cpp index 1ff83c08c..e39c7b62b 100644 --- a/src/core/file_sys/fsmitm_romfsbuild.cpp +++ b/src/core/file_sys/fsmitm_romfsbuild.cpp @@ -105,19 +105,11 @@ static u64 romfs_get_hash_table_count(u64 num_entries) { return count; } -void RomFSBuildContext::VisitDirectory(VirtualDir root_romfs, VirtualDir ext_dir, +void RomFSBuildContext::VisitDirectory(VirtualDir romfs_dir, VirtualDir ext_dir, std::shared_ptr<RomFSBuildDirectoryContext> parent) { std::vector<std::shared_ptr<RomFSBuildDirectoryContext>> child_dirs; - VirtualDir dir; - - if (parent->path_len == 0) { - dir = root_romfs; - } else { - dir = root_romfs->GetDirectoryRelative(parent->path); - } - - const auto entries = dir->GetEntries(); + const auto entries = romfs_dir->GetEntries(); for (const auto& kv : entries) { if (kv.second == VfsEntryType::Directory) { @@ -127,7 +119,7 @@ void RomFSBuildContext::VisitDirectory(VirtualDir root_romfs, VirtualDir ext_dir child->path_len = child->cur_path_ofs + static_cast<u32>(kv.first.size()); child->path = parent->path + "/" + kv.first; - if (ext_dir != nullptr && ext_dir->GetFileRelative(child->path + ".stub") != nullptr) { + if (ext_dir != nullptr && ext_dir->GetFile(kv.first + ".stub") != nullptr) { continue; } @@ -144,17 +136,17 @@ void RomFSBuildContext::VisitDirectory(VirtualDir root_romfs, VirtualDir ext_dir child->path_len = child->cur_path_ofs + static_cast<u32>(kv.first.size()); child->path = parent->path + "/" + kv.first; - if (ext_dir != nullptr && ext_dir->GetFileRelative(child->path + ".stub") != nullptr) { + if (ext_dir != nullptr && ext_dir->GetFile(kv.first + ".stub") != nullptr) { continue; } // Sanity check on path_len ASSERT(child->path_len < FS_MAX_PATH); - child->source = root_romfs->GetFileRelative(child->path); + child->source = romfs_dir->GetFile(kv.first); if (ext_dir != nullptr) { - if (const auto ips = ext_dir->GetFileRelative(child->path + ".ips")) { + if (const auto ips = ext_dir->GetFile(kv.first + ".ips")) { if (auto patched = PatchIPS(child->source, ips)) { child->source = std::move(patched); } @@ -168,23 +160,27 @@ void RomFSBuildContext::VisitDirectory(VirtualDir root_romfs, VirtualDir ext_dir } for (auto& child : child_dirs) { - this->VisitDirectory(root_romfs, ext_dir, child); + auto subdir_name = std::string_view(child->path).substr(child->cur_path_ofs); + auto child_romfs_dir = romfs_dir->GetSubdirectory(subdir_name); + auto child_ext_dir = ext_dir != nullptr ? ext_dir->GetSubdirectory(subdir_name) : nullptr; + this->VisitDirectory(child_romfs_dir, child_ext_dir, child); } } bool RomFSBuildContext::AddDirectory(std::shared_ptr<RomFSBuildDirectoryContext> parent_dir_ctx, std::shared_ptr<RomFSBuildDirectoryContext> dir_ctx) { // Check whether it's already in the known directories. - const auto existing = directories.find(dir_ctx->path); - if (existing != directories.end()) + const auto [it, is_new] = directories.emplace(dir_ctx->path, nullptr); + if (!is_new) { return false; + } // Add a new directory. num_dirs++; dir_table_size += sizeof(RomFSDirectoryEntry) + Common::AlignUp(dir_ctx->path_len - dir_ctx->cur_path_ofs, 4); dir_ctx->parent = parent_dir_ctx; - directories.emplace(dir_ctx->path, dir_ctx); + it->second = dir_ctx; return true; } @@ -192,8 +188,8 @@ bool RomFSBuildContext::AddDirectory(std::shared_ptr<RomFSBuildDirectoryContext> bool RomFSBuildContext::AddFile(std::shared_ptr<RomFSBuildDirectoryContext> parent_dir_ctx, std::shared_ptr<RomFSBuildFileContext> file_ctx) { // Check whether it's already in the known files. - const auto existing = files.find(file_ctx->path); - if (existing != files.end()) { + const auto [it, is_new] = files.emplace(file_ctx->path, nullptr); + if (!is_new) { return false; } @@ -202,7 +198,7 @@ bool RomFSBuildContext::AddFile(std::shared_ptr<RomFSBuildDirectoryContext> pare file_table_size += sizeof(RomFSFileEntry) + Common::AlignUp(file_ctx->path_len - file_ctx->cur_path_ofs, 4); file_ctx->parent = parent_dir_ctx; - files.emplace(file_ctx->path, file_ctx); + it->second = file_ctx; return true; } diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h new file mode 100644 index 000000000..9687531e8 --- /dev/null +++ b/src/core/gpu_dirty_memory_manager.h @@ -0,0 +1,122 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include <atomic> +#include <bit> +#include <functional> +#include <mutex> +#include <utility> +#include <vector> + +#include "core/memory.h" + +namespace Core { + +class GPUDirtyMemoryManager { +public: + GPUDirtyMemoryManager() : current{default_transform} { + back_buffer.reserve(256); + front_buffer.reserve(256); + } + + ~GPUDirtyMemoryManager() = default; + + void Collect(VAddr address, size_t size) { + TransformAddress t = BuildTransform(address, size); + TransformAddress tmp, original; + do { + tmp = current.load(std::memory_order_acquire); + original = tmp; + if (tmp.address != t.address) { + if (IsValid(tmp.address)) { + std::scoped_lock lk(guard); + back_buffer.emplace_back(tmp); + current.exchange(t, std::memory_order_relaxed); + return; + } + tmp.address = t.address; + tmp.mask = 0; + } + if ((tmp.mask | t.mask) == tmp.mask) { + return; + } + tmp.mask |= t.mask; + } while (!current.compare_exchange_weak(original, tmp, std::memory_order_release, + std::memory_order_relaxed)); + } + + void Gather(std::function<void(VAddr, size_t)>& callback) { + { + std::scoped_lock lk(guard); + TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed); + front_buffer.swap(back_buffer); + if (IsValid(t.address)) { + front_buffer.emplace_back(t); + } + } + for (auto& transform : front_buffer) { + size_t offset = 0; + u64 mask = transform.mask; + while (mask != 0) { + const size_t empty_bits = std::countr_zero(mask); + offset += empty_bits << align_bits; + mask = mask >> empty_bits; + + const size_t continuous_bits = std::countr_one(mask); + callback((static_cast<VAddr>(transform.address) << page_bits) + offset, + continuous_bits << align_bits); + mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; + offset += continuous_bits << align_bits; + } + } + front_buffer.clear(); + } + +private: + struct alignas(8) TransformAddress { + u32 address; + u32 mask; + }; + + constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1; + constexpr static size_t page_size = 1ULL << page_bits; + constexpr static size_t page_mask = page_size - 1; + + constexpr static size_t align_bits = 6U; + constexpr static size_t align_size = 1U << align_bits; + constexpr static size_t align_mask = align_size - 1; + constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; + + bool IsValid(VAddr address) { + return address < (1ULL << 39); + } + + template <typename T> + T CreateMask(size_t top_bit, size_t minor_bit) { + T mask = ~T(0); + mask <<= (sizeof(T) * 8 - top_bit); + mask >>= (sizeof(T) * 8 - top_bit); + mask >>= minor_bit; + mask <<= minor_bit; + return mask; + } + + TransformAddress BuildTransform(VAddr address, size_t size) { + const size_t minor_address = address & page_mask; + const size_t minor_bit = minor_address >> align_bits; + const size_t top_bit = (minor_address + size + align_mask) >> align_bits; + TransformAddress result{}; + result.address = static_cast<u32>(address >> page_bits); + result.mask = CreateMask<u32>(top_bit, minor_bit); + return result; + } + + std::atomic<TransformAddress> current{}; + std::mutex guard; + std::vector<TransformAddress> back_buffer; + std::vector<TransformAddress> front_buffer; +}; + +} // namespace Core diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index dd662b3f8..d178c2453 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h @@ -338,6 +338,15 @@ public: return m_parent != nullptr; } + std::span<KSynchronizationObject*> GetSynchronizationObjectBuffer() { + return m_sync_object_buffer.sync_objects; + } + + std::span<Handle> GetHandleBuffer() { + return {m_sync_object_buffer.handles.data() + Svc::ArgumentHandleCountMax, + Svc::ArgumentHandleCountMax}; + } + u16 GetUserDisableCount() const; void SetInterruptFlag(); void ClearInterruptFlag(); @@ -855,6 +864,7 @@ private: u32* m_light_ipc_data{}; KProcessAddress m_tls_address{}; KLightLock m_activity_pause_lock; + SyncObjectBuffer m_sync_object_buffer{}; s64 m_schedule_count{}; s64 m_last_scheduled_tick{}; std::array<QueueEntry, Core::Hardware::NUM_CPU_CORES> m_per_core_priority_queue_entry{}; diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp index 60247df2e..bb94f6934 100644 --- a/src/core/hle/kernel/svc/svc_ipc.cpp +++ b/src/core/hle/kernel/svc/svc_ipc.cpp @@ -38,22 +38,31 @@ Result SendAsyncRequestWithUserBuffer(Core::System& system, Handle* out_event_ha Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_addr, s32 num_handles, Handle reply_target, s64 timeout_ns) { + // Ensure number of handles is valid. + R_UNLESS(0 <= num_handles && num_handles <= ArgumentHandleCountMax, ResultOutOfRange); + + // Get the synchronization context. auto& kernel = system.Kernel(); auto& handle_table = GetCurrentProcess(kernel).GetHandleTable(); - - R_UNLESS(0 <= num_handles && num_handles <= ArgumentHandleCountMax, ResultOutOfRange); - R_UNLESS(GetCurrentMemory(kernel).IsValidVirtualAddressRange( - handles_addr, static_cast<u64>(sizeof(Handle) * num_handles)), - ResultInvalidPointer); - - std::array<Handle, Svc::ArgumentHandleCountMax> handles; - GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(), sizeof(Handle) * num_handles); - - // Convert handle list to object table. - std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs; - R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles.data(), - num_handles), - ResultInvalidHandle); + auto objs = GetCurrentThread(kernel).GetSynchronizationObjectBuffer(); + auto handles = GetCurrentThread(kernel).GetHandleBuffer(); + + // Copy user handles. + if (num_handles > 0) { + // Ensure we can try to get the handles. + R_UNLESS(GetCurrentMemory(kernel).IsValidVirtualAddressRange( + handles_addr, static_cast<u64>(sizeof(Handle) * num_handles)), + ResultInvalidPointer); + + // Get the handles. + GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(), + sizeof(Handle) * num_handles); + + // Convert the handles to objects. + R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>( + objs.data(), handles.data(), num_handles), + ResultInvalidHandle); + } // Ensure handles are closed when we're done. SCOPE_EXIT({ diff --git a/src/core/hle/kernel/svc/svc_synchronization.cpp b/src/core/hle/kernel/svc/svc_synchronization.cpp index 53df5bcd8..f02d03f30 100644 --- a/src/core/hle/kernel/svc/svc_synchronization.cpp +++ b/src/core/hle/kernel/svc/svc_synchronization.cpp @@ -47,21 +47,35 @@ Result ResetSignal(Core::System& system, Handle handle) { R_THROW(ResultInvalidHandle); } -static Result WaitSynchronization(Core::System& system, int32_t* out_index, const Handle* handles, - int32_t num_handles, int64_t timeout_ns) { +/// Wait for the given handles to synchronize, timeout after the specified nanoseconds +Result WaitSynchronization(Core::System& system, int32_t* out_index, u64 user_handles, + int32_t num_handles, int64_t timeout_ns) { + LOG_TRACE(Kernel_SVC, "called user_handles={:#x}, num_handles={}, timeout_ns={}", user_handles, + num_handles, timeout_ns); + // Ensure number of handles is valid. R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange); // Get the synchronization context. auto& kernel = system.Kernel(); auto& handle_table = GetCurrentProcess(kernel).GetHandleTable(); - std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs; + auto objs = GetCurrentThread(kernel).GetSynchronizationObjectBuffer(); + auto handles = GetCurrentThread(kernel).GetHandleBuffer(); // Copy user handles. if (num_handles > 0) { + // Ensure we can try to get the handles. + R_UNLESS(GetCurrentMemory(kernel).IsValidVirtualAddressRange( + user_handles, static_cast<u64>(sizeof(Handle) * num_handles)), + ResultInvalidPointer); + + // Get the handles. + GetCurrentMemory(kernel).ReadBlock(user_handles, handles.data(), + sizeof(Handle) * num_handles); + // Convert the handles to objects. - R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles, - num_handles), + R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>( + objs.data(), handles.data(), num_handles), ResultInvalidHandle); } @@ -80,23 +94,6 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons R_RETURN(res); } -/// Wait for the given handles to synchronize, timeout after the specified nanoseconds -Result WaitSynchronization(Core::System& system, int32_t* out_index, u64 user_handles, - int32_t num_handles, int64_t timeout_ns) { - LOG_TRACE(Kernel_SVC, "called user_handles={:#x}, num_handles={}, timeout_ns={}", user_handles, - num_handles, timeout_ns); - - // Ensure number of handles is valid. - R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange); - std::array<Handle, Svc::ArgumentHandleCountMax> handles; - if (num_handles > 0) { - GetCurrentMemory(system.Kernel()) - .ReadBlock(user_handles, handles.data(), num_handles * sizeof(Handle)); - } - - R_RETURN(WaitSynchronization(system, out_index, handles.data(), num_handles, timeout_ns)); -} - /// Resumes a thread waiting on WaitSynchronization Result CancelSynchronization(Core::System& system, Handle handle) { LOG_TRACE(Kernel_SVC, "called handle=0x{:X}", handle); diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp index c8d574993..526a39130 100644 --- a/src/core/hle/service/audio/audin_u.cpp +++ b/src/core/hle/service/audio/audin_u.cpp @@ -5,7 +5,7 @@ #include "audio_core/renderer/audio_device.h" #include "common/common_funcs.h" #include "common/logging/log.h" -#include "common/settings.h" +#include "common/scratch_buffer.h" #include "common/string_util.h" #include "core/core.h" #include "core/hle/kernel/k_event.h" @@ -124,12 +124,15 @@ private: void GetReleasedAudioInBuffer(HLERequestContext& ctx) { const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); - tmp_buffer.resize_destructive(write_buffer_size); - tmp_buffer[0] = 0; + released_buffer.resize_destructive(write_buffer_size); + released_buffer[0] = 0; - const auto count = impl->GetReleasedBuffers(tmp_buffer); + const auto count = impl->GetReleasedBuffers(released_buffer); - ctx.WriteBuffer(tmp_buffer); + LOG_TRACE(Service_Audio, "called. Session {} released {} buffers", + impl->GetSystem().GetSessionId(), count); + + ctx.WriteBuffer(released_buffer); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); @@ -155,7 +158,6 @@ private: LOG_DEBUG(Service_Audio, "called. Buffer count={}", buffer_count); IPC::ResponseBuilder rb{ctx, 3}; - rb.Push(ResultSuccess); rb.Push(buffer_count); } @@ -195,7 +197,7 @@ private: KernelHelpers::ServiceContext service_context; Kernel::KEvent* event; std::shared_ptr<AudioCore::AudioIn::In> impl; - Common::ScratchBuffer<u64> tmp_buffer; + Common::ScratchBuffer<u64> released_buffer; }; AudInU::AudInU(Core::System& system_) diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 032c8c11f..23f84a29f 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -9,6 +9,7 @@ #include "audio_core/renderer/audio_device.h" #include "common/common_funcs.h" #include "common/logging/log.h" +#include "common/scratch_buffer.h" #include "common/string_util.h" #include "common/swap.h" #include "core/core.h" @@ -102,8 +103,8 @@ private: AudioOutBuffer buffer{}; std::memcpy(&buffer, in_buffer.data(), sizeof(AudioOutBuffer)); - [[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()}; - LOG_TRACE(Service_Audio, "called. Session {} Appending buffer {:08X}", sessionid, tag); + LOG_TRACE(Service_Audio, "called. Session {} Appending buffer {:08X}", + impl->GetSystem().GetSessionId(), tag); auto result = impl->AppendBuffer(buffer, tag); @@ -123,12 +124,15 @@ private: void GetReleasedAudioOutBuffers(HLERequestContext& ctx) { const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); - tmp_buffer.resize_destructive(write_buffer_size); - tmp_buffer[0] = 0; + released_buffer.resize_destructive(write_buffer_size); + released_buffer[0] = 0; - const auto count = impl->GetReleasedBuffers(tmp_buffer); + const auto count = impl->GetReleasedBuffers(released_buffer); - ctx.WriteBuffer(tmp_buffer); + ctx.WriteBuffer(released_buffer); + + LOG_TRACE(Service_Audio, "called. Session {} released {} buffers", + impl->GetSystem().GetSessionId(), count); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); @@ -154,7 +158,6 @@ private: LOG_DEBUG(Service_Audio, "called. Buffer count={}", buffer_count); IPC::ResponseBuilder rb{ctx, 3}; - rb.Push(ResultSuccess); rb.Push(buffer_count); } @@ -165,7 +168,6 @@ private: LOG_DEBUG(Service_Audio, "called. Played samples={}", samples_played); IPC::ResponseBuilder rb{ctx, 4}; - rb.Push(ResultSuccess); rb.Push(samples_played); } @@ -205,7 +207,7 @@ private: KernelHelpers::ServiceContext service_context; Kernel::KEvent* event; std::shared_ptr<AudioCore::AudioOut::Out> impl; - Common::ScratchBuffer<u64> tmp_buffer; + Common::ScratchBuffer<u64> released_buffer; }; AudOutU::AudOutU(Core::System& system_) diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 12845c23a..003870176 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -15,6 +15,7 @@ #include "common/common_funcs.h" #include "common/logging/log.h" #include "common/polyfill_ranges.h" +#include "common/scratch_buffer.h" #include "common/string_util.h" #include "core/core.h" #include "core/hle/kernel/k_event.h" @@ -119,23 +120,23 @@ private: auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0}; if (is_buffer_b) { const auto buffersB{ctx.BufferDescriptorB()}; - tmp_output.resize_destructive(buffersB[0].Size()); - tmp_performance.resize_destructive(buffersB[1].Size()); + output_buffer.resize_destructive(buffersB[0].Size()); + performance_buffer.resize_destructive(buffersB[1].Size()); } else { const auto buffersC{ctx.BufferDescriptorC()}; - tmp_output.resize_destructive(buffersC[0].Size()); - tmp_performance.resize_destructive(buffersC[1].Size()); + output_buffer.resize_destructive(buffersC[0].Size()); + performance_buffer.resize_destructive(buffersC[1].Size()); } - auto result = impl->RequestUpdate(input, tmp_performance, tmp_output); + auto result = impl->RequestUpdate(input, performance_buffer, output_buffer); if (result.IsSuccess()) { if (is_buffer_b) { - ctx.WriteBufferB(tmp_output.data(), tmp_output.size(), 0); - ctx.WriteBufferB(tmp_performance.data(), tmp_performance.size(), 1); + ctx.WriteBufferB(output_buffer.data(), output_buffer.size(), 0); + ctx.WriteBufferB(performance_buffer.data(), performance_buffer.size(), 1); } else { - ctx.WriteBufferC(tmp_output.data(), tmp_output.size(), 0); - ctx.WriteBufferC(tmp_performance.data(), tmp_performance.size(), 1); + ctx.WriteBufferC(output_buffer.data(), output_buffer.size(), 0); + ctx.WriteBufferC(performance_buffer.data(), performance_buffer.size(), 1); } } else { LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description); @@ -233,8 +234,8 @@ private: Kernel::KEvent* rendered_event; Manager& manager; std::unique_ptr<Renderer> impl; - Common::ScratchBuffer<u8> tmp_output; - Common::ScratchBuffer<u8> tmp_performance; + Common::ScratchBuffer<u8> output_buffer; + Common::ScratchBuffer<u8> performance_buffer; }; class IAudioDevice final : public ServiceFramework<IAudioDevice> { diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index c835f6cb7..fa77007f3 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -11,6 +11,7 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "common/scratch_buffer.h" #include "core/hle/service/audio/hwopus.h" #include "core/hle/service/ipc_helpers.h" @@ -68,13 +69,13 @@ private: ExtraBehavior extra_behavior) { u32 consumed = 0; u32 sample_count = 0; - tmp_samples.resize_destructive(ctx.GetWriteBufferNumElements<opus_int16>()); + samples.resize_destructive(ctx.GetWriteBufferNumElements<opus_int16>()); if (extra_behavior == ExtraBehavior::ResetContext) { ResetDecoderContext(); } - if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), tmp_samples, performance)) { + if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) { LOG_ERROR(Audio, "Failed to decode opus data"); IPC::ResponseBuilder rb{ctx, 2}; // TODO(ogniK): Use correct error code @@ -90,7 +91,7 @@ private: if (performance) { rb.Push<u64>(*performance); } - ctx.WriteBuffer(tmp_samples); + ctx.WriteBuffer(samples); } bool DecodeOpusData(u32& consumed, u32& sample_count, std::span<const u8> input, @@ -154,7 +155,7 @@ private: OpusDecoderPtr decoder; u32 sample_rate; u32 channel_count; - Common::ScratchBuffer<opus_int16> tmp_samples; + Common::ScratchBuffer<opus_int16> samples; }; class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp index 348207e25..c8a880e84 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp +++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: 2021 Skyline Team and Contributors // SPDX-License-Identifier: GPL-3.0-or-later -#include <cinttypes> #include "common/logging/log.h" #include "core/core.h" #include "core/hle/kernel/k_event.h" @@ -63,12 +62,12 @@ void NVDRV::Ioctl1(HLERequestContext& ctx) { } // Check device - tmp_output.resize_destructive(ctx.GetWriteBufferSize(0)); + output_buffer.resize_destructive(ctx.GetWriteBufferSize(0)); const auto input_buffer = ctx.ReadBuffer(0); - const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output); + const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer); if (command.is_out != 0) { - ctx.WriteBuffer(tmp_output); + ctx.WriteBuffer(output_buffer); } IPC::ResponseBuilder rb{ctx, 3}; @@ -90,12 +89,12 @@ void NVDRV::Ioctl2(HLERequestContext& ctx) { const auto input_buffer = ctx.ReadBuffer(0); const auto input_inlined_buffer = ctx.ReadBuffer(1); - tmp_output.resize_destructive(ctx.GetWriteBufferSize(0)); + output_buffer.resize_destructive(ctx.GetWriteBufferSize(0)); const auto nv_result = - nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, tmp_output); + nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer); if (command.is_out != 0) { - ctx.WriteBuffer(tmp_output); + ctx.WriteBuffer(output_buffer); } IPC::ResponseBuilder rb{ctx, 3}; @@ -116,12 +115,14 @@ void NVDRV::Ioctl3(HLERequestContext& ctx) { } const auto input_buffer = ctx.ReadBuffer(0); - tmp_output.resize_destructive(ctx.GetWriteBufferSize(0)); - tmp_output_inline.resize_destructive(ctx.GetWriteBufferSize(1)); - const auto nv_result = nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output_inline); + output_buffer.resize_destructive(ctx.GetWriteBufferSize(0)); + inline_output_buffer.resize_destructive(ctx.GetWriteBufferSize(1)); + + const auto nv_result = + nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, inline_output_buffer); if (command.is_out != 0) { - ctx.WriteBuffer(tmp_output, 0); - ctx.WriteBuffer(tmp_output_inline, 1); + ctx.WriteBuffer(output_buffer, 0); + ctx.WriteBuffer(inline_output_buffer, 1); } IPC::ResponseBuilder rb{ctx, 3}; diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h index 4b593ff90..6e98115dc 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.h +++ b/src/core/hle/service/nvdrv/nvdrv_interface.h @@ -4,6 +4,7 @@ #pragma once #include <memory> + #include "common/scratch_buffer.h" #include "core/hle/service/nvdrv/nvdrv.h" #include "core/hle/service/service.h" @@ -34,8 +35,8 @@ private: u64 pid{}; bool is_initialized{}; - Common::ScratchBuffer<u8> tmp_output; - Common::ScratchBuffer<u8> tmp_output_inline; + Common::ScratchBuffer<u8> output_buffer; + Common::ScratchBuffer<u8> inline_output_buffer; }; } // namespace Service::Nvidia diff --git a/src/core/hle/service/nvnflinger/parcel.h b/src/core/hle/service/nvnflinger/parcel.h index 23ba315a0..e2c9bbd50 100644 --- a/src/core/hle/service/nvnflinger/parcel.h +++ b/src/core/hle/service/nvnflinger/parcel.h @@ -6,6 +6,7 @@ #include <memory> #include <span> #include <vector> + #include <boost/container/small_vector.hpp> #include "common/alignment.h" @@ -148,9 +149,9 @@ public: this->WriteImpl(0U, m_object_buffer); } - std::vector<u8> Serialize() const { - std::vector<u8> output_buffer(sizeof(ParcelHeader) + m_data_buffer.size() + - m_object_buffer.size()); + std::span<u8> Serialize() { + m_output_buffer.resize(sizeof(ParcelHeader) + m_data_buffer.size() + + m_object_buffer.size()); ParcelHeader header{}; header.data_size = static_cast<u32>(m_data_buffer.size()); @@ -158,17 +159,17 @@ public: header.objects_size = static_cast<u32>(m_object_buffer.size()); header.objects_offset = header.data_offset + header.data_size; - std::memcpy(output_buffer.data(), &header, sizeof(header)); - std::ranges::copy(m_data_buffer, output_buffer.data() + header.data_offset); - std::ranges::copy(m_object_buffer, output_buffer.data() + header.objects_offset); + std::memcpy(m_output_buffer.data(), &header, sizeof(ParcelHeader)); + std::ranges::copy(m_data_buffer, m_output_buffer.data() + header.data_offset); + std::ranges::copy(m_object_buffer, m_output_buffer.data() + header.objects_offset); - return output_buffer; + return m_output_buffer; } private: - template <typename T> + template <typename T, size_t BufferSize> requires(std::is_trivially_copyable_v<T>) - void WriteImpl(const T& val, boost::container::small_vector<u8, 0x200>& buffer) { + void WriteImpl(const T& val, boost::container::small_vector<u8, BufferSize>& buffer) { const size_t aligned_size = Common::AlignUp(sizeof(T), 4); const size_t old_size = buffer.size(); buffer.resize(old_size + aligned_size); @@ -177,8 +178,9 @@ private: } private: - boost::container::small_vector<u8, 0x200> m_data_buffer; - boost::container::small_vector<u8, 0x200> m_object_buffer; + boost::container::small_vector<u8, 0x1B0> m_data_buffer; + boost::container::small_vector<u8, 0x40> m_object_buffer; + boost::container::small_vector<u8, 0x200> m_output_buffer; }; } // namespace Service::android diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 514ba0d66..257406f09 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -3,6 +3,7 @@ #include <algorithm> #include <cstring> +#include <span> #include "common/assert.h" #include "common/atomic_ops.h" @@ -13,6 +14,7 @@ #include "common/swap.h" #include "core/core.h" #include "core/device_memory.h" +#include "core/gpu_dirty_memory_manager.h" #include "core/hardware_properties.h" #include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_process.h" @@ -678,7 +680,7 @@ struct Memory::Impl { LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); }, - [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); }); + [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); if (ptr) { std::memcpy(ptr, &data, sizeof(T)); } @@ -692,7 +694,7 @@ struct Memory::Impl { LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); }, - [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); }); + [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); if (ptr) { const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); @@ -707,7 +709,7 @@ struct Memory::Impl { LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); }, - [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(u128)); }); + [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); }); if (ptr) { const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); @@ -717,7 +719,7 @@ struct Memory::Impl { void HandleRasterizerDownload(VAddr address, size_t size) { const size_t core = system.GetCurrentHostThreadID(); - auto& current_area = rasterizer_areas[core]; + auto& current_area = rasterizer_read_areas[core]; const VAddr end_address = address + size; if (current_area.start_address <= address && end_address <= current_area.end_address) [[likely]] { @@ -726,9 +728,31 @@ struct Memory::Impl { current_area = system.GPU().OnCPURead(address, size); } - Common::PageTable* current_page_table = nullptr; - std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{}; + void HandleRasterizerWrite(VAddr address, size_t size) { + const size_t core = system.GetCurrentHostThreadID(); + auto& current_area = rasterizer_write_areas[core]; + VAddr subaddress = address >> YUZU_PAGEBITS; + bool do_collection = current_area.last_address == subaddress; + if (!do_collection) [[unlikely]] { + do_collection = system.GPU().OnCPUWrite(address, size); + if (!do_collection) { + return; + } + current_area.last_address = subaddress; + } + gpu_dirty_managers[core].Collect(address, size); + } + + struct GPUDirtyState { + VAddr last_address; + }; + Core::System& system; + Common::PageTable* current_page_table = nullptr; + std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> + rasterizer_read_areas{}; + std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; + std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; }; Memory::Memory(Core::System& system_) : system{system_} { @@ -876,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size) impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); } +void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) { + impl->gpu_dirty_managers = managers; +} + Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); } diff --git a/src/core/memory.h b/src/core/memory.h index 72a0be813..ea01824f8 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -5,6 +5,7 @@ #include <cstddef> #include <memory> +#include <span> #include <string> #include "common/typed_address.h" #include "core/hle/result.h" @@ -15,7 +16,8 @@ struct PageTable; namespace Core { class System; -} +class GPUDirtyMemoryManager; +} // namespace Core namespace Kernel { class PhysicalMemory; @@ -458,6 +460,8 @@ public: */ void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); + void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); + private: Core::System& system; diff --git a/src/input_common/drivers/mouse.cpp b/src/input_common/drivers/mouse.cpp index f07cf8a0e..dac29c78f 100644 --- a/src/input_common/drivers/mouse.cpp +++ b/src/input_common/drivers/mouse.cpp @@ -12,9 +12,13 @@ namespace InputCommon { constexpr int update_time = 10; -constexpr float default_stick_sensitivity = 0.0044f; -constexpr float default_motion_sensitivity = 0.0003f; +constexpr float default_panning_sensitivity = 0.0010f; +constexpr float default_stick_sensitivity = 0.0006f; +constexpr float default_deadzone_counterweight = 0.01f; +constexpr float default_motion_panning_sensitivity = 2.5f; +constexpr float default_motion_sensitivity = 0.416f; constexpr float maximum_rotation_speed = 2.0f; +constexpr float maximum_stick_range = 1.5f; constexpr int mouse_axis_x = 0; constexpr int mouse_axis_y = 1; constexpr int wheel_axis_x = 2; @@ -81,7 +85,7 @@ void Mouse::UpdateThread(std::stop_token stop_token) { } void Mouse::UpdateStickInput() { - if (!Settings::values.mouse_panning) { + if (!IsMousePanningEnabled()) { return; } @@ -89,26 +93,13 @@ void Mouse::UpdateStickInput() { // Prevent input from exceeding the max range (1.0f) too much, // but allow some room to make it easier to sustain - if (length > 1.2f) { + if (length > maximum_stick_range) { last_mouse_change /= length; - last_mouse_change *= 1.2f; + last_mouse_change *= maximum_stick_range; } - auto mouse_change = last_mouse_change; - - // Bind the mouse change to [0 <= deadzone_counterweight <= 1,1] - if (length < 1.0f) { - const float deadzone_h_counterweight = - Settings::values.mouse_panning_deadzone_x_counterweight.GetValue(); - const float deadzone_v_counterweight = - Settings::values.mouse_panning_deadzone_y_counterweight.GetValue(); - mouse_change /= length; - mouse_change.x *= length + (1 - length) * deadzone_h_counterweight * 0.01f; - mouse_change.y *= length + (1 - length) * deadzone_v_counterweight * 0.01f; - } - - SetAxis(identifier, mouse_axis_x, mouse_change.x); - SetAxis(identifier, mouse_axis_y, -mouse_change.y); + SetAxis(identifier, mouse_axis_x, last_mouse_change.x); + SetAxis(identifier, mouse_axis_y, -last_mouse_change.y); // Decay input over time const float clamped_length = std::min(1.0f, length); @@ -120,14 +111,13 @@ void Mouse::UpdateStickInput() { } void Mouse::UpdateMotionInput() { - // This may need its own sensitivity instead of using the average - const float sensitivity = (Settings::values.mouse_panning_x_sensitivity.GetValue() + - Settings::values.mouse_panning_y_sensitivity.GetValue()) / - 2.0f * default_motion_sensitivity; + const float sensitivity = + IsMousePanningEnabled() ? default_motion_panning_sensitivity : default_motion_sensitivity; const float rotation_velocity = std::sqrt(last_motion_change.x * last_motion_change.x + last_motion_change.y * last_motion_change.y); + // Clamp rotation speed if (rotation_velocity > maximum_rotation_speed / sensitivity) { const float multiplier = maximum_rotation_speed / rotation_velocity / sensitivity; last_motion_change.x = last_motion_change.x * multiplier; @@ -144,7 +134,7 @@ void Mouse::UpdateMotionInput() { .delta_timestamp = update_time * 1000, }; - if (Settings::values.mouse_panning) { + if (IsMousePanningEnabled()) { last_motion_change.x = 0; last_motion_change.y = 0; } @@ -154,33 +144,42 @@ void Mouse::UpdateMotionInput() { } void Mouse::Move(int x, int y, int center_x, int center_y) { - if (Settings::values.mouse_panning) { + if (IsMousePanningEnabled()) { const auto mouse_change = (Common::MakeVec(x, y) - Common::MakeVec(center_x, center_y)).Cast<float>(); const float x_sensitivity = - Settings::values.mouse_panning_x_sensitivity.GetValue() * default_stick_sensitivity; + Settings::values.mouse_panning_x_sensitivity.GetValue() * default_panning_sensitivity; const float y_sensitivity = - Settings::values.mouse_panning_y_sensitivity.GetValue() * default_stick_sensitivity; + Settings::values.mouse_panning_y_sensitivity.GetValue() * default_panning_sensitivity; + const float deadzone_counterweight = + Settings::values.mouse_panning_deadzone_counterweight.GetValue() * + default_deadzone_counterweight; + + last_motion_change += {-mouse_change.y * x_sensitivity, -mouse_change.x * y_sensitivity, 0}; + last_mouse_change.x += mouse_change.x * x_sensitivity; + last_mouse_change.y += mouse_change.y * y_sensitivity; - last_motion_change += {-mouse_change.y, -mouse_change.x, 0}; - last_mouse_change.x += mouse_change.x * x_sensitivity * 0.09f; - last_mouse_change.y += mouse_change.y * y_sensitivity * 0.09f; + // Bind the mouse change to [0 <= deadzone_counterweight <= 1.0] + if (last_mouse_change.Length() < deadzone_counterweight) { + last_mouse_change /= last_mouse_change.Length(); + last_mouse_change *= deadzone_counterweight; + } return; } if (button_pressed) { const auto mouse_move = Common::MakeVec<int>(x, y) - mouse_origin; - const float x_sensitivity = Settings::values.mouse_panning_x_sensitivity.GetValue(); - const float y_sensitivity = Settings::values.mouse_panning_y_sensitivity.GetValue(); - SetAxis(identifier, mouse_axis_x, - static_cast<float>(mouse_move.x) * x_sensitivity * 0.0012f); - SetAxis(identifier, mouse_axis_y, - static_cast<float>(-mouse_move.y) * y_sensitivity * 0.0012f); + const float x_sensitivity = + Settings::values.mouse_panning_x_sensitivity.GetValue() * default_stick_sensitivity; + const float y_sensitivity = + Settings::values.mouse_panning_y_sensitivity.GetValue() * default_stick_sensitivity; + SetAxis(identifier, mouse_axis_x, static_cast<float>(mouse_move.x) * x_sensitivity); + SetAxis(identifier, mouse_axis_y, static_cast<float>(-mouse_move.y) * y_sensitivity); last_motion_change = { - static_cast<float>(-mouse_move.y) / 50.0f, - static_cast<float>(-mouse_move.x) / 50.0f, + static_cast<float>(-mouse_move.y) * x_sensitivity, + static_cast<float>(-mouse_move.x) * y_sensitivity, last_motion_change.z, }; } @@ -220,7 +219,7 @@ void Mouse::ReleaseButton(MouseButton button) { SetButton(real_mouse_identifier, static_cast<int>(button), false); SetButton(touch_identifier, static_cast<int>(button), false); - if (!Settings::values.mouse_panning) { + if (!IsMousePanningEnabled()) { SetAxis(identifier, mouse_axis_x, 0); SetAxis(identifier, mouse_axis_y, 0); } @@ -234,7 +233,7 @@ void Mouse::ReleaseButton(MouseButton button) { void Mouse::MouseWheelChange(int x, int y) { wheel_position.x += x; wheel_position.y += y; - last_motion_change.z += static_cast<f32>(y) / 100.0f; + last_motion_change.z += static_cast<f32>(y); SetAxis(identifier, wheel_axis_x, static_cast<f32>(wheel_position.x)); SetAxis(identifier, wheel_axis_y, static_cast<f32>(wheel_position.y)); } @@ -244,6 +243,11 @@ void Mouse::ReleaseAllButtons() { button_pressed = false; } +bool Mouse::IsMousePanningEnabled() { + // Disable mouse panning when a real mouse is connected + return Settings::values.mouse_panning && !Settings::values.mouse_enabled; +} + std::vector<Common::ParamPackage> Mouse::GetInputDevices() const { std::vector<Common::ParamPackage> devices; devices.emplace_back(Common::ParamPackage{ diff --git a/src/input_common/drivers/mouse.h b/src/input_common/drivers/mouse.h index 0e8edcce1..2b93a40b9 100644 --- a/src/input_common/drivers/mouse.h +++ b/src/input_common/drivers/mouse.h @@ -99,6 +99,8 @@ private: void UpdateStickInput(); void UpdateMotionInput(); + bool IsMousePanningEnabled(); + Common::Input::ButtonNames GetUIButtonName(const Common::ParamPackage& params) const; Common::Vec2<int> mouse_origin; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 58a45ab67..b5ed3380f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -115,7 +115,34 @@ void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { template <class P> void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { - memory_tracker.CachedCpuWrite(cpu_addr, size); + const bool is_dirty = IsRegionRegistered(cpu_addr, size); + if (!is_dirty) { + return; + } + VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE); + VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE); + if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { + WriteMemory(cpu_addr, size); + return; + } + + tmp_buffer.resize_destructive(size); + cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size); + + InlineMemoryImplementation(cpu_addr, size, tmp_buffer); +} + +template <class P> +bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) { + const bool is_dirty = IsRegionRegistered(cpu_addr, size); + if (!is_dirty) { + return false; + } + if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { + return true; + } + WriteMemory(cpu_addr, size); + return false; } template <class P> @@ -1553,6 +1580,14 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, return false; } + InlineMemoryImplementation(dest_address, copy_size, inlined_buffer); + + return true; +} + +template <class P> +void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, + std::span<const u8> inlined_buffer) { const IntervalType subtract_interval{dest_address, dest_address + copy_size}; ClearDownload(subtract_interval); common_ranges.subtract(subtract_interval); @@ -1574,8 +1609,6 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, } else { buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); } - - return true; } template <class P> diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index fe6068cfe..460fc7551 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -245,6 +245,8 @@ public: void CachedWriteMemory(VAddr cpu_addr, u64 size); + bool OnCPUWrite(VAddr cpu_addr, u64 size); + void DownloadMemory(VAddr cpu_addr, u64 size); std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); @@ -543,6 +545,9 @@ private: void ClearDownload(IntervalType subtract_interval); + void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, + std::span<const u8> inlined_buffer); + VideoCore::RasterizerInterface& rasterizer; Core::Memory::Memory& cpu_memory; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a290d6ea7..f8598fd98 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -174,8 +174,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { src_operand.address = regs.offset_in; DMA::BufferOperand dst_operand; - u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out)); - dst_operand.pitch = abs_pitch_out; + dst_operand.pitch = static_cast<u32>(std::abs(regs.pitch_out)); dst_operand.width = regs.line_length_in; dst_operand.height = regs.line_count; dst_operand.address = regs.offset_out; @@ -222,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { const size_t src_size = CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); - const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count; + const size_t dst_size = dst_operand.pitch * regs.line_count; read_buffer.resize_destructive(src_size); write_buffer.resize_destructive(dst_size); @@ -231,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, - abs_pitch_out); + dst_operand.pitch); memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 35d699bbf..ab20ff30f 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -69,7 +69,6 @@ public: } void SignalFence(std::function<void()>&& func) { - rasterizer.InvalidateGPUCache(); bool delay_fence = Settings::IsGPULevelHigh(); if constexpr (!can_async_check) { TryReleasePendingFences<false>(); @@ -96,6 +95,7 @@ public: guard.unlock(); cv.notify_all(); } + rasterizer.InvalidateGPUCache(); } void SignalSyncPoint(u32 value) { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index db385076d..c192e33b2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -95,7 +95,9 @@ struct GPU::Impl { /// Synchronizes CPU writes with Host GPU memory. void InvalidateGPUCache() { - rasterizer->InvalidateGPUCache(); + std::function<void(VAddr, size_t)> callback_writes( + [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); + system.GatherGPUDirtyMemory(callback_writes); } /// Signal the ending of command list. @@ -299,6 +301,10 @@ struct GPU::Impl { gpu_thread.InvalidateRegion(addr, size); } + bool OnCPUWrite(VAddr addr, u64 size) { + return rasterizer->OnCPUWrite(addr, size); + } + /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(VAddr addr, u64 size) { gpu_thread.FlushAndInvalidateRegion(addr, size); @@ -561,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) { impl->InvalidateRegion(addr, size); } +bool GPU::OnCPUWrite(VAddr addr, u64 size) { + return impl->OnCPUWrite(addr, size); +} + void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { impl->FlushAndInvalidateRegion(addr, size); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e49c40cf2..ba2838b89 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -250,6 +250,10 @@ public: /// Notify rasterizer that any caches of the specified region should be invalidated void InvalidateRegion(VAddr addr, u64 size); + /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is + /// sensible, false otherwise + bool OnCPUWrite(VAddr addr, u64 size); + /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(VAddr addr, u64 size); diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 889144f38..2f0f9f593 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system, } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { rasterizer->FlushRegion(flush->addr, flush->size); } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { - rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); + rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size); } else { ASSERT(false); } @@ -102,12 +102,12 @@ void ThreadManager::TickGPU() { } void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { - rasterizer->OnCPUWrite(addr, size); + rasterizer->OnCacheInvalidation(addr, size); } void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important - rasterizer->OnCPUWrite(addr, size); + rasterizer->OnCacheInvalidation(addr, size); } u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index cd6a3a9b8..da07a556f 100644 --- a/src/video_core/host1x/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp @@ -290,7 +290,7 @@ void Codec::Decode() { return vp9_decoder->GetFrameBytes(); default: ASSERT(false); - return std::vector<u8>{}; + return std::span<const u8>{}; } }(); AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index ce827eb6c..862904e39 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp @@ -29,15 +29,15 @@ H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {} H264::~H264() = default; -const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, - bool is_first_frame) { +std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, + bool is_first_frame) { H264DecoderContext context; host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); const s64 frame_number = context.h264_parameter_set.frame_number.Value(); if (!is_first_frame && frame_number != 0) { - frame.resize(context.stream_len); + frame.resize_destructive(context.stream_len); host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); return frame; } @@ -135,14 +135,14 @@ const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegist for (s32 index = 0; index < 6; index++) { writer.WriteBit(true); std::span<const u8> matrix{context.weight_scale}; - writer.WriteScalingList(matrix, index * 16, 16); + writer.WriteScalingList(scan, matrix, index * 16, 16); } if (context.h264_parameter_set.transform_8x8_mode_flag) { for (s32 index = 0; index < 2; index++) { writer.WriteBit(true); std::span<const u8> matrix{context.weight_scale_8x8}; - writer.WriteScalingList(matrix, index * 64, 64); + writer.WriteScalingList(scan, matrix, index * 64, 64); } } @@ -188,8 +188,8 @@ void H264BitWriter::WriteBit(bool state) { WriteBits(state ? 1 : 0, 1); } -void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { - static Common::ScratchBuffer<u8> scan{}; +void H264BitWriter::WriteScalingList(Common::ScratchBuffer<u8>& scan, std::span<const u8> list, + s32 start, s32 count) { scan.resize_destructive(count); if (count == 16) { std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); diff --git a/src/video_core/host1x/codecs/h264.h b/src/video_core/host1x/codecs/h264.h index 5cc86454e..d6b556322 100644 --- a/src/video_core/host1x/codecs/h264.h +++ b/src/video_core/host1x/codecs/h264.h @@ -5,9 +5,11 @@ #include <span> #include <vector> + #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "video_core/host1x/nvdec_common.h" namespace Tegra { @@ -37,7 +39,8 @@ public: /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification /// Writes the scaling matrices of the sream - void WriteScalingList(std::span<const u8> list, s32 start, s32 count); + void WriteScalingList(Common::ScratchBuffer<u8>& scan, std::span<const u8> list, s32 start, + s32 count); /// Return the bitstream as a vector. [[nodiscard]] std::vector<u8>& GetByteArray(); @@ -63,11 +66,12 @@ public: ~H264(); /// Compose the H264 frame for FFmpeg decoding - [[nodiscard]] const std::vector<u8>& ComposeFrame( - const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); + [[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, + bool is_first_frame = false); private: - std::vector<u8> frame; + Common::ScratchBuffer<u8> frame; + Common::ScratchBuffer<u8> scan; Host1x::Host1x& host1x; struct H264ParameterSet { diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp index 28fb12cb8..ee6392ff9 100644 --- a/src/video_core/host1x/codecs/vp8.cpp +++ b/src/video_core/host1x/codecs/vp8.cpp @@ -12,7 +12,7 @@ VP8::VP8(Host1x::Host1x& host1x_) : host1x{host1x_} {} VP8::~VP8() = default; -const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { +std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { VP8PictureInfo info; host1x.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); diff --git a/src/video_core/host1x/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h index 5bf07ecab..7926b73f3 100644 --- a/src/video_core/host1x/codecs/vp8.h +++ b/src/video_core/host1x/codecs/vp8.h @@ -4,10 +4,11 @@ #pragma once #include <array> -#include <vector> +#include <span> #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "video_core/host1x/nvdec_common.h" namespace Tegra { @@ -24,11 +25,11 @@ public: ~VP8(); /// Compose the VP8 frame for FFmpeg decoding - [[nodiscard]] const std::vector<u8>& ComposeFrame( + [[nodiscard]] std::span<const u8> ComposeFrame( const Host1x::NvdecCommon::NvdecRegisters& state); private: - std::vector<u8> frame; + Common::ScratchBuffer<u8> frame; Host1x::Host1x& host1x; struct VP8PictureInfo { diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp index cf40c9012..306c3d0e8 100644 --- a/src/video_core/host1x/codecs/vp9.cpp +++ b/src/video_core/host1x/codecs/vp9.cpp @@ -3,6 +3,7 @@ #include <algorithm> // for std::copy #include <numeric> + #include "common/assert.h" #include "video_core/host1x/codecs/vp9.h" #include "video_core/host1x/host1x.h" diff --git a/src/video_core/host1x/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h index d4083e8d3..f1ed19508 100644 --- a/src/video_core/host1x/codecs/vp9.h +++ b/src/video_core/host1x/codecs/vp9.h @@ -4,9 +4,11 @@ #pragma once #include <array> +#include <span> #include <vector> #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "common/stream.h" #include "video_core/host1x/codecs/vp9_types.h" #include "video_core/host1x/nvdec_common.h" @@ -128,8 +130,8 @@ public: return !current_frame_info.show_frame; } - /// Returns a const reference to the composed frame data. - [[nodiscard]] const std::vector<u8>& GetFrameBytes() const { + /// Returns a const span to the composed frame data. + [[nodiscard]] std::span<const u8> GetFrameBytes() const { return frame; } @@ -181,7 +183,7 @@ private: [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader(); Host1x::Host1x& host1x; - std::vector<u8> frame; + Common::ScratchBuffer<u8> frame; std::array<s8, 4> loop_filter_ref_deltas{}; std::array<s8, 2> loop_filter_mode_deltas{}; diff --git a/src/video_core/host1x/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h index adad8ed7e..cc9b25690 100644 --- a/src/video_core/host1x/codecs/vp9_types.h +++ b/src/video_core/host1x/codecs/vp9_types.h @@ -5,6 +5,7 @@ #include <array> #include <vector> + #include "common/common_funcs.h" #include "common/common_types.h" diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 7566a8c4e..cb8029a4f 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -109,7 +109,9 @@ public: } /// Notify rasterizer that any caches of the specified region are desync with guest - virtual void OnCPUWrite(VAddr addr, u64 size) = 0; + virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0; + + virtual bool OnCPUWrite(VAddr addr, u64 size) = 0; /// Sync memory between guest and host. virtual void InvalidateGPUCache() = 0; diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index bf2ce4c49..92ecf6682 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp return false; } void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} -void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {} +bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) { + return false; +} +void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {} VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { VideoCore::RasterizerDownloadArea new_area{ .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index a8d35d2c1..93b9a6971 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -53,7 +53,8 @@ public: VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; - void OnCPUWrite(VAddr addr, u64 size) override; + void OnCacheInvalidation(VAddr addr, u64 size) override; + bool OnCPUWrite(VAddr addr, u64 size) override; VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index edf527f2d..aadd6967c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache } } -void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { +bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + if (addr == 0 || size == 0) { + return false; + } + + { + std::scoped_lock lock{buffer_cache.mutex}; + if (buffer_cache.OnCPUWrite(addr, size)) { + return true; + } + } + + { + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.WriteMemory(addr, size); + } + + shader_cache.InvalidateRegion(addr, size); + return false; +} + +void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); if (addr == 0 || size == 0) { return; } - shader_cache.OnCPUWrite(addr, size); { std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); @@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.CachedWriteMemory(addr, size); } + shader_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::InvalidateGPUCache() { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - shader_cache.SyncGuestHost(); - { - std::scoped_lock lock{buffer_cache.mutex}; - buffer_cache.FlushCachedWrites(); - } + gpu.InvalidateGPUCache(); } void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { @@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.WriteMemory(addr, size); } - shader_cache.OnCPUWrite(addr, size); + shader_cache.OnCacheInvalidation(addr, size); } void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a73ad15c1..8eda2ddba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -98,7 +98,8 @@ public: VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; - void OnCPUWrite(VAddr addr, u64 size) override; + void OnCacheInvalidation(VAddr addr, u64 size) override; + bool OnCPUWrite(VAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index b72f95235..51df18ec3 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -591,7 +591,7 @@ void BufferCacheRuntime::ReserveNullBuffer() { .flags = 0, .size = 4, .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, @@ -599,7 +599,6 @@ void BufferCacheRuntime::ReserveNullBuffer() { if (device.IsExtTransformFeedbackSupported()) { create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; } - create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal); if (device.HasDebuggingToolAttached()) { null_buffer.SetObjectNameEXT("Null buffer"); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f7c0d939a..456bb040e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -566,11 +566,32 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s } } -void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { +bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { + if (addr == 0 || size == 0) { + return false; + } + + { + std::scoped_lock lock{buffer_cache.mutex}; + if (buffer_cache.OnCPUWrite(addr, size)) { + return true; + } + } + + { + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.WriteMemory(addr, size); + } + + pipeline_cache.InvalidateRegion(addr, size); + return false; +} + +void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - pipeline_cache.OnCPUWrite(addr, size); + { std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); @@ -579,14 +600,11 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.CachedWriteMemory(addr, size); } + pipeline_cache.InvalidateRegion(addr, size); } void RasterizerVulkan::InvalidateGPUCache() { - pipeline_cache.SyncGuestHost(); - { - std::scoped_lock lock{buffer_cache.mutex}; - buffer_cache.FlushCachedWrites(); - } + gpu.InvalidateGPUCache(); } void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { @@ -598,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.WriteMemory(addr, size); } - pipeline_cache.OnCPUWrite(addr, size); + pipeline_cache.OnCacheInvalidation(addr, size); } void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b39710b3c..73257d964 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -96,7 +96,8 @@ public: void InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; - void OnCPUWrite(VAddr addr, u64 size) override; + void OnCacheInvalidation(VAddr addr, u64 size) override; + bool OnCPUWrite(VAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index 4db948b6d..01701201d 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { RemovePendingShaders(); } -void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { +void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) { std::scoped_lock lock{invalidation_mutex}; InvalidatePagesInRegion(addr, size); } diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index f3cc4c70b..de8e08002 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -62,7 +62,7 @@ public: /// @brief Unmarks a memory region as cached and marks it for removal /// @param addr Start address of the CPU write operation /// @param size Number of bytes of the CPU write operation - void OnCPUWrite(VAddr addr, size_t size); + void OnCacheInvalidation(VAddr addr, size_t size); /// @brief Flushes delayed removal operations void SyncGuestHost(); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8190f3ba1..79f158db4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -865,11 +865,15 @@ void TextureCache<P>::PopAsyncFlushes() { template <class P> ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload) { const ImageInfo dst_info(operand); - const ImageId image_id = FindDMAImage(dst_info, operand.address); - if (!image_id) { + const ImageId dst_id = FindDMAImage(dst_info, operand.address); + if (!dst_id) { + return NULL_IMAGE_ID; + } + auto& image = slot_images[dst_id]; + if (False(image.flags & ImageFlagBits::GpuModified)) { + // No need to waste time on an image that's synced with guest return NULL_IMAGE_ID; } - auto& image = slot_images[image_id]; if (image.info.type == ImageType::e3D) { // Don't accelerate 3D images. return NULL_IMAGE_ID; @@ -883,7 +887,7 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo if (!base) { return NULL_IMAGE_ID; } - return image_id; + return dst_id; } template <class P> diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index 7624a9b32..6a294c1da 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp @@ -19,11 +19,9 @@ #include <windows.h> // ensure include order #include <vulkan/vulkan_win32.h> -#elif defined(__APPLE__) -#include <vulkan/vulkan_macos.h> #elif defined(__ANDROID__) #include <vulkan/vulkan_android.h> -#else +#elif !defined(__APPLE__) #include <X11/Xlib.h> #include <vulkan/vulkan_wayland.h> #include <vulkan/vulkan_xlib.h> @@ -68,7 +66,7 @@ namespace { break; #elif defined(__APPLE__) case Core::Frontend::WindowSystemType::Cocoa: - extensions.push_back(VK_MVK_MACOS_SURFACE_EXTENSION_NAME); + extensions.push_back(VK_EXT_METAL_SURFACE_EXTENSION_NAME); break; #elif defined(__ANDROID__) case Core::Frontend::WindowSystemType::Android: diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp index c34599365..cfea4cd7b 100644 --- a/src/video_core/vulkan_common/vulkan_surface.cpp +++ b/src/video_core/vulkan_common/vulkan_surface.cpp @@ -11,11 +11,9 @@ #include <windows.h> // ensure include order #include <vulkan/vulkan_win32.h> -#elif defined(__APPLE__) -#include <vulkan/vulkan_macos.h> #elif defined(__ANDROID__) #include <vulkan/vulkan_android.h> -#else +#elif !defined(__APPLE__) #include <X11/Xlib.h> #include <vulkan/vulkan_wayland.h> #include <vulkan/vulkan_xlib.h> @@ -44,12 +42,13 @@ vk::SurfaceKHR CreateSurface( } #elif defined(__APPLE__) if (window_info.type == Core::Frontend::WindowSystemType::Cocoa) { - const VkMacOSSurfaceCreateInfoMVK mvk_ci{VK_STRUCTURE_TYPE_MACOS_SURFACE_CREATE_INFO_MVK, - nullptr, 0, window_info.render_surface}; - const auto vkCreateMacOSSurfaceMVK = reinterpret_cast<PFN_vkCreateMacOSSurfaceMVK>( - dld.vkGetInstanceProcAddr(*instance, "vkCreateMacOSSurfaceMVK")); - if (!vkCreateMacOSSurfaceMVK || - vkCreateMacOSSurfaceMVK(*instance, &mvk_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { + const VkMetalSurfaceCreateInfoEXT macos_ci = { + .pLayer = static_cast<const CAMetalLayer*>(window_info.render_surface), + }; + const auto vkCreateMetalSurfaceEXT = reinterpret_cast<PFN_vkCreateMetalSurfaceEXT>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateMetalSurfaceEXT")); + if (!vkCreateMetalSurfaceEXT || + vkCreateMetalSurfaceEXT(*instance, &macos_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { LOG_ERROR(Render_Vulkan, "Failed to initialize Metal surface"); throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index b5e70fcd4..32bd75ad8 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -15,6 +15,8 @@ #define VK_NO_PROTOTYPES #ifdef _WIN32 #define VK_USE_PLATFORM_WIN32_KHR +#elif defined(__APPLE__) +#define VK_USE_PLATFORM_METAL_EXT #endif #include <vulkan/vulkan.h> diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 29467d380..195d3556c 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -503,8 +503,7 @@ void Config::ReadMousePanningValues() { ReadBasicSetting(Settings::values.mouse_panning); ReadBasicSetting(Settings::values.mouse_panning_x_sensitivity); ReadBasicSetting(Settings::values.mouse_panning_y_sensitivity); - ReadBasicSetting(Settings::values.mouse_panning_deadzone_x_counterweight); - ReadBasicSetting(Settings::values.mouse_panning_deadzone_y_counterweight); + ReadBasicSetting(Settings::values.mouse_panning_deadzone_counterweight); ReadBasicSetting(Settings::values.mouse_panning_decay_strength); ReadBasicSetting(Settings::values.mouse_panning_min_decay); } @@ -1122,8 +1121,7 @@ void Config::SaveMousePanningValues() { // Don't overwrite values.mouse_panning WriteBasicSetting(Settings::values.mouse_panning_x_sensitivity); WriteBasicSetting(Settings::values.mouse_panning_y_sensitivity); - WriteBasicSetting(Settings::values.mouse_panning_deadzone_x_counterweight); - WriteBasicSetting(Settings::values.mouse_panning_deadzone_y_counterweight); + WriteBasicSetting(Settings::values.mouse_panning_deadzone_counterweight); WriteBasicSetting(Settings::values.mouse_panning_decay_strength); WriteBasicSetting(Settings::values.mouse_panning_min_decay); } diff --git a/src/yuzu/configuration/configure_input_player.ui b/src/yuzu/configuration/configure_input_player.ui index 43f6c7b50..611a79477 100644 --- a/src/yuzu/configuration/configure_input_player.ui +++ b/src/yuzu/configuration/configure_input_player.ui @@ -3105,21 +3105,6 @@ </property> <item> <widget class="QPushButton" name="mousePanningButton"> - <property name="minimumSize"> - <size> - <width>68</width> - <height>0</height> - </size> - </property> - <property name="maximumSize"> - <size> - <width>68</width> - <height>16777215</height> - </size> - </property> - <property name="styleSheet"> - <string notr="true">min-width: 68px;</string> - </property> <property name="text"> <string>Configure</string> </property> diff --git a/src/yuzu/configuration/configure_mouse_panning.cpp b/src/yuzu/configuration/configure_mouse_panning.cpp index f183d2740..e37c546b0 100644 --- a/src/yuzu/configuration/configure_mouse_panning.cpp +++ b/src/yuzu/configuration/configure_mouse_panning.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <QCloseEvent> +#include <QMessageBox> #include "common/settings.h" #include "ui_configure_mouse_panning.h" @@ -27,31 +28,34 @@ void ConfigureMousePanning::SetConfiguration(float right_stick_deadzone, float r ui->enable->setChecked(Settings::values.mouse_panning.GetValue()); ui->x_sensitivity->setValue(Settings::values.mouse_panning_x_sensitivity.GetValue()); ui->y_sensitivity->setValue(Settings::values.mouse_panning_y_sensitivity.GetValue()); - ui->deadzone_x_counterweight->setValue( - Settings::values.mouse_panning_deadzone_x_counterweight.GetValue()); - ui->deadzone_y_counterweight->setValue( - Settings::values.mouse_panning_deadzone_y_counterweight.GetValue()); + ui->deadzone_counterweight->setValue( + Settings::values.mouse_panning_deadzone_counterweight.GetValue()); ui->decay_strength->setValue(Settings::values.mouse_panning_decay_strength.GetValue()); ui->min_decay->setValue(Settings::values.mouse_panning_min_decay.GetValue()); if (right_stick_deadzone > 0.0f || right_stick_range != 1.0f) { - ui->warning_label->setText(QString::fromStdString( - "Mouse panning works better with a deadzone of 0% and a range of 100%.\n" - "Current values are " + - std::to_string(static_cast<int>(right_stick_deadzone * 100.0f)) + "% and " + - std::to_string(static_cast<int>(right_stick_range * 100.0f)) + "% respectively.")); - } else { - ui->warning_label->hide(); + const QString right_stick_deadzone_str = + QString::fromStdString(std::to_string(static_cast<int>(right_stick_deadzone * 100.0f))); + const QString right_stick_range_str = + QString::fromStdString(std::to_string(static_cast<int>(right_stick_range * 100.0f))); + + ui->warning_label->setText( + tr("Mouse panning works better with a deadzone of 0% and a range of 100%.\nCurrent " + "values are %1% and %2% respectively.") + .arg(right_stick_deadzone_str, right_stick_range_str)); + } + + if (Settings::values.mouse_enabled) { + ui->warning_label->setText( + tr("Emulated mouse is enabled. This is incompatible with mouse panning.")); } } void ConfigureMousePanning::SetDefaultConfiguration() { ui->x_sensitivity->setValue(Settings::values.mouse_panning_x_sensitivity.GetDefault()); ui->y_sensitivity->setValue(Settings::values.mouse_panning_y_sensitivity.GetDefault()); - ui->deadzone_x_counterweight->setValue( - Settings::values.mouse_panning_deadzone_x_counterweight.GetDefault()); - ui->deadzone_y_counterweight->setValue( - Settings::values.mouse_panning_deadzone_y_counterweight.GetDefault()); + ui->deadzone_counterweight->setValue( + Settings::values.mouse_panning_deadzone_counterweight.GetDefault()); ui->decay_strength->setValue(Settings::values.mouse_panning_decay_strength.GetDefault()); ui->min_decay->setValue(Settings::values.mouse_panning_min_decay.GetDefault()); } @@ -68,12 +72,19 @@ void ConfigureMousePanning::ApplyConfiguration() { Settings::values.mouse_panning = ui->enable->isChecked(); Settings::values.mouse_panning_x_sensitivity = static_cast<float>(ui->x_sensitivity->value()); Settings::values.mouse_panning_y_sensitivity = static_cast<float>(ui->y_sensitivity->value()); - Settings::values.mouse_panning_deadzone_x_counterweight = - static_cast<float>(ui->deadzone_x_counterweight->value()); - Settings::values.mouse_panning_deadzone_y_counterweight = - static_cast<float>(ui->deadzone_y_counterweight->value()); + Settings::values.mouse_panning_deadzone_counterweight = + static_cast<float>(ui->deadzone_counterweight->value()); Settings::values.mouse_panning_decay_strength = static_cast<float>(ui->decay_strength->value()); Settings::values.mouse_panning_min_decay = static_cast<float>(ui->min_decay->value()); + if (Settings::values.mouse_enabled && Settings::values.mouse_panning) { + Settings::values.mouse_panning = false; + QMessageBox::critical( + this, tr("Emulated mouse is enabled"), + tr("Real mouse input and mouse panning are incompatible. Please disable the " + "emulated mouse in input advanced settings to allow mouse panning.")); + return; + } + accept(); } diff --git a/src/yuzu/configuration/configure_mouse_panning.ui b/src/yuzu/configuration/configure_mouse_panning.ui index 75795b727..84fb7ee80 100644 --- a/src/yuzu/configuration/configure_mouse_panning.ui +++ b/src/yuzu/configuration/configure_mouse_panning.ui @@ -9,10 +9,10 @@ <item> <widget class="QCheckBox" name="enable"> <property name="text"> - <string>Enable</string> + <string>Enable mouse panning</string> </property> <property name="toolTip"> - <string>Can be toggled via a hotkey</string> + <string>Can be toggled via a hotkey. Default hotkey is Ctrl + F9</string> </property> </widget> </item> @@ -89,40 +89,14 @@ </property> <layout class="QGridLayout"> <item row="0" column="0"> - <widget class="QLabel" name="deadzone_x_counterweight_label"> + <widget class="QLabel" name="deadzone_counterweight_label"> <property name="text"> - <string>Horizontal</string> + <string>Deadzone</string> </property> </widget> </item> <item row="0" column="1"> - <widget class="QSpinBox" name="deadzone_x_counterweight"> - <property name="alignment"> - <set>Qt::AlignCenter</set> - </property> - <property name="suffix"> - <string>%</string> - </property> - <property name="minimum"> - <number>0</number> - </property> - <property name="maximum"> - <number>100</number> - </property> - <property name="value"> - <number>0</number> - </property> - </widget> - </item> - <item row="1" column="0"> - <widget class="QLabel" name="deadzone_y_counterweight_label"> - <property name="text"> - <string>Vertical</string> - </property> - </widget> - </item> - <item row="1" column="1"> - <widget class="QSpinBox" name="deadzone_y_counterweight"> + <widget class="QSpinBox" name="deadzone_counterweight"> <property name="alignment"> <set>Qt::AlignCenter</set> </property> diff --git a/src/yuzu/qt_common.cpp b/src/yuzu/qt_common.cpp index 5d0fd7674..413402165 100644 --- a/src/yuzu/qt_common.cpp +++ b/src/yuzu/qt_common.cpp @@ -10,6 +10,8 @@ #if !defined(WIN32) && !defined(__APPLE__) #include <qpa/qplatformnativeinterface.h> +#elif defined(__APPLE__) +#include <objc/message.h> #endif namespace QtCommon { @@ -37,9 +39,12 @@ Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) Core::Frontend::EmuWindow::WindowSystemInfo wsi; wsi.type = GetWindowSystemType(); +#if defined(WIN32) // Our Win32 Qt external doesn't have the private API. -#if defined(WIN32) || defined(__APPLE__) - wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; + wsi.render_surface = reinterpret_cast<void*>(window->winId()); +#elif defined(__APPLE__) + wsi.render_surface = reinterpret_cast<void* (*)(id, SEL)>(objc_msgSend)( + reinterpret_cast<id>(window->winId()), sel_registerName("layer")); #else QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface(); wsi.display_connection = pni->nativeResourceForWindow("display", window); diff --git a/src/yuzu/vk_device_info.cpp b/src/yuzu/vk_device_info.cpp index 7c26a3dc7..e1a0e6a2a 100644 --- a/src/yuzu/vk_device_info.cpp +++ b/src/yuzu/vk_device_info.cpp @@ -26,7 +26,10 @@ Record::~Record() = default; void PopulateRecords(std::vector<Record>& records, QWindow* window) try { using namespace Vulkan; - auto wsi = QtCommon::GetWindowSystemInfo(window); + // Create a test window with a Vulkan surface type for checking present modes. + QWindow test_window(window); + test_window.setSurfaceType(QWindow::VulkanSurface); + auto wsi = QtCommon::GetWindowSystemInfo(&test_window); vk::InstanceDispatch dld; const auto library = OpenLibrary(); |