diff options
Diffstat (limited to 'src')
78 files changed, 2007 insertions, 911 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index fbebed715..eeceaa655 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -106,6 +106,8 @@ add_library(common STATIC common_funcs.h common_paths.h common_types.h + dynamic_library.cpp + dynamic_library.h file_util.cpp file_util.h hash.h diff --git a/src/common/dynamic_library.cpp b/src/common/dynamic_library.cpp new file mode 100644 index 000000000..7ab54e9e4 --- /dev/null +++ b/src/common/dynamic_library.cpp @@ -0,0 +1,106 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include <cstring> +#include <string> +#include <utility> + +#include <fmt/format.h> + +#include "common/dynamic_library.h" + +#ifdef _WIN32 +#include <windows.h> +#else +#include <dlfcn.h> +#endif + +namespace Common { + +DynamicLibrary::DynamicLibrary() = default; + +DynamicLibrary::DynamicLibrary(const char* filename) { + Open(filename); +} + +DynamicLibrary::DynamicLibrary(DynamicLibrary&& rhs) noexcept + : handle{std::exchange(rhs.handle, nullptr)} {} + +DynamicLibrary& DynamicLibrary::operator=(DynamicLibrary&& rhs) noexcept { + Close(); + handle = std::exchange(rhs.handle, nullptr); + return *this; +} + +DynamicLibrary::~DynamicLibrary() { + Close(); +} + +std::string DynamicLibrary::GetUnprefixedFilename(const char* filename) { +#if defined(_WIN32) + return std::string(filename) + ".dll"; +#elif defined(__APPLE__) + return std::string(filename) + ".dylib"; +#else + return std::string(filename) + ".so"; +#endif +} + +std::string DynamicLibrary::GetVersionedFilename(const char* libname, int major, int minor) { +#if defined(_WIN32) + if (major >= 0 && minor >= 0) + return fmt::format("{}-{}-{}.dll", libname, major, minor); + else if (major >= 0) + return fmt::format("{}-{}.dll", libname, major); + else + return fmt::format("{}.dll", libname); +#elif defined(__APPLE__) + const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : ""; + if (major >= 0 && minor >= 0) + return fmt::format("{}{}.{}.{}.dylib", prefix, libname, major, minor); + else if (major >= 0) + return fmt::format("{}{}.{}.dylib", prefix, libname, major); + else + return fmt::format("{}{}.dylib", prefix, libname); +#else + const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : ""; + if (major >= 0 && minor >= 0) + return fmt::format("{}{}.so.{}.{}", prefix, libname, major, minor); + else if (major >= 0) + return fmt::format("{}{}.so.{}", prefix, libname, major); + else + return fmt::format("{}{}.so", prefix, libname); +#endif +} + +bool DynamicLibrary::Open(const char* filename) { +#ifdef _WIN32 + handle = reinterpret_cast<void*>(LoadLibraryA(filename)); +#else + handle = dlopen(filename, RTLD_NOW); +#endif + return handle != nullptr; +} + +void DynamicLibrary::Close() { + if (!IsOpen()) + return; + +#ifdef _WIN32 + FreeLibrary(reinterpret_cast<HMODULE>(handle)); +#else + dlclose(handle); +#endif + handle = nullptr; +} + +void* DynamicLibrary::GetSymbolAddress(const char* name) const { +#ifdef _WIN32 + return reinterpret_cast<void*>(GetProcAddress(reinterpret_cast<HMODULE>(handle), name)); +#else + return reinterpret_cast<void*>(dlsym(handle, name)); +#endif +} + +} // namespace Common diff --git a/src/common/dynamic_library.h b/src/common/dynamic_library.h new file mode 100644 index 000000000..2a06372fd --- /dev/null +++ b/src/common/dynamic_library.h @@ -0,0 +1,75 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +namespace Common { + +/** + * Provides a platform-independent interface for loading a dynamic library and retrieving symbols. + * The interface maintains an internal reference count to allow one handle to be shared between + * multiple users. + */ +class DynamicLibrary final { +public: + /// Default constructor, does not load a library. + explicit DynamicLibrary(); + + /// Automatically loads the specified library. Call IsOpen() to check validity before use. + explicit DynamicLibrary(const char* filename); + + /// Moves the library. + DynamicLibrary(DynamicLibrary&&) noexcept; + DynamicLibrary& operator=(DynamicLibrary&&) noexcept; + + /// Delete copies, we can't copy a dynamic library. + DynamicLibrary(const DynamicLibrary&) = delete; + DynamicLibrary& operator=(const DynamicLibrary&) = delete; + + /// Closes the library. + ~DynamicLibrary(); + + /// Returns the specified library name with the platform-specific suffix added. + static std::string GetUnprefixedFilename(const char* filename); + + /// Returns the specified library name in platform-specific format. + /// Major/minor versions will not be included if set to -1. + /// If libname already contains the "lib" prefix, it will not be added again. + /// Windows: LIBNAME-MAJOR-MINOR.dll + /// Linux: libLIBNAME.so.MAJOR.MINOR + /// Mac: libLIBNAME.MAJOR.MINOR.dylib + static std::string GetVersionedFilename(const char* libname, int major = -1, int minor = -1); + + /// Returns true if a module is loaded, otherwise false. + bool IsOpen() const { + return handle != nullptr; + } + + /// Loads (or replaces) the handle with the specified library file name. + /// Returns true if the library was loaded and can be used. + bool Open(const char* filename); + + /// Unloads the library, any function pointers from this library are no longer valid. + void Close(); + + /// Returns the address of the specified symbol (function or variable) as an untyped pointer. + /// If the specified symbol does not exist in this library, nullptr is returned. + void* GetSymbolAddress(const char* name) const; + + /// Obtains the address of the specified symbol, automatically casting to the correct type. + /// Returns true if the symbol was found and assigned, otherwise false. + template <typename T> + bool GetSymbol(const char* name, T* ptr) const { + *ptr = reinterpret_cast<T>(GetSymbolAddress(name)); + return *ptr != nullptr; + } + +private: + /// Platform-dependent data type representing a dynamic library handle. + void* handle = nullptr; +}; + +} // namespace Common diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp index 41167f57a..35eee0096 100644 --- a/src/common/file_util.cpp +++ b/src/common/file_util.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <array> +#include <limits> #include <memory> #include <sstream> #include <unordered_map> @@ -530,11 +531,11 @@ void CopyDir(const std::string& source_path, const std::string& dest_path) { std::optional<std::string> GetCurrentDir() { // Get the current working directory (getcwd uses malloc) #ifdef _WIN32 - wchar_t* dir; - if (!(dir = _wgetcwd(nullptr, 0))) { + wchar_t* dir = _wgetcwd(nullptr, 0); + if (!dir) { #else - char* dir; - if (!(dir = getcwd(nullptr, 0))) { + char* dir = getcwd(nullptr, 0); + if (!dir) { #endif LOG_ERROR(Common_Filesystem, "GetCurrentDirectory failed: {}", GetLastErrorMsg()); return {}; @@ -918,19 +919,22 @@ void IOFile::Swap(IOFile& other) noexcept { bool IOFile::Open(const std::string& filename, const char openmode[], int flags) { Close(); + bool m_good; #ifdef _WIN32 if (flags != 0) { m_file = _wfsopen(Common::UTF8ToUTF16W(filename).c_str(), Common::UTF8ToUTF16W(openmode).c_str(), flags); + m_good = m_file != nullptr; } else { - _wfopen_s(&m_file, Common::UTF8ToUTF16W(filename).c_str(), - Common::UTF8ToUTF16W(openmode).c_str()); + m_good = _wfopen_s(&m_file, Common::UTF8ToUTF16W(filename).c_str(), + Common::UTF8ToUTF16W(openmode).c_str()) == 0; } #else - m_file = fopen(filename.c_str(), openmode); + m_file = std::fopen(filename.c_str(), openmode); + m_good = m_file != nullptr; #endif - return IsOpen(); + return m_good; } bool IOFile::Close() { @@ -956,7 +960,7 @@ u64 IOFile::Tell() const { if (IsOpen()) return ftello(m_file); - return -1; + return std::numeric_limits<u64>::max(); } bool IOFile::Flush() { diff --git a/src/common/thread.cpp b/src/common/thread.cpp index fe7a420cc..0cd2d10bf 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -28,11 +28,8 @@ namespace Common { #ifdef _MSC_VER // Sets the debugger-visible name of the current thread. -// Uses undocumented (actually, it is now documented) trick. -// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp - -// This is implemented much nicer in upcoming msvc++, see: -// http://msdn.microsoft.com/en-us/library/xcb2z8hs(VS.100).aspx +// Uses trick documented in: +// https://docs.microsoft.com/en-us/visualstudio/debugger/how-to-set-a-thread-name-in-native-code void SetCurrentThreadName(const char* name) { static const DWORD MS_VC_EXCEPTION = 0x406D1388; @@ -47,7 +44,7 @@ void SetCurrentThreadName(const char* name) { info.dwType = 0x1000; info.szName = name; - info.dwThreadID = -1; // dwThreadID; + info.dwThreadID = std::numeric_limits<DWORD>::max(); info.dwFlags = 0; __try { diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 72294d4d8..13aa14934 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -12,6 +12,15 @@ namespace Core::Frontend { +/// Information for the Graphics Backends signifying what type of screen pointer is in +/// WindowInformation +enum class WindowSystemType { + Headless, + Windows, + X11, + Wayland, +}; + /** * Represents a drawing context that supports graphics operations. */ @@ -76,6 +85,23 @@ public: std::pair<unsigned, unsigned> min_client_area_size; }; + /// Data describing host window system information + struct WindowSystemInfo { + // Window system type. Determines which GL context or Vulkan WSI is used. + WindowSystemType type = WindowSystemType::Headless; + + // Connection to a display server. This is used on X11 and Wayland platforms. + void* display_connection = nullptr; + + // Render surface. This is a pointer to the native window handle, which depends + // on the platform. e.g. HWND for Windows, Window for X11. If the surface is + // set to nullptr, the video backend will run in headless mode. + void* render_surface = nullptr; + + // Scale of the render surface. For hidpi systems, this will be >1. + float render_surface_scale = 1.0f; + }; + /// Polls window events virtual void PollEvents() = 0; @@ -87,10 +113,6 @@ public: /// Returns if window is shown (not minimized) virtual bool IsShown() const = 0; - /// Retrieves Vulkan specific handlers from the window - virtual void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const = 0; - /** * Signal that a touch pressed event has occurred (e.g. mouse click pressed) * @param framebuffer_x Framebuffer x-coordinate that was pressed @@ -128,6 +150,13 @@ public: } /** + * Returns system information about the drawing area. + */ + const WindowSystemInfo& GetWindowInfo() const { + return window_info; + } + + /** * Gets the framebuffer layout (width, height, and screen regions) * @note This method is thread-safe */ @@ -142,7 +171,7 @@ public: void UpdateCurrentFramebufferLayout(unsigned width, unsigned height); protected: - EmuWindow(); + explicit EmuWindow(); virtual ~EmuWindow(); /** @@ -179,6 +208,8 @@ protected: client_area_height = size.second; } + WindowSystemInfo window_info; + private: /** * Handler called when the minimal client area was requested to be changed via SetConfig. diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index 32b6f4b27..f1e3d832a 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -28,6 +28,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) buffer.slot = slot; buffer.igbp_buffer = igbp_buffer; buffer.status = Buffer::Status::Free; + free_buffers.push_back(slot); queue.emplace_back(buffer); buffer_wait_event.writable->Signal(); @@ -35,16 +36,37 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, u32 height) { - auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { - // Only consider free buffers. Buffers become free once again after they've been Acquired - // and Released by the compositor, see the NVFlinger::Compose method. - if (buffer.status != Buffer::Status::Free) { - return false; - } - // Make sure that the parameters match. - return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height; - }); + if (free_buffers.empty()) { + return {}; + } + + auto f_itr = free_buffers.begin(); + auto itr = queue.end(); + + while (f_itr != free_buffers.end()) { + auto slot = *f_itr; + itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { + // Only consider free buffers. Buffers become free once again after they've been + // Acquired and Released by the compositor, see the NVFlinger::Compose method. + if (buffer.status != Buffer::Status::Free) { + return false; + } + + if (buffer.slot != slot) { + return false; + } + + // Make sure that the parameters match. + return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height; + }); + + if (itr != queue.end()) { + free_buffers.erase(f_itr); + break; + } + ++f_itr; + } if (itr == queue.end()) { return {}; @@ -99,10 +121,18 @@ void BufferQueue::ReleaseBuffer(u32 slot) { ASSERT(itr != queue.end()); ASSERT(itr->status == Buffer::Status::Acquired); itr->status = Buffer::Status::Free; + free_buffers.push_back(slot); buffer_wait_event.writable->Signal(); } +void BufferQueue::Disconnect() { + queue.clear(); + queue_sequence.clear(); + id = 1; + layer_id = 1; +} + u32 BufferQueue::Query(QueryType type) { LOG_WARNING(Service, "(STUBBED) called type={}", static_cast<u32>(type)); diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index f4bbfd945..d5f31e567 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -87,6 +87,7 @@ public: Service::Nvidia::MultiFence& multi_fence); std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); void ReleaseBuffer(u32 slot); + void Disconnect(); u32 Query(QueryType type); u32 GetId() const { @@ -101,6 +102,7 @@ private: u32 id; u64 layer_id; + std::list<u32> free_buffers; std::vector<Buffer> queue; std::list<u32> queue_sequence; Kernel::EventPair buffer_wait_event; diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 519da74e0..fdc62d05b 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -513,7 +513,8 @@ private: auto& buffer_queue = nv_flinger->FindBufferQueue(id); - if (transaction == TransactionId::Connect) { + switch (transaction) { + case TransactionId::Connect: { IGBPConnectRequestParcel request{ctx.ReadBuffer()}; IGBPConnectResponseParcel response{ static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) * @@ -521,14 +522,18 @@ private: static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) * Settings::values.resolution_factor)}; ctx.WriteBuffer(response.Serialize()); - } else if (transaction == TransactionId::SetPreallocatedBuffer) { + break; + } + case TransactionId::SetPreallocatedBuffer: { IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer); IGBPSetPreallocatedBufferResponseParcel response{}; ctx.WriteBuffer(response.Serialize()); - } else if (transaction == TransactionId::DequeueBuffer) { + break; + } + case TransactionId::DequeueBuffer: { IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; const u32 width{request.data.width}; const u32 height{request.data.height}; @@ -556,14 +561,18 @@ private: }, buffer_queue.GetWritableBufferWaitEvent()); } - } else if (transaction == TransactionId::RequestBuffer) { + break; + } + case TransactionId::RequestBuffer: { IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; auto& buffer = buffer_queue.RequestBuffer(request.slot); IGBPRequestBufferResponseParcel response{buffer}; ctx.WriteBuffer(response.Serialize()); - } else if (transaction == TransactionId::QueueBuffer) { + break; + } + case TransactionId::QueueBuffer: { IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; buffer_queue.QueueBuffer(request.data.slot, request.data.transform, @@ -572,7 +581,9 @@ private: IGBPQueueBufferResponseParcel response{1280, 720}; ctx.WriteBuffer(response.Serialize()); - } else if (transaction == TransactionId::Query) { + break; + } + case TransactionId::Query: { IGBPQueryRequestParcel request{ctx.ReadBuffer()}; const u32 value = @@ -580,15 +591,30 @@ private: IGBPQueryResponseParcel response{value}; ctx.WriteBuffer(response.Serialize()); - } else if (transaction == TransactionId::CancelBuffer) { + break; + } + case TransactionId::CancelBuffer: { LOG_CRITICAL(Service_VI, "(STUBBED) called, transaction=CancelBuffer"); - } else if (transaction == TransactionId::Disconnect || - transaction == TransactionId::DetachBuffer) { + break; + } + case TransactionId::Disconnect: { + LOG_WARNING(Service_VI, "(STUBBED) called, transaction=Disconnect"); + const auto buffer = ctx.ReadBuffer(); + + buffer_queue.Disconnect(); + + IGBPEmptyResponseParcel response{}; + ctx.WriteBuffer(response.Serialize()); + break; + } + case TransactionId::DetachBuffer: { const auto buffer = ctx.ReadBuffer(); IGBPEmptyResponseParcel response{}; ctx.WriteBuffer(response.Serialize()); - } else { + break; + } + default: ASSERT_MSG(false, "Unimplemented"); } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index f0888327f..6061d37ae 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -242,7 +242,52 @@ struct Memory::Impl { } case Common::PageType::RasterizerCachedMemory: { const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); - system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount); + system.GPU().FlushRegion(current_vaddr, copy_amount); + std::memcpy(dest_buffer, host_ptr, copy_amount); + break; + } + default: + UNREACHABLE(); + } + + page_index++; + page_offset = 0; + dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; + remaining_size -= copy_amount; + } + } + + void ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer, + const std::size_t size) { + const auto& page_table = process.VMManager().page_table; + + std::size_t remaining_size = size; + std::size_t page_index = src_addr >> PAGE_BITS; + std::size_t page_offset = src_addr & PAGE_MASK; + + while (remaining_size > 0) { + const std::size_t copy_amount = + std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); + const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); + + switch (page_table.attributes[page_index]) { + case Common::PageType::Unmapped: { + LOG_ERROR(HW_Memory, + "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", + current_vaddr, src_addr, size); + std::memset(dest_buffer, 0, copy_amount); + break; + } + case Common::PageType::Memory: { + DEBUG_ASSERT(page_table.pointers[page_index]); + + const u8* const src_ptr = + page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); + std::memcpy(dest_buffer, src_ptr, copy_amount); + break; + } + case Common::PageType::RasterizerCachedMemory: { + const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); std::memcpy(dest_buffer, host_ptr, copy_amount); break; } @@ -261,6 +306,10 @@ struct Memory::Impl { ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size); } + void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) { + ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size); + } + void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer, const std::size_t size) { const auto& page_table = process.VMManager().page_table; @@ -290,7 +339,50 @@ struct Memory::Impl { } case Common::PageType::RasterizerCachedMemory: { u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); - system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); + system.GPU().InvalidateRegion(current_vaddr, copy_amount); + std::memcpy(host_ptr, src_buffer, copy_amount); + break; + } + default: + UNREACHABLE(); + } + + page_index++; + page_offset = 0; + src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; + remaining_size -= copy_amount; + } + } + + void WriteBlockUnsafe(const Kernel::Process& process, const VAddr dest_addr, + const void* src_buffer, const std::size_t size) { + const auto& page_table = process.VMManager().page_table; + std::size_t remaining_size = size; + std::size_t page_index = dest_addr >> PAGE_BITS; + std::size_t page_offset = dest_addr & PAGE_MASK; + + while (remaining_size > 0) { + const std::size_t copy_amount = + std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); + const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); + + switch (page_table.attributes[page_index]) { + case Common::PageType::Unmapped: { + LOG_ERROR(HW_Memory, + "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", + current_vaddr, dest_addr, size); + break; + } + case Common::PageType::Memory: { + DEBUG_ASSERT(page_table.pointers[page_index]); + + u8* const dest_ptr = + page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); + std::memcpy(dest_ptr, src_buffer, copy_amount); + break; + } + case Common::PageType::RasterizerCachedMemory: { + u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); std::memcpy(host_ptr, src_buffer, copy_amount); break; } @@ -309,6 +401,10 @@ struct Memory::Impl { WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size); } + void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) { + WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size); + } + void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) { const auto& page_table = process.VMManager().page_table; std::size_t remaining_size = size; @@ -337,7 +433,7 @@ struct Memory::Impl { } case Common::PageType::RasterizerCachedMemory: { u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); - system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); + system.GPU().InvalidateRegion(current_vaddr, copy_amount); std::memset(host_ptr, 0, copy_amount); break; } @@ -384,7 +480,7 @@ struct Memory::Impl { } case Common::PageType::RasterizerCachedMemory: { const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); - system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount); + system.GPU().FlushRegion(current_vaddr, copy_amount); WriteBlock(process, dest_addr, host_ptr, copy_amount); break; } @@ -545,7 +641,7 @@ struct Memory::Impl { break; case Common::PageType::RasterizerCachedMemory: { const u8* const host_ptr = GetPointerFromVMA(vaddr); - system.GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T)); + system.GPU().FlushRegion(vaddr, sizeof(T)); T value; std::memcpy(&value, host_ptr, sizeof(T)); return value; @@ -587,7 +683,7 @@ struct Memory::Impl { break; case Common::PageType::RasterizerCachedMemory: { u8* const host_ptr{GetPointerFromVMA(vaddr)}; - system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T)); + system.GPU().InvalidateRegion(vaddr, sizeof(T)); std::memcpy(host_ptr, &data, sizeof(T)); break; } @@ -696,6 +792,15 @@ void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_ impl->ReadBlock(src_addr, dest_buffer, size); } +void Memory::ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, + void* dest_buffer, const std::size_t size) { + impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size); +} + +void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) { + impl->ReadBlockUnsafe(src_addr, dest_buffer, size); +} + void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer, std::size_t size) { impl->WriteBlock(process, dest_addr, src_buffer, size); @@ -705,6 +810,16 @@ void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std impl->WriteBlock(dest_addr, src_buffer, size); } +void Memory::WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, + const void* src_buffer, std::size_t size) { + impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size); +} + +void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, + const std::size_t size) { + impl->WriteBlockUnsafe(dest_addr, src_buffer, size); +} + void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) { impl->ZeroBlock(process, dest_addr, size); } diff --git a/src/core/memory.h b/src/core/memory.h index 8913a9da4..b92d678a4 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -295,6 +295,27 @@ public: std::size_t size); /** + * Reads a contiguous block of bytes from a specified process' address space. + * This unsafe version does not trigger GPU flushing. + * + * @param process The process to read the data from. + * @param src_addr The virtual address to begin reading from. + * @param dest_buffer The buffer to place the read bytes into. + * @param size The amount of data to read, in bytes. + * + * @note If a size of 0 is specified, then this function reads nothing and + * no attempts to access memory are made at all. + * + * @pre dest_buffer must be at least size bytes in length, otherwise a + * buffer overrun will occur. + * + * @post The range [dest_buffer, size) contains the read bytes from the + * process' address space. + */ + void ReadBlockUnsafe(const Kernel::Process& process, VAddr src_addr, void* dest_buffer, + std::size_t size); + + /** * Reads a contiguous block of bytes from the current process' address space. * * @param src_addr The virtual address to begin reading from. @@ -313,6 +334,25 @@ public: void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size); /** + * Reads a contiguous block of bytes from the current process' address space. + * This unsafe version does not trigger GPU flushing. + * + * @param src_addr The virtual address to begin reading from. + * @param dest_buffer The buffer to place the read bytes into. + * @param size The amount of data to read, in bytes. + * + * @note If a size of 0 is specified, then this function reads nothing and + * no attempts to access memory are made at all. + * + * @pre dest_buffer must be at least size bytes in length, otherwise a + * buffer overrun will occur. + * + * @post The range [dest_buffer, size) contains the read bytes from the + * current process' address space. + */ + void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size); + + /** * Writes a range of bytes into a given process' address space at the specified * virtual address. * @@ -336,6 +376,26 @@ public: std::size_t size); /** + * Writes a range of bytes into a given process' address space at the specified + * virtual address. + * This unsafe version does not invalidate GPU Memory. + * + * @param process The process to write data into the address space of. + * @param dest_addr The destination virtual address to begin writing the data at. + * @param src_buffer The data to write into the process' address space. + * @param size The size of the data to write, in bytes. + * + * @post The address range [dest_addr, size) in the process' address space + * contains the data that was within src_buffer. + * + * @post If an attempt is made to write into an unmapped region of memory, the writes + * will be ignored and an error will be logged. + * + */ + void WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer, + std::size_t size); + + /** * Writes a range of bytes into the current process' address space at the specified * virtual address. * @@ -357,6 +417,24 @@ public: void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size); /** + * Writes a range of bytes into the current process' address space at the specified + * virtual address. + * This unsafe version does not invalidate GPU Memory. + * + * @param dest_addr The destination virtual address to begin writing the data at. + * @param src_buffer The data to write into the current process' address space. + * @param size The size of the data to write, in bytes. + * + * @post The address range [dest_addr, size) in the current process' address space + * contains the data that was within src_buffer. + * + * @post If an attempt is made to write into an unmapped region of memory, the writes + * will be ignored and an error will be logged. + * + */ + void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size); + + /** * Fills the specified address range within a process' address space with zeroes. * * @param process The process that will have a portion of its memory zeroed out. diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index effe76a63..f7febd6a2 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -148,6 +148,7 @@ add_library(video_core STATIC textures/convert.h textures/decoders.cpp textures/decoders.h + textures/texture.cpp textures/texture.h video_core.cpp video_core.h diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h index 4b9193182..e35ee0b67 100644 --- a/src/video_core/buffer_cache/buffer_block.h +++ b/src/video_core/buffer_cache/buffer_block.h @@ -15,37 +15,29 @@ namespace VideoCommon { class BufferBlock { public: - bool Overlaps(const CacheAddr start, const CacheAddr end) const { - return (cache_addr < end) && (cache_addr_end > start); + bool Overlaps(const VAddr start, const VAddr end) const { + return (cpu_addr < end) && (cpu_addr_end > start); } - bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { - return cache_addr <= other_start && other_end <= cache_addr_end; + bool IsInside(const VAddr other_start, const VAddr other_end) const { + return cpu_addr <= other_start && other_end <= cpu_addr_end; } - u8* GetWritableHostPtr() const { - return FromCacheAddr(cache_addr); + std::size_t GetOffset(const VAddr in_addr) { + return static_cast<std::size_t>(in_addr - cpu_addr); } - u8* GetWritableHostPtr(std::size_t offset) const { - return FromCacheAddr(cache_addr + offset); + VAddr GetCpuAddr() const { + return cpu_addr; } - std::size_t GetOffset(const CacheAddr in_addr) { - return static_cast<std::size_t>(in_addr - cache_addr); + VAddr GetCpuAddrEnd() const { + return cpu_addr_end; } - CacheAddr GetCacheAddr() const { - return cache_addr; - } - - CacheAddr GetCacheAddrEnd() const { - return cache_addr_end; - } - - void SetCacheAddr(const CacheAddr new_addr) { - cache_addr = new_addr; - cache_addr_end = new_addr + size; + void SetCpuAddr(const VAddr new_addr) { + cpu_addr = new_addr; + cpu_addr_end = new_addr + size; } std::size_t GetSize() const { @@ -61,14 +53,14 @@ public: } protected: - explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { - SetCacheAddr(cache_addr); + explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} { + SetCpuAddr(cpu_addr); } ~BufferBlock() = default; private: - CacheAddr cache_addr{}; - CacheAddr cache_addr_end{}; + VAddr cpu_addr{}; + VAddr cpu_addr_end{}; std::size_t size{}; u64 epoch{}; }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 186aca61d..b57c0d4d4 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -19,6 +19,7 @@ #include "common/alignment.h" #include "common/common_types.h" #include "core/core.h" +#include "core/memory.h" #include "video_core/buffer_cache/buffer_block.h" #include "video_core/buffer_cache/map_interval.h" #include "video_core/memory_manager.h" @@ -37,28 +38,45 @@ public: bool is_written = false, bool use_fast_cbuf = false) { std::lock_guard lock{mutex}; - auto& memory_manager = system.GPU().MemoryManager(); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - if (!host_ptr) { + const std::optional<VAddr> cpu_addr_opt = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + + if (!cpu_addr_opt) { return {GetEmptyBuffer(size), 0}; } - const auto cache_addr = ToCacheAddr(host_ptr); + + VAddr cpu_addr = *cpu_addr_opt; // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. constexpr std::size_t max_stream_size = 0x800; if (use_fast_cbuf || size < max_stream_size) { - if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { + if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { + auto& memory_manager = system.GPU().MemoryManager(); if (use_fast_cbuf) { - return ConstBufferUpload(host_ptr, size); + if (memory_manager.IsGranularRange(gpu_addr, size)) { + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + return ConstBufferUpload(host_ptr, size); + } else { + staging_buffer.resize(size); + memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); + return ConstBufferUpload(staging_buffer.data(), size); + } } else { - return StreamBufferUpload(host_ptr, size, alignment); + if (memory_manager.IsGranularRange(gpu_addr, size)) { + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + return StreamBufferUpload(host_ptr, size, alignment); + } else { + staging_buffer.resize(size); + memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); + return StreamBufferUpload(staging_buffer.data(), size, alignment); + } } } } - auto block = GetBlock(cache_addr, size); - auto map = MapAddress(block, gpu_addr, cache_addr, size); + auto block = GetBlock(cpu_addr, size); + auto map = MapAddress(block, gpu_addr, cpu_addr, size); if (is_written) { map->MarkAsModified(true, GetModifiedTicks()); if (!map->IsWritten()) { @@ -71,7 +89,7 @@ public: } } - const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); + const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr)); return {ToHandle(block), offset}; } @@ -112,7 +130,7 @@ public: } /// Write any cached resources overlapping the specified region back to memory - void FlushRegion(CacheAddr addr, std::size_t size) { + void FlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; std::vector<MapInterval> objects = GetMapsInRange(addr, size); @@ -127,7 +145,7 @@ public: } /// Mark the specified region as being invalidated - void InvalidateRegion(CacheAddr addr, u64 size) { + void InvalidateRegion(VAddr addr, u64 size) { std::lock_guard lock{mutex}; std::vector<MapInterval> objects = GetMapsInRange(addr, size); @@ -152,7 +170,7 @@ protected: virtual void WriteBarrier() = 0; - virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; + virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, const u8* data) = 0; @@ -169,20 +187,17 @@ protected: /// Register an object into the cache void Register(const MapInterval& new_map, bool inherit_written = false) { - const CacheAddr cache_ptr = new_map->GetStart(); - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); - if (!cache_ptr || !cpu_addr) { + const VAddr cpu_addr = new_map->GetStart(); + if (!cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", new_map->GetGpuAddress()); return; } const std::size_t size = new_map->GetEnd() - new_map->GetStart(); - new_map->SetCpuAddress(*cpu_addr); new_map->MarkAsRegistered(true); const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; mapped_addresses.insert({interval, new_map}); - rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); if (inherit_written) { MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); new_map->MarkAsWritten(true); @@ -192,7 +207,7 @@ protected: /// Unregisters an object from the cache void Unregister(MapInterval& map) { const std::size_t size = map->GetEnd() - map->GetStart(); - rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); + rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); map->MarkAsRegistered(false); if (map->IsWritten()) { UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); @@ -202,32 +217,39 @@ protected: } private: - MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { + MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) { return std::make_shared<MapIntervalBase>(start, end, gpu_addr); } - MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, - const CacheAddr cache_addr, const std::size_t size) { + MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, + const std::size_t size) { - std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); + std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { - const CacheAddr cache_addr_end = cache_addr + size; - MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); - u8* host_ptr = FromCacheAddr(cache_addr); - UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); + auto& memory_manager = system.GPU().MemoryManager(); + const VAddr cpu_addr_end = cpu_addr + size; + MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr); + if (memory_manager.IsGranularRange(gpu_addr, size)) { + u8* host_ptr = memory_manager.GetPointer(gpu_addr); + UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); + } else { + staging_buffer.resize(size); + memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); + UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); + } Register(new_map); return new_map; } - const CacheAddr cache_addr_end = cache_addr + size; + const VAddr cpu_addr_end = cpu_addr + size; if (overlaps.size() == 1) { MapInterval& current_map = overlaps[0]; - if (current_map->IsInside(cache_addr, cache_addr_end)) { + if (current_map->IsInside(cpu_addr, cpu_addr_end)) { return current_map; } } - CacheAddr new_start = cache_addr; - CacheAddr new_end = cache_addr_end; + VAddr new_start = cpu_addr; + VAddr new_end = cpu_addr_end; bool write_inheritance = false; bool modified_inheritance = false; // Calculate new buffer parameters @@ -237,7 +259,7 @@ private: write_inheritance |= overlap->IsWritten(); modified_inheritance |= overlap->IsModified(); } - GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; + GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; for (auto& overlap : overlaps) { Unregister(overlap); } @@ -250,7 +272,7 @@ private: return new_map; } - void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, + void UpdateBlock(const TBuffer& block, VAddr start, VAddr end, std::vector<MapInterval>& overlaps) { const IntervalType base_interval{start, end}; IntervalSet interval_set{}; @@ -262,13 +284,15 @@ private: for (auto& interval : interval_set) { std::size_t size = interval.upper() - interval.lower(); if (size > 0) { - u8* host_ptr = FromCacheAddr(interval.lower()); - UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); + staging_buffer.resize(size); + system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); + UploadBlockData(block, block->GetOffset(interval.lower()), size, + staging_buffer.data()); } } } - std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { + std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) { if (size == 0) { return {}; } @@ -290,8 +314,9 @@ private: void FlushMap(MapInterval map) { std::size_t size = map->GetEnd() - map->GetStart(); TBuffer block = blocks[map->GetStart() >> block_page_bits]; - u8* host_ptr = FromCacheAddr(map->GetStart()); - DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); + staging_buffer.resize(size); + DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); + system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); map->MarkAsModified(false, 0); } @@ -316,14 +341,14 @@ private: TBuffer EnlargeBlock(TBuffer buffer) { const std::size_t old_size = buffer->GetSize(); const std::size_t new_size = old_size + block_page_size; - const CacheAddr cache_addr = buffer->GetCacheAddr(); - TBuffer new_buffer = CreateBlock(cache_addr, new_size); + const VAddr cpu_addr = buffer->GetCpuAddr(); + TBuffer new_buffer = CreateBlock(cpu_addr, new_size); CopyBlock(buffer, new_buffer, 0, 0, old_size); buffer->SetEpoch(epoch); pending_destruction.push_back(buffer); - const CacheAddr cache_addr_end = cache_addr + new_size - 1; - u64 page_start = cache_addr >> block_page_bits; - const u64 page_end = cache_addr_end >> block_page_bits; + const VAddr cpu_addr_end = cpu_addr + new_size - 1; + u64 page_start = cpu_addr >> block_page_bits; + const u64 page_end = cpu_addr_end >> block_page_bits; while (page_start <= page_end) { blocks[page_start] = new_buffer; ++page_start; @@ -334,9 +359,9 @@ private: TBuffer MergeBlocks(TBuffer first, TBuffer second) { const std::size_t size_1 = first->GetSize(); const std::size_t size_2 = second->GetSize(); - const CacheAddr first_addr = first->GetCacheAddr(); - const CacheAddr second_addr = second->GetCacheAddr(); - const CacheAddr new_addr = std::min(first_addr, second_addr); + const VAddr first_addr = first->GetCpuAddr(); + const VAddr second_addr = second->GetCpuAddr(); + const VAddr new_addr = std::min(first_addr, second_addr); const std::size_t new_size = size_1 + size_2; TBuffer new_buffer = CreateBlock(new_addr, new_size); CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); @@ -345,9 +370,9 @@ private: second->SetEpoch(epoch); pending_destruction.push_back(first); pending_destruction.push_back(second); - const CacheAddr cache_addr_end = new_addr + new_size - 1; + const VAddr cpu_addr_end = new_addr + new_size - 1; u64 page_start = new_addr >> block_page_bits; - const u64 page_end = cache_addr_end >> block_page_bits; + const u64 page_end = cpu_addr_end >> block_page_bits; while (page_start <= page_end) { blocks[page_start] = new_buffer; ++page_start; @@ -355,18 +380,18 @@ private: return new_buffer; } - TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { + TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { TBuffer found{}; - const CacheAddr cache_addr_end = cache_addr + size - 1; - u64 page_start = cache_addr >> block_page_bits; - const u64 page_end = cache_addr_end >> block_page_bits; + const VAddr cpu_addr_end = cpu_addr + size - 1; + u64 page_start = cpu_addr >> block_page_bits; + const u64 page_end = cpu_addr_end >> block_page_bits; while (page_start <= page_end) { auto it = blocks.find(page_start); if (it == blocks.end()) { if (found) { found = EnlargeBlock(found); } else { - const CacheAddr start_addr = (page_start << block_page_bits); + const VAddr start_addr = (page_start << block_page_bits); found = CreateBlock(start_addr, block_page_size); blocks[page_start] = found; } @@ -386,7 +411,7 @@ private: return found; } - void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { + void MarkRegionAsWritten(const VAddr start, const VAddr end) { u64 page_start = start >> write_page_bit; const u64 page_end = end >> write_page_bit; while (page_start <= page_end) { @@ -400,7 +425,7 @@ private: } } - void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { + void UnmarkRegionAsWritten(const VAddr start, const VAddr end) { u64 page_start = start >> write_page_bit; const u64 page_end = end >> write_page_bit; while (page_start <= page_end) { @@ -416,7 +441,7 @@ private: } } - bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { + bool IsRegionWritten(const VAddr start, const VAddr end) const { u64 page_start = start >> write_page_bit; const u64 page_end = end >> write_page_bit; while (page_start <= page_end) { @@ -440,8 +465,8 @@ private: u64 buffer_offset = 0; u64 buffer_offset_base = 0; - using IntervalSet = boost::icl::interval_set<CacheAddr>; - using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; + using IntervalSet = boost::icl::interval_set<VAddr>; + using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>; using IntervalType = typename IntervalCache::interval_type; IntervalCache mapped_addresses; @@ -456,6 +481,8 @@ private: u64 epoch = 0; u64 modified_ticks = 0; + std::vector<u8> staging_buffer; + std::recursive_mutex mutex; }; diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index 3a104d5cd..b0956029d 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -11,7 +11,7 @@ namespace VideoCommon { class MapIntervalBase { public: - MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) + MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) : start{start}, end{end}, gpu_addr{gpu_addr} {} void SetCpuAddress(VAddr new_cpu_addr) { @@ -26,7 +26,7 @@ public: return gpu_addr; } - bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { + bool IsInside(const VAddr other_start, const VAddr other_end) const { return (start <= other_start && other_end <= end); } @@ -46,11 +46,11 @@ public: return is_registered; } - CacheAddr GetStart() const { + VAddr GetStart() const { return start; } - CacheAddr GetEnd() const { + VAddr GetEnd() const { return end; } @@ -76,8 +76,8 @@ public: } private: - CacheAddr start; - CacheAddr end; + VAddr start; + VAddr end; GPUVAddr gpu_addr; VAddr cpu_addr{}; bool is_written{}; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d24c9f657..4637ddabd 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -312,6 +312,35 @@ public: } }; + struct MsaaSampleLocation { + union { + BitField<0, 4, u32> x0; + BitField<4, 4, u32> y0; + BitField<8, 4, u32> x1; + BitField<12, 4, u32> y1; + BitField<16, 4, u32> x2; + BitField<20, 4, u32> y2; + BitField<24, 4, u32> x3; + BitField<28, 4, u32> y3; + }; + + constexpr std::pair<u32, u32> Location(int index) const { + switch (index) { + case 0: + return {x0, y0}; + case 1: + return {x1, y1}; + case 2: + return {x2, y2}; + case 3: + return {x3, y3}; + default: + UNREACHABLE(); + return {0, 0}; + } + } + }; + enum class DepthMode : u32 { MinusOneToOne = 0, ZeroToOne = 1, @@ -793,7 +822,13 @@ public: u32 rt_separate_frag_data; - INSERT_UNION_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0x1); + + u32 multisample_raster_enable; + u32 multisample_raster_samples; + std::array<u32, 4> multisample_sample_mask; + + INSERT_UNION_PADDING_WORDS(0x5); struct { u32 address_high; @@ -830,7 +865,16 @@ public: std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; - INSERT_UNION_PADDING_WORDS(0xF); + std::array<MsaaSampleLocation, 4> multisample_sample_locations; + + INSERT_UNION_PADDING_WORDS(0x2); + + union { + BitField<0, 1, u32> enable; + BitField<4, 3, u32> target; + } multisample_coverage_to_color; + + INSERT_UNION_PADDING_WORDS(0x8); struct { union { @@ -943,7 +987,7 @@ public: CounterReset counter_reset; - INSERT_UNION_PADDING_WORDS(0x1); + u32 multisample_enable; u32 zeta_enable; @@ -1007,7 +1051,11 @@ public: float polygon_offset_units; - INSERT_UNION_PADDING_WORDS(0x11); + INSERT_UNION_PADDING_WORDS(0x4); + + Tegra::Texture::MsaaMode multisample_mode; + + INSERT_UNION_PADDING_WORDS(0xC); union { BitField<2, 1, u32> coord_origin; @@ -1507,12 +1555,17 @@ ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); ASSERT_REG_POSITION(color_mask_common, 0x3E4); -ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); ASSERT_REG_POSITION(depth_bounds, 0x3E7); +ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); +ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED); +ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); +ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); +ASSERT_REG_POSITION(multisample_sample_locations, 0x478); +ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); ASSERT_REG_POSITION(rt_control, 0x487); ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); @@ -1545,11 +1598,12 @@ ASSERT_REG_POSITION(samplecnt_enable, 0x545); ASSERT_REG_POSITION(point_size, 0x546); ASSERT_REG_POSITION(point_sprite_enable, 0x548); ASSERT_REG_POSITION(counter_reset, 0x54C); +ASSERT_REG_POSITION(multisample_enable, 0x54D); ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(multisample_control, 0x54F); ASSERT_REG_POSITION(condition, 0x554); ASSERT_REG_POSITION(tsc, 0x557); -ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); +ASSERT_REG_POSITION(polygon_offset_factor, 0x55B); ASSERT_REG_POSITION(tic, 0x55D); ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); @@ -1558,6 +1612,7 @@ ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568); ASSERT_REG_POSITION(stencil_back_func_func, 0x569); ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); ASSERT_REG_POSITION(polygon_offset_units, 0x56F); +ASSERT_REG_POSITION(multisample_mode, 0x574); ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 59d070d7d..c66c66f6c 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -290,6 +290,23 @@ enum class VmadShr : u64 { Shr15 = 2, }; +enum class VmnmxType : u64 { + Bits8, + Bits16, + Bits32, +}; + +enum class VmnmxOperation : u64 { + Mrg_16H = 0, + Mrg_16L = 1, + Mrg_8B0 = 2, + Mrg_8B2 = 3, + Acc = 4, + Min = 5, + Max = 6, + Nop = 7, +}; + enum class XmadMode : u64 { None = 0, CLo = 1, @@ -1651,6 +1668,42 @@ union Instruction { } vmad; union { + BitField<54, 1, u64> is_dest_signed; + BitField<48, 1, u64> is_src_a_signed; + BitField<49, 1, u64> is_src_b_signed; + BitField<37, 2, u64> src_format_a; + BitField<29, 2, u64> src_format_b; + BitField<56, 1, u64> mx; + BitField<55, 1, u64> sat; + BitField<36, 2, u64> selector_a; + BitField<28, 2, u64> selector_b; + BitField<50, 1, u64> is_op_b_register; + BitField<51, 3, VmnmxOperation> operation; + + VmnmxType SourceFormatA() const { + switch (src_format_a) { + case 0b11: + return VmnmxType::Bits32; + case 0b10: + return VmnmxType::Bits16; + default: + return VmnmxType::Bits8; + } + } + + VmnmxType SourceFormatB() const { + switch (src_format_b) { + case 0b11: + return VmnmxType::Bits32; + case 0b10: + return VmnmxType::Bits16; + default: + return VmnmxType::Bits8; + } + } + } vmnmx; + + union { BitField<20, 16, u64> imm20_16; BitField<35, 1, u64> high_b_rr; // used on RR BitField<36, 1, u64> product_shift_left; @@ -1712,6 +1765,7 @@ public: BRK, DEPBAR, VOTE, + VOTE_VTG, SHFL, FSWZADD, BFE_C, @@ -1758,9 +1812,11 @@ public: IPA, OUT_R, // Emit vertex/primitive ISBERD, + BAR, MEMBAR, VMAD, VSETP, + VMNMX, FFMA_IMM, // Fused Multiply and Add FFMA_CR, FFMA_RC, @@ -1842,7 +1898,7 @@ public: MOV_C, MOV_R, MOV_IMM, - MOV_SYS, + S2R, MOV32_IMM, SHL_C, SHL_R, @@ -2026,6 +2082,7 @@ private: INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), + INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), @@ -2063,9 +2120,11 @@ private: INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), + INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"), INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), + INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"), INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), @@ -2134,7 +2193,7 @@ private: INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), - INST("1111000011001---", Id::MOV_SYS, Type::Trivial, "MOV_SYS"), + INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"), INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ced9d7e28..1a2d747be 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -270,13 +270,13 @@ public: virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - virtual void FlushRegion(CacheAddr addr, u64 size) = 0; + virtual void FlushRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; + virtual void InvalidateRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed and invalidated - virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; + virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; protected: virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 925be8d7b..cc434faf7 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -30,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { gpu_thread.SwapBuffers(framebuffer); } -void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { +void GPUAsynch::FlushRegion(VAddr addr, u64 size) { gpu_thread.FlushRegion(addr, size); } -void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) { +void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { gpu_thread.InvalidateRegion(addr, size); } -void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { gpu_thread.FlushAndInvalidateRegion(addr, size); } diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 265c62758..03fd0eef0 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -27,9 +27,9 @@ public: void Start() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void FlushRegion(CacheAddr addr, u64 size) override; - void InvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitIdle() const override; protected: diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index bd5278a5c..6f38a672a 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { renderer->SwapBuffers(framebuffer); } -void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { +void GPUSynch::FlushRegion(VAddr addr, u64 size) { renderer->Rasterizer().FlushRegion(addr, size); } -void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) { +void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { renderer->Rasterizer().InvalidateRegion(addr, size); } -void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); } diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 866a94c8c..4a6e9a01d 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -26,9 +26,9 @@ public: void Start() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void FlushRegion(CacheAddr addr, u64 size) override; - void InvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitIdle() const override {} protected: diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 270c7ae0d..10cda686b 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); } -void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { +void ThreadManager::FlushRegion(VAddr addr, u64 size) { PushCommand(FlushRegionCommand(addr, size)); } -void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { +void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { system.Renderer().Rasterizer().InvalidateRegion(addr, size); } -void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important InvalidateRegion(addr, size); } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index be36c580e..cd74ad330 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -47,26 +47,26 @@ struct SwapBuffersCommand final { /// Command to signal to the GPU thread to flush a region struct FlushRegionCommand final { - explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} + explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} - CacheAddr addr; + VAddr addr; u64 size; }; /// Command to signal to the GPU thread to invalidate a region struct InvalidateRegionCommand final { - explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} + explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} - CacheAddr addr; + VAddr addr; u64 size; }; /// Command to signal to the GPU thread to flush and invalidate a region struct FlushAndInvalidateRegionCommand final { - explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size) + explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} - CacheAddr addr; + VAddr addr; u64 size; }; @@ -111,13 +111,13 @@ public: void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - void FlushRegion(CacheAddr addr, u64 size); + void FlushRegion(VAddr addr, u64 size); /// Notify rasterizer that any caches of the specified region should be invalidated - void InvalidateRegion(CacheAddr addr, u64 size); + void InvalidateRegion(VAddr addr, u64 size); /// Notify rasterizer that any caches of the specified region should be flushed and invalidated - void FlushAndInvalidateRegion(CacheAddr addr, u64 size); + void FlushAndInvalidateRegion(VAddr addr, u64 size); // Wait until the gpu thread is idle. void WaitIdle() const; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index f5d33f27a..a3389d0d2 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { ASSERT((gpu_addr & page_mask) == 0); const u64 aligned_size{Common::AlignUp(size, page_size)}; - const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; const auto cpu_addr = GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); // Flush and invalidate through the GPU interface, to be asynchronous if possible. - system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); + system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size); UnmapRange(gpu_addr, aligned_size); ASSERT(system.CurrentProcess() @@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const { return {}; } - const u8* page_pointer{page_table.pointers[addr >> page_bits]}; + const u8* page_pointer{GetPointer(addr)}; if (page_pointer) { // NOTE: Avoid adding any extra logic to this fast-path block T value; - std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T)); + std::memcpy(&value, page_pointer, sizeof(T)); return value; } @@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) { return; } - u8* page_pointer{page_table.pointers[addr >> page_bits]}; + u8* page_pointer{GetPointer(addr)}; if (page_pointer) { // NOTE: Avoid adding any extra logic to this fast-path block - std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T)); + std::memcpy(page_pointer, &data, sizeof(T)); return; } @@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) { return {}; } - u8* const page_pointer{page_table.pointers[addr >> page_bits]}; - if (page_pointer != nullptr) { - return page_pointer + (addr & page_mask); + auto& memory = system.Memory(); + + const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; + + if (page_addr != 0) { + return memory.GetPointer(page_addr + (addr & page_mask)); } LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); @@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { return {}; } - const u8* const page_pointer{page_table.pointers[addr >> page_bits]}; - if (page_pointer != nullptr) { - return page_pointer + (addr & page_mask); + const auto& memory = system.Memory(); + + const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; + + if (page_addr != 0) { + return memory.GetPointer(page_addr + (addr & page_mask)); } LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); @@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; + auto& memory = system.Memory(); + while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { - const u8* src_ptr{page_table.pointers[page_index] + page_offset}; + const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; // Flush must happen on the rasterizer interface, such that memory is always synchronous // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. - rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); - std::memcpy(dest_buffer, src_ptr, copy_amount); + rasterizer.FlushRegion(src_addr, copy_amount); + memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); break; } default: @@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; + auto& memory = system.Memory(); + while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; const u8* page_pointer = page_table.pointers[page_index]; if (page_pointer) { - const u8* src_ptr{page_pointer + page_offset}; - std::memcpy(dest_buffer, src_ptr, copy_amount); + const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; + memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); } else { std::memset(dest_buffer, 0, copy_amount); } @@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t page_index{dest_addr >> page_bits}; std::size_t page_offset{dest_addr & page_mask}; + auto& memory = system.Memory(); + while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { - u8* dest_ptr{page_table.pointers[page_index] + page_offset}; + const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; // Invalidate must happen on the rasterizer interface, such that memory is always // synchronous when it is written (even when in asynchronous GPU mode). - rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); - std::memcpy(dest_ptr, src_buffer, copy_amount); + rasterizer.InvalidateRegion(dest_addr, copy_amount); + memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); break; } default: @@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t page_index{dest_addr >> page_bits}; std::size_t page_offset{dest_addr & page_mask}; + auto& memory = system.Memory(); + while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; u8* page_pointer = page_table.pointers[page_index]; if (page_pointer) { - u8* dest_ptr{page_pointer + page_offset}; - std::memcpy(dest_ptr, src_buffer, copy_amount); + const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; + memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); } page_index++; page_offset = 0; @@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, } void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { - std::size_t remaining_size{size}; - std::size_t page_index{src_addr >> page_bits}; - std::size_t page_offset{src_addr & page_mask}; - - while (remaining_size > 0) { - const std::size_t copy_amount{ - std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - - switch (page_table.attributes[page_index]) { - case Common::PageType::Memory: { - // Flush must happen on the rasterizer interface, such that memory is always synchronous - // when it is copied (even when in asynchronous GPU mode). - const u8* src_ptr{page_table.pointers[page_index] + page_offset}; - rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); - WriteBlock(dest_addr, src_ptr, copy_amount); - break; - } - default: - UNREACHABLE(); - } - - page_index++; - page_offset = 0; - dest_addr += static_cast<VAddr>(copy_amount); - src_addr += static_cast<VAddr>(copy_amount); - remaining_size -= copy_amount; - } + std::vector<u8> tmp_buffer(size); + ReadBlock(src_addr, tmp_buffer.data(), size); + WriteBlock(dest_addr, tmp_buffer.data(), size); } void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { @@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); } +bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { + const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; + const std::size_t page = (addr & Memory::PAGE_MASK) + size; + return page <= Memory::PAGE_SIZE; +} + void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, VAddr backing_addr) { LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 073bdb491..0d9468535 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -97,6 +97,11 @@ public: void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); + /** + * IsGranularRange checks if a gpu region can be simply read with a pointer + */ + bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); + private: using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; using VMAHandle = VMAMap::const_iterator; diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index e66054ed0..5ea2b01f2 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -98,12 +98,12 @@ public: static_cast<QueryCache&>(*this), VideoCore::QueryType::SamplesPassed}}} {} - void InvalidateRegion(CacheAddr addr, std::size_t size) { + void InvalidateRegion(VAddr addr, std::size_t size) { std::unique_lock lock{mutex}; FlushAndRemoveRegion(addr, size); } - void FlushRegion(CacheAddr addr, std::size_t size) { + void FlushRegion(VAddr addr, std::size_t size) { std::unique_lock lock{mutex}; FlushAndRemoveRegion(addr, size); } @@ -117,14 +117,16 @@ public: void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { std::unique_lock lock{mutex}; auto& memory_manager = system.GPU().MemoryManager(); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); + const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr_opt); + VAddr cpu_addr = *cpu_addr_opt; - CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); + CachedQuery* query = TryGet(cpu_addr); if (!query) { - const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); - ASSERT_OR_EXECUTE(cpu_addr, return;); + ASSERT_OR_EXECUTE(cpu_addr_opt, return;); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); - query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); + query = Register(type, cpu_addr, host_ptr, timestamp.has_value()); } query->BindCounter(Stream(type).Current(), timestamp); @@ -173,11 +175,11 @@ protected: private: /// Flushes a memory range to guest memory and removes it from the cache. - void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { + void FlushAndRemoveRegion(VAddr addr, std::size_t size) { const u64 addr_begin = static_cast<u64>(addr); const u64 addr_end = addr_begin + static_cast<u64>(size); const auto in_range = [addr_begin, addr_end](CachedQuery& query) { - const u64 cache_begin = query.GetCacheAddr(); + const u64 cache_begin = query.GetCpuAddr(); const u64 cache_end = cache_begin + query.SizeInBytes(); return cache_begin < addr_end && addr_begin < cache_end; }; @@ -193,7 +195,7 @@ private: if (!in_range(query)) { continue; } - rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); + rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); query.Flush(); } contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), @@ -204,22 +206,21 @@ private: /// Registers the passed parameters as cached and returns a pointer to the stored cached query. CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); - const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; + const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT; return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, host_ptr); } /// Tries to a get a cached query. Returns nullptr on failure. - CachedQuery* TryGet(CacheAddr addr) { + CachedQuery* TryGet(VAddr addr) { const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; const auto it = cached_queries.find(page); if (it == std::end(cached_queries)) { return nullptr; } auto& contents = it->second; - const auto found = - std::find_if(std::begin(contents), std::end(contents), - [addr](auto& query) { return query.GetCacheAddr() == addr; }); + const auto found = std::find_if(std::begin(contents), std::end(contents), + [addr](auto& query) { return query.GetCpuAddr() == addr; }); return found != std::end(contents) ? &*found : nullptr; } @@ -323,14 +324,10 @@ public: timestamp = timestamp_; } - VAddr CpuAddr() const noexcept { + VAddr GetCpuAddr() const noexcept { return cpu_addr; } - CacheAddr GetCacheAddr() const noexcept { - return ToCacheAddr(host_ptr); - } - u64 SizeInBytes() const noexcept { return SizeInBytes(timestamp.has_value()); } diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 6de1597a2..22987751e 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -18,22 +18,14 @@ class RasterizerCacheObject { public: - explicit RasterizerCacheObject(const u8* host_ptr) - : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {} + explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {} virtual ~RasterizerCacheObject(); - CacheAddr GetCacheAddr() const { - return cache_addr; + VAddr GetCpuAddr() const { + return cpu_addr; } - const u8* GetHostPtr() const { - return host_ptr; - } - - /// Gets the address of the shader in guest memory, required for cache management - virtual VAddr GetCpuAddr() const = 0; - /// Gets the size of the shader in guest memory, required for cache management virtual std::size_t GetSizeInBytes() const = 0; @@ -68,8 +60,7 @@ private: bool is_registered{}; ///< Whether the object is currently registered with the cache bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing - const u8* host_ptr{}; ///< Pointer to the memory backing this cached region - CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space + VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space }; template <class T> @@ -80,7 +71,7 @@ public: explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} /// Write any cached resources overlapping the specified region back to memory - void FlushRegion(CacheAddr addr, std::size_t size) { + void FlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; const auto& objects{GetSortedObjectsFromRegion(addr, size)}; @@ -90,7 +81,7 @@ public: } /// Mark the specified region as being invalidated - void InvalidateRegion(CacheAddr addr, u64 size) { + void InvalidateRegion(VAddr addr, u64 size) { std::lock_guard lock{mutex}; const auto& objects{GetSortedObjectsFromRegion(addr, size)}; @@ -114,27 +105,20 @@ public: protected: /// Tries to get an object from the cache with the specified cache address - T TryGet(CacheAddr addr) const { + T TryGet(VAddr addr) const { const auto iter = map_cache.find(addr); if (iter != map_cache.end()) return iter->second; return nullptr; } - T TryGet(const void* addr) const { - const auto iter = map_cache.find(ToCacheAddr(addr)); - if (iter != map_cache.end()) - return iter->second; - return nullptr; - } - /// Register an object into the cache virtual void Register(const T& object) { std::lock_guard lock{mutex}; object->SetIsRegistered(true); interval_cache.add({GetInterval(object), ObjectSet{object}}); - map_cache.insert({object->GetCacheAddr(), object}); + map_cache.insert({object->GetCpuAddr(), object}); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); } @@ -144,7 +128,7 @@ protected: object->SetIsRegistered(false); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); - const CacheAddr addr = object->GetCacheAddr(); + const VAddr addr = object->GetCpuAddr(); interval_cache.subtract({GetInterval(object), ObjectSet{object}}); map_cache.erase(addr); } @@ -173,7 +157,7 @@ protected: private: /// Returns a list of cached objects from the specified memory region, ordered by access time - std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { + std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { if (size == 0) { return {}; } @@ -197,13 +181,13 @@ private: } using ObjectSet = std::set<T>; - using ObjectCache = std::unordered_map<CacheAddr, T>; - using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; + using ObjectCache = std::unordered_map<VAddr, T>; + using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; using ObjectInterval = typename IntervalCache::interval_type; static auto GetInterval(const T& object) { - return ObjectInterval::right_open(object->GetCacheAddr(), - object->GetCacheAddr() + object->GetSizeInBytes()); + return ObjectInterval::right_open(object->GetCpuAddr(), + object->GetCpuAddr() + object->GetSizeInBytes()); } ObjectCache map_cache; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 1a68e3caa..8ae5b9c4e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -53,14 +53,14 @@ public: virtual void FlushAll() = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - virtual void FlushRegion(CacheAddr addr, u64 size) = 0; + virtual void FlushRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; + virtual void InvalidateRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated - virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; + virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; /// Notify the rasterizer to send all written commands to the host GPU. virtual void FlushCommands() = 0; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 0375fca17..4eb37a96c 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) - : VideoCommon::BufferBlock{cache_addr, size} { +CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); } @@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() { glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); } -Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { - return std::make_shared<CachedBufferBlock>(cache_addr, size); +Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { + return std::make_shared<CachedBufferBlock>(cpu_addr, size); } void OGLBufferCache::WriteBarrier() { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 8c7145443..d94a11252 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf class CachedBufferBlock : public VideoCommon::BufferBlock { public: - explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); + explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); ~CachedBufferBlock(); const GLuint* GetHandle() const { @@ -55,7 +55,7 @@ public: } protected: - Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; + Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; void WriteBarrier() override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 346feeb2f..368f399df 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -656,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, void RasterizerOpenGL::FlushAll() {} -void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { +void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - if (!addr || !size) { + if (addr == 0 || size == 0) { return; } texture_cache.FlushRegion(addr, size); @@ -666,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { query_cache.FlushRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - if (!addr || !size) { + if (addr == 0 || size == 0) { return; } texture_cache.InvalidateRegion(addr, size); @@ -677,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { query_cache.InvalidateRegion(addr, size); } -void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { if (Settings::values.use_accurate_gpu_emulation) { FlushRegion(addr, size); } @@ -716,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, MICROPROFILE_SCOPE(OpenGL_CacheManagement); - const auto surface{ - texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))}; + const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; if (!surface) { return {}; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2d3be2437..212dad852 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -65,9 +65,9 @@ public: void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void FlushAll() override; - void FlushRegion(CacheAddr addr, u64 size) override; - void InvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushCommands() override; void TickFrame() override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 046ee55a5..6d2ff20f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() { } // Anonymous namespace -CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, +CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, std::shared_ptr<OGLProgram> program) - : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, - cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} + : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, + size_in_bytes{size_in_bytes}, program{std::move(program)} {} CachedShader::~CachedShader() = default; @@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); - return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, - size_in_bytes, std::move(registry), - MakeEntries(ir), std::move(program))); + return std::shared_ptr<CachedShader>(new CachedShader( + params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); } Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { @@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); - return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, - size_in_bytes, std::move(registry), - MakeEntries(ir), std::move(program))); + return std::shared_ptr<CachedShader>(new CachedShader( + params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); } Shader CachedShader::CreateFromCache(const ShaderParameters& params, const PrecompiledShader& precompiled_shader, std::size_t size_in_bytes) { - return std::shared_ptr<CachedShader>(new CachedShader( - params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, - precompiled_shader.entries, precompiled_shader.program)); + return std::shared_ptr<CachedShader>( + new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry, + precompiled_shader.entries, precompiled_shader.program)); } ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, @@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const GPUVAddr address{GetShaderAddress(system, program)}; // Look up shader in the cache based on address - const auto host_ptr{memory_manager.GetPointer(address)}; - Shader shader{TryGet(host_ptr)}; + const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; + Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr}; if (shader) { return last_shaders[static_cast<std::size_t>(program)] = shader; } + const auto host_ptr{memory_manager.GetPointer(address)}; + // No shader found - create a new one ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; ProgramCode code_b; @@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const auto unique_identifier = GetUniqueIdentifier( GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; - const ShaderParameters params{system, disk_cache, device, - cpu_addr, host_ptr, unique_identifier}; + + const ShaderParameters params{system, disk_cache, device, + *cpu_addr, host_ptr, unique_identifier}; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { @@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { auto& memory_manager{system.GPU().MemoryManager()}; - const auto host_ptr{memory_manager.GetPointer(code_addr)}; - auto kernel = TryGet(host_ptr); + const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; + + auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr; if (kernel) { return kernel; } + const auto host_ptr{memory_manager.GetPointer(code_addr)}; // No kernel found, create a new one auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; - const ShaderParameters params{system, disk_cache, device, - cpu_addr, host_ptr, unique_identifier}; + + const ShaderParameters params{system, disk_cache, device, + *cpu_addr, host_ptr, unique_identifier}; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 4935019fc..c836df5bd 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -65,11 +65,6 @@ public: /// Gets the GL program handle for the shader GLuint GetHandle() const; - /// Returns the guest CPU address of the shader - VAddr GetCpuAddr() const override { - return cpu_addr; - } - /// Returns the size in bytes of the shader std::size_t GetSizeInBytes() const override { return size_in_bytes; @@ -90,13 +85,12 @@ public: std::size_t size_in_bytes); private: - explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, + explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, std::shared_ptr<OGLProgram> program); std::shared_ptr<VideoCommon::Shader::Registry> registry; ShaderEntries entries; - VAddr cpu_addr = 0; std::size_t size_in_bytes = 0; std::shared_ptr<OGLProgram> program; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 160ae4340..1f1f01313 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1819,15 +1819,15 @@ private: } Expression HMergeH0(Operation operation) { - std::string dest = VisitOperand(operation, 0).AsUint(); - std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint}; + const std::string dest = VisitOperand(operation, 0).AsUint(); + const std::string src = VisitOperand(operation, 1).AsUint(); + return {fmt::format("bitfieldInsert({}, {}, 0, 16)", dest, src), Type::Uint}; } Expression HMergeH1(Operation operation) { - std::string dest = VisitOperand(operation, 0).AsUint(); - std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint}; + const std::string dest = VisitOperand(operation, 0).AsUint(); + const std::string src = VisitOperand(operation, 1).AsUint(); + return {fmt::format("bitfieldInsert({}, {}, 16, 16)", dest, src), Type::Uint}; } Expression HPack2(Operation operation) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 36590a6d0..0b4d999d7 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -411,14 +411,13 @@ CachedSurfaceView::~CachedSurfaceView() = default; void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { ASSERT(params.num_levels == 1); - const GLuint texture = surface.GetTexture(); if (params.num_layers > 1) { // Layered framebuffer attachments UNIMPLEMENTED_IF(params.base_layer != 0); switch (params.target) { case SurfaceTarget::Texture2DArray: - glFramebufferTexture(target, attachment, texture, params.base_level); + glFramebufferTexture(target, attachment, GetTexture(), params.base_level); break; default: UNIMPLEMENTED(); @@ -427,6 +426,7 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { } const GLenum view_target = surface.GetTarget(); + const GLuint texture = surface.GetTexture(); switch (surface.GetSurfaceParams().target) { case SurfaceTarget::Texture1D: glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h index 323bf6b39..89a035ca4 100644 --- a/src/video_core/renderer_vulkan/declarations.h +++ b/src/video_core/renderer_vulkan/declarations.h @@ -39,6 +39,7 @@ using UniqueFence = UniqueHandle<vk::Fence>; using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>; using UniqueImage = UniqueHandle<vk::Image>; using UniqueImageView = UniqueHandle<vk::ImageView>; +using UniqueInstance = UniqueHandle<vk::Instance>; using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>; using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>; using UniquePipeline = UniqueHandle<vk::Pipeline>; @@ -50,6 +51,7 @@ using UniqueSampler = UniqueHandle<vk::Sampler>; using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>; using UniqueSemaphore = UniqueHandle<vk::Semaphore>; using UniqueShaderModule = UniqueHandle<vk::ShaderModule>; +using UniqueSurfaceKHR = UniqueHandle<vk::SurfaceKHR>; using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>; using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>; using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 6953aaafe..9cdb4b627 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -2,13 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> +#include <array> +#include <cstring> #include <memory> #include <optional> +#include <string> #include <vector> #include <fmt/format.h> #include "common/assert.h" +#include "common/dynamic_library.h" #include "common/logging/log.h" #include "common/telemetry.h" #include "core/core.h" @@ -30,15 +35,30 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +// Include these late to avoid changing Vulkan-Hpp's dynamic dispatcher size +#ifdef _WIN32 +#include <windows.h> +// ensure include order +#include <vulkan/vulkan_win32.h> +#endif + +#ifdef __linux__ +#include <X11/Xlib.h> +#include <vulkan/vulkan_wayland.h> +#include <vulkan/vulkan_xlib.h> +#endif + namespace Vulkan { namespace { +using Core::Frontend::WindowSystemType; + VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_, VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* data, [[maybe_unused]] void* user_data) { - const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_}; + const auto severity{static_cast<vk::DebugUtilsMessageSeverityFlagBitsEXT>(severity_)}; const char* message{data->pMessage}; if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) { @@ -53,6 +73,110 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_, return VK_FALSE; } +Common::DynamicLibrary OpenVulkanLibrary() { + Common::DynamicLibrary library; +#ifdef __APPLE__ + // Check if a path to a specific Vulkan library has been specified. + char* libvulkan_env = getenv("LIBVULKAN_PATH"); + if (!libvulkan_env || !library.Open(libvulkan_env)) { + // Use the libvulkan.dylib from the application bundle. + std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; + library.Open(filename.c_str()); + } +#else + std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1); + if (!library.Open(filename.c_str())) { + // Android devices may not have libvulkan.so.1, only libvulkan.so. + filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); + library.Open(filename.c_str()); + } +#endif + return library; +} + +UniqueInstance CreateInstance(Common::DynamicLibrary& library, vk::DispatchLoaderDynamic& dld, + WindowSystemType window_type = WindowSystemType::Headless, + bool enable_layers = false) { + if (!library.IsOpen()) { + LOG_ERROR(Render_Vulkan, "Vulkan library not available"); + return UniqueInstance{}; + } + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; + if (!library.GetSymbol("vkGetInstanceProcAddr", &vkGetInstanceProcAddr)) { + LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan"); + return UniqueInstance{}; + } + dld.init(vkGetInstanceProcAddr); + + std::vector<const char*> extensions; + extensions.reserve(4); + switch (window_type) { + case Core::Frontend::WindowSystemType::Headless: + break; +#ifdef _WIN32 + case Core::Frontend::WindowSystemType::Windows: + extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); + break; +#endif +#ifdef __linux__ + case Core::Frontend::WindowSystemType::X11: + extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); + break; + case Core::Frontend::WindowSystemType::Wayland: + extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); + break; +#endif + default: + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + break; + } + if (window_type != Core::Frontend::WindowSystemType::Headless) { + extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); + } + if (enable_layers) { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + + u32 num_properties; + if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, nullptr, dld) != + vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to query number of extension properties"); + return UniqueInstance{}; + } + std::vector<vk::ExtensionProperties> properties(num_properties); + if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, properties.data(), + dld) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); + return UniqueInstance{}; + } + + for (const char* extension : extensions) { + const auto it = + std::find_if(properties.begin(), properties.end(), [extension](const auto& prop) { + return !std::strcmp(extension, prop.extensionName); + }); + if (it == properties.end()) { + LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); + return UniqueInstance{}; + } + } + + const vk::ApplicationInfo application_info("yuzu Emulator", VK_MAKE_VERSION(0, 1, 0), + "yuzu Emulator", VK_MAKE_VERSION(0, 1, 0), + VK_API_VERSION_1_1); + const std::array layers = {"VK_LAYER_LUNARG_standard_validation"}; + const vk::InstanceCreateInfo instance_ci( + {}, &application_info, enable_layers ? static_cast<u32>(layers.size()) : 0, layers.data(), + static_cast<u32>(extensions.size()), extensions.data()); + vk::Instance unsafe_instance; + if (vk::createInstance(&instance_ci, nullptr, &unsafe_instance, dld) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); + return UniqueInstance{}; + } + dld.init(unsafe_instance); + return UniqueInstance(unsafe_instance, {nullptr, dld}); +} + std::string GetReadableVersion(u32 version) { return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), VK_VERSION_PATCH(version)); @@ -147,27 +271,12 @@ bool RendererVulkan::TryPresent(int /*timeout_ms*/) { } bool RendererVulkan::Init() { - PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; - render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); - const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr); - - std::optional<vk::DebugUtilsMessengerEXT> callback; - if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) { - callback = CreateDebugCallback(dldi); - if (!callback) { - return false; - } - } - - if (!PickDevices(dldi)) { - if (callback) { - instance.destroy(*callback, nullptr, dldi); - } + library = OpenVulkanLibrary(); + instance = CreateInstance(library, dld, render_window.GetWindowInfo().type, + Settings::values.renderer_debug); + if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { return false; } - debug_callback = UniqueDebugUtilsMessengerEXT( - *callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>( - instance, nullptr, device->GetDispatchLoader())); Report(); @@ -176,7 +285,7 @@ bool RendererVulkan::Init() { resource_manager = std::make_unique<VKResourceManager>(*device); const auto& framebuffer = render_window.GetFramebufferLayout(); - swapchain = std::make_unique<VKSwapchain>(surface, *device); + swapchain = std::make_unique<VKSwapchain>(*surface, *device); swapchain->Create(framebuffer.width, framebuffer.height, false); state_tracker = std::make_unique<StateTracker>(system); @@ -213,8 +322,10 @@ void RendererVulkan::ShutDown() { device.reset(); } -std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback( - const vk::DispatchLoaderDynamic& dldi) { +bool RendererVulkan::CreateDebugCallback() { + if (!Settings::values.renderer_debug) { + return true; + } const vk::DebugUtilsMessengerCreateInfoEXT callback_ci( {}, vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | @@ -225,32 +336,88 @@ std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback( vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation | vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance, &DebugCallback, nullptr); - vk::DebugUtilsMessengerEXT callback; - if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) != + vk::DebugUtilsMessengerEXT unsafe_callback; + if (instance->createDebugUtilsMessengerEXT(&callback_ci, nullptr, &unsafe_callback, dld) != vk::Result::eSuccess) { LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); - return {}; + return false; + } + debug_callback = UniqueDebugUtilsMessengerEXT(unsafe_callback, {*instance, nullptr, dld}); + return true; +} + +bool RendererVulkan::CreateSurface() { + [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo(); + VkSurfaceKHR unsafe_surface = nullptr; + +#ifdef _WIN32 + if (window_info.type == Core::Frontend::WindowSystemType::Windows) { + const HWND hWnd = static_cast<HWND>(window_info.render_surface); + const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, + nullptr, 0, nullptr, hWnd}; + const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR")); + if (!vkCreateWin32SurfaceKHR || vkCreateWin32SurfaceKHR(instance.get(), &win32_ci, nullptr, + &unsafe_surface) != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface"); + return false; + } + } +#endif +#ifdef __linux__ + if (window_info.type == Core::Frontend::WindowSystemType::X11) { + const VkXlibSurfaceCreateInfoKHR xlib_ci{ + VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, + static_cast<Display*>(window_info.display_connection), + reinterpret_cast<Window>(window_info.render_surface)}; + const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR")); + if (!vkCreateXlibSurfaceKHR || vkCreateXlibSurfaceKHR(instance.get(), &xlib_ci, nullptr, + &unsafe_surface) != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); + return false; + } + } + if (window_info.type == Core::Frontend::WindowSystemType::Wayland) { + const VkWaylandSurfaceCreateInfoKHR wayland_ci{ + VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0, + static_cast<wl_display*>(window_info.display_connection), + static_cast<wl_surface*>(window_info.render_surface)}; + const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR")); + if (!vkCreateWaylandSurfaceKHR || + vkCreateWaylandSurfaceKHR(instance.get(), &wayland_ci, nullptr, &unsafe_surface) != + VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); + return false; + } + } +#endif + if (!unsafe_surface) { + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + return false; } - return callback; + + surface = UniqueSurfaceKHR(unsafe_surface, {*instance, nullptr, dld}); + return true; } -bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) { - const auto devices = instance.enumeratePhysicalDevices(dldi); +bool RendererVulkan::PickDevices() { + const auto devices = instance->enumeratePhysicalDevices(dld); - // TODO(Rodrigo): Choose device from config file const s32 device_index = Settings::values.vulkan_device; if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); return false; } - const vk::PhysicalDevice physical_device = devices[device_index]; + const vk::PhysicalDevice physical_device = devices[static_cast<std::size_t>(device_index)]; - if (!VKDevice::IsSuitable(dldi, physical_device, surface)) { + if (!VKDevice::IsSuitable(physical_device, *surface, dld)) { return false; } - device = std::make_unique<VKDevice>(dldi, physical_device, surface); - return device->Create(dldi, instance); + device = std::make_unique<VKDevice>(dld, physical_device, *surface); + return device->Create(*instance); } void RendererVulkan::Report() const { @@ -276,4 +443,33 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } +std::vector<std::string> RendererVulkan::EnumerateDevices() { + // Avoid putting DispatchLoaderDynamic, it's too large + auto dld_memory = std::make_unique<vk::DispatchLoaderDynamic>(); + auto& dld = *dld_memory; + + Common::DynamicLibrary library = OpenVulkanLibrary(); + UniqueInstance instance = CreateInstance(library, dld); + if (!instance) { + return {}; + } + + u32 num_devices; + if (instance->enumeratePhysicalDevices(&num_devices, nullptr, dld) != vk::Result::eSuccess) { + return {}; + } + std::vector<vk::PhysicalDevice> devices(num_devices); + if (instance->enumeratePhysicalDevices(&num_devices, devices.data(), dld) != + vk::Result::eSuccess) { + return {}; + } + + std::vector<std::string> names; + names.reserve(num_devices); + for (auto& device : devices) { + names.push_back(device.getProperties(dld).deviceName); + } + return names; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index d14384e79..42e253de5 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -6,8 +6,11 @@ #include <memory> #include <optional> +#include <string> #include <vector> +#include "common/dynamic_library.h" + #include "video_core/renderer_base.h" #include "video_core/renderer_vulkan/declarations.h" @@ -44,18 +47,24 @@ public: void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; bool TryPresent(int timeout_ms) override; + static std::vector<std::string> EnumerateDevices(); + private: - std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( - const vk::DispatchLoaderDynamic& dldi); + bool CreateDebugCallback(); - bool PickDevices(const vk::DispatchLoaderDynamic& dldi); + bool CreateSurface(); + + bool PickDevices(); void Report() const; Core::System& system; - vk::Instance instance; - vk::SurfaceKHR surface; + Common::DynamicLibrary library; + vk::DispatchLoaderDynamic dld; + + UniqueInstance instance; + UniqueSurfaceKHR surface; VKScreenInfo screen_info; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1ba544943..326d74f29 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { } // Anonymous namespace CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, - CacheAddr cache_addr, std::size_t size) - : VideoCommon::BufferBlock{cache_addr, size} { + VAddr cpu_addr, std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size} { const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst, @@ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S VKBufferCache::~VKBufferCache() = default; -Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { - return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); +Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { + return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size); } const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3f38eed0c..508214618 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -30,7 +30,7 @@ class VKScheduler; class CachedBufferBlock final : public VideoCommon::BufferBlock { public: explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, - CacheAddr cache_addr, std::size_t size); + VAddr cpu_addr, std::size_t size); ~CachedBufferBlock(); const vk::Buffer* GetHandle() const { @@ -55,7 +55,7 @@ public: protected: void WriteBarrier() override {} - Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; + Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; const vk::Buffer* ToHandle(const Buffer& buffer) override; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 7aafb5e59..6f4ae9132 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -10,6 +10,7 @@ #include <string_view> #include <thread> #include <vector> + #include "common/assert.h" #include "core/settings.h" #include "video_core/renderer_vulkan/declarations.h" @@ -35,20 +36,20 @@ void SetNext(void**& next, T& data) { } template <typename T> -T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) { +T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) { vk::PhysicalDeviceFeatures2 features; T extension_features; features.pNext = &extension_features; - physical.getFeatures2(&features, dldi); + physical.getFeatures2(&features, dld); return extension_features; } template <typename T> -T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) { +T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) { vk::PhysicalDeviceProperties2 properties; T extension_properties; properties.pNext = &extension_properties; - physical.getProperties2(&properties, dldi); + physical.getProperties2(&properties, dld); return extension_properties; } @@ -78,19 +79,19 @@ vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, Format } // Anonymous namespace -VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, +VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical, vk::SurfaceKHR surface) - : physical{physical}, properties{physical.getProperties(dldi)}, - format_properties{GetFormatProperties(dldi, physical)} { - SetupFamilies(dldi, surface); - SetupFeatures(dldi); + : dld{dld}, physical{physical}, properties{physical.getProperties(dld)}, + format_properties{GetFormatProperties(dld, physical)} { + SetupFamilies(surface); + SetupFeatures(); } VKDevice::~VKDevice() = default; -bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { +bool VKDevice::Create(vk::Instance instance) { const auto queue_cis = GetDeviceQueueCreateInfos(); - const std::vector extensions = LoadExtensions(dldi); + const std::vector extensions = LoadExtensions(); vk::PhysicalDeviceFeatures2 features2; void** next = &features2.pNext; @@ -165,15 +166,13 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan nullptr); device_ci.pNext = &features2; - vk::Device dummy_logical; - if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { + vk::Device unsafe_logical; + if (physical.createDevice(&device_ci, nullptr, &unsafe_logical, dld) != vk::Result::eSuccess) { LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); return false; } - - dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr); - logical = UniqueDevice( - dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld)); + dld.init(instance, dld.vkGetInstanceProcAddr, unsafe_logical); + logical = UniqueDevice(unsafe_logical, {nullptr, dld}); CollectTelemetryParameters(); @@ -235,8 +234,8 @@ void VKDevice::ReportLoss() const { // *(VKGraphicsPipeline*)data[0] } -bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, - const vk::DispatchLoaderDynamic& dldi) const { +bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const { + // Disable for now to avoid converting ASTC twice. static constexpr std::array astc_formats = { vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc5x4UnormBlock, vk::Format::eAstc5x4SrgbBlock, @@ -260,7 +259,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | vk::FormatFeatureFlagBits::eTransferDst}; for (const auto format : astc_formats) { - const auto format_properties{physical.getFormatProperties(format, dldi)}; + const auto format_properties{physical.getFormatProperties(format, dld)}; if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { return false; } @@ -279,11 +278,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag return (supported_usage & wanted_usage) == wanted_usage; } -bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, - vk::SurfaceKHR surface) { - bool is_suitable = true; - - constexpr std::array required_extensions = { +bool VKDevice::IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface, + const vk::DispatchLoaderDynamic& dld) { + static constexpr std::array required_extensions = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_KHR_16BIT_STORAGE_EXTENSION_NAME, VK_KHR_8BIT_STORAGE_EXTENSION_NAME, @@ -293,9 +290,10 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, }; + bool is_suitable = true; std::bitset<required_extensions.size()> available_extensions{}; - for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { + for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dld)) { for (std::size_t i = 0; i < required_extensions.size(); ++i) { if (available_extensions[i]) { continue; @@ -315,7 +313,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev } bool has_graphics{}, has_present{}; - const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); + const auto queue_family_properties = physical.getQueueFamilyProperties(dld); for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { const auto& family = queue_family_properties[i]; if (family.queueCount == 0) { @@ -323,7 +321,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev } has_graphics |= (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); - has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0; + has_present |= physical.getSurfaceSupportKHR(i, surface, dld) != 0; } if (!has_graphics || !has_present) { LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue"); @@ -331,7 +329,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev } // TODO(Rodrigo): Check if the device matches all requeriments. - const auto properties{physical.getProperties(dldi)}; + const auto properties{physical.getProperties(dld)}; const auto& limits{properties.limits}; constexpr u32 required_ubo_size = 65536; @@ -348,7 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev is_suitable = false; } - const auto features{physical.getFeatures(dldi)}; + const auto features{physical.getFeatures(dld)}; const std::array feature_report = { std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), std::make_pair(features.independentBlend, "independentBlend"), @@ -380,7 +378,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev return is_suitable; } -std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { +std::vector<const char*> VKDevice::LoadExtensions() { std::vector<const char*> extensions; const auto Test = [&](const vk::ExtensionProperties& extension, std::optional<std::reference_wrapper<bool>> status, const char* name, @@ -411,7 +409,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami bool has_khr_shader_float16_int8{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; - for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { + for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dld)) { Test(extension, khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, @@ -433,15 +431,15 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami if (has_khr_shader_float16_int8) { is_float16_supported = - GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; + GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dld).shaderFloat16; extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); } if (has_ext_subgroup_size_control) { const auto features = - GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); + GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dld); const auto properties = - GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi); + GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dld); is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize; @@ -456,9 +454,9 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami if (has_ext_transform_feedback) { const auto features = - GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi); + GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dld); const auto properties = - GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi); + GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dld); if (features.transformFeedback && features.geometryStreams && properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && @@ -471,10 +469,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami return extensions; } -void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { +void VKDevice::SetupFamilies(vk::SurfaceKHR surface) { std::optional<u32> graphics_family_, present_family_; - const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); + const auto queue_family_properties = physical.getQueueFamilyProperties(dld); for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { if (graphics_family_ && present_family_) break; @@ -483,10 +481,12 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK if (queue_family.queueCount == 0) continue; - if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) + if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) { graphics_family_ = i; - if (physical.getSurfaceSupportKHR(i, surface, dldi)) + } + if (physical.getSurfaceSupportKHR(i, surface, dld)) { present_family_ = i; + } } ASSERT(graphics_family_ && present_family_); @@ -494,10 +494,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK present_family = *present_family_; } -void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { - const auto supported_features{physical.getFeatures(dldi)}; +void VKDevice::SetupFeatures() { + const auto supported_features{physical.getFeatures(dld)}; is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; - is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); + is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); } void VKDevice::CollectTelemetryParameters() { @@ -525,7 +525,7 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con } std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( - const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { + const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical) { static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, vk::Format::eA8B8G8R8UintPack32, vk::Format::eA8B8G8R8SnormPack32, @@ -606,7 +606,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti vk::Format::eE5B9G9R9UfloatPack32}; std::unordered_map<vk::Format, vk::FormatProperties> format_properties; for (const auto format : formats) { - format_properties.emplace(format, physical.getFormatProperties(format, dldi)); + format_properties.emplace(format, physical.getFormatProperties(format, dld)); } return format_properties; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 6e656517f..d9d809852 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -22,12 +22,12 @@ const u32 GuestWarpSize = 32; /// Handles data specific to a physical device. class VKDevice final { public: - explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + explicit VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical, vk::SurfaceKHR surface); ~VKDevice(); /// Initializes the device. Returns true on success. - bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance); + bool Create(vk::Instance instance); /** * Returns a format supported by the device for the passed requeriments. @@ -188,18 +188,18 @@ public: } /// Checks if the physical device is suitable. - static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, - vk::SurfaceKHR surface); + static bool IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface, + const vk::DispatchLoaderDynamic& dld); private: /// Loads extensions into a vector and stores available ones in this object. - std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi); + std::vector<const char*> LoadExtensions(); /// Sets up queue families. - void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); + void SetupFamilies(vk::SurfaceKHR surface); /// Sets up device features. - void SetupFeatures(const vk::DispatchLoaderDynamic& dldi); + void SetupFeatures(); /// Collects telemetry information from the device. void CollectTelemetryParameters(); @@ -208,8 +208,7 @@ private: std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; /// Returns true if ASTC textures are natively supported. - bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, - const vk::DispatchLoaderDynamic& dldi) const; + bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const; /// Returns true if a format is supported. bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, @@ -217,10 +216,10 @@ private: /// Returns the device properties for Vulkan formats. static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( - const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); + const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical); - const vk::PhysicalDevice physical; ///< Physical device. vk::DispatchLoaderDynamic dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. vk::PhysicalDeviceProperties properties; ///< Device properties. UniqueDevice logical; ///< Logical device. vk::Queue graphics_queue; ///< Main graphics queue. diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 557b9d662..c2a426aeb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries, } // Anonymous namespace CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, - GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - ProgramCode program_code, u32 main_offset) - : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, - program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, - shader_ir{this->program_code, main_offset, compiler_settings, registry}, + GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, + u32 main_offset) + : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)}, + registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, + compiler_settings, registry}, entries{GenerateShaderEntries(shader_ir)} {} CachedShader::~CachedShader() = default; @@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { auto& memory_manager{system.GPU().MemoryManager()}; const GPUVAddr program_addr{GetShaderAddress(system, program)}; - const auto host_ptr{memory_manager.GetPointer(program_addr)}; - auto shader = TryGet(host_ptr); + const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + ASSERT(cpu_addr); + auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; if (!shader) { + const auto host_ptr{memory_manager.GetPointer(program_addr)}; + // No shader found - create a new one constexpr u32 stage_offset = 10; const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); - const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); - ASSERT(cpu_addr); - shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, - host_ptr, std::move(code), stage_offset); + std::move(code), stage_offset); Register(shader); } shaders[index] = std::move(shader); @@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach auto& memory_manager = system.GPU().MemoryManager(); const auto program_addr = key.shader; - const auto host_ptr = memory_manager.GetPointer(program_addr); - auto shader = TryGet(host_ptr); + const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + ASSERT(cpu_addr); + + auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; if (!shader) { // No shader found - create a new one - const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); - ASSERT(cpu_addr); + const auto host_ptr = memory_manager.GetPointer(program_addr); auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); constexpr u32 kernel_main_offset = 0; shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, - program_addr, *cpu_addr, host_ptr, std::move(code), + program_addr, *cpu_addr, std::move(code), kernel_main_offset); Register(shader); } @@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { } const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - const auto shader = TryGet(host_ptr); + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + const auto shader = TryGet(*cpu_addr); ASSERT(shader); const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index c4c112290..27c01732f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -113,17 +113,13 @@ namespace Vulkan { class CachedShader final : public RasterizerCacheObject { public: explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, - VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); + VAddr cpu_addr, ProgramCode program_code, u32 main_offset); ~CachedShader(); GPUVAddr GetGpuAddr() const { return gpu_addr; } - VAddr GetCpuAddr() const override { - return cpu_addr; - } - std::size_t GetSizeInBytes() const override { return program_code.size() * sizeof(u64); } @@ -149,7 +145,6 @@ private: Tegra::Engines::ShaderType stage); GPUVAddr gpu_addr{}; - VAddr cpu_addr{}; ProgramCode program_code; VideoCommon::Shader::Registry registry; VideoCommon::Shader::ShaderIR shader_ir; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 58c69b786..0a2ea4fd4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, void RasterizerVulkan::FlushAll() {} -void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { +void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { + if (addr == 0 || size == 0) { + return; + } texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size); } -void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { + if (addr == 0 || size == 0) { + return; + } texture_cache.InvalidateRegion(addr, size); pipeline_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size); } -void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { FlushRegion(addr, size); InvalidateRegion(addr, size); } @@ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return false; } - const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; - const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)}; + const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; if (!surface) { return false; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3185868e9..f642dde76 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -118,9 +118,9 @@ public: void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void FlushAll() override; - void FlushRegion(CacheAddr addr, u64 size) override; - void InvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushCommands() override; void TickFrame() override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index d2fe4ec5d..0dd7a1196 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -13,13 +13,247 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" +#include "video_core/textures/texture.h" namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::PredCondition; +using Tegra::Shader::StoreType; +using Tegra::Texture::ComponentType; +using Tegra::Texture::TextureFormat; +using Tegra::Texture::TICEntry; namespace { + +ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, + std::size_t component) { + const TextureFormat format{descriptor.format}; + switch (format) { + case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R32_G32_B32_A32: + case TextureFormat::R32_G32_B32: + case TextureFormat::R32_G32: + case TextureFormat::R16_G16: + case TextureFormat::R32: + case TextureFormat::R16: + case TextureFormat::R8: + case TextureFormat::R1: + if (component == 0) { + return descriptor.r_type; + } + if (component == 1) { + return descriptor.g_type; + } + if (component == 2) { + return descriptor.b_type; + } + if (component == 3) { + return descriptor.a_type; + } + break; + case TextureFormat::A8R8G8B8: + if (component == 0) { + return descriptor.a_type; + } + if (component == 1) { + return descriptor.r_type; + } + if (component == 2) { + return descriptor.g_type; + } + if (component == 3) { + return descriptor.b_type; + } + break; + case TextureFormat::A2B10G10R10: + case TextureFormat::A4B4G4R4: + case TextureFormat::A5B5G5R1: + case TextureFormat::A1B5G5R5: + if (component == 0) { + return descriptor.a_type; + } + if (component == 1) { + return descriptor.b_type; + } + if (component == 2) { + return descriptor.g_type; + } + if (component == 3) { + return descriptor.r_type; + } + break; + case TextureFormat::R32_B24G8: + if (component == 0) { + return descriptor.r_type; + } + if (component == 1) { + return descriptor.b_type; + } + if (component == 2) { + return descriptor.g_type; + } + break; + case TextureFormat::B5G6R5: + case TextureFormat::B6G5R5: + if (component == 0) { + return descriptor.b_type; + } + if (component == 1) { + return descriptor.g_type; + } + if (component == 2) { + return descriptor.r_type; + } + break; + case TextureFormat::G8R24: + case TextureFormat::G24R8: + case TextureFormat::G8R8: + case TextureFormat::G4R4: + if (component == 0) { + return descriptor.g_type; + } + if (component == 1) { + return descriptor.r_type; + } + break; + } + UNIMPLEMENTED_MSG("texture format not implement={}", format); + return ComponentType::FLOAT; +} + +bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { + constexpr u8 R = 0b0001; + constexpr u8 G = 0b0010; + constexpr u8 B = 0b0100; + constexpr u8 A = 0b1000; + constexpr std::array<u8, 16> mask = { + 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), + (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; + return std::bitset<4>{mask.at(component_mask)}.test(component); +} + +u32 GetComponentSize(TextureFormat format, std::size_t component) { + switch (format) { + case TextureFormat::R32_G32_B32_A32: + return 32; + case TextureFormat::R16_G16_B16_A16: + return 16; + case TextureFormat::R32_G32_B32: + return component <= 2 ? 32 : 0; + case TextureFormat::R32_G32: + return component <= 1 ? 32 : 0; + case TextureFormat::R16_G16: + return component <= 1 ? 16 : 0; + case TextureFormat::R32: + return component == 0 ? 32 : 0; + case TextureFormat::R16: + return component == 0 ? 16 : 0; + case TextureFormat::R8: + return component == 0 ? 8 : 0; + case TextureFormat::R1: + return component == 0 ? 1 : 0; + case TextureFormat::A8R8G8B8: + return 8; + case TextureFormat::A2B10G10R10: + return (component == 3 || component == 2 || component == 1) ? 10 : 2; + case TextureFormat::A4B4G4R4: + return 4; + case TextureFormat::A5B5G5R1: + return (component == 0 || component == 1 || component == 2) ? 5 : 1; + case TextureFormat::A1B5G5R5: + return (component == 1 || component == 2 || component == 3) ? 5 : 1; + case TextureFormat::R32_B24G8: + if (component == 0) { + return 32; + } + if (component == 1) { + return 24; + } + if (component == 2) { + return 8; + } + return 0; + case TextureFormat::B5G6R5: + if (component == 0 || component == 2) { + return 5; + } + if (component == 1) { + return 6; + } + return 0; + case TextureFormat::B6G5R5: + if (component == 1 || component == 2) { + return 5; + } + if (component == 0) { + return 6; + } + return 0; + case TextureFormat::G8R24: + if (component == 0) { + return 8; + } + if (component == 1) { + return 24; + } + return 0; + case TextureFormat::G24R8: + if (component == 0) { + return 8; + } + if (component == 1) { + return 24; + } + return 0; + case TextureFormat::G8R8: + return (component == 0 || component == 1) ? 8 : 0; + case TextureFormat::G4R4: + return (component == 0 || component == 1) ? 4 : 0; + default: + UNIMPLEMENTED_MSG("texture format not implement={}", format); + return 0; + } +} + +std::size_t GetImageComponentMask(TextureFormat format) { + constexpr u8 R = 0b0001; + constexpr u8 G = 0b0010; + constexpr u8 B = 0b0100; + constexpr u8 A = 0b1000; + switch (format) { + case TextureFormat::R32_G32_B32_A32: + case TextureFormat::R16_G16_B16_A16: + case TextureFormat::A8R8G8B8: + case TextureFormat::A2B10G10R10: + case TextureFormat::A4B4G4R4: + case TextureFormat::A5B5G5R1: + case TextureFormat::A1B5G5R5: + return std::size_t{R | G | B | A}; + case TextureFormat::R32_G32_B32: + case TextureFormat::R32_B24G8: + case TextureFormat::B5G6R5: + case TextureFormat::B6G5R5: + return std::size_t{R | G | B}; + case TextureFormat::R32_G32: + case TextureFormat::R16_G16: + case TextureFormat::G8R24: + case TextureFormat::G24R8: + case TextureFormat::G8R8: + case TextureFormat::G4R4: + return std::size_t{R | G}; + case TextureFormat::R32: + case TextureFormat::R16: + case TextureFormat::R8: + case TextureFormat::R1: + return std::size_t{R}; + default: + UNIMPLEMENTED_MSG("texture format not implement={}", format); + return std::size_t{R | G | B | A}; + } +} + std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { switch (image_type) { case Tegra::Shader::ImageType::Texture1D: @@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { } } // Anonymous namespace +std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, + Node original_value) { + switch (component_type) { + case ComponentType::SNORM: { + // range [-1.0, 1.0] + auto cnv_value = Operation(OperationCode::FMul, original_value, + Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); + cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); + return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; + } + case ComponentType::SINT: + case ComponentType::UNORM: { + bool is_signed = component_type == ComponentType::SINT; + // range [0.0, 1.0] + auto cnv_value = Operation(OperationCode::FMul, original_value, + Immediate(static_cast<float>(1 << component_size) - 1.f)); + return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), + is_signed}; + } + case ComponentType::UINT: // range [0, (1 << component_size) - 1] + return {std::move(original_value), false}; + case ComponentType::FLOAT: + if (component_size == 16) { + return {Operation(OperationCode::HCastFloat, original_value), true}; + } else { + return {std::move(original_value), true}; + } + default: + UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); + return {std::move(original_value), true}; + } +} + u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::SULD: { - UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); @@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { : GetBindlessImage(instr.gpr39, type)}; image.MarkRead(); - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.suldst.IsComponentEnabled(element)) { - continue; + if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { + u32 indexer = 0; + for (u32 element = 0; element < 4; ++element) { + if (!instr.suldst.IsComponentEnabled(element)) { + continue; + } + MetaImage meta{image, {}, element}; + Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); + SetTemporary(bb, indexer++, std::move(value)); + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { + UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && + instr.suldst.GetStoreDataLayout() != StoreType::Bits64); + + auto descriptor = [this, instr] { + std::optional<Tegra::Engines::SamplerDescriptor> descriptor; + if (instr.suldst.is_immediate) { + descriptor = + registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); + } else { + const Node image_register = GetRegister(instr.gpr39); + const auto [base_image, buffer, offset] = TrackCbuf( + image_register, global_code, static_cast<s64>(global_code.size())); + descriptor = registry.ObtainBindlessSampler(buffer, offset); + } + if (!descriptor) { + UNREACHABLE_MSG("Failed to obtain image descriptor"); + } + return *descriptor; + }(); + + const auto comp_mask = GetImageComponentMask(descriptor.format); + + switch (instr.suldst.GetStoreDataLayout()) { + case StoreType::Bits32: + case StoreType::Bits64: { + u32 indexer = 0; + u32 shifted_counter = 0; + Node value = Immediate(0); + for (u32 element = 0; element < 4; ++element) { + if (!IsComponentEnabled(comp_mask, element)) { + continue; + } + const auto component_type = GetComponentType(descriptor, element); + const auto component_size = GetComponentSize(descriptor.format, element); + MetaImage meta{image, {}, element}; + + auto [converted_value, is_signed] = GetComponentValue( + component_type, component_size, + Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); + + // shift element to correct position + const auto shifted = shifted_counter; + if (shifted > 0) { + converted_value = + SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, + std::move(converted_value), Immediate(shifted)); + } + shifted_counter += component_size; + + // add value into result + value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); + + // if we shifted enough for 1 byte -> we save it into temp + if (shifted_counter >= 32) { + SetTemporary(bb, indexer++, std::move(value)); + // reset counter and value to prepare pack next byte + value = Immediate(0); + shifted_counter = 0; + } + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + break; + } + default: + UNREACHABLE(); + break; } - MetaImage meta{image, {}, element}; - Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); } break; } diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index e6edec459..d4f95b18c 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -71,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { bb.push_back(Operation(OperationCode::Discard)); break; } - case OpCode::Id::MOV_SYS: { + case OpCode::Id::S2R: { const Node value = [this, instr] { switch (instr.sys20) { case SystemVariable::LaneId: - LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete"); + LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete"); return Immediate(0U); case SystemVariable::InvocationId: return Operation(OperationCode::InvocationId); case SystemVariable::Ydirection: return Operation(OperationCode::YNegate); case SystemVariable::InvocationInfo: - LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); + LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); + return Immediate(0U); + case SystemVariable::WscaleFactorXY: + UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); + return Immediate(0U); + case SystemVariable::WscaleFactorZ: + UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); return Immediate(0U); case SystemVariable::Tid: { Node value = Immediate(0); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 48350e042..6c4a1358b 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -780,20 +780,6 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is // When lod is used always is in gpr20 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - // Fill empty entries from the guest sampler - const std::size_t entry_coord_count = GetCoordCount(sampler.GetType()); - if (type_coord_count != entry_coord_count) { - LOG_WARNING(HW_GPU, "Bound and built texture types mismatch"); - - // When the size is higher we insert zeroes - for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) { - coords.push_back(GetRegister(Register::ZeroIndex)); - } - - // Then we ensure the size matches the number of entries (dropping unused values) - coords.resize(entry_coord_count); - } - Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index b047cf870..64ba60ea2 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -10,16 +10,24 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; using Tegra::Shader::VideoType; using Tegra::Shader::VmadShr; +using Tegra::Shader::VmnmxOperation; +using Tegra::Shader::VmnmxType; u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() == OpCode::Id::VMNMX) { + DecodeVMNMX(bb, instr); + return pc; + } + const Node op_a = GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, instr.video.type_a, instr.video.byte_height_a); @@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, } } +void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { + UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); + UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); + UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); + UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); + UNIMPLEMENTED_IF(instr.vmnmx.sat); + UNIMPLEMENTED_IF(instr.generates_cc); + + Node op_a = GetRegister(instr.gpr8); + Node op_b = GetRegister(instr.gpr20); + Node op_c = GetRegister(instr.gpr39); + + const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed + const bool is_oper2_signed = instr.vmnmx.is_dest_signed; + + const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin; + Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); + + switch (instr.vmnmx.operation) { + case VmnmxOperation::Mrg_16H: + value = BitfieldInsert(move(op_c), move(value), 16, 16); + break; + case VmnmxOperation::Mrg_16L: + value = BitfieldInsert(move(op_c), move(value), 0, 16); + break; + case VmnmxOperation::Mrg_8B0: + value = BitfieldInsert(move(op_c), move(value), 0, 8); + break; + case VmnmxOperation::Mrg_8B2: + value = BitfieldInsert(move(op_c), move(value), 16, 8); + break; + case VmnmxOperation::Acc: + value = Operation(OperationCode::IAdd, move(value), move(op_c)); + break; + case VmnmxOperation::Min: + value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); + break; + case VmnmxOperation::Max: + value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); + break; + case VmnmxOperation::Nop: + break; + default: + UNREACHABLE(); + break; + } + + SetRegister(bb, instr.gpr0, move(value)); +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index baf7188d2..8852c8a1b 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -359,6 +359,9 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { switch (cc) { case Tegra::Shader::ConditionCode::NEU: return GetInternalFlag(InternalFlag::Zero, true); + case Tegra::Shader::ConditionCode::FCSM_TR: + UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); + return MakeNode<PredicateNode>(Pred::NeverExecute, false); default: UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); return MakeNode<PredicateNode>(Pred::NeverExecute, false); diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 80fc9b82c..c6e7bdf50 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -312,6 +312,10 @@ private: /// Conditionally saturates a half float pair Node GetSaturatedHalfFloat(Node value, bool saturate = true); + /// Get image component value by type and size + std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, + u32 component_size, Node original_value); + /// Returns a predicate comparing two floats Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); /// Returns a predicate comparing two integers @@ -350,6 +354,9 @@ private: /// Marks the usage of a input or output attribute. void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); + /// Decodes VMNMX instruction and inserts its code into the passed basic block. + void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); + void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 6fe815135..7af0e792c 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -190,22 +190,11 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, MICROPROFILE_SCOPE(GPU_Load_Texture); auto& staging_buffer = staging_cache.GetBuffer(0); u8* host_ptr; - is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); - - // Handle continuouty - if (is_continuous) { - // Use physical memory directly - host_ptr = memory_manager.GetPointer(gpu_addr); - if (!host_ptr) { - return; - } - } else { - // Use an extra temporal buffer - auto& tmp_buffer = staging_cache.GetBuffer(1); - tmp_buffer.resize(guest_memory_size); - host_ptr = tmp_buffer.data(); - memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - } + // Use an extra temporal buffer + auto& tmp_buffer = staging_cache.GetBuffer(1); + tmp_buffer.resize(guest_memory_size); + host_ptr = tmp_buffer.data(); + memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", @@ -257,19 +246,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, auto& staging_buffer = staging_cache.GetBuffer(0); u8* host_ptr; - // Handle continuouty - if (is_continuous) { - // Use physical memory directly - host_ptr = memory_manager.GetPointer(gpu_addr); - if (!host_ptr) { - return; - } - } else { - // Use an extra temporal buffer - auto& tmp_buffer = staging_cache.GetBuffer(1); - tmp_buffer.resize(guest_memory_size); - host_ptr = tmp_buffer.data(); - } + // Use an extra temporal buffer + auto& tmp_buffer = staging_cache.GetBuffer(1); + tmp_buffer.resize(guest_memory_size); + host_ptr = tmp_buffer.data(); if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); @@ -300,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, } } } - if (!is_continuous) { - memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - } + memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index d7882a031..a39a8661b 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -68,8 +68,8 @@ public: return gpu_addr; } - bool Overlaps(const CacheAddr start, const CacheAddr end) const { - return (cache_addr < end) && (cache_addr_end > start); + bool Overlaps(const VAddr start, const VAddr end) const { + return (cpu_addr < end) && (cpu_addr_end > start); } bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { @@ -86,21 +86,13 @@ public: return cpu_addr; } - void SetCpuAddr(const VAddr new_addr) { - cpu_addr = new_addr; - } - - CacheAddr GetCacheAddr() const { - return cache_addr; - } - - CacheAddr GetCacheAddrEnd() const { - return cache_addr_end; + VAddr GetCpuAddrEnd() const { + return cpu_addr_end; } - void SetCacheAddr(const CacheAddr new_addr) { - cache_addr = new_addr; - cache_addr_end = new_addr + guest_memory_size; + void SetCpuAddr(const VAddr new_addr) { + cpu_addr = new_addr; + cpu_addr_end = new_addr + guest_memory_size; } const SurfaceParams& GetSurfaceParams() const { @@ -119,14 +111,6 @@ public: return mipmap_sizes[level]; } - void MarkAsContinuous(const bool is_continuous) { - this->is_continuous = is_continuous; - } - - bool IsContinuous() const { - return is_continuous; - } - bool IsLinear() const { return !params.is_tiled; } @@ -175,10 +159,8 @@ protected: std::size_t guest_memory_size; std::size_t host_memory_size; GPUVAddr gpu_addr{}; - CacheAddr cache_addr{}; - CacheAddr cache_addr_end{}; VAddr cpu_addr{}; - bool is_continuous{}; + VAddr cpu_addr_end{}; bool is_converted{}; std::vector<std::size_t> mipmap_sizes; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 47b2aafbd..6f3ef45be 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -113,10 +113,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta params.height = tic.Height(); params.depth = tic.Depth(); params.pitch = params.is_tiled ? 0 : tic.Pitch(); - if (params.target == SurfaceTarget::Texture2D && params.depth > 1) { - params.depth = 1; - } else if (params.target == SurfaceTarget::TextureCubemap || - params.target == SurfaceTarget::TextureCubeArray) { + if (params.target == SurfaceTarget::TextureCubemap || + params.target == SurfaceTarget::TextureCubeArray) { params.depth *= 6; } params.num_levels = tic.max_mip_level + 1; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c8f8d659d..88fe3e25f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; template <typename TSurface, typename TView> class TextureCache { - using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>; - using IntervalType = typename IntervalMap::interval_type; public: - void InvalidateRegion(CacheAddr addr, std::size_t size) { + void InvalidateRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; for (const auto& surface : GetSurfacesInRegion(addr, size)) { @@ -76,7 +74,7 @@ public: guard_samplers = new_guard; } - void FlushRegion(CacheAddr addr, std::size_t size) { + void FlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; auto surfaces = GetSurfacesInRegion(addr, size); @@ -99,9 +97,9 @@ public: return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!cache_addr) { + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } @@ -110,7 +108,7 @@ public: } const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); + const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); if (guard_samplers) { sampled_textures.push_back(surface); } @@ -124,13 +122,13 @@ public: if (!gpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!cache_addr) { + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); + const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); if (guard_samplers) { sampled_textures.push_back(surface); } @@ -159,14 +157,14 @@ public: SetEmptyDepthBuffer(); return {}; } - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!cache_addr) { + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { SetEmptyDepthBuffer(); return {}; } const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; - auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); + auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); if (depth_buffer.target) depth_buffer.target->MarkAsRenderTarget(false, NO_RT); depth_buffer.target = surface_view.first; @@ -199,15 +197,15 @@ public: return {}; } - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!cache_addr) { + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { SetEmptyColorBuffer(index); return {}; } auto surface_view = - GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index), + GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents, true); if (render_targets[index].target) render_targets[index].target->MarkAsRenderTarget(false, NO_RT); @@ -257,27 +255,26 @@ public: const GPUVAddr src_gpu_addr = src_config.Address(); const GPUVAddr dst_gpu_addr = dst_config.Address(); DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); - const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)}; - const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)}; - const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)}; - const auto src_cache_addr{ToCacheAddr(src_host_ptr)}; + const std::optional<VAddr> dst_cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); + const std::optional<VAddr> src_cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); std::pair<TSurface, TView> dst_surface = - GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false); + GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); std::pair<TSurface, TView> src_surface = - GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false); + GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false); ImageBlit(src_surface.second, dst_surface.second, copy_config); dst_surface.first->MarkAsModified(true, Tick()); } - TSurface TryFindFramebufferSurface(const u8* host_ptr) { - const CacheAddr cache_addr = ToCacheAddr(host_ptr); - if (!cache_addr) { + TSurface TryFindFramebufferSurface(VAddr addr) { + if (!addr) { return nullptr; } - const CacheAddr page = cache_addr >> registry_page_bits; + const VAddr page = addr >> registry_page_bits; std::vector<TSurface>& list = registry[page]; for (auto& surface : list) { - if (surface->GetCacheAddr() == cache_addr) { + if (surface->GetCpuAddr() == addr) { return surface; } } @@ -338,18 +335,14 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); const std::optional<VAddr> cpu_addr = system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); - if (!cache_ptr || !cpu_addr) { + if (!cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); return; } - const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); - surface->MarkAsContinuous(continuous); - surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); RegisterInnerCache(surface); surface->MarkAsRegistered(true); @@ -634,7 +627,7 @@ private: std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, - const CacheAddr cache_addr, + const VAddr cpu_addr, bool preserve_contents) { if (params.target == SurfaceTarget::Texture3D) { bool failed = false; @@ -659,7 +652,7 @@ private: failed = true; break; } - const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); + const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); modified |= surface->IsModified(); const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, @@ -679,7 +672,7 @@ private: } else { for (const auto& surface : overlaps) { if (!surface->MatchTarget(params.target)) { - if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { + if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { if (Settings::values.use_accurate_gpu_emulation) { return std::nullopt; } @@ -688,7 +681,7 @@ private: } return std::nullopt; } - if (surface->GetCacheAddr() != cache_addr) { + if (surface->GetCpuAddr() != cpu_addr) { continue; } if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { @@ -722,13 +715,13 @@ private: * left blank. * @param is_render Whether or not the surface is a render target. **/ - std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, + std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. - if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { + if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { @@ -755,7 +748,7 @@ private: // Step 2 // Obtain all possible overlaps in the memory region const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; // If none are found, we are done. we just load the surface and create it. if (overlaps.empty()) { @@ -777,7 +770,7 @@ private: // Check if it's a 3D texture if (params.block_depth > 0) { auto surface = - Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); + Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); if (surface) { return *surface; } @@ -852,16 +845,16 @@ private: * @param params The parameters on the candidate surface. **/ Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); - if (!cache_addr) { + if (!cpu_addr) { Deduction result{}; result.type = DeductionType::DeductionFailed; return result; } - if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { + if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { @@ -880,7 +873,7 @@ private: } const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; if (overlaps.empty()) { Deduction result{}; @@ -1024,10 +1017,10 @@ private: } void RegisterInnerCache(TSurface& surface) { - const CacheAddr cache_addr = surface->GetCacheAddr(); - CacheAddr start = cache_addr >> registry_page_bits; - const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; - l1_cache[cache_addr] = surface; + const VAddr cpu_addr = surface->GetCpuAddr(); + VAddr start = cpu_addr >> registry_page_bits; + const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; + l1_cache[cpu_addr] = surface; while (start <= end) { registry[start].push_back(surface); start++; @@ -1035,10 +1028,10 @@ private: } void UnregisterInnerCache(TSurface& surface) { - const CacheAddr cache_addr = surface->GetCacheAddr(); - CacheAddr start = cache_addr >> registry_page_bits; - const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; - l1_cache.erase(cache_addr); + const VAddr cpu_addr = surface->GetCpuAddr(); + VAddr start = cpu_addr >> registry_page_bits; + const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; + l1_cache.erase(cpu_addr); while (start <= end) { auto& reg{registry[start]}; reg.erase(std::find(reg.begin(), reg.end(), surface)); @@ -1046,18 +1039,18 @@ private: } } - std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { + std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { if (size == 0) { return {}; } - const CacheAddr cache_addr_end = cache_addr + size; - CacheAddr start = cache_addr >> registry_page_bits; - const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; + const VAddr cpu_addr_end = cpu_addr + size; + VAddr start = cpu_addr >> registry_page_bits; + const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; std::vector<TSurface> surfaces; while (start <= end) { std::vector<TSurface>& list = registry[start]; for (auto& surface : list) { - if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { + if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { surface->MarkAsPicked(true); surfaces.push_back(surface); } @@ -1146,14 +1139,14 @@ private: // large in size. static constexpr u64 registry_page_bits{20}; static constexpr u64 registry_page_size{1 << registry_page_bits}; - std::unordered_map<CacheAddr, std::vector<TSurface>> registry; + std::unordered_map<VAddr, std::vector<TSurface>> registry; static constexpr u32 DEPTH_RT = 8; static constexpr u32 NO_RT = 0xFFFFFFFF; // The L1 Cache is used for fast texture lookup before checking the overlaps // This avoids calculating size and other stuffs. - std::unordered_map<CacheAddr, TSurface> l1_cache; + std::unordered_map<VAddr, TSurface> l1_cache; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp new file mode 100644 index 000000000..d1939d744 --- /dev/null +++ b/src/video_core/textures/texture.cpp @@ -0,0 +1,80 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> + +#include "core/settings.h" +#include "video_core/textures/texture.h" + +namespace Tegra::Texture { + +namespace { + +constexpr std::array<float, 256> SRGB_CONVERSION_LUT = { + 0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f, + 0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f, + 0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f, + 0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f, + 0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f, + 0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f, + 0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f, + 0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f, + 0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f, + 0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f, + 0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f, + 0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f, + 0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f, + 0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f, + 0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f, + 0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f, + 0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f, + 0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f, + 0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f, + 0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f, + 0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f, + 0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f, + 0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f, + 0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f, + 0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f, + 0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f, + 0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f, + 0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f, + 0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f, + 0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f, + 0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f, + 0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f, +}; + +unsigned SettingsMinimumAnisotropy() noexcept { + switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { + default: + case Anisotropy::Default: + return 1U; + case Anisotropy::Filter2x: + return 2U; + case Anisotropy::Filter4x: + return 4U; + case Anisotropy::Filter8x: + return 8U; + case Anisotropy::Filter16x: + return 16U; + } +} + +} // Anonymous namespace + +std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { + if (!srgb_conversion) { + return border_color; + } + return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g], + SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; +} + +float TSCEntry::GetMaxAnisotropy() const noexcept { + return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); +} + +} // namespace Tegra::Texture diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 7edc4abe1..eba05aced 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -8,7 +8,6 @@ #include "common/assert.h" #include "common/bit_field.h" #include "common/common_types.h" -#include "core/settings.h" namespace Tegra::Texture { @@ -132,6 +131,20 @@ enum class SwizzleSource : u32 { OneFloat = 7, }; +enum class MsaaMode : u32 { + Msaa1x1 = 0, + Msaa2x1 = 1, + Msaa2x2 = 2, + Msaa4x2 = 3, + Msaa4x2_D3D = 4, + Msaa2x1_D3D = 5, + Msaa4x4 = 6, + Msaa2x2_VC4 = 8, + Msaa2x2_VC12 = 9, + Msaa4x2_VC8 = 10, + Msaa4x2_VC24 = 11, +}; + union TextureHandle { TextureHandle(u32 raw) : raw{raw} {} @@ -198,6 +211,7 @@ struct TICEntry { union { BitField<0, 4, u32> res_min_mip_level; BitField<4, 4, u32> res_max_mip_level; + BitField<8, 4, MsaaMode> msaa_mode; BitField<12, 12, u32> min_lod_clamp; }; @@ -336,24 +350,9 @@ struct TSCEntry { std::array<u8, 0x20> raw; }; - float GetMaxAnisotropy() const { - const u32 min_value = [] { - switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { - default: - case Anisotropy::Default: - return 1U; - case Anisotropy::Filter2x: - return 2U; - case Anisotropy::Filter4x: - return 4U; - case Anisotropy::Filter8x: - return 8U; - case Anisotropy::Filter16x: - return 16U; - } - }(); - return static_cast<float>(std::max(1U << max_anisotropy, min_value)); - } + std::array<float, 4> GetBorderColor() const noexcept; + + float GetMaxAnisotropy() const noexcept; float GetMinLod() const { return static_cast<float>(min_lod_clamp) / 256.0f; @@ -368,15 +367,6 @@ struct TSCEntry { constexpr u32 mask = 1U << (13 - 1); return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; } - - std::array<float, 4> GetBorderColor() const { - if (srgb_conversion) { - return {static_cast<float>(srgb_border_color_r) / 255.0f, - static_cast<float>(srgb_border_color_g) / 255.0f, - static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]}; - } - return border_color; - } }; static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index d34b47b3f..8b9404718 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -150,6 +150,10 @@ target_link_libraries(yuzu PRIVATE common core input_common video_core) target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets) target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) +if (ENABLE_VULKAN AND NOT WIN32) + target_include_directories(yuzu PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS}) +endif() + target_compile_definitions(yuzu PRIVATE # Use QStringBuilder for string concatenation to reduce # the overall number of temporary strings created. diff --git a/src/yuzu/about_dialog.cpp b/src/yuzu/about_dialog.cpp index d39b3f07a..695b2ef5f 100644 --- a/src/yuzu/about_dialog.cpp +++ b/src/yuzu/about_dialog.cpp @@ -3,15 +3,22 @@ // Refer to the license.txt file included. #include <QIcon> +#include <fmt/format.h> #include "common/scm_rev.h" #include "ui_aboutdialog.h" #include "yuzu/about_dialog.h" AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) { + const auto build_id = std::string(Common::g_build_id); + const auto fmt = std::string(Common::g_title_bar_format_idle); + const auto yuzu_build_version = + fmt::format(fmt.empty() ? "yuzu Development Build" : fmt, std::string{}, std::string{}, + std::string{}, std::string{}, std::string{}, build_id); + ui->setupUi(this); ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200)); ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg( - QString::fromUtf8(Common::g_build_fullname), QString::fromUtf8(Common::g_scm_branch), + QString::fromStdString(yuzu_build_version), QString::fromUtf8(Common::g_scm_branch), QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10))); } diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index eaded2640..1cac2f942 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -14,8 +14,9 @@ #include <QScreen> #include <QStringList> #include <QWindow> -#ifdef HAS_VULKAN -#include <QVulkanWindow> + +#if !defined(WIN32) && HAS_VULKAN +#include <qpa/qplatformnativeinterface.h> #endif #include <fmt/format.h> @@ -224,7 +225,6 @@ public: } context->MakeCurrent(); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); if (Core::System::GetInstance().Renderer().TryPresent(100)) { context->SwapBuffers(); glFinish(); @@ -238,16 +238,50 @@ private: #ifdef HAS_VULKAN class VulkanRenderWidget : public RenderWidget { public: - explicit VulkanRenderWidget(GRenderWindow* parent, QVulkanInstance* instance) - : RenderWidget(parent) { + explicit VulkanRenderWidget(GRenderWindow* parent) : RenderWidget(parent) { windowHandle()->setSurfaceType(QWindow::VulkanSurface); - windowHandle()->setVulkanInstance(instance); } }; #endif -GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread) - : QWidget(parent_), emu_thread(emu_thread) { +static Core::Frontend::WindowSystemType GetWindowSystemType() { + // Determine WSI type based on Qt platform. + QString platform_name = QGuiApplication::platformName(); + if (platform_name == QStringLiteral("windows")) + return Core::Frontend::WindowSystemType::Windows; + else if (platform_name == QStringLiteral("xcb")) + return Core::Frontend::WindowSystemType::X11; + else if (platform_name == QStringLiteral("wayland")) + return Core::Frontend::WindowSystemType::Wayland; + + LOG_CRITICAL(Frontend, "Unknown Qt platform!"); + return Core::Frontend::WindowSystemType::Windows; +} + +static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) { + Core::Frontend::EmuWindow::WindowSystemInfo wsi; + wsi.type = GetWindowSystemType(); + +#ifdef HAS_VULKAN + // Our Win32 Qt external doesn't have the private API. +#if defined(WIN32) || defined(__APPLE__) + wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; +#else + QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface(); + wsi.display_connection = pni->nativeResourceForWindow("display", window); + if (wsi.type == Core::Frontend::WindowSystemType::Wayland) + wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr; + else + wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; +#endif + wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f; +#endif + + return wsi; +} + +GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread_) + : QWidget(parent_), emu_thread(emu_thread_) { setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") .arg(QString::fromUtf8(Common::g_build_name), QString::fromUtf8(Common::g_scm_branch), @@ -460,6 +494,9 @@ bool GRenderWindow::InitRenderTarget() { break; } + // Update the Window System information with the new render target + window_info = GetWindowSystemInfo(child_widget->windowHandle()); + child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); layout()->addWidget(child_widget); // Reset minimum required size to avoid resizing issues on the main window after restarting. @@ -531,30 +568,7 @@ bool GRenderWindow::InitializeOpenGL() { bool GRenderWindow::InitializeVulkan() { #ifdef HAS_VULKAN - vk_instance = std::make_unique<QVulkanInstance>(); - vk_instance->setApiVersion(QVersionNumber(1, 1, 0)); - vk_instance->setFlags(QVulkanInstance::Flag::NoDebugOutputRedirect); - if (Settings::values.renderer_debug) { - const auto supported_layers{vk_instance->supportedLayers()}; - const bool found = - std::find_if(supported_layers.begin(), supported_layers.end(), [](const auto& layer) { - constexpr const char searched_layer[] = "VK_LAYER_LUNARG_standard_validation"; - return layer.name == searched_layer; - }); - if (found) { - vk_instance->setLayers(QByteArrayList() << "VK_LAYER_LUNARG_standard_validation"); - vk_instance->setExtensions(QByteArrayList() << VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - } - } - if (!vk_instance->create()) { - QMessageBox::critical( - this, tr("Error while initializing Vulkan 1.1!"), - tr("Your OS doesn't seem to support Vulkan 1.1 instances, or you do not have the " - "latest graphics drivers.")); - return false; - } - - auto child = new VulkanRenderWidget(this, vk_instance.get()); + auto child = new VulkanRenderWidget(this); child_widget = child; child_widget->windowHandle()->create(); main_context = std::make_unique<DummyContext>(); @@ -567,21 +581,6 @@ bool GRenderWindow::InitializeVulkan() { #endif } -void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const { -#ifdef HAS_VULKAN - const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr"); - const VkInstance instance_copy = vk_instance->vkInstance(); - const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child_widget->windowHandle()); - - std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); - std::memcpy(instance, &instance_copy, sizeof(instance_copy)); - std::memcpy(surface, &surface_copy, sizeof(surface_copy)); -#else - UNREACHABLE_MSG("Executing Vulkan code without compiling Vulkan"); -#endif -} - bool GRenderWindow::LoadOpenGL() { auto context = CreateSharedContext(); auto scope = context->Acquire(); diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index d69078df1..3626604ca 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -22,9 +22,6 @@ class GMainWindow; class QKeyEvent; class QTouchEvent; class QStringList; -#ifdef HAS_VULKAN -class QVulkanInstance; -#endif namespace VideoCore { enum class LoadCallbackStage; @@ -122,8 +119,6 @@ public: // EmuWindow implementation. void PollEvents() override; bool IsShown() const override; - void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const override; std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; void BackupGeometry(); @@ -186,10 +181,6 @@ private: // should instead be shared from std::shared_ptr<Core::Frontend::GraphicsContext> main_context; -#ifdef HAS_VULKAN - std::unique_ptr<QVulkanInstance> vk_instance; -#endif - /// Temporary storage of the screenshot taken QImage screenshot_image; diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index a821c7b3c..ea667caef 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -15,6 +15,10 @@ #include "ui_configure_graphics.h" #include "yuzu/configuration/configure_graphics.h" +#ifdef HAS_VULKAN +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#endif + namespace { enum class Resolution : int { Auto, @@ -165,41 +169,9 @@ void ConfigureGraphics::UpdateDeviceComboBox() { void ConfigureGraphics::RetrieveVulkanDevices() { #ifdef HAS_VULKAN - QVulkanInstance instance; - instance.setApiVersion(QVersionNumber(1, 1, 0)); - if (!instance.create()) { - LOG_INFO(Frontend, "Vulkan 1.1 not available"); - return; - } - const auto vkEnumeratePhysicalDevices{reinterpret_cast<PFN_vkEnumeratePhysicalDevices>( - instance.getInstanceProcAddr("vkEnumeratePhysicalDevices"))}; - if (vkEnumeratePhysicalDevices == nullptr) { - LOG_INFO(Frontend, "Failed to get pointer to vkEnumeratePhysicalDevices"); - return; - } - u32 physical_device_count; - if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, nullptr) != - VK_SUCCESS) { - LOG_INFO(Frontend, "Failed to get physical devices count"); - return; - } - std::vector<VkPhysicalDevice> physical_devices(physical_device_count); - if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, - physical_devices.data()) != VK_SUCCESS) { - LOG_INFO(Frontend, "Failed to get physical devices"); - return; - } - - const auto vkGetPhysicalDeviceProperties{reinterpret_cast<PFN_vkGetPhysicalDeviceProperties>( - instance.getInstanceProcAddr("vkGetPhysicalDeviceProperties"))}; - if (vkGetPhysicalDeviceProperties == nullptr) { - LOG_INFO(Frontend, "Failed to get pointer to vkGetPhysicalDeviceProperties"); - return; - } - for (const auto physical_device : physical_devices) { - VkPhysicalDeviceProperties properties; - vkGetPhysicalDeviceProperties(physical_device, &properties); - vulkan_devices.push_back(QString::fromUtf8(properties.deviceName)); + vulkan_devices.clear(); + for (auto& name : Vulkan::RendererVulkan::EnumerateDevices()) { + vulkan_devices.push_back(QString::fromStdString(name)); } #endif } diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 96dec50e2..15ac30f12 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -541,18 +541,19 @@ void ConfigureInputPlayer::HandleClick( button->setText(tr("[press key]")); button->setFocus(); - const auto iter = std::find(button_map.begin(), button_map.end(), button); - ASSERT(iter != button_map.end()); - const auto index = std::distance(button_map.begin(), iter); - ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); + // Keyboard keys can only be used as button devices + want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button; + if (want_keyboard_keys) { + const auto iter = std::find(button_map.begin(), button_map.end(), button); + ASSERT(iter != button_map.end()); + const auto index = std::distance(button_map.begin(), iter); + ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); + } input_setter = new_input_setter; device_pollers = InputCommon::Polling::GetPollers(type); - // Keyboard keys can only be used as button devices - want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button; - for (auto& poller : device_pollers) { poller->Start(); } diff --git a/src/yuzu/configuration/configure_input_player.ui b/src/yuzu/configuration/configure_input_player.ui index c3a1b68f0..4b37746a1 100644 --- a/src/yuzu/configuration/configure_input_player.ui +++ b/src/yuzu/configuration/configure_input_player.ui @@ -927,7 +927,7 @@ </item> </layout> </item> - <item row="2" column="0"> + <item row="0" column="2"> <layout class="QVBoxLayout" name="buttonShoulderButtonsSLVerticalLayout"> <item> <layout class="QHBoxLayout" name="buttonShoulderButtonsSLHorizontalLayout"> @@ -949,7 +949,7 @@ </item> </layout> </item> - <item row="2" column="1"> + <item row="1" column="2"> <layout class="QVBoxLayout" name="buttonShoulderButtonsSRVerticalLayout"> <item> <layout class="QHBoxLayout" name="buttonShoulderButtonsSRHorizontalLayout"> diff --git a/src/yuzu/configuration/configure_input_simple.cpp b/src/yuzu/configuration/configure_input_simple.cpp index ab3a11d30..0e0e8f113 100644 --- a/src/yuzu/configuration/configure_input_simple.cpp +++ b/src/yuzu/configuration/configure_input_simple.cpp @@ -35,6 +35,7 @@ void CallConfigureDialog(ConfigureInputSimple* caller, Args&&... args) { // - Open any dialogs // - Block in any way +constexpr std::size_t PLAYER_0_INDEX = 0; constexpr std::size_t HANDHELD_INDEX = 8; void HandheldOnProfileSelect() { @@ -53,8 +54,8 @@ void HandheldOnProfileSelect() { } void DualJoyconsDockedOnProfileSelect() { - Settings::values.players[0].connected = true; - Settings::values.players[0].type = Settings::ControllerType::DualJoycon; + Settings::values.players[PLAYER_0_INDEX].connected = true; + Settings::values.players[PLAYER_0_INDEX].type = Settings::ControllerType::DualJoycon; for (std::size_t player = 1; player <= HANDHELD_INDEX; ++player) { Settings::values.players[player].connected = false; @@ -64,7 +65,7 @@ void DualJoyconsDockedOnProfileSelect() { Settings::values.keyboard_enabled = false; Settings::values.mouse_enabled = false; Settings::values.debug_pad_enabled = false; - Settings::values.touchscreen.enabled = false; + Settings::values.touchscreen.enabled = true; } // Name, OnProfileSelect (called when selected in drop down), OnConfigure (called when configure @@ -78,7 +79,7 @@ constexpr std::array<InputProfile, 3> INPUT_PROFILES{{ }}, {QT_TR_NOOP("Single Player - Dual Joycons - Docked"), DualJoyconsDockedOnProfileSelect, [](ConfigureInputSimple* caller) { - CallConfigureDialog<ConfigureInputPlayer>(caller, 1, false); + CallConfigureDialog<ConfigureInputPlayer>(caller, PLAYER_0_INDEX, false); }}, {QT_TR_NOOP("Custom"), [] {}, CallConfigureDialog<ConfigureInput>}, }}; diff --git a/src/yuzu/configuration/configure_mouse_advanced.cpp b/src/yuzu/configuration/configure_mouse_advanced.cpp index 0a4abe34f..e0647ea5b 100644 --- a/src/yuzu/configuration/configure_mouse_advanced.cpp +++ b/src/yuzu/configuration/configure_mouse_advanced.cpp @@ -184,18 +184,19 @@ void ConfigureMouseAdvanced::HandleClick( button->setText(tr("[press key]")); button->setFocus(); - const auto iter = std::find(button_map.begin(), button_map.end(), button); - ASSERT(iter != button_map.end()); - const auto index = std::distance(button_map.begin(), iter); - ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); + // Keyboard keys can only be used as button devices + want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button; + if (want_keyboard_keys) { + const auto iter = std::find(button_map.begin(), button_map.end(), button); + ASSERT(iter != button_map.end()); + const auto index = std::distance(button_map.begin(), iter); + ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); + } input_setter = new_input_setter; device_pollers = InputCommon::Polling::GetPollers(type); - // Keyboard keys can only be used as button devices - want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button; - for (auto& poller : device_pollers) { poller->Start(); } diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index a2b88c787..dccbabcbf 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -315,7 +315,7 @@ GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvide item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type")); item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size")); } - item_model->setSortRole(GameListItemPath::TitleRole); + item_model->setSortRole(GameListItemPath::SortRole); connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons); connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry); @@ -441,6 +441,8 @@ void GameList::DonePopulating(QStringList watch_list) { if (children_total > 0) { search_field->setFocus(); } + item_model->sort(tree_view->header()->sortIndicatorSection(), + tree_view->header()->sortIndicatorOrder()); } void GameList::PopupContextMenu(const QPoint& menu_location) { @@ -666,8 +668,6 @@ void GameList::LoadInterfaceLayout() { // so make it as large as possible as default. header->resizeSection(COLUMN_NAME, header->width()); } - - item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder()); } const QStringList GameList::supported_file_extensions = { diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h index 7cde72d1b..3e6d5a7cd 100644 --- a/src/yuzu/game_list_p.h +++ b/src/yuzu/game_list_p.h @@ -65,10 +65,10 @@ public: */ class GameListItemPath : public GameListItem { public: - static const int TitleRole = SortRole; - static const int FullPathRole = SortRole + 1; - static const int ProgramIdRole = SortRole + 2; - static const int FileTypeRole = SortRole + 3; + static const int TitleRole = SortRole + 1; + static const int FullPathRole = SortRole + 2; + static const int ProgramIdRole = SortRole + 3; + static const int FileTypeRole = SortRole + 4; GameListItemPath() = default; GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data, @@ -95,7 +95,7 @@ public: } QVariant data(int role) const override { - if (role == Qt::DisplayRole) { + if (role == Qt::DisplayRole || role == SortRole) { std::string filename; Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename, nullptr); @@ -110,6 +110,9 @@ public: const auto& row1 = row_data.at(UISettings::values.row_1_text_id); const int row2_id = UISettings::values.row_2_text_id; + if (role == SortRole) + return row1.toLower(); + if (row2_id == 4) // None return row1; @@ -123,6 +126,13 @@ public: return GameListItem::data(role); } + + /** + * Override to prevent automatic sorting. + */ + bool operator<(const QStandardItem& other) const override { + return false; + } }; class GameListItemCompat : public GameListItem { @@ -289,6 +299,10 @@ public: int type() const override { return static_cast<int>(GameListItemType::AddDir); } + + bool operator<(const QStandardItem& other) const override { + return false; + } }; class GameList; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 940f24dc8..1717e06f9 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -205,7 +205,13 @@ GMainWindow::GMainWindow() ConnectMenuEvents(); ConnectWidgetEvents(); - LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch, + const auto build_id = std::string(Common::g_build_id); + const auto fmt = std::string(Common::g_title_bar_format_idle); + const auto yuzu_build_version = + fmt::format(fmt.empty() ? "yuzu Development Build" : fmt, std::string{}, std::string{}, + std::string{}, std::string{}, std::string{}, build_id); + + LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", yuzu_build_version, Common::g_scm_branch, Common::g_scm_desc); #ifdef ARCHITECTURE_x86_64 LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 3522dcf6d..411e7e647 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp @@ -156,12 +156,6 @@ EmuWindow_SDL2_GL::~EmuWindow_SDL2_GL() { SDL_GL_DeleteContext(window_context); } -void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const { - // Should not have been called from OpenGL - UNREACHABLE(); -} - std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const { return std::make_unique<SDLGLContext>(); } diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h index e092021d7..48bb41683 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h @@ -15,10 +15,6 @@ public: void Present() override; - /// Ignored in OpenGL - void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const override; - std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; private: diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp index 46d053f04..f2990910e 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp @@ -2,102 +2,62 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <algorithm> +#include <cstdlib> +#include <memory> #include <string> -#include <vector> -#include <SDL.h> -#include <SDL_vulkan.h> + #include <fmt/format.h> -#include <vulkan/vulkan.h> + #include "common/assert.h" #include "common/logging/log.h" #include "common/scm_rev.h" #include "core/settings.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" +// Include these late to avoid polluting everything with Xlib macros +#include <SDL.h> +#include <SDL_syswm.h> + EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) : EmuWindow_SDL2{system, fullscreen} { - if (SDL_Vulkan_LoadLibrary(nullptr) != 0) { - LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError()); - exit(EXIT_FAILURE); - } - - vkGetInstanceProcAddr = - reinterpret_cast<PFN_vkGetInstanceProcAddr>(SDL_Vulkan_GetVkGetInstanceProcAddr()); - if (vkGetInstanceProcAddr == nullptr) { - LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); - exit(EXIT_FAILURE); - } - const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc); render_window = - SDL_CreateWindow(window_title.c_str(), - SDL_WINDOWPOS_UNDEFINED, // x position - SDL_WINDOWPOS_UNDEFINED, // y position + SDL_CreateWindow(window_title.c_str(), SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height, - SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI | SDL_WINDOW_VULKAN); - - const bool use_standard_layers = UseStandardLayers(vkGetInstanceProcAddr); - - u32 extra_ext_count{}; - if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, NULL)) { - LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions count from SDL! {}", - SDL_GetError()); - exit(1); - } - - auto extra_ext_names = std::make_unique<const char* []>(extra_ext_count); - if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, extra_ext_names.get())) { - LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions from SDL! {}", SDL_GetError()); - exit(1); - } - std::vector<const char*> enabled_extensions; - enabled_extensions.insert(enabled_extensions.begin(), extra_ext_names.get(), - extra_ext_names.get() + extra_ext_count); - - std::vector<const char*> enabled_layers; - if (use_standard_layers) { - enabled_extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - enabled_layers.push_back("VK_LAYER_LUNARG_standard_validation"); - } - - VkApplicationInfo app_info{}; - app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; - app_info.apiVersion = VK_API_VERSION_1_1; - app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); - app_info.pApplicationName = "yuzu-emu"; - app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); - app_info.pEngineName = "yuzu-emu"; + SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI); - VkInstanceCreateInfo instance_ci{}; - instance_ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - instance_ci.pApplicationInfo = &app_info; - instance_ci.enabledExtensionCount = static_cast<u32>(enabled_extensions.size()); - instance_ci.ppEnabledExtensionNames = enabled_extensions.data(); - if (Settings::values.renderer_debug) { - instance_ci.enabledLayerCount = static_cast<u32>(enabled_layers.size()); - instance_ci.ppEnabledLayerNames = enabled_layers.data(); + SDL_SysWMinfo wm; + if (SDL_GetWindowWMInfo(render_window, &wm) == SDL_FALSE) { + LOG_CRITICAL(Frontend, "Failed to get information from the window manager"); + std::exit(EXIT_FAILURE); } - const auto vkCreateInstance = - reinterpret_cast<PFN_vkCreateInstance>(vkGetInstanceProcAddr(nullptr, "vkCreateInstance")); - if (vkCreateInstance == nullptr || - vkCreateInstance(&instance_ci, nullptr, &vk_instance) != VK_SUCCESS) { - LOG_CRITICAL(Frontend, "Failed to create Vulkan instance!"); - exit(EXIT_FAILURE); - } - - vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>( - vkGetInstanceProcAddr(vk_instance, "vkDestroyInstance")); - if (vkDestroyInstance == nullptr) { - LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); - exit(EXIT_FAILURE); - } - - if (!SDL_Vulkan_CreateSurface(render_window, vk_instance, &vk_surface)) { - LOG_CRITICAL(Frontend, "Failed to create Vulkan surface! {}", SDL_GetError()); - exit(EXIT_FAILURE); + switch (wm.subsystem) { +#ifdef SDL_VIDEO_DRIVER_WINDOWS + case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS: + window_info.type = Core::Frontend::WindowSystemType::Windows; + window_info.render_surface = reinterpret_cast<void*>(wm.info.win.window); + break; +#endif +#ifdef SDL_VIDEO_DRIVER_X11 + case SDL_SYSWM_TYPE::SDL_SYSWM_X11: + window_info.type = Core::Frontend::WindowSystemType::X11; + window_info.display_connection = wm.info.x11.display; + window_info.render_surface = reinterpret_cast<void*>(wm.info.x11.window); + break; +#endif +#ifdef SDL_VIDEO_DRIVER_WAYLAND + case SDL_SYSWM_TYPE::SDL_SYSWM_WAYLAND: + window_info.type = Core::Frontend::WindowSystemType::Wayland; + window_info.display_connection = wm.info.wl.display; + window_info.render_surface = wm.info.wl.surface; + break; +#endif + default: + LOG_CRITICAL(Frontend, "Window manager subsystem not implemented"); + std::exit(EXIT_FAILURE); } OnResize(); @@ -107,51 +67,12 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) Common::g_scm_branch, Common::g_scm_desc); } -EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() { - vkDestroyInstance(vk_instance, nullptr); -} - -void EmuWindow_SDL2_VK::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const { - const auto instance_proc_addr = vkGetInstanceProcAddr; - std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); - std::memcpy(instance, &vk_instance, sizeof(vk_instance)); - std::memcpy(surface, &vk_surface, sizeof(vk_surface)); -} +EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() = default; std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const { return nullptr; } -bool EmuWindow_SDL2_VK::UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const { - if (!Settings::values.renderer_debug) { - return false; - } - - const auto vkEnumerateInstanceLayerProperties = - reinterpret_cast<PFN_vkEnumerateInstanceLayerProperties>( - vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceLayerProperties")); - if (vkEnumerateInstanceLayerProperties == nullptr) { - LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); - return false; - } - - u32 available_layers_count{}; - if (vkEnumerateInstanceLayerProperties(&available_layers_count, nullptr) != VK_SUCCESS) { - LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!"); - return false; - } - std::vector<VkLayerProperties> layers(available_layers_count); - if (vkEnumerateInstanceLayerProperties(&available_layers_count, layers.data()) != VK_SUCCESS) { - LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!"); - return false; - } - - return std::find_if(layers.begin(), layers.end(), [&](const auto& layer) { - return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation"); - }) != layers.end(); -} - void EmuWindow_SDL2_VK::Present() { // TODO (bunnei): ImplementMe } diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h index 3dd1f3f61..b8021ebea 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h @@ -4,27 +4,21 @@ #pragma once -#include <vulkan/vulkan.h> +#include <memory> + #include "core/frontend/emu_window.h" #include "yuzu_cmd/emu_window/emu_window_sdl2.h" +namespace Core { +class System; +} + class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 { public: explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen); ~EmuWindow_SDL2_VK(); void Present() override; - void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const override; std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; - -private: - bool UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const; - - VkInstance vk_instance{}; - VkSurfaceKHR vk_surface{}; - - PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; - PFN_vkDestroyInstance vkDestroyInstance{}; }; diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp index a837430cc..8584f6671 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp @@ -116,10 +116,6 @@ bool EmuWindow_SDL2_Hide::IsShown() const { return false; } -void EmuWindow_SDL2_Hide::RetrieveVulkanHandlers(void*, void*, void*) const { - UNREACHABLE(); -} - class SDLGLContext : public Core::Frontend::GraphicsContext { public: explicit SDLGLContext() { diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h index 9f5d04fca..c13a82df2 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h @@ -19,10 +19,6 @@ public: /// Whether the screen is being shown or not. bool IsShown() const override; - /// Retrieves Vulkan specific handlers from the window - void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, - void* surface) const override; - std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; private: |