diff options
27 files changed, 527 insertions, 317 deletions
diff --git a/.ci/scripts/windows/upload.ps1 b/.ci/scripts/windows/upload.ps1 index 21abcd752..492763420 100644 --- a/.ci/scripts/windows/upload.ps1 +++ b/.ci/scripts/windows/upload.ps1 @@ -26,7 +26,11 @@ $env:BUILD_ZIP = $MSVC_BUILD_ZIP $env:BUILD_SYMBOLS = $MSVC_BUILD_PDB $env:BUILD_UPDATE = $MSVC_SEVENZIP -$BUILD_DIR = ".\build\bin\Release" +if (Test-Path -Path ".\build\bin\Release") { + $BUILD_DIR = ".\build\bin\Release" +} else { + $BUILD_DIR = ".\build\bin\" +} # Cleanup unneeded data in submodules git submodule foreach git clean -fxd diff --git a/CMakeLists.txt b/CMakeLists.txt index 561eaafb2..7276ac9dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -453,6 +453,7 @@ endif() # List of all FFmpeg components required set(FFmpeg_COMPONENTS avcodec + avfilter avutil swscale) diff --git a/CMakeModules/CopyYuzuFFmpegDeps.cmake b/CMakeModules/CopyYuzuFFmpegDeps.cmake index c6231737e..7aaa073ee 100644 --- a/CMakeModules/CopyYuzuFFmpegDeps.cmake +++ b/CMakeModules/CopyYuzuFFmpegDeps.cmake @@ -3,7 +3,7 @@ function(copy_yuzu_FFmpeg_deps target_dir) include(WindowsCopyFiles) - set(DLL_DEST "${CMAKE_BINARY_DIR}/bin/$<CONFIG>/") + set(DLL_DEST "$<TARGET_FILE_DIR:${target_dir}>/") file(READ "${FFmpeg_PATH}/requirements.txt" FFmpeg_REQUIRED_DLLS) string(STRIP "${FFmpeg_REQUIRED_DLLS}" FFmpeg_REQUIRED_DLLS) windows_copy_files(${target_dir} ${FFmpeg_DLL_DIR} ${DLL_DEST} ${FFmpeg_REQUIRED_DLLS}) diff --git a/CMakeModules/CopyYuzuQt5Deps.cmake b/CMakeModules/CopyYuzuQt5Deps.cmake index ab56de444..b3a65c347 100644 --- a/CMakeModules/CopyYuzuQt5Deps.cmake +++ b/CMakeModules/CopyYuzuQt5Deps.cmake @@ -4,7 +4,7 @@ function(copy_yuzu_Qt5_deps target_dir) include(WindowsCopyFiles) if (MSVC) - set(DLL_DEST "${CMAKE_BINARY_DIR}/bin/$<CONFIG>/") + set(DLL_DEST "$<TARGET_FILE_DIR:${target_dir}>/") set(Qt5_DLL_DIR "${Qt5_DIR}/../../../bin") else() set(DLL_DEST "${CMAKE_BINARY_DIR}/bin/") diff --git a/CMakeModules/CopyYuzuSDLDeps.cmake b/CMakeModules/CopyYuzuSDLDeps.cmake index 7ffdd8a1d..464eed5e9 100644 --- a/CMakeModules/CopyYuzuSDLDeps.cmake +++ b/CMakeModules/CopyYuzuSDLDeps.cmake @@ -3,6 +3,6 @@ function(copy_yuzu_SDL_deps target_dir) include(WindowsCopyFiles) - set(DLL_DEST "${CMAKE_BINARY_DIR}/bin/$<CONFIG>/") + set(DLL_DEST "$<TARGET_FILE_DIR:${target_dir}>/") windows_copy_files(${target_dir} ${SDL2_DLL_DIR} ${DLL_DEST} SDL2.dll) endfunction(copy_yuzu_SDL_deps) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index e59eeb489..d78d10147 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -8,15 +8,21 @@ set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) # Disable tests in all externals supporting the standard option name set(BUILD_TESTING OFF) +# Build only static externals +set(BUILD_SHARED_LIBS OFF) + +# Skip install rules for all externals +set_directory_properties(PROPERTIES EXCLUDE_FROM_ALL ON) + # xbyak if ((ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) AND NOT TARGET xbyak::xbyak) - add_subdirectory(xbyak EXCLUDE_FROM_ALL) + add_subdirectory(xbyak) endif() # Dynarmic if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic) set(DYNARMIC_IGNORE_ASSERTS ON) - add_subdirectory(dynarmic EXCLUDE_FROM_ALL) + add_subdirectory(dynarmic) add_library(dynarmic::dynarmic ALIAS dynarmic) endif() @@ -34,7 +40,7 @@ if (NOT TARGET inih::INIReader) endif() # mbedtls -add_subdirectory(mbedtls EXCLUDE_FROM_ALL) +add_subdirectory(mbedtls) target_include_directories(mbedtls PUBLIC ./mbedtls/include) # MicroProfile @@ -48,7 +54,7 @@ endif() # libusb if (ENABLE_LIBUSB AND NOT TARGET libusb::usb) - add_subdirectory(libusb EXCLUDE_FROM_ALL) + add_subdirectory(libusb) endif() # SDL2 @@ -67,18 +73,16 @@ if (YUZU_USE_EXTERNAL_SDL2) set(HIDAPI ON) endif() - set(SDL_STATIC ON) - set(SDL_SHARED OFF) if (APPLE) set(SDL_FILE ON) endif() - add_subdirectory(SDL EXCLUDE_FROM_ALL) + add_subdirectory(SDL) endif() # ENet if (NOT TARGET enet::enet) - add_subdirectory(enet EXCLUDE_FROM_ALL) + add_subdirectory(enet) target_include_directories(enet INTERFACE ./enet/include) add_library(enet::enet ALIAS enet) endif() @@ -86,24 +90,26 @@ endif() # Cubeb if (ENABLE_CUBEB AND NOT TARGET cubeb::cubeb) set(BUILD_TESTS OFF) - add_subdirectory(cubeb EXCLUDE_FROM_ALL) + set(BUILD_TOOLS OFF) + add_subdirectory(cubeb) add_library(cubeb::cubeb ALIAS cubeb) endif() # DiscordRPC if (USE_DISCORD_PRESENCE AND NOT TARGET DiscordRPC::discord-rpc) - add_subdirectory(discord-rpc EXCLUDE_FROM_ALL) + set(BUILD_EXAMPLES OFF) + add_subdirectory(discord-rpc) target_include_directories(discord-rpc INTERFACE ./discord-rpc/include) add_library(DiscordRPC::discord-rpc ALIAS discord-rpc) endif() # Sirit -add_subdirectory(sirit EXCLUDE_FROM_ALL) +add_subdirectory(sirit) # httplib if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib) set(HTTPLIB_REQUIRE_OPENSSL ON) - add_subdirectory(cpp-httplib EXCLUDE_FROM_ALL) + add_subdirectory(cpp-httplib) endif() # cpp-jwt @@ -111,12 +117,12 @@ if (ENABLE_WEB_SERVICE AND NOT TARGET cpp-jwt::cpp-jwt) set(CPP_JWT_BUILD_EXAMPLES OFF) set(CPP_JWT_BUILD_TESTS OFF) set(CPP_JWT_USE_VENDORED_NLOHMANN_JSON OFF) - add_subdirectory(cpp-jwt EXCLUDE_FROM_ALL) + add_subdirectory(cpp-jwt) endif() # Opus if (NOT TARGET Opus::opus) - add_subdirectory(opus EXCLUDE_FROM_ALL) + add_subdirectory(opus) endif() # FFMpeg @@ -130,16 +136,14 @@ endif() # Vulkan-Headers if (YUZU_USE_EXTERNAL_VULKAN_HEADERS) - add_subdirectory(Vulkan-Headers EXCLUDE_FROM_ALL) + add_subdirectory(Vulkan-Headers) endif() if (NOT TARGET LLVM::Demangle) - add_library(demangle STATIC) + add_library(demangle demangle/ItaniumDemangle.cpp) target_include_directories(demangle PUBLIC ./demangle) - target_sources(demangle PRIVATE demangle/ItaniumDemangle.cpp) add_library(LLVM::Demangle ALIAS demangle) endif() -add_library(stb STATIC) +add_library(stb stb/stb_dxt.cpp) target_include_directories(stb PUBLIC ./stb) -target_sources(stb PRIVATE stb/stb_dxt.cpp) diff --git a/externals/ffmpeg/CMakeLists.txt b/externals/ffmpeg/CMakeLists.txt index 0baac8e17..03fad0778 100644 --- a/externals/ffmpeg/CMakeLists.txt +++ b/externals/ffmpeg/CMakeLists.txt @@ -131,7 +131,6 @@ if (NOT WIN32) COMMAND /bin/bash ${FFmpeg_PREFIX}/configure --disable-avdevice - --disable-avfilter --disable-avformat --disable-doc --disable-everything @@ -143,6 +142,7 @@ if (NOT WIN32) --enable-decoder=h264 --enable-decoder=vp8 --enable-decoder=vp9 + --enable-filter=yadif --cc="${FFmpeg_CC}" --cxx="${FFmpeg_CXX}" ${FFmpeg_HWACCEL_FLAGS} @@ -199,7 +199,7 @@ if (NOT WIN32) endif() else(WIN32) # Use yuzu FFmpeg binaries - set(FFmpeg_EXT_NAME "ffmpeg-4.4") + set(FFmpeg_EXT_NAME "ffmpeg-5.1.3") set(FFmpeg_PATH "${CMAKE_BINARY_DIR}/externals/${FFmpeg_EXT_NAME}") download_bundled_external("ffmpeg/" ${FFmpeg_EXT_NAME} "") set(FFmpeg_FOUND YES) @@ -210,6 +210,7 @@ else(WIN32) set(FFmpeg_LIBRARIES ${FFmpeg_LIBRARY_DIR}/swscale.lib ${FFmpeg_LIBRARY_DIR}/avcodec.lib + ${FFmpeg_LIBRARY_DIR}/avfilter.lib ${FFmpeg_LIBRARY_DIR}/avutil.lib CACHE PATH "Paths to FFmpeg libraries" FORCE) # exported variables diff --git a/externals/glad/CMakeLists.txt b/externals/glad/CMakeLists.txt index 3dfcac2fd..0c8e285a4 100644 --- a/externals/glad/CMakeLists.txt +++ b/externals/glad/CMakeLists.txt @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2015 Yuri Kunde Schlesner <yuriks@yuriks.net> # SPDX-License-Identifier: GPL-2.0-or-later -add_library(glad STATIC +add_library(glad src/glad.c include/KHR/khrplatform.h include/glad/glad.h diff --git a/externals/libusb/CMakeLists.txt b/externals/libusb/CMakeLists.txt index 6317ea807..6757b59da 100644 --- a/externals/libusb/CMakeLists.txt +++ b/externals/libusb/CMakeLists.txt @@ -122,7 +122,7 @@ else() # MINGW OR (${CMAKE_SYSTEM_NAME} MATCHES "Linux") add_compile_options(/utf-8) endif() - add_library(usb STATIC EXCLUDE_FROM_ALL + add_library(usb libusb/libusb/core.c libusb/libusb/core.c libusb/libusb/descriptor.c diff --git a/externals/opus/CMakeLists.txt b/externals/opus/CMakeLists.txt index 410ff7c08..d9a03423d 100644 --- a/externals/opus/CMakeLists.txt +++ b/externals/opus/CMakeLists.txt @@ -23,7 +23,7 @@ else() endif() endif() -add_library(opus STATIC +add_library(opus # CELT sources opus/celt/bands.c opus/celt/celt.c diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp index 9bbb54162..2331aaff9 100644 --- a/src/audio_core/sink/sink_stream.cpp +++ b/src/audio_core/sink/sink_stream.cpp @@ -273,6 +273,9 @@ void SinkStream::WaitFreeSpace() { std::unique_lock lk{release_mutex}; release_cv.wait_for(lk, std::chrono::milliseconds(5), [this]() { return queued_buffers < max_queue_size; }); + if (queued_buffers > max_queue_size + 3) { + release_cv.wait(lk, [this]() { return queued_buffers < max_queue_size; }); + } } } // namespace AudioCore::Sink diff --git a/src/core/core.cpp b/src/core/core.cpp index b5f62690e..4406ae30e 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -117,8 +117,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, return nullptr; } - return FileSys::ConcatenatedVfsFile::MakeConcatenatedFile(std::move(concat), - dir->GetName()); + return FileSys::ConcatenatedVfsFile::MakeConcatenatedFile(concat, dir->GetName()); } if (Common::FS::IsDir(path)) { diff --git a/src/core/file_sys/romfs.cpp b/src/core/file_sys/romfs.cpp index ddcfe5980..fb5683a6b 100644 --- a/src/core/file_sys/romfs.cpp +++ b/src/core/file_sys/romfs.cpp @@ -140,7 +140,8 @@ VirtualFile CreateRomFS(VirtualDir dir, VirtualDir ext) { return nullptr; RomFSBuildContext ctx{dir, ext}; - return ConcatenatedVfsFile::MakeConcatenatedFile(0, ctx.Build(), dir->GetName()); + auto file_map = ctx.Build(); + return ConcatenatedVfsFile::MakeConcatenatedFile(0, file_map, dir->GetName()); } } // namespace FileSys diff --git a/src/core/file_sys/vfs_concat.cpp b/src/core/file_sys/vfs_concat.cpp index d23623aa0..853b893a1 100644 --- a/src/core/file_sys/vfs_concat.cpp +++ b/src/core/file_sys/vfs_concat.cpp @@ -10,84 +10,105 @@ namespace FileSys { -static bool VerifyConcatenationMapContinuity(const std::multimap<u64, VirtualFile>& map) { - const auto last_valid = --map.end(); - for (auto iter = map.begin(); iter != last_valid;) { - const auto old = iter++; - if (old->first + old->second->GetSize() != iter->first) { +ConcatenatedVfsFile::ConcatenatedVfsFile(ConcatenationMap&& concatenation_map_, std::string&& name_) + : concatenation_map(std::move(concatenation_map_)), name(std::move(name_)) { + DEBUG_ASSERT(this->VerifyContinuity()); +} + +bool ConcatenatedVfsFile::VerifyContinuity() const { + u64 last_offset = 0; + for (auto& entry : concatenation_map) { + if (entry.offset != last_offset) { return false; } - } - - return map.begin()->first == 0; -} -ConcatenatedVfsFile::ConcatenatedVfsFile(std::vector<VirtualFile> files_, std::string name_) - : name(std::move(name_)) { - std::size_t next_offset = 0; - for (const auto& file : files_) { - files.emplace(next_offset, file); - next_offset += file->GetSize(); + last_offset = entry.offset + entry.file->GetSize(); } -} -ConcatenatedVfsFile::ConcatenatedVfsFile(std::multimap<u64, VirtualFile> files_, std::string name_) - : files(std::move(files_)), name(std::move(name_)) { - ASSERT(VerifyConcatenationMapContinuity(files)); + return true; } ConcatenatedVfsFile::~ConcatenatedVfsFile() = default; -VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(std::vector<VirtualFile> files, - std::string name) { - if (files.empty()) +VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(const std::vector<VirtualFile>& files, + std::string&& name) { + // Fold trivial cases. + if (files.empty()) { return nullptr; - if (files.size() == 1) - return files[0]; + } + if (files.size() == 1) { + return files.front(); + } + + // Make the concatenation map from the input. + std::vector<ConcatenationEntry> concatenation_map; + concatenation_map.reserve(files.size()); + u64 last_offset = 0; + + for (auto& file : files) { + concatenation_map.emplace_back(ConcatenationEntry{ + .offset = last_offset, + .file = file, + }); + + last_offset += file->GetSize(); + } - return VirtualFile(new ConcatenatedVfsFile(std::move(files), std::move(name))); + return VirtualFile(new ConcatenatedVfsFile(std::move(concatenation_map), std::move(name))); } VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte, - std::multimap<u64, VirtualFile> files, - std::string name) { - if (files.empty()) + const std::multimap<u64, VirtualFile>& files, + std::string&& name) { + // Fold trivial cases. + if (files.empty()) { return nullptr; - if (files.size() == 1) + } + if (files.size() == 1) { return files.begin()->second; + } - const auto last_valid = --files.end(); - for (auto iter = files.begin(); iter != last_valid;) { - const auto old = iter++; - if (old->first + old->second->GetSize() != iter->first) { - files.emplace(old->first + old->second->GetSize(), - std::make_shared<StaticVfsFile>(filler_byte, iter->first - old->first - - old->second->GetSize())); + // Make the concatenation map from the input. + std::vector<ConcatenationEntry> concatenation_map; + + concatenation_map.reserve(files.size()); + u64 last_offset = 0; + + // Iteration of a multimap is ordered, so offset will be strictly non-decreasing. + for (auto& [offset, file] : files) { + if (offset > last_offset) { + concatenation_map.emplace_back(ConcatenationEntry{ + .offset = last_offset, + .file = std::make_shared<StaticVfsFile>(filler_byte, offset - last_offset), + }); } - } - // Ensure the map starts at offset 0 (start of file), otherwise pad to fill. - if (files.begin()->first != 0) - files.emplace(0, std::make_shared<StaticVfsFile>(filler_byte, files.begin()->first)); + concatenation_map.emplace_back(ConcatenationEntry{ + .offset = offset, + .file = file, + }); + + last_offset = offset + file->GetSize(); + } - return VirtualFile(new ConcatenatedVfsFile(std::move(files), std::move(name))); + return VirtualFile(new ConcatenatedVfsFile(std::move(concatenation_map), std::move(name))); } std::string ConcatenatedVfsFile::GetName() const { - if (files.empty()) { + if (concatenation_map.empty()) { return ""; } if (!name.empty()) { return name; } - return files.begin()->second->GetName(); + return concatenation_map.front().file->GetName(); } std::size_t ConcatenatedVfsFile::GetSize() const { - if (files.empty()) { + if (concatenation_map.empty()) { return 0; } - return files.rbegin()->first + files.rbegin()->second->GetSize(); + return concatenation_map.back().offset + concatenation_map.back().file->GetSize(); } bool ConcatenatedVfsFile::Resize(std::size_t new_size) { @@ -95,10 +116,10 @@ bool ConcatenatedVfsFile::Resize(std::size_t new_size) { } VirtualDir ConcatenatedVfsFile::GetContainingDirectory() const { - if (files.empty()) { + if (concatenation_map.empty()) { return nullptr; } - return files.begin()->second->GetContainingDirectory(); + return concatenation_map.front().file->GetContainingDirectory(); } bool ConcatenatedVfsFile::IsWritable() const { @@ -110,25 +131,45 @@ bool ConcatenatedVfsFile::IsReadable() const { } std::size_t ConcatenatedVfsFile::Read(u8* data, std::size_t length, std::size_t offset) const { - auto entry = --files.end(); - for (auto iter = files.begin(); iter != files.end(); ++iter) { - if (iter->first > offset) { - entry = --iter; + const ConcatenationEntry key{ + .offset = offset, + .file = nullptr, + }; + + // Read nothing if the map is empty. + if (concatenation_map.empty()) { + return 0; + } + + // Binary search to find the iterator to the first position we can check. + // It must exist, since we are not empty and are comparing unsigned integers. + auto it = std::prev(std::upper_bound(concatenation_map.begin(), concatenation_map.end(), key)); + u64 cur_length = length; + u64 cur_offset = offset; + + while (cur_length > 0 && it != concatenation_map.end()) { + // Check if we can read the file at this position. + const auto& file = it->file; + const u64 file_offset = it->offset; + const u64 file_size = file->GetSize(); + + if (cur_offset >= file_offset + file_size) { + // Entirely out of bounds read. break; } - } - if (entry->first + entry->second->GetSize() <= offset) - return 0; + // Read the file at this position. + const u64 intended_read_size = std::min<u64>(cur_length, file_size); + const u64 actual_read_size = + file->Read(data + (cur_offset - offset), intended_read_size, cur_offset - file_offset); - const auto read_in = - std::min<u64>(entry->first + entry->second->GetSize() - offset, entry->second->GetSize()); - if (length > read_in) { - return entry->second->Read(data, read_in, offset - entry->first) + - Read(data + read_in, length - read_in, offset + read_in); + // Update tracking. + cur_offset += actual_read_size; + cur_length -= actual_read_size; + it++; } - return entry->second->Read(data, std::min<u64>(read_in, length), offset - entry->first); + return cur_offset - offset; } std::size_t ConcatenatedVfsFile::Write(const u8* data, std::size_t length, std::size_t offset) { diff --git a/src/core/file_sys/vfs_concat.h b/src/core/file_sys/vfs_concat.h index 9be0261b6..6b329d545 100644 --- a/src/core/file_sys/vfs_concat.h +++ b/src/core/file_sys/vfs_concat.h @@ -3,6 +3,7 @@ #pragma once +#include <compare> #include <map> #include <memory> #include "core/file_sys/vfs.h" @@ -12,19 +13,33 @@ namespace FileSys { // Class that wraps multiple vfs files and concatenates them, making reads seamless. Currently // read-only. class ConcatenatedVfsFile : public VfsFile { - explicit ConcatenatedVfsFile(std::vector<VirtualFile> files, std::string name_); - explicit ConcatenatedVfsFile(std::multimap<u64, VirtualFile> files, std::string name_); +private: + struct ConcatenationEntry { + u64 offset; + VirtualFile file; + + auto operator<=>(const ConcatenationEntry& other) const { + return this->offset <=> other.offset; + } + }; + using ConcatenationMap = std::vector<ConcatenationEntry>; + + explicit ConcatenatedVfsFile(std::vector<ConcatenationEntry>&& concatenation_map, + std::string&& name); + bool VerifyContinuity() const; public: ~ConcatenatedVfsFile() override; /// Wrapper function to allow for more efficient handling of files.size() == 0, 1 cases. - static VirtualFile MakeConcatenatedFile(std::vector<VirtualFile> files, std::string name); + static VirtualFile MakeConcatenatedFile(const std::vector<VirtualFile>& files, + std::string&& name); /// Convenience function that turns a map of offsets to files into a concatenated file, filling /// gaps with a given filler byte. - static VirtualFile MakeConcatenatedFile(u8 filler_byte, std::multimap<u64, VirtualFile> files, - std::string name); + static VirtualFile MakeConcatenatedFile(u8 filler_byte, + const std::multimap<u64, VirtualFile>& files, + std::string&& name); std::string GetName() const override; std::size_t GetSize() const override; @@ -37,8 +52,7 @@ public: bool Rename(std::string_view new_name) override; private: - // Maps starting offset to file -- more efficient. - std::multimap<u64, VirtualFile> files; + ConcatenationMap concatenation_map; std::string name; }; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 40db243d2..4b4f7061b 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include "common/microprofile.h" +#include "video_core/buffer_cache/buffer_cache_base.h" +#include "video_core/control/channel_state_cache.inc" namespace VideoCommon { @@ -9,4 +11,6 @@ MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 12 MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128)); MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128)); +template class VideoCommon::ChannelSetupCaches<VideoCommon::BufferCacheChannelInfo>; + } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 65494097b..c336be707 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -64,17 +64,22 @@ void BufferCache<P>::RunGarbageCollector() { template <class P> void BufferCache<P>::TickFrame() { // Calculate hits and shots and move hit bits to the right - const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); - const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); - std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, - uniform_cache_hits.begin() + 1); - std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, - uniform_cache_shots.begin() + 1); - uniform_cache_hits[0] = 0; - uniform_cache_shots[0] = 0; + + const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(), + channel_state->uniform_cache_hits.end()); + const u32 shots = std::reduce(channel_state->uniform_cache_shots.begin(), + channel_state->uniform_cache_shots.end()); + std::copy_n(channel_state->uniform_cache_hits.begin(), + channel_state->uniform_cache_hits.size() - 1, + channel_state->uniform_cache_hits.begin() + 1); + std::copy_n(channel_state->uniform_cache_shots.begin(), + channel_state->uniform_cache_shots.size() - 1, + channel_state->uniform_cache_shots.begin() + 1); + channel_state->uniform_cache_hits[0] = 0; + channel_state->uniform_cache_shots[0] = 0; const bool skip_preferred = hits * 256 < shots * 251; - uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; + channel_state->uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; // If we can obtain the memory info, use it instead of the estimate. if (runtime.CanReportMemoryUsage()) { @@ -164,10 +169,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am BufferId buffer_a; BufferId buffer_b; do { - has_deleted_buffers = false; + channel_state->has_deleted_buffers = false; buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount)); buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount)); - } while (has_deleted_buffers); + } while (channel_state->has_deleted_buffers); auto& src_buffer = slot_buffers[buffer_a]; auto& dest_buffer = slot_buffers[buffer_b]; SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount)); @@ -272,30 +277,30 @@ void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr .size = size, .buffer_id = BufferId{}, }; - uniform_buffers[stage][index] = binding; + channel_state->uniform_buffers[stage][index] = binding; } template <class P> void BufferCache<P>::DisableGraphicsUniformBuffer(size_t stage, u32 index) { - uniform_buffers[stage][index] = NULL_BINDING; + channel_state->uniform_buffers[stage][index] = NULL_BINDING; } template <class P> void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) { MICROPROFILE_SCOPE(GPU_PrepareBuffers); do { - has_deleted_buffers = false; + channel_state->has_deleted_buffers = false; DoUpdateGraphicsBuffers(is_indexed); - } while (has_deleted_buffers); + } while (channel_state->has_deleted_buffers); } template <class P> void BufferCache<P>::UpdateComputeBuffers() { MICROPROFILE_SCOPE(GPU_PrepareBuffers); do { - has_deleted_buffers = false; + channel_state->has_deleted_buffers = false; DoUpdateComputeBuffers(); - } while (has_deleted_buffers); + } while (channel_state->has_deleted_buffers); } template <class P> @@ -338,98 +343,102 @@ template <class P> void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffer_masks != mask) { + if (channel_state->enabled_uniform_buffer_masks != mask) { if constexpr (IS_OPENGL) { - fast_bound_uniform_buffers.fill(0); + channel_state->fast_bound_uniform_buffers.fill(0); } - dirty_uniform_buffers.fill(~u32{0}); - uniform_buffer_binding_sizes.fill({}); + channel_state->dirty_uniform_buffers.fill(~u32{0}); + channel_state->uniform_buffer_binding_sizes.fill({}); } } - enabled_uniform_buffer_masks = mask; - uniform_buffer_sizes = sizes; + channel_state->enabled_uniform_buffer_masks = mask; + channel_state->uniform_buffer_sizes = sizes; } template <class P> void BufferCache<P>::SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes) { - enabled_compute_uniform_buffer_mask = mask; - compute_uniform_buffer_sizes = sizes; + channel_state->enabled_compute_uniform_buffer_mask = mask; + channel_state->compute_uniform_buffer_sizes = sizes; } template <class P> void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) { - enabled_storage_buffers[stage] = 0; - written_storage_buffers[stage] = 0; + channel_state->enabled_storage_buffers[stage] = 0; + channel_state->written_storage_buffers[stage] = 0; } template <class P> void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written) { - enabled_storage_buffers[stage] |= 1U << ssbo_index; - written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index; + channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index; + channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index; const auto& cbufs = maxwell3d->state.shader_stages[stage]; const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset; - storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written); + channel_state->storage_buffers[stage][ssbo_index] = + StorageBufferBinding(ssbo_addr, cbuf_index, is_written); } template <class P> void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) { - enabled_texture_buffers[stage] = 0; - written_texture_buffers[stage] = 0; - image_texture_buffers[stage] = 0; + channel_state->enabled_texture_buffers[stage] = 0; + channel_state->written_texture_buffers[stage] = 0; + channel_state->image_texture_buffers[stage] = 0; } template <class P> void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, bool is_written, bool is_image) { - enabled_texture_buffers[stage] |= 1U << tbo_index; - written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + channel_state->enabled_texture_buffers[stage] |= 1U << tbo_index; + channel_state->written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { - image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; + channel_state->image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; } - texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); + channel_state->texture_buffers[stage][tbo_index] = + GetTextureBufferBinding(gpu_addr, size, format); } template <class P> void BufferCache<P>::UnbindComputeStorageBuffers() { - enabled_compute_storage_buffers = 0; - written_compute_storage_buffers = 0; - image_compute_texture_buffers = 0; + channel_state->enabled_compute_storage_buffers = 0; + channel_state->written_compute_storage_buffers = 0; + channel_state->image_compute_texture_buffers = 0; } template <class P> void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written) { - enabled_compute_storage_buffers |= 1U << ssbo_index; - written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index; + channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index; + channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index; const auto& launch_desc = kepler_compute->launch_description; ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); const auto& cbufs = launch_desc.const_buffer_config; const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset; - compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written); + channel_state->compute_storage_buffers[ssbo_index] = + StorageBufferBinding(ssbo_addr, cbuf_index, is_written); } template <class P> void BufferCache<P>::UnbindComputeTextureBuffers() { - enabled_compute_texture_buffers = 0; - written_compute_texture_buffers = 0; - image_compute_texture_buffers = 0; + channel_state->enabled_compute_texture_buffers = 0; + channel_state->written_compute_texture_buffers = 0; + channel_state->image_compute_texture_buffers = 0; } template <class P> void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, bool is_written, bool is_image) { - enabled_compute_texture_buffers |= 1U << tbo_index; - written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; + channel_state->enabled_compute_texture_buffers |= 1U << tbo_index; + channel_state->written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { - image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + channel_state->image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; } - compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); + channel_state->compute_texture_buffers[tbo_index] = + GetTextureBufferBinding(gpu_addr, size, format); } template <class P> @@ -672,10 +681,10 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { template <class P> void BufferCache<P>::BindHostIndexBuffer() { - Buffer& buffer = slot_buffers[index_buffer.buffer_id]; - TouchBuffer(buffer, index_buffer.buffer_id); - const u32 offset = buffer.Offset(index_buffer.cpu_addr); - const u32 size = index_buffer.size; + Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id]; + TouchBuffer(buffer, channel_state->index_buffer.buffer_id); + const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr); + const u32 size = channel_state->index_buffer.size; const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { if constexpr (USE_MEMORY_MAPS) { @@ -689,7 +698,7 @@ void BufferCache<P>::BindHostIndexBuffer() { buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); } } else { - SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); + SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size); } if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { const u32 new_offset = @@ -706,7 +715,7 @@ template <class P> void BufferCache<P>::BindHostVertexBuffers() { auto& flags = maxwell3d->dirty.flags; for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { - const Binding& binding = vertex_buffers[index]; + const Binding& binding = channel_state->vertex_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); @@ -729,19 +738,19 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() { SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); }; if (current_draw_indirect->include_count) { - bind_buffer(count_buffer_binding); + bind_buffer(channel_state->count_buffer_binding); } - bind_buffer(indirect_buffer_binding); + bind_buffer(channel_state->indirect_buffer_binding); } template <class P> void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { u32 dirty = ~0U; if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - dirty = std::exchange(dirty_uniform_buffers[stage], 0); + dirty = std::exchange(channel_state->dirty_uniform_buffers[stage], 0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { + ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) { const bool needs_bind = ((dirty >> index) & 1) != 0; BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { @@ -753,13 +762,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { template <class P> void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind) { - const Binding& binding = uniform_buffers[stage][index]; + const Binding& binding = channel_state->uniform_buffers[stage][index]; const VAddr cpu_addr = binding.cpu_addr; - const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); + const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && - size <= uniform_buffer_skip_cache_size && + size <= channel_state->uniform_buffer_skip_cache_size && !memory_tracker.IsRegionGpuModified(cpu_addr, size); if (use_fast_buffer) { if constexpr (IS_OPENGL) { @@ -767,11 +776,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // Fast path for Nvidia const bool should_fast_bind = !HasFastUniformBufferBound(stage, binding_index) || - uniform_buffer_binding_sizes[stage][binding_index] != size; + channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size; if (should_fast_bind) { // We only have to bind when the currently bound buffer is not the fast version - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - uniform_buffer_binding_sizes[stage][binding_index] = size; + channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index; + channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -780,8 +789,8 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } } if constexpr (IS_OPENGL) { - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - uniform_buffer_binding_sizes[stage][binding_index] = size; + channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index; + channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); @@ -791,15 +800,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // Classic cached path const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size); if (sync_cached) { - ++uniform_cache_hits[0]; + ++channel_state->uniform_cache_hits[0]; } - ++uniform_cache_shots[0]; + ++channel_state->uniform_cache_shots[0]; // Skip binding if it's not needed and if the bound buffer is not the fast version // This exists to avoid instances where the fast buffer is bound and a GPU write happens needs_bind |= HasFastUniformBufferBound(stage, binding_index); if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size; + needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size; } if (!needs_bind) { return; @@ -807,14 +816,14 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const u32 offset = buffer.Offset(cpu_addr); if constexpr (IS_OPENGL) { // Fast buffer will be unbound - fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); + channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); // Mark the index as dirty if offset doesn't match const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); - dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; + channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; } if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - uniform_buffer_binding_sizes[stage][binding_index] = size; + channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); @@ -826,15 +835,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 template <class P> void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { u32 binding_index = 0; - ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { - const Binding& binding = storage_buffers[stage][index]; + ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { + const Binding& binding = channel_state->storage_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); - const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0; + const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; if constexpr (NEEDS_BIND_STORAGE_INDEX) { runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written); ++binding_index; @@ -846,8 +855,8 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { template <class P> void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { - ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { - const TextureBufferBinding& binding = texture_buffers[stage][index]; + ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { + const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -855,7 +864,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { - if (((image_texture_buffers[stage] >> index) & 1) != 0) { + if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) { runtime.BindImageBuffer(buffer, offset, size, format); } else { runtime.BindTextureBuffer(buffer, offset, size, format); @@ -872,7 +881,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { return; } for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { - const Binding& binding = transform_feedback_buffers[index]; + const Binding& binding = channel_state->transform_feedback_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; @@ -887,15 +896,16 @@ template <class P> void BufferCache<P>::BindHostComputeUniformBuffers() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { // Mark all uniform buffers as dirty - dirty_uniform_buffers.fill(~u32{0}); - fast_bound_uniform_buffers.fill(0); + channel_state->dirty_uniform_buffers.fill(~u32{0}); + channel_state->fast_bound_uniform_buffers.fill(0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { - const Binding& binding = compute_uniform_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) { + const Binding& binding = channel_state->compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); - const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); + const u32 size = + std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); @@ -911,15 +921,16 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { template <class P> void BufferCache<P>::BindHostComputeStorageBuffers() { u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { - const Binding& binding = compute_storage_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { + const Binding& binding = channel_state->compute_storage_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); - const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0; + const bool is_written = + ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; if constexpr (NEEDS_BIND_STORAGE_INDEX) { runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written); ++binding_index; @@ -931,8 +942,8 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { template <class P> void BufferCache<P>::BindHostComputeTextureBuffers() { - ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { - const TextureBufferBinding& binding = compute_texture_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { + const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -940,7 +951,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { - if (((image_compute_texture_buffers >> index) & 1) != 0) { + if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) { runtime.BindImageBuffer(buffer, offset, size, format); } else { runtime.BindTextureBuffer(buffer, offset, size, format); @@ -954,7 +965,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { template <class P> void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { do { - has_deleted_buffers = false; + channel_state->has_deleted_buffers = false; if (is_indexed) { UpdateIndexBuffer(); } @@ -968,7 +979,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { if (current_draw_indirect) { UpdateDrawIndirect(); } - } while (has_deleted_buffers); + } while (channel_state->has_deleted_buffers); } template <class P> @@ -999,7 +1010,7 @@ void BufferCache<P>::UpdateIndexBuffer() { slot_buffers.erase(inline_buffer_id); inline_buffer_id = CreateBuffer(0, buffer_size); } - index_buffer = Binding{ + channel_state->index_buffer = Binding{ .cpu_addr = 0, .size = inline_index_size, .buffer_id = inline_buffer_id, @@ -1015,10 +1026,10 @@ void BufferCache<P>::UpdateIndexBuffer() { (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); const u32 size = std::min(address_size, draw_size); if (size == 0 || !cpu_addr) { - index_buffer = NULL_BINDING; + channel_state->index_buffer = NULL_BINDING; return; } - index_buffer = Binding{ + channel_state->index_buffer = Binding{ .cpu_addr = *cpu_addr, .size = size, .buffer_id = FindBuffer(*cpu_addr, size), @@ -1051,13 +1062,13 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); u32 size = address_size; // TODO: Analyze stride and number of vertices if (array.enable == 0 || size == 0 || !cpu_addr) { - vertex_buffers[index] = NULL_BINDING; + channel_state->vertex_buffers[index] = NULL_BINDING; return; } if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); } - vertex_buffers[index] = Binding{ + channel_state->vertex_buffers[index] = Binding{ .cpu_addr = *cpu_addr, .size = size, .buffer_id = FindBuffer(*cpu_addr, size), @@ -1079,23 +1090,24 @@ void BufferCache<P>::UpdateDrawIndirect() { }; }; if (current_draw_indirect->include_count) { - update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding); + update(current_draw_indirect->count_start_address, sizeof(u32), + channel_state->count_buffer_binding); } update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size, - indirect_buffer_binding); + channel_state->indirect_buffer_binding); } template <class P> void BufferCache<P>::UpdateUniformBuffers(size_t stage) { - ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { - Binding& binding = uniform_buffers[stage][index]; + ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) { + Binding& binding = channel_state->uniform_buffers[stage][index]; if (binding.buffer_id) { // Already updated return; } // Mark as dirty if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - dirty_uniform_buffers[stage] |= 1U << index; + channel_state->dirty_uniform_buffers[stage] |= 1U << index; } // Resolve buffer binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); @@ -1104,10 +1116,10 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) { template <class P> void BufferCache<P>::UpdateStorageBuffers(size_t stage) { - const u32 written_mask = written_storage_buffers[stage]; - ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { + const u32 written_mask = channel_state->written_storage_buffers[stage]; + ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { // Resolve buffer - Binding& binding = storage_buffers[stage][index]; + Binding& binding = channel_state->storage_buffers[stage][index]; const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); binding.buffer_id = buffer_id; // Mark buffer as written if needed @@ -1119,11 +1131,11 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) { template <class P> void BufferCache<P>::UpdateTextureBuffers(size_t stage) { - ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { - Binding& binding = texture_buffers[stage][index]; + ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { + Binding& binding = channel_state->texture_buffers[stage][index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark buffer as written if needed - if (((written_texture_buffers[stage] >> index) & 1) != 0) { + if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) { MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); @@ -1146,11 +1158,11 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { const u32 size = binding.size; const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (binding.enable == 0 || size == 0 || !cpu_addr) { - transform_feedback_buffers[index] = NULL_BINDING; + channel_state->transform_feedback_buffers[index] = NULL_BINDING; return; } const BufferId buffer_id = FindBuffer(*cpu_addr, size); - transform_feedback_buffers[index] = Binding{ + channel_state->transform_feedback_buffers[index] = Binding{ .cpu_addr = *cpu_addr, .size = size, .buffer_id = buffer_id, @@ -1160,8 +1172,8 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { template <class P> void BufferCache<P>::UpdateComputeUniformBuffers() { - ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { - Binding& binding = compute_uniform_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) { + Binding& binding = channel_state->compute_uniform_buffers[index]; binding = NULL_BINDING; const auto& launch_desc = kepler_compute->launch_description; if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { @@ -1178,12 +1190,12 @@ void BufferCache<P>::UpdateComputeUniformBuffers() { template <class P> void BufferCache<P>::UpdateComputeStorageBuffers() { - ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { + ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer - Binding& binding = compute_storage_buffers[index]; + Binding& binding = channel_state->compute_storage_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed - if (((written_compute_storage_buffers >> index) & 1) != 0) { + if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) { MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); @@ -1191,11 +1203,11 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { template <class P> void BufferCache<P>::UpdateComputeTextureBuffers() { - ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { - Binding& binding = compute_texture_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { + Binding& binding = channel_state->compute_texture_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed - if (((written_compute_texture_buffers >> index) & 1) != 0) { + if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) { MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); @@ -1610,13 +1622,13 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { const auto replace = [scalar_replace](std::span<Binding> bindings) { std::ranges::for_each(bindings, scalar_replace); }; - scalar_replace(index_buffer); - replace(vertex_buffers); - std::ranges::for_each(uniform_buffers, replace); - std::ranges::for_each(storage_buffers, replace); - replace(transform_feedback_buffers); - replace(compute_uniform_buffers); - replace(compute_storage_buffers); + scalar_replace(channel_state->index_buffer); + replace(channel_state->vertex_buffers); + std::ranges::for_each(channel_state->uniform_buffers, replace); + std::ranges::for_each(channel_state->storage_buffers, replace); + replace(channel_state->transform_feedback_buffers); + replace(channel_state->compute_uniform_buffers); + replace(channel_state->compute_storage_buffers); // Mark the whole buffer as CPU written to stop tracking CPU writes if (!do_not_mark) { @@ -1634,8 +1646,8 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { template <class P> void BufferCache<P>::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - dirty_uniform_buffers.fill(~u32{0}); - uniform_buffer_binding_sizes.fill({}); + channel_state->dirty_uniform_buffers.fill(~u32{0}); + channel_state->uniform_buffer_binding_sizes.fill({}); } auto& flags = maxwell3d->dirty.flags; flags[Dirty::IndexBuffer] = true; @@ -1643,13 +1655,12 @@ void BufferCache<P>::NotifyBufferDeletion() { for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { flags[Dirty::VertexBuffer0 + index] = true; } - has_deleted_buffers = true; + channel_state->has_deleted_buffers = true; } template <class P> -typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, - u32 cbuf_index, - bool is_written) const { +Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, + bool is_written) const { const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); const auto size = [&]() { const bool is_nvn_cbuf = cbuf_index == 0; @@ -1681,8 +1692,8 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s } template <class P> -typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding( - GPUVAddr gpu_addr, u32 size, PixelFormat format) { +TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, + PixelFormat format) { const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); TextureBufferBinding binding; if (!cpu_addr || size == 0) { @@ -1721,7 +1732,7 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) { template <class P> bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept { if constexpr (IS_OPENGL) { - return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0; + return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0; } else { // Only OpenGL has fast uniform buffers return false; @@ -1730,14 +1741,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) template <class P> std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { - auto& buffer = slot_buffers[count_buffer_binding.buffer_id]; - return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr)); + auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; + return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr)); } template <class P> std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { - auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id]; - return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr)); + auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; + return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr)); } } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index ac00d4d9d..c689fe06b 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -86,8 +86,78 @@ enum class ObtainBufferOperation : u32 { MarkQuery = 3, }; -template <typename P> -class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { +static constexpr BufferId NULL_BUFFER_ID{0}; +static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); + +struct Binding { + VAddr cpu_addr{}; + u32 size{}; + BufferId buffer_id; +}; + +struct TextureBufferBinding : Binding { + PixelFormat format; +}; + +static constexpr Binding NULL_BINDING{ + .cpu_addr = 0, + .size = 0, + .buffer_id = NULL_BUFFER_ID, +}; + +class BufferCacheChannelInfo : public ChannelInfo { +public: + BufferCacheChannelInfo() = delete; + BufferCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept : ChannelInfo(state) {} + BufferCacheChannelInfo(const BufferCacheChannelInfo& state) = delete; + BufferCacheChannelInfo& operator=(const BufferCacheChannelInfo&) = delete; + + Binding index_buffer; + std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; + std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; + std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; + std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; + std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; + Binding count_buffer_binding; + Binding indirect_buffer_binding; + + std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; + std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; + std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; + + std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; + u32 enabled_compute_uniform_buffer_mask = 0; + + const UniformBufferSizes* uniform_buffer_sizes{}; + const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; + + std::array<u32, NUM_STAGES> enabled_storage_buffers{}; + std::array<u32, NUM_STAGES> written_storage_buffers{}; + u32 enabled_compute_storage_buffers = 0; + u32 written_compute_storage_buffers = 0; + + std::array<u32, NUM_STAGES> enabled_texture_buffers{}; + std::array<u32, NUM_STAGES> written_texture_buffers{}; + std::array<u32, NUM_STAGES> image_texture_buffers{}; + u32 enabled_compute_texture_buffers = 0; + u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; + + std::array<u32, 16> uniform_cache_hits{}; + std::array<u32, 16> uniform_cache_shots{}; + + u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; + + bool has_deleted_buffers = false; + + std::array<u32, NUM_STAGES> dirty_uniform_buffers{}; + std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; + std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> + uniform_buffer_binding_sizes{}; +}; + +template <class P> +class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInfo> { // Page size for caching purposes. // This is unrelated to the CPU page size and it can be changed as it seems optimal. static constexpr u32 CACHING_PAGEBITS = 16; @@ -104,8 +174,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; - static constexpr BufferId NULL_BUFFER_ID{0}; - static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; static constexpr s64 TARGET_THRESHOLD = 4_GiB; @@ -149,8 +217,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI using OverlapSection = boost::icl::inter_section<int>; using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; - struct Empty {}; - struct OverlapResult { std::vector<BufferId> ids; VAddr begin; @@ -158,25 +224,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI bool has_stream_leap = false; }; - struct Binding { - VAddr cpu_addr{}; - u32 size{}; - BufferId buffer_id; - }; - - struct TextureBufferBinding : Binding { - PixelFormat format; - }; - - static constexpr Binding NULL_BINDING{ - .cpu_addr = 0, - .size = 0, - .buffer_id = NULL_BUFFER_ID, - }; - public: - static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); - explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_); @@ -496,51 +544,6 @@ private: u32 last_index_count = 0; - Binding index_buffer; - std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; - std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; - std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; - std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; - std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; - Binding count_buffer_binding; - Binding indirect_buffer_binding; - - std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; - std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; - std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; - - std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; - u32 enabled_compute_uniform_buffer_mask = 0; - - const UniformBufferSizes* uniform_buffer_sizes{}; - const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; - - std::array<u32, NUM_STAGES> enabled_storage_buffers{}; - std::array<u32, NUM_STAGES> written_storage_buffers{}; - u32 enabled_compute_storage_buffers = 0; - u32 written_compute_storage_buffers = 0; - - std::array<u32, NUM_STAGES> enabled_texture_buffers{}; - std::array<u32, NUM_STAGES> written_texture_buffers{}; - std::array<u32, NUM_STAGES> image_texture_buffers{}; - u32 enabled_compute_texture_buffers = 0; - u32 written_compute_texture_buffers = 0; - u32 image_compute_texture_buffers = 0; - - std::array<u32, 16> uniform_cache_hits{}; - std::array<u32, 16> uniform_cache_shots{}; - - u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; - - bool has_deleted_buffers = false; - - std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> - dirty_uniform_buffers{}; - std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{}; - std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, - std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> - uniform_buffer_binding_sizes{}; - MemoryTracker memory_tracker; IntervalSet uncommitted_ranges; IntervalSet common_ranges; diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index 3e9022dce..cd6a3a9b8 100644 --- a/src/video_core/host1x/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp @@ -5,6 +5,7 @@ #include <fstream> #include <vector> #include "common/assert.h" +#include "common/scope_exit.h" #include "common/settings.h" #include "video_core/host1x/codecs/codec.h" #include "video_core/host1x/codecs/h264.h" @@ -14,6 +15,8 @@ #include "video_core/memory_manager.h" extern "C" { +#include <libavfilter/buffersink.h> +#include <libavfilter/buffersrc.h> #include <libavutil/opt.h> #ifdef LIBVA_FOUND // for querying VAAPI driver information @@ -85,6 +88,10 @@ Codec::~Codec() { // Free libav memory avcodec_free_context(&av_codec_ctx); av_buffer_unref(&av_gpu_decoder); + + if (filters_initialized) { + avfilter_graph_free(&av_filter_graph); + } } bool Codec::CreateGpuAvDevice() { @@ -167,6 +174,62 @@ void Codec::InitializeGpuDecoder() { av_codec_ctx->get_format = GetGpuFormat; } +void Codec::InitializeAvFilters(AVFrame* frame) { + const AVFilter* buffer_src = avfilter_get_by_name("buffer"); + const AVFilter* buffer_sink = avfilter_get_by_name("buffersink"); + AVFilterInOut* inputs = avfilter_inout_alloc(); + AVFilterInOut* outputs = avfilter_inout_alloc(); + SCOPE_EXIT({ + avfilter_inout_free(&inputs); + avfilter_inout_free(&outputs); + }); + + // Don't know how to get the accurate time_base but it doesn't matter for yadif filter + // so just use 1/1 to make buffer filter happy + std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame->width, + frame->height, frame->format); + + av_filter_graph = avfilter_graph_alloc(); + int ret = avfilter_graph_create_filter(&av_filter_src_ctx, buffer_src, "in", args.c_str(), + nullptr, av_filter_graph); + if (ret < 0) { + LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter source error: {}", ret); + return; + } + + ret = avfilter_graph_create_filter(&av_filter_sink_ctx, buffer_sink, "out", nullptr, nullptr, + av_filter_graph); + if (ret < 0) { + LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter sink error: {}", ret); + return; + } + + inputs->name = av_strdup("out"); + inputs->filter_ctx = av_filter_sink_ctx; + inputs->pad_idx = 0; + inputs->next = nullptr; + + outputs->name = av_strdup("in"); + outputs->filter_ctx = av_filter_src_ctx; + outputs->pad_idx = 0; + outputs->next = nullptr; + + const char* description = "yadif=1:-1:0"; + ret = avfilter_graph_parse_ptr(av_filter_graph, description, &inputs, &outputs, nullptr); + if (ret < 0) { + LOG_ERROR(Service_NVDRV, "avfilter_graph_parse_ptr error: {}", ret); + return; + } + + ret = avfilter_graph_config(av_filter_graph, nullptr); + if (ret < 0) { + LOG_ERROR(Service_NVDRV, "avfilter_graph_config error: {}", ret); + return; + } + + filters_initialized = true; +} + void Codec::Initialize() { const AVCodecID codec = [&] { switch (current_codec) { @@ -271,8 +334,34 @@ void Codec::Decode() { UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); return; } - av_frames.push(std::move(final_frame)); - if (av_frames.size() > 10) { + if (!final_frame->interlaced_frame) { + av_frames.push(std::move(final_frame)); + } else { + if (!filters_initialized) { + InitializeAvFilters(final_frame.get()); + } + if (const int ret = av_buffersrc_add_frame_flags(av_filter_src_ctx, final_frame.get(), + AV_BUFFERSRC_FLAG_KEEP_REF); + ret) { + LOG_DEBUG(Service_NVDRV, "av_buffersrc_add_frame_flags error {}", ret); + return; + } + while (true) { + auto filter_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; + + int ret = av_buffersink_get_frame(av_filter_sink_ctx, filter_frame.get()); + + if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) + break; + if (ret < 0) { + LOG_DEBUG(Service_NVDRV, "av_buffersink_get_frame error {}", ret); + return; + } + + av_frames.push(std::move(filter_frame)); + } + } + while (av_frames.size() > 10) { LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame"); av_frames.pop(); } diff --git a/src/video_core/host1x/codecs/codec.h b/src/video_core/host1x/codecs/codec.h index 0d45fb7fe..06fe00a4b 100644 --- a/src/video_core/host1x/codecs/codec.h +++ b/src/video_core/host1x/codecs/codec.h @@ -15,6 +15,7 @@ extern "C" { #pragma GCC diagnostic ignored "-Wconversion" #endif #include <libavcodec/avcodec.h> +#include <libavfilter/avfilter.h> #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif @@ -61,17 +62,24 @@ public: private: void InitializeAvCodecContext(); + void InitializeAvFilters(AVFrame* frame); + void InitializeGpuDecoder(); bool CreateGpuAvDevice(); bool initialized{}; + bool filters_initialized{}; Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; const AVCodec* av_codec{nullptr}; AVCodecContext* av_codec_ctx{nullptr}; AVBufferRef* av_gpu_decoder{nullptr}; + AVFilterContext* av_filter_src_ctx{nullptr}; + AVFilterContext* av_filter_sink_ctx{nullptr}; + AVFilterGraph* av_filter_graph{nullptr}; + Host1x::Host1x& host1x; const Host1x::NvdecCommon::NvdecRegisters& state; std::unique_ptr<Decoder::H264> h264_decoder; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 6af4ae793..6d3bda192 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -117,7 +117,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_) for (auto& stage_uniforms : fast_uniforms) { for (OGLBuffer& buffer : stage_uniforms) { buffer.Create(); - glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr, + glNamedBufferData(buffer.handle, VideoCommon::DEFAULT_SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); } } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 1e0823836..56d0ff869 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -439,6 +439,11 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form return GL_R32UI; } +[[nodiscard]] bool IsAstcRecompressionEnabled() { + return Settings::values.astc_recompression.GetValue() != + Settings::AstcRecompression::Uncompressed; +} + [[nodiscard]] GLenum SelectAstcFormat(PixelFormat format, bool is_srgb) { switch (Settings::values.astc_recompression.GetValue()) { case Settings::AstcRecompression::Bc1: @@ -760,7 +765,7 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - if (IsPixelFormatASTC(info.format)) { + if (IsPixelFormatASTC(info.format) && IsAstcRecompressionEnabled()) { gl_internal_format = SelectAstcFormat(info.format, is_srgb); gl_format = GL_NONE; } @@ -1155,7 +1160,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI const bool is_srgb = IsPixelFormatSRGB(info.format); internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8; - if (IsPixelFormatASTC(info.format)) { + if (IsPixelFormatASTC(info.format) && IsAstcRecompressionEnabled()) { internal_format = SelectAstcFormat(info.format, is_srgb); } } else { diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 2d7b9ab65..84d9ca796 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -378,11 +378,7 @@ if(UNIX AND NOT APPLE) endif() if (WIN32 AND QT_VERSION VERSION_GREATER_EQUAL 6) - if (MSVC AND NOT ${CMAKE_GENERATOR} STREQUAL "Ninja") - set(YUZU_EXE_DIR "${CMAKE_BINARY_DIR}/bin/$<CONFIG>") - else() - set(YUZU_EXE_DIR "${CMAKE_BINARY_DIR}/bin") - endif() + set(YUZU_EXE_DIR "$<TARGET_FILE_DIR:yuzu>") add_custom_command(TARGET yuzu POST_BUILD COMMAND ${WINDEPLOYQT_EXECUTABLE} "${YUZU_EXE_DIR}/yuzu.exe" --dir "${YUZU_EXE_DIR}" --libdir "${YUZU_EXE_DIR}" --plugindir "${YUZU_EXE_DIR}/plugins" --no-compiler-runtime --no-opengl-sw --no-system-d3d-compiler --no-translations --verbose 0) endif() diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index c21828b1d..465084fea 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -544,6 +544,7 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); + QAction* remove_cache_storage = remove_menu->addAction(tr("Remove Cache Storage")); QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache")); QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache")); remove_menu->addSeparator(); @@ -614,6 +615,9 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() { emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path); }); + connect(remove_cache_storage, &QAction::triggered, [this, program_id, path] { + emit RemoveFileRequested(program_id, GameListRemoveTarget::CacheStorage, path); + }); connect(dump_romfs, &QAction::triggered, [this, program_id, path]() { emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::Normal); }); diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index 64e5af4c1..6c2f75e53 100644 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h @@ -45,6 +45,7 @@ enum class GameListRemoveTarget { VkShaderCache, AllShaderCache, CustomConfiguration, + CacheStorage, }; enum class DumpRomFSTarget { diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 4489f43af..25cfef6d5 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -2323,6 +2323,8 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ return tr("Delete All Transferable Shader Caches?"); case GameListRemoveTarget::CustomConfiguration: return tr("Remove Custom Game Configuration?"); + case GameListRemoveTarget::CacheStorage: + return tr("Remove Cache Storage?"); default: return QString{}; } @@ -2346,6 +2348,9 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ case GameListRemoveTarget::CustomConfiguration: RemoveCustomConfiguration(program_id, game_path); break; + case GameListRemoveTarget::CacheStorage: + RemoveCacheStorage(program_id); + break; } } @@ -2435,6 +2440,21 @@ void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& g } } +void GMainWindow::RemoveCacheStorage(u64 program_id) { + const auto nand_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::NANDDir); + auto vfs_nand_dir = + vfs->OpenDirectory(Common::FS::PathToUTF8String(nand_dir), FileSys::Mode::Read); + + const auto cache_storage_path = FileSys::SaveDataFactory::GetFullPath( + *system, vfs_nand_dir, FileSys::SaveDataSpaceId::NandUser, + FileSys::SaveDataType::CacheStorage, 0 /* program_id */, {}, 0); + + const auto path = Common::FS::ConcatPathSafe(nand_dir, cache_storage_path); + + // Not an error if it wasn't cleared. + Common::FS::RemoveDirRecursively(path); +} + void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path, DumpRomFSTarget target) { const auto failed = [this] { diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 17631a2d9..6bb70972f 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -370,6 +370,7 @@ private: void RemoveVulkanDriverPipelineCache(u64 program_id); void RemoveAllTransferableShaderCaches(u64 program_id); void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); + void RemoveCacheStorage(u64 program_id); std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); InstallResult InstallNSPXCI(const QString& filename); InstallResult InstallNCA(const QString& filename); |