diff options
Diffstat (limited to '')
40 files changed, 2081 insertions, 370 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 97fbdcbf9..7534eb8f1 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -141,6 +141,7 @@ add_library(common STATIC logging/log.h logging/text_formatter.cpp logging/text_formatter.h + logging/types.h lz4_compression.cpp lz4_compression.h math_util.h diff --git a/src/common/fs/file.cpp b/src/common/fs/file.cpp index 9f3de1cb0..710e88b39 100644 --- a/src/common/fs/file.cpp +++ b/src/common/fs/file.cpp @@ -183,10 +183,6 @@ size_t WriteStringToFile(const std::filesystem::path& path, FileType type, size_t AppendStringToFile(const std::filesystem::path& path, FileType type, std::string_view string) { - if (!Exists(path)) { - return WriteStringToFile(path, type, string); - } - if (!IsFile(path)) { return 0; } @@ -309,7 +305,11 @@ bool IOFile::Flush() const { errno = 0; - const auto flush_result = std::fflush(file) == 0; +#ifdef _WIN32 + const auto flush_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0; +#else + const auto flush_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0; +#endif if (!flush_result) { const auto ec = std::error_code{errno, std::generic_category()}; diff --git a/src/common/fs/file.h b/src/common/fs/file.h index 50e270c5b..0f10b6003 100644 --- a/src/common/fs/file.h +++ b/src/common/fs/file.h @@ -71,7 +71,7 @@ template <typename Path> /** * Writes a string to a file at path and returns the number of characters successfully written. - * If an file already exists at path, its contents will be erased. + * If a file already exists at path, its contents will be erased. * If the filesystem object at path is not a file, this function returns 0. * * @param path Filesystem path @@ -95,7 +95,6 @@ template <typename Path> /** * Appends a string to a file at path and returns the number of characters successfully written. - * If a file does not exist at path, WriteStringToFile is called instead. * If the filesystem object at path is not a file, this function returns 0. * * @param path Filesystem path diff --git a/src/common/fs/fs.cpp b/src/common/fs/fs.cpp index d492480d9..d3159e908 100644 --- a/src/common/fs/fs.cpp +++ b/src/common/fs/fs.cpp @@ -321,7 +321,8 @@ bool RemoveDirContentsRecursively(const fs::path& path) { std::error_code ec; - for (const auto& entry : fs::recursive_directory_iterator(path, ec)) { + // TODO (Morph): Replace this with recursive_directory_iterator once it's fixed in MSVC. + for (const auto& entry : fs::directory_iterator(path, ec)) { if (ec) { LOG_ERROR(Common_Filesystem, "Failed to completely enumerate the directory at path={}, ec_message={}", @@ -337,6 +338,12 @@ bool RemoveDirContentsRecursively(const fs::path& path) { PathToUTF8String(entry.path()), ec.message()); break; } + + // TODO (Morph): Remove this when MSVC fixes recursive_directory_iterator. + // recursive_directory_iterator throws an exception despite passing in a std::error_code. + if (entry.status().type() == fs::file_type::directory) { + return RemoveDirContentsRecursively(entry.path()); + } } if (ec) { @@ -475,7 +482,8 @@ void IterateDirEntriesRecursively(const std::filesystem::path& path, std::error_code ec; - for (const auto& entry : fs::recursive_directory_iterator(path, ec)) { + // TODO (Morph): Replace this with recursive_directory_iterator once it's fixed in MSVC. + for (const auto& entry : fs::directory_iterator(path, ec)) { if (ec) { break; } @@ -495,6 +503,12 @@ void IterateDirEntriesRecursively(const std::filesystem::path& path, break; } } + + // TODO (Morph): Remove this when MSVC fixes recursive_directory_iterator. + // recursive_directory_iterator throws an exception despite passing in a std::error_code. + if (entry.status().type() == fs::file_type::directory) { + IterateDirEntriesRecursively(entry.path(), callback, filter); + } } if (callback_error || ec) { diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 6aa8ac960..d5cff400f 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -17,6 +17,7 @@ #endif #include "common/assert.h" +#include "common/fs/file.h" #include "common/fs/fs.h" #include "common/logging/backend.h" #include "common/logging/log.h" @@ -140,10 +141,14 @@ private: std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()}; }; +ConsoleBackend::~ConsoleBackend() = default; + void ConsoleBackend::Write(const Entry& entry) { PrintMessage(entry); } +ColorConsoleBackend::~ColorConsoleBackend() = default; + void ColorConsoleBackend::Write(const Entry& entry) { PrintColoredMessage(entry); } @@ -157,16 +162,19 @@ FileBackend::FileBackend(const std::filesystem::path& filename) { void(FS::RemoveFile(old_filename)); void(FS::RenameFile(filename, old_filename)); - file = FS::IOFile(filename, FS::FileAccessMode::Write, FS::FileType::TextFile); + file = + std::make_unique<FS::IOFile>(filename, FS::FileAccessMode::Write, FS::FileType::TextFile); } +FileBackend::~FileBackend() = default; + void FileBackend::Write(const Entry& entry) { // prevent logs from going over the maximum size (in case its spamming and the user doesn't // know) constexpr std::size_t MAX_BYTES_WRITTEN = 100 * 1024 * 1024; constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1024 * 1024 * 1024; - if (!file.IsOpen()) { + if (!file->IsOpen()) { return; } @@ -176,147 +184,20 @@ void FileBackend::Write(const Entry& entry) { return; } - bytes_written += file.WriteString(FormatLogMessage(entry).append(1, '\n')); + bytes_written += file->WriteString(FormatLogMessage(entry).append(1, '\n')); if (entry.log_level >= Level::Error) { - void(file.Flush()); + void(file->Flush()); } } +DebuggerBackend::~DebuggerBackend() = default; + void DebuggerBackend::Write(const Entry& entry) { #ifdef _WIN32 ::OutputDebugStringW(UTF8ToUTF16W(FormatLogMessage(entry).append(1, '\n')).c_str()); #endif } -/// Macro listing all log classes. Code should define CLS and SUB as desired before invoking this. -#define ALL_LOG_CLASSES() \ - CLS(Log) \ - CLS(Common) \ - SUB(Common, Filesystem) \ - SUB(Common, Memory) \ - CLS(Core) \ - SUB(Core, ARM) \ - SUB(Core, Timing) \ - CLS(Config) \ - CLS(Debug) \ - SUB(Debug, Emulated) \ - SUB(Debug, GPU) \ - SUB(Debug, Breakpoint) \ - SUB(Debug, GDBStub) \ - CLS(Kernel) \ - SUB(Kernel, SVC) \ - CLS(Service) \ - SUB(Service, ACC) \ - SUB(Service, Audio) \ - SUB(Service, AM) \ - SUB(Service, AOC) \ - SUB(Service, APM) \ - SUB(Service, ARP) \ - SUB(Service, BCAT) \ - SUB(Service, BPC) \ - SUB(Service, BGTC) \ - SUB(Service, BTDRV) \ - SUB(Service, BTM) \ - SUB(Service, Capture) \ - SUB(Service, ERPT) \ - SUB(Service, ETicket) \ - SUB(Service, EUPLD) \ - SUB(Service, Fatal) \ - SUB(Service, FGM) \ - SUB(Service, Friend) \ - SUB(Service, FS) \ - SUB(Service, GRC) \ - SUB(Service, HID) \ - SUB(Service, IRS) \ - SUB(Service, LBL) \ - SUB(Service, LDN) \ - SUB(Service, LDR) \ - SUB(Service, LM) \ - SUB(Service, Migration) \ - SUB(Service, Mii) \ - SUB(Service, MM) \ - SUB(Service, NCM) \ - SUB(Service, NFC) \ - SUB(Service, NFP) \ - SUB(Service, NIFM) \ - SUB(Service, NIM) \ - SUB(Service, NPNS) \ - SUB(Service, NS) \ - SUB(Service, NVDRV) \ - SUB(Service, OLSC) \ - SUB(Service, PCIE) \ - SUB(Service, PCTL) \ - SUB(Service, PCV) \ - SUB(Service, PM) \ - SUB(Service, PREPO) \ - SUB(Service, PSC) \ - SUB(Service, PSM) \ - SUB(Service, SET) \ - SUB(Service, SM) \ - SUB(Service, SPL) \ - SUB(Service, SSL) \ - SUB(Service, TCAP) \ - SUB(Service, Time) \ - SUB(Service, USB) \ - SUB(Service, VI) \ - SUB(Service, WLAN) \ - CLS(HW) \ - SUB(HW, Memory) \ - SUB(HW, LCD) \ - SUB(HW, GPU) \ - SUB(HW, AES) \ - CLS(IPC) \ - CLS(Frontend) \ - CLS(Render) \ - SUB(Render, Software) \ - SUB(Render, OpenGL) \ - SUB(Render, Vulkan) \ - CLS(Audio) \ - SUB(Audio, DSP) \ - SUB(Audio, Sink) \ - CLS(Input) \ - CLS(Network) \ - CLS(Loader) \ - CLS(CheatEngine) \ - CLS(Crypto) \ - CLS(WebService) - -// GetClassName is a macro defined by Windows.h, grrr... -const char* GetLogClassName(Class log_class) { - switch (log_class) { -#define CLS(x) \ - case Class::x: \ - return #x; -#define SUB(x, y) \ - case Class::x##_##y: \ - return #x "." #y; - ALL_LOG_CLASSES() -#undef CLS -#undef SUB - case Class::Count: - break; - } - return "Invalid"; -} - -const char* GetLevelName(Level log_level) { -#define LVL(x) \ - case Level::x: \ - return #x - switch (log_level) { - LVL(Trace); - LVL(Debug); - LVL(Info); - LVL(Warning); - LVL(Error); - LVL(Critical); - case Level::Count: - break; - } -#undef LVL - return "Invalid"; -} - void SetGlobalFilter(const Filter& filter) { Impl::Instance().SetGlobalFilter(filter); } diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h index eb629a33f..4b9a910c1 100644 --- a/src/common/logging/backend.h +++ b/src/common/logging/backend.h @@ -1,43 +1,32 @@ // Copyright 2014 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. + #pragma once -#include <chrono> #include <filesystem> #include <memory> #include <string> #include <string_view> -#include "common/fs/file.h" #include "common/logging/filter.h" #include "common/logging/log.h" +namespace Common::FS { +class IOFile; +} + namespace Common::Log { class Filter; /** - * A log entry. Log entries are store in a structured format to permit more varied output - * formatting on different frontends, as well as facilitating filtering and aggregation. - */ -struct Entry { - std::chrono::microseconds timestamp; - Class log_class{}; - Level log_level{}; - const char* filename = nullptr; - unsigned int line_num = 0; - std::string function; - std::string message; - bool final_entry = false; -}; - -/** * Interface for logging backends. As loggers can be created and removed at runtime, this can be * used by a frontend for adding a custom logging backend as needed */ class Backend { public: virtual ~Backend() = default; + virtual void SetFilter(const Filter& new_filter) { filter = new_filter; } @@ -53,6 +42,8 @@ private: */ class ConsoleBackend : public Backend { public: + ~ConsoleBackend() override; + static const char* Name() { return "console"; } @@ -67,6 +58,8 @@ public: */ class ColorConsoleBackend : public Backend { public: + ~ColorConsoleBackend() override; + static const char* Name() { return "color_console"; } @@ -83,6 +76,7 @@ public: class FileBackend : public Backend { public: explicit FileBackend(const std::filesystem::path& filename); + ~FileBackend() override; static const char* Name() { return "file"; @@ -95,7 +89,7 @@ public: void Write(const Entry& entry) override; private: - FS::IOFile file; + std::unique_ptr<FS::IOFile> file; std::size_t bytes_written = 0; }; @@ -104,6 +98,8 @@ private: */ class DebuggerBackend : public Backend { public: + ~DebuggerBackend() override; + static const char* Name() { return "debugger"; } @@ -120,17 +116,6 @@ void RemoveBackend(std::string_view backend_name); Backend* GetBackend(std::string_view backend_name); /** - * Returns the name of the passed log class as a C-string. Subclasses are separated by periods - * instead of underscores as in the enumeration. - */ -const char* GetLogClassName(Class log_class); - -/** - * Returns the name of the passed log level as a C-string. - */ -const char* GetLevelName(Level log_level); - -/** * The global filter will prevent any messages from even being processed if they are filtered. Each * backend can have a filter, but if the level is lower than the global filter, the backend will * never get the message diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 20a2dd106..4f2cc29e1 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include <algorithm> -#include "common/logging/backend.h" #include "common/logging/filter.h" #include "common/string_util.h" @@ -22,7 +21,7 @@ Level GetLevelByName(const It begin, const It end) { template <typename It> Class GetClassByName(const It begin, const It end) { - for (ClassType i = 0; i < static_cast<ClassType>(Class::Count); ++i) { + for (u8 i = 0; i < static_cast<u8>(Class::Count); ++i) { const char* level_name = GetLogClassName(static_cast<Class>(i)); if (Common::ComparePartialString(begin, end, level_name)) { return static_cast<Class>(i); @@ -62,6 +61,135 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { } } // Anonymous namespace +/// Macro listing all log classes. Code should define CLS and SUB as desired before invoking this. +#define ALL_LOG_CLASSES() \ + CLS(Log) \ + CLS(Common) \ + SUB(Common, Filesystem) \ + SUB(Common, Memory) \ + CLS(Core) \ + SUB(Core, ARM) \ + SUB(Core, Timing) \ + CLS(Config) \ + CLS(Debug) \ + SUB(Debug, Emulated) \ + SUB(Debug, GPU) \ + SUB(Debug, Breakpoint) \ + SUB(Debug, GDBStub) \ + CLS(Kernel) \ + SUB(Kernel, SVC) \ + CLS(Service) \ + SUB(Service, ACC) \ + SUB(Service, Audio) \ + SUB(Service, AM) \ + SUB(Service, AOC) \ + SUB(Service, APM) \ + SUB(Service, ARP) \ + SUB(Service, BCAT) \ + SUB(Service, BPC) \ + SUB(Service, BGTC) \ + SUB(Service, BTDRV) \ + SUB(Service, BTM) \ + SUB(Service, Capture) \ + SUB(Service, ERPT) \ + SUB(Service, ETicket) \ + SUB(Service, EUPLD) \ + SUB(Service, Fatal) \ + SUB(Service, FGM) \ + SUB(Service, Friend) \ + SUB(Service, FS) \ + SUB(Service, GRC) \ + SUB(Service, HID) \ + SUB(Service, IRS) \ + SUB(Service, LBL) \ + SUB(Service, LDN) \ + SUB(Service, LDR) \ + SUB(Service, LM) \ + SUB(Service, Migration) \ + SUB(Service, Mii) \ + SUB(Service, MM) \ + SUB(Service, NCM) \ + SUB(Service, NFC) \ + SUB(Service, NFP) \ + SUB(Service, NIFM) \ + SUB(Service, NIM) \ + SUB(Service, NPNS) \ + SUB(Service, NS) \ + SUB(Service, NVDRV) \ + SUB(Service, OLSC) \ + SUB(Service, PCIE) \ + SUB(Service, PCTL) \ + SUB(Service, PCV) \ + SUB(Service, PM) \ + SUB(Service, PREPO) \ + SUB(Service, PSC) \ + SUB(Service, PSM) \ + SUB(Service, SET) \ + SUB(Service, SM) \ + SUB(Service, SPL) \ + SUB(Service, SSL) \ + SUB(Service, TCAP) \ + SUB(Service, Time) \ + SUB(Service, USB) \ + SUB(Service, VI) \ + SUB(Service, WLAN) \ + CLS(HW) \ + SUB(HW, Memory) \ + SUB(HW, LCD) \ + SUB(HW, GPU) \ + SUB(HW, AES) \ + CLS(IPC) \ + CLS(Frontend) \ + CLS(Render) \ + SUB(Render, Software) \ + SUB(Render, OpenGL) \ + SUB(Render, Vulkan) \ + CLS(Audio) \ + SUB(Audio, DSP) \ + SUB(Audio, Sink) \ + CLS(Input) \ + CLS(Network) \ + CLS(Loader) \ + CLS(CheatEngine) \ + CLS(Crypto) \ + CLS(WebService) + +// GetClassName is a macro defined by Windows.h, grrr... +const char* GetLogClassName(Class log_class) { + switch (log_class) { +#define CLS(x) \ + case Class::x: \ + return #x; +#define SUB(x, y) \ + case Class::x##_##y: \ + return #x "." #y; + ALL_LOG_CLASSES() +#undef CLS +#undef SUB + case Class::Count: + break; + } + return "Invalid"; +} + +const char* GetLevelName(Level log_level) { +#define LVL(x) \ + case Level::x: \ + return #x + switch (log_level) { + LVL(Trace); + LVL(Debug); + LVL(Info); + LVL(Warning); + LVL(Error); + LVL(Critical); + case Level::Count: + break; + } +#undef LVL + return "Invalid"; +} + Filter::Filter(Level default_level) { ResetAll(default_level); } diff --git a/src/common/logging/filter.h b/src/common/logging/filter.h index f5673a9f6..1a3074e04 100644 --- a/src/common/logging/filter.h +++ b/src/common/logging/filter.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <chrono> #include <cstddef> #include <string_view> #include "common/logging/log.h" @@ -12,6 +13,17 @@ namespace Common::Log { /** + * Returns the name of the passed log class as a C-string. Subclasses are separated by periods + * instead of underscores as in the enumeration. + */ +const char* GetLogClassName(Class log_class); + +/** + * Returns the name of the passed log level as a C-string. + */ +const char* GetLevelName(Level log_level); + +/** * Implements a log message filter which allows different log classes to have different minimum * severity levels. The filter can be changed at runtime and can be parsed from a string to allow * editing via the interface or loading from a configuration file. diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 1f0f8db52..8d43eddc7 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -5,7 +5,7 @@ #pragma once #include <fmt/format.h> -#include "common/common_types.h" +#include "common/logging/types.h" namespace Common::Log { @@ -18,124 +18,6 @@ constexpr const char* TrimSourcePath(std::string_view source) { return source.data() + idx; } -/// Specifies the severity or level of detail of the log message. -enum class Level : u8 { - Trace, ///< Extremely detailed and repetitive debugging information that is likely to - ///< pollute logs. - Debug, ///< Less detailed debugging information. - Info, ///< Status information from important points during execution. - Warning, ///< Minor or potential problems found during execution of a task. - Error, ///< Major problems found during execution of a task that prevent it from being - ///< completed. - Critical, ///< Major problems during execution that threaten the stability of the entire - ///< application. - - Count ///< Total number of logging levels -}; - -typedef u8 ClassType; - -/** - * Specifies the sub-system that generated the log message. - * - * @note If you add a new entry here, also add a corresponding one to `ALL_LOG_CLASSES` in - * backend.cpp. - */ -enum class Class : ClassType { - Log, ///< Messages about the log system itself - Common, ///< Library routines - Common_Filesystem, ///< Filesystem interface library - Common_Memory, ///< Memory mapping and management functions - Core, ///< LLE emulation core - Core_ARM, ///< ARM CPU core - Core_Timing, ///< CoreTiming functions - Config, ///< Emulator configuration (including commandline) - Debug, ///< Debugging tools - Debug_Emulated, ///< Debug messages from the emulated programs - Debug_GPU, ///< GPU debugging tools - Debug_Breakpoint, ///< Logging breakpoints and watchpoints - Debug_GDBStub, ///< GDB Stub - Kernel, ///< The HLE implementation of the CTR kernel - Kernel_SVC, ///< Kernel system calls - Service, ///< HLE implementation of system services. Each major service - ///< should have its own subclass. - Service_ACC, ///< The ACC (Accounts) service - Service_AM, ///< The AM (Applet manager) service - Service_AOC, ///< The AOC (AddOn Content) service - Service_APM, ///< The APM (Performance) service - Service_ARP, ///< The ARP service - Service_Audio, ///< The Audio (Audio control) service - Service_BCAT, ///< The BCAT service - Service_BGTC, ///< The BGTC (Background Task Controller) service - Service_BPC, ///< The BPC service - Service_BTDRV, ///< The Bluetooth driver service - Service_BTM, ///< The BTM service - Service_Capture, ///< The capture service - Service_ERPT, ///< The error reporting service - Service_ETicket, ///< The ETicket service - Service_EUPLD, ///< The error upload service - Service_Fatal, ///< The Fatal service - Service_FGM, ///< The FGM service - Service_Friend, ///< The friend service - Service_FS, ///< The FS (Filesystem) service - Service_GRC, ///< The game recording service - Service_HID, ///< The HID (Human interface device) service - Service_IRS, ///< The IRS service - Service_LBL, ///< The LBL (LCD backlight) service - Service_LDN, ///< The LDN (Local domain network) service - Service_LDR, ///< The loader service - Service_LM, ///< The LM (Logger) service - Service_Migration, ///< The migration service - Service_Mii, ///< The Mii service - Service_MM, ///< The MM (Multimedia) service - Service_NCM, ///< The NCM service - Service_NFC, ///< The NFC (Near-field communication) service - Service_NFP, ///< The NFP service - Service_NIFM, ///< The NIFM (Network interface) service - Service_NIM, ///< The NIM service - Service_NPNS, ///< The NPNS service - Service_NS, ///< The NS services - Service_NVDRV, ///< The NVDRV (Nvidia driver) service - Service_OLSC, ///< The OLSC service - Service_PCIE, ///< The PCIe service - Service_PCTL, ///< The PCTL (Parental control) service - Service_PCV, ///< The PCV service - Service_PM, ///< The PM service - Service_PREPO, ///< The PREPO (Play report) service - Service_PSC, ///< The PSC service - Service_PSM, ///< The PSM service - Service_SET, ///< The SET (Settings) service - Service_SM, ///< The SM (Service manager) service - Service_SPL, ///< The SPL service - Service_SSL, ///< The SSL service - Service_TCAP, ///< The TCAP service. - Service_Time, ///< The time service - Service_USB, ///< The USB (Universal Serial Bus) service - Service_VI, ///< The VI (Video interface) service - Service_WLAN, ///< The WLAN (Wireless local area network) service - HW, ///< Low-level hardware emulation - HW_Memory, ///< Memory-map and address translation - HW_LCD, ///< LCD register emulation - HW_GPU, ///< GPU control emulation - HW_AES, ///< AES engine emulation - IPC, ///< IPC interface - Frontend, ///< Emulator UI - Render, ///< Emulator video output and hardware acceleration - Render_Software, ///< Software renderer backend - Render_OpenGL, ///< OpenGL backend - Render_Vulkan, ///< Vulkan backend - Audio, ///< Audio emulation - Audio_DSP, ///< The HLE implementation of the DSP - Audio_Sink, ///< Emulator audio output backend - Loader, ///< ROM loader - CheatEngine, ///< Memory manipulation and engine VM functions - Crypto, ///< Cryptographic engine/functions - Input, ///< Input emulation - Network, ///< Network emulation - WebService, ///< Interface to yuzu Web Services - Count ///< Total number of logging classes -}; - /// Logs a message to the global logger, using fmt void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename, unsigned int line_num, const char* function, const char* format, diff --git a/src/common/logging/text_formatter.cpp b/src/common/logging/text_formatter.cpp index 80ee2cca1..cfc0d5846 100644 --- a/src/common/logging/text_formatter.cpp +++ b/src/common/logging/text_formatter.cpp @@ -11,7 +11,7 @@ #include "common/assert.h" #include "common/common_funcs.h" -#include "common/logging/backend.h" +#include "common/logging/filter.h" #include "common/logging/log.h" #include "common/logging/text_formatter.h" #include "common/string_util.h" diff --git a/src/common/logging/types.h b/src/common/logging/types.h new file mode 100644 index 000000000..ee9a1ed84 --- /dev/null +++ b/src/common/logging/types.h @@ -0,0 +1,142 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <chrono> + +#include "common/common_types.h" + +namespace Common::Log { + +/// Specifies the severity or level of detail of the log message. +enum class Level : u8 { + Trace, ///< Extremely detailed and repetitive debugging information that is likely to + ///< pollute logs. + Debug, ///< Less detailed debugging information. + Info, ///< Status information from important points during execution. + Warning, ///< Minor or potential problems found during execution of a task. + Error, ///< Major problems found during execution of a task that prevent it from being + ///< completed. + Critical, ///< Major problems during execution that threaten the stability of the entire + ///< application. + + Count ///< Total number of logging levels +}; + +/** + * Specifies the sub-system that generated the log message. + * + * @note If you add a new entry here, also add a corresponding one to `ALL_LOG_CLASSES` in + * filter.cpp. + */ +enum class Class : u8 { + Log, ///< Messages about the log system itself + Common, ///< Library routines + Common_Filesystem, ///< Filesystem interface library + Common_Memory, ///< Memory mapping and management functions + Core, ///< LLE emulation core + Core_ARM, ///< ARM CPU core + Core_Timing, ///< CoreTiming functions + Config, ///< Emulator configuration (including commandline) + Debug, ///< Debugging tools + Debug_Emulated, ///< Debug messages from the emulated programs + Debug_GPU, ///< GPU debugging tools + Debug_Breakpoint, ///< Logging breakpoints and watchpoints + Debug_GDBStub, ///< GDB Stub + Kernel, ///< The HLE implementation of the CTR kernel + Kernel_SVC, ///< Kernel system calls + Service, ///< HLE implementation of system services. Each major service + ///< should have its own subclass. + Service_ACC, ///< The ACC (Accounts) service + Service_AM, ///< The AM (Applet manager) service + Service_AOC, ///< The AOC (AddOn Content) service + Service_APM, ///< The APM (Performance) service + Service_ARP, ///< The ARP service + Service_Audio, ///< The Audio (Audio control) service + Service_BCAT, ///< The BCAT service + Service_BGTC, ///< The BGTC (Background Task Controller) service + Service_BPC, ///< The BPC service + Service_BTDRV, ///< The Bluetooth driver service + Service_BTM, ///< The BTM service + Service_Capture, ///< The capture service + Service_ERPT, ///< The error reporting service + Service_ETicket, ///< The ETicket service + Service_EUPLD, ///< The error upload service + Service_Fatal, ///< The Fatal service + Service_FGM, ///< The FGM service + Service_Friend, ///< The friend service + Service_FS, ///< The FS (Filesystem) service + Service_GRC, ///< The game recording service + Service_HID, ///< The HID (Human interface device) service + Service_IRS, ///< The IRS service + Service_LBL, ///< The LBL (LCD backlight) service + Service_LDN, ///< The LDN (Local domain network) service + Service_LDR, ///< The loader service + Service_LM, ///< The LM (Logger) service + Service_Migration, ///< The migration service + Service_Mii, ///< The Mii service + Service_MM, ///< The MM (Multimedia) service + Service_NCM, ///< The NCM service + Service_NFC, ///< The NFC (Near-field communication) service + Service_NFP, ///< The NFP service + Service_NIFM, ///< The NIFM (Network interface) service + Service_NIM, ///< The NIM service + Service_NPNS, ///< The NPNS service + Service_NS, ///< The NS services + Service_NVDRV, ///< The NVDRV (Nvidia driver) service + Service_OLSC, ///< The OLSC service + Service_PCIE, ///< The PCIe service + Service_PCTL, ///< The PCTL (Parental control) service + Service_PCV, ///< The PCV service + Service_PM, ///< The PM service + Service_PREPO, ///< The PREPO (Play report) service + Service_PSC, ///< The PSC service + Service_PSM, ///< The PSM service + Service_SET, ///< The SET (Settings) service + Service_SM, ///< The SM (Service manager) service + Service_SPL, ///< The SPL service + Service_SSL, ///< The SSL service + Service_TCAP, ///< The TCAP service. + Service_Time, ///< The time service + Service_USB, ///< The USB (Universal Serial Bus) service + Service_VI, ///< The VI (Video interface) service + Service_WLAN, ///< The WLAN (Wireless local area network) service + HW, ///< Low-level hardware emulation + HW_Memory, ///< Memory-map and address translation + HW_LCD, ///< LCD register emulation + HW_GPU, ///< GPU control emulation + HW_AES, ///< AES engine emulation + IPC, ///< IPC interface + Frontend, ///< Emulator UI + Render, ///< Emulator video output and hardware acceleration + Render_Software, ///< Software renderer backend + Render_OpenGL, ///< OpenGL backend + Render_Vulkan, ///< Vulkan backend + Audio, ///< Audio emulation + Audio_DSP, ///< The HLE implementation of the DSP + Audio_Sink, ///< Emulator audio output backend + Loader, ///< ROM loader + CheatEngine, ///< Memory manipulation and engine VM functions + Crypto, ///< Cryptographic engine/functions + Input, ///< Input emulation + Network, ///< Network emulation + WebService, ///< Interface to yuzu Web Services + Count ///< Total number of logging classes +}; + +/** + * A log entry. Log entries are store in a structured format to permit more varied output + * formatting on different frontends, as well as facilitating filtering and aggregation. + */ +struct Entry { + std::chrono::microseconds timestamp; + Class log_class{}; + Level log_level{}; + const char* filename = nullptr; + unsigned int line_num = 0; + std::string function; + std::string message; + bool final_entry = false; +}; + +} // namespace Common::Log diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 360e878d6..9ec71eced 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -55,6 +55,7 @@ void LogSettings() { log_setting("Renderer_UseAsynchronousGpuEmulation", values.use_asynchronous_gpu_emulation.GetValue()); log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); + log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); @@ -135,6 +136,7 @@ void RestoreGlobalState(bool is_powered_on) { values.gpu_accuracy.SetGlobal(true); values.use_asynchronous_gpu_emulation.SetGlobal(true); values.use_nvdec_emulation.SetGlobal(true); + values.accelerate_astc.SetGlobal(true); values.use_vsync.SetGlobal(true); values.use_assembly_shaders.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 1af8c5ac2..6198f2d9f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -147,6 +147,7 @@ struct Values { Setting<GPUAccuracy> gpu_accuracy; Setting<bool> use_asynchronous_gpu_emulation; Setting<bool> use_nvdec_emulation; + Setting<bool> accelerate_astc; Setting<bool> use_vsync; Setting<bool> use_assembly_shaders; Setting<bool> use_asynchronous_shaders; @@ -218,6 +219,7 @@ struct Values { std::string program_args; bool dump_exefs; bool dump_nso; + bool enable_fs_access_log; bool reporting_services; bool quest_flag; bool disable_macro_jit; diff --git a/src/core/file_sys/vfs.cpp b/src/core/file_sys/vfs.cpp index 215e1cb1a..368419eca 100644 --- a/src/core/file_sys/vfs.cpp +++ b/src/core/file_sys/vfs.cpp @@ -6,7 +6,6 @@ #include <numeric> #include <string> #include "common/fs/path_util.h" -#include "common/logging/backend.h" #include "core/file_sys/mode.h" #include "core/file_sys/vfs.h" diff --git a/src/core/file_sys/vfs_libzip.cpp b/src/core/file_sys/vfs_libzip.cpp index cd162c0c3..00e256779 100644 --- a/src/core/file_sys/vfs_libzip.cpp +++ b/src/core/file_sys/vfs_libzip.cpp @@ -14,7 +14,6 @@ #endif #include "common/fs/path_util.h" -#include "common/logging/backend.h" #include "core/file_sys/vfs.h" #include "core/file_sys/vfs_libzip.h" #include "core/file_sys/vfs_vector.h" diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp index d9fdc2dca..a2844ea8c 100644 --- a/src/core/hle/service/bcat/backend/boxcat.cpp +++ b/src/core/hle/service/bcat/backend/boxcat.cpp @@ -19,7 +19,6 @@ #include "common/fs/fs.h" #include "common/fs/path_util.h" #include "common/hex_util.h" -#include "common/logging/backend.h" #include "common/logging/log.h" #include "common/settings.h" #include "core/core.h" diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index 3af9881c2..db4d44c12 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "common/hex_util.h" #include "common/logging/log.h" +#include "common/settings.h" #include "common/string_util.h" #include "core/core.h" #include "core/file_sys/directory.h" @@ -785,6 +786,10 @@ FSP_SRV::FSP_SRV(Core::System& system_) }; // clang-format on RegisterHandlers(functions); + + if (Settings::values.enable_fs_access_log) { + access_log_mode = AccessLogMode::SdCard; + } } FSP_SRV::~FSP_SRV() = default; @@ -1041,9 +1046,9 @@ void FSP_SRV::DisableAutoSaveDataCreation(Kernel::HLERequestContext& ctx) { void FSP_SRV::SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; - log_mode = rp.PopEnum<LogMode>(); + access_log_mode = rp.PopEnum<AccessLogMode>(); - LOG_DEBUG(Service_FS, "called, log_mode={:08X}", log_mode); + LOG_DEBUG(Service_FS, "called, access_log_mode={}", access_log_mode); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ResultSuccess); @@ -1054,7 +1059,7 @@ void FSP_SRV::GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); - rb.PushEnum(log_mode); + rb.PushEnum(access_log_mode); } void FSP_SRV::OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx) { @@ -1062,9 +1067,9 @@ void FSP_SRV::OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx) { auto log = Common::StringFromFixedZeroTerminatedBuffer( reinterpret_cast<const char*>(raw.data()), raw.size()); - LOG_DEBUG(Service_FS, "called, log='{}'", log); + LOG_DEBUG(Service_FS, "called"); - reporter.SaveFilesystemAccessReport(log_mode, std::move(log)); + reporter.SaveFSAccessLog(log); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ResultSuccess); diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h index ff7455a20..556708284 100644 --- a/src/core/hle/service/filesystem/fsp_srv.h +++ b/src/core/hle/service/filesystem/fsp_srv.h @@ -24,11 +24,10 @@ enum class AccessLogVersion : u32 { Latest = V7_0_0, }; -enum class LogMode : u32 { - Off, +enum class AccessLogMode : u32 { + None, Log, - RedirectToSdCard, - LogToSdCard = Log | RedirectToSdCard, + SdCard, }; class FSP_SRV final : public ServiceFramework<FSP_SRV> { @@ -59,13 +58,12 @@ private: FileSystemController& fsc; const FileSys::ContentProvider& content_provider; + const Core::Reporter& reporter; FileSys::VirtualFile romfs; u64 current_process_id = 0; u32 access_log_program_index = 0; - LogMode log_mode = LogMode::LogToSdCard; - - const Core::Reporter& reporter; + AccessLogMode access_log_mode = AccessLogMode::None; }; } // namespace Service::FileSystem diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index fa6213d3c..d68b023d0 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -236,7 +236,7 @@ Hid::Hid(Core::System& system_) : ServiceFramework{system_, "hid"} { {80, &Hid::GetGyroscopeZeroDriftMode, "GetGyroscopeZeroDriftMode"}, {81, &Hid::ResetGyroscopeZeroDriftMode, "ResetGyroscopeZeroDriftMode"}, {82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"}, - {83, nullptr, "IsFirmwareUpdateAvailableForSixAxisSensor"}, + {83, &Hid::IsFirmwareUpdateAvailableForSixAxisSensor, "IsFirmwareUpdateAvailableForSixAxisSensor"}, {91, &Hid::ActivateGesture, "ActivateGesture"}, {100, &Hid::SetSupportedNpadStyleSet, "SetSupportedNpadStyleSet"}, {101, &Hid::GetSupportedNpadStyleSet, "GetSupportedNpadStyleSet"}, @@ -710,6 +710,27 @@ void Hid::IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx) { .IsSixAxisSensorAtRest()); } +void Hid::IsFirmwareUpdateAvailableForSixAxisSensor(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + struct Parameters { + Controller_NPad::DeviceHandle sixaxis_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; + }; + + const auto parameters{rp.PopRaw<Parameters>()}; + + LOG_WARNING( + Service_HID, + "(STUBBED) called, npad_type={}, npad_id={}, device_index={}, applet_resource_user_id={}", + parameters.sixaxis_handle.npad_type, parameters.sixaxis_handle.npad_id, + parameters.sixaxis_handle.device_index, parameters.applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(ResultSuccess); + rb.Push(false); +} + void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; struct Parameters { diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index aa3307955..83fc2ea1d 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h @@ -100,6 +100,7 @@ private: void GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx); void ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx); void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx); + void IsFirmwareUpdateAvailableForSixAxisSensor(Kernel::HLERequestContext& ctx); void ActivateGesture(Kernel::HLERequestContext& ctx); void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx); void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx); diff --git a/src/core/hle/service/lm/lm.cpp b/src/core/hle/service/lm/lm.cpp index 311e4fb2d..794504314 100644 --- a/src/core/hle/service/lm/lm.cpp +++ b/src/core/hle/service/lm/lm.cpp @@ -51,6 +51,24 @@ struct hash<Service::LM::LogPacketHeaderEntry> { } // namespace std namespace Service::LM { +namespace { +std::string_view NameOf(LogSeverity severity) { + switch (severity) { + case LogSeverity::Trace: + return "TRACE"; + case LogSeverity::Info: + return "INFO"; + case LogSeverity::Warning: + return "WARNING"; + case LogSeverity::Error: + return "ERROR"; + case LogSeverity::Fatal: + return "FATAL"; + default: + return "UNKNOWN"; + } +} +} // Anonymous namespace enum class LogDestination : u32 { TargetManager = 1 << 0, @@ -262,33 +280,8 @@ private: if (text_log) { output_log += fmt::format("Log Text: {}\n", *text_log); } - - switch (entry.severity) { - case LogSeverity::Trace: - LOG_DEBUG(Service_LM, "LogManager TRACE ({}):\n{}", DestinationToString(destination), - output_log); - break; - case LogSeverity::Info: - LOG_INFO(Service_LM, "LogManager INFO ({}):\n{}", DestinationToString(destination), - output_log); - break; - case LogSeverity::Warning: - LOG_WARNING(Service_LM, "LogManager WARNING ({}):\n{}", - DestinationToString(destination), output_log); - break; - case LogSeverity::Error: - LOG_ERROR(Service_LM, "LogManager ERROR ({}):\n{}", DestinationToString(destination), - output_log); - break; - case LogSeverity::Fatal: - LOG_CRITICAL(Service_LM, "LogManager FATAL ({}):\n{}", DestinationToString(destination), - output_log); - break; - default: - LOG_CRITICAL(Service_LM, "LogManager UNKNOWN ({}):\n{}", - DestinationToString(destination), output_log); - break; - } + LOG_DEBUG(Service_LM, "LogManager {} ({}):\n{}", NameOf(entry.severity), + DestinationToString(destination), output_log); } static std::string DestinationToString(LogDestination destination) { diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index ec2a16e62..82b0f535a 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -195,7 +195,9 @@ json GetHLERequestContextData(Kernel::HLERequestContext& ctx, Core::Memory::Memo namespace Core { -Reporter::Reporter(System& system_) : system(system_) {} +Reporter::Reporter(System& system_) : system(system_) { + ClearFSAccessLog(); +} Reporter::~Reporter() = default; @@ -362,22 +364,12 @@ void Reporter::SaveErrorReport(u64 title_id, ResultCode result, SaveToFile(std::move(out), GetPath("error_report", title_id, timestamp)); } -void Reporter::SaveFilesystemAccessReport(Service::FileSystem::LogMode log_mode, - std::string log_message) const { - if (!IsReportingEnabled()) - return; - - const auto timestamp = GetTimestamp(); - const auto title_id = system.CurrentProcess()->GetTitleID(); - json out; +void Reporter::SaveFSAccessLog(std::string_view log_message) const { + const auto access_log_path = + Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir) / "FsAccessLog.txt"; - out["yuzu_version"] = GetYuzuVersionData(); - out["report_common"] = GetReportCommonData(title_id, ResultSuccess, timestamp); - - out["log_mode"] = fmt::format("{:08X}", static_cast<u32>(log_mode)); - out["log_message"] = std::move(log_message); - - SaveToFile(std::move(out), GetPath("filesystem_access_report", title_id, timestamp)); + void(Common::FS::AppendStringToFile(access_log_path, Common::FS::FileType::TextFile, + log_message)); } void Reporter::SaveUserReport() const { @@ -392,6 +384,18 @@ void Reporter::SaveUserReport() const { GetPath("user_report", title_id, timestamp)); } +void Reporter::ClearFSAccessLog() const { + const auto access_log_path = + Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir) / "FsAccessLog.txt"; + + Common::FS::IOFile access_log_file{access_log_path, Common::FS::FileAccessMode::Write, + Common::FS::FileType::TextFile}; + + if (!access_log_file.IsOpen()) { + LOG_ERROR(Common_Filesystem, "Failed to clear the filesystem access log."); + } +} + bool Reporter::IsReportingEnabled() const { return Settings::values.reporting_services; } diff --git a/src/core/reporter.h b/src/core/reporter.h index 6fb6ebffa..6e9edeea3 100644 --- a/src/core/reporter.h +++ b/src/core/reporter.h @@ -16,10 +16,6 @@ namespace Kernel { class HLERequestContext; } // namespace Kernel -namespace Service::FileSystem { -enum class LogMode : u32; -} - namespace Service::LM { struct LogMessage; } // namespace Service::LM @@ -69,14 +65,15 @@ public: std::optional<std::string> custom_text_main = {}, std::optional<std::string> custom_text_detail = {}) const; - void SaveFilesystemAccessReport(Service::FileSystem::LogMode log_mode, - std::string log_message) const; + void SaveFSAccessLog(std::string_view log_message) const; // Can be used anywhere to generate a backtrace and general info report at any point during // execution. Not intended to be used for anything other than debugging or testing. void SaveUserReport() const; private: + void ClearFSAccessLog() const; + bool IsReportingEnabled() const; System& system; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index ad1a9ffb4..d4c23ced2 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -230,6 +230,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, Settings::values.use_asynchronous_gpu_emulation.GetValue()); AddField(field_type, "Renderer_UseNvdecEmulation", Settings::values.use_nvdec_emulation.GetValue()); + AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders.GetValue()); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 47190c464..f9454bbaa 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -237,6 +237,7 @@ add_library(video_core STATIC texture_cache/util.cpp texture_cache/util.h textures/astc.h + textures/astc.cpp textures/decoders.cpp textures/decoders.h textures/texture.cpp diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 703e34587..eaba1b103 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -763,7 +763,7 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode) { case 1: { READ_UINT_VALUES(2) uint L0 = (v[0] >> 2) | (v[1] & 0xC0); - uint L1 = max(L0 + (v[1] & 0x3F), 0xFFU); + uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); ep1 = uvec4(0xFF, L0, L0, L0); ep2 = uvec4(0xFF, L1, L1, L1); break; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ffe9edc1b..9b4038615 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,6 +9,8 @@ #include <glad/glad.h> +#include "common/settings.h" + #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" @@ -307,7 +309,9 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 [[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info) { - return !runtime.HasNativeASTC() && IsPixelFormatASTC(info.format); + if (IsPixelFormatASTC(info.format)) { + return !runtime.HasNativeASTC() && Settings::values.accelerate_astc.GetValue(); + } // Disable other accelerated uploads for now as they don't implement swizzled uploads return false; switch (info.type) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index bdd0ce8bc..52860b4cf 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -8,6 +8,7 @@ #include <vector> #include "common/bit_cast.h" +#include "common/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/renderer_vulkan/blit_image.h" @@ -828,7 +829,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); } if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { - flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; + if (Settings::values.accelerate_astc.GetValue()) { + flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; + } else { + flags |= VideoCommon::ImageFlagBits::Converted; + } } if (runtime.device.HasDebuggingToolAttached()) { if (image) { diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index f07ce5500..6835fd747 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -47,6 +47,7 @@ #include "video_core/texture_cache/formatter.h" #include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/util.h" +#include "video_core/textures/astc.h" #include "video_core/textures/decoders.h" namespace VideoCommon { @@ -884,8 +885,16 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 ASSERT(copy.image_extent == mip_size); ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); - DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, - output.subspan(output_offset)); + if (IsPixelFormatASTC(info.format)) { + ASSERT(copy.image_extent.depth == 1); + Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset), + copy.image_extent.width, copy.image_extent.height, + copy.image_subresource.num_layers, tile_size.width, + tile_size.height, output.subspan(output_offset)); + } else { + DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, + output.subspan(output_offset)); + } copy.buffer_offset = output_offset; copy.buffer_row_length = mip_size.width; copy.buffer_image_height = mip_size.height; diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp new file mode 100644 index 000000000..9b2177ebd --- /dev/null +++ b/src/video_core/textures/astc.cpp @@ -0,0 +1,1577 @@ +// Copyright 2016 The University of North Carolina at Chapel Hill +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Please send all BUG REPORTS to <pavel@cs.unc.edu>. +// <http://gamma.cs.unc.edu/FasTC/> + +#include <algorithm> +#include <cassert> +#include <cstring> +#include <span> +#include <vector> + +#include <boost/container/static_vector.hpp> + +#include "common/common_types.h" +#include "video_core/textures/astc.h" + +class InputBitStream { +public: + constexpr explicit InputBitStream(std::span<const u8> data, size_t start_offset = 0) + : cur_byte{data.data()}, total_bits{data.size()}, next_bit{start_offset % 8} {} + + constexpr size_t GetBitsRead() const { + return bits_read; + } + + constexpr bool ReadBit() { + if (bits_read >= total_bits * 8) { + return 0; + } + const bool bit = ((*cur_byte >> next_bit) & 1) != 0; + ++next_bit; + while (next_bit >= 8) { + next_bit -= 8; + ++cur_byte; + } + ++bits_read; + return bit; + } + + constexpr u32 ReadBits(std::size_t nBits) { + u32 ret = 0; + for (std::size_t i = 0; i < nBits; ++i) { + ret |= (ReadBit() & 1) << i; + } + return ret; + } + + template <std::size_t nBits> + constexpr u32 ReadBits() { + u32 ret = 0; + for (std::size_t i = 0; i < nBits; ++i) { + ret |= (ReadBit() & 1) << i; + } + return ret; + } + +private: + const u8* cur_byte; + size_t total_bits = 0; + size_t next_bit = 0; + size_t bits_read = 0; +}; + +class OutputBitStream { +public: + constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0) + : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {} + + constexpr std::size_t GetBitsWritten() const { + return bits_written; + } + + constexpr void WriteBitsR(u32 val, u32 nBits) { + for (u32 i = 0; i < nBits; i++) { + WriteBit((val >> (nBits - i - 1)) & 1); + } + } + + constexpr void WriteBits(u32 val, u32 nBits) { + for (u32 i = 0; i < nBits; i++) { + WriteBit((val >> i) & 1); + } + } + +private: + constexpr void WriteBit(bool b) { + if (bits_written >= num_bits) { + return; + } + + const u32 mask = 1 << next_bit++; + + // clear the bit + *cur_byte &= static_cast<u8>(~mask); + + // Write the bit, if necessary + if (b) + *cur_byte |= static_cast<u8>(mask); + + // Next byte? + if (next_bit >= 8) { + cur_byte += 1; + next_bit = 0; + } + } + + u8* cur_byte; + std::size_t num_bits; + std::size_t bits_written = 0; + std::size_t next_bit = 0; +}; + +template <typename IntType> +class Bits { +public: + explicit Bits(const IntType& v) : m_Bits(v) {} + + Bits(const Bits&) = delete; + Bits& operator=(const Bits&) = delete; + + u8 operator[](u32 bitPos) const { + return static_cast<u8>((m_Bits >> bitPos) & 1); + } + + IntType operator()(u32 start, u32 end) const { + if (start == end) { + return (*this)[start]; + } else if (start > end) { + u32 t = start; + start = end; + end = t; + } + + u64 mask = (1 << (end - start + 1)) - 1; + return (m_Bits >> start) & static_cast<IntType>(mask); + } + +private: + const IntType& m_Bits; +}; + +namespace Tegra::Texture::ASTC { +using IntegerEncodedVector = boost::container::static_vector< + IntegerEncodedValue, 256, + boost::container::static_vector_options< + boost::container::inplace_alignment<alignof(IntegerEncodedValue)>, + boost::container::throw_on_overflow<false>>::type>; + +static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) { + // Implement the algorithm in section C.2.12 + std::array<u32, 5> m; + std::array<u32, 5> t; + u32 T; + + // Read the trit encoded block according to + // table C.2.14 + m[0] = bits.ReadBits(nBitsPerValue); + T = bits.ReadBits<2>(); + m[1] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBits<2>() << 2; + m[2] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBit() << 4; + m[3] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBits<2>() << 5; + m[4] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBit() << 7; + + u32 C = 0; + + Bits<u32> Tb(T); + if (Tb(2, 4) == 7) { + C = (Tb(5, 7) << 2) | Tb(0, 1); + t[4] = t[3] = 2; + } else { + C = Tb(0, 4); + if (Tb(5, 6) == 3) { + t[4] = 2; + t[3] = Tb[7]; + } else { + t[4] = Tb[7]; + t[3] = Tb(5, 6); + } + } + + Bits<u32> Cb(C); + if (Cb(0, 1) == 3) { + t[2] = 2; + t[1] = Cb[4]; + t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); + } else if (Cb(2, 3) == 3) { + t[2] = 2; + t[1] = 2; + t[0] = Cb(0, 1); + } else { + t[2] = Cb[4]; + t[1] = Cb(2, 3); + t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); + } + + for (std::size_t i = 0; i < 5; ++i) { + IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue); + val.bit_value = m[i]; + val.trit_value = t[i]; + } +} + +static void DecodeQuintBlock(InputBitStream& bits, IntegerEncodedVector& result, + u32 nBitsPerValue) { + // Implement the algorithm in section C.2.12 + u32 m[3]; + u32 q[3]; + u32 Q; + + // Read the trit encoded block according to + // table C.2.15 + m[0] = bits.ReadBits(nBitsPerValue); + Q = bits.ReadBits<3>(); + m[1] = bits.ReadBits(nBitsPerValue); + Q |= bits.ReadBits<2>() << 3; + m[2] = bits.ReadBits(nBitsPerValue); + Q |= bits.ReadBits<2>() << 5; + + Bits<u32> Qb(Q); + if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { + q[0] = q[1] = 4; + q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); + } else { + u32 C = 0; + if (Qb(1, 2) == 3) { + q[2] = 4; + C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; + } else { + q[2] = Qb(5, 6); + C = Qb(0, 4); + } + + Bits<u32> Cb(C); + if (Cb(0, 2) == 5) { + q[1] = 4; + q[0] = Cb(3, 4); + } else { + q[1] = Cb(3, 4); + q[0] = Cb(0, 2); + } + } + + for (std::size_t i = 0; i < 3; ++i) { + IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Quint, nBitsPerValue); + val.bit_value = m[i]; + val.quint_value = q[i]; + } +} + +// Fills result with the values that are encoded in the given +// bitstream. We must know beforehand what the maximum possible +// value is, and how many values we're decoding. +static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, + u32 nValues) { + // Determine encoding parameters + IntegerEncodedValue val = EncodingsValues[maxRange]; + + // Start decoding + u32 nValsDecoded = 0; + while (nValsDecoded < nValues) { + switch (val.encoding) { + case IntegerEncoding::Quint: + DecodeQuintBlock(bits, result, val.num_bits); + nValsDecoded += 3; + break; + + case IntegerEncoding::Trit: + DecodeTritBlock(bits, result, val.num_bits); + nValsDecoded += 5; + break; + + case IntegerEncoding::JustBits: + val.bit_value = bits.ReadBits(val.num_bits); + result.push_back(val); + nValsDecoded++; + break; + } + } +} + +struct TexelWeightParams { + u32 m_Width = 0; + u32 m_Height = 0; + bool m_bDualPlane = false; + u32 m_MaxWeight = 0; + bool m_bError = false; + bool m_bVoidExtentLDR = false; + bool m_bVoidExtentHDR = false; + + u32 GetPackedBitSize() const { + // How many indices do we have? + u32 nIdxs = m_Height * m_Width; + if (m_bDualPlane) { + nIdxs *= 2; + } + + return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs); + } + + u32 GetNumWeightValues() const { + u32 ret = m_Width * m_Height; + if (m_bDualPlane) { + ret *= 2; + } + return ret; + } +}; + +static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { + TexelWeightParams params; + + // Read the entire block mode all at once + u16 modeBits = static_cast<u16>(strm.ReadBits<11>()); + + // Does this match the void extent block mode? + if ((modeBits & 0x01FF) == 0x1FC) { + if (modeBits & 0x200) { + params.m_bVoidExtentHDR = true; + } else { + params.m_bVoidExtentLDR = true; + } + + // Next two bits must be one. + if (!(modeBits & 0x400) || !strm.ReadBit()) { + params.m_bError = true; + } + + return params; + } + + // First check if the last four bits are zero + if ((modeBits & 0xF) == 0) { + params.m_bError = true; + return params; + } + + // If the last two bits are zero, then if bits + // [6-8] are all ones, this is also reserved. + if ((modeBits & 0x3) == 0 && (modeBits & 0x1C0) == 0x1C0) { + params.m_bError = true; + return params; + } + + // Otherwise, there is no error... Figure out the layout + // of the block mode. Layout is determined by a number + // between 0 and 9 corresponding to table C.2.8 of the + // ASTC spec. + u32 layout = 0; + + if ((modeBits & 0x1) || (modeBits & 0x2)) { + // layout is in [0-4] + if (modeBits & 0x8) { + // layout is in [2-4] + if (modeBits & 0x4) { + // layout is in [3-4] + if (modeBits & 0x100) { + layout = 4; + } else { + layout = 3; + } + } else { + layout = 2; + } + } else { + // layout is in [0-1] + if (modeBits & 0x4) { + layout = 1; + } else { + layout = 0; + } + } + } else { + // layout is in [5-9] + if (modeBits & 0x100) { + // layout is in [7-9] + if (modeBits & 0x80) { + // layout is in [7-8] + assert((modeBits & 0x40) == 0U); + if (modeBits & 0x20) { + layout = 8; + } else { + layout = 7; + } + } else { + layout = 9; + } + } else { + // layout is in [5-6] + if (modeBits & 0x80) { + layout = 6; + } else { + layout = 5; + } + } + } + + assert(layout < 10); + + // Determine R + u32 R = !!(modeBits & 0x10); + if (layout < 5) { + R |= (modeBits & 0x3) << 1; + } else { + R |= (modeBits & 0xC) >> 1; + } + assert(2 <= R && R <= 7); + + // Determine width & height + switch (layout) { + case 0: { + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x3; + params.m_Width = B + 4; + params.m_Height = A + 2; + break; + } + + case 1: { + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x3; + params.m_Width = B + 8; + params.m_Height = A + 2; + break; + } + + case 2: { + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x3; + params.m_Width = A + 2; + params.m_Height = B + 8; + break; + } + + case 3: { + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x1; + params.m_Width = A + 2; + params.m_Height = B + 6; + break; + } + + case 4: { + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x1; + params.m_Width = B + 2; + params.m_Height = A + 2; + break; + } + + case 5: { + u32 A = (modeBits >> 5) & 0x3; + params.m_Width = 12; + params.m_Height = A + 2; + break; + } + + case 6: { + u32 A = (modeBits >> 5) & 0x3; + params.m_Width = A + 2; + params.m_Height = 12; + break; + } + + case 7: { + params.m_Width = 6; + params.m_Height = 10; + break; + } + + case 8: { + params.m_Width = 10; + params.m_Height = 6; + break; + } + + case 9: { + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 9) & 0x3; + params.m_Width = A + 6; + params.m_Height = B + 6; + break; + } + + default: + assert(false && "Don't know this layout..."); + params.m_bError = true; + break; + } + + // Determine whether or not we're using dual planes + // and/or high precision layouts. + bool D = (layout != 9) && (modeBits & 0x400); + bool H = (layout != 9) && (modeBits & 0x200); + + if (H) { + const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31}; + params.m_MaxWeight = maxWeights[R - 2]; + } else { + const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7}; + params.m_MaxWeight = maxWeights[R - 2]; + } + + params.m_bDualPlane = D; + + return params; +} + +static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth, + u32 blockHeight) { + // Don't actually care about the void extent, just read the bits... + for (s32 i = 0; i < 4; ++i) { + strm.ReadBits<13>(); + } + + // Decode the RGBA components and renormalize them to the range [0, 255] + u16 r = static_cast<u16>(strm.ReadBits<16>()); + u16 g = static_cast<u16>(strm.ReadBits<16>()); + u16 b = static_cast<u16>(strm.ReadBits<16>()); + u16 a = static_cast<u16>(strm.ReadBits<16>()); + + u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 | + (static_cast<u32>(a) & 0xFF00) << 16; + + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { + outBuf[j * blockWidth + i] = rgba; + } + } +} + +static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { + outBuf[j * blockWidth + i] = 0xFFFF00FF; + } + } +} +static constexpr u32 ReplicateByteTo16(std::size_t value) { + return REPLICATE_BYTE_TO_16_TABLE[value]; +} + +static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>(); +static constexpr u32 ReplicateBitTo7(std::size_t value) { + return REPLICATE_BIT_TO_7_TABLE[value]; +} + +static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>(); +static constexpr u32 ReplicateBitTo9(std::size_t value) { + return REPLICATE_BIT_TO_9_TABLE[value]; +} + +static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>(); +static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>(); +static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); +static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); +static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); +/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback +/// to the runtime implementation +static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { + switch (num_bits) { + case 1: + return REPLICATE_1_BIT_TO_8_TABLE[value]; + case 2: + return REPLICATE_2_BIT_TO_8_TABLE[value]; + case 3: + return REPLICATE_3_BIT_TO_8_TABLE[value]; + case 4: + return REPLICATE_4_BIT_TO_8_TABLE[value]; + case 5: + return REPLICATE_5_BIT_TO_8_TABLE[value]; + case 6: + return REPLICATE_6_BIT_TO_8_TABLE[value]; + case 7: + return REPLICATE_7_BIT_TO_8_TABLE[value]; + case 8: + return REPLICATE_8_BIT_TO_8_TABLE[value]; + default: + return Replicate(value, num_bits, 8); + } +} + +static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>(); +static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>(); +static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>(); +static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>(); +static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>(); +static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) { + switch (num_bits) { + case 1: + return REPLICATE_1_BIT_TO_6_TABLE[value]; + case 2: + return REPLICATE_2_BIT_TO_6_TABLE[value]; + case 3: + return REPLICATE_3_BIT_TO_6_TABLE[value]; + case 4: + return REPLICATE_4_BIT_TO_6_TABLE[value]; + case 5: + return REPLICATE_5_BIT_TO_6_TABLE[value]; + default: + return Replicate(value, num_bits, 6); + } +} + +class Pixel { +protected: + using ChannelType = s16; + u8 m_BitDepth[4] = {8, 8, 8, 8}; + s16 color[4] = {}; + +public: + Pixel() = default; + Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8) + : m_BitDepth{u8(bitDepth), u8(bitDepth), u8(bitDepth), u8(bitDepth)}, + color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), + static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} + + // Changes the depth of each pixel. This scales the values to + // the appropriate bit depth by either truncating the least + // significant bits when going from larger to smaller bit depth + // or by repeating the most significant bits when going from + // smaller to larger bit depths. + void ChangeBitDepth() { + for (u32 i = 0; i < 4; i++) { + Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]); + m_BitDepth[i] = 8; + } + } + + template <typename IntType> + static float ConvertChannelToFloat(IntType channel, u8 bitDepth) { + float denominator = static_cast<float>((1 << bitDepth) - 1); + return static_cast<float>(channel) / denominator; + } + + // Changes the bit depth of a single component. See the comment + // above for how we do this. + static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) { + assert(oldDepth <= 8); + + if (oldDepth == 8) { + // Do nothing + return val; + } else if (oldDepth == 0) { + return static_cast<ChannelType>((1 << 8) - 1); + } else if (8 > oldDepth) { + return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth)); + } else { + // oldDepth > newDepth + const u8 bitsWasted = static_cast<u8>(oldDepth - 8); + u16 v = static_cast<u16>(val); + v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); + v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1)); + return static_cast<u8>(v); + } + + assert(false && "We shouldn't get here."); + return 0; + } + + const ChannelType& A() const { + return color[0]; + } + ChannelType& A() { + return color[0]; + } + const ChannelType& R() const { + return color[1]; + } + ChannelType& R() { + return color[1]; + } + const ChannelType& G() const { + return color[2]; + } + ChannelType& G() { + return color[2]; + } + const ChannelType& B() const { + return color[3]; + } + ChannelType& B() { + return color[3]; + } + const ChannelType& Component(u32 idx) const { + return color[idx]; + } + ChannelType& Component(u32 idx) { + return color[idx]; + } + + void GetBitDepth(u8 (&outDepth)[4]) const { + for (s32 i = 0; i < 4; i++) { + outDepth[i] = m_BitDepth[i]; + } + } + + // Take all of the components, transform them to their 8-bit variants, + // and then pack each channel into an R8G8B8A8 32-bit integer. We assume + // that the architecture is little-endian, so the alpha channel will end + // up in the most-significant byte. + u32 Pack() const { + Pixel eightBit(*this); + eightBit.ChangeBitDepth(); + + u32 r = 0; + r |= eightBit.A(); + r <<= 8; + r |= eightBit.B(); + r <<= 8; + r |= eightBit.G(); + r <<= 8; + r |= eightBit.R(); + return r; + } + + // Clamps the pixel to the range [0,255] + void ClampByte() { + for (u32 i = 0; i < 4; i++) { + color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); + } + } + + void MakeOpaque() { + A() = 255; + } +}; + +static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, const u32 nPartitions, + const u32 nBitsForColorData) { + // First figure out how many color values we have + u32 nValues = 0; + for (u32 i = 0; i < nPartitions; i++) { + nValues += ((modes[i] >> 2) + 1) << 1; + } + + // Then based on the number of values and the remaining number of bits, + // figure out the max value for each of them... + u32 range = 256; + while (--range > 0) { + IntegerEncodedValue val = EncodingsValues[range]; + u32 bitLength = val.GetBitLength(nValues); + if (bitLength <= nBitsForColorData) { + // Find the smallest possible range that matches the given encoding + while (--range > 0) { + IntegerEncodedValue newval = EncodingsValues[range]; + if (!newval.MatchesEncoding(val)) { + break; + } + } + + // Return to last matching range. + range++; + break; + } + } + + // We now have enough to decode our integer sequence. + IntegerEncodedVector decodedColorValues; + + InputBitStream colorStream(data, 0); + DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); + + // Once we have the decoded values, we need to dequantize them to the 0-255 range + // This procedure is outlined in ASTC spec C.2.13 + u32 outIdx = 0; + for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { + // Have we already decoded all that we need? + if (outIdx >= nValues) { + break; + } + + const IntegerEncodedValue& val = *itr; + u32 bitlen = val.num_bits; + u32 bitval = val.bit_value; + + assert(bitlen >= 1); + + u32 A = 0, B = 0, C = 0, D = 0; + // A is just the lsb replicated 9 times. + A = ReplicateBitTo9(bitval & 1); + + switch (val.encoding) { + // Replicate bits + case IntegerEncoding::JustBits: + out[outIdx++] = FastReplicateTo8(bitval, bitlen); + break; + + // Use algorithm in C.2.13 + case IntegerEncoding::Trit: { + + D = val.trit_value; + + switch (bitlen) { + case 1: { + C = 204; + } break; + + case 2: { + C = 93; + // B = b000b0bb0 + u32 b = (bitval >> 1) & 1; + B = (b << 8) | (b << 4) | (b << 2) | (b << 1); + } break; + + case 3: { + C = 44; + // B = cb000cbcb + u32 cb = (bitval >> 1) & 3; + B = (cb << 7) | (cb << 2) | cb; + } break; + + case 4: { + C = 22; + // B = dcb000dcb + u32 dcb = (bitval >> 1) & 7; + B = (dcb << 6) | dcb; + } break; + + case 5: { + C = 11; + // B = edcb000ed + u32 edcb = (bitval >> 1) & 0xF; + B = (edcb << 5) | (edcb >> 2); + } break; + + case 6: { + C = 5; + // B = fedcb000f + u32 fedcb = (bitval >> 1) & 0x1F; + B = (fedcb << 4) | (fedcb >> 4); + } break; + + default: + assert(false && "Unsupported trit encoding for color values!"); + break; + } // switch(bitlen) + } // case IntegerEncoding::Trit + break; + + case IntegerEncoding::Quint: { + + D = val.quint_value; + + switch (bitlen) { + case 1: { + C = 113; + } break; + + case 2: { + C = 54; + // B = b0000bb00 + u32 b = (bitval >> 1) & 1; + B = (b << 8) | (b << 3) | (b << 2); + } break; + + case 3: { + C = 26; + // B = cb0000cbc + u32 cb = (bitval >> 1) & 3; + B = (cb << 7) | (cb << 1) | (cb >> 1); + } break; + + case 4: { + C = 13; + // B = dcb0000dc + u32 dcb = (bitval >> 1) & 7; + B = (dcb << 6) | (dcb >> 1); + } break; + + case 5: { + C = 6; + // B = edcb0000e + u32 edcb = (bitval >> 1) & 0xF; + B = (edcb << 5) | (edcb >> 3); + } break; + + default: + assert(false && "Unsupported quint encoding for color values!"); + break; + } // switch(bitlen) + } // case IntegerEncoding::Quint + break; + } // switch(val.encoding) + + if (val.encoding != IntegerEncoding::JustBits) { + u32 T = D * C + B; + T ^= A; + T = (A & 0x80) | (T >> 2); + out[outIdx++] = T; + } + } + + // Make sure that each of our values is in the proper range... + for (u32 i = 0; i < nValues; i++) { + assert(out[i] <= 255); + } +} + +static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { + u32 bitval = val.bit_value; + u32 bitlen = val.num_bits; + + u32 A = ReplicateBitTo7(bitval & 1); + u32 B = 0, C = 0, D = 0; + + u32 result = 0; + switch (val.encoding) { + case IntegerEncoding::JustBits: + result = FastReplicateTo6(bitval, bitlen); + break; + + case IntegerEncoding::Trit: { + D = val.trit_value; + assert(D < 3); + + switch (bitlen) { + case 0: { + u32 results[3] = {0, 32, 63}; + result = results[D]; + } break; + + case 1: { + C = 50; + } break; + + case 2: { + C = 23; + u32 b = (bitval >> 1) & 1; + B = (b << 6) | (b << 2) | b; + } break; + + case 3: { + C = 11; + u32 cb = (bitval >> 1) & 3; + B = (cb << 5) | cb; + } break; + + default: + assert(false && "Invalid trit encoding for texel weight"); + break; + } + } break; + + case IntegerEncoding::Quint: { + D = val.quint_value; + assert(D < 5); + + switch (bitlen) { + case 0: { + u32 results[5] = {0, 16, 32, 47, 63}; + result = results[D]; + } break; + + case 1: { + C = 28; + } break; + + case 2: { + C = 13; + u32 b = (bitval >> 1) & 1; + B = (b << 6) | (b << 1); + } break; + + default: + assert(false && "Invalid quint encoding for texel weight"); + break; + } + } break; + } + + if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) { + // Decode the value... + result = D * C + B; + result ^= A; + result = (A & 0x20) | (result >> 2); + } + + assert(result < 64); + + // Change from [0,63] to [0,64] + if (result > 32) { + result += 1; + } + + return result; +} + +static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights, + const TexelWeightParams& params, const u32 blockWidth, + const u32 blockHeight) { + u32 weightIdx = 0; + u32 unquantized[2][144]; + + for (auto itr = weights.begin(); itr != weights.end(); ++itr) { + unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); + + if (params.m_bDualPlane) { + ++itr; + unquantized[1][weightIdx] = UnquantizeTexelWeight(*itr); + if (itr == weights.end()) { + break; + } + } + + if (++weightIdx >= (params.m_Width * params.m_Height)) + break; + } + + // Do infill if necessary (Section C.2.18) ... + u32 Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); + u32 Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); + + const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U; + for (u32 plane = 0; plane < kPlaneScale; plane++) + for (u32 t = 0; t < blockHeight; t++) + for (u32 s = 0; s < blockWidth; s++) { + u32 cs = Ds * s; + u32 ct = Dt * t; + + u32 gs = (cs * (params.m_Width - 1) + 32) >> 6; + u32 gt = (ct * (params.m_Height - 1) + 32) >> 6; + + u32 js = gs >> 4; + u32 fs = gs & 0xF; + + u32 jt = gt >> 4; + u32 ft = gt & 0x0F; + + u32 w11 = (fs * ft + 8) >> 4; + u32 w10 = ft - w11; + u32 w01 = fs - w11; + u32 w00 = 16 - fs - ft + w11; + + u32 v0 = js + jt * params.m_Width; + +#define FIND_TEXEL(tidx, bidx) \ + u32 p##bidx = 0; \ + do { \ + if ((tidx) < (params.m_Width * params.m_Height)) { \ + p##bidx = unquantized[plane][(tidx)]; \ + } \ + } while (0) + + FIND_TEXEL(v0, 00); + FIND_TEXEL(v0 + 1, 01); + FIND_TEXEL(v0 + params.m_Width, 10); + FIND_TEXEL(v0 + params.m_Width + 1, 11); + +#undef FIND_TEXEL + + out[plane][t * blockWidth + s] = + (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4; + } +} + +// Transfers a bit as described in C.2.14 +static inline void BitTransferSigned(int& a, int& b) { + b >>= 1; + b |= a & 0x80; + a >>= 1; + a &= 0x3F; + if (a & 0x20) + a -= 0x40; +} + +// Adds more precision to the blue channel as described +// in C.2.14 +static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) { + return Pixel(static_cast<s16>(a), static_cast<s16>((r + b) >> 1), + static_cast<s16>((g + b) >> 1), static_cast<s16>(b)); +} + +// Partition selection functions as specified in +// C.2.21 +static inline u32 hash52(u32 p) { + p ^= p >> 15; + p -= p << 17; + p += p << 7; + p += p << 4; + p ^= p >> 5; + p += p << 16; + p ^= p >> 7; + p ^= p >> 3; + p ^= p << 6; + p ^= p >> 17; + return p; +} + +static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) { + if (1 == partitionCount) + return 0; + + if (smallBlock) { + x <<= 1; + y <<= 1; + z <<= 1; + } + + seed += (partitionCount - 1) * 1024; + + u32 rnum = hash52(static_cast<u32>(seed)); + u8 seed1 = static_cast<u8>(rnum & 0xF); + u8 seed2 = static_cast<u8>((rnum >> 4) & 0xF); + u8 seed3 = static_cast<u8>((rnum >> 8) & 0xF); + u8 seed4 = static_cast<u8>((rnum >> 12) & 0xF); + u8 seed5 = static_cast<u8>((rnum >> 16) & 0xF); + u8 seed6 = static_cast<u8>((rnum >> 20) & 0xF); + u8 seed7 = static_cast<u8>((rnum >> 24) & 0xF); + u8 seed8 = static_cast<u8>((rnum >> 28) & 0xF); + u8 seed9 = static_cast<u8>((rnum >> 18) & 0xF); + u8 seed10 = static_cast<u8>((rnum >> 22) & 0xF); + u8 seed11 = static_cast<u8>((rnum >> 26) & 0xF); + u8 seed12 = static_cast<u8>(((rnum >> 30) | (rnum << 2)) & 0xF); + + seed1 = static_cast<u8>(seed1 * seed1); + seed2 = static_cast<u8>(seed2 * seed2); + seed3 = static_cast<u8>(seed3 * seed3); + seed4 = static_cast<u8>(seed4 * seed4); + seed5 = static_cast<u8>(seed5 * seed5); + seed6 = static_cast<u8>(seed6 * seed6); + seed7 = static_cast<u8>(seed7 * seed7); + seed8 = static_cast<u8>(seed8 * seed8); + seed9 = static_cast<u8>(seed9 * seed9); + seed10 = static_cast<u8>(seed10 * seed10); + seed11 = static_cast<u8>(seed11 * seed11); + seed12 = static_cast<u8>(seed12 * seed12); + + s32 sh1, sh2, sh3; + if (seed & 1) { + sh1 = (seed & 2) ? 4 : 5; + sh2 = (partitionCount == 3) ? 6 : 5; + } else { + sh1 = (partitionCount == 3) ? 6 : 5; + sh2 = (seed & 2) ? 4 : 5; + } + sh3 = (seed & 0x10) ? sh1 : sh2; + + seed1 = static_cast<u8>(seed1 >> sh1); + seed2 = static_cast<u8>(seed2 >> sh2); + seed3 = static_cast<u8>(seed3 >> sh1); + seed4 = static_cast<u8>(seed4 >> sh2); + seed5 = static_cast<u8>(seed5 >> sh1); + seed6 = static_cast<u8>(seed6 >> sh2); + seed7 = static_cast<u8>(seed7 >> sh1); + seed8 = static_cast<u8>(seed8 >> sh2); + seed9 = static_cast<u8>(seed9 >> sh3); + seed10 = static_cast<u8>(seed10 >> sh3); + seed11 = static_cast<u8>(seed11 >> sh3); + seed12 = static_cast<u8>(seed12 >> sh3); + + s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); + s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); + s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); + s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); + + a &= 0x3F; + b &= 0x3F; + c &= 0x3F; + d &= 0x3F; + + if (partitionCount < 4) + d = 0; + if (partitionCount < 3) + c = 0; + + if (a >= b && a >= c && a >= d) + return 0; + else if (b >= c && b >= d) + return 1; + else if (c >= d) + return 2; + return 3; +} + +static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) { + return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); +} + +// Section C.2.14 +static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues, + u32 colorEndpointMode) { +#define READ_UINT_VALUES(N) \ + u32 v[N]; \ + for (u32 i = 0; i < N; i++) { \ + v[i] = *(colorValues++); \ + } + +#define READ_INT_VALUES(N) \ + s32 v[N]; \ + for (u32 i = 0; i < N; i++) { \ + v[i] = static_cast<int>(*(colorValues++)); \ + } + + switch (colorEndpointMode) { + case 0: { + READ_UINT_VALUES(2) + ep1 = Pixel(0xFF, v[0], v[0], v[0]); + ep2 = Pixel(0xFF, v[1], v[1], v[1]); + } break; + + case 1: { + READ_UINT_VALUES(2) + u32 L0 = (v[0] >> 2) | (v[1] & 0xC0); + u32 L1 = std::min(L0 + (v[1] & 0x3F), 0xFFU); + ep1 = Pixel(0xFF, L0, L0, L0); + ep2 = Pixel(0xFF, L1, L1, L1); + } break; + + case 4: { + READ_UINT_VALUES(4) + ep1 = Pixel(v[2], v[0], v[0], v[0]); + ep2 = Pixel(v[3], v[1], v[1], v[1]); + } break; + + case 5: { + READ_INT_VALUES(4) + BitTransferSigned(v[1], v[0]); + BitTransferSigned(v[3], v[2]); + ep1 = Pixel(v[2], v[0], v[0], v[0]); + ep2 = Pixel(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1]); + ep1.ClampByte(); + ep2.ClampByte(); + } break; + + case 6: { + READ_UINT_VALUES(4) + ep1 = Pixel(0xFF, v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8); + ep2 = Pixel(0xFF, v[0], v[1], v[2]); + } break; + + case 8: { + READ_UINT_VALUES(6) + if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) { + ep1 = Pixel(0xFF, v[0], v[2], v[4]); + ep2 = Pixel(0xFF, v[1], v[3], v[5]); + } else { + ep1 = BlueContract(0xFF, v[1], v[3], v[5]); + ep2 = BlueContract(0xFF, v[0], v[2], v[4]); + } + } break; + + case 9: { + READ_INT_VALUES(6) + BitTransferSigned(v[1], v[0]); + BitTransferSigned(v[3], v[2]); + BitTransferSigned(v[5], v[4]); + if (v[1] + v[3] + v[5] >= 0) { + ep1 = Pixel(0xFF, v[0], v[2], v[4]); + ep2 = Pixel(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]); + } else { + ep1 = BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]); + ep2 = BlueContract(0xFF, v[0], v[2], v[4]); + } + ep1.ClampByte(); + ep2.ClampByte(); + } break; + + case 10: { + READ_UINT_VALUES(6) + ep1 = Pixel(v[4], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8); + ep2 = Pixel(v[5], v[0], v[1], v[2]); + } break; + + case 12: { + READ_UINT_VALUES(8) + if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) { + ep1 = Pixel(v[6], v[0], v[2], v[4]); + ep2 = Pixel(v[7], v[1], v[3], v[5]); + } else { + ep1 = BlueContract(v[7], v[1], v[3], v[5]); + ep2 = BlueContract(v[6], v[0], v[2], v[4]); + } + } break; + + case 13: { + READ_INT_VALUES(8) + BitTransferSigned(v[1], v[0]); + BitTransferSigned(v[3], v[2]); + BitTransferSigned(v[5], v[4]); + BitTransferSigned(v[7], v[6]); + if (v[1] + v[3] + v[5] >= 0) { + ep1 = Pixel(v[6], v[0], v[2], v[4]); + ep2 = Pixel(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]); + } else { + ep1 = BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]); + ep2 = BlueContract(v[6], v[0], v[2], v[4]); + } + ep1.ClampByte(); + ep2.ClampByte(); + } break; + + default: + assert(false && "Unsupported color endpoint mode (is it HDR?)"); + break; + } + +#undef READ_UINT_VALUES +#undef READ_INT_VALUES +} + +static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, + const u32 blockHeight, std::span<u32, 12 * 12> outBuf) { + InputBitStream strm(inBuf); + TexelWeightParams weightParams = DecodeBlockInfo(strm); + + // Was there an error? + if (weightParams.m_bError) { + assert(false && "Invalid block mode"); + FillError(outBuf, blockWidth, blockHeight); + return; + } + + if (weightParams.m_bVoidExtentLDR) { + FillVoidExtentLDR(strm, outBuf, blockWidth, blockHeight); + return; + } + + if (weightParams.m_bVoidExtentHDR) { + assert(false && "HDR void extent blocks are unsupported!"); + FillError(outBuf, blockWidth, blockHeight); + return; + } + + if (weightParams.m_Width > blockWidth) { + assert(false && "Texel weight grid width should be smaller than block width"); + FillError(outBuf, blockWidth, blockHeight); + return; + } + + if (weightParams.m_Height > blockHeight) { + assert(false && "Texel weight grid height should be smaller than block height"); + FillError(outBuf, blockWidth, blockHeight); + return; + } + + // Read num partitions + u32 nPartitions = strm.ReadBits<2>() + 1; + assert(nPartitions <= 4); + + if (nPartitions == 4 && weightParams.m_bDualPlane) { + assert(false && "Dual plane mode is incompatible with four partition blocks"); + FillError(outBuf, blockWidth, blockHeight); + return; + } + + // Based on the number of partitions, read the color endpoint mode for + // each partition. + + // Determine partitions, partition index, and color endpoint modes + s32 planeIdx = -1; + u32 partitionIndex; + u32 colorEndpointMode[4] = {0, 0, 0, 0}; + + // Define color data. + u8 colorEndpointData[16]; + memset(colorEndpointData, 0, sizeof(colorEndpointData)); + OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); + + // Read extra config data... + u32 baseCEM = 0; + if (nPartitions == 1) { + colorEndpointMode[0] = strm.ReadBits<4>(); + partitionIndex = 0; + } else { + partitionIndex = strm.ReadBits<10>(); + baseCEM = strm.ReadBits<6>(); + } + u32 baseMode = (baseCEM & 3); + + // Remaining bits are color endpoint data... + u32 nWeightBits = weightParams.GetPackedBitSize(); + s32 remainingBits = 128 - nWeightBits - static_cast<int>(strm.GetBitsRead()); + + // Consider extra bits prior to texel data... + u32 extraCEMbits = 0; + if (baseMode) { + switch (nPartitions) { + case 2: + extraCEMbits += 2; + break; + case 3: + extraCEMbits += 5; + break; + case 4: + extraCEMbits += 8; + break; + default: + assert(false); + break; + } + } + remainingBits -= extraCEMbits; + + // Do we have a dual plane situation? + u32 planeSelectorBits = 0; + if (weightParams.m_bDualPlane) { + planeSelectorBits = 2; + } + remainingBits -= planeSelectorBits; + + // Read color data... + u32 colorDataBits = remainingBits; + while (remainingBits > 0) { + u32 nb = std::min(remainingBits, 8); + u32 b = strm.ReadBits(nb); + colorEndpointStream.WriteBits(b, nb); + remainingBits -= 8; + } + + // Read the plane selection bits + planeIdx = strm.ReadBits(planeSelectorBits); + + // Read the rest of the CEM + if (baseMode) { + u32 extraCEM = strm.ReadBits(extraCEMbits); + u32 CEM = (extraCEM << 6) | baseCEM; + CEM >>= 2; + + bool C[4] = {0}; + for (u32 i = 0; i < nPartitions; i++) { + C[i] = CEM & 1; + CEM >>= 1; + } + + u8 M[4] = {0}; + for (u32 i = 0; i < nPartitions; i++) { + M[i] = CEM & 3; + CEM >>= 2; + assert(M[i] <= 3); + } + + for (u32 i = 0; i < nPartitions; i++) { + colorEndpointMode[i] = baseMode; + if (!(C[i])) + colorEndpointMode[i] -= 1; + colorEndpointMode[i] <<= 2; + colorEndpointMode[i] |= M[i]; + } + } else if (nPartitions > 1) { + u32 CEM = baseCEM >> 2; + for (u32 i = 0; i < nPartitions; i++) { + colorEndpointMode[i] = CEM; + } + } + + // Make sure everything up till here is sane. + for (u32 i = 0; i < nPartitions; i++) { + assert(colorEndpointMode[i] < 16); + } + assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); + + // Decode both color data and texel weight data + u32 colorValues[32]; // Four values, two endpoints, four maximum paritions + DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions, + colorDataBits); + + Pixel endpoints[4][2]; + const u32* colorValuesPtr = colorValues; + for (u32 i = 0; i < nPartitions; i++) { + ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]); + } + + // Read the texel weight data.. + std::array<u8, 16> texelWeightData; + std::ranges::copy(inBuf, texelWeightData.begin()); + + // Reverse everything + for (u32 i = 0; i < 8; i++) { +// Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits +#define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 + u8 a = static_cast<u8>(REVERSE_BYTE(texelWeightData[i])); + u8 b = static_cast<u8>(REVERSE_BYTE(texelWeightData[15 - i])); +#undef REVERSE_BYTE + + texelWeightData[i] = b; + texelWeightData[15 - i] = a; + } + + // Make sure that higher non-texel bits are set to zero + const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; + if (clearByteStart > 0 && clearByteStart <= texelWeightData.size()) { + texelWeightData[clearByteStart - 1] &= + static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); + std::memset(texelWeightData.data() + clearByteStart, 0, + std::min(16U - clearByteStart, 16U)); + } + + IntegerEncodedVector texelWeightValues; + + InputBitStream weightStream(texelWeightData); + + DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, + weightParams.GetNumWeightValues()); + + // Blocks can be at most 12x12, so we can have as many as 144 weights + u32 weights[2][144]; + UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); + + // Now that we have endpoints and weights, we can interpolate and generate + // the proper decoding... + for (u32 j = 0; j < blockHeight; j++) + for (u32 i = 0; i < blockWidth; i++) { + u32 partition = Select2DPartition(partitionIndex, i, j, nPartitions, + (blockHeight * blockWidth) < 32); + assert(partition < nPartitions); + + Pixel p; + for (u32 c = 0; c < 4; c++) { + u32 C0 = endpoints[partition][0].Component(c); + C0 = ReplicateByteTo16(C0); + u32 C1 = endpoints[partition][1].Component(c); + C1 = ReplicateByteTo16(C1); + + u32 plane = 0; + if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { + plane = 1; + } + + u32 weight = weights[plane][j * blockWidth + i]; + u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64; + if (C == 65535) { + p.Component(c) = 255; + } else { + double Cf = static_cast<double>(C); + p.Component(c) = static_cast<u16>(255.0 * (Cf / 65536.0) + 0.5); + } + } + + outBuf[j * blockWidth + i] = p.Pack(); + } +} + +void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, + uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { + u32 block_index = 0; + std::size_t depth_offset = 0; + for (u32 z = 0; z < depth; z++) { + for (u32 y = 0; y < height; y += block_height) { + for (u32 x = 0; x < width; x += block_width) { + const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; + + // Blocks can be at most 12x12 + std::array<u32, 12 * 12> uncompData; + DecompressBlock(blockPtr, block_width, block_height, uncompData); + + u32 decompWidth = std::min(block_width, width - x); + u32 decompHeight = std::min(block_height, height - y); + + const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); + for (u32 jj = 0; jj < decompHeight; jj++) { + std::memcpy(outRow.data() + jj * width * 4, + uncompData.data() + jj * block_width, decompWidth * 4); + } + ++block_index; + } + } + depth_offset += height * width * 4; + } +} + +} // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index c1c73fda5..c1c37dfe7 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -129,4 +129,7 @@ struct AstcBufferData { decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; } constexpr ASTC_BUFFER_DATA; +void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, + uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); + } // namespace Tegra::Texture::ASTC diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index a59b36e13..916a22724 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -647,6 +647,8 @@ void Config::ReadDebuggingValues() { ReadSetting(QStringLiteral("program_args"), QString{}).toString().toStdString(); Settings::values.dump_exefs = ReadSetting(QStringLiteral("dump_exefs"), false).toBool(); Settings::values.dump_nso = ReadSetting(QStringLiteral("dump_nso"), false).toBool(); + Settings::values.enable_fs_access_log = + ReadSetting(QStringLiteral("enable_fs_access_log"), false).toBool(); Settings::values.reporting_services = ReadSetting(QStringLiteral("reporting_services"), false).toBool(); Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool(); @@ -807,6 +809,7 @@ void Config::ReadRendererValues() { QStringLiteral("use_asynchronous_gpu_emulation"), true); ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"), true); + ReadSettingGlobal(Settings::values.accelerate_astc, QStringLiteral("accelerate_astc"), true); ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), false); @@ -1258,6 +1261,8 @@ void Config::SaveDebuggingValues() { QString::fromStdString(Settings::values.program_args), QString{}); WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false); WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false); + WriteSetting(QStringLiteral("enable_fs_access_log"), Settings::values.enable_fs_access_log, + false); WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false); WriteSetting(QStringLiteral("use_debug_asserts"), Settings::values.use_debug_asserts, false); WriteSetting(QStringLiteral("disable_macro_jit"), Settings::values.disable_macro_jit, false); @@ -1388,6 +1393,7 @@ void Config::SaveRendererValues() { Settings::values.use_asynchronous_gpu_emulation, true); WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation, true); + WriteSettingGlobal(QStringLiteral("accelerate_astc"), Settings::values.accelerate_astc, true); WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders, false); diff --git a/src/yuzu/configuration/configure_cpu_debug.ui b/src/yuzu/configuration/configure_cpu_debug.ui index 11ee19a12..c43f89a5a 100644 --- a/src/yuzu/configuration/configure_cpu_debug.ui +++ b/src/yuzu/configuration/configure_cpu_debug.ui @@ -34,7 +34,7 @@ <br> If you're not sure what these do, keep all of these enabled. <br> - These settings only take effect when CPU Accuracy is "Debug Mode". + These settings, when disabled, only take effect when CPU Accuracy is "Debug Mode". </div> </string> </property> diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index b207e07cb..15d6a5ad7 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -28,17 +28,21 @@ ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::Co ConfigureDebug::~ConfigureDebug() = default; void ConfigureDebug::SetConfiguration() { - ui->toggle_console->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); + + ui->toggle_console->setEnabled(runtime_lock); ui->toggle_console->setChecked(UISettings::values.show_console); ui->log_filter_edit->setText(QString::fromStdString(Settings::values.log_filter)); ui->homebrew_args_edit->setText(QString::fromStdString(Settings::values.program_args)); + ui->fs_access_log->setEnabled(runtime_lock); + ui->fs_access_log->setChecked(Settings::values.enable_fs_access_log); ui->reporting_services->setChecked(Settings::values.reporting_services); ui->quest_flag->setChecked(Settings::values.quest_flag); ui->use_debug_asserts->setChecked(Settings::values.use_debug_asserts); ui->use_auto_stub->setChecked(Settings::values.use_auto_stub); - ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->enable_graphics_debugging->setEnabled(runtime_lock); ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug); - ui->disable_macro_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->disable_macro_jit->setEnabled(runtime_lock); ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit); ui->extended_logging->setChecked(Settings::values.extended_logging); } @@ -47,6 +51,7 @@ void ConfigureDebug::ApplyConfiguration() { UISettings::values.show_console = ui->toggle_console->isChecked(); Settings::values.log_filter = ui->log_filter_edit->text().toStdString(); Settings::values.program_args = ui->homebrew_args_edit->text().toStdString(); + Settings::values.enable_fs_access_log = ui->fs_access_log->isChecked(); Settings::values.reporting_services = ui->reporting_services->isChecked(); Settings::values.quest_flag = ui->quest_flag->isChecked(); Settings::values.use_debug_asserts = ui->use_debug_asserts->isChecked(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index c9e60ee08..c8087542f 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -144,10 +144,17 @@ <item> <widget class="QGroupBox" name="groupBox_5"> <property name="title"> - <string>Dump</string> + <string>Debugging</string> </property> <layout class="QVBoxLayout" name="verticalLayout_7"> <item> + <widget class="QCheckBox" name="fs_access_log"> + <property name="text"> + <string>Enable FS Access Log</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="reporting_services"> <property name="text"> <string>Enable Verbose Reporting Services</string> diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index fb9ec093c..41a69d9b8 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -70,10 +70,12 @@ void ConfigureGraphics::SetConfiguration() { ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_nvdec_emulation->setEnabled(runtime_lock); + ui->accelerate_astc->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); ui->use_asynchronous_gpu_emulation->setChecked( Settings::values.use_asynchronous_gpu_emulation.GetValue()); ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue()); + ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue())); @@ -118,6 +120,8 @@ void ConfigureGraphics::ApplyConfiguration() { use_asynchronous_gpu_emulation); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation, ui->use_nvdec_emulation, use_nvdec_emulation); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc, + accelerate_astc); if (Settings::IsConfiguringGlobal()) { // Guard if during game and set to game-specific value @@ -254,6 +258,7 @@ void ConfigureGraphics::SetupPerGameUI() { ui->use_asynchronous_gpu_emulation->setEnabled( Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal()); + ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal()); ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal()); ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal()); @@ -269,6 +274,8 @@ void ConfigureGraphics::SetupPerGameUI() { ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache); ConfigurationShared::SetColoredTristate( ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation); + ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc, + accelerate_astc); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation, Settings::values.use_asynchronous_gpu_emulation, use_asynchronous_gpu_emulation); diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index c162048a2..6418115cf 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -47,6 +47,7 @@ private: QColor bg_color; ConfigurationShared::CheckState use_nvdec_emulation; + ConfigurationShared::CheckState accelerate_astc; ConfigurationShared::CheckState use_disk_shader_cache; ConfigurationShared::CheckState use_asynchronous_gpu_emulation; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index ab0bd4d77..5b999d84d 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -105,6 +105,13 @@ </widget> </item> <item> + <widget class="QCheckBox" name="accelerate_astc"> + <property name="text"> + <string>Accelerate ASTC texture decoding</string> + </property> + </widget> + </item> + <item> <widget class="QWidget" name="fullscreen_mode_layout" native="true"> <layout class="QHBoxLayout" name="horizontalLayout_1"> <property name="leftMargin"> diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 63f368fe5..621b31571 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -447,8 +447,10 @@ void Config::ReadValues() { sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", true)); Settings::values.use_asynchronous_shaders.SetValue( sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false)); - Settings::values.use_asynchronous_shaders.SetValue( - sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false)); + Settings::values.use_nvdec_emulation.SetValue( + sdl2_config->GetBoolean("Renderer", "use_nvdec_emulation", true)); + Settings::values.accelerate_astc.SetValue( + sdl2_config->GetBoolean("Renderer", "accelerate_astc", true)); Settings::values.use_fast_gpu_time.SetValue( sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true)); @@ -477,6 +479,8 @@ void Config::ReadValues() { Settings::values.program_args = sdl2_config->Get("Debugging", "program_args", ""); Settings::values.dump_exefs = sdl2_config->GetBoolean("Debugging", "dump_exefs", false); Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false); + Settings::values.enable_fs_access_log = + sdl2_config->GetBoolean("Debugging", "enable_fs_access_log", false); Settings::values.reporting_services = sdl2_config->GetBoolean("Debugging", "reporting_services", false); Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index f48d935a1..efa1b1d18 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -194,6 +194,14 @@ use_assembly_shaders = # 0 (default): Off, 1: On use_asynchronous_shaders = +# Enable NVDEC emulation. +# 0: Off, 1 (default): On +use_nvdec_emulation = + +# Accelerate ASTC texture decoding. +# 0: Off, 1 (default): On +accelerate_astc = + # Turns on the frame limiter, which will limit frames output to the target game speed # 0: Off, 1: On (default) use_frame_limit = @@ -338,6 +346,8 @@ record_frame_times = dump_exefs=false # Determines whether or not yuzu will dump all NSOs it attempts to load while loading them dump_nso=false +# Determines whether or not yuzu will save the filesystem access log. +enable_fs_access_log=false # Determines whether or not yuzu will report to the game that the emulated console is in Kiosk Mode # false: Retail/Normal Mode (default), true: Kiosk Mode quest_flag = |